summary refs log tree commit diff stats
path: root/WebCore/platform/text
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/platform/text')
-rw-r--r-- WebCore/platform/text/AtomicString.h 14
-rw-r--r-- WebCore/platform/text/AtomicStringImpl.h 2
-rw-r--r-- WebCore/platform/text/BidiContext.cpp 2
-rw-r--r-- WebCore/platform/text/PlatformString.h 9
-rw-r--r-- WebCore/platform/text/RegularExpression.h 2
-rw-r--r-- WebCore/platform/text/String.cpp 3
-rw-r--r-- WebCore/platform/text/StringHash.h 5
-rw-r--r-- WebCore/platform/text/StringImpl.h 4
-rw-r--r-- WebCore/platform/text/TextBoundaries.cpp (renamed from WebCore/platform/text/TextBoundariesICU.cpp) 34
-rw-r--r-- WebCore/platform/text/TextBreakIterator.h 2
-rw-r--r-- WebCore/platform/text/TextBreakIteratorICU.cpp 10
-rw-r--r-- WebCore/platform/text/TextEncoding.cpp 19
-rw-r--r-- WebCore/platform/text/TextEncodingRegistry.cpp 19
-rw-r--r-- WebCore/platform/text/cf/StringCF.cpp 4
-rw-r--r-- WebCore/platform/text/cf/StringImplCF.cpp 4
-rw-r--r-- WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp 217
-rw-r--r-- WebCore/platform/text/gtk/TextCodecGtk.cpp 446
-rw-r--r-- WebCore/platform/text/gtk/TextCodecGtk.h 147
-rw-r--r-- WebCore/platform/text/qt/TextBoundariesQt.cpp (renamed from WebCore/platform/text/qt/TextBoundaries.cpp) 46
-rw-r--r-- WebCore/platform/text/qt/TextBreakIteratorQt.cpp 183
-rw-r--r-- WebCore/platform/text/wince/TextBoundariesWince.cpp 75
-rw-r--r-- WebCore/platform/text/wince/TextBreakIteratorWince.cpp 311
22 files changed, 1293 insertions, 265 deletions
diff --git a/WebCore/platform/text/AtomicString.h b/WebCore/platform/text/AtomicString.h
index 8805f4c..47d07c5 100644
--- a/WebCore/platform/text/AtomicString.h
+++ b/WebCore/platform/text/AtomicString.h
@@ -24,6 +24,14 @@
#include "AtomicStringImpl.h"
#include "PlatformString.h"
+// Define 'NO_IMPLICIT_ATOMICSTRING' before including this header,
+// to disallow (expensive) implicit String-->AtomicString conversions.
+#ifdef NO_IMPLICIT_ATOMICSTRING
+#define ATOMICSTRING_CONVERSION explicit
+#else
+#define ATOMICSTRING_CONVERSION
+#endif
+
namespace WebCore {
struct AtomicStringHash;
@@ -40,9 +48,9 @@ public:
AtomicString(const JSC::UString& s) : m_string(add(s)) { }
AtomicString(const JSC::Identifier& s) : m_string(add(s)) { }
#endif
- AtomicString(StringImpl* imp) : m_string(add(imp)) { }
+ ATOMICSTRING_CONVERSION AtomicString(StringImpl* imp) : m_string(add(imp)) { }
AtomicString(AtomicStringImpl* imp) : m_string(imp) { }
- AtomicString(const String& s) : m_string(add(s.impl())) { }
+ ATOMICSTRING_CONVERSION AtomicString(const String& s) : m_string(add(s.impl())) { }
// Hash table deleted values, which are only constructed and never copied or destroyed.
AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
@@ -96,7 +104,7 @@ public:
static void remove(StringImpl*);
-#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#if PLATFORM(CF)
AtomicString(CFStringRef s) : m_string(add(String(s).impl())) { }
CFStringRef createCFString() const { return m_string.createCFString(); }
#endif
diff --git a/WebCore/platform/text/AtomicStringImpl.h b/WebCore/platform/text/AtomicStringImpl.h
index d905afc..ba1c72c 100644
--- a/WebCore/platform/text/AtomicStringImpl.h
+++ b/WebCore/platform/text/AtomicStringImpl.h
@@ -1,6 +1,4 @@
/*
- * This file is part of the DOM implementation for KDE.
- *
* Copyright (C) 2006 Apple Computer, Inc.
*
* This library is free software; you can redistribute it and/or
diff --git a/WebCore/platform/text/BidiContext.cpp b/WebCore/platform/text/BidiContext.cpp
index 546571e..59db7bd 100644
--- a/WebCore/platform/text/BidiContext.cpp
+++ b/WebCore/platform/text/BidiContext.cpp
@@ -30,7 +30,7 @@ using namespace WTF::Unicode;
PassRefPtr<BidiContext> BidiContext::create(unsigned char level, Direction direction, bool override, BidiContext* parent)
{
- ASSERT(direction == level % 2 ? RightToLeft : LeftToRight);
+ ASSERT(direction == (level % 2 ? RightToLeft : LeftToRight));
if (parent)
return adoptRef(new BidiContext(level, direction, override, parent));
diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h
index 8d19c17..247536a 100644
--- a/WebCore/platform/text/PlatformString.h
+++ b/WebCore/platform/text/PlatformString.h
@@ -41,7 +41,7 @@
#include <wtf/OwnPtr.h>
#endif
-#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#if PLATFORM(CF)
typedef const struct __CFString * CFStringRef;
#endif
@@ -206,7 +206,7 @@ public:
StringImpl* impl() const { return m_impl.get(); }
-#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#if PLATFORM(CF)
String(CFStringRef);
CFStringRef createCFString() const;
#endif
@@ -286,6 +286,11 @@ inline bool equalIgnoringCase(const String& a, const String& b) { return equalIg
inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); }
inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); }
+// Compares a and b with equalIgnoringCase() when ignoreCase is set,
+// and with String's operator== otherwise.
+inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase)
+{
+    if (ignoreCase)
+        return equalIgnoringCase(a, b);
+    return a == b;
+}
+
inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); }
inline bool operator!(const String& str) { return str.isNull(); }
diff --git a/WebCore/platform/text/RegularExpression.h b/WebCore/platform/text/RegularExpression.h
index 3254067..f1611e5 100644
--- a/WebCore/platform/text/RegularExpression.h
+++ b/WebCore/platform/text/RegularExpression.h
@@ -30,7 +30,7 @@
namespace WebCore {
-class RegularExpression {
+class RegularExpression : public FastAllocBase {
public:
RegularExpression(const String&, TextCaseSensitivity);
~RegularExpression();
diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp
index 44582a9..24659a4 100644
--- a/WebCore/platform/text/String.cpp
+++ b/WebCore/platform/text/String.cpp
@@ -81,6 +81,9 @@ String::String(const char* str, unsigned length)
void String::append(const String& str)
{
+ if (str.isEmpty())
+ return;
+
// FIXME: This is extremely inefficient. So much so that we might want to take this
// out of String's API. We can make it better by optimizing the case where exactly
// one String is pointing at this StringImpl, but even then it's going to require a
diff --git a/WebCore/platform/text/StringHash.h b/WebCore/platform/text/StringHash.h
index fc6cb3c..21a478e 100644
--- a/WebCore/platform/text/StringHash.h
+++ b/WebCore/platform/text/StringHash.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
+ * Copyright (C) Research In Motion Limited 2009. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -29,6 +30,10 @@
namespace WebCore {
+ // The hash() functions on StringHash and CaseFoldingHash do not support
+ // null strings. get(), contains(), and add() on HashMap<String,..., StringHash>
+ // cause a null-pointer dereference when passed null strings.
+
// FIXME: We should really figure out a way to put the computeHash function that's
// currently a member function of StringImpl into this file so we can be a little
// closer to having all the nearly-identical hash functions in one place.
diff --git a/WebCore/platform/text/StringImpl.h b/WebCore/platform/text/StringImpl.h
index dac25b2..5155fa5 100644
--- a/WebCore/platform/text/StringImpl.h
+++ b/WebCore/platform/text/StringImpl.h
@@ -37,7 +37,7 @@
#include <runtime/UString.h>
#endif
-#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#if PLATFORM(CF)
typedef const struct __CFString * CFStringRef;
#endif
@@ -168,7 +168,7 @@ public:
WTF::Unicode::Direction defaultWritingDirection();
-#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#if PLATFORM(CF)
CFStringRef createCFString();
#endif
#ifdef __OBJC__
diff --git a/WebCore/platform/text/TextBoundariesICU.cpp b/WebCore/platform/text/TextBoundaries.cpp
index b1e8ee2..2455f6d 100644
--- a/WebCore/platform/text/TextBoundariesICU.cpp
+++ b/WebCore/platform/text/TextBoundaries.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,39 +27,40 @@
#include "config.h"
#include "TextBoundaries.h"
-#include <unicode/ubrk.h>
-#include <unicode/uchar.h>
-
#include "StringImpl.h"
#include "TextBreakIterator.h"
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF;
+using namespace Unicode;
namespace WebCore {
int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
{
- UBreakIterator* it = wordBreakIterator(chars, len);
+ TextBreakIterator* it = wordBreakIterator(chars, len);
if (forward) {
- position = ubrk_following(it, position);
- while (position != UBRK_DONE) {
+ position = textBreakFollowing(it, position);
+ while (position != TextBreakDone) {
// We stop searching when the character preceeding the break
// is alphanumeric.
- if (position < len && u_isalnum(chars[position - 1]))
+ if (position < len && isAlphanumeric(chars[position - 1]))
return position;
- position = ubrk_following(it, position);
+ position = textBreakFollowing(it, position);
}
return len;
} else {
- position = ubrk_preceding(it, position);
- while (position != UBRK_DONE) {
+ position = textBreakPreceding(it, position);
+ while (position != TextBreakDone) {
// We stop searching when the character following the break
// is alphanumeric.
- if (position > 0 && u_isalnum(chars[position]))
+ if (position > 0 && isAlphanumeric(chars[position]))
return position;
- position = ubrk_preceding(it, position);
+ position = textBreakPreceding(it, position);
}
return 0;
@@ -67,11 +69,11 @@ int findNextWordFromIndex(const UChar* chars, int len, int position, bool forwar
void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
{
- UBreakIterator* it = wordBreakIterator(chars, len);
- *end = ubrk_following(it, position);
+ TextBreakIterator* it = wordBreakIterator(chars, len);
+ *end = textBreakFollowing(it, position);
if (*end < 0)
- *end = ubrk_last(it);
- *start = ubrk_previous(it);
+ *end = textBreakLast(it);
+ *start = textBreakPrevious(it);
}
} // namespace WebCore
diff --git a/WebCore/platform/text/TextBreakIterator.h b/WebCore/platform/text/TextBreakIterator.h
index 7b3b963..17cf5f0 100644
--- a/WebCore/platform/text/TextBreakIterator.h
+++ b/WebCore/platform/text/TextBreakIterator.h
@@ -47,7 +47,9 @@ namespace WebCore {
TextBreakIterator* sentenceBreakIterator(const UChar*, int length);
int textBreakFirst(TextBreakIterator*);
+ int textBreakLast(TextBreakIterator*);
int textBreakNext(TextBreakIterator*);
+ int textBreakPrevious(TextBreakIterator*);
int textBreakCurrent(TextBreakIterator*);
int textBreakPreceding(TextBreakIterator*, int);
int textBreakFollowing(TextBreakIterator*, int);
diff --git a/WebCore/platform/text/TextBreakIteratorICU.cpp b/WebCore/platform/text/TextBreakIteratorICU.cpp
index c922fbc..44423c0 100644
--- a/WebCore/platform/text/TextBreakIteratorICU.cpp
+++ b/WebCore/platform/text/TextBreakIteratorICU.cpp
@@ -90,11 +90,21 @@ int textBreakFirst(TextBreakIterator* bi)
return ubrk_first(bi);
}
+// ICU backend: forwards to ubrk_last() on the given iterator and returns its result.
+int textBreakLast(TextBreakIterator* bi)
+{
+ return ubrk_last(bi);
+}
+
int textBreakNext(TextBreakIterator* bi)
{
return ubrk_next(bi);
}
+// ICU backend: forwards to ubrk_previous() on the given iterator and returns its result.
+int textBreakPrevious(TextBreakIterator* bi)
+{
+ return ubrk_previous(bi);
+}
+
int textBreakPreceding(TextBreakIterator* bi, int pos)
{
return ubrk_preceding(bi, pos);
diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp
index c5c8cfd..ec9a8b0 100644
--- a/WebCore/platform/text/TextEncoding.cpp
+++ b/WebCore/platform/text/TextEncoding.cpp
@@ -32,10 +32,13 @@
#include "PlatformString.h"
#include "TextCodec.h"
#include "TextEncodingRegistry.h"
-#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
+#if USE(ICU_UNICODE)
#include <unicode/unorm.h>
#elif USE(QT4_UNICODE)
#include <QString>
+#elif USE(GLIB_UNICODE)
+#include <glib.h>
+#include <wtf/gtk/GOwnPtr.h>
#endif
#include <wtf/HashSet.h>
#include <wtf/OwnPtr.h>
@@ -84,7 +87,7 @@ CString TextEncoding::encode(const UChar* characters, size_t length, Unencodable
if (!length)
return "";
-#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
+#if USE(ICU_UNICODE)
// FIXME: What's the right place to do normalization?
// It's a little strange to do it inside the encode function.
// Perhaps normalization should be an explicit step done before calling encode.
@@ -114,6 +117,18 @@ CString TextEncoding::encode(const UChar* characters, size_t length, Unencodable
QString str(reinterpret_cast<const QChar*>(characters), length);
str = str.normalized(QString::NormalizationForm_C);
return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling);
+#elif USE(GLIB_UNICODE)
+ GOwnPtr<char> UTF8Source;
+ UTF8Source.set(g_utf16_to_utf8(characters, length, 0, 0, 0));
+
+ GOwnPtr<char> UTF8Normalized;
+ UTF8Normalized.set(g_utf8_normalize(UTF8Source.get(), -1, G_NORMALIZE_NFC));
+
+ long UTF16Length;
+ GOwnPtr<UChar> UTF16Normalized;
+ UTF16Normalized.set(g_utf8_to_utf16(UTF8Normalized.get(), -1, 0, &UTF16Length, 0));
+
+ return newTextCodec(*this)->encode(UTF16Normalized.get(), UTF16Length, handling);
#elif PLATFORM(WINCE)
// normalization will be done by Windows CE API
OwnPtr<TextCodec> textCodec = newTextCodec(*this);
diff --git a/WebCore/platform/text/TextEncodingRegistry.cpp b/WebCore/platform/text/TextEncodingRegistry.cpp
index d3e2965..a4be520 100644
--- a/WebCore/platform/text/TextEncodingRegistry.cpp
+++ b/WebCore/platform/text/TextEncodingRegistry.cpp
@@ -39,7 +39,7 @@
#include <wtf/StringExtras.h>
#include <wtf/Threading.h>
-#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
+#if USE(ICU_UNICODE)
#include "TextCodecICU.h"
#endif
#if PLATFORM(MAC)
@@ -48,6 +48,9 @@
#if PLATFORM(QT)
#include "qt/TextCodecQt.h"
#endif
+#if USE(GLIB_UNICODE)
+#include "gtk/TextCodecGtk.h"
+#endif
#if PLATFORM(WINCE) && !PLATFORM(QT)
#include "TextCodecWince.h"
#endif
@@ -217,11 +220,16 @@ static void buildBaseTextCodecMaps()
TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
TextCodecUserDefined::registerCodecs(addToTextCodecMap);
-#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
+#if USE(ICU_UNICODE)
TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap);
TextCodecICU::registerBaseCodecs(addToTextCodecMap);
#endif
+#if USE(GLIB_UNICODE)
+ TextCodecGtk::registerBaseEncodingNames(addToTextEncodingNameMap);
+ TextCodecGtk::registerBaseCodecs(addToTextCodecMap);
+#endif
+
#if PLATFORM(WINCE) && !PLATFORM(QT)
TextCodecWince::registerBaseEncodingNames(addToTextEncodingNameMap);
TextCodecWince::registerBaseCodecs(addToTextCodecMap);
@@ -230,7 +238,7 @@ static void buildBaseTextCodecMaps()
static void extendTextCodecMaps()
{
-#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID)
+#if USE(ICU_UNICODE)
TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
#endif
@@ -245,6 +253,11 @@ static void extendTextCodecMaps()
TextCodecMac::registerCodecs(addToTextCodecMap);
#endif
+#if USE(GLIB_UNICODE)
+ TextCodecGtk::registerExtendedEncodingNames(addToTextEncodingNameMap);
+ TextCodecGtk::registerExtendedCodecs(addToTextCodecMap);
+#endif
+
#if PLATFORM(WINCE) && !PLATFORM(QT)
TextCodecWince::registerExtendedEncodingNames(addToTextEncodingNameMap);
TextCodecWince::registerExtendedCodecs(addToTextCodecMap);
diff --git a/WebCore/platform/text/cf/StringCF.cpp b/WebCore/platform/text/cf/StringCF.cpp
index b770d0e..97691e5 100644
--- a/WebCore/platform/text/cf/StringCF.cpp
+++ b/WebCore/platform/text/cf/StringCF.cpp
@@ -21,7 +21,7 @@
#include "config.h"
#include "PlatformString.h"
-#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#if PLATFORM(CF)
#include <CoreFoundation/CoreFoundation.h>
@@ -52,4 +52,4 @@ CFStringRef String::createCFString() const
}
-#endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#endif // PLATFORM(CF)
diff --git a/WebCore/platform/text/cf/StringImplCF.cpp b/WebCore/platform/text/cf/StringImplCF.cpp
index 8a2ae79..aff45b3 100644
--- a/WebCore/platform/text/cf/StringImplCF.cpp
+++ b/WebCore/platform/text/cf/StringImplCF.cpp
@@ -21,7 +21,7 @@
#include "config.h"
#include "StringImpl.h"
-#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#if PLATFORM(CF)
#include <CoreFoundation/CoreFoundation.h>
#include <wtf/MainThread.h>
@@ -159,4 +159,4 @@ CFStringRef StringImpl::createCFString()
}
-#endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
+#endif // PLATFORM(CF)
diff --git a/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp b/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp
new file mode 100644
index 0000000..7a10b41
--- /dev/null
+++ b/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include <pango/pango.h>
+#include <wtf/gtk/GOwnPtr.h>
+
+namespace WebCore {
+
+// Kind of boundary an iterator looks for. The comments below name the
+// PangoLogAttr flags that textBreakNext()/textBreakPrevious() test for each kind.
+// NOTE(review): the names look chosen to mirror ICU's UBreakIteratorType — confirm.
+enum UBreakIteratorType {
+ UBRK_CHARACTER, // is_cursor_position
+ UBRK_WORD, // is_word_start or is_word_end
+ UBRK_LINE, // is_line_break
+ UBRK_SENTENCE // is_sentence_start or is_sentence_end
+};
+
+// State of a Pango-based break iterator. All members are filled in by
+// setUpIterator() each time the iterator is pointed at a new piece of text.
+class TextBreakIterator {
+public:
+ // Kind of boundary this iterator reports.
+ UBreakIteratorType m_type;
+ // Length of the text as passed to setUpIterator() (UTF-16 code units).
+ int m_length;
+ // Array of m_length + 1 attributes allocated with g_new0() and filled
+ // by pango_get_log_attrs(); released with g_free() on the next setup.
+ PangoLogAttr* m_logAttrs;
+ // Current position; set to -1 by setUpIterator().
+ int m_index;
+};
+
+// Points the caller's shared iterator at a new piece of text.
+//
+// createdIterator / iterator: the caller's function-local statics; the iterator
+//     object is allocated on first use and reused on every later call.
+// type: kind of boundary the iterator should report.
+// string, length: UTF-16 text and its length in code units.
+//
+// Returns the (re)configured iterator, or 0 when string is null, the iterator
+// could not be allocated, or the text cannot be converted to UTF-8. On a
+// failed conversion the iterator keeps the state from its previous setup.
+static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
+    UBreakIteratorType type, const UChar* string, int length)
+{
+    if (!string)
+        return 0;
+
+    if (!createdIterator) {
+        // new T() value-initializes, so m_logAttrs starts out null.
+        iterator = new TextBreakIterator();
+        createdIterator = true;
+    }
+    if (!iterator)
+        return 0;
+
+    long utf8len = 0;
+    GOwnPtr<char> utf8;
+    utf8.set(g_utf16_to_utf8(string, length, 0, &utf8len, 0));
+
+    // g_utf16_to_utf8() returns null for input it cannot convert (for example
+    // an unpaired surrogate) and then leaves utf8len unset. Bail out instead of
+    // handing a null buffer and a garbage length to Pango.
+    if (!utf8.get())
+        return 0;
+
+    // FIXME: assumes no surrogate pairs
+
+    iterator->m_type = type;
+    iterator->m_length = length;
+    // Drop the attributes computed for the previous text. This is a no-op on
+    // the first setup because g_free() accepts a null pointer.
+    g_free(iterator->m_logAttrs);
+    iterator->m_logAttrs = g_new0(PangoLogAttr, length + 1);
+    iterator->m_index = -1;
+    pango_get_log_attrs(utf8.get(), utf8len, -1, 0, iterator->m_logAttrs, length + 1);
+
+    return iterator;
+}
+
+// Returns the character-boundary iterator set up for the given text.
+// A single function-local iterator is reused, so every call reconfigures
+// the object handed out by earlier calls.
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    static TextBreakIterator* sharedIterator;
+    static bool sharedIteratorExists = false;
+    return setUpIterator(sharedIteratorExists, sharedIterator, UBRK_CHARACTER, string, length);
+}
+
+// Returns an iterator for cursor movement over the given text. For now this
+// simply hands back the character-boundary iterator.
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+{
+ // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version.
+ return characterBreakIterator(string, length);
+}
+
+// Returns the word-boundary iterator set up for the given text.
+// A single function-local iterator is reused, so every call reconfigures
+// the object handed out by earlier calls.
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    static TextBreakIterator* sharedIterator;
+    static bool sharedIteratorExists = false;
+    return setUpIterator(sharedIteratorExists, sharedIterator, UBRK_WORD, string, length);
+}
+
+// Returns the line-break iterator set up for the given text.
+// A single function-local iterator is reused, so every call reconfigures
+// the object handed out by earlier calls.
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    static TextBreakIterator* sharedIterator;
+    static bool sharedIteratorExists = false;
+    return setUpIterator(sharedIteratorExists, sharedIterator, UBRK_LINE, string, length);
+}
+
+// Returns the sentence-boundary iterator set up for the given text.
+// A single function-local iterator is reused, so every call reconfigures
+// the object handed out by earlier calls.
+TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+{
+    static TextBreakIterator* sharedIterator;
+    static bool sharedIteratorExists = false;
+    return setUpIterator(sharedIteratorExists, sharedIterator, UBRK_SENTENCE, string, length);
+}
+
+// Moves the iterator to the first cursor position in the text and returns
+// that position. When no position in [0, m_length] is a cursor position,
+// the iterator index is set to -1 and -1 is returned.
+int textBreakFirst(TextBreakIterator* bi)
+{
+    // see textBreakLast
+
+    int firstCursorPosition = -1;
+    // The scan has to advance on every iteration; without that, text whose
+    // position 0 is not a cursor position would never leave the loop.
+    for (int pos = 0; pos <= bi->m_length; ++pos) {
+        if (bi->m_logAttrs[pos].is_cursor_position) {
+            firstCursorPosition = pos;
+            break;
+        }
+    }
+    bi->m_index = firstCursorPosition;
+    return firstCursorPosition;
+}
+
+// Moves the iterator to the "last" position of the text and returns it.
+//
+// This is not the last break of kind bi->m_type; it follows ICU's ubrk_last,
+// documented as "Determine the index immediately beyond the last character in
+// the text being scanned."
+//
+// Cursor positions are walked from the end of the text towards the start.
+// While they are white space, the left-most one seen so far is remembered;
+// the walk stops at the first non-white cursor position. With no trailing
+// white space the result stays m_length, i.e. just past the end of the string.
+int textBreakLast(TextBreakIterator* bi)
+{
+    int lastPosition = bi->m_length;
+
+    for (int pos = bi->m_length; pos >= 0; --pos) {
+        const PangoLogAttr& attr = bi->m_logAttrs[pos];
+        if (!attr.is_cursor_position)
+            continue;
+        if (!attr.is_white)
+            break;
+        lastPosition = pos;
+    }
+
+    bi->m_index = lastPosition;
+    return lastPosition;
+}
+
+// Advances to the first boundary of kind bi->m_type located after the current
+// index, stores it as the new index and returns it. Returns TextBreakDone,
+// leaving the index untouched, when there is no such boundary up to m_length.
+int textBreakNext(TextBreakIterator* bi)
+{
+    int candidate = bi->m_index + 1;
+    while (candidate <= bi->m_length) {
+        const PangoLogAttr& attr = bi->m_logAttrs[candidate];
+        bool isBoundary = false;
+
+        // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol €,
+        // are not marked as word_start & word_end as opposed to the way ICU does it.
+        // This leads to - for example - different word selection behaviour when right clicking.
+        switch (bi->m_type) {
+        case UBRK_LINE:
+            isBoundary = attr.is_line_break;
+            break;
+        case UBRK_WORD:
+            isBoundary = attr.is_word_start || attr.is_word_end;
+            break;
+        case UBRK_CHARACTER:
+            isBoundary = attr.is_cursor_position;
+            break;
+        case UBRK_SENTENCE:
+            isBoundary = attr.is_sentence_start || attr.is_sentence_end;
+            break;
+        }
+
+        if (isBoundary) {
+            bi->m_index = candidate;
+            return candidate;
+        }
+        candidate++;
+    }
+    return TextBreakDone;
+}
+
+// Steps back to the nearest boundary of kind bi->m_type located before the
+// current index, stores it as the new index and returns it. When none is
+// found down to position 0, falls back to textBreakFirst().
+int textBreakPrevious(TextBreakIterator* bi)
+{
+    int candidate = bi->m_index - 1;
+    while (candidate >= 0) {
+        const PangoLogAttr* attr = bi->m_logAttrs + candidate;
+
+        bool isBoundary;
+        if (bi->m_type == UBRK_LINE)
+            isBoundary = attr->is_line_break;
+        else if (bi->m_type == UBRK_WORD)
+            isBoundary = attr->is_word_start || attr->is_word_end;
+        else if (bi->m_type == UBRK_CHARACTER)
+            isBoundary = attr->is_cursor_position;
+        else if (bi->m_type == UBRK_SENTENCE)
+            isBoundary = attr->is_sentence_start || attr->is_sentence_end;
+        else
+            isBoundary = false;
+
+        if (isBoundary) {
+            bi->m_index = candidate;
+            return candidate;
+        }
+        candidate--;
+    }
+    return textBreakFirst(bi);
+}
+
+// Returns the boundary before pos: repositions the iterator at pos and then
+// searches backwards with textBreakPrevious().
+int textBreakPreceding(TextBreakIterator* bi, int pos)
+{
+ bi->m_index = pos;
+ return textBreakPrevious(bi);
+}
+
+// Returns the boundary after pos: repositions the iterator at pos (any
+// negative value is treated as -1, so the search starts at position 0)
+// and then searches forwards with textBreakNext().
+int textBreakFollowing(TextBreakIterator* bi, int pos)
+{
+    bi->m_index = (pos < 0) ? -1 : pos;
+    return textBreakNext(bi);
+}
+
+// Returns the iterator's current index (-1 right after setUpIterator()).
+int textBreakCurrent(TextBreakIterator* bi)
+{
+ return bi->m_index;
+}
+
+// Reports whether pos is a boundary of kind bi->m_type in the iterator's text.
+// Positions outside [0, m_length] are never boundaries. The iterator's current
+// index is neither consulted nor changed.
+bool isTextBreak(TextBreakIterator* bi, int pos)
+{
+    // m_logAttrs holds m_length + 1 entries, so anything outside that range
+    // must be rejected before indexing.
+    if (pos < 0 || pos > bi->m_length)
+        return false;
+
+    // Test the requested position rather than the iterator's current index;
+    // the question asked by the caller is about pos.
+    const PangoLogAttr& attr = bi->m_logAttrs[pos];
+    return ((bi->m_type == UBRK_LINE && attr.is_line_break)
+        || (bi->m_type == UBRK_WORD && attr.is_word_end)
+        || (bi->m_type == UBRK_CHARACTER && attr.is_char_break)
+        || (bi->m_type == UBRK_SENTENCE && attr.is_sentence_end));
+}
+
+}
diff --git a/WebCore/platform/text/gtk/TextCodecGtk.cpp b/WebCore/platform/text/gtk/TextCodecGtk.cpp
new file mode 100644
index 0000000..31da3b7
--- /dev/null
+++ b/WebCore/platform/text/gtk/TextCodecGtk.cpp
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecGtk.h"
+
+#include "CString.h"
+#include "PlatformString.h"
+#include <wtf/Assertions.h>
+#include <wtf/HashMap.h>
+#include <wtf/gtk/GOwnPtr.h>
+#include "Logging.h"
+
+using std::min;
+
+namespace WebCore {
+
+// Name of the UTF-16 flavour used on the WebCore side of every conversion.
+// TextCodec's appendOmittingBOM() is gone (http://trac.webkit.org/changeset/33380),
+// so the conversion result must not contain an extra BOM.
+// This is achieved by naming the UTF-16 codec with an explicit endianness
+// (matching the host byte order) when opening the GLib converters.
+
+#if (G_BYTE_ORDER == G_BIG_ENDIAN)
+ const gchar* WebCore::TextCodecGtk::m_internalEncodingName = "UTF-16BE";
+#else
+ const gchar* WebCore::TextCodecGtk::m_internalEncodingName = "UTF-16LE";
+#endif
+
+
+// We're specifying the list of text codecs and their aliases here.
+// For each codec the first entry is the canonical name, the remaining ones are its aliases.
+// Each alias list must be terminated by a 0: registerEncodingNames() walks a
+// list until it reads a null entry.
+
+// Unicode
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_UTF_8 = { "UTF-8", 0 };
+
+// Western
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_1 = { "ISO-8859-1", "CP819", "IBM819", "ISO-IR-100", "ISO8859-1", "ISO_8859-1", "ISO_8859-1:1987", "L1", "LATIN1", "CSISOLATIN1", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_MACROMAN = { "MACROMAN", "MAC", "MACINTOSH", "CSMACINTOSH", 0 };
+
+// Japanese
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_SHIFT_JIS = { "Shift_JIS", "MS_KANJI", "SHIFT-JIS", "SJIS", "CSSHIFTJIS", 0 };
+ TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_EUC_JP = { "EUC-JP", "EUC_JP", "EUCJP", "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE", "CSEUCPKDFMTJAPANESE", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_2022_JP = { "ISO-2022-JP", 0 };
+
+// Traditional Chinese
+// First entry is the canonical name, the rest are aliases; every list ends with 0.
+// ("BIG5" is listed once only: it is the canonical name and needs no alias to itself.)
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_BIG5 = { "BIG5", "BIG-5", "BIG-FIVE", "BIGFIVE", "CN-BIG5", "CSBIG5", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_BIG5_HKSCS = { "BIG5-HKSCS", "BIG5-HKSCS:2004", "BIG5HKSCS", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP950 = { "CP950", 0 };
+
+// Korean
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_2022_KR = { "ISO-2022-KR", "CSISO2022KR", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP949 = { "CP949", "UHC", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_EUC_KR = { "EUC-KR", "CSEUCKR", 0 };
+
+// Arabic
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_6 = { "ISO-8859-6", "ARABIC", "ASMO-708", "ECMA-114", "ISO-IR-127", "ISO8859-6", "ISO_8859-6", "ISO_8859-6:1987", "CSISOLATINARABIC", 0 };
+// Rearranged: lowercase "windows-1256" is listed first, i.e. it is the canonical
+// name, to fix the /fast/encoding/ahram-org-eg.html test case.
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1256 = { "windows-1256", "CP1256", "MS-ARAB", 0 };
+
+// Hebrew
+// First entry is the canonical name, the rest are aliases; every list ends with 0.
+// ("ISO-8859-8" is listed once only: it is the canonical name and needs no alias to itself.)
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_8 = { "ISO-8859-8", "HEBREW", "ISO-IR-138", "ISO8859-8", "ISO_8859-8", "ISO_8859-8:1988", "CSISOLATINHEBREW", 0 };
+// Rearranged: lowercase "windows-1255" is the canonical name, fixing /fast/encoding/meta-charset.html
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1255 = { "windows-1255", "CP1255", "MS-HEBR", 0 };
+
+// Greek
+// First entry is the canonical name, the rest are aliases; every list ends with 0.
+// The last ISO-8859-7 alias is the registered "csISOLatinGreek" name, spelled in
+// upper case like the other CS* aliases in this file (it used to be cut off as "CSI").
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_7 = { "ISO-8859-7", "ECMA-118", "ELOT_928", "GREEK", "GREEK8", "ISO-IR-126", "ISO8859-7", "ISO_8859-7", "ISO_8859-7:1987", "ISO_8859-7:2003", "CSISOLATINGREEK", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP869 = { "CP869", "869", "CP-GR", "IBM869", "CSIBM869", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_WINDOWS_1253 = { "WINDOWS-1253", 0 };
+
+// Cyrillic
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_5 = { "ISO-8859-5", "CYRILLIC", "ISO-IR-144", "ISO8859-5", "ISO_8859-5", "ISO_8859-5:1988", "CSISOLATINCYRILLIC", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_KOI8_R = { "KOI8-R", "CSKOI8R", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP866 = { "CP866", "866", "IBM866", "CSIBM866", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_KOI8_U = { "KOI8-U", 0 };
+// The CP1251 alias was added to pass /fast/encoding/charset-cp1251.html
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_WINDOWS_1251 = { "windows-1251", "CP1251", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_MACCYRILLIC = { "mac-cyrillic", "MACCYRILLIC", "x-mac-cyrillic", 0 };
+
+// Thai
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP874 = { "CP874", "WINDOWS-874", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_TIS_620 = { "TIS-620", 0 };
+
+// Simplified Chinese
+// First entry is the canonical name, the rest are aliases. Every list MUST end
+// with 0: registerEncodingNames() keeps reading aliases until it hits a null
+// entry, so a missing terminator makes it read past the end of the array.
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_GBK = { "GBK", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_HZ = { "HZ", "HZ-GB-2312", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_GB18030 = { "GB18030", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_EUC_CN = { "EUC-CN", "EUCCN", "GB2312", "CN-GB", "CSGB2312", "EUC_CN", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_2312_80 = { "GB_2312-80", "CHINESE", "csISO58GB231280", "GB2312.1980-0", "ISO-IR-58", 0 };
+
+// The groups below follow the same layout as every alias list in this file:
+// canonical name first, aliases after it, 0 as the last entry.
+
+// Central European
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_2 = { "ISO-8859-2", "ISO-IR-101", "ISO8859-2", "ISO_8859-2", "ISO_8859-2:1987", "L2", "LATIN2", "CSISOLATIN2", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1250 = { "CP1250", "MS-EE", "WINDOWS-1250", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_MACCENTRALEUROPE = { "MAC-CENTRALEUROPE", 0 };
+
+// Vietnamese
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1258 = { "CP1258", "WINDOWS-1258", 0 };
+
+// Turkish
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1254 = { "CP1254", "MS-TURK", "WINDOWS-1254", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_9 = { "ISO-8859-9", "ISO-IR-148", "ISO8859-9", "ISO_8859-9", "ISO_8859-9:1989", "L5", "LATIN5", "CSISOLATIN5", 0 };
+
+// Baltic
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1257 = { "CP1257", "WINBALTRIM", "WINDOWS-1257", 0 };
+TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_4 = { "ISO-8859-4", "ISO-IR-110", "ISO8859-4", "ISO_8859-4", "ISO_8859-4:1988", "L4", "LATIN4", "CSISOLATIN4", 0 };
+
+// Table of alias lists registered by registerBaseEncodingNames() and
+// registerBaseCodecs(). Each entry is the address of one codecAliasList.
+gconstpointer const TextCodecGtk::m_iconvBaseCodecList[] = {
+ // Unicode
+ &m_codecAliases_UTF_8,
+
+ // Western
+ &m_codecAliases_ISO_8859_1
+};
+
+// Table of alias lists registered by registerExtendedEncodingNames() and
+// registerExtendedCodecs(). Each entry is the address of one codecAliasList.
+gconstpointer const TextCodecGtk::m_iconvExtendedCodecList[] =
+{
+ // Western
+ &m_codecAliases_MACROMAN,
+
+ // Japanese
+ &m_codecAliases_SHIFT_JIS,
+ &m_codecAliases_EUC_JP,
+ &m_codecAliases_ISO_2022_JP,
+
+ // Traditional Chinese
+ &m_codecAliases_BIG5,
+ &m_codecAliases_BIG5_HKSCS,
+ &m_codecAliases_CP950,
+
+ // Korean
+ &m_codecAliases_ISO_2022_KR,
+ &m_codecAliases_CP949,
+ &m_codecAliases_EUC_KR,
+
+ // Arabic
+ &m_codecAliases_ISO_8859_6,
+ &m_codecAliases_CP1256,
+
+ // Hebrew
+ &m_codecAliases_ISO_8859_8,
+ &m_codecAliases_CP1255,
+
+ // Greek
+ &m_codecAliases_ISO_8859_7,
+ &m_codecAliases_CP869,
+ &m_codecAliases_WINDOWS_1253,
+
+ // Cyrillic
+ &m_codecAliases_ISO_8859_5,
+ &m_codecAliases_KOI8_R,
+ &m_codecAliases_CP866,
+ &m_codecAliases_KOI8_U,
+ &m_codecAliases_WINDOWS_1251,
+ &m_codecAliases_MACCYRILLIC,
+
+ // Thai
+ &m_codecAliases_CP874,
+ &m_codecAliases_TIS_620,
+
+ // Simplified Chinese
+ &m_codecAliases_GBK,
+ &m_codecAliases_HZ,
+ &m_codecAliases_GB18030,
+ &m_codecAliases_EUC_CN,
+ &m_codecAliases_2312_80,
+
+ // Central European
+ &m_codecAliases_ISO_8859_2,
+ &m_codecAliases_CP1250,
+ &m_codecAliases_MACCENTRALEUROPE,
+
+ // Vietnamese
+ &m_codecAliases_CP1258,
+
+ // Turkish
+ &m_codecAliases_CP1254,
+ &m_codecAliases_ISO_8859_9,
+
+ // Baltic
+ &m_codecAliases_CP1257,
+ &m_codecAliases_ISO_8859_4
+};
+
+
+// NOTE(review): this constant is not referenced anywhere else in this file;
+// the conversions below let g_convert_with_iconv() allocate its own output.
+const size_t ConversionBufferSize = 16384;
+
+
+// Factory callback handed to the codec registrar in registerCodecs(): creates
+// a TextCodecGtk for the given encoding. The second argument is ignored
+// (registerCodecs() passes 0 for it).
+static PassOwnPtr<TextCodec> newTextCodecGtk(const TextEncoding& encoding, const void*)
+{
+ return new TextCodecGtk(encoding);
+}
+
+// Returns true only if GLib can open a converter for encName in BOTH
+// directions: from encName to the internal UTF-16 encoding (decoding) and
+// from the internal encoding back to encName (encoding). Every probe
+// descriptor that was opened is closed again before returning.
+gboolean TextCodecGtk::isEncodingAvailable(const gchar* encName)
+{
+    const GIConv openFailed = reinterpret_cast<GIConv>(-1);
+
+    // Probe the decoding direction.
+    GIConv decodeProbe = g_iconv_open(m_internalEncodingName, encName);
+    if (decodeProbe == openFailed)
+        return false;
+    g_iconv_close(decodeProbe);
+
+    // Probe the encoding direction.
+    GIConv encodeProbe = g_iconv_open(encName, m_internalEncodingName);
+    if (encodeProbe == openFailed)
+        return false;
+    g_iconv_close(encodeProbe);
+
+    return true;
+}
+
+// Registers the encoding names of one codec table with the given registrar.
+// extended selects m_iconvExtendedCodecList, otherwise m_iconvBaseCodecList is
+// used. For every alias list the canonical name (entry 0) is probed first; if
+// GLib does not support it the whole list is skipped. Otherwise the canonical
+// name and each alias GLib supports are registered against the canonical name.
+void TextCodecGtk::registerEncodingNames(EncodingNameRegistrar registrar, bool extended)
+{
+    const void* const* encodingList;
+    unsigned int listLength = 0;
+    if (extended) {
+        encodingList = m_iconvExtendedCodecList;
+        listLength = sizeof(m_iconvExtendedCodecList) / sizeof(gpointer);
+    } else {
+        encodingList = m_iconvBaseCodecList;
+        listLength = sizeof(m_iconvBaseCodecList) / sizeof(gpointer);
+    }
+
+    for (unsigned int listIndex = 0; listIndex < listLength; ++listIndex) {
+        codecAliasList* aliasGroup = static_cast<codecAliasList*>(encodingList[listIndex]);
+
+        // By convention the first entry of a group is its canonical name.
+        const char* canonicalName = (*aliasGroup)[0];
+        if (!isEncodingAvailable(canonicalName)) {
+            LOG(TextConversion, "Canonical encoding %s not available, skipping.", canonicalName);
+            continue;
+        }
+        registrar(canonicalName, canonicalName);
+
+        // The remaining entries, up to the terminating 0, are aliases.
+        for (int aliasIndex = 1; (*aliasGroup)[aliasIndex]; ++aliasIndex) {
+            const char* alias = (*aliasGroup)[aliasIndex];
+            if (!isEncodingAvailable(alias))
+                continue;
+            LOG(TextConversion, "Registering encoding name alias %s to canonical %s", alias, canonicalName);
+            registrar(alias, canonicalName);
+        }
+    }
+}
+
+// Registers a TextCodecGtk factory for the canonical name of every alias list
+// in one codec table, provided GLib supports that name. extended selects
+// m_iconvExtendedCodecList, otherwise m_iconvBaseCodecList is used.
+void TextCodecGtk::registerCodecs(TextCodecRegistrar registrar, bool extended)
+{
+    const void* const* encodingList;
+    unsigned int listLength = 0;
+    if (extended) {
+        encodingList = m_iconvExtendedCodecList;
+        listLength = sizeof(m_iconvExtendedCodecList) / sizeof(gpointer);
+    } else {
+        encodingList = m_iconvBaseCodecList;
+        listLength = sizeof(m_iconvBaseCodecList) / sizeof(gpointer);
+    }
+
+    for (unsigned int listIndex = 0; listIndex < listLength; ++listIndex) {
+        codecAliasList* aliasGroup = static_cast<codecAliasList*>(encodingList[listIndex]);
+
+        // Entry 0 of each alias list is the canonical name (see the list definitions).
+        const gchar* canonicalName = (*aliasGroup)[0];
+        if (!isEncodingAvailable(canonicalName))
+            continue;
+        registrar(canonicalName, newTextCodecGtk, 0);
+    }
+}
+
+// Registers the names and aliases of the base codec table (m_iconvBaseCodecList).
+void TextCodecGtk::registerBaseEncodingNames(EncodingNameRegistrar registrar)
+{
+ registerEncodingNames(registrar, false);
+}
+
+// Registers codec factories for the base codec table (m_iconvBaseCodecList).
+void TextCodecGtk::registerBaseCodecs(TextCodecRegistrar registrar)
+{
+ registerCodecs(registrar, false);
+}
+
+// Registers the names and aliases of the extended codec table (m_iconvExtendedCodecList).
+void TextCodecGtk::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
+{
+ registerEncodingNames(registrar, true);
+}
+
+// Registers codec factories for the extended codec table (m_iconvExtendedCodecList).
+void TextCodecGtk::registerExtendedCodecs(TextCodecRegistrar registrar)
+{
+ registerCodecs(registrar, true);
+}
+
+// Creates a codec for the given encoding. No iconv descriptor is opened here:
+// both start out as (GIConv)-1, the "not open" marker, and are opened on first
+// use by decode() and encode() respectively.
+TextCodecGtk::TextCodecGtk(const TextEncoding& encoding)
+ : m_encoding(encoding)
+ , m_numBufferedBytes(0)
+ , m_iconvDecoder(reinterpret_cast<GIConv>(-1))
+ , m_iconvEncoder(reinterpret_cast<GIConv>(-1))
+{
+}
+
+// Closes whichever iconv descriptors were actually opened and resets them to
+// the "not open" marker.
+TextCodecGtk::~TextCodecGtk()
+{
+    const GIConv notOpen = reinterpret_cast<GIConv>(-1);
+
+    if (m_iconvDecoder != notOpen) {
+        g_iconv_close(m_iconvDecoder);
+        m_iconvDecoder = notOpen;
+    }
+
+    if (m_iconvEncoder != notOpen) {
+        g_iconv_close(m_iconvEncoder);
+        m_iconvEncoder = notOpen;
+    }
+}
+
+// Opens the descriptor used by decode(): m_encoding -> internal UTF-16.
+// Must only be called while no decoder is open. On failure m_iconvDecoder is
+// left holding g_iconv_open()'s error value, (GIConv)-1, which callers check.
+void TextCodecGtk::createIConvDecoder() const
+{
+ ASSERT(m_iconvDecoder == reinterpret_cast<GIConv>(-1));
+
+ m_iconvDecoder = g_iconv_open(m_internalEncodingName, m_encoding.name());
+}
+
+// Opens the descriptor used by encode(): internal UTF-16 -> m_encoding.
+// Must only be called while no encoder is open. On failure m_iconvEncoder is
+// left holding g_iconv_open()'s error value, (GIConv)-1, which encode() checks.
+void TextCodecGtk::createIConvEncoder() const
+{
+    // Guard the descriptor this function is about to overwrite. Asserting on
+    // the decoder instead would fire in debug builds whenever decode() had
+    // already run on this codec, and would not catch a leaked encoder.
+    ASSERT(m_iconvEncoder == reinterpret_cast<GIConv>(-1));
+
+    m_iconvEncoder = g_iconv_open(m_encoding.name(), m_internalEncodingName);
+}
+
+// Decodes `length` bytes of input in this codec's encoding into a String.
+//
+// Trailing bytes that form an incomplete multi-byte character are kept in
+// m_bufferedBytes and prepended to the input of the next call. If such a tail
+// remains while `flush` is set, or if the converter reports an error, sawError
+// is set and an empty String is returned; the buffered state is cleared so a
+// later call starts fresh. stopOnError is not consulted: any conversion error
+// discards the whole chunk.
+String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    // Get a converter for the passed-in encoding (opened lazily on first use).
+    if (m_iconvDecoder == reinterpret_cast<GIConv>(-1)) {
+        createIConvDecoder();
+        ASSERT(m_iconvDecoder != reinterpret_cast<GIConv>(-1));
+        if (m_iconvDecoder == reinterpret_cast<GIConv>(-1)) {
+            LOG_ERROR("Error creating IConv decoder even though encoding was in table.");
+            return String();
+        }
+    }
+
+    size_t countWritten, countRead, conversionLength;
+    const char* conversionBytes;
+    char* prefixedBytes = 0;
+
+    if (m_numBufferedBytes) {
+        // Re-assemble the input: leftover partial character first, new data after it.
+        conversionLength = length + m_numBufferedBytes;
+        prefixedBytes = static_cast<char*>(fastMalloc(conversionLength));
+        memcpy(prefixedBytes, m_bufferedBytes, m_numBufferedBytes);
+        memcpy(prefixedBytes + m_numBufferedBytes, bytes, length);
+
+        conversionBytes = prefixedBytes;
+
+        // all buffered bytes are consumed now
+        m_numBufferedBytes = 0;
+    } else {
+        // no previously buffered partial data,
+        // just convert the data that was passed in
+        conversionBytes = bytes;
+        conversionLength = length;
+    }
+
+    GOwnPtr<GError> err;
+    GOwnPtr<UChar> buffer;
+
+    buffer.outPtr() = reinterpret_cast<UChar*>(g_convert_with_iconv(conversionBytes, conversionLength, m_iconvDecoder, &countRead, &countWritten, &err.outPtr()));
+
+    if (err) {
+        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", err->code, err->message);
+        m_numBufferedBytes = 0; // reset state for subsequent calls to decode
+        fastFree(prefixedBytes);
+        sawError = true;
+        return String();
+    }
+
+    // Partial input at the end of the string may not result in an error being raised.
+    // From the gnome library documentation on g_convert_with_iconv:
+    // "Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input."
+    // That's why we need to compare conversionLength against countRead
+
+    m_numBufferedBytes = conversionLength - countRead;
+    if (m_numBufferedBytes > 0) {
+        if (flush) {
+            LOG_ERROR("Partial bytes at end of input while flush requested.");
+            m_numBufferedBytes = 0; // reset state for subsequent calls to decode
+            fastFree(prefixedBytes);
+            sawError = true;
+            return String();
+        }
+        // The input comes from the network, so never trust the tail to fit the
+        // fixed-size member buffer: treat an oversized remainder as an error
+        // instead of writing past the end of m_bufferedBytes.
+        if (m_numBufferedBytes > sizeof(m_bufferedBytes)) {
+            LOG_ERROR("Unconverted tail of %lu bytes does not fit the partial character buffer.", static_cast<unsigned long>(m_numBufferedBytes));
+            m_numBufferedBytes = 0; // reset state for subsequent calls to decode
+            fastFree(prefixedBytes);
+            sawError = true;
+            return String();
+        }
+        memcpy(m_bufferedBytes, conversionBytes + countRead, m_numBufferedBytes);
+    }
+
+    fastFree(prefixedBytes);
+
+    // countWritten is in bytes; the String wants a count of UTF-16 code units.
+    Vector<UChar> result;
+
+    result.append(buffer.get(), countWritten / sizeof(UChar));
+
+    return String::adopt(result);
+}
+
+// Encodes `length` UTF-16 code units into this codec's encoding.
+// Returns "" for empty input, and a null CString when the converter cannot be
+// opened or the conversion fails. The `handling` argument is not consulted.
+CString TextCodecGtk::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    if (!length)
+        return "";
+
+    // Open the converter on first use; give up if that is not possible.
+    if (m_iconvEncoder == reinterpret_cast<GIConv>(-1)) {
+        createIConvEncoder();
+        if (m_iconvEncoder == reinterpret_cast<GIConv>(-1))
+            return CString();
+    }
+
+    const char* utf16Bytes = reinterpret_cast<const char*>(characters);
+    const size_t utf16ByteCount = length * sizeof(UChar);
+    size_t encodedByteCount;
+
+    GOwnPtr<GError> conversionError;
+    GOwnPtr<char> encoded;
+
+    encoded.outPtr() = g_convert_with_iconv(utf16Bytes, utf16ByteCount, m_iconvEncoder, 0, &encodedByteCount, &conversionError.outPtr());
+    if (conversionError) {
+        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", conversionError->code, conversionError->message);
+        return CString();
+    }
+
+    return CString(encoded.get(), encodedByteCount);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/gtk/TextCodecGtk.h b/WebCore/platform/text/gtk/TextCodecGtk.h
new file mode 100644
index 0000000..a8af752
--- /dev/null
+++ b/WebCore/platform/text/gtk/TextCodecGtk.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecGTK_h
+#define TextCodecGTK_h
+
+#include <glib.h>
+#include "TextCodec.h"
+#include "TextEncoding.h"
+
+namespace WebCore {
+
+ // TextCodec implementation that converts between an external encoding and
+ // WebCore's UTF-16 strings through GLib's iconv wrappers (GIConv).
+ class TextCodecGtk : public TextCodec {
+ public:
+ // Registration entry points for the base table of encodings.
+ static void registerBaseEncodingNames(EncodingNameRegistrar);
+ static void registerBaseCodecs(TextCodecRegistrar);
+
+ // Registration entry points for the extended table of encodings.
+ static void registerExtendedEncodingNames(EncodingNameRegistrar);
+ static void registerExtendedCodecs(TextCodecRegistrar);
+
+ TextCodecGtk(const TextEncoding&);
+ virtual ~TextCodecGtk();
+
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+ private:
+ // Open the respective iconv descriptor; const because the descriptors are mutable.
+ void createIConvDecoder() const;
+ void createIConvEncoder() const;
+
+ // Shared implementations behind the public register* functions;
+ // `extended` chooses between the base and the extended codec table.
+ static void registerEncodingNames(EncodingNameRegistrar registrar, bool extended);
+ static void registerCodecs(TextCodecRegistrar registrar, bool extended);
+ // True if GLib can convert both from and to the named encoding.
+ static gboolean isEncodingAvailable(const gchar*);
+
+ TextEncoding m_encoding;
+ // Number of valid bytes currently held in m_bufferedBytes.
+ size_t m_numBufferedBytes;
+ unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+ // Lazily opened descriptors; (GIConv)-1 means "not open".
+ mutable GIConv m_iconvDecoder;
+ mutable GIConv m_iconvEncoder;
+
+ // Name of the UTF-16 flavour used on the WebCore side of each conversion.
+ static const gchar* m_internalEncodingName;
+
+ // An alias list: canonical name first, then aliases, terminated by 0.
+ typedef const gchar* const codecAliasList[];
+
+ // Unicode
+ static codecAliasList m_codecAliases_UTF_8;
+
+ // Western
+ static codecAliasList m_codecAliases_ISO_8859_1;
+ static codecAliasList m_codecAliases_MACROMAN;
+
+ // Japanese
+ static codecAliasList m_codecAliases_SHIFT_JIS;
+ static codecAliasList m_codecAliases_EUC_JP;
+ static codecAliasList m_codecAliases_ISO_2022_JP;
+
+ // Traditional Chinese
+ static codecAliasList m_codecAliases_BIG5;
+ static codecAliasList m_codecAliases_BIG5_HKSCS;
+ static codecAliasList m_codecAliases_CP950;
+
+ // Korean
+ static codecAliasList m_codecAliases_ISO_2022_KR;
+ static codecAliasList m_codecAliases_CP949;
+ static codecAliasList m_codecAliases_EUC_KR;
+
+ // Arabic
+ static codecAliasList m_codecAliases_ISO_8859_6;
+ static codecAliasList m_codecAliases_CP1256;
+
+ // Hebrew
+ static codecAliasList m_codecAliases_ISO_8859_8;
+ static codecAliasList m_codecAliases_CP1255;
+
+ // Greek
+ static codecAliasList m_codecAliases_ISO_8859_7;
+ static codecAliasList m_codecAliases_CP869;
+ static codecAliasList m_codecAliases_WINDOWS_1253;
+
+ // Cyrillic
+ static codecAliasList m_codecAliases_ISO_8859_5;
+ static codecAliasList m_codecAliases_KOI8_R;
+ static codecAliasList m_codecAliases_CP866;
+ static codecAliasList m_codecAliases_KOI8_U;
+ static codecAliasList m_codecAliases_WINDOWS_1251;
+ static codecAliasList m_codecAliases_MACCYRILLIC;
+
+ // Thai
+ static codecAliasList m_codecAliases_CP874;
+ static codecAliasList m_codecAliases_TIS_620;
+
+ // Simplified Chinese
+ static codecAliasList m_codecAliases_GBK;
+ static codecAliasList m_codecAliases_HZ;
+ static codecAliasList m_codecAliases_GB18030;
+ static codecAliasList m_codecAliases_EUC_CN;
+ static codecAliasList m_codecAliases_2312_80;
+
+ // Central European
+ static codecAliasList m_codecAliases_ISO_8859_2;
+ static codecAliasList m_codecAliases_CP1250;
+ static codecAliasList m_codecAliases_MACCENTRALEUROPE;
+
+ // Vietnamese
+ static codecAliasList m_codecAliases_CP1258;
+
+ // Turkish
+ static codecAliasList m_codecAliases_CP1254;
+ static codecAliasList m_codecAliases_ISO_8859_9;
+
+ // Baltic
+ static codecAliasList m_codecAliases_CP1257;
+ static codecAliasList m_codecAliases_ISO_8859_4;
+
+ // Tables of pointers to the alias lists above, one per registration tier.
+ static gconstpointer const m_iconvBaseCodecList[];
+ static gconstpointer const m_iconvExtendedCodecList[];
+
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecGTK_h
diff --git a/WebCore/platform/text/qt/TextBoundaries.cpp b/WebCore/platform/text/qt/TextBoundariesQt.cpp
index ffc4c44..a354ca6 100644
--- a/WebCore/platform/text/qt/TextBoundaries.cpp
+++ b/WebCore/platform/text/qt/TextBoundariesQt.cpp
@@ -36,7 +36,6 @@
#include <QDebug>
#include <stdio.h>
-#if QT_VERSION >= 0x040400
#include <qtextboundaryfinder.h>
namespace WebCore {
@@ -76,48 +75,3 @@ void findWordBoundary(UChar const* buffer, int len, int position, int* start, in
}
-#else
-namespace WebCore {
-
-int findNextWordFromIndex(UChar const* buffer, int len, int position, bool forward)
-{
- QString str(reinterpret_cast<QChar const*>(buffer), len);
- notImplemented();
- return 0;
-}
-
-void findWordBoundary(UChar const* buffer, int len, int position, int* start, int* end)
-{
- QString str(reinterpret_cast<QChar const*>(buffer), len);
-
- if (position > str.length()) {
- *start = 0;
- *end = 0;
- return;
- }
-
- int currentPosition = position - 1;
- QString foundWord;
- while (currentPosition >= 0 &&
- str[currentPosition].isLetter()) {
- foundWord.prepend(str[currentPosition]);
- --currentPosition;
- }
-
- // currentPosition == 0 means the first char is not letter
- // currentPosition == -1 means we reached the beginning
- int startPos = (currentPosition < 0) ? 0 : ++currentPosition;
- currentPosition = position;
- if (str[currentPosition].isLetter()) {
- while (str[currentPosition].isLetter()) {
- foundWord.append(str[currentPosition]);
- ++currentPosition;
- }
- }
-
- *start = startPos;
- *end = currentPosition;
-}
-
-}
-#endif
diff --git a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
index d80e270..101947c 100644
--- a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
+++ b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
@@ -1,6 +1,4 @@
/*
- * This file is part of the DOM implementation for KDE.
- *
* Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
*
* This library is free software; you can redistribute it and/or
@@ -23,7 +21,6 @@
#include "config.h"
#include "TextBreakIterator.h"
-#if QT_VERSION >= 0x040400
#include <QtCore/qtextboundaryfinder.h>
#include <qdebug.h>
@@ -132,183 +129,3 @@ namespace WebCore {
}
}
-#else
-#include <qtextlayout.h>
-
-namespace WebCore {
-
- class TextBreakIterator {
- public:
- virtual int first() = 0;
- virtual int next() = 0;
- virtual int previous() = 0;
- inline int following(int pos)
- {
- currentPos = pos;
- return next();
- }
- inline int preceding(int pos)
- {
- currentPos = pos;
- return previous();
- }
- int currentPos;
- const UChar *string;
- int length;
- };
-
- class WordBreakIteratorQt : public TextBreakIterator {
- public:
- virtual int first();
- virtual int next();
- virtual int previous();
- };
-
- class CharBreakIteratorQt : public TextBreakIterator {
- public:
- virtual int first();
- virtual int next();
- virtual int previous();
- QTextLayout layout;
- };
-
- int WordBreakIteratorQt::first()
- {
- currentPos = 0;
- return currentPos;
- }
-
- int WordBreakIteratorQt::next()
- {
- if (currentPos >= length) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos < length) {
- if (haveSpace && !QChar(string[currentPos]).isSpace())
- break;
- if (QChar(string[currentPos]).isSpace())
- haveSpace = true;
- ++currentPos;
- }
- return currentPos;
- }
-
- int WordBreakIteratorQt::previous()
- {
- if (currentPos <= 0) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos > 0) {
- if (haveSpace && !QChar(string[currentPos]).isSpace())
- break;
- if (QChar(string[currentPos]).isSpace())
- haveSpace = true;
- --currentPos;
- }
- return currentPos;
- }
-
- int CharBreakIteratorQt::first()
- {
- currentPos = 0;
- return currentPos;
- }
-
- int CharBreakIteratorQt::next()
- {
- if (currentPos >= length)
- return -1;
- currentPos = layout.nextCursorPosition(currentPos);
- return currentPos;
- }
-
- int CharBreakIteratorQt::previous()
- {
- if (currentPos <= 0)
- return -1;
- currentPos = layout.previousCursorPosition(currentPos);
- return currentPos;
- }
-
-
-TextBreakIterator* wordBreakIterator(const UChar* string, int length)
-{
- static WordBreakIteratorQt *iterator = 0;
- if (!iterator)
- iterator = new WordBreakIteratorQt;
-
- iterator->string = string;
- iterator->length = length;
- iterator->currentPos = 0;
-
- return iterator;
-}
-
-TextBreakIterator* characterBreakIterator(const UChar* string, int length)
-{
- static CharBreakIteratorQt *iterator = 0;
- if (!iterator)
- iterator = new CharBreakIteratorQt;
-
- iterator->string = string;
- iterator->length = length;
- iterator->currentPos = 0;
- iterator->layout.setText(QString(reinterpret_cast<const QChar*>(string), length));
-
- return iterator;
-}
-
-TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
-{
- return characterBreakIterator(string, length);
-}
-
-TextBreakIterator* lineBreakIterator(const UChar*, int)
-{
- // not yet implemented
- return 0;
-}
-
-TextBreakIterator* sentenceBreakIterator(const UChar*, int)
-{
- // not yet implemented
- return 0;
-}
-
-int textBreakFirst(TextBreakIterator* bi)
-{
- return bi->first();
-}
-
-int textBreakNext(TextBreakIterator* bi)
-{
- return bi->next();
-}
-
-int textBreakPreceding(TextBreakIterator* bi, int pos)
-{
- return bi->preceding(pos);
-}
-
-int textBreakFollowing(TextBreakIterator* bi, int pos)
-{
- return bi->following(pos);
-}
-
-int textBreakCurrent(TextBreakIterator* bi)
-{
- return bi->currentPos;
-}
-
-bool isTextBreak(TextBreakIterator*, int)
-{
- return true;
-}
-
-}
-
-#endif
diff --git a/WebCore/platform/text/wince/TextBoundariesWince.cpp b/WebCore/platform/text/wince/TextBoundariesWince.cpp
new file mode 100644
index 0000000..df6f757
--- /dev/null
+++ b/WebCore/platform/text/wince/TextBoundariesWince.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2006 Zack Rusin <zack@kde.org>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextBoundaries.h"
+
+#include "NotImplemented.h"
+#include "PlatformString.h"
+
+using namespace WTF::Unicode;
+
+namespace WebCore {
+
+// Stub: not implemented on this port. Reports notImplemented() and always
+// returns 0; none of the arguments are looked at.
+int findNextWordFromIndex(const UChar * buffer, int len, int position, bool forward)
+{
+ notImplemented();
+ return 0;
+}
+
+// Finds the run of letters surrounding `position` in buffer[0, len).
+// On return *start is the offset of the first letter of the run reaching back
+// from position - 1 and *end is the offset just past the last letter of the
+// run beginning at position. If position > len both are set to 0.
+//
+// The characters are read straight from `buffer`: no copy of the text is made
+// and no scratch string is built, as only the two offsets are reported.
+void findWordBoundary(const UChar * buffer, int len, int position, int* start, int* end)
+{
+    if (position > len) {
+        *start = 0;
+        *end = 0;
+        return;
+    }
+
+    // Walk left over the letters that precede position.
+    int currentPosition = position - 1;
+    while (currentPosition >= 0 && isLetter(buffer[currentPosition]))
+        --currentPosition;
+
+    // currentPosition >= 0 means it stopped on a non-letter: the word starts one past it
+    // currentPosition < 0 means we reached the beginning
+    int startPos = (currentPosition < 0) ? 0 : ++currentPosition;
+
+    // Walk right over the letters starting at position, never leaving the buffer.
+    currentPosition = position;
+    while (currentPosition >= 0 && currentPosition < len && isLetter(buffer[currentPosition]))
+        ++currentPosition;
+
+    *start = startPos;
+    *end = currentPosition;
+}
+
+
+}
diff --git a/WebCore/platform/text/wince/TextBreakIteratorWince.cpp b/WebCore/platform/text/wince/TextBreakIteratorWince.cpp
new file mode 100644
index 0000000..26a5be2
--- /dev/null
+++ b/WebCore/platform/text/wince/TextBreakIteratorWince.cpp
@@ -0,0 +1,311 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include "PlatformString.h"
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF::Unicode;
+
+namespace WebCore {
+
+// Returns true when a character boundary may be placed immediately before |c|.
+// This is an approximation, not full grapheme-cluster segmentation:
+//  - a non-spacing mark is never a stop, so it stays with the unit before it;
+//  - a trail surrogate is never a stop, so a surrogate pair is not split.
+static inline bool isCharStop(UChar c)
+{
+    CharCategory charCategory = category(c);
+    if (charCategory == Mark_NonSpacing)
+        return false;
+    if (charCategory != Other_Surrogate)
+        return true;
+    // Lead surrogates (0xD800..0xDBFF) begin a pair and are valid stops;
+    // trail surrogates (0xDC00..0xDFFF) are the second half and are not.
+    return c < 0xdc00 || c > 0xdfff;
+}
+
+// Returns true for every code unit whose category is not Separator_Line.
+static inline bool isLineStop(UChar c)
+{
+    const bool isLineSeparator = category(c) == Separator_Line;
+    return !isLineSeparator;
+}
+
+// Returns true when |c| is punctuation according to isPunct().
+static inline bool isSentenceStop(UChar c)
+{
+    const bool punctuation = isPunct(c);
+    return punctuation;
+}
+
+// Base class of the simple break iterators in this file. It holds a borrowed
+// pointer to the text, the text length and the current position; subclasses
+// supply the actual boundary rules through first(), next() and previous().
+class TextBreakIterator {
+public:
+    // Starts out detached (no text, position 0) so that no member is ever
+    // read uninitialised before the first reset().
+    TextBreakIterator()
+        : currentPos(0)
+        , string(0)
+        , length(0)
+    {
+    }
+
+    // Virtual because the class is used polymorphically through base pointers.
+    virtual ~TextBreakIterator() { }
+
+    // Points the iterator at |len| code units starting at |str| and rewinds
+    // it to position 0. The buffer is not copied.
+    void reset(const UChar* str, int len)
+    {
+        string = str;
+        length = len;
+        currentPos = 0;
+    }
+
+    virtual int first() = 0;
+    virtual int next() = 0;
+    virtual int previous() = 0;
+
+    // Moves to |position| and returns whatever next() reports from there.
+    int following(int position)
+    {
+        currentPos = position;
+        return next();
+    }
+
+    // Moves to |position| and returns whatever previous() reports from there.
+    int preceding(int position)
+    {
+        currentPos = position;
+        return previous();
+    }
+
+    int currentPos;
+    const UChar* string;
+    int length;
+};
+
+// Break iterator whose boundary rules are driven by isSpace().
+struct WordBreakIterator : public TextBreakIterator {
+    virtual int first();
+    virtual int next();
+    virtual int previous();
+};
+
+// Break iterator whose boundary rules are driven by isCharStop().
+struct CharBreakIterator : public TextBreakIterator {
+    virtual int first();
+    virtual int next();
+    virtual int previous();
+};
+
+// Break iterator whose boundary rules are driven by isLineStop().
+struct LineBreakIterator : public TextBreakIterator {
+    virtual int first();
+    virtual int next();
+    virtual int previous();
+};
+
+// Break iterator whose boundary rules are driven by isSentenceStop().
+struct SentenceBreakIterator : public TextBreakIterator {
+    virtual int first();
+    virtual int next();
+    virtual int previous();
+};
+
+// Rewinds to the start of the text and returns that position (always 0).
+int WordBreakIterator::first()
+{
+    return currentPos = 0;
+}
+
+// Advances to the next word boundary and returns it.
+//
+// Scanning forward from the current position, the boundary is the first
+// non-space code unit found after at least one space; if the text ends first,
+// the boundary is |length|. Returns -1 (and leaves the position at -1) when the
+// iterator is already at or past the end, or when the end was already reported
+// by an earlier call.
+int WordBreakIterator::next()
+{
+    // The negative check stops a repeated call after the end was reported
+    // from indexing string[-1]; the >= check rejects positions past the end.
+    if (currentPos < 0 || currentPos >= length) {
+        currentPos = -1;
+        return currentPos;
+    }
+    bool haveSpace = false;
+    while (currentPos < length) {
+        if (haveSpace && !isSpace(string[currentPos]))
+            break;
+        if (isSpace(string[currentPos]))
+            haveSpace = true;
+        ++currentPos;
+    }
+    return currentPos;
+}
+
+// Moves backwards to the previous word boundary and returns it.
+//
+// Scanning backward from the current position, the boundary is the first
+// non-space code unit found after at least one space; if the start of the text
+// is reached first, the boundary is 0. Returns -1 (and leaves the position at
+// -1) when the iterator is already at or before the start.
+int WordBreakIterator::previous()
+{
+    if (currentPos <= 0) {
+        currentPos = -1;
+        return currentPos;
+    }
+    // preceding() may place the position at or beyond |length|. Begin at the
+    // last valid index so that string[length] is never read.
+    if (currentPos >= length)
+        currentPos = length - 1;
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        if (haveSpace && !isSpace(string[currentPos]))
+            break;
+        if (isSpace(string[currentPos]))
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Rewinds to the start of the text and returns that position (always 0).
+int CharBreakIterator::first()
+{
+    return currentPos = 0;
+}
+
+// Advances to the next character boundary and returns it, or returns -1
+// (leaving the position untouched) when already at or past the end.
+int CharBreakIterator::next()
+{
+    if (currentPos >= length)
+        return -1;
+    // Always step at least one unit, then keep going while the unit at the
+    // new position is not a character stop.
+    for (++currentPos; currentPos < length; ++currentPos) {
+        if (isCharStop(string[currentPos]))
+            break;
+    }
+    return currentPos;
+}
+
+// Moves back to the previous character boundary and returns it, or returns -1
+// (leaving the position untouched) when already at or before the start.
+int CharBreakIterator::previous()
+{
+    if (currentPos <= 0)
+        return -1;
+    // Positions past the end are treated as the end; then step back one unit.
+    int candidate = (currentPos > length ? length : currentPos) - 1;
+    // Keep stepping back while the unit under the candidate is not a stop.
+    while (candidate > 0 && !isCharStop(string[candidate]))
+        --candidate;
+    currentPos = candidate;
+    return currentPos;
+}
+
+// Rewinds to the start of the text and returns that position (always 0).
+int LineBreakIterator::first()
+{
+    return currentPos = 0;
+}
+
+// Advances to the next line boundary and returns it.
+//
+// Scanning forward from the current position, the boundary is the first code
+// unit for which isLineStop() is false, found after at least one unit for which
+// it is true; if the text ends first, the boundary is |length|. Returns -1 (and
+// leaves the position at -1) when the iterator is already at or past the end,
+// or when the end was already reported by an earlier call.
+int LineBreakIterator::next()
+{
+    // The negative check stops a repeated call after the end was reported
+    // from indexing string[-1]; the >= check rejects positions past the end.
+    if (currentPos < 0 || currentPos >= length) {
+        currentPos = -1;
+        return currentPos;
+    }
+    bool haveSpace = false;
+    while (currentPos < length) {
+        if (haveSpace && !isLineStop(string[currentPos]))
+            break;
+        if (isLineStop(string[currentPos]))
+            haveSpace = true;
+        ++currentPos;
+    }
+    return currentPos;
+}
+
+// Moves backwards to the previous line boundary and returns it.
+//
+// Scanning backward from the current position, the boundary is the first code
+// unit for which isLineStop() is false, found after at least one unit for which
+// it is true; if the start of the text is reached first, the boundary is 0.
+// Returns -1 (and leaves the position at -1) when the iterator is already at or
+// before the start.
+int LineBreakIterator::previous()
+{
+    if (currentPos <= 0) {
+        currentPos = -1;
+        return currentPos;
+    }
+    // preceding() may place the position at or beyond |length|. Begin at the
+    // last valid index so that string[length] is never read.
+    if (currentPos >= length)
+        currentPos = length - 1;
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        if (haveSpace && !isLineStop(string[currentPos]))
+            break;
+        if (isLineStop(string[currentPos]))
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Rewinds to the start of the text and returns that position (always 0).
+int SentenceBreakIterator::first()
+{
+    return currentPos = 0;
+}
+
+// Advances to the next sentence boundary and returns it.
+//
+// Scanning forward from the current position, the boundary is the first
+// non-punctuation code unit found after at least one punctuation unit (as
+// judged by isSentenceStop()); if the text ends first, the boundary is
+// |length|. Returns -1 (and leaves the position at -1) when the iterator is
+// already at or past the end, or when the end was already reported by an
+// earlier call.
+int SentenceBreakIterator::next()
+{
+    // The negative check stops a repeated call after the end was reported
+    // from indexing string[-1]; the >= check rejects positions past the end.
+    if (currentPos < 0 || currentPos >= length) {
+        currentPos = -1;
+        return currentPos;
+    }
+    bool haveSpace = false;
+    while (currentPos < length) {
+        if (haveSpace && !isSentenceStop(string[currentPos]))
+            break;
+        if (isSentenceStop(string[currentPos]))
+            haveSpace = true;
+        ++currentPos;
+    }
+    return currentPos;
+}
+
+// Moves backwards to the previous sentence boundary and returns it.
+//
+// Scanning backward from the current position, the boundary is the first
+// non-punctuation code unit found after at least one punctuation unit (as
+// judged by isSentenceStop()); if the start of the text is reached first, the
+// boundary is 0. Returns -1 (and leaves the position at -1) when the iterator
+// is already at or before the start.
+int SentenceBreakIterator::previous()
+{
+    if (currentPos <= 0) {
+        currentPos = -1;
+        return currentPos;
+    }
+    // preceding() may place the position at or beyond |length|. Begin at the
+    // last valid index so that string[length] is never read.
+    if (currentPos >= length)
+        currentPos = length - 1;
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        if (haveSpace && !isSentenceStop(string[currentPos]))
+            break;
+        if (isSentenceStop(string[currentPos]))
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Returns the word break iterator, reset to cover |length| code units at
+// |string|. The iterator object is declared with DEFINE_STATIC_LOCAL, so
+// presumably every call hands back the same instance and a new call discards
+// the state of the previous one (confirm against the macro definition).
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(WordBreakIterator, sharedIterator, ());
+    sharedIterator.reset(string, length);
+    return &sharedIterator;
+}
+
+// Returns the character break iterator, reset to cover |length| code units at
+// |string|. The iterator object is declared with DEFINE_STATIC_LOCAL, so
+// presumably every call hands back the same instance and a new call discards
+// the state of the previous one (confirm against the macro definition).
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(CharBreakIterator, sharedIterator, ());
+    sharedIterator.reset(string, length);
+    return &sharedIterator;
+}
+
+// Returns the line break iterator, reset to cover |length| code units at
+// |string|. The iterator object is declared with DEFINE_STATIC_LOCAL, so
+// presumably every call hands back the same instance and a new call discards
+// the state of the previous one (confirm against the macro definition).
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(LineBreakIterator, sharedIterator, ());
+    sharedIterator.reset(string, length);
+    return &sharedIterator;
+}
+
+// Returns the sentence break iterator, reset to cover |length| code units at
+// |string|. The iterator object is declared with DEFINE_STATIC_LOCAL, so
+// presumably every call hands back the same instance and a new call discards
+// the state of the previous one (confirm against the macro definition).
+TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(SentenceBreakIterator, sharedIterator, ());
+    sharedIterator.reset(string, length);
+    return &sharedIterator;
+}
+
+// Forwards to the iterator's first() and returns its result.
+int textBreakFirst(TextBreakIterator* breakIterator)
+{
+    const int firstBoundary = breakIterator->first();
+    return firstBoundary;
+}
+
+// Forwards to the iterator's next() and returns its result.
+int textBreakNext(TextBreakIterator* breakIterator)
+{
+    const int nextBoundary = breakIterator->next();
+    return nextBoundary;
+}
+
+// Forwards to the iterator's preceding(position) and returns its result.
+int textBreakPreceding(TextBreakIterator* breakIterator, int position)
+{
+    const int boundaryBefore = breakIterator->preceding(position);
+    return boundaryBefore;
+}
+
+// Forwards to the iterator's following(position) and returns its result.
+int textBreakFollowing(TextBreakIterator* breakIterator, int position)
+{
+    const int boundaryAfter = breakIterator->following(position);
+    return boundaryAfter;
+}
+
+// Returns the iterator's stored position without moving it.
+int textBreakCurrent(TextBreakIterator* breakIterator)
+{
+    const int position = breakIterator->currentPos;
+    return position;
+}
+
+// Reports every position as a boundary; neither argument is consulted.
+bool isTextBreak(TextBreakIterator*, int)
+{
+    const bool everyPositionIsBoundary = true;
+    return everyPositionIsBoundary;
+}
+
+// Cursor movement uses the character break iterator: this simply returns
+// characterBreakIterator(string, length).
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+{
+    TextBreakIterator* characterIterator = characterBreakIterator(string, length);
+    return characterIterator;
+}
+
+}