2 files changed, 435 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp
new file mode 100644
index 0000000..a01c3ee
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp
@@ -0,0 +1,192 @@
+/*
+ *  Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *  Copyright (C) 2010 Igalia S.L.
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "UnicodeGLib.h"
+
+#include <wtf/Vector.h>
+#include <wtf/unicode/UTF8.h>
+
+#define UTF8_IS_SURROGATE(character) ((character) >= 0x10000 && (character) <= 0x10FFFF) // True for code points in 0x10000..0x10FFFF, which getUTF16LengthFromUTF8() counts as two UTF-16 code units; the argument is parenthesized so expression arguments expand safely.
+
+namespace WTF {
+namespace Unicode {
+
+UChar32 foldCase(UChar32 ch)
+{
+    // Case-folds a single code point by round-tripping through GLib's UTF-8 string API.
+    // Returns ch itself when it cannot be encoded or when it does not fold to exactly one code point.
+    GOwnPtr<GError> gerror;
+    GOwnPtr<char> utf8char(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr()));
+    if (gerror || !utf8char.get())
+        return ch;
+
+    GOwnPtr<char> utf8caseFolded(g_utf8_casefold(utf8char.get(), -1));
+
+    // g_utf8_casefold() performs full folding, so the result can hold several code points
+    // (U+00DF folds to "ss") or none (U+0000); returning just the first one would be wrong.
+    glong foldedLength = 0;
+    GOwnPtr<gunichar> ucs4Result(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, &foldedLength));
+    return foldedLength == 1 ? static_cast<UChar32>(*ucs4Result) : ch;
+}
+
+static int getUTF16LengthFromUTF8(const gchar* utf8String, int length)
+{
+    // Returns how many UTF-16 code units the UTF-8 input needs: two for every code point that
+    // UTF8_IS_SURROGATE() accepts, one for any other. Scanning stops at a NUL byte or once the
+    // cursor reaches utf8String + length, whichever happens first.
+    const gchar* const end = utf8String + length;
+    int codeUnits = 0;
+
+    for (const gchar* cursor = utf8String; cursor < end && *cursor; cursor = g_utf8_next_char(cursor)) {
+        const gunichar codePoint = g_utf8_get_char(cursor);
+        codeUnits += UTF8_IS_SURROGATE(codePoint) ? 2 : 1;
+    }
+    return codeUnits;
+}
+
+typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length); // Signature of the GLib case converters handed to convertCase(): g_utf8_casefold, g_utf8_strdown and g_utf8_strup.
+
+static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction)
+{
+    // Runs caseFunction over src by converting UTF-16 -> UTF-8 -> UTF-16 and stores the outcome in result.
+    // *error reports failure. Returns the number of UChars written (-1 when there are none or an early
+    // step fails); if result is too small, nothing is written and the required length is returned.
+    *error = true; // Every early return below is a failure; cleared after the final conversion succeeds.
+
+    // Allocate a buffer big enough to hold all the characters.
+    Vector<char> buffer(srcLength * 3);
+    char* utf8Target = buffer.data();
+    const UChar* utf16Source = src;
+    ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true);
+    if (conversionResult != conversionOK)
+        return -1;
+    buffer.shrink(utf8Target - buffer.data());
+
+    // GLib's case functions return 0 for a null string (NOTE(review): presumably what an empty buffer yields); never strlen() that.
+    GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size()));
+    if (!utf8Result.get())
+        return -1;
+    long utf8ResultLength = strlen(utf8Result.get());
+
+    // Calculate the destination buffer size.
+    int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength);
+    if (realLength > resultLength)
+        return realLength;
+
+    // Convert the result to UTF-16.
+    UChar* utf16Target = result;
+    const char* utf8Source = utf8Result.get();
+    conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true);
+    *error = conversionResult != conversionOK;
+    long utf16ResultLength = utf16Target - result;
+    return utf16ResultLength <= 0 ? -1 : utf16ResultLength;
+}
+int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    return convertCase(result, resultLength, src, srcLength, error, &g_utf8_casefold); // Case-folds src into result; the return value and *error come straight from convertCase().
+}
+
+int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    return convertCase(result, resultLength, src, srcLength, error, &g_utf8_strdown); // Lowercases src into result; the return value and *error come straight from convertCase().
+}
+
+int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    return convertCase(result, resultLength, src, srcLength, error, &g_utf8_strup); // Uppercases src into result; the return value and *error come straight from convertCase().
+}
+
+Direction direction(UChar32 c)
+{
+    // Translates Pango's bidi type for c into the matching Direction; any type not listed maps to OtherNeutral.
+    switch (pango_bidi_type_for_unichar(c)) {
+    case PANGO_BIDI_TYPE_L:
+        return LeftToRight;
+    case PANGO_BIDI_TYPE_LRE:
+        return LeftToRightEmbedding;
+    case PANGO_BIDI_TYPE_LRO:
+        return LeftToRightOverride;
+    case PANGO_BIDI_TYPE_R:
+        return RightToLeft;
+    case PANGO_BIDI_TYPE_AL:
+        return RightToLeftArabic;
+    case PANGO_BIDI_TYPE_RLE:
+        return RightToLeftEmbedding;
+    case PANGO_BIDI_TYPE_RLO:
+        return RightToLeftOverride;
+    case PANGO_BIDI_TYPE_PDF:
+        return PopDirectionalFormat;
+    case PANGO_BIDI_TYPE_EN:
+        return EuropeanNumber;
+    case PANGO_BIDI_TYPE_ES:
+        return EuropeanNumberSeparator;
+    case PANGO_BIDI_TYPE_ET:
+        return EuropeanNumberTerminator;
+    case PANGO_BIDI_TYPE_AN:
+        return ArabicNumber;
+    case PANGO_BIDI_TYPE_CS:
+        return CommonNumberSeparator;
+    case PANGO_BIDI_TYPE_NSM:
+        return NonSpacingMark;
+    case PANGO_BIDI_TYPE_BN:
+        return BoundaryNeutral;
+    case PANGO_BIDI_TYPE_B:
+        return BlockSeparator;
+    case PANGO_BIDI_TYPE_S:
+        return SegmentSeparator;
+    case PANGO_BIDI_TYPE_WS:
+        return WhiteSpaceNeutral;
+    default:
+        return OtherNeutral;
+    }
+}
+
+int umemcasecmp(const UChar* a, const UChar* b, int len)
+{
+    // Case-insensitive comparison of the first len UTF-16 code units of a and b.
+    // Returns a negative value, zero or a positive value, like strcmp().
+    GOwnPtr<char> utf8a(g_utf16_to_utf8(a, len, 0, 0, 0));
+    GOwnPtr<char> utf8b(g_utf16_to_utf8(b, len, 0, 0, 0));
+
+    // g_utf16_to_utf8() returns 0 for invalid UTF-16 such as an unpaired surrogate. GLib would then
+    // reject the null strings and report every such pair as equal, so order the raw code units instead.
+    if (!utf8a.get() || !utf8b.get()) {
+        for (int i = 0; i < len; ++i) {
+            if (a[i] != b[i])
+                return a[i] < b[i] ? -1 : 1;
+        }
+        return 0;
+    }
+
+    GOwnPtr<char> foldedA(g_utf8_casefold(utf8a.get(), -1));
+    GOwnPtr<char> foldedB(g_utf8_casefold(utf8b.get(), -1));
+
+    // FIXME: umemcasecmp needs to mimic ICU's u_memcasecmp, which the ICU docs describe as equivalent to
+    // u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options)), i.e. a bitwise comparison
+    // of case-folded code units rather than a locale-aware collation. GLib has no direct equivalent,
+    // so for now we use g_utf8_collate().
+    return g_utf8_collate(foldedA.get(), foldedB.get());
+}
+
+}
+}
diff --git a/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h
new file mode 100644
index 0000000..46b00ea
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h
@@ -0,0 +1,243 @@
+/*
+ *  Copyright (C) 2006 George Staikos <staikos@kde.org>
+ *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *  Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
+ *  Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef UnicodeGLib_h
+#define UnicodeGLib_h
+
+#include "UnicodeMacrosFromICU.h"
+#include "GOwnPtr.h"
+
+#include <glib.h>
+#include <pango/pango.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef uint16_t UChar;
+typedef int32_t UChar32;
+
+namespace WTF {
+namespace Unicode {
+
+enum Direction { // Bidirectional classes that direction() can report for a code point.
+    LeftToRight,
+    RightToLeft,
+    EuropeanNumber,
+    EuropeanNumberSeparator,
+    EuropeanNumberTerminator,
+    ArabicNumber,
+    CommonNumberSeparator,
+    BlockSeparator,
+    SegmentSeparator,
+    WhiteSpaceNeutral,
+    OtherNeutral, // Also the fallback direction() uses for Pango bidi types it has no explicit case for.
+    LeftToRightEmbedding,
+    LeftToRightOverride,
+    RightToLeftArabic,
+    RightToLeftEmbedding,
+    RightToLeftOverride,
+    PopDirectionalFormat,
+    NonSpacingMark,
+    BoundaryNeutral
+};
+
+enum DecompositionType { // Result type of decompositionType(), which currently always returns DecompositionNone.
+    DecompositionNone,
+    DecompositionCanonical,
+    DecompositionCompat,
+    DecompositionCircle,
+    DecompositionFinal,
+    DecompositionFont,
+    DecompositionFraction,
+    DecompositionInitial,
+    DecompositionIsolated,
+    DecompositionMedial,
+    DecompositionNarrow,
+    DecompositionNoBreak,
+    DecompositionSmall,
+    DecompositionSquare,
+    DecompositionSub,
+    DecompositionSuper,
+    DecompositionVertical,
+    DecompositionWide // No trailing comma: matches the other enums here and stays valid before C++11.
+};
+
+enum CharCategory { // Each value is U_MASK() of a GLib GUnicodeType constant (presumably a one-bit mask; confirm in UnicodeMacrosFromICU.h).
+    NoCategory =  0,
+    Other_NotAssigned = U_MASK(G_UNICODE_UNASSIGNED),
+    Letter_Uppercase = U_MASK(G_UNICODE_UPPERCASE_LETTER),
+    Letter_Lowercase = U_MASK(G_UNICODE_LOWERCASE_LETTER),
+    Letter_Titlecase = U_MASK(G_UNICODE_TITLECASE_LETTER),
+    Letter_Modifier = U_MASK(G_UNICODE_MODIFIER_LETTER),
+    Letter_Other = U_MASK(G_UNICODE_OTHER_LETTER),
+
+    Mark_NonSpacing = U_MASK(G_UNICODE_NON_SPACING_MARK),
+    Mark_Enclosing = U_MASK(G_UNICODE_ENCLOSING_MARK),
+    Mark_SpacingCombining = U_MASK(G_UNICODE_COMBINING_MARK),
+
+    Number_DecimalDigit = U_MASK(G_UNICODE_DECIMAL_NUMBER),
+    Number_Letter = U_MASK(G_UNICODE_LETTER_NUMBER),
+    Number_Other = U_MASK(G_UNICODE_OTHER_NUMBER),
+
+    Separator_Space = U_MASK(G_UNICODE_SPACE_SEPARATOR),
+    Separator_Line = U_MASK(G_UNICODE_LINE_SEPARATOR),
+    Separator_Paragraph = U_MASK(G_UNICODE_PARAGRAPH_SEPARATOR),
+
+    Other_Control = U_MASK(G_UNICODE_CONTROL),
+    Other_Format = U_MASK(G_UNICODE_FORMAT),
+    Other_PrivateUse = U_MASK(G_UNICODE_PRIVATE_USE),
+    Other_Surrogate = U_MASK(G_UNICODE_SURROGATE),
+
+    Punctuation_Dash = U_MASK(G_UNICODE_DASH_PUNCTUATION),
+    Punctuation_Open = U_MASK(G_UNICODE_OPEN_PUNCTUATION),
+    Punctuation_Close = U_MASK(G_UNICODE_CLOSE_PUNCTUATION),
+    Punctuation_Connector = U_MASK(G_UNICODE_CONNECT_PUNCTUATION),
+    Punctuation_Other = U_MASK(G_UNICODE_OTHER_PUNCTUATION),
+
+    Symbol_Math = U_MASK(G_UNICODE_MATH_SYMBOL),
+    Symbol_Currency = U_MASK(G_UNICODE_CURRENCY_SYMBOL),
+    Symbol_Modifier = U_MASK(G_UNICODE_MODIFIER_SYMBOL),
+    Symbol_Other = U_MASK(G_UNICODE_OTHER_SYMBOL),
+
+    Punctuation_InitialQuote = U_MASK(G_UNICODE_INITIAL_PUNCTUATION),
+    Punctuation_FinalQuote = U_MASK(G_UNICODE_FINAL_PUNCTUATION)
+};
+
+UChar32 foldCase(UChar32);
+
+int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
+
+int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
+
+inline UChar32 toLower(UChar32 c)
+{
+    return static_cast<UChar32>(g_unichar_tolower(c)); // Single code point lowercase conversion, delegated to GLib.
+}
+
+inline UChar32 toUpper(UChar32 c)
+{
+    return static_cast<UChar32>(g_unichar_toupper(c)); // Single code point uppercase conversion, delegated to GLib.
+}
+
+int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
+
+inline UChar32 toTitleCase(UChar32 c)
+{
+    return static_cast<UChar32>(g_unichar_totitle(c)); // Single code point titlecase conversion, delegated to GLib.
+}
+
+inline bool isArabicChar(UChar32 c)
+{
+    return !(c < 0x0600 || c > 0x06FF); // True only inside the inclusive range U+0600..U+06FF.
+}
+
+inline bool isAlphanumeric(UChar32 c)
+{
+    return g_unichar_isalnum(c) != FALSE; // Narrows GLib's gboolean answer to bool.
+}
+
+inline bool isFormatChar(UChar32 c)
+{
+    return G_UNICODE_FORMAT == g_unichar_type(c); // True when GLib classifies c as G_UNICODE_FORMAT.
+}
+
+inline bool isSeparatorSpace(UChar32 c)
+{
+    return G_UNICODE_SPACE_SEPARATOR == g_unichar_type(c); // True when GLib classifies c as G_UNICODE_SPACE_SEPARATOR.
+}
+
+inline bool isPrintableChar(UChar32 c)
+{
+    return g_unichar_isprint(c) != FALSE; // Narrows GLib's gboolean answer to bool.
+}
+
+inline bool isDigit(UChar32 c)
+{
+    return g_unichar_isdigit(c) != FALSE; // Narrows GLib's gboolean answer to bool.
+}
+
+inline bool isPunct(UChar32 c)
+{
+    return g_unichar_ispunct(c) != FALSE; // Narrows GLib's gboolean answer to bool.
+}
+
+inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
+{
+    // FIXME: Not implemented for this GLib backend; c is ignored and the answer is always false.
+    return false;
+}
+
+inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
+{
+    // FIXME: Not implemented for this GLib backend; c is ignored and the answer is always false.
+    return false;
+}
+
+inline UChar32 mirroredChar(UChar32 c)
+{
+    gunichar counterpart = 0; // Returned as-is if GLib does not write the out-parameter.
+    static_cast<void>(g_unichar_get_mirror_char(c, &counterpart)); // The gboolean result is ignored; only the out-parameter is used.
+    return static_cast<UChar32>(counterpart);
+}
+
+inline CharCategory category(UChar32 c)
+{
+    // Code points above U+FFFF are not classified and yield NoCategory.
+    if (c <= 0xffff)
+        return static_cast<CharCategory>(U_MASK(g_unichar_type(c)));
+    return NoCategory;
+}
+
+Direction direction(UChar32);
+
+inline bool isLower(UChar32 c)
+{
+    return g_unichar_islower(c) != FALSE; // Narrows GLib's gboolean answer to bool.
+}
+
+inline int digitValue(UChar32 c)
+{
+    return static_cast<int>(g_unichar_digit_value(c)); // Passes GLib's result through unchanged (GLib documents -1 for non-digits).
+}
+
+inline uint8_t combiningClass(UChar32 c)
+{
+    // FIXME: Stubbed out, so every code point reports combining class 0; the disabled GLib call is kept below.
+    // return g_unichar_combining_class(c);
+    return 0;
+}
+
+inline DecompositionType decompositionType(UChar32 c)
+{
+    // FIXME: Not implemented for this GLib backend; c is ignored and DecompositionNone is always returned.
+    return DecompositionNone;
+}
+
+int umemcasecmp(const UChar*, const UChar*, int len);
+
+}
+}
+
+#endif
+