diff options
Diffstat (limited to 'Source/JavaScriptCore/wtf/unicode/glib')
-rw-r--r-- | Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp | 192 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h | 243 |
2 files changed, 435 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp new file mode 100644 index 0000000..a01c3ee --- /dev/null +++ b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> + * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> + * Copyright (C) 2010 Igalia S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "config.h" +#include "UnicodeGLib.h" + +#include <wtf/Vector.h> +#include <wtf/unicode/UTF8.h> + +#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF) + +namespace WTF { +namespace Unicode { + +UChar32 foldCase(UChar32 ch) +{ + GOwnPtr<GError> gerror; + + GOwnPtr<char> utf8char; + utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr())); + if (gerror) + return ch; + + GOwnPtr<char> utf8caseFolded; + utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1)); + + GOwnPtr<gunichar> ucs4Result; + ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0)); + + return *ucs4Result; +} + +static int getUTF16LengthFromUTF8(const gchar* utf8String, int length) +{ + int utf16Length = 0; + const gchar* inputString = utf8String; + + while ((utf8String + length - inputString > 0) && *inputString) { + gunichar character = g_utf8_get_char(inputString); + + utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1; + inputString = g_utf8_next_char(inputString); + } + + return utf16Length; +} + +typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length); + +static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction) +{ + *error = false; + + // Allocate a buffer big enough to hold all the characters. + Vector<char> buffer(srcLength * 3); + char* utf8Target = buffer.data(); + const UChar* utf16Source = src; + ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true); + if (conversionResult != conversionOK) { + *error = true; + return -1; + } + buffer.shrink(utf8Target - buffer.data()); + + GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size())); + long utf8ResultLength = strlen(utf8Result.get()); + + // Calculate the destination buffer size. + int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength); + if (realLength > resultLength) { + *error = true; + return realLength; + } + + // Convert the result to UTF-16. + UChar* utf16Target = result; + const char* utf8Source = utf8Result.get(); + conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true); + long utf16ResultLength = utf16Target - result; + if (conversionResult != conversionOK) + *error = true; + + return utf16ResultLength <= 0 ? -1 : utf16ResultLength; +} +int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold); +} + +int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown); +} + +int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup); +} + +Direction direction(UChar32 c) +{ + PangoBidiType type = pango_bidi_type_for_unichar(c); + switch (type) { + case PANGO_BIDI_TYPE_L: + return LeftToRight; + case PANGO_BIDI_TYPE_R: + return RightToLeft; + case PANGO_BIDI_TYPE_AL: + return RightToLeftArabic; + case PANGO_BIDI_TYPE_LRE: + return LeftToRightEmbedding; + case PANGO_BIDI_TYPE_RLE: + return RightToLeftEmbedding; + case PANGO_BIDI_TYPE_LRO: + return LeftToRightOverride; + case PANGO_BIDI_TYPE_RLO: + return RightToLeftOverride; + case PANGO_BIDI_TYPE_PDF: + return PopDirectionalFormat; + case PANGO_BIDI_TYPE_EN: + return EuropeanNumber; + case PANGO_BIDI_TYPE_AN: + return ArabicNumber; + case PANGO_BIDI_TYPE_ES: + return EuropeanNumberSeparator; + case PANGO_BIDI_TYPE_ET: + return EuropeanNumberTerminator; + case PANGO_BIDI_TYPE_CS: + return CommonNumberSeparator; + case PANGO_BIDI_TYPE_NSM: + return NonSpacingMark; + case PANGO_BIDI_TYPE_BN: + return BoundaryNeutral; + case PANGO_BIDI_TYPE_B: + return BlockSeparator; + case PANGO_BIDI_TYPE_S: + return SegmentSeparator; + case PANGO_BIDI_TYPE_WS: + return WhiteSpaceNeutral; + default: + return OtherNeutral; + } +} + +int umemcasecmp(const UChar* a, const UChar* b, int len) +{ + GOwnPtr<char> utf8a; + GOwnPtr<char> utf8b; + + utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0)); + utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0)); + + GOwnPtr<char> foldedA; + GOwnPtr<char> foldedB; + + foldedA.set(g_utf8_casefold(utf8a.get(), -1)); + foldedB.set(g_utf8_casefold(utf8b.get(), -1)); + + // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu + // from the ICU docs: + // "Compare two strings case-insensitively using full case folding. + // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))." + // + // So it looks like we don't need the full g_utf8_collate here, + // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes). + // As there is no direct equivalent to this icu function in GLib, for now + // we'll use g_utf8_collate(): + + return g_utf8_collate(foldedA.get(), foldedB.get()); +} + +} +} diff --git a/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h new file mode 100644 index 0000000..46b00ea --- /dev/null +++ b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> + * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef UnicodeGLib_h +#define UnicodeGLib_h + +#include "UnicodeMacrosFromICU.h" +#include "GOwnPtr.h" + +#include <glib.h> +#include <pango/pango.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +typedef uint16_t UChar; +typedef int32_t UChar32; + +namespace WTF { +namespace Unicode { + +enum Direction { + LeftToRight, + RightToLeft, + EuropeanNumber, + EuropeanNumberSeparator, + EuropeanNumberTerminator, + ArabicNumber, + CommonNumberSeparator, + BlockSeparator, + SegmentSeparator, + WhiteSpaceNeutral, + OtherNeutral, + LeftToRightEmbedding, + LeftToRightOverride, + RightToLeftArabic, + RightToLeftEmbedding, + RightToLeftOverride, + PopDirectionalFormat, + NonSpacingMark, + BoundaryNeutral +}; + +enum DecompositionType { + DecompositionNone, + DecompositionCanonical, + DecompositionCompat, + DecompositionCircle, + DecompositionFinal, + DecompositionFont, + DecompositionFraction, + DecompositionInitial, + DecompositionIsolated, + DecompositionMedial, + DecompositionNarrow, + DecompositionNoBreak, + DecompositionSmall, + DecompositionSquare, + DecompositionSub, + DecompositionSuper, + DecompositionVertical, + DecompositionWide, +}; + +enum CharCategory { + NoCategory = 0, + Other_NotAssigned = U_MASK(G_UNICODE_UNASSIGNED), + Letter_Uppercase = U_MASK(G_UNICODE_UPPERCASE_LETTER), + Letter_Lowercase = U_MASK(G_UNICODE_LOWERCASE_LETTER), + Letter_Titlecase = U_MASK(G_UNICODE_TITLECASE_LETTER), + Letter_Modifier = U_MASK(G_UNICODE_MODIFIER_LETTER), + Letter_Other = U_MASK(G_UNICODE_OTHER_LETTER), + + Mark_NonSpacing = U_MASK(G_UNICODE_NON_SPACING_MARK), + Mark_Enclosing = U_MASK(G_UNICODE_ENCLOSING_MARK), + Mark_SpacingCombining = U_MASK(G_UNICODE_COMBINING_MARK), + + Number_DecimalDigit = U_MASK(G_UNICODE_DECIMAL_NUMBER), + Number_Letter = U_MASK(G_UNICODE_LETTER_NUMBER), + Number_Other = U_MASK(G_UNICODE_OTHER_NUMBER), + + Separator_Space = U_MASK(G_UNICODE_SPACE_SEPARATOR), + Separator_Line = U_MASK(G_UNICODE_LINE_SEPARATOR), + Separator_Paragraph = U_MASK(G_UNICODE_PARAGRAPH_SEPARATOR), + + Other_Control = U_MASK(G_UNICODE_CONTROL), + Other_Format = U_MASK(G_UNICODE_FORMAT), + Other_PrivateUse = U_MASK(G_UNICODE_PRIVATE_USE), + Other_Surrogate = U_MASK(G_UNICODE_SURROGATE), + + Punctuation_Dash = U_MASK(G_UNICODE_DASH_PUNCTUATION), + Punctuation_Open = U_MASK(G_UNICODE_OPEN_PUNCTUATION), + Punctuation_Close = U_MASK(G_UNICODE_CLOSE_PUNCTUATION), + Punctuation_Connector = U_MASK(G_UNICODE_CONNECT_PUNCTUATION), + Punctuation_Other = U_MASK(G_UNICODE_OTHER_PUNCTUATION), + + Symbol_Math = U_MASK(G_UNICODE_MATH_SYMBOL), + Symbol_Currency = U_MASK(G_UNICODE_CURRENCY_SYMBOL), + Symbol_Modifier = U_MASK(G_UNICODE_MODIFIER_SYMBOL), + Symbol_Other = U_MASK(G_UNICODE_OTHER_SYMBOL), + + Punctuation_InitialQuote = U_MASK(G_UNICODE_INITIAL_PUNCTUATION), + Punctuation_FinalQuote = U_MASK(G_UNICODE_FINAL_PUNCTUATION) +}; + +UChar32 foldCase(UChar32); + +int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); + +int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); + +inline UChar32 toLower(UChar32 c) +{ + return g_unichar_tolower(c); +} + +inline UChar32 toUpper(UChar32 c) +{ + return g_unichar_toupper(c); +} + +int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); + +inline UChar32 toTitleCase(UChar32 c) +{ + return g_unichar_totitle(c); +} + +inline bool isArabicChar(UChar32 c) +{ + return c >= 0x0600 && c <= 0x06FF; +} + +inline bool isAlphanumeric(UChar32 c) +{ + return g_unichar_isalnum(c); +} + +inline bool isFormatChar(UChar32 c) +{ + return g_unichar_type(c) == G_UNICODE_FORMAT; +} + +inline bool isSeparatorSpace(UChar32 c) +{ + return g_unichar_type(c) == G_UNICODE_SPACE_SEPARATOR; +} + +inline bool isPrintableChar(UChar32 c) +{ + return g_unichar_isprint(c); +} + +inline bool isDigit(UChar32 c) +{ + return g_unichar_isdigit(c); +} + +inline bool isPunct(UChar32 c) +{ + return g_unichar_ispunct(c); +} + +inline bool hasLineBreakingPropertyComplexContext(UChar32 c) +{ + // FIXME + return false; +} + +inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) +{ + // FIXME + return false; +} + +inline UChar32 mirroredChar(UChar32 c) +{ + gunichar mirror = 0; + g_unichar_get_mirror_char(c, &mirror); + return mirror; +} + +inline CharCategory category(UChar32 c) +{ + if (c > 0xffff) + return NoCategory; + + return (CharCategory) U_MASK(g_unichar_type(c)); +} + +Direction direction(UChar32); + +inline bool isLower(UChar32 c) +{ + return g_unichar_islower(c); +} + +inline int digitValue(UChar32 c) +{ + return g_unichar_digit_value(c); +} + +inline uint8_t combiningClass(UChar32 c) +{ + // FIXME + // return g_unichar_combining_class(c); + return 0; +} + +inline DecompositionType decompositionType(UChar32 c) +{ + // FIXME + return DecompositionNone; +} + +int umemcasecmp(const UChar*, const UChar*, int len); + +} +} + +#endif + |