diff options
author | Feng Qian <fqian@google.com> | 2009-06-17 12:12:20 -0700 |
---|---|---|
committer | Feng Qian <fqian@google.com> | 2009-06-17 12:12:20 -0700 |
commit | 5f1ab04193ad0130ca8204aadaceae083aca9881 (patch) | |
tree | 5a92cd389e2cfe7fb67197ce14b38469462379f8 /JavaScriptCore/wtf/unicode | |
parent | 194315e5a908cc8ed67d597010544803eef1ac59 (diff) | |
download | external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.zip external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.tar.gz external_webkit-5f1ab04193ad0130ca8204aadaceae083aca9881.tar.bz2 |
Get WebKit r44544.
Diffstat (limited to 'JavaScriptCore/wtf/unicode')
-rw-r--r-- | JavaScriptCore/wtf/unicode/Unicode.h | 2 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp | 214 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h | 238 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/glib/UnicodeMacrosFromICU.h | 69 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h | 6 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h | 11 |
6 files changed, 540 insertions, 0 deletions
diff --git a/JavaScriptCore/wtf/unicode/Unicode.h b/JavaScriptCore/wtf/unicode/Unicode.h index e6e8f23..f86a9b7 100644 --- a/JavaScriptCore/wtf/unicode/Unicode.h +++ b/JavaScriptCore/wtf/unicode/Unicode.h @@ -28,6 +28,8 @@ #include "qt4/UnicodeQt4.h" #elif USE(ICU_UNICODE) #include <wtf/unicode/icu/UnicodeIcu.h> +#elif USE(GLIB_UNICODE) +#include <wtf/unicode/glib/UnicodeGLib.h> #else #error "Unknown Unicode implementation" #endif diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp new file mode 100644 index 0000000..a779b36 --- /dev/null +++ b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> + * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "UnicodeGLib.h" + +namespace WTF { +namespace Unicode { + +UChar32 foldCase(UChar32 ch) +{ + GOwnPtr<GError> gerror; + + GOwnPtr<char> utf8char; + utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr())); + if (gerror) + return ch; + + GOwnPtr<char> utf8caseFolded; + utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1)); + + GOwnPtr<gunichar> ucs4Result; + ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0)); + + return *ucs4Result; +} + +int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + *error = false; + GOwnPtr<GError> gerror; + + GOwnPtr<char> utf8src; + utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); + if (gerror) { + *error = true; + return -1; + } + + GOwnPtr<char> utf8result; + utf8result.set(g_utf8_casefold(utf8src.get(), -1)); + + long utf16resultLength = -1; + GOwnPtr<UChar> utf16result; + utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); + if (gerror) { + *error = true; + return -1; + } + + if (utf16resultLength > resultLength) { + *error = true; + return utf16resultLength; + } + memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); + + return utf16resultLength; +} + +int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + *error = false; + GOwnPtr<GError> gerror; + + GOwnPtr<char> utf8src; + utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); + if (gerror) { + *error = true; + return -1; + } + + GOwnPtr<char> utf8result; + utf8result.set(g_utf8_strdown(utf8src.get(), -1)); + + long utf16resultLength = -1; + GOwnPtr<UChar> utf16result; + utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); + if (gerror) { + *error = true; + return -1; + } + + if (utf16resultLength > resultLength) { + *error = true; + return utf16resultLength; + } + memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); + + return utf16resultLength; +} + +int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + *error = false; + GOwnPtr<GError> gerror; + + GOwnPtr<char> utf8src; + utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); + if (gerror) { + *error = true; + return -1; + } + + GOwnPtr<char> utf8result; + utf8result.set(g_utf8_strup(utf8src.get(), -1)); + + long utf16resultLength = -1; + GOwnPtr<UChar> utf16result; + utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); + if (gerror) { + *error = true; + return -1; + } + + if (utf16resultLength > resultLength) { + *error = true; + return utf16resultLength; + } + memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); + + return utf16resultLength; +} + +Direction direction(UChar32 c) +{ + PangoBidiType type = pango_bidi_type_for_unichar(c); + switch (type) { + case PANGO_BIDI_TYPE_L: + return LeftToRight; + case PANGO_BIDI_TYPE_R: + return RightToLeft; + case PANGO_BIDI_TYPE_AL: + return RightToLeftArabic; + case PANGO_BIDI_TYPE_LRE: + return LeftToRightEmbedding; + case PANGO_BIDI_TYPE_RLE: + return RightToLeftEmbedding; + case PANGO_BIDI_TYPE_LRO: + return LeftToRightOverride; + case PANGO_BIDI_TYPE_RLO: + return RightToLeftOverride; + case PANGO_BIDI_TYPE_PDF: + return PopDirectionalFormat; + case PANGO_BIDI_TYPE_EN: + return EuropeanNumber; + case PANGO_BIDI_TYPE_AN: + return ArabicNumber; + case PANGO_BIDI_TYPE_ES: + return EuropeanNumberSeparator; + case PANGO_BIDI_TYPE_ET: + return EuropeanNumberTerminator; + case PANGO_BIDI_TYPE_CS: + return CommonNumberSeparator; + case PANGO_BIDI_TYPE_NSM: + return NonSpacingMark; + case PANGO_BIDI_TYPE_BN: + return BoundaryNeutral; + case PANGO_BIDI_TYPE_B: + return BlockSeparator; + case PANGO_BIDI_TYPE_S: + return SegmentSeparator; + case PANGO_BIDI_TYPE_WS: + return WhiteSpaceNeutral; + default: + return OtherNeutral; + } +} + +int umemcasecmp(const UChar* a, const UChar* b, int len) +{ + GOwnPtr<char> utf8a; + GOwnPtr<char> utf8b; + + utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0)); + utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0)); + + GOwnPtr<char> foldedA; + GOwnPtr<char> foldedB; + + foldedA.set(g_utf8_casefold(utf8a.get(), -1)); + foldedB.set(g_utf8_casefold(utf8b.get(), -1)); + + // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu + // from the ICU docs: + // "Compare two strings case-insensitively using full case folding. + // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))." + // + // So it looks like we don't need the full g_utf8_collate here, + // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes). + // As there is no direct equivalent to this icu function in GLib, for now + // we'll use g_utf8_collate(): + + return g_utf8_collate(foldedA.get(), foldedB.get()); +} + +} +} diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h new file mode 100644 index 0000000..c03d3ec --- /dev/null +++ b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> + * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef UnicodeGLib_h +#define UnicodeGLib_h + +#include "UnicodeMacrosFromICU.h" +#include <wtf/GOwnPtr.h> + +#include <glib.h> +#include <pango/pango.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +typedef uint16_t UChar; +typedef int32_t UChar32; + +namespace WTF { +namespace Unicode { + +enum Direction { + LeftToRight, + RightToLeft, + EuropeanNumber, + EuropeanNumberSeparator, + EuropeanNumberTerminator, + ArabicNumber, + CommonNumberSeparator, + BlockSeparator, + SegmentSeparator, + WhiteSpaceNeutral, + OtherNeutral, + LeftToRightEmbedding, + LeftToRightOverride, + RightToLeftArabic, + RightToLeftEmbedding, + RightToLeftOverride, + PopDirectionalFormat, + NonSpacingMark, + BoundaryNeutral +}; + +enum DecompositionType { + DecompositionNone, + DecompositionCanonical, + DecompositionCompat, + DecompositionCircle, + DecompositionFinal, + DecompositionFont, + DecompositionFraction, + DecompositionInitial, + DecompositionIsolated, + DecompositionMedial, + DecompositionNarrow, + DecompositionNoBreak, + DecompositionSmall, + DecompositionSquare, + DecompositionSub, + DecompositionSuper, + DecompositionVertical, + DecompositionWide, +}; + +enum CharCategory { + NoCategory = 0, + Other_NotAssigned = U_MASK(G_UNICODE_UNASSIGNED), + Letter_Uppercase = U_MASK(G_UNICODE_UPPERCASE_LETTER), + Letter_Lowercase = U_MASK(G_UNICODE_LOWERCASE_LETTER), + Letter_Titlecase = U_MASK(G_UNICODE_TITLECASE_LETTER), + Letter_Modifier = U_MASK(G_UNICODE_MODIFIER_LETTER), + Letter_Other = U_MASK(G_UNICODE_OTHER_LETTER), + + Mark_NonSpacing = U_MASK(G_UNICODE_NON_SPACING_MARK), + Mark_Enclosing = U_MASK(G_UNICODE_ENCLOSING_MARK), + Mark_SpacingCombining = U_MASK(G_UNICODE_COMBINING_MARK), + + Number_DecimalDigit = U_MASK(G_UNICODE_DECIMAL_NUMBER), + Number_Letter = U_MASK(G_UNICODE_LETTER_NUMBER), + Number_Other = U_MASK(G_UNICODE_OTHER_NUMBER), + + Separator_Space = U_MASK(G_UNICODE_SPACE_SEPARATOR), + Separator_Line = U_MASK(G_UNICODE_LINE_SEPARATOR), + Separator_Paragraph = U_MASK(G_UNICODE_PARAGRAPH_SEPARATOR), + + Other_Control = U_MASK(G_UNICODE_CONTROL), + Other_Format = U_MASK(G_UNICODE_FORMAT), + Other_PrivateUse = U_MASK(G_UNICODE_PRIVATE_USE), + Other_Surrogate = U_MASK(G_UNICODE_SURROGATE), + + Punctuation_Dash = U_MASK(G_UNICODE_DASH_PUNCTUATION), + Punctuation_Open = U_MASK(G_UNICODE_OPEN_PUNCTUATION), + Punctuation_Close = U_MASK(G_UNICODE_CLOSE_PUNCTUATION), + Punctuation_Connector = U_MASK(G_UNICODE_CONNECT_PUNCTUATION), + Punctuation_Other = U_MASK(G_UNICODE_OTHER_PUNCTUATION), + + Symbol_Math = U_MASK(G_UNICODE_MATH_SYMBOL), + Symbol_Currency = U_MASK(G_UNICODE_CURRENCY_SYMBOL), + Symbol_Modifier = U_MASK(G_UNICODE_MODIFIER_SYMBOL), + Symbol_Other = U_MASK(G_UNICODE_OTHER_SYMBOL), + + Punctuation_InitialQuote = U_MASK(G_UNICODE_INITIAL_PUNCTUATION), + Punctuation_FinalQuote = U_MASK(G_UNICODE_FINAL_PUNCTUATION) +}; + +UChar32 foldCase(UChar32); + +int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); + +int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); + +inline UChar32 toLower(UChar32 c) +{ + return g_unichar_tolower(c); +} + +inline UChar32 toUpper(UChar32 c) +{ + return g_unichar_toupper(c); +} + +int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); + +inline UChar32 toTitleCase(UChar32 c) +{ + return g_unichar_totitle(c); +} + +inline bool isArabicChar(UChar32 c) +{ + return c >= 0x0600 && c <= 0x06FF; +} + +inline bool isFormatChar(UChar32 c) +{ + return g_unichar_type(c) == G_UNICODE_FORMAT; +} + +inline bool isSeparatorSpace(UChar32 c) +{ + return g_unichar_type(c) == G_UNICODE_SPACE_SEPARATOR; +} + +inline bool isPrintableChar(UChar32 c) +{ + return g_unichar_isprint(c); +} + +inline bool isDigit(UChar32 c) +{ + return g_unichar_isdigit(c); +} + +inline bool isPunct(UChar32 c) +{ + return g_unichar_ispunct(c); +} + +inline bool hasLineBreakingPropertyComplexContext(UChar32 c) +{ + // FIXME + return false; +} + +inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) +{ + // FIXME + return false; +} + +inline UChar32 mirroredChar(UChar32 c) +{ + gunichar mirror = 0; + g_unichar_get_mirror_char(c, &mirror); + return mirror; +} + +inline CharCategory category(UChar32 c) +{ + if (c > 0xffff) + return NoCategory; + + return (CharCategory) U_MASK(g_unichar_type(c)); +} + +Direction direction(UChar32); + +inline bool isLower(UChar32 c) +{ + return g_unichar_islower(c); +} + +inline int digitValue(UChar32 c) +{ + return g_unichar_digit_value(c); +} + +inline uint8_t combiningClass(UChar32 c) +{ + // FIXME + // return g_unichar_combining_class(c); + return 0; +} + +inline DecompositionType decompositionType(UChar32 c) +{ + // FIXME + return DecompositionNone; +} + +int umemcasecmp(const UChar*, const UChar*, int len); + +} +} + +#endif + diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeMacrosFromICU.h b/JavaScriptCore/wtf/unicode/glib/UnicodeMacrosFromICU.h new file mode 100644 index 0000000..5d3eca6 --- /dev/null +++ b/JavaScriptCore/wtf/unicode/glib/UnicodeMacrosFromICU.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> + * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef UnicodeMacrosFromICU_h +#define UnicodeMacrosFromICU_h + +// some defines from ICU + +#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) +#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) +#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) +#define U16_GET_SUPPLEMENTARY(lead, trail) \ + (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) + +#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) +#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) +#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) +#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +#define U16_PREV(s, start, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + uint16_t __c2; \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ +} + +#define U16_NEXT(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + uint16_t __c2; \ + if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } \ +} + +#define U_MASK(x) ((uint32_t)1<<(x)) + +#endif + diff --git a/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h b/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h index de5e082..35c6fbf 100644 --- a/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h +++ b/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h @@ -184,6 +184,12 @@ inline bool hasLineBreakingPropertyComplexContext(UChar32 c) return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT; } +inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) +{ + int32_t prop = u_getIntPropertyValue(c, UCHAR_LINE_BREAK); + return prop == U_LB_COMPLEX_CONTEXT || prop == U_LB_IDEOGRAPHIC; +} + inline UChar32 mirroredChar(UChar32 c) { return u_charMirror(c); diff --git a/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h b/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h index f65e292..1531694 100644 --- a/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h +++ b/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h @@ -92,6 +92,17 @@ typedef uint32_t UChar32; } \ } +#define U16_PREV(s, start, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + uint16_t __c2; \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ +} + #define U_MASK(x) ((uint32_t)1<<(x)) namespace WTF { |