diff options
Diffstat (limited to 'JavaScriptCore/wtf/unicode')
| -rw-r--r-- | JavaScriptCore/wtf/unicode/Collator.h | 4 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/CollatorDefault.cpp | 4 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/UTF8.cpp | 2 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/Unicode.h | 4 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h (renamed from JavaScriptCore/wtf/unicode/glib/UnicodeMacrosFromICU.h) | 2 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp | 181 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h | 194 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp | 117 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h | 2 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp | 15 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h | 76 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp (renamed from JavaScriptCore/wtf/unicode/wince/UnicodeWince.cpp) | 43 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h | 177 | ||||
| -rw-r--r-- | JavaScriptCore/wtf/unicode/wince/UnicodeWince.h | 216 |
14 files changed, 663 insertions, 374 deletions
diff --git a/JavaScriptCore/wtf/unicode/Collator.h b/JavaScriptCore/wtf/unicode/Collator.h index 51e8a06..fe6a809 100644 --- a/JavaScriptCore/wtf/unicode/Collator.h +++ b/JavaScriptCore/wtf/unicode/Collator.h @@ -29,8 +29,8 @@ #ifndef WTF_Collator_h #define WTF_Collator_h -#include <memory> #include <wtf/Noncopyable.h> +#include <wtf/PassOwnPtr.h> #include <wtf/unicode/Unicode.h> #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION @@ -47,7 +47,7 @@ namespace WTF { ~Collator(); void setOrderLowerFirst(bool); - static std::auto_ptr<Collator> userDefault(); + static PassOwnPtr<Collator> userDefault(); Result collate(const ::UChar*, size_t, const ::UChar*, size_t) const; diff --git a/JavaScriptCore/wtf/unicode/CollatorDefault.cpp b/JavaScriptCore/wtf/unicode/CollatorDefault.cpp index eddbe53..4e05432 100644 --- a/JavaScriptCore/wtf/unicode/CollatorDefault.cpp +++ b/JavaScriptCore/wtf/unicode/CollatorDefault.cpp @@ -45,9 +45,9 @@ void Collator::setOrderLowerFirst(bool) { } -std::auto_ptr<Collator> Collator::userDefault() +PassOwnPtr<Collator> Collator::userDefault() { - return std::auto_ptr<Collator>(new Collator(0)); + return new Collator(0); } // A default implementation for platforms that lack Unicode-aware collation. diff --git a/JavaScriptCore/wtf/unicode/UTF8.cpp b/JavaScriptCore/wtf/unicode/UTF8.cpp index 21d5856..40c5609 100644 --- a/JavaScriptCore/wtf/unicode/UTF8.cpp +++ b/JavaScriptCore/wtf/unicode/UTF8.cpp @@ -240,7 +240,7 @@ ConversionResult convertUTF8ToUTF16( UChar* target = *targetStart; while (source < sourceEnd) { UChar32 ch = 0; - int extraBytesToRead = UTF8SequenceLength(*source) - 1; + int extraBytesToRead = inlineUTF8SequenceLength(*source) - 1; if (source + extraBytesToRead >= sourceEnd) { result = sourceExhausted; break; diff --git a/JavaScriptCore/wtf/unicode/Unicode.h b/JavaScriptCore/wtf/unicode/Unicode.h index d59439d..50524b1 100644 --- a/JavaScriptCore/wtf/unicode/Unicode.h +++ b/JavaScriptCore/wtf/unicode/Unicode.h @@ -32,7 +32,9 @@ #elif USE(GLIB_UNICODE) #include <wtf/unicode/glib/UnicodeGLib.h> #elif USE(WINCE_UNICODE) -#include <wtf/unicode/wince/UnicodeWince.h> +#include <wtf/unicode/wince/UnicodeWinCE.h> +#elif USE(BREWMP_UNICODE) +#include <wtf/unicode/brew/UnicodeBrew.h> #else #error "Unknown Unicode implementation" #endif diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeMacrosFromICU.h b/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h index 5d3eca6..f865ef1 100644 --- a/JavaScriptCore/wtf/unicode/glib/UnicodeMacrosFromICU.h +++ b/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h @@ -27,6 +27,7 @@ // some defines from ICU +#define U_IS_BMP(c) ((UChar32)(c)<=0xffff) #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) @@ -35,6 +36,7 @@ #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) +#define U16_LENGTH(c) ((uint32_t)(c) <= 0xffff ? 1 : 2) #define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) diff --git a/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp b/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp new file mode 100644 index 0000000..8367f17 --- /dev/null +++ b/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * Copyright (C) 2010 Company 100, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "UnicodeBrew.h" + +#include <wchar.h> +#include <wctype.h> + +namespace WTF { +namespace Unicode { + +UChar toLower(UChar c) +{ + return towlower(c); +} + +UChar toUpper(UChar c) +{ + return towupper(c); +} + +UChar foldCase(UChar c) +{ + return towlower(c); +} + +bool isPrintableChar(UChar c) +{ + return !!iswprint(c); +} + +bool isUpper(UChar c) +{ + return !!iswupper(c); +} + +bool isLower(UChar c) +{ + return !!iswlower(c); +} + +bool isDigit(UChar c) +{ + return !!iswdigit(c); +} + +bool isPunct(UChar c) +{ + return !!iswpunct(c); +} + +bool isAlphanumeric(UChar c) +{ + return !!iswalnum(c); +} + +int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) +{ + const UChar* sourceIterator = source; + const UChar* sourceEnd = source + sourceLength; + UChar* resultIterator = result; + UChar* resultEnd = result + resultLength; + + if (sourceLength <= resultLength) { + while (sourceIterator < sourceEnd) + *resultIterator++ = towlower(*sourceIterator++); + } else { + while (resultIterator < resultEnd) + *resultIterator++ = towlower(*sourceIterator++); + } + + int remainingCharacters = sourceIterator < sourceEnd ? sourceEnd - sourceIterator : 0; + *isError = !!remainingCharacters; + if (resultIterator < resultEnd) + *resultIterator = 0; + + return (resultIterator - result) + remainingCharacters; +} + +int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) +{ + const UChar* sourceIterator = source; + const UChar* sourceEnd = source + sourceLength; + UChar* resultIterator = result; + UChar* resultEnd = result + resultLength; + + if (sourceLength <= resultLength) { + while (sourceIterator < sourceEnd) + *resultIterator++ = towupper(*sourceIterator++); + } else { + while (resultIterator < resultEnd) + *resultIterator++ = towupper(*sourceIterator++); + } + + int remainingCharacters = sourceIterator < sourceEnd ? sourceEnd - sourceIterator : 0; + *isError = !!remainingCharacters; + if (resultIterator < resultEnd) + *resultIterator = 0; + + return (resultIterator - result) + remainingCharacters; +} + +int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) +{ + *isError = false; + if (resultLength < sourceLength) { + *isError = true; + return sourceLength; + } + for (int i = 0; i < sourceLength; ++i) + result[i] = foldCase(source[i]); + return sourceLength; +} + +UChar toTitleCase(UChar c) +{ + return towupper(c); +} + +Direction direction(UChar32 c) +{ + return static_cast<Direction>(ICU::direction(c)); +} + +CharCategory category(unsigned int c) +{ + return static_cast<CharCategory>(TO_MASK((int8_t) ICU::category(c))); +} + +DecompositionType decompositionType(UChar32 c) +{ + return static_cast<DecompositionType>(ICU::decompositionType(c)); +} + +unsigned char combiningClass(UChar32 c) +{ + return ICU::combiningClass(c); +} + +UChar mirroredChar(UChar32 c) +{ + return ICU::mirroredChar(c); +} + +int digitValue(UChar c) +{ + return ICU::digitValue(c); +} + +bool isSpace(UChar c) +{ + return !!iswspace(c); +} + +bool isLetter(UChar c) +{ + return !!iswalpha(c); +} + +} // namespace Unicode +} // namespace WTF diff --git a/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h b/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h new file mode 100644 index 0000000..1d7576f --- /dev/null +++ b/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * Copyright (C) 2010 Company 100, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef UnicodeBrew_h +#define UnicodeBrew_h + +#include "UnicodeFromICU.h" +#include "UnicodeMacrosFromICU.h" + +namespace WTF { +namespace Unicode { + +enum Direction { + LeftToRight = ICU::U_LEFT_TO_RIGHT, + RightToLeft = ICU::U_RIGHT_TO_LEFT, + EuropeanNumber = ICU::U_EUROPEAN_NUMBER, + EuropeanNumberSeparator = ICU::U_EUROPEAN_NUMBER_SEPARATOR, + EuropeanNumberTerminator = ICU::U_EUROPEAN_NUMBER_TERMINATOR, + ArabicNumber = ICU::U_ARABIC_NUMBER, + CommonNumberSeparator = ICU::U_COMMON_NUMBER_SEPARATOR, + BlockSeparator = ICU::U_BLOCK_SEPARATOR, + SegmentSeparator = ICU::U_SEGMENT_SEPARATOR, + WhiteSpaceNeutral = ICU::U_WHITE_SPACE_NEUTRAL, + OtherNeutral = ICU::U_OTHER_NEUTRAL, + LeftToRightEmbedding = ICU::U_LEFT_TO_RIGHT_EMBEDDING, + LeftToRightOverride = ICU::U_LEFT_TO_RIGHT_OVERRIDE, + RightToLeftArabic = ICU::U_RIGHT_TO_LEFT_ARABIC, + RightToLeftEmbedding = ICU::U_RIGHT_TO_LEFT_EMBEDDING, + RightToLeftOverride = ICU::U_RIGHT_TO_LEFT_OVERRIDE, + PopDirectionalFormat = ICU::U_POP_DIRECTIONAL_FORMAT, + NonSpacingMark = ICU::U_DIR_NON_SPACING_MARK, + BoundaryNeutral = ICU::U_BOUNDARY_NEUTRAL +}; + +enum DecompositionType { + DecompositionNone = ICU::U_DT_NONE, + DecompositionCanonical = ICU::U_DT_CANONICAL, + DecompositionCompat = ICU::U_DT_COMPAT, + DecompositionCircle = ICU::U_DT_CIRCLE, + DecompositionFinal = ICU::U_DT_FINAL, + DecompositionFont = ICU::U_DT_FONT, + DecompositionFraction = ICU::U_DT_FRACTION, + DecompositionInitial = ICU::U_DT_INITIAL, + DecompositionIsolated = ICU::U_DT_ISOLATED, + DecompositionMedial = ICU::U_DT_MEDIAL, + DecompositionNarrow = ICU::U_DT_NARROW, + DecompositionNoBreak = ICU::U_DT_NOBREAK, + DecompositionSmall = ICU::U_DT_SMALL, + DecompositionSquare = ICU::U_DT_SQUARE, + DecompositionSub = ICU::U_DT_SUB, + DecompositionSuper = ICU::U_DT_SUPER, + DecompositionVertical = ICU::U_DT_VERTICAL, + DecompositionWide = ICU::U_DT_WIDE, +}; + +enum CharCategory { + NoCategory = 0, + Other_NotAssigned = TO_MASK(ICU::U_GENERAL_OTHER_TYPES), + Letter_Uppercase = TO_MASK(ICU::U_UPPERCASE_LETTER), + Letter_Lowercase = TO_MASK(ICU::U_LOWERCASE_LETTER), + Letter_Titlecase = TO_MASK(ICU::U_TITLECASE_LETTER), + Letter_Modifier = TO_MASK(ICU::U_MODIFIER_LETTER), + Letter_Other = TO_MASK(ICU::U_OTHER_LETTER), + + Mark_NonSpacing = TO_MASK(ICU::U_NON_SPACING_MARK), + Mark_Enclosing = TO_MASK(ICU::U_ENCLOSING_MARK), + Mark_SpacingCombining = TO_MASK(ICU::U_COMBINING_SPACING_MARK), + + Number_DecimalDigit = TO_MASK(ICU::U_DECIMAL_DIGIT_NUMBER), + Number_Letter = TO_MASK(ICU::U_LETTER_NUMBER), + Number_Other = TO_MASK(ICU::U_OTHER_NUMBER), + + Separator_Space = TO_MASK(ICU::U_SPACE_SEPARATOR), + Separator_Line = TO_MASK(ICU::U_LINE_SEPARATOR), + Separator_Paragraph = TO_MASK(ICU::U_PARAGRAPH_SEPARATOR), + + Other_Control = TO_MASK(ICU::U_CONTROL_CHAR), + Other_Format = TO_MASK(ICU::U_FORMAT_CHAR), + Other_PrivateUse = TO_MASK(ICU::U_PRIVATE_USE_CHAR), + Other_Surrogate = TO_MASK(ICU::U_SURROGATE), + + Punctuation_Dash = TO_MASK(ICU::U_DASH_PUNCTUATION), + Punctuation_Open = TO_MASK(ICU::U_START_PUNCTUATION), + Punctuation_Close = TO_MASK(ICU::U_END_PUNCTUATION), + Punctuation_Connector = TO_MASK(ICU::U_CONNECTOR_PUNCTUATION), + Punctuation_Other = TO_MASK(ICU::U_OTHER_PUNCTUATION), + + Symbol_Math = TO_MASK(ICU::U_MATH_SYMBOL), + Symbol_Currency = TO_MASK(ICU::U_CURRENCY_SYMBOL), + Symbol_Modifier = TO_MASK(ICU::U_MODIFIER_SYMBOL), + Symbol_Other = TO_MASK(ICU::U_OTHER_SYMBOL), + + Punctuation_InitialQuote = TO_MASK(ICU::U_INITIAL_PUNCTUATION), + Punctuation_FinalQuote = TO_MASK(ICU::U_FINAL_PUNCTUATION) +}; + +UChar foldCase(UChar); + +int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); + +int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); + +UChar toUpper(UChar); +UChar toLower(UChar); + +bool isUpper(UChar); + +int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); + +UChar toTitleCase(UChar); + +inline bool isArabicChar(UChar32 c) +{ + return c >= 0x0600 && c <= 0x06FF; +} + +bool isAlphanumeric(UChar); + +CharCategory category(unsigned int); + +inline bool isSeparatorSpace(UChar c) +{ + return category(c) == Separator_Space; +} + +bool isPrintableChar(UChar); + +bool isDigit(UChar); + +bool isPunct(UChar); + +inline bool hasLineBreakingPropertyComplexContext(UChar32) +{ + // FIXME: implement! + return false; +} + +inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) +{ + // FIXME + return false; +} + +UChar mirroredChar(UChar32); + +Direction direction(UChar32); + +bool isLower(UChar); + +int digitValue(UChar); + +unsigned char combiningClass(UChar32); + +DecompositionType decompositionType(UChar32); + +inline int umemcasecmp(const UChar* a, const UChar* b, int len) +{ + for (int i = 0; i < len; ++i) { + UChar c1 = foldCase(a[i]); + UChar c2 = foldCase(b[i]); + if (c1 != c2) + return c1 - c2; + } + return 0; +} + +bool isSpace(UChar); +bool isLetter(UChar); + +} // namespace Unicode +} // namespace WTF + +#endif diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp index e20c376..a01c3ee 100644 --- a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp +++ b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp @@ -1,6 +1,7 @@ /* * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> + * Copyright (C) 2010 Igalia S.L. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -22,6 +23,11 @@ #include "config.h" #include "UnicodeGLib.h" +#include <wtf/Vector.h> +#include <wtf/unicode/UTF8.h> + +#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF) + namespace WTF { namespace Unicode { @@ -43,100 +49,71 @@ UChar32 foldCase(UChar32 ch) return *ucs4Result; } -int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +static int getUTF16LengthFromUTF8(const gchar* utf8String, int length) { - *error = false; - GOwnPtr<GError> gerror; + int utf16Length = 0; + const gchar* inputString = utf8String; - GOwnPtr<char> utf8src; - utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); - if (gerror) { - *error = true; - return -1; - } - - GOwnPtr<char> utf8result; - utf8result.set(g_utf8_casefold(utf8src.get(), -1)); + while ((utf8String + length - inputString > 0) && *inputString) { + gunichar character = g_utf8_get_char(inputString); - long utf16resultLength = -1; - GOwnPtr<UChar> utf16result; - utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); - if (gerror) { - *error = true; - return -1; + utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1; + inputString = g_utf8_next_char(inputString); } - if (utf16resultLength > resultLength) { - *error = true; - return utf16resultLength; - } - memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); - - return utf16resultLength; + return utf16Length; } -int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length); + +static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction) { *error = false; - GOwnPtr<GError> gerror; - GOwnPtr<char> utf8src; - utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); - if (gerror) { + // Allocate a buffer big enough to hold all the characters. + Vector<char> buffer(srcLength * 3); + char* utf8Target = buffer.data(); + const UChar* utf16Source = src; + ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true); + if (conversionResult != conversionOK) { *error = true; return -1; } + buffer.shrink(utf8Target - buffer.data()); - GOwnPtr<char> utf8result; - utf8result.set(g_utf8_strdown(utf8src.get(), -1)); + GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size())); + long utf8ResultLength = strlen(utf8Result.get()); - long utf16resultLength = -1; - GOwnPtr<UChar> utf16result; - utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); - if (gerror) { + // Calculate the destination buffer size. + int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength); + if (realLength > resultLength) { *error = true; - return -1; + return realLength; } - if (utf16resultLength > resultLength) { + // Convert the result to UTF-16. + UChar* utf16Target = result; + const char* utf8Source = utf8Result.get(); + conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true); + long utf16ResultLength = utf16Target - result; + if (conversionResult != conversionOK) *error = true; - return utf16resultLength; - } - memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); - return utf16resultLength; + return utf16ResultLength <= 0 ? -1 : utf16ResultLength; } - -int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) { - *error = false; - GOwnPtr<GError> gerror; - - GOwnPtr<char> utf8src; - utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); - if (gerror) { - *error = true; - return -1; - } - - GOwnPtr<char> utf8result; - utf8result.set(g_utf8_strup(utf8src.get(), -1)); - - long utf16resultLength = -1; - GOwnPtr<UChar> utf16result; - utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); - if (gerror) { - *error = true; - return -1; - } + return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold); +} - if (utf16resultLength > resultLength) { - *error = true; - return utf16resultLength; - } - memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); +int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown); +} - return utf16resultLength; +int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup); } Direction direction(UChar32 c) diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h index d72e707..46b00ea 100644 --- a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h +++ b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h @@ -26,7 +26,7 @@ #define UnicodeGLib_h #include "UnicodeMacrosFromICU.h" -#include <wtf/gtk/GOwnPtr.h> +#include "GOwnPtr.h" #include <glib.h> #include <pango/pango.h> diff --git a/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp b/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp index a1753a4..805b114 100644 --- a/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp +++ b/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp @@ -57,11 +57,11 @@ Collator::Collator(const char* locale) { } -std::auto_ptr<Collator> Collator::userDefault() +PassOwnPtr<Collator> Collator::userDefault() { #if OS(DARWIN) && PLATFORM(CF) // Mac OS X doesn't set UNIX locale to match user-selected one, so ICU default doesn't work. -#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !OS(IPHONE_OS) +#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !OS(IOS) RetainPtr<CFLocaleRef> currentLocale(AdoptCF, CFLocaleCopyCurrent()); CFStringRef collationOrder = (CFStringRef)CFLocaleGetValue(currentLocale.get(), kCFLocaleCollatorIdentifier); #else @@ -69,13 +69,12 @@ std::auto_ptr<Collator> Collator::userDefault() CFStringRef collationOrder = collationOrderRetainer.get(); #endif char buf[256]; - if (collationOrder) { - CFStringGetCString(collationOrder, buf, sizeof(buf), kCFStringEncodingASCII); - return std::auto_ptr<Collator>(new Collator(buf)); - } else - return std::auto_ptr<Collator>(new Collator("")); + if (!collationOrder) + return adoptPtr(new Collator("")); + CFStringGetCString(collationOrder, buf, sizeof(buf), kCFStringEncodingASCII); + return adoptPtr(new Collator(buf)); #else - return std::auto_ptr<Collator>(new Collator(0)); + return adoptPtr(new Collator(0)); #endif } diff --git a/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h b/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h index 9b1754a..eaa7a07 100644 --- a/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h +++ b/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h @@ -23,12 +23,17 @@ #ifndef WTF_UNICODE_QT4_H #define WTF_UNICODE_QT4_H +#include "UnicodeMacrosFromICU.h" + #include <QChar> #include <QString> #include <config.h> #include <stdint.h> +#if USE(QT_ICU_TEXT_BREAKING) +#include <unicode/ubrk.h> +#endif QT_BEGIN_NAMESPACE namespace QUnicodeTables { @@ -56,52 +61,15 @@ namespace QUnicodeTables { QT_END_NAMESPACE // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h -#if defined(Q_OS_WIN) || COMPILER(WINSCW) || COMPILER(RVCT) +#if defined(Q_OS_WIN) || COMPILER(WINSCW) || (COMPILER(RVCT) && !OS(LINUX)) typedef wchar_t UChar; #else typedef uint16_t UChar; #endif -typedef uint32_t UChar32; - -// some defines from ICU - -#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) -#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) -#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) -#define U16_GET_SUPPLEMENTARY(lead, trail) \ - (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) - -#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) -#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) - -#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) -#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) -#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) -#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) - -#define U16_NEXT(s, i, length, c) { \ - (c)=(s)[(i)++]; \ - if(U16_IS_LEAD(c)) { \ - uint16_t __c2; \ - if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ - ++(i); \ - (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ - } \ - } \ -} -#define U16_PREV(s, start, i, c) { \ - (c)=(s)[--(i)]; \ - if(U16_IS_TRAIL(c)) { \ - uint16_t __c2; \ - if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - --(i); \ - (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - } \ - } \ -} - -#define U_MASK(x) ((uint32_t)1<<(x)) +#if !USE(QT_ICU_TEXT_BREAKING) +typedef uint32_t UChar32; +#endif namespace WTF { namespace Unicode { @@ -188,7 +156,7 @@ enum CharCategory { inline UChar32 toLower(UChar32 ch) { - return QChar::toLower(ch); + return QChar::toLower(uint32_t(ch)); } inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) @@ -244,9 +212,9 @@ inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLen return rindex + needed; } -inline UChar32 toUpper(UChar32 ch) +inline UChar32 toUpper(UChar32 c) { - return QChar::toUpper(ch); + return QChar::toUpper(uint32_t(c)); } inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) @@ -304,12 +272,12 @@ inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLen inline int toTitleCase(UChar32 c) { - return QChar::toTitleCase(c); + return QChar::toTitleCase(uint32_t(c)); } inline UChar32 foldCase(UChar32 c) { - return QChar::toCaseFolded(c); + return QChar::toCaseFolded(uint32_t(c)); } inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) @@ -334,12 +302,12 @@ inline bool isPrintableChar(UChar32 c) { const uint test = U_MASK(QChar::Other_Control) | U_MASK(QChar::Other_NotAssigned); - return !(U_MASK(QChar::category(c)) & test); + return !(U_MASK(QChar::category(uint32_t(c))) & test); } inline bool isSeparatorSpace(UChar32 c) { - return QChar::category(c) == QChar::Separator_Space; + return QChar::category(uint32_t(c)) == QChar::Separator_Space; } inline bool isPunct(UChar32 c) @@ -351,12 +319,12 @@ inline bool isPunct(UChar32 c) U_MASK(QChar::Punctuation_InitialQuote) | U_MASK(QChar::Punctuation_FinalQuote) | U_MASK(QChar::Punctuation_Other); - return U_MASK(QChar::category(c)) & test; + return U_MASK(QChar::category(uint32_t(c))) & test; } inline bool isLower(UChar32 c) { - return QChar::category(c) == QChar::Letter_Lowercase; + return QChar::category(uint32_t(c)) == QChar::Letter_Lowercase; } inline bool hasLineBreakingPropertyComplexContext(UChar32) @@ -367,12 +335,12 @@ inline bool hasLineBreakingPropertyComplexContext(UChar32) inline UChar32 mirroredChar(UChar32 c) { - return QChar::mirroredChar(c); + return QChar::mirroredChar(uint32_t(c)); } inline uint8_t combiningClass(UChar32 c) { - return QChar::combiningClass(c); + return QChar::combiningClass(uint32_t(c)); } inline DecompositionType decompositionType(UChar32 c) @@ -394,12 +362,12 @@ inline int umemcasecmp(const UChar* a, const UChar* b, int len) inline Direction direction(UChar32 c) { - return (Direction)QChar::direction(c); + return (Direction)QChar::direction(uint32_t(c)); } inline CharCategory category(UChar32 c) { - return (CharCategory) U_MASK(QChar::category(c)); + return (CharCategory) U_MASK(QChar::category(uint32_t(c))); } } } diff --git a/JavaScriptCore/wtf/unicode/wince/UnicodeWince.cpp b/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp index 2df44f8..96dac7d 100644 --- a/JavaScriptCore/wtf/unicode/wince/UnicodeWince.cpp +++ b/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp @@ -20,64 +20,69 @@ */ #include "config.h" -#include "UnicodeWince.h" +#include "UnicodeWinCE.h" #include <wchar.h> namespace WTF { namespace Unicode { -wchar_t toLower(wchar_t c) +UChar toLower(UChar c) { return towlower(c); } -wchar_t toUpper(wchar_t c) +UChar toUpper(UChar c) { return towupper(c); } -wchar_t foldCase(wchar_t c) +UChar foldCase(UChar c) { return towlower(c); } -bool isPrintableChar(wchar_t c) +bool isPrintableChar(UChar c) { return !!iswprint(c); } -bool isSpace(wchar_t c) +bool isSpace(UChar c) { return !!iswspace(c); } -bool isLetter(wchar_t c) +bool isLetter(UChar c) { return !!iswalpha(c); } -bool isUpper(wchar_t c) +bool isUpper(UChar c) { return !!iswupper(c); } -bool isLower(wchar_t c) +bool isLower(UChar c) { return !!iswlower(c); } -bool isDigit(wchar_t c) +bool isDigit(UChar c) { return !!iswdigit(c); } -bool isPunct(wchar_t c) +bool isPunct(UChar c) { return !!iswpunct(c); } -int toLower(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError) +bool isAlphanumeric(UChar c) +{ + return !!iswalnum(c); +} + +int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) { const UChar* sourceIterator = source; const UChar* sourceEnd = source + sourceLength; @@ -94,14 +99,14 @@ int toLower(wchar_t* result, int resultLength, const wchar_t* source, int source if (sourceIterator < sourceEnd) remainingCharacters += sourceEnd - sourceIterator; - *isError = (remainingCharacters != 0); + *isError = !!remainingCharacters; if (resultIterator < resultEnd) *resultIterator = 0; return (resultIterator - result) + remainingCharacters; } -int toUpper(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError) +int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) { const UChar* sourceIterator = source; const UChar* sourceEnd = source + sourceLength; @@ -118,14 +123,14 @@ int toUpper(wchar_t* result, int resultLength, const wchar_t* source, int source if (sourceIterator < sourceEnd) remainingCharacters += sourceEnd - sourceIterator; - *isError = (remainingCharacters != 0); + *isError = !!remainingCharacters; if (resultIterator < resultEnd) *resultIterator = 0; return (resultIterator - result) + remainingCharacters; } -int foldCase(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError) +int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) { *isError = false; if (resultLength < sourceLength) { @@ -137,7 +142,7 @@ int foldCase(wchar_t* result, int resultLength, const wchar_t* source, int sourc return sourceLength; } -wchar_t toTitleCase(wchar_t c) +UChar toTitleCase(UChar c) { return towupper(c); } @@ -162,12 +167,12 @@ unsigned char combiningClass(UChar32 c) return UnicodeCE::combiningClass(c); } -wchar_t mirroredChar(UChar32 c) +UChar mirroredChar(UChar32 c) { return UnicodeCE::mirroredChar(c); } -int digitValue(wchar_t c) +int digitValue(UChar c) { return UnicodeCE::digitValue(c); } diff --git a/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h b/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h new file mode 100644 index 0000000..2688aa9 --- /dev/null +++ b/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef WTF_UnicodeWinCE_h +#define WTF_UnicodeWinCE_h + +#include "UnicodeMacrosFromICU.h" + +#include "ce_unicode.h" + +#define TO_MASK(x) (1 << (x)) + +namespace WTF { +namespace Unicode { + +enum Direction { + LeftToRight = UnicodeCE::U_LEFT_TO_RIGHT, + RightToLeft = UnicodeCE::U_RIGHT_TO_LEFT, + EuropeanNumber = UnicodeCE::U_EUROPEAN_NUMBER, + EuropeanNumberSeparator = UnicodeCE::U_EUROPEAN_NUMBER_SEPARATOR, + EuropeanNumberTerminator = UnicodeCE::U_EUROPEAN_NUMBER_TERMINATOR, + ArabicNumber = UnicodeCE::U_ARABIC_NUMBER, + CommonNumberSeparator = UnicodeCE::U_COMMON_NUMBER_SEPARATOR, + BlockSeparator = UnicodeCE::U_BLOCK_SEPARATOR, + SegmentSeparator = UnicodeCE::U_SEGMENT_SEPARATOR, + WhiteSpaceNeutral = UnicodeCE::U_WHITE_SPACE_NEUTRAL, + OtherNeutral = UnicodeCE::U_OTHER_NEUTRAL, + LeftToRightEmbedding = UnicodeCE::U_LEFT_TO_RIGHT_EMBEDDING, + LeftToRightOverride = UnicodeCE::U_LEFT_TO_RIGHT_OVERRIDE, + RightToLeftArabic = UnicodeCE::U_RIGHT_TO_LEFT_ARABIC, + RightToLeftEmbedding = UnicodeCE::U_RIGHT_TO_LEFT_EMBEDDING, + RightToLeftOverride = UnicodeCE::U_RIGHT_TO_LEFT_OVERRIDE, + PopDirectionalFormat = UnicodeCE::U_POP_DIRECTIONAL_FORMAT, + NonSpacingMark = UnicodeCE::U_DIR_NON_SPACING_MARK, + BoundaryNeutral = UnicodeCE::U_BOUNDARY_NEUTRAL +}; + +enum DecompositionType { + DecompositionNone = UnicodeCE::U_DT_NONE, + DecompositionCanonical = UnicodeCE::U_DT_CANONICAL, + DecompositionCompat = UnicodeCE::U_DT_COMPAT, + DecompositionCircle = UnicodeCE::U_DT_CIRCLE, + DecompositionFinal = UnicodeCE::U_DT_FINAL, + DecompositionFont = UnicodeCE::U_DT_FONT, + DecompositionFraction = UnicodeCE::U_DT_FRACTION, + DecompositionInitial = UnicodeCE::U_DT_INITIAL, + DecompositionIsolated = UnicodeCE::U_DT_ISOLATED, + DecompositionMedial = UnicodeCE::U_DT_MEDIAL, + DecompositionNarrow = UnicodeCE::U_DT_NARROW, + DecompositionNoBreak = UnicodeCE::U_DT_NOBREAK, + DecompositionSmall = UnicodeCE::U_DT_SMALL, + DecompositionSquare = UnicodeCE::U_DT_SQUARE, + DecompositionSub = UnicodeCE::U_DT_SUB, + DecompositionSuper = UnicodeCE::U_DT_SUPER, + DecompositionVertical = UnicodeCE::U_DT_VERTICAL, + DecompositionWide = UnicodeCE::U_DT_WIDE +}; + +enum CharCategory { + NoCategory = 0, + Other_NotAssigned = TO_MASK(UnicodeCE::U_GENERAL_OTHER_TYPES), + Letter_Uppercase = TO_MASK(UnicodeCE::U_UPPERCASE_LETTER), + Letter_Lowercase = TO_MASK(UnicodeCE::U_LOWERCASE_LETTER), + Letter_Titlecase = TO_MASK(UnicodeCE::U_TITLECASE_LETTER), + Letter_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_LETTER), + Letter_Other = TO_MASK(UnicodeCE::U_OTHER_LETTER), + + Mark_NonSpacing = TO_MASK(UnicodeCE::U_NON_SPACING_MARK), + Mark_Enclosing = TO_MASK(UnicodeCE::U_ENCLOSING_MARK), + Mark_SpacingCombining = TO_MASK(UnicodeCE::U_COMBINING_SPACING_MARK), + + Number_DecimalDigit = TO_MASK(UnicodeCE::U_DECIMAL_DIGIT_NUMBER), + Number_Letter = TO_MASK(UnicodeCE::U_LETTER_NUMBER), + Number_Other = TO_MASK(UnicodeCE::U_OTHER_NUMBER), + + Separator_Space = TO_MASK(UnicodeCE::U_SPACE_SEPARATOR), + Separator_Line = TO_MASK(UnicodeCE::U_LINE_SEPARATOR), + Separator_Paragraph = TO_MASK(UnicodeCE::U_PARAGRAPH_SEPARATOR), + + Other_Control = TO_MASK(UnicodeCE::U_CONTROL_CHAR), + Other_Format = TO_MASK(UnicodeCE::U_FORMAT_CHAR), + Other_PrivateUse = TO_MASK(UnicodeCE::U_PRIVATE_USE_CHAR), + Other_Surrogate = TO_MASK(UnicodeCE::U_SURROGATE), + + Punctuation_Dash = TO_MASK(UnicodeCE::U_DASH_PUNCTUATION), + Punctuation_Open = TO_MASK(UnicodeCE::U_START_PUNCTUATION), + Punctuation_Close = TO_MASK(UnicodeCE::U_END_PUNCTUATION), + Punctuation_Connector = TO_MASK(UnicodeCE::U_CONNECTOR_PUNCTUATION), + Punctuation_Other = TO_MASK(UnicodeCE::U_OTHER_PUNCTUATION), + + Symbol_Math = TO_MASK(UnicodeCE::U_MATH_SYMBOL), + Symbol_Currency = TO_MASK(UnicodeCE::U_CURRENCY_SYMBOL), + Symbol_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_SYMBOL), + Symbol_Other = TO_MASK(UnicodeCE::U_OTHER_SYMBOL), + + Punctuation_InitialQuote = TO_MASK(UnicodeCE::U_INITIAL_PUNCTUATION), + Punctuation_FinalQuote = TO_MASK(UnicodeCE::U_FINAL_PUNCTUATION) +}; + +CharCategory category(unsigned int); + +bool isSpace(UChar); +bool isLetter(UChar); +bool isPrintableChar(UChar); +bool isUpper(UChar); +bool isLower(UChar); +bool isPunct(UChar); +bool isDigit(UChar); +bool isAlphanumeric(UChar); +inline bool isSeparatorSpace(UChar c) { return category(c) == Separator_Space; } +inline bool isHighSurrogate(UChar c) { return (c & 0xfc00) == 0xd800; } +inline bool isLowSurrogate(UChar c) { return (c & 0xfc00) == 0xdc00; } + +UChar toLower(UChar); +UChar toUpper(UChar); +UChar foldCase(UChar); +UChar toTitleCase(UChar); +int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); +int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); +int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); + +int digitValue(UChar); + +UChar mirroredChar(UChar32); +unsigned char combiningClass(UChar32); +DecompositionType decompositionType(UChar32); +Direction direction(UChar32); +inline bool isArabicChar(UChar32 c) +{ + return c >= 0x0600 && c <= 0x06FF; +} + +inline bool hasLineBreakingPropertyComplexContext(UChar32) +{ + return false; // FIXME: implement! +} + +inline int umemcasecmp(const UChar* a, const UChar* b, int len) +{ + for (int i = 0; i < len; ++i) { + UChar c1 = foldCase(a[i]); + UChar c2 = foldCase(b[i]); + if (c1 != c2) + return c1 - c2; + } + return 0; +} + +inline UChar32 surrogateToUcs4(UChar high, UChar low) +{ + return (UChar32(high) << 10) + low - 0x35fdc00; +} + +} // namespace Unicode +} // namespace WTF + +#endif // WTF_UnicodeWinCE_h diff --git a/JavaScriptCore/wtf/unicode/wince/UnicodeWince.h b/JavaScriptCore/wtf/unicode/wince/UnicodeWince.h deleted file mode 100644 index db656ec..0000000 --- a/JavaScriptCore/wtf/unicode/wince/UnicodeWince.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef UNICODE_WINCE_H -#define UNICODE_WINCE_H - -#include "ce_unicode.h" - -#define TO_MASK(x) (1 << (x)) - -// some defines from ICU needed one or two places - -#define U16_IS_LEAD(c) (((c) & 0xfffffc00) == 0xd800) -#define U16_IS_TRAIL(c) (((c) & 0xfffffc00) == 0xdc00) -#define U16_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000) -#define U16_GET_SUPPLEMENTARY(lead, trail) \ - (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET) - -#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0) -#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00) - -#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800) -#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) -#define U16_IS_SURROGATE_LEAD(c) (((c) & 0x400) == 0) - -#define U16_NEXT(s, i, length, c) { \ - (c)=(s)[(i)++]; \ - if (U16_IS_LEAD(c)) { \ - uint16_t __c2; \ - if ((i) < (length) && U16_IS_TRAIL(__c2 = (s)[(i)])) { \ - ++(i); \ - (c) = U16_GET_SUPPLEMENTARY((c), __c2); \ - } \ - } \ -} - -#define U16_PREV(s, start, i, c) { \ - (c)=(s)[--(i)]; \ - if (U16_IS_TRAIL(c)) { \ - uint16_t __c2; \ - if ((i) > (start) && U16_IS_LEAD(__c2 = (s)[(i) - 1])) { \ - --(i); \ - (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \ - } \ - } \ -} - -#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) - -namespace WTF { - - namespace Unicode { - - enum Direction { - LeftToRight = UnicodeCE::U_LEFT_TO_RIGHT, - RightToLeft = UnicodeCE::U_RIGHT_TO_LEFT, - EuropeanNumber = UnicodeCE::U_EUROPEAN_NUMBER, - EuropeanNumberSeparator = UnicodeCE::U_EUROPEAN_NUMBER_SEPARATOR, - EuropeanNumberTerminator = UnicodeCE::U_EUROPEAN_NUMBER_TERMINATOR, - ArabicNumber = UnicodeCE::U_ARABIC_NUMBER, - CommonNumberSeparator = UnicodeCE::U_COMMON_NUMBER_SEPARATOR, - BlockSeparator = UnicodeCE::U_BLOCK_SEPARATOR, - SegmentSeparator = UnicodeCE::U_SEGMENT_SEPARATOR, - WhiteSpaceNeutral = UnicodeCE::U_WHITE_SPACE_NEUTRAL, - OtherNeutral = UnicodeCE::U_OTHER_NEUTRAL, - LeftToRightEmbedding = UnicodeCE::U_LEFT_TO_RIGHT_EMBEDDING, - LeftToRightOverride = UnicodeCE::U_LEFT_TO_RIGHT_OVERRIDE, - RightToLeftArabic = UnicodeCE::U_RIGHT_TO_LEFT_ARABIC, - RightToLeftEmbedding = UnicodeCE::U_RIGHT_TO_LEFT_EMBEDDING, - RightToLeftOverride = UnicodeCE::U_RIGHT_TO_LEFT_OVERRIDE, - PopDirectionalFormat = UnicodeCE::U_POP_DIRECTIONAL_FORMAT, - NonSpacingMark = UnicodeCE::U_DIR_NON_SPACING_MARK, - BoundaryNeutral = UnicodeCE::U_BOUNDARY_NEUTRAL - }; - - enum DecompositionType { - DecompositionNone = UnicodeCE::U_DT_NONE, - DecompositionCanonical = UnicodeCE::U_DT_CANONICAL, - DecompositionCompat = UnicodeCE::U_DT_COMPAT, - DecompositionCircle = UnicodeCE::U_DT_CIRCLE, - DecompositionFinal = UnicodeCE::U_DT_FINAL, - DecompositionFont = UnicodeCE::U_DT_FONT, - DecompositionFraction = UnicodeCE::U_DT_FRACTION, - DecompositionInitial = UnicodeCE::U_DT_INITIAL, - DecompositionIsolated = UnicodeCE::U_DT_ISOLATED, - DecompositionMedial = UnicodeCE::U_DT_MEDIAL, - DecompositionNarrow = UnicodeCE::U_DT_NARROW, - DecompositionNoBreak = UnicodeCE::U_DT_NOBREAK, - DecompositionSmall = UnicodeCE::U_DT_SMALL, - DecompositionSquare = UnicodeCE::U_DT_SQUARE, - DecompositionSub = UnicodeCE::U_DT_SUB, - DecompositionSuper = UnicodeCE::U_DT_SUPER, - DecompositionVertical = UnicodeCE::U_DT_VERTICAL, - DecompositionWide = UnicodeCE::U_DT_WIDE, - }; - - enum CharCategory { - NoCategory = 0, - Other_NotAssigned = TO_MASK(UnicodeCE::U_GENERAL_OTHER_TYPES), - Letter_Uppercase = TO_MASK(UnicodeCE::U_UPPERCASE_LETTER), - Letter_Lowercase = TO_MASK(UnicodeCE::U_LOWERCASE_LETTER), - Letter_Titlecase = TO_MASK(UnicodeCE::U_TITLECASE_LETTER), - Letter_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_LETTER), - Letter_Other = TO_MASK(UnicodeCE::U_OTHER_LETTER), - - Mark_NonSpacing = TO_MASK(UnicodeCE::U_NON_SPACING_MARK), - Mark_Enclosing = TO_MASK(UnicodeCE::U_ENCLOSING_MARK), - Mark_SpacingCombining = TO_MASK(UnicodeCE::U_COMBINING_SPACING_MARK), - - Number_DecimalDigit = TO_MASK(UnicodeCE::U_DECIMAL_DIGIT_NUMBER), - Number_Letter = TO_MASK(UnicodeCE::U_LETTER_NUMBER), - Number_Other = TO_MASK(UnicodeCE::U_OTHER_NUMBER), - - Separator_Space = TO_MASK(UnicodeCE::U_SPACE_SEPARATOR), - Separator_Line = TO_MASK(UnicodeCE::U_LINE_SEPARATOR), - Separator_Paragraph = TO_MASK(UnicodeCE::U_PARAGRAPH_SEPARATOR), - - Other_Control = TO_MASK(UnicodeCE::U_CONTROL_CHAR), - Other_Format = TO_MASK(UnicodeCE::U_FORMAT_CHAR), - Other_PrivateUse = TO_MASK(UnicodeCE::U_PRIVATE_USE_CHAR), - Other_Surrogate = TO_MASK(UnicodeCE::U_SURROGATE), - - Punctuation_Dash = TO_MASK(UnicodeCE::U_DASH_PUNCTUATION), - Punctuation_Open = TO_MASK(UnicodeCE::U_START_PUNCTUATION), - Punctuation_Close = TO_MASK(UnicodeCE::U_END_PUNCTUATION), - Punctuation_Connector = TO_MASK(UnicodeCE::U_CONNECTOR_PUNCTUATION), - Punctuation_Other = TO_MASK(UnicodeCE::U_OTHER_PUNCTUATION), - - Symbol_Math = TO_MASK(UnicodeCE::U_MATH_SYMBOL), - Symbol_Currency = TO_MASK(UnicodeCE::U_CURRENCY_SYMBOL), - Symbol_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_SYMBOL), - Symbol_Other = TO_MASK(UnicodeCE::U_OTHER_SYMBOL), - - Punctuation_InitialQuote = TO_MASK(UnicodeCE::U_INITIAL_PUNCTUATION), - Punctuation_FinalQuote = TO_MASK(UnicodeCE::U_FINAL_PUNCTUATION) - }; - - CharCategory category(unsigned int); - - bool isSpace(wchar_t); - bool isLetter(wchar_t); - bool isPrintableChar(wchar_t); - bool isUpper(wchar_t); - bool isLower(wchar_t); - bool isPunct(wchar_t); - bool isDigit(wchar_t); - inline bool isSeparatorSpace(wchar_t c) { return category(c) == Separator_Space; } - inline bool isHighSurrogate(wchar_t c) { return (c & 0xfc00) == 0xd800; } - inline bool isLowSurrogate(wchar_t c) { return (c & 0xfc00) == 0xdc00; } - - wchar_t toLower(wchar_t); - wchar_t toUpper(wchar_t); - wchar_t foldCase(wchar_t); - wchar_t toTitleCase(wchar_t); - int toLower(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError); - int toUpper(wchar_t* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError); - int foldCase(UChar* result, int resultLength, const wchar_t* source, int sourceLength, bool* isError); - - int digitValue(wchar_t); - - wchar_t mirroredChar(UChar32); - unsigned char combiningClass(UChar32); - DecompositionType decompositionType(UChar32); - Direction direction(UChar32); - inline bool isArabicChar(UChar32) - { - return false; // FIXME: implement! - } - - inline bool hasLineBreakingPropertyComplexContext(UChar32) - { - return false; // FIXME: implement! - } - - inline int umemcasecmp(const wchar_t* a, const wchar_t* b, int len) - { - for (int i = 0; i < len; ++i) { - wchar_t c1 = foldCase(a[i]); - wchar_t c2 = foldCase(b[i]); - if (c1 != c2) - return c1 - c2; - } - return 0; - } - - inline UChar32 surrogateToUcs4(wchar_t high, wchar_t low) - { - return (UChar32(high) << 10) + low - 0x35fdc00; - } - - } // namespace Unicode - -} // namespace WTF - -#endif -// vim: ts=2 sw=2 et |
