diff options
Diffstat (limited to 'JavaScriptCore/wtf/unicode')
-rw-r--r-- | JavaScriptCore/wtf/unicode/Collator.h | 67 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/CollatorDefault.cpp | 75 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/UTF8.cpp | 401 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/UTF8.h | 80 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/Unicode.h | 44 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h | 100 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp | 181 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h | 194 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp | 192 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h | 243 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp | 149 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h | 235 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h | 375 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp | 181 | ||||
-rw-r--r-- | JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h | 177 |
15 files changed, 0 insertions, 2694 deletions
diff --git a/JavaScriptCore/wtf/unicode/Collator.h b/JavaScriptCore/wtf/unicode/Collator.h deleted file mode 100644 index fe6a809..0000000 --- a/JavaScriptCore/wtf/unicode/Collator.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of - * its contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef WTF_Collator_h -#define WTF_Collator_h - -#include <wtf/Noncopyable.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/unicode/Unicode.h> - -#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION -struct UCollator; -#endif - -namespace WTF { - - class Collator : public Noncopyable { - public: - enum Result { Equal = 0, Greater = 1, Less = -1 }; - - Collator(const char* locale); // Parsing is lenient; e.g. language identifiers (such as "en-US") are accepted, too. - ~Collator(); - void setOrderLowerFirst(bool); - - static PassOwnPtr<Collator> userDefault(); - - Result collate(const ::UChar*, size_t, const ::UChar*, size_t) const; - - private: -#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION - void createCollator() const; - void releaseCollator(); - mutable UCollator* m_collator; -#endif - char* m_locale; - bool m_lowerFirst; - }; -} - -using WTF::Collator; - -#endif diff --git a/JavaScriptCore/wtf/unicode/CollatorDefault.cpp b/JavaScriptCore/wtf/unicode/CollatorDefault.cpp deleted file mode 100644 index 4e05432..0000000 --- a/JavaScriptCore/wtf/unicode/CollatorDefault.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of - * its contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "Collator.h" - -#if !USE(ICU_UNICODE) || UCONFIG_NO_COLLATION - -namespace WTF { - -Collator::Collator(const char*) -{ -} - -Collator::~Collator() -{ -} - -void Collator::setOrderLowerFirst(bool) -{ -} - -PassOwnPtr<Collator> Collator::userDefault() -{ - return new Collator(0); -} - -// A default implementation for platforms that lack Unicode-aware collation. -Collator::Result Collator::collate(const UChar* lhs, size_t lhsLength, const UChar* rhs, size_t rhsLength) const -{ - int lmin = lhsLength < rhsLength ? lhsLength : rhsLength; - int l = 0; - while (l < lmin && *lhs == *rhs) { - lhs++; - rhs++; - l++; - } - - if (l < lmin) - return (*lhs > *rhs) ? Greater : Less; - - if (lhsLength == rhsLength) - return Equal; - - return (lhsLength > rhsLength) ? Greater : Less; -} - -} - -#endif diff --git a/JavaScriptCore/wtf/unicode/UTF8.cpp b/JavaScriptCore/wtf/unicode/UTF8.cpp deleted file mode 100644 index dc24ed5..0000000 --- a/JavaScriptCore/wtf/unicode/UTF8.cpp +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright (C) 2007 Apple Inc. All rights reserved. - * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "UTF8.h" -#include <wtf/StringHasher.h> - -#include "ASCIICType.h" - -namespace WTF { -namespace Unicode { - -// FIXME: Use definition from CharacterNames.h. -static const UChar replacementCharacter = 0xFFFD; - -inline int inlineUTF8SequenceLengthNonASCII(char b0) -{ - if ((b0 & 0xC0) != 0xC0) - return 0; - if ((b0 & 0xE0) == 0xC0) - return 2; - if ((b0 & 0xF0) == 0xE0) - return 3; - if ((b0 & 0xF8) == 0xF0) - return 4; - return 0; -} - -inline int inlineUTF8SequenceLength(char b0) -{ - return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); -} - -int UTF8SequenceLength(char b0) -{ - return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); -} - -int decodeUTF8Sequence(const char* sequence) -{ - // Handle 0-byte sequences (never valid). - const unsigned char b0 = sequence[0]; - const int length = inlineUTF8SequenceLength(b0); - if (length == 0) - return -1; - - // Handle 1-byte sequences (plain ASCII). - const unsigned char b1 = sequence[1]; - if (length == 1) { - if (b1) - return -1; - return b0; - } - - // Handle 2-byte sequences. - if ((b1 & 0xC0) != 0x80) - return -1; - const unsigned char b2 = sequence[2]; - if (length == 2) { - if (b2) - return -1; - const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F); - if (c < 0x80) - return -1; - return c; - } - - // Handle 3-byte sequences. - if ((b2 & 0xC0) != 0x80) - return -1; - const unsigned char b3 = sequence[3]; - if (length == 3) { - if (b3) - return -1; - const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F); - if (c < 0x800) - return -1; - // UTF-16 surrogates should never appear in UTF-8 data. - if (c >= 0xD800 && c <= 0xDFFF) - return -1; - return c; - } - - // Handle 4-byte sequences. - if ((b3 & 0xC0) != 0x80) - return -1; - const unsigned char b4 = sequence[4]; - if (length == 4) { - if (b4) - return -1; - const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F); - if (c < 0x10000 || c > 0x10FFFF) - return -1; - return c; - } - - return -1; -} - -// Once the bits are split out into bytes of UTF-8, this is a mask OR-ed -// into the first byte, depending on how many bytes follow. There are -// as many entries in this table as there are UTF-8 sequence types. -// (I.e., one byte sequence, two byte... etc.). Remember that sequencs -// for *legal* UTF-8 will be 4 or fewer bytes total. -static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - -ConversionResult convertUTF16ToUTF8( - const UChar** sourceStart, const UChar* sourceEnd, - char** targetStart, char* targetEnd, bool strict) -{ - ConversionResult result = conversionOK; - const UChar* source = *sourceStart; - char* target = *targetStart; - while (source < sourceEnd) { - UChar32 ch; - unsigned short bytesToWrite = 0; - const UChar32 byteMask = 0xBF; - const UChar32 byteMark = 0x80; - const UChar* oldSource = source; // In case we have to back up because of target overflow. - ch = static_cast<unsigned short>(*source++); - // If we have a surrogate pair, convert to UChar32 first. - if (ch >= 0xD800 && ch <= 0xDBFF) { - // If the 16 bits following the high surrogate are in the source buffer... - if (source < sourceEnd) { - UChar32 ch2 = static_cast<unsigned short>(*source); - // If it's a low surrogate, convert to UChar32. - if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { - ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000; - ++source; - } else if (strict) { // it's an unpaired high surrogate - --source; // return to the illegal value itself - result = sourceIllegal; - break; - } - } else { // We don't have the 16 bits following the high surrogate. - --source; // return to the high surrogate - result = sourceExhausted; - break; - } - } else if (strict) { - // UTF-16 surrogate values are illegal in UTF-32 - if (ch >= 0xDC00 && ch <= 0xDFFF) { - --source; // return to the illegal value itself - result = sourceIllegal; - break; - } - } - // Figure out how many bytes the result will require - if (ch < (UChar32)0x80) { - bytesToWrite = 1; - } else if (ch < (UChar32)0x800) { - bytesToWrite = 2; - } else if (ch < (UChar32)0x10000) { - bytesToWrite = 3; - } else if (ch < (UChar32)0x110000) { - bytesToWrite = 4; - } else { - bytesToWrite = 3; - ch = replacementCharacter; - } - - target += bytesToWrite; - if (target > targetEnd) { - source = oldSource; // Back up source pointer! - target -= bytesToWrite; - result = targetExhausted; - break; - } - switch (bytesToWrite) { // note: everything falls through. - case 4: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6; - case 3: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6; - case 2: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6; - case 1: *--target = (char)(ch | firstByteMark[bytesToWrite]); - } - target += bytesToWrite; - } - *sourceStart = source; - *targetStart = target; - return result; -} - -// This must be called with the length pre-determined by the first byte. -// If presented with a length > 4, this returns false. The Unicode -// definition of UTF-8 goes up to 4-byte sequences. -static bool isLegalUTF8(const unsigned char* source, int length) -{ - unsigned char a; - const unsigned char* srcptr = source + length; - switch (length) { - default: return false; - // Everything else falls through when "true"... - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - case 2: if ((a = (*--srcptr)) > 0xBF) return false; - - switch (*source) { - // no fall-through in this inner switch - case 0xE0: if (a < 0xA0) return false; break; - case 0xED: if (a > 0x9F) return false; break; - case 0xF0: if (a < 0x90) return false; break; - case 0xF4: if (a > 0x8F) return false; break; - default: if (a < 0x80) return false; - } - - case 1: if (*source >= 0x80 && *source < 0xC2) return false; - } - if (*source > 0xF4) - return false; - return true; -} - -// Magic values subtracted from a buffer value during UTF8 conversion. -// This table contains as many values as there might be trailing bytes -// in a UTF-8 sequence. -static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; - -static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length) -{ - UChar32 character = 0; - - // The cases all fall through. - switch (length) { - case 6: character += static_cast<unsigned char>(*sequence++); character <<= 6; - case 5: character += static_cast<unsigned char>(*sequence++); character <<= 6; - case 4: character += static_cast<unsigned char>(*sequence++); character <<= 6; - case 3: character += static_cast<unsigned char>(*sequence++); character <<= 6; - case 2: character += static_cast<unsigned char>(*sequence++); character <<= 6; - case 1: character += static_cast<unsigned char>(*sequence++); - } - - return character - offsetsFromUTF8[length - 1]; -} - -ConversionResult convertUTF8ToUTF16( - const char** sourceStart, const char* sourceEnd, - UChar** targetStart, UChar* targetEnd, bool strict) -{ - ConversionResult result = conversionOK; - const char* source = *sourceStart; - UChar* target = *targetStart; - while (source < sourceEnd) { - int utf8SequenceLength = inlineUTF8SequenceLength(*source); - if (sourceEnd - source < utf8SequenceLength) { - result = sourceExhausted; - break; - } - // Do this check whether lenient or strict - if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8SequenceLength)) { - result = sourceIllegal; - break; - } - - UChar32 character = readUTF8Sequence(source, utf8SequenceLength); - - if (target >= targetEnd) { - source -= utf8SequenceLength; // Back up source pointer! - result = targetExhausted; - break; - } - - if (U_IS_BMP(character)) { - // UTF-16 surrogate values are illegal in UTF-32 - if (U_IS_SURROGATE(character)) { - if (strict) { - source -= utf8SequenceLength; // return to the illegal value itself - result = sourceIllegal; - break; - } else - *target++ = replacementCharacter; - } else - *target++ = character; // normal case - } else if (U_IS_SUPPLEMENTARY(character)) { - // target is a character in range 0xFFFF - 0x10FFFF - if (target + 1 >= targetEnd) { - source -= utf8SequenceLength; // Back up source pointer! - result = targetExhausted; - break; - } - *target++ = U16_LEAD(character); - *target++ = U16_TRAIL(character); - } else { - if (strict) { - source -= utf8SequenceLength; // return to the start - result = sourceIllegal; - break; // Bail out; shouldn't continue - } else - *target++ = replacementCharacter; - } - } - *sourceStart = source; - *targetStart = target; - return result; -} - -unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length) -{ - if (!data) - return 0; - - WTF::StringHasher stringHasher; - utf16Length = 0; - - while (data < dataEnd) { - if (isASCII(*data)) { - stringHasher.addCharacter(*data++); - utf16Length++; - continue; - } - - int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data); - - if (dataEnd - data < utf8SequenceLength) - return false; - - if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength)) - return 0; - - UChar32 character = readUTF8Sequence(data, utf8SequenceLength); - ASSERT(!isASCII(character)); - - if (U_IS_BMP(character)) { - // UTF-16 surrogate values are illegal in UTF-32 - if (U_IS_SURROGATE(character)) - return 0; - stringHasher.addCharacter(static_cast<UChar>(character)); // normal case - utf16Length++; - } else if (U_IS_SUPPLEMENTARY(character)) { - stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)), - static_cast<UChar>(U16_TRAIL(character))); - utf16Length += 2; - } else - return 0; - } - - return stringHasher.hash(); -} - -bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd) -{ - while (b < bEnd) { - if (isASCII(*b)) { - if (*a++ != *b++) - return false; - continue; - } - - int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*b); - - if (bEnd - b < utf8SequenceLength) - return false; - - if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(b), utf8SequenceLength)) - return 0; - - UChar32 character = readUTF8Sequence(b, utf8SequenceLength); - ASSERT(!isASCII(character)); - - if (U_IS_BMP(character)) { - // UTF-16 surrogate values are illegal in UTF-32 - if (U_IS_SURROGATE(character)) - return false; - if (*a++ != character) - return false; - } else if (U_IS_SUPPLEMENTARY(character)) { - if (*a++ != U16_LEAD(character)) - return false; - if (*a++ != U16_TRAIL(character)) - return false; - } else - return false; - } - - return a == aEnd; -} - -} // namespace Unicode -} // namespace WTF diff --git a/JavaScriptCore/wtf/unicode/UTF8.h b/JavaScriptCore/wtf/unicode/UTF8.h deleted file mode 100644 index 1f4baca..0000000 --- a/JavaScriptCore/wtf/unicode/UTF8.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2007 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef WTF_UTF8_h -#define WTF_UTF8_h - -#include "Unicode.h" - -namespace WTF { -namespace Unicode { - - // Given a first byte, gives the length of the UTF-8 sequence it begins. - // Returns 0 for bytes that are not legal starts of UTF-8 sequences. - // Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF). - int UTF8SequenceLength(char); - - // Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character. - // Only allows Unicode characters (U-00000000 to U-0010FFFF). - // Returns -1 if the sequence is not valid (including presence of extra bytes). - int decodeUTF8Sequence(const char*); - - typedef enum { - conversionOK, // conversion successful - sourceExhausted, // partial character in source, but hit end - targetExhausted, // insuff. room in target for conversion - sourceIllegal // source sequence is illegal/malformed - } ConversionResult; - - // These conversion functions take a "strict" argument. When this - // flag is set to strict, both irregular sequences and isolated surrogates - // will cause an error. When the flag is set to lenient, both irregular - // sequences and isolated surrogates are converted. - // - // Whether the flag is strict or lenient, all illegal sequences will cause - // an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, - // or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code - // must check for illegal sequences. - // - // When the flag is set to lenient, characters over 0x10FFFF are converted - // to the replacement character; otherwise (when the flag is set to strict) - // they constitute an error. - - ConversionResult convertUTF8ToUTF16( - const char** sourceStart, const char* sourceEnd, - UChar** targetStart, UChar* targetEnd, bool strict = true); - - ConversionResult convertUTF16ToUTF8( - const UChar** sourceStart, const UChar* sourceEnd, - char** targetStart, char* targetEnd, bool strict = true); - - unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length); - - bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd); - -} // namespace Unicode -} // namespace WTF - -#endif // WTF_UTF8_h diff --git a/JavaScriptCore/wtf/unicode/Unicode.h b/JavaScriptCore/wtf/unicode/Unicode.h deleted file mode 100644 index 50524b1..0000000 --- a/JavaScriptCore/wtf/unicode/Unicode.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006, 2008, 2009 Apple Inc. All rights reserved. - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef WTF_UNICODE_H -#define WTF_UNICODE_H - -#include <wtf/Assertions.h> - -#if USE(QT4_UNICODE) -#include "qt4/UnicodeQt4.h" -#elif USE(ICU_UNICODE) -#include <wtf/unicode/icu/UnicodeIcu.h> -#elif USE(GLIB_UNICODE) -#include <wtf/unicode/glib/UnicodeGLib.h> -#elif USE(WINCE_UNICODE) -#include <wtf/unicode/wince/UnicodeWinCE.h> -#elif USE(BREWMP_UNICODE) -#include <wtf/unicode/brew/UnicodeBrew.h> -#else -#error "Unknown Unicode implementation" -#endif - -COMPILE_ASSERT(sizeof(UChar) == 2, UCharIsTwoBytes); - -#endif // WTF_UNICODE_H diff --git a/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h b/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h deleted file mode 100644 index 8959912..0000000 --- a/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 1999-2004, International Business Machines Corporation and others. All Rights Reserved. - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. - * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> - * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef UnicodeMacrosFromICU_h -#define UnicodeMacrosFromICU_h - -// some defines from ICU - -#define U_IS_BMP(c) ((UChar32)(c)<=0xffff) -#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) -#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) -#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) -#define U16_GET_SUPPLEMENTARY(lead, trail) \ - (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) - -#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) -#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) -#define U16_LENGTH(c) ((uint32_t)(c) <= 0xffff ? 1 : 2) - -#define U_IS_SUPPLEMENTARY(c) ((UChar32)((c)-0x10000)<=0xfffff) -#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) -#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) -#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) -#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) - -#define U16_GET(s, start, i, length, c) { \ - (c)=(s)[i]; \ - if(U16_IS_SURROGATE(c)) { \ - uint16_t __c2; \ - if(U16_IS_SURROGATE_LEAD(c)) { \ - if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ - (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ - } \ - } else { \ - if((i)-1>=(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - } \ - } \ - } \ -} - -#define U16_PREV(s, start, i, c) { \ - (c)=(s)[--(i)]; \ - if(U16_IS_TRAIL(c)) { \ - uint16_t __c2; \ - if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - --(i); \ - (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - } \ - } \ -} - -#define U16_BACK_1(s, start, i) { \ - if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ - --(i); \ - } \ -} - -#define U16_NEXT(s, i, length, c) { \ - (c)=(s)[(i)++]; \ - if(U16_IS_LEAD(c)) { \ - uint16_t __c2; \ - if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ - ++(i); \ - (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ - } \ - } \ -} - -#define U16_FWD_1(s, i, length) { \ - if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \ - ++(i); \ - } \ -} - -#define U_MASK(x) ((uint32_t)1<<(x)) - -#endif diff --git a/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp b/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp deleted file mode 100644 index 8367f17..0000000 --- a/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * Copyright (C) 2010 Company 100, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" -#include "UnicodeBrew.h" - -#include <wchar.h> -#include <wctype.h> - -namespace WTF { -namespace Unicode { - -UChar toLower(UChar c) -{ - return towlower(c); -} - -UChar toUpper(UChar c) -{ - return towupper(c); -} - -UChar foldCase(UChar c) -{ - return towlower(c); -} - -bool isPrintableChar(UChar c) -{ - return !!iswprint(c); -} - -bool isUpper(UChar c) -{ - return !!iswupper(c); -} - -bool isLower(UChar c) -{ - return !!iswlower(c); -} - -bool isDigit(UChar c) -{ - return !!iswdigit(c); -} - -bool isPunct(UChar c) -{ - return !!iswpunct(c); -} - -bool isAlphanumeric(UChar c) -{ - return !!iswalnum(c); -} - -int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) -{ - const UChar* sourceIterator = source; - const UChar* sourceEnd = source + sourceLength; - UChar* resultIterator = result; - UChar* resultEnd = result + resultLength; - - if (sourceLength <= resultLength) { - while (sourceIterator < sourceEnd) - *resultIterator++ = towlower(*sourceIterator++); - } else { - while (resultIterator < resultEnd) - *resultIterator++ = towlower(*sourceIterator++); - } - - int remainingCharacters = sourceIterator < sourceEnd ? sourceEnd - sourceIterator : 0; - *isError = !!remainingCharacters; - if (resultIterator < resultEnd) - *resultIterator = 0; - - return (resultIterator - result) + remainingCharacters; -} - -int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) -{ - const UChar* sourceIterator = source; - const UChar* sourceEnd = source + sourceLength; - UChar* resultIterator = result; - UChar* resultEnd = result + resultLength; - - if (sourceLength <= resultLength) { - while (sourceIterator < sourceEnd) - *resultIterator++ = towupper(*sourceIterator++); - } else { - while (resultIterator < resultEnd) - *resultIterator++ = towupper(*sourceIterator++); - } - - int remainingCharacters = sourceIterator < sourceEnd ? sourceEnd - sourceIterator : 0; - *isError = !!remainingCharacters; - if (resultIterator < resultEnd) - *resultIterator = 0; - - return (resultIterator - result) + remainingCharacters; -} - -int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) -{ - *isError = false; - if (resultLength < sourceLength) { - *isError = true; - return sourceLength; - } - for (int i = 0; i < sourceLength; ++i) - result[i] = foldCase(source[i]); - return sourceLength; -} - -UChar toTitleCase(UChar c) -{ - return towupper(c); -} - -Direction direction(UChar32 c) -{ - return static_cast<Direction>(ICU::direction(c)); -} - -CharCategory category(unsigned int c) -{ - return static_cast<CharCategory>(TO_MASK((int8_t) ICU::category(c))); -} - -DecompositionType decompositionType(UChar32 c) -{ - return static_cast<DecompositionType>(ICU::decompositionType(c)); -} - -unsigned char combiningClass(UChar32 c) -{ - return ICU::combiningClass(c); -} - -UChar mirroredChar(UChar32 c) -{ - return ICU::mirroredChar(c); -} - -int digitValue(UChar c) -{ - return ICU::digitValue(c); -} - -bool isSpace(UChar c) -{ - return !!iswspace(c); -} - -bool isLetter(UChar c) -{ - return !!iswalpha(c); -} - -} // namespace Unicode -} // namespace WTF diff --git a/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h b/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h deleted file mode 100644 index 1d7576f..0000000 --- a/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * Copyright (C) 2010 Company 100, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef UnicodeBrew_h -#define UnicodeBrew_h - -#include "UnicodeFromICU.h" -#include "UnicodeMacrosFromICU.h" - -namespace WTF { -namespace Unicode { - -enum Direction { - LeftToRight = ICU::U_LEFT_TO_RIGHT, - RightToLeft = ICU::U_RIGHT_TO_LEFT, - EuropeanNumber = ICU::U_EUROPEAN_NUMBER, - EuropeanNumberSeparator = ICU::U_EUROPEAN_NUMBER_SEPARATOR, - EuropeanNumberTerminator = ICU::U_EUROPEAN_NUMBER_TERMINATOR, - ArabicNumber = ICU::U_ARABIC_NUMBER, - CommonNumberSeparator = ICU::U_COMMON_NUMBER_SEPARATOR, - BlockSeparator = ICU::U_BLOCK_SEPARATOR, - SegmentSeparator = ICU::U_SEGMENT_SEPARATOR, - WhiteSpaceNeutral = ICU::U_WHITE_SPACE_NEUTRAL, - OtherNeutral = ICU::U_OTHER_NEUTRAL, - LeftToRightEmbedding = ICU::U_LEFT_TO_RIGHT_EMBEDDING, - LeftToRightOverride = ICU::U_LEFT_TO_RIGHT_OVERRIDE, - RightToLeftArabic = ICU::U_RIGHT_TO_LEFT_ARABIC, - RightToLeftEmbedding = ICU::U_RIGHT_TO_LEFT_EMBEDDING, - RightToLeftOverride = ICU::U_RIGHT_TO_LEFT_OVERRIDE, - PopDirectionalFormat = ICU::U_POP_DIRECTIONAL_FORMAT, - NonSpacingMark = ICU::U_DIR_NON_SPACING_MARK, - BoundaryNeutral = ICU::U_BOUNDARY_NEUTRAL -}; - -enum DecompositionType { - DecompositionNone = ICU::U_DT_NONE, - DecompositionCanonical = ICU::U_DT_CANONICAL, - DecompositionCompat = ICU::U_DT_COMPAT, - DecompositionCircle = ICU::U_DT_CIRCLE, - DecompositionFinal = ICU::U_DT_FINAL, - DecompositionFont = ICU::U_DT_FONT, - DecompositionFraction = ICU::U_DT_FRACTION, - DecompositionInitial = ICU::U_DT_INITIAL, - DecompositionIsolated = ICU::U_DT_ISOLATED, - DecompositionMedial = ICU::U_DT_MEDIAL, - DecompositionNarrow = ICU::U_DT_NARROW, - DecompositionNoBreak = ICU::U_DT_NOBREAK, - DecompositionSmall = ICU::U_DT_SMALL, - DecompositionSquare = ICU::U_DT_SQUARE, - DecompositionSub = ICU::U_DT_SUB, - DecompositionSuper = ICU::U_DT_SUPER, - DecompositionVertical = ICU::U_DT_VERTICAL, - DecompositionWide = ICU::U_DT_WIDE, -}; - -enum CharCategory { - NoCategory = 0, - Other_NotAssigned = TO_MASK(ICU::U_GENERAL_OTHER_TYPES), - Letter_Uppercase = TO_MASK(ICU::U_UPPERCASE_LETTER), - Letter_Lowercase = TO_MASK(ICU::U_LOWERCASE_LETTER), - Letter_Titlecase = TO_MASK(ICU::U_TITLECASE_LETTER), - Letter_Modifier = TO_MASK(ICU::U_MODIFIER_LETTER), - Letter_Other = TO_MASK(ICU::U_OTHER_LETTER), - - Mark_NonSpacing = TO_MASK(ICU::U_NON_SPACING_MARK), - Mark_Enclosing = TO_MASK(ICU::U_ENCLOSING_MARK), - Mark_SpacingCombining = TO_MASK(ICU::U_COMBINING_SPACING_MARK), - - Number_DecimalDigit = TO_MASK(ICU::U_DECIMAL_DIGIT_NUMBER), - Number_Letter = TO_MASK(ICU::U_LETTER_NUMBER), - Number_Other = TO_MASK(ICU::U_OTHER_NUMBER), - - Separator_Space = TO_MASK(ICU::U_SPACE_SEPARATOR), - Separator_Line = TO_MASK(ICU::U_LINE_SEPARATOR), - Separator_Paragraph = TO_MASK(ICU::U_PARAGRAPH_SEPARATOR), - - Other_Control = TO_MASK(ICU::U_CONTROL_CHAR), - Other_Format = TO_MASK(ICU::U_FORMAT_CHAR), - Other_PrivateUse = TO_MASK(ICU::U_PRIVATE_USE_CHAR), - Other_Surrogate = TO_MASK(ICU::U_SURROGATE), - - Punctuation_Dash = TO_MASK(ICU::U_DASH_PUNCTUATION), - Punctuation_Open = TO_MASK(ICU::U_START_PUNCTUATION), - Punctuation_Close = TO_MASK(ICU::U_END_PUNCTUATION), - Punctuation_Connector = TO_MASK(ICU::U_CONNECTOR_PUNCTUATION), - Punctuation_Other = TO_MASK(ICU::U_OTHER_PUNCTUATION), - - Symbol_Math = TO_MASK(ICU::U_MATH_SYMBOL), - Symbol_Currency = TO_MASK(ICU::U_CURRENCY_SYMBOL), - Symbol_Modifier = TO_MASK(ICU::U_MODIFIER_SYMBOL), - Symbol_Other = TO_MASK(ICU::U_OTHER_SYMBOL), - - Punctuation_InitialQuote = TO_MASK(ICU::U_INITIAL_PUNCTUATION), - Punctuation_FinalQuote = TO_MASK(ICU::U_FINAL_PUNCTUATION) -}; - -UChar foldCase(UChar); - -int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); - -int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); - -UChar toUpper(UChar); -UChar toLower(UChar); - -bool isUpper(UChar); - -int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); - -UChar toTitleCase(UChar); - -inline bool isArabicChar(UChar32 c) -{ - return c >= 0x0600 && c <= 0x06FF; -} - -bool isAlphanumeric(UChar); - -CharCategory category(unsigned int); - -inline bool isSeparatorSpace(UChar c) -{ - return category(c) == Separator_Space; -} - -bool isPrintableChar(UChar); - -bool isDigit(UChar); - -bool isPunct(UChar); - -inline bool hasLineBreakingPropertyComplexContext(UChar32) -{ - // FIXME: implement! - return false; -} - -inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) -{ - // FIXME - return false; -} - -UChar mirroredChar(UChar32); - -Direction direction(UChar32); - -bool isLower(UChar); - -int digitValue(UChar); - -unsigned char combiningClass(UChar32); - -DecompositionType decompositionType(UChar32); - -inline int umemcasecmp(const UChar* a, const UChar* b, int len) -{ - for (int i = 0; i < len; ++i) { - UChar c1 = foldCase(a[i]); - UChar c2 = foldCase(b[i]); - if (c1 != c2) - return c1 - c2; - } - return 0; -} - -bool isSpace(UChar); -bool isLetter(UChar); - -} // namespace Unicode -} // namespace WTF - -#endif diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp deleted file mode 100644 index a01c3ee..0000000 --- a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> - * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> - * Copyright (C) 2010 Igalia S.L. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#include "config.h" -#include "UnicodeGLib.h" - -#include <wtf/Vector.h> -#include <wtf/unicode/UTF8.h> - -#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF) - -namespace WTF { -namespace Unicode { - -UChar32 foldCase(UChar32 ch) -{ - GOwnPtr<GError> gerror; - - GOwnPtr<char> utf8char; - utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr())); - if (gerror) - return ch; - - GOwnPtr<char> utf8caseFolded; - utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1)); - - GOwnPtr<gunichar> ucs4Result; - ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0)); - - return *ucs4Result; -} - -static int getUTF16LengthFromUTF8(const gchar* utf8String, int length) -{ - int utf16Length = 0; - const gchar* inputString = utf8String; - - while ((utf8String + length - inputString > 0) && *inputString) { - gunichar character = g_utf8_get_char(inputString); - - utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1; - inputString = g_utf8_next_char(inputString); - } - - return utf16Length; -} - -typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length); - -static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction) -{ - *error = false; - - // Allocate a buffer big enough to hold all the characters. - Vector<char> buffer(srcLength * 3); - char* utf8Target = buffer.data(); - const UChar* utf16Source = src; - ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true); - if (conversionResult != conversionOK) { - *error = true; - return -1; - } - buffer.shrink(utf8Target - buffer.data()); - - GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size())); - long utf8ResultLength = strlen(utf8Result.get()); - - // Calculate the destination buffer size. - int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength); - if (realLength > resultLength) { - *error = true; - return realLength; - } - - // Convert the result to UTF-16. - UChar* utf16Target = result; - const char* utf8Source = utf8Result.get(); - conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true); - long utf16ResultLength = utf16Target - result; - if (conversionResult != conversionOK) - *error = true; - - return utf16ResultLength <= 0 ? -1 : utf16ResultLength; -} -int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold); -} - -int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown); -} - -int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup); -} - -Direction direction(UChar32 c) -{ - PangoBidiType type = pango_bidi_type_for_unichar(c); - switch (type) { - case PANGO_BIDI_TYPE_L: - return LeftToRight; - case PANGO_BIDI_TYPE_R: - return RightToLeft; - case PANGO_BIDI_TYPE_AL: - return RightToLeftArabic; - case PANGO_BIDI_TYPE_LRE: - return LeftToRightEmbedding; - case PANGO_BIDI_TYPE_RLE: - return RightToLeftEmbedding; - case PANGO_BIDI_TYPE_LRO: - return LeftToRightOverride; - case PANGO_BIDI_TYPE_RLO: - return RightToLeftOverride; - case PANGO_BIDI_TYPE_PDF: - return PopDirectionalFormat; - case PANGO_BIDI_TYPE_EN: - return EuropeanNumber; - case PANGO_BIDI_TYPE_AN: - return ArabicNumber; - case PANGO_BIDI_TYPE_ES: - return EuropeanNumberSeparator; - case PANGO_BIDI_TYPE_ET: - return EuropeanNumberTerminator; - case PANGO_BIDI_TYPE_CS: - return CommonNumberSeparator; - case PANGO_BIDI_TYPE_NSM: - return NonSpacingMark; - case PANGO_BIDI_TYPE_BN: - return BoundaryNeutral; - case PANGO_BIDI_TYPE_B: - return BlockSeparator; - case PANGO_BIDI_TYPE_S: - return SegmentSeparator; - case PANGO_BIDI_TYPE_WS: - return WhiteSpaceNeutral; - default: - return OtherNeutral; - } -} - -int umemcasecmp(const UChar* a, const UChar* b, int len) -{ - GOwnPtr<char> utf8a; - GOwnPtr<char> utf8b; - - utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0)); - utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0)); - - GOwnPtr<char> foldedA; - GOwnPtr<char> foldedB; - - foldedA.set(g_utf8_casefold(utf8a.get(), -1)); - foldedB.set(g_utf8_casefold(utf8b.get(), -1)); - - // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu - // from the ICU docs: - // "Compare two strings case-insensitively using full case folding. - // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))." - // - // So it looks like we don't need the full g_utf8_collate here, - // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes). - // As there is no direct equivalent to this icu function in GLib, for now - // we'll use g_utf8_collate(): - - return g_utf8_collate(foldedA.get(), foldedB.get()); -} - -} -} diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h deleted file mode 100644 index 46b00ea..0000000 --- a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. - * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> - * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef UnicodeGLib_h -#define UnicodeGLib_h - -#include "UnicodeMacrosFromICU.h" -#include "GOwnPtr.h" - -#include <glib.h> -#include <pango/pango.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> - -typedef uint16_t UChar; -typedef int32_t UChar32; - -namespace WTF { -namespace Unicode { - -enum Direction { - LeftToRight, - RightToLeft, - EuropeanNumber, - EuropeanNumberSeparator, - EuropeanNumberTerminator, - ArabicNumber, - CommonNumberSeparator, - BlockSeparator, - SegmentSeparator, - WhiteSpaceNeutral, - OtherNeutral, - LeftToRightEmbedding, - LeftToRightOverride, - RightToLeftArabic, - RightToLeftEmbedding, - RightToLeftOverride, - PopDirectionalFormat, - NonSpacingMark, - BoundaryNeutral -}; - -enum DecompositionType { - DecompositionNone, - DecompositionCanonical, - DecompositionCompat, - DecompositionCircle, - DecompositionFinal, - DecompositionFont, - DecompositionFraction, - DecompositionInitial, - DecompositionIsolated, - DecompositionMedial, - DecompositionNarrow, - DecompositionNoBreak, - DecompositionSmall, - DecompositionSquare, - DecompositionSub, - DecompositionSuper, - DecompositionVertical, - DecompositionWide, -}; - -enum CharCategory { - NoCategory = 0, - Other_NotAssigned = U_MASK(G_UNICODE_UNASSIGNED), - Letter_Uppercase = U_MASK(G_UNICODE_UPPERCASE_LETTER), - Letter_Lowercase = U_MASK(G_UNICODE_LOWERCASE_LETTER), - Letter_Titlecase = U_MASK(G_UNICODE_TITLECASE_LETTER), - Letter_Modifier = U_MASK(G_UNICODE_MODIFIER_LETTER), - Letter_Other = U_MASK(G_UNICODE_OTHER_LETTER), - - Mark_NonSpacing = U_MASK(G_UNICODE_NON_SPACING_MARK), - Mark_Enclosing = U_MASK(G_UNICODE_ENCLOSING_MARK), - Mark_SpacingCombining = U_MASK(G_UNICODE_COMBINING_MARK), - - Number_DecimalDigit = U_MASK(G_UNICODE_DECIMAL_NUMBER), - Number_Letter = U_MASK(G_UNICODE_LETTER_NUMBER), - Number_Other = U_MASK(G_UNICODE_OTHER_NUMBER), - - Separator_Space = U_MASK(G_UNICODE_SPACE_SEPARATOR), - Separator_Line = U_MASK(G_UNICODE_LINE_SEPARATOR), - Separator_Paragraph = U_MASK(G_UNICODE_PARAGRAPH_SEPARATOR), - - Other_Control = U_MASK(G_UNICODE_CONTROL), - Other_Format = U_MASK(G_UNICODE_FORMAT), - Other_PrivateUse = U_MASK(G_UNICODE_PRIVATE_USE), - Other_Surrogate = U_MASK(G_UNICODE_SURROGATE), - - Punctuation_Dash = U_MASK(G_UNICODE_DASH_PUNCTUATION), - Punctuation_Open = U_MASK(G_UNICODE_OPEN_PUNCTUATION), - Punctuation_Close = U_MASK(G_UNICODE_CLOSE_PUNCTUATION), - Punctuation_Connector = U_MASK(G_UNICODE_CONNECT_PUNCTUATION), - Punctuation_Other = U_MASK(G_UNICODE_OTHER_PUNCTUATION), - - Symbol_Math = U_MASK(G_UNICODE_MATH_SYMBOL), - Symbol_Currency = U_MASK(G_UNICODE_CURRENCY_SYMBOL), - Symbol_Modifier = U_MASK(G_UNICODE_MODIFIER_SYMBOL), - Symbol_Other = U_MASK(G_UNICODE_OTHER_SYMBOL), - - Punctuation_InitialQuote = U_MASK(G_UNICODE_INITIAL_PUNCTUATION), - Punctuation_FinalQuote = U_MASK(G_UNICODE_FINAL_PUNCTUATION) -}; - -UChar32 foldCase(UChar32); - -int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); - -int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); - -inline UChar32 toLower(UChar32 c) -{ - return g_unichar_tolower(c); -} - -inline UChar32 toUpper(UChar32 c) -{ - return g_unichar_toupper(c); -} - -int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error); - -inline UChar32 toTitleCase(UChar32 c) -{ - return g_unichar_totitle(c); -} - -inline bool isArabicChar(UChar32 c) -{ - return c >= 0x0600 && c <= 0x06FF; -} - -inline bool isAlphanumeric(UChar32 c) -{ - return g_unichar_isalnum(c); -} - -inline bool isFormatChar(UChar32 c) -{ - return g_unichar_type(c) == G_UNICODE_FORMAT; -} - -inline bool isSeparatorSpace(UChar32 c) -{ - return g_unichar_type(c) == G_UNICODE_SPACE_SEPARATOR; -} - -inline bool isPrintableChar(UChar32 c) -{ - return g_unichar_isprint(c); -} - -inline bool isDigit(UChar32 c) -{ - return g_unichar_isdigit(c); -} - -inline bool isPunct(UChar32 c) -{ - return g_unichar_ispunct(c); -} - -inline bool hasLineBreakingPropertyComplexContext(UChar32 c) -{ - // FIXME - return false; -} - -inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) -{ - // FIXME - return false; -} - -inline UChar32 mirroredChar(UChar32 c) -{ - gunichar mirror = 0; - g_unichar_get_mirror_char(c, &mirror); - return mirror; -} - -inline CharCategory category(UChar32 c) -{ - if (c > 0xffff) - return NoCategory; - - return (CharCategory) U_MASK(g_unichar_type(c)); -} - -Direction direction(UChar32); - -inline bool isLower(UChar32 c) -{ - return g_unichar_islower(c); -} - -inline int digitValue(UChar32 c) -{ - return g_unichar_digit_value(c); -} - -inline uint8_t combiningClass(UChar32 c) -{ - // FIXME - // return g_unichar_combining_class(c); - return 0; -} - -inline DecompositionType decompositionType(UChar32 c) -{ - // FIXME - return DecompositionNone; -} - -int umemcasecmp(const UChar*, const UChar*, int len); - -} -} - -#endif - diff --git a/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp b/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp deleted file mode 100644 index 805b114..0000000 --- a/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of - * its contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "Collator.h" - -#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION - -#include "Assertions.h" -#include "Threading.h" -#include <unicode/ucol.h> -#include <string.h> - -#if OS(DARWIN) -#include "RetainPtr.h" -#include <CoreFoundation/CoreFoundation.h> -#endif - -namespace WTF { - -static UCollator* cachedCollator; -static Mutex& cachedCollatorMutex() -{ - AtomicallyInitializedStatic(Mutex&, mutex = *new Mutex); - return mutex; -} - -Collator::Collator(const char* locale) - : m_collator(0) - , m_locale(locale ? strdup(locale) : 0) - , m_lowerFirst(false) -{ -} - -PassOwnPtr<Collator> Collator::userDefault() -{ -#if OS(DARWIN) && PLATFORM(CF) - // Mac OS X doesn't set UNIX locale to match user-selected one, so ICU default doesn't work. -#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !OS(IOS) - RetainPtr<CFLocaleRef> currentLocale(AdoptCF, CFLocaleCopyCurrent()); - CFStringRef collationOrder = (CFStringRef)CFLocaleGetValue(currentLocale.get(), kCFLocaleCollatorIdentifier); -#else - RetainPtr<CFStringRef> collationOrderRetainer(AdoptCF, (CFStringRef)CFPreferencesCopyValue(CFSTR("AppleCollationOrder"), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost)); - CFStringRef collationOrder = collationOrderRetainer.get(); -#endif - char buf[256]; - if (!collationOrder) - return adoptPtr(new Collator("")); - CFStringGetCString(collationOrder, buf, sizeof(buf), kCFStringEncodingASCII); - return adoptPtr(new Collator(buf)); -#else - return adoptPtr(new Collator(0)); -#endif -} - -Collator::~Collator() -{ - releaseCollator(); - free(m_locale); -} - -void Collator::setOrderLowerFirst(bool lowerFirst) -{ - m_lowerFirst = lowerFirst; -} - -Collator::Result Collator::collate(const UChar* lhs, size_t lhsLength, const UChar* rhs, size_t rhsLength) const -{ - if (!m_collator) - createCollator(); - - return static_cast<Result>(ucol_strcoll(m_collator, lhs, lhsLength, rhs, rhsLength)); -} - -void Collator::createCollator() const -{ - ASSERT(!m_collator); - UErrorCode status = U_ZERO_ERROR; - - { - Locker<Mutex> lock(cachedCollatorMutex()); - if (cachedCollator) { - const char* cachedCollatorLocale = ucol_getLocaleByType(cachedCollator, ULOC_REQUESTED_LOCALE, &status); - ASSERT(U_SUCCESS(status)); - ASSERT(cachedCollatorLocale); - - UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status); - ASSERT(U_SUCCESS(status)); - - // FIXME: default locale is never matched, because ucol_getLocaleByType returns the actual one used, not 0. - if (m_locale && 0 == strcmp(cachedCollatorLocale, m_locale) - && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) { - m_collator = cachedCollator; - cachedCollator = 0; - return; - } - } - } - - m_collator = ucol_open(m_locale, &status); - if (U_FAILURE(status)) { - status = U_ZERO_ERROR; - m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm. - } - ASSERT(U_SUCCESS(status)); - - ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status); - ASSERT(U_SUCCESS(status)); -} - -void Collator::releaseCollator() -{ - { - Locker<Mutex> lock(cachedCollatorMutex()); - if (cachedCollator) - ucol_close(cachedCollator); - cachedCollator = m_collator; - m_collator = 0; - } -} - -} - -#endif diff --git a/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h b/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h deleted file mode 100644 index a2a5c0a..0000000 --- a/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef WTF_UNICODE_ICU_H -#define WTF_UNICODE_ICU_H - -#include <stdlib.h> -#include <unicode/uchar.h> -#include <unicode/ustring.h> -#include <unicode/utf16.h> - -namespace WTF { -namespace Unicode { - -enum Direction { - LeftToRight = U_LEFT_TO_RIGHT, - RightToLeft = U_RIGHT_TO_LEFT, - EuropeanNumber = U_EUROPEAN_NUMBER, - EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR, - EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR, - ArabicNumber = U_ARABIC_NUMBER, - CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR, - BlockSeparator = U_BLOCK_SEPARATOR, - SegmentSeparator = U_SEGMENT_SEPARATOR, - WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL, - OtherNeutral = U_OTHER_NEUTRAL, - LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING, - LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE, - RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC, - RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING, - RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE, - PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT, - NonSpacingMark = U_DIR_NON_SPACING_MARK, - BoundaryNeutral = U_BOUNDARY_NEUTRAL -}; - -enum DecompositionType { - DecompositionNone = U_DT_NONE, - DecompositionCanonical = U_DT_CANONICAL, - DecompositionCompat = U_DT_COMPAT, - DecompositionCircle = U_DT_CIRCLE, - DecompositionFinal = U_DT_FINAL, - DecompositionFont = U_DT_FONT, - DecompositionFraction = U_DT_FRACTION, - DecompositionInitial = U_DT_INITIAL, - DecompositionIsolated = U_DT_ISOLATED, - DecompositionMedial = U_DT_MEDIAL, - DecompositionNarrow = U_DT_NARROW, - DecompositionNoBreak = U_DT_NOBREAK, - DecompositionSmall = U_DT_SMALL, - DecompositionSquare = U_DT_SQUARE, - DecompositionSub = U_DT_SUB, - DecompositionSuper = U_DT_SUPER, - DecompositionVertical = U_DT_VERTICAL, - DecompositionWide = U_DT_WIDE, -}; - -enum CharCategory { - NoCategory = 0, - Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES), - Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER), - Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER), - Letter_Titlecase = U_MASK(U_TITLECASE_LETTER), - Letter_Modifier = U_MASK(U_MODIFIER_LETTER), - Letter_Other = U_MASK(U_OTHER_LETTER), - - Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK), - Mark_Enclosing = U_MASK(U_ENCLOSING_MARK), - Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK), - - Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER), - Number_Letter = U_MASK(U_LETTER_NUMBER), - Number_Other = U_MASK(U_OTHER_NUMBER), - - Separator_Space = U_MASK(U_SPACE_SEPARATOR), - Separator_Line = U_MASK(U_LINE_SEPARATOR), - Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR), - - Other_Control = U_MASK(U_CONTROL_CHAR), - Other_Format = U_MASK(U_FORMAT_CHAR), - Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR), - Other_Surrogate = U_MASK(U_SURROGATE), - - Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION), - Punctuation_Open = U_MASK(U_START_PUNCTUATION), - Punctuation_Close = U_MASK(U_END_PUNCTUATION), - Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION), - Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION), - - Symbol_Math = U_MASK(U_MATH_SYMBOL), - Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL), - Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL), - Symbol_Other = U_MASK(U_OTHER_SYMBOL), - - Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION), - Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION) -}; - -inline UChar32 foldCase(UChar32 c) -{ - return u_foldCase(c, U_FOLD_CASE_DEFAULT); -} - -inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - UErrorCode status = U_ZERO_ERROR; - int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status); - *error = !U_SUCCESS(status); - return realLength; -} - -inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - UErrorCode status = U_ZERO_ERROR; - int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status); - *error = !!U_FAILURE(status); - return realLength; -} - -inline UChar32 toLower(UChar32 c) -{ - return u_tolower(c); -} - -inline UChar32 toUpper(UChar32 c) -{ - return u_toupper(c); -} - -inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - UErrorCode status = U_ZERO_ERROR; - int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status); - *error = !!U_FAILURE(status); - return realLength; -} - -inline UChar32 toTitleCase(UChar32 c) -{ - return u_totitle(c); -} - -inline bool isArabicChar(UChar32 c) -{ - return ublock_getCode(c) == UBLOCK_ARABIC; -} - -inline bool isAlphanumeric(UChar32 c) -{ - return u_isalnum(c); -} - -inline bool isSeparatorSpace(UChar32 c) -{ - return u_charType(c) == U_SPACE_SEPARATOR; -} - -inline bool isPrintableChar(UChar32 c) -{ - return !!u_isprint(c); -} - -inline bool isPunct(UChar32 c) -{ - return !!u_ispunct(c); -} - -inline bool hasLineBreakingPropertyComplexContext(UChar32 c) -{ - return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT; -} - -inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c) -{ - int32_t prop = u_getIntPropertyValue(c, UCHAR_LINE_BREAK); - return prop == U_LB_COMPLEX_CONTEXT || prop == U_LB_IDEOGRAPHIC; -} - -inline UChar32 mirroredChar(UChar32 c) -{ - return u_charMirror(c); -} - -inline CharCategory category(UChar32 c) -{ - return static_cast<CharCategory>(U_GET_GC_MASK(c)); -} - -inline Direction direction(UChar32 c) -{ - return static_cast<Direction>(u_charDirection(c)); -} - -inline bool isLower(UChar32 c) -{ - return !!u_islower(c); -} - -inline uint8_t combiningClass(UChar32 c) -{ - return u_getCombiningClass(c); -} - -inline DecompositionType decompositionType(UChar32 c) -{ - return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE)); -} - -inline int umemcasecmp(const UChar* a, const UChar* b, int len) -{ - return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT); -} - -} } - -#endif // WTF_UNICODE_ICU_H diff --git a/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h b/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h deleted file mode 100644 index eaa7a07..0000000 --- a/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef WTF_UNICODE_QT4_H -#define WTF_UNICODE_QT4_H - -#include "UnicodeMacrosFromICU.h" - -#include <QChar> -#include <QString> - -#include <config.h> - -#include <stdint.h> -#if USE(QT_ICU_TEXT_BREAKING) -#include <unicode/ubrk.h> -#endif - -QT_BEGIN_NAMESPACE -namespace QUnicodeTables { - struct Properties { - ushort category : 8; - ushort line_break_class : 8; - ushort direction : 8; - ushort combiningClass :8; - ushort joining : 2; - signed short digitValue : 6; /* 5 needed */ - ushort unicodeVersion : 4; - ushort lowerCaseSpecial : 1; - ushort upperCaseSpecial : 1; - ushort titleCaseSpecial : 1; - ushort caseFoldSpecial : 1; /* currently unused */ - signed short mirrorDiff : 16; - signed short lowerCaseDiff : 16; - signed short upperCaseDiff : 16; - signed short titleCaseDiff : 16; - signed short caseFoldDiff : 16; - }; - Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4); - Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2); -} -QT_END_NAMESPACE - -// ugly hack to make UChar compatible with JSChar in API/JSStringRef.h -#if defined(Q_OS_WIN) || COMPILER(WINSCW) || (COMPILER(RVCT) && !OS(LINUX)) -typedef wchar_t UChar; -#else -typedef uint16_t UChar; -#endif - -#if !USE(QT_ICU_TEXT_BREAKING) -typedef uint32_t UChar32; -#endif - -namespace WTF { -namespace Unicode { - -enum Direction { - LeftToRight = QChar::DirL, - RightToLeft = QChar::DirR, - EuropeanNumber = QChar::DirEN, - EuropeanNumberSeparator = QChar::DirES, - EuropeanNumberTerminator = QChar::DirET, - ArabicNumber = QChar::DirAN, - CommonNumberSeparator = QChar::DirCS, - BlockSeparator = QChar::DirB, - SegmentSeparator = QChar::DirS, - WhiteSpaceNeutral = QChar::DirWS, - OtherNeutral = QChar::DirON, - LeftToRightEmbedding = QChar::DirLRE, - LeftToRightOverride = QChar::DirLRO, - RightToLeftArabic = QChar::DirAL, - RightToLeftEmbedding = QChar::DirRLE, - RightToLeftOverride = QChar::DirRLO, - PopDirectionalFormat = QChar::DirPDF, - NonSpacingMark = QChar::DirNSM, - BoundaryNeutral = QChar::DirBN -}; - -enum DecompositionType { - DecompositionNone = QChar::NoDecomposition, - DecompositionCanonical = QChar::Canonical, - DecompositionCompat = QChar::Compat, - DecompositionCircle = QChar::Circle, - DecompositionFinal = QChar::Final, - DecompositionFont = QChar::Font, - DecompositionFraction = QChar::Fraction, - DecompositionInitial = QChar::Initial, - DecompositionIsolated = QChar::Isolated, - DecompositionMedial = QChar::Medial, - DecompositionNarrow = QChar::Narrow, - DecompositionNoBreak = QChar::NoBreak, - DecompositionSmall = QChar::Small, - DecompositionSquare = QChar::Square, - DecompositionSub = QChar::Sub, - DecompositionSuper = QChar::Super, - DecompositionVertical = QChar::Vertical, - DecompositionWide = QChar::Wide -}; - -enum CharCategory { - NoCategory = 0, - Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing), - Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining), - Mark_Enclosing = U_MASK(QChar::Mark_Enclosing), - Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit), - Number_Letter = U_MASK(QChar::Number_Letter), - Number_Other = U_MASK(QChar::Number_Other), - Separator_Space = U_MASK(QChar::Separator_Space), - Separator_Line = U_MASK(QChar::Separator_Line), - Separator_Paragraph = U_MASK(QChar::Separator_Paragraph), - Other_Control = U_MASK(QChar::Other_Control), - Other_Format = U_MASK(QChar::Other_Format), - Other_Surrogate = U_MASK(QChar::Other_Surrogate), - Other_PrivateUse = U_MASK(QChar::Other_PrivateUse), - Other_NotAssigned = U_MASK(QChar::Other_NotAssigned), - Letter_Uppercase = U_MASK(QChar::Letter_Uppercase), - Letter_Lowercase = U_MASK(QChar::Letter_Lowercase), - Letter_Titlecase = U_MASK(QChar::Letter_Titlecase), - Letter_Modifier = U_MASK(QChar::Letter_Modifier), - Letter_Other = U_MASK(QChar::Letter_Other), - Punctuation_Connector = U_MASK(QChar::Punctuation_Connector), - Punctuation_Dash = U_MASK(QChar::Punctuation_Dash), - Punctuation_Open = U_MASK(QChar::Punctuation_Open), - Punctuation_Close = U_MASK(QChar::Punctuation_Close), - Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote), - Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote), - Punctuation_Other = U_MASK(QChar::Punctuation_Other), - Symbol_Math = U_MASK(QChar::Symbol_Math), - Symbol_Currency = U_MASK(QChar::Symbol_Currency), - Symbol_Modifier = U_MASK(QChar::Symbol_Modifier), - Symbol_Other = U_MASK(QChar::Symbol_Other) -}; - - -// FIXME: handle surrogates correctly in all methods - -inline UChar32 toLower(UChar32 ch) -{ - return QChar::toLower(uint32_t(ch)); -} - -inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - const UChar *e = src + srcLength; - const UChar *s = src; - UChar *r = result; - uint rindex = 0; - - // this avoids one out of bounds check in the loop - if (s < e && QChar(*s).isLowSurrogate()) { - if (r) - r[rindex] = *s++; - ++rindex; - } - - int needed = 0; - while (s < e && (rindex < uint(resultLength) || !r)) { - uint c = *s; - if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) - c = QChar::surrogateToUcs4(*(s - 1), c); - const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); - if (prop->lowerCaseSpecial) { - QString qstring; - if (c < 0x10000) { - qstring += QChar(c); - } else { - qstring += QChar(*(s-1)); - qstring += QChar(*s); - } - qstring = qstring.toLower(); - for (int i = 0; i < qstring.length(); ++i) { - if (rindex >= uint(resultLength)) { - needed += qstring.length() - i; - break; - } - if (r) - r[rindex] = qstring.at(i).unicode(); - ++rindex; - } - } else { - if (r) - r[rindex] = *s + prop->lowerCaseDiff; - ++rindex; - } - ++s; - } - if (s < e) - needed += e - s; - *error = (needed != 0); - if (rindex < uint(resultLength)) - r[rindex] = 0; - return rindex + needed; -} - -inline UChar32 toUpper(UChar32 c) -{ - return QChar::toUpper(uint32_t(c)); -} - -inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - const UChar *e = src + srcLength; - const UChar *s = src; - UChar *r = result; - int rindex = 0; - - // this avoids one out of bounds check in the loop - if (s < e && QChar(*s).isLowSurrogate()) { - if (r) - r[rindex] = *s++; - ++rindex; - } - - int needed = 0; - while (s < e && (rindex < resultLength || !r)) { - uint c = *s; - if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) - c = QChar::surrogateToUcs4(*(s - 1), c); - const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); - if (prop->upperCaseSpecial) { - QString qstring; - if (c < 0x10000) { - qstring += QChar(c); - } else { - qstring += QChar(*(s-1)); - qstring += QChar(*s); - } - qstring = qstring.toUpper(); - for (int i = 0; i < qstring.length(); ++i) { - if (rindex >= resultLength) { - needed += qstring.length() - i; - break; - } - if (r) - r[rindex] = qstring.at(i).unicode(); - ++rindex; - } - } else { - if (r) - r[rindex] = *s + prop->upperCaseDiff; - ++rindex; - } - ++s; - } - if (s < e) - needed += e - s; - *error = (needed != 0); - if (rindex < resultLength) - r[rindex] = 0; - return rindex + needed; -} - -inline int toTitleCase(UChar32 c) -{ - return QChar::toTitleCase(uint32_t(c)); -} - -inline UChar32 foldCase(UChar32 c) -{ - return QChar::toCaseFolded(uint32_t(c)); -} - -inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) -{ - // FIXME: handle special casing. Easiest with some low level API in Qt - *error = false; - if (resultLength < srcLength) { - *error = true; - return srcLength; - } - for (int i = 0; i < srcLength; ++i) - result[i] = QChar::toCaseFolded(ushort(src[i])); - return srcLength; -} - -inline bool isArabicChar(UChar32 c) -{ - return c >= 0x0600 && c <= 0x06FF; -} - -inline bool isPrintableChar(UChar32 c) -{ - const uint test = U_MASK(QChar::Other_Control) | - U_MASK(QChar::Other_NotAssigned); - return !(U_MASK(QChar::category(uint32_t(c))) & test); -} - -inline bool isSeparatorSpace(UChar32 c) -{ - return QChar::category(uint32_t(c)) == QChar::Separator_Space; -} - -inline bool isPunct(UChar32 c) -{ - const uint test = U_MASK(QChar::Punctuation_Connector) | - U_MASK(QChar::Punctuation_Dash) | - U_MASK(QChar::Punctuation_Open) | - U_MASK(QChar::Punctuation_Close) | - U_MASK(QChar::Punctuation_InitialQuote) | - U_MASK(QChar::Punctuation_FinalQuote) | - U_MASK(QChar::Punctuation_Other); - return U_MASK(QChar::category(uint32_t(c))) & test; -} - -inline bool isLower(UChar32 c) -{ - return QChar::category(uint32_t(c)) == QChar::Letter_Lowercase; -} - -inline bool hasLineBreakingPropertyComplexContext(UChar32) -{ - // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context). - return false; -} - -inline UChar32 mirroredChar(UChar32 c) -{ - return QChar::mirroredChar(uint32_t(c)); -} - -inline uint8_t combiningClass(UChar32 c) -{ - return QChar::combiningClass(uint32_t(c)); -} - -inline DecompositionType decompositionType(UChar32 c) -{ - return (DecompositionType)QChar::decompositionTag(c); -} - -inline int umemcasecmp(const UChar* a, const UChar* b, int len) -{ - // handle surrogates correctly - for (int i = 0; i < len; ++i) { - uint c1 = QChar::toCaseFolded(ushort(a[i])); - uint c2 = QChar::toCaseFolded(ushort(b[i])); - if (c1 != c2) - return c1 - c2; - } - return 0; -} - -inline Direction direction(UChar32 c) -{ - return (Direction)QChar::direction(uint32_t(c)); -} - -inline CharCategory category(UChar32 c) -{ - return (CharCategory) U_MASK(QChar::category(uint32_t(c))); -} - -} } - -#endif // WTF_UNICODE_QT4_H diff --git a/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp b/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp deleted file mode 100644 index 96dac7d..0000000 --- a/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" -#include "UnicodeWinCE.h" - -#include <wchar.h> - -namespace WTF { -namespace Unicode { - -UChar toLower(UChar c) -{ - return towlower(c); -} - -UChar toUpper(UChar c) -{ - return towupper(c); -} - -UChar foldCase(UChar c) -{ - return towlower(c); -} - -bool isPrintableChar(UChar c) -{ - return !!iswprint(c); -} - -bool isSpace(UChar c) -{ - return !!iswspace(c); -} - -bool isLetter(UChar c) -{ - return !!iswalpha(c); -} - -bool isUpper(UChar c) -{ - return !!iswupper(c); -} - -bool isLower(UChar c) -{ - return !!iswlower(c); -} - -bool isDigit(UChar c) -{ - return !!iswdigit(c); -} - -bool isPunct(UChar c) -{ - return !!iswpunct(c); -} - -bool isAlphanumeric(UChar c) -{ - return !!iswalnum(c); -} - -int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) -{ - const UChar* sourceIterator = source; - const UChar* sourceEnd = source + sourceLength; - UChar* resultIterator = result; - UChar* resultEnd = result + resultLength; - - int remainingCharacters = 0; - if (sourceLength <= resultLength) - while (sourceIterator < sourceEnd) - *resultIterator++ = towlower(*sourceIterator++); - else - while (resultIterator < resultEnd) - *resultIterator++ = towlower(*sourceIterator++); - - if (sourceIterator < sourceEnd) - remainingCharacters += sourceEnd - sourceIterator; - *isError = !!remainingCharacters; - if (resultIterator < resultEnd) - *resultIterator = 0; - - return (resultIterator - result) + remainingCharacters; -} - -int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) -{ - const UChar* sourceIterator = source; - const UChar* sourceEnd = source + sourceLength; - UChar* resultIterator = result; - UChar* resultEnd = result + resultLength; - - int remainingCharacters = 0; - if (sourceLength <= resultLength) - while (sourceIterator < sourceEnd) - *resultIterator++ = towupper(*sourceIterator++); - else - while (resultIterator < resultEnd) - *resultIterator++ = towupper(*sourceIterator++); - - if (sourceIterator < sourceEnd) - remainingCharacters += sourceEnd - sourceIterator; - *isError = !!remainingCharacters; - if (resultIterator < resultEnd) - *resultIterator = 0; - - return (resultIterator - result) + remainingCharacters; -} - -int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError) -{ - *isError = false; - if (resultLength < sourceLength) { - *isError = true; - return sourceLength; - } - for (int i = 0; i < sourceLength; ++i) - result[i] = foldCase(source[i]); - return sourceLength; -} - -UChar toTitleCase(UChar c) -{ - return towupper(c); -} - -Direction direction(UChar32 c) -{ - return static_cast<Direction>(UnicodeCE::direction(c)); -} - -CharCategory category(unsigned int c) -{ - return static_cast<CharCategory>(TO_MASK((__int8) UnicodeCE::category(c))); -} - -DecompositionType decompositionType(UChar32 c) -{ - return static_cast<DecompositionType>(UnicodeCE::decompositionType(c)); -} - -unsigned char combiningClass(UChar32 c) -{ - return UnicodeCE::combiningClass(c); -} - -UChar mirroredChar(UChar32 c) -{ - return UnicodeCE::mirroredChar(c); -} - -int digitValue(UChar c) -{ - return UnicodeCE::digitValue(c); -} - -} // namespace Unicode -} // namespace WTF diff --git a/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h b/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h deleted file mode 100644 index 2688aa9..0000000 --- a/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (C) 2006 George Staikos <staikos@kde.org> - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2007 Apple Computer, Inc. All rights reserved. - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef WTF_UnicodeWinCE_h -#define WTF_UnicodeWinCE_h - -#include "UnicodeMacrosFromICU.h" - -#include "ce_unicode.h" - -#define TO_MASK(x) (1 << (x)) - -namespace WTF { -namespace Unicode { - -enum Direction { - LeftToRight = UnicodeCE::U_LEFT_TO_RIGHT, - RightToLeft = UnicodeCE::U_RIGHT_TO_LEFT, - EuropeanNumber = UnicodeCE::U_EUROPEAN_NUMBER, - EuropeanNumberSeparator = UnicodeCE::U_EUROPEAN_NUMBER_SEPARATOR, - EuropeanNumberTerminator = UnicodeCE::U_EUROPEAN_NUMBER_TERMINATOR, - ArabicNumber = UnicodeCE::U_ARABIC_NUMBER, - CommonNumberSeparator = UnicodeCE::U_COMMON_NUMBER_SEPARATOR, - BlockSeparator = UnicodeCE::U_BLOCK_SEPARATOR, - SegmentSeparator = UnicodeCE::U_SEGMENT_SEPARATOR, - WhiteSpaceNeutral = UnicodeCE::U_WHITE_SPACE_NEUTRAL, - OtherNeutral = UnicodeCE::U_OTHER_NEUTRAL, - LeftToRightEmbedding = UnicodeCE::U_LEFT_TO_RIGHT_EMBEDDING, - LeftToRightOverride = UnicodeCE::U_LEFT_TO_RIGHT_OVERRIDE, - RightToLeftArabic = UnicodeCE::U_RIGHT_TO_LEFT_ARABIC, - RightToLeftEmbedding = UnicodeCE::U_RIGHT_TO_LEFT_EMBEDDING, - RightToLeftOverride = UnicodeCE::U_RIGHT_TO_LEFT_OVERRIDE, - PopDirectionalFormat = UnicodeCE::U_POP_DIRECTIONAL_FORMAT, - NonSpacingMark = UnicodeCE::U_DIR_NON_SPACING_MARK, - BoundaryNeutral = UnicodeCE::U_BOUNDARY_NEUTRAL -}; - -enum DecompositionType { - DecompositionNone = UnicodeCE::U_DT_NONE, - DecompositionCanonical = UnicodeCE::U_DT_CANONICAL, - DecompositionCompat = UnicodeCE::U_DT_COMPAT, - DecompositionCircle = UnicodeCE::U_DT_CIRCLE, - DecompositionFinal = UnicodeCE::U_DT_FINAL, - DecompositionFont = UnicodeCE::U_DT_FONT, - DecompositionFraction = UnicodeCE::U_DT_FRACTION, - DecompositionInitial = UnicodeCE::U_DT_INITIAL, - DecompositionIsolated = UnicodeCE::U_DT_ISOLATED, - DecompositionMedial = UnicodeCE::U_DT_MEDIAL, - DecompositionNarrow = UnicodeCE::U_DT_NARROW, - DecompositionNoBreak = UnicodeCE::U_DT_NOBREAK, - DecompositionSmall = UnicodeCE::U_DT_SMALL, - DecompositionSquare = UnicodeCE::U_DT_SQUARE, - DecompositionSub = UnicodeCE::U_DT_SUB, - DecompositionSuper = UnicodeCE::U_DT_SUPER, - DecompositionVertical = UnicodeCE::U_DT_VERTICAL, - DecompositionWide = UnicodeCE::U_DT_WIDE -}; - -enum CharCategory { - NoCategory = 0, - Other_NotAssigned = TO_MASK(UnicodeCE::U_GENERAL_OTHER_TYPES), - Letter_Uppercase = TO_MASK(UnicodeCE::U_UPPERCASE_LETTER), - Letter_Lowercase = TO_MASK(UnicodeCE::U_LOWERCASE_LETTER), - Letter_Titlecase = TO_MASK(UnicodeCE::U_TITLECASE_LETTER), - Letter_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_LETTER), - Letter_Other = TO_MASK(UnicodeCE::U_OTHER_LETTER), - - Mark_NonSpacing = TO_MASK(UnicodeCE::U_NON_SPACING_MARK), - Mark_Enclosing = TO_MASK(UnicodeCE::U_ENCLOSING_MARK), - Mark_SpacingCombining = TO_MASK(UnicodeCE::U_COMBINING_SPACING_MARK), - - Number_DecimalDigit = TO_MASK(UnicodeCE::U_DECIMAL_DIGIT_NUMBER), - Number_Letter = TO_MASK(UnicodeCE::U_LETTER_NUMBER), - Number_Other = TO_MASK(UnicodeCE::U_OTHER_NUMBER), - - Separator_Space = TO_MASK(UnicodeCE::U_SPACE_SEPARATOR), - Separator_Line = TO_MASK(UnicodeCE::U_LINE_SEPARATOR), - Separator_Paragraph = TO_MASK(UnicodeCE::U_PARAGRAPH_SEPARATOR), - - Other_Control = TO_MASK(UnicodeCE::U_CONTROL_CHAR), - Other_Format = TO_MASK(UnicodeCE::U_FORMAT_CHAR), - Other_PrivateUse = TO_MASK(UnicodeCE::U_PRIVATE_USE_CHAR), - Other_Surrogate = TO_MASK(UnicodeCE::U_SURROGATE), - - Punctuation_Dash = TO_MASK(UnicodeCE::U_DASH_PUNCTUATION), - Punctuation_Open = TO_MASK(UnicodeCE::U_START_PUNCTUATION), - Punctuation_Close = TO_MASK(UnicodeCE::U_END_PUNCTUATION), - Punctuation_Connector = TO_MASK(UnicodeCE::U_CONNECTOR_PUNCTUATION), - Punctuation_Other = TO_MASK(UnicodeCE::U_OTHER_PUNCTUATION), - - Symbol_Math = TO_MASK(UnicodeCE::U_MATH_SYMBOL), - Symbol_Currency = TO_MASK(UnicodeCE::U_CURRENCY_SYMBOL), - Symbol_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_SYMBOL), - Symbol_Other = TO_MASK(UnicodeCE::U_OTHER_SYMBOL), - - Punctuation_InitialQuote = TO_MASK(UnicodeCE::U_INITIAL_PUNCTUATION), - Punctuation_FinalQuote = TO_MASK(UnicodeCE::U_FINAL_PUNCTUATION) -}; - -CharCategory category(unsigned int); - -bool isSpace(UChar); -bool isLetter(UChar); -bool isPrintableChar(UChar); -bool isUpper(UChar); -bool isLower(UChar); -bool isPunct(UChar); -bool isDigit(UChar); -bool isAlphanumeric(UChar); -inline bool isSeparatorSpace(UChar c) { return category(c) == Separator_Space; } -inline bool isHighSurrogate(UChar c) { return (c & 0xfc00) == 0xd800; } -inline bool isLowSurrogate(UChar c) { return (c & 0xfc00) == 0xdc00; } - -UChar toLower(UChar); -UChar toUpper(UChar); -UChar foldCase(UChar); -UChar toTitleCase(UChar); -int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); -int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); -int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError); - -int digitValue(UChar); - -UChar mirroredChar(UChar32); -unsigned char combiningClass(UChar32); -DecompositionType decompositionType(UChar32); -Direction direction(UChar32); -inline bool isArabicChar(UChar32 c) -{ - return c >= 0x0600 && c <= 0x06FF; -} - -inline bool hasLineBreakingPropertyComplexContext(UChar32) -{ - return false; // FIXME: implement! -} - -inline int umemcasecmp(const UChar* a, const UChar* b, int len) -{ - for (int i = 0; i < len; ++i) { - UChar c1 = foldCase(a[i]); - UChar c2 = foldCase(b[i]); - if (c1 != c2) - return c1 - c2; - } - return 0; -} - -inline UChar32 surrogateToUcs4(UChar high, UChar low) -{ - return (UChar32(high) << 10) + low - 0x35fdc00; -} - -} // namespace Unicode -} // namespace WTF - -#endif // WTF_UnicodeWinCE_h |