summaryrefslogtreecommitdiffstats
path: root/Source/JavaScriptCore/wtf/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'Source/JavaScriptCore/wtf/unicode')
-rw-r--r--Source/JavaScriptCore/wtf/unicode/Collator.h67
-rw-r--r--Source/JavaScriptCore/wtf/unicode/CollatorDefault.cpp75
-rw-r--r--Source/JavaScriptCore/wtf/unicode/UTF8.cpp401
-rw-r--r--Source/JavaScriptCore/wtf/unicode/UTF8.h80
-rw-r--r--Source/JavaScriptCore/wtf/unicode/Unicode.h44
-rw-r--r--Source/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h100
-rw-r--r--Source/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp181
-rw-r--r--Source/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h194
-rw-r--r--Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp192
-rw-r--r--Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h243
-rw-r--r--Source/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp149
-rw-r--r--Source/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h235
-rw-r--r--Source/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h375
-rw-r--r--Source/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp181
-rw-r--r--Source/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h177
15 files changed, 2694 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/wtf/unicode/Collator.h b/Source/JavaScriptCore/wtf/unicode/Collator.h
new file mode 100644
index 0000000..fe6a809
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/Collator.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef WTF_Collator_h
+#define WTF_Collator_h
+
+#include <wtf/Noncopyable.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
+struct UCollator;
+#endif
+
+namespace WTF {
+
+ class Collator : public Noncopyable {
+ public:
+ enum Result { Equal = 0, Greater = 1, Less = -1 };
+
+ Collator(const char* locale); // Parsing is lenient; e.g. language identifiers (such as "en-US") are accepted, too.
+ ~Collator();
+ void setOrderLowerFirst(bool);
+
+ static PassOwnPtr<Collator> userDefault();
+
+ Result collate(const ::UChar*, size_t, const ::UChar*, size_t) const;
+
+ private:
+#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
+ void createCollator() const;
+ void releaseCollator();
+ mutable UCollator* m_collator;
+#endif
+ char* m_locale;
+ bool m_lowerFirst;
+ };
+}
+
+using WTF::Collator;
+
+#endif
diff --git a/Source/JavaScriptCore/wtf/unicode/CollatorDefault.cpp b/Source/JavaScriptCore/wtf/unicode/CollatorDefault.cpp
new file mode 100644
index 0000000..4e05432
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/CollatorDefault.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "Collator.h"
+
+#if !USE(ICU_UNICODE) || UCONFIG_NO_COLLATION
+
+namespace WTF {
+
+Collator::Collator(const char*)
+{
+}
+
+Collator::~Collator()
+{
+}
+
+void Collator::setOrderLowerFirst(bool)
+{
+}
+
+PassOwnPtr<Collator> Collator::userDefault()
+{
+ return new Collator(0);
+}
+
+// A default implementation for platforms that lack Unicode-aware collation.
+Collator::Result Collator::collate(const UChar* lhs, size_t lhsLength, const UChar* rhs, size_t rhsLength) const
+{
+ int lmin = lhsLength < rhsLength ? lhsLength : rhsLength;
+ int l = 0;
+ while (l < lmin && *lhs == *rhs) {
+ lhs++;
+ rhs++;
+ l++;
+ }
+
+ if (l < lmin)
+ return (*lhs > *rhs) ? Greater : Less;
+
+ if (lhsLength == rhsLength)
+ return Equal;
+
+ return (lhsLength > rhsLength) ? Greater : Less;
+}
+
+}
+
+#endif
diff --git a/Source/JavaScriptCore/wtf/unicode/UTF8.cpp b/Source/JavaScriptCore/wtf/unicode/UTF8.cpp
new file mode 100644
index 0000000..dc24ed5
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/UTF8.cpp
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "UTF8.h"
+#include <wtf/StringHasher.h>
+
+#include "ASCIICType.h"
+
+namespace WTF {
+namespace Unicode {
+
+// FIXME: Use definition from CharacterNames.h.
+static const UChar replacementCharacter = 0xFFFD;
+
+inline int inlineUTF8SequenceLengthNonASCII(char b0)
+{
+ if ((b0 & 0xC0) != 0xC0)
+ return 0;
+ if ((b0 & 0xE0) == 0xC0)
+ return 2;
+ if ((b0 & 0xF0) == 0xE0)
+ return 3;
+ if ((b0 & 0xF8) == 0xF0)
+ return 4;
+ return 0;
+}
+
+inline int inlineUTF8SequenceLength(char b0)
+{
+ return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
+}
+
+int UTF8SequenceLength(char b0)
+{
+ return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
+}
+
+int decodeUTF8Sequence(const char* sequence)
+{
+ // Handle 0-byte sequences (never valid).
+ const unsigned char b0 = sequence[0];
+ const int length = inlineUTF8SequenceLength(b0);
+ if (length == 0)
+ return -1;
+
+ // Handle 1-byte sequences (plain ASCII).
+ const unsigned char b1 = sequence[1];
+ if (length == 1) {
+ if (b1)
+ return -1;
+ return b0;
+ }
+
+ // Handle 2-byte sequences.
+ if ((b1 & 0xC0) != 0x80)
+ return -1;
+ const unsigned char b2 = sequence[2];
+ if (length == 2) {
+ if (b2)
+ return -1;
+ const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F);
+ if (c < 0x80)
+ return -1;
+ return c;
+ }
+
+ // Handle 3-byte sequences.
+ if ((b2 & 0xC0) != 0x80)
+ return -1;
+ const unsigned char b3 = sequence[3];
+ if (length == 3) {
+ if (b3)
+ return -1;
+ const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F);
+ if (c < 0x800)
+ return -1;
+ // UTF-16 surrogates should never appear in UTF-8 data.
+ if (c >= 0xD800 && c <= 0xDFFF)
+ return -1;
+ return c;
+ }
+
+ // Handle 4-byte sequences.
+ if ((b3 & 0xC0) != 0x80)
+ return -1;
+ const unsigned char b4 = sequence[4];
+ if (length == 4) {
+ if (b4)
+ return -1;
+ const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
+ if (c < 0x10000 || c > 0x10FFFF)
+ return -1;
+ return c;
+ }
+
+ return -1;
+}
+
+// Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+// into the first byte, depending on how many bytes follow. There are
+// as many entries in this table as there are UTF-8 sequence types.
+// (I.e., one byte sequence, two byte... etc.). Remember that sequencs
+// for *legal* UTF-8 will be 4 or fewer bytes total.
+static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+ConversionResult convertUTF16ToUTF8(
+ const UChar** sourceStart, const UChar* sourceEnd,
+ char** targetStart, char* targetEnd, bool strict)
+{
+ ConversionResult result = conversionOK;
+ const UChar* source = *sourceStart;
+ char* target = *targetStart;
+ while (source < sourceEnd) {
+ UChar32 ch;
+ unsigned short bytesToWrite = 0;
+ const UChar32 byteMask = 0xBF;
+ const UChar32 byteMark = 0x80;
+ const UChar* oldSource = source; // In case we have to back up because of target overflow.
+ ch = static_cast<unsigned short>(*source++);
+ // If we have a surrogate pair, convert to UChar32 first.
+ if (ch >= 0xD800 && ch <= 0xDBFF) {
+ // If the 16 bits following the high surrogate are in the source buffer...
+ if (source < sourceEnd) {
+ UChar32 ch2 = static_cast<unsigned short>(*source);
+ // If it's a low surrogate, convert to UChar32.
+ if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+ ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000;
+ ++source;
+ } else if (strict) { // it's an unpaired high surrogate
+ --source; // return to the illegal value itself
+ result = sourceIllegal;
+ break;
+ }
+ } else { // We don't have the 16 bits following the high surrogate.
+ --source; // return to the high surrogate
+ result = sourceExhausted;
+ break;
+ }
+ } else if (strict) {
+ // UTF-16 surrogate values are illegal in UTF-32
+ if (ch >= 0xDC00 && ch <= 0xDFFF) {
+ --source; // return to the illegal value itself
+ result = sourceIllegal;
+ break;
+ }
+ }
+ // Figure out how many bytes the result will require
+ if (ch < (UChar32)0x80) {
+ bytesToWrite = 1;
+ } else if (ch < (UChar32)0x800) {
+ bytesToWrite = 2;
+ } else if (ch < (UChar32)0x10000) {
+ bytesToWrite = 3;
+ } else if (ch < (UChar32)0x110000) {
+ bytesToWrite = 4;
+ } else {
+ bytesToWrite = 3;
+ ch = replacementCharacter;
+ }
+
+ target += bytesToWrite;
+ if (target > targetEnd) {
+ source = oldSource; // Back up source pointer!
+ target -= bytesToWrite;
+ result = targetExhausted;
+ break;
+ }
+ switch (bytesToWrite) { // note: everything falls through.
+ case 4: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6;
+ case 3: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6;
+ case 2: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6;
+ case 1: *--target = (char)(ch | firstByteMark[bytesToWrite]);
+ }
+ target += bytesToWrite;
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+// This must be called with the length pre-determined by the first byte.
+// If presented with a length > 4, this returns false. The Unicode
+// definition of UTF-8 goes up to 4-byte sequences.
+static bool isLegalUTF8(const unsigned char* source, int length)
+{
+ unsigned char a;
+ const unsigned char* srcptr = source + length;
+ switch (length) {
+ default: return false;
+ // Everything else falls through when "true"...
+ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+ case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+ case 2: if ((a = (*--srcptr)) > 0xBF) return false;
+
+ switch (*source) {
+ // no fall-through in this inner switch
+ case 0xE0: if (a < 0xA0) return false; break;
+ case 0xED: if (a > 0x9F) return false; break;
+ case 0xF0: if (a < 0x90) return false; break;
+ case 0xF4: if (a > 0x8F) return false; break;
+ default: if (a < 0x80) return false;
+ }
+
+ case 1: if (*source >= 0x80 && *source < 0xC2) return false;
+ }
+ if (*source > 0xF4)
+ return false;
+ return true;
+}
+
+// Magic values subtracted from a buffer value during UTF8 conversion.
+// This table contains as many values as there might be trailing bytes
+// in a UTF-8 sequence.
+static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length)
+{
+ UChar32 character = 0;
+
+ // The cases all fall through.
+ switch (length) {
+ case 6: character += static_cast<unsigned char>(*sequence++); character <<= 6;
+ case 5: character += static_cast<unsigned char>(*sequence++); character <<= 6;
+ case 4: character += static_cast<unsigned char>(*sequence++); character <<= 6;
+ case 3: character += static_cast<unsigned char>(*sequence++); character <<= 6;
+ case 2: character += static_cast<unsigned char>(*sequence++); character <<= 6;
+ case 1: character += static_cast<unsigned char>(*sequence++);
+ }
+
+ return character - offsetsFromUTF8[length - 1];
+}
+
+ConversionResult convertUTF8ToUTF16(
+ const char** sourceStart, const char* sourceEnd,
+ UChar** targetStart, UChar* targetEnd, bool strict)
+{
+ ConversionResult result = conversionOK;
+ const char* source = *sourceStart;
+ UChar* target = *targetStart;
+ while (source < sourceEnd) {
+ int utf8SequenceLength = inlineUTF8SequenceLength(*source);
+ if (sourceEnd - source < utf8SequenceLength) {
+ result = sourceExhausted;
+ break;
+ }
+ // Do this check whether lenient or strict
+ if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8SequenceLength)) {
+ result = sourceIllegal;
+ break;
+ }
+
+ UChar32 character = readUTF8Sequence(source, utf8SequenceLength);
+
+ if (target >= targetEnd) {
+ source -= utf8SequenceLength; // Back up source pointer!
+ result = targetExhausted;
+ break;
+ }
+
+ if (U_IS_BMP(character)) {
+ // UTF-16 surrogate values are illegal in UTF-32
+ if (U_IS_SURROGATE(character)) {
+ if (strict) {
+ source -= utf8SequenceLength; // return to the illegal value itself
+ result = sourceIllegal;
+ break;
+ } else
+ *target++ = replacementCharacter;
+ } else
+ *target++ = character; // normal case
+ } else if (U_IS_SUPPLEMENTARY(character)) {
+ // target is a character in range 0xFFFF - 0x10FFFF
+ if (target + 1 >= targetEnd) {
+ source -= utf8SequenceLength; // Back up source pointer!
+ result = targetExhausted;
+ break;
+ }
+ *target++ = U16_LEAD(character);
+ *target++ = U16_TRAIL(character);
+ } else {
+ if (strict) {
+ source -= utf8SequenceLength; // return to the start
+ result = sourceIllegal;
+ break; // Bail out; shouldn't continue
+ } else
+ *target++ = replacementCharacter;
+ }
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length)
+{
+ if (!data)
+ return 0;
+
+ WTF::StringHasher stringHasher;
+ utf16Length = 0;
+
+ while (data < dataEnd) {
+ if (isASCII(*data)) {
+ stringHasher.addCharacter(*data++);
+ utf16Length++;
+ continue;
+ }
+
+ int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data);
+
+ if (dataEnd - data < utf8SequenceLength)
+ return false;
+
+ if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength))
+ return 0;
+
+ UChar32 character = readUTF8Sequence(data, utf8SequenceLength);
+ ASSERT(!isASCII(character));
+
+ if (U_IS_BMP(character)) {
+ // UTF-16 surrogate values are illegal in UTF-32
+ if (U_IS_SURROGATE(character))
+ return 0;
+ stringHasher.addCharacter(static_cast<UChar>(character)); // normal case
+ utf16Length++;
+ } else if (U_IS_SUPPLEMENTARY(character)) {
+ stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)),
+ static_cast<UChar>(U16_TRAIL(character)));
+ utf16Length += 2;
+ } else
+ return 0;
+ }
+
+ return stringHasher.hash();
+}
+
+bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd)
+{
+ while (b < bEnd) {
+ if (isASCII(*b)) {
+ if (*a++ != *b++)
+ return false;
+ continue;
+ }
+
+ int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*b);
+
+ if (bEnd - b < utf8SequenceLength)
+ return false;
+
+ if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(b), utf8SequenceLength))
+ return 0;
+
+ UChar32 character = readUTF8Sequence(b, utf8SequenceLength);
+ ASSERT(!isASCII(character));
+
+ if (U_IS_BMP(character)) {
+ // UTF-16 surrogate values are illegal in UTF-32
+ if (U_IS_SURROGATE(character))
+ return false;
+ if (*a++ != character)
+ return false;
+ } else if (U_IS_SUPPLEMENTARY(character)) {
+ if (*a++ != U16_LEAD(character))
+ return false;
+ if (*a++ != U16_TRAIL(character))
+ return false;
+ } else
+ return false;
+ }
+
+ return a == aEnd;
+}
+
+} // namespace Unicode
+} // namespace WTF
diff --git a/Source/JavaScriptCore/wtf/unicode/UTF8.h b/Source/JavaScriptCore/wtf/unicode/UTF8.h
new file mode 100644
index 0000000..1f4baca
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/UTF8.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef WTF_UTF8_h
+#define WTF_UTF8_h
+
+#include "Unicode.h"
+
+namespace WTF {
+namespace Unicode {
+
+ // Given a first byte, gives the length of the UTF-8 sequence it begins.
+ // Returns 0 for bytes that are not legal starts of UTF-8 sequences.
+ // Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF).
+ int UTF8SequenceLength(char);
+
+ // Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character.
+ // Only allows Unicode characters (U-00000000 to U-0010FFFF).
+ // Returns -1 if the sequence is not valid (including presence of extra bytes).
+ int decodeUTF8Sequence(const char*);
+
+ typedef enum {
+ conversionOK, // conversion successful
+ sourceExhausted, // partial character in source, but hit end
+ targetExhausted, // insuff. room in target for conversion
+ sourceIllegal // source sequence is illegal/malformed
+ } ConversionResult;
+
+ // These conversion functions take a "strict" argument. When this
+ // flag is set to strict, both irregular sequences and isolated surrogates
+ // will cause an error. When the flag is set to lenient, both irregular
+ // sequences and isolated surrogates are converted.
+ //
+ // Whether the flag is strict or lenient, all illegal sequences will cause
+ // an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
+ // or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
+ // must check for illegal sequences.
+ //
+ // When the flag is set to lenient, characters over 0x10FFFF are converted
+ // to the replacement character; otherwise (when the flag is set to strict)
+ // they constitute an error.
+
+ ConversionResult convertUTF8ToUTF16(
+ const char** sourceStart, const char* sourceEnd,
+ UChar** targetStart, UChar* targetEnd, bool strict = true);
+
+ ConversionResult convertUTF16ToUTF8(
+ const UChar** sourceStart, const UChar* sourceEnd,
+ char** targetStart, char* targetEnd, bool strict = true);
+
+ unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length);
+
+ bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd);
+
+} // namespace Unicode
+} // namespace WTF
+
+#endif // WTF_UTF8_h
diff --git a/Source/JavaScriptCore/wtf/unicode/Unicode.h b/Source/JavaScriptCore/wtf/unicode/Unicode.h
new file mode 100644
index 0000000..50524b1
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/Unicode.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef WTF_UNICODE_H
+#define WTF_UNICODE_H
+
+#include <wtf/Assertions.h>
+
+#if USE(QT4_UNICODE)
+#include "qt4/UnicodeQt4.h"
+#elif USE(ICU_UNICODE)
+#include <wtf/unicode/icu/UnicodeIcu.h>
+#elif USE(GLIB_UNICODE)
+#include <wtf/unicode/glib/UnicodeGLib.h>
+#elif USE(WINCE_UNICODE)
+#include <wtf/unicode/wince/UnicodeWinCE.h>
+#elif USE(BREWMP_UNICODE)
+#include <wtf/unicode/brew/UnicodeBrew.h>
+#else
+#error "Unknown Unicode implementation"
+#endif
+
+COMPILE_ASSERT(sizeof(UChar) == 2, UCharIsTwoBytes);
+
+#endif // WTF_UNICODE_H
diff --git a/Source/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h b/Source/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h
new file mode 100644
index 0000000..8959912
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/UnicodeMacrosFromICU.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 1999-2004, International Business Machines Corporation and others. All Rights Reserved.
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef UnicodeMacrosFromICU_h
+#define UnicodeMacrosFromICU_h
+
+// some defines from ICU
+
+#define U_IS_BMP(c) ((UChar32)(c)<=0xffff)
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+ (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+
+#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+#define U16_LENGTH(c) ((uint32_t)(c) <= 0xffff ? 1 : 2)
+
+#define U_IS_SUPPLEMENTARY(c) ((UChar32)((c)-0x10000)<=0xfffff)
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+#define U16_GET(s, start, i, length, c) { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } else { \
+ if((i)-1>=(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+ } \
+}
+
+#define U16_PREV(s, start, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+}
+
+#define U16_BACK_1(s, start, i) { \
+ if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+}
+
+#define U16_NEXT(s, i, length, c) { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ uint16_t __c2; \
+ if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } \
+}
+
+#define U16_FWD_1(s, i, length) { \
+ if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+}
+
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+#endif
diff --git a/Source/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp b/Source/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp
new file mode 100644
index 0000000..8367f17
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ * Copyright (C) 2010 Company 100, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "UnicodeBrew.h"
+
+#include <wchar.h>
+#include <wctype.h>
+
+namespace WTF {
+namespace Unicode {
+
+UChar toLower(UChar c)
+{
+ return towlower(c);
+}
+
+UChar toUpper(UChar c)
+{
+ return towupper(c);
+}
+
+UChar foldCase(UChar c)
+{
+ return towlower(c);
+}
+
+bool isPrintableChar(UChar c)
+{
+ return !!iswprint(c);
+}
+
+bool isUpper(UChar c)
+{
+ return !!iswupper(c);
+}
+
+bool isLower(UChar c)
+{
+ return !!iswlower(c);
+}
+
+bool isDigit(UChar c)
+{
+ return !!iswdigit(c);
+}
+
+bool isPunct(UChar c)
+{
+ return !!iswpunct(c);
+}
+
+bool isAlphanumeric(UChar c)
+{
+ return !!iswalnum(c);
+}
+
+int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError)
+{
+ const UChar* sourceIterator = source;
+ const UChar* sourceEnd = source + sourceLength;
+ UChar* resultIterator = result;
+ UChar* resultEnd = result + resultLength;
+
+ if (sourceLength <= resultLength) {
+ while (sourceIterator < sourceEnd)
+ *resultIterator++ = towlower(*sourceIterator++);
+ } else {
+ while (resultIterator < resultEnd)
+ *resultIterator++ = towlower(*sourceIterator++);
+ }
+
+ int remainingCharacters = sourceIterator < sourceEnd ? sourceEnd - sourceIterator : 0;
+ *isError = !!remainingCharacters;
+ if (resultIterator < resultEnd)
+ *resultIterator = 0;
+
+ return (resultIterator - result) + remainingCharacters;
+}
+
+int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError)
+{
+ const UChar* sourceIterator = source;
+ const UChar* sourceEnd = source + sourceLength;
+ UChar* resultIterator = result;
+ UChar* resultEnd = result + resultLength;
+
+ if (sourceLength <= resultLength) {
+ while (sourceIterator < sourceEnd)
+ *resultIterator++ = towupper(*sourceIterator++);
+ } else {
+ while (resultIterator < resultEnd)
+ *resultIterator++ = towupper(*sourceIterator++);
+ }
+
+ int remainingCharacters = sourceIterator < sourceEnd ? sourceEnd - sourceIterator : 0;
+ *isError = !!remainingCharacters;
+ if (resultIterator < resultEnd)
+ *resultIterator = 0;
+
+ return (resultIterator - result) + remainingCharacters;
+}
+
+int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError)
+{
+ *isError = false;
+ if (resultLength < sourceLength) {
+ *isError = true;
+ return sourceLength;
+ }
+ for (int i = 0; i < sourceLength; ++i)
+ result[i] = foldCase(source[i]);
+ return sourceLength;
+}
+
+UChar toTitleCase(UChar c)
+{
+ return towupper(c);
+}
+
+Direction direction(UChar32 c)
+{
+ return static_cast<Direction>(ICU::direction(c));
+}
+
+CharCategory category(unsigned int c)
+{
+ return static_cast<CharCategory>(TO_MASK((int8_t) ICU::category(c)));
+}
+
+DecompositionType decompositionType(UChar32 c)
+{
+ return static_cast<DecompositionType>(ICU::decompositionType(c));
+}
+
+unsigned char combiningClass(UChar32 c)
+{
+ return ICU::combiningClass(c);
+}
+
+UChar mirroredChar(UChar32 c)
+{
+ return ICU::mirroredChar(c);
+}
+
+int digitValue(UChar c)
+{
+ return ICU::digitValue(c);
+}
+
+bool isSpace(UChar c)
+{
+ return !!iswspace(c);
+}
+
+bool isLetter(UChar c)
+{
+ return !!iswalpha(c);
+}
+
+} // namespace Unicode
+} // namespace WTF
diff --git a/Source/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h b/Source/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h
new file mode 100644
index 0000000..1d7576f
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/brew/UnicodeBrew.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ * Copyright (C) 2010 Company 100, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef UnicodeBrew_h
+#define UnicodeBrew_h
+
+#include "UnicodeFromICU.h"
+#include "UnicodeMacrosFromICU.h"
+
+namespace WTF {
+namespace Unicode {
+
+enum Direction {
+ LeftToRight = ICU::U_LEFT_TO_RIGHT,
+ RightToLeft = ICU::U_RIGHT_TO_LEFT,
+ EuropeanNumber = ICU::U_EUROPEAN_NUMBER,
+ EuropeanNumberSeparator = ICU::U_EUROPEAN_NUMBER_SEPARATOR,
+ EuropeanNumberTerminator = ICU::U_EUROPEAN_NUMBER_TERMINATOR,
+ ArabicNumber = ICU::U_ARABIC_NUMBER,
+ CommonNumberSeparator = ICU::U_COMMON_NUMBER_SEPARATOR,
+ BlockSeparator = ICU::U_BLOCK_SEPARATOR,
+ SegmentSeparator = ICU::U_SEGMENT_SEPARATOR,
+ WhiteSpaceNeutral = ICU::U_WHITE_SPACE_NEUTRAL,
+ OtherNeutral = ICU::U_OTHER_NEUTRAL,
+ LeftToRightEmbedding = ICU::U_LEFT_TO_RIGHT_EMBEDDING,
+ LeftToRightOverride = ICU::U_LEFT_TO_RIGHT_OVERRIDE,
+ RightToLeftArabic = ICU::U_RIGHT_TO_LEFT_ARABIC,
+ RightToLeftEmbedding = ICU::U_RIGHT_TO_LEFT_EMBEDDING,
+ RightToLeftOverride = ICU::U_RIGHT_TO_LEFT_OVERRIDE,
+ PopDirectionalFormat = ICU::U_POP_DIRECTIONAL_FORMAT,
+ NonSpacingMark = ICU::U_DIR_NON_SPACING_MARK,
+ BoundaryNeutral = ICU::U_BOUNDARY_NEUTRAL
+};
+
+enum DecompositionType {
+ DecompositionNone = ICU::U_DT_NONE,
+ DecompositionCanonical = ICU::U_DT_CANONICAL,
+ DecompositionCompat = ICU::U_DT_COMPAT,
+ DecompositionCircle = ICU::U_DT_CIRCLE,
+ DecompositionFinal = ICU::U_DT_FINAL,
+ DecompositionFont = ICU::U_DT_FONT,
+ DecompositionFraction = ICU::U_DT_FRACTION,
+ DecompositionInitial = ICU::U_DT_INITIAL,
+ DecompositionIsolated = ICU::U_DT_ISOLATED,
+ DecompositionMedial = ICU::U_DT_MEDIAL,
+ DecompositionNarrow = ICU::U_DT_NARROW,
+ DecompositionNoBreak = ICU::U_DT_NOBREAK,
+ DecompositionSmall = ICU::U_DT_SMALL,
+ DecompositionSquare = ICU::U_DT_SQUARE,
+ DecompositionSub = ICU::U_DT_SUB,
+ DecompositionSuper = ICU::U_DT_SUPER,
+ DecompositionVertical = ICU::U_DT_VERTICAL,
+ DecompositionWide = ICU::U_DT_WIDE,
+};
+
+enum CharCategory {
+ NoCategory = 0,
+ Other_NotAssigned = TO_MASK(ICU::U_GENERAL_OTHER_TYPES),
+ Letter_Uppercase = TO_MASK(ICU::U_UPPERCASE_LETTER),
+ Letter_Lowercase = TO_MASK(ICU::U_LOWERCASE_LETTER),
+ Letter_Titlecase = TO_MASK(ICU::U_TITLECASE_LETTER),
+ Letter_Modifier = TO_MASK(ICU::U_MODIFIER_LETTER),
+ Letter_Other = TO_MASK(ICU::U_OTHER_LETTER),
+
+ Mark_NonSpacing = TO_MASK(ICU::U_NON_SPACING_MARK),
+ Mark_Enclosing = TO_MASK(ICU::U_ENCLOSING_MARK),
+ Mark_SpacingCombining = TO_MASK(ICU::U_COMBINING_SPACING_MARK),
+
+ Number_DecimalDigit = TO_MASK(ICU::U_DECIMAL_DIGIT_NUMBER),
+ Number_Letter = TO_MASK(ICU::U_LETTER_NUMBER),
+ Number_Other = TO_MASK(ICU::U_OTHER_NUMBER),
+
+ Separator_Space = TO_MASK(ICU::U_SPACE_SEPARATOR),
+ Separator_Line = TO_MASK(ICU::U_LINE_SEPARATOR),
+ Separator_Paragraph = TO_MASK(ICU::U_PARAGRAPH_SEPARATOR),
+
+ Other_Control = TO_MASK(ICU::U_CONTROL_CHAR),
+ Other_Format = TO_MASK(ICU::U_FORMAT_CHAR),
+ Other_PrivateUse = TO_MASK(ICU::U_PRIVATE_USE_CHAR),
+ Other_Surrogate = TO_MASK(ICU::U_SURROGATE),
+
+ Punctuation_Dash = TO_MASK(ICU::U_DASH_PUNCTUATION),
+ Punctuation_Open = TO_MASK(ICU::U_START_PUNCTUATION),
+ Punctuation_Close = TO_MASK(ICU::U_END_PUNCTUATION),
+ Punctuation_Connector = TO_MASK(ICU::U_CONNECTOR_PUNCTUATION),
+ Punctuation_Other = TO_MASK(ICU::U_OTHER_PUNCTUATION),
+
+ Symbol_Math = TO_MASK(ICU::U_MATH_SYMBOL),
+ Symbol_Currency = TO_MASK(ICU::U_CURRENCY_SYMBOL),
+ Symbol_Modifier = TO_MASK(ICU::U_MODIFIER_SYMBOL),
+ Symbol_Other = TO_MASK(ICU::U_OTHER_SYMBOL),
+
+ Punctuation_InitialQuote = TO_MASK(ICU::U_INITIAL_PUNCTUATION),
+ Punctuation_FinalQuote = TO_MASK(ICU::U_FINAL_PUNCTUATION)
+};
+
+UChar foldCase(UChar);
+
+int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
+
+int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
+
+UChar toUpper(UChar);
+UChar toLower(UChar);
+
+bool isUpper(UChar);
+
+int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
+
+UChar toTitleCase(UChar);
+
+inline bool isArabicChar(UChar32 c)
+{
+ return c >= 0x0600 && c <= 0x06FF;
+}
+
+bool isAlphanumeric(UChar);
+
+CharCategory category(unsigned int);
+
+inline bool isSeparatorSpace(UChar c)
+{
+ return category(c) == Separator_Space;
+}
+
+bool isPrintableChar(UChar);
+
+bool isDigit(UChar);
+
+bool isPunct(UChar);
+
+inline bool hasLineBreakingPropertyComplexContext(UChar32)
+{
+ // FIXME: implement!
+ return false;
+}
+
+inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
+{
+ // FIXME
+ return false;
+}
+
+UChar mirroredChar(UChar32);
+
+Direction direction(UChar32);
+
+bool isLower(UChar);
+
+int digitValue(UChar);
+
+unsigned char combiningClass(UChar32);
+
+DecompositionType decompositionType(UChar32);
+
+inline int umemcasecmp(const UChar* a, const UChar* b, int len)
+{
+ for (int i = 0; i < len; ++i) {
+ UChar c1 = foldCase(a[i]);
+ UChar c2 = foldCase(b[i]);
+ if (c1 != c2)
+ return c1 - c2;
+ }
+ return 0;
+}
+
+bool isSpace(UChar);
+bool isLetter(UChar);
+
+} // namespace Unicode
+} // namespace WTF
+
+#endif
diff --git a/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp
new file mode 100644
index 0000000..a01c3ee
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ * Copyright (C) 2010 Igalia S.L.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "UnicodeGLib.h"
+
+#include <wtf/Vector.h>
+#include <wtf/unicode/UTF8.h>
+
+#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF)
+
+namespace WTF {
+namespace Unicode {
+
+UChar32 foldCase(UChar32 ch)
+{
+ GOwnPtr<GError> gerror;
+
+ GOwnPtr<char> utf8char;
+ utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr()));
+ if (gerror)
+ return ch;
+
+ GOwnPtr<char> utf8caseFolded;
+ utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1));
+
+ GOwnPtr<gunichar> ucs4Result;
+ ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0));
+
+ return *ucs4Result;
+}
+
+static int getUTF16LengthFromUTF8(const gchar* utf8String, int length)
+{
+ int utf16Length = 0;
+ const gchar* inputString = utf8String;
+
+ while ((utf8String + length - inputString > 0) && *inputString) {
+ gunichar character = g_utf8_get_char(inputString);
+
+ utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1;
+ inputString = g_utf8_next_char(inputString);
+ }
+
+ return utf16Length;
+}
+
+typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length);
+
+static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction)
+{
+ *error = false;
+
+ // Allocate a buffer big enough to hold all the characters.
+ Vector<char> buffer(srcLength * 3);
+ char* utf8Target = buffer.data();
+ const UChar* utf16Source = src;
+ ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true);
+ if (conversionResult != conversionOK) {
+ *error = true;
+ return -1;
+ }
+ buffer.shrink(utf8Target - buffer.data());
+
+ GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size()));
+ long utf8ResultLength = strlen(utf8Result.get());
+
+ // Calculate the destination buffer size.
+ int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength);
+ if (realLength > resultLength) {
+ *error = true;
+ return realLength;
+ }
+
+ // Convert the result to UTF-16.
+ UChar* utf16Target = result;
+ const char* utf8Source = utf8Result.get();
+ conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true);
+ long utf16ResultLength = utf16Target - result;
+ if (conversionResult != conversionOK)
+ *error = true;
+
+ return utf16ResultLength <= 0 ? -1 : utf16ResultLength;
+}
+int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold);
+}
+
+int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown);
+}
+
+int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup);
+}
+
+Direction direction(UChar32 c)
+{
+ PangoBidiType type = pango_bidi_type_for_unichar(c);
+ switch (type) {
+ case PANGO_BIDI_TYPE_L:
+ return LeftToRight;
+ case PANGO_BIDI_TYPE_R:
+ return RightToLeft;
+ case PANGO_BIDI_TYPE_AL:
+ return RightToLeftArabic;
+ case PANGO_BIDI_TYPE_LRE:
+ return LeftToRightEmbedding;
+ case PANGO_BIDI_TYPE_RLE:
+ return RightToLeftEmbedding;
+ case PANGO_BIDI_TYPE_LRO:
+ return LeftToRightOverride;
+ case PANGO_BIDI_TYPE_RLO:
+ return RightToLeftOverride;
+ case PANGO_BIDI_TYPE_PDF:
+ return PopDirectionalFormat;
+ case PANGO_BIDI_TYPE_EN:
+ return EuropeanNumber;
+ case PANGO_BIDI_TYPE_AN:
+ return ArabicNumber;
+ case PANGO_BIDI_TYPE_ES:
+ return EuropeanNumberSeparator;
+ case PANGO_BIDI_TYPE_ET:
+ return EuropeanNumberTerminator;
+ case PANGO_BIDI_TYPE_CS:
+ return CommonNumberSeparator;
+ case PANGO_BIDI_TYPE_NSM:
+ return NonSpacingMark;
+ case PANGO_BIDI_TYPE_BN:
+ return BoundaryNeutral;
+ case PANGO_BIDI_TYPE_B:
+ return BlockSeparator;
+ case PANGO_BIDI_TYPE_S:
+ return SegmentSeparator;
+ case PANGO_BIDI_TYPE_WS:
+ return WhiteSpaceNeutral;
+ default:
+ return OtherNeutral;
+ }
+}
+
+int umemcasecmp(const UChar* a, const UChar* b, int len)
+{
+ GOwnPtr<char> utf8a;
+ GOwnPtr<char> utf8b;
+
+ utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0));
+ utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0));
+
+ GOwnPtr<char> foldedA;
+ GOwnPtr<char> foldedB;
+
+ foldedA.set(g_utf8_casefold(utf8a.get(), -1));
+ foldedB.set(g_utf8_casefold(utf8b.get(), -1));
+
+ // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu
+ // from the ICU docs:
+ // "Compare two strings case-insensitively using full case folding.
+ // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))."
+ //
+ // So it looks like we don't need the full g_utf8_collate here,
+ // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes).
+ // As there is no direct equivalent to this icu function in GLib, for now
+ // we'll use g_utf8_collate():
+
+ return g_utf8_collate(foldedA.get(), foldedB.get());
+}
+
+}
+}
diff --git a/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h
new file mode 100644
index 0000000..46b00ea
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef UnicodeGLib_h
+#define UnicodeGLib_h
+
+#include "UnicodeMacrosFromICU.h"
+#include "GOwnPtr.h"
+
+#include <glib.h>
+#include <pango/pango.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef uint16_t UChar;
+typedef int32_t UChar32;
+
+namespace WTF {
+namespace Unicode {
+
+enum Direction {
+ LeftToRight,
+ RightToLeft,
+ EuropeanNumber,
+ EuropeanNumberSeparator,
+ EuropeanNumberTerminator,
+ ArabicNumber,
+ CommonNumberSeparator,
+ BlockSeparator,
+ SegmentSeparator,
+ WhiteSpaceNeutral,
+ OtherNeutral,
+ LeftToRightEmbedding,
+ LeftToRightOverride,
+ RightToLeftArabic,
+ RightToLeftEmbedding,
+ RightToLeftOverride,
+ PopDirectionalFormat,
+ NonSpacingMark,
+ BoundaryNeutral
+};
+
+enum DecompositionType {
+ DecompositionNone,
+ DecompositionCanonical,
+ DecompositionCompat,
+ DecompositionCircle,
+ DecompositionFinal,
+ DecompositionFont,
+ DecompositionFraction,
+ DecompositionInitial,
+ DecompositionIsolated,
+ DecompositionMedial,
+ DecompositionNarrow,
+ DecompositionNoBreak,
+ DecompositionSmall,
+ DecompositionSquare,
+ DecompositionSub,
+ DecompositionSuper,
+ DecompositionVertical,
+ DecompositionWide,
+};
+
+enum CharCategory {
+ NoCategory = 0,
+ Other_NotAssigned = U_MASK(G_UNICODE_UNASSIGNED),
+ Letter_Uppercase = U_MASK(G_UNICODE_UPPERCASE_LETTER),
+ Letter_Lowercase = U_MASK(G_UNICODE_LOWERCASE_LETTER),
+ Letter_Titlecase = U_MASK(G_UNICODE_TITLECASE_LETTER),
+ Letter_Modifier = U_MASK(G_UNICODE_MODIFIER_LETTER),
+ Letter_Other = U_MASK(G_UNICODE_OTHER_LETTER),
+
+ Mark_NonSpacing = U_MASK(G_UNICODE_NON_SPACING_MARK),
+ Mark_Enclosing = U_MASK(G_UNICODE_ENCLOSING_MARK),
+ Mark_SpacingCombining = U_MASK(G_UNICODE_COMBINING_MARK),
+
+ Number_DecimalDigit = U_MASK(G_UNICODE_DECIMAL_NUMBER),
+ Number_Letter = U_MASK(G_UNICODE_LETTER_NUMBER),
+ Number_Other = U_MASK(G_UNICODE_OTHER_NUMBER),
+
+ Separator_Space = U_MASK(G_UNICODE_SPACE_SEPARATOR),
+ Separator_Line = U_MASK(G_UNICODE_LINE_SEPARATOR),
+ Separator_Paragraph = U_MASK(G_UNICODE_PARAGRAPH_SEPARATOR),
+
+ Other_Control = U_MASK(G_UNICODE_CONTROL),
+ Other_Format = U_MASK(G_UNICODE_FORMAT),
+ Other_PrivateUse = U_MASK(G_UNICODE_PRIVATE_USE),
+ Other_Surrogate = U_MASK(G_UNICODE_SURROGATE),
+
+ Punctuation_Dash = U_MASK(G_UNICODE_DASH_PUNCTUATION),
+ Punctuation_Open = U_MASK(G_UNICODE_OPEN_PUNCTUATION),
+ Punctuation_Close = U_MASK(G_UNICODE_CLOSE_PUNCTUATION),
+ Punctuation_Connector = U_MASK(G_UNICODE_CONNECT_PUNCTUATION),
+ Punctuation_Other = U_MASK(G_UNICODE_OTHER_PUNCTUATION),
+
+ Symbol_Math = U_MASK(G_UNICODE_MATH_SYMBOL),
+ Symbol_Currency = U_MASK(G_UNICODE_CURRENCY_SYMBOL),
+ Symbol_Modifier = U_MASK(G_UNICODE_MODIFIER_SYMBOL),
+ Symbol_Other = U_MASK(G_UNICODE_OTHER_SYMBOL),
+
+ Punctuation_InitialQuote = U_MASK(G_UNICODE_INITIAL_PUNCTUATION),
+ Punctuation_FinalQuote = U_MASK(G_UNICODE_FINAL_PUNCTUATION)
+};
+
+UChar32 foldCase(UChar32);
+
+int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
+
+int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
+
+inline UChar32 toLower(UChar32 c)
+{
+ return g_unichar_tolower(c);
+}
+
+inline UChar32 toUpper(UChar32 c)
+{
+ return g_unichar_toupper(c);
+}
+
+int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error);
+
+inline UChar32 toTitleCase(UChar32 c)
+{
+ return g_unichar_totitle(c);
+}
+
+inline bool isArabicChar(UChar32 c)
+{
+ return c >= 0x0600 && c <= 0x06FF;
+}
+
+inline bool isAlphanumeric(UChar32 c)
+{
+ return g_unichar_isalnum(c);
+}
+
+inline bool isFormatChar(UChar32 c)
+{
+ return g_unichar_type(c) == G_UNICODE_FORMAT;
+}
+
+inline bool isSeparatorSpace(UChar32 c)
+{
+ return g_unichar_type(c) == G_UNICODE_SPACE_SEPARATOR;
+}
+
+inline bool isPrintableChar(UChar32 c)
+{
+ return g_unichar_isprint(c);
+}
+
+inline bool isDigit(UChar32 c)
+{
+ return g_unichar_isdigit(c);
+}
+
+inline bool isPunct(UChar32 c)
+{
+ return g_unichar_ispunct(c);
+}
+
+inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
+{
+ // FIXME
+ return false;
+}
+
+inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
+{
+ // FIXME
+ return false;
+}
+
+inline UChar32 mirroredChar(UChar32 c)
+{
+ gunichar mirror = 0;
+ g_unichar_get_mirror_char(c, &mirror);
+ return mirror;
+}
+
+inline CharCategory category(UChar32 c)
+{
+ if (c > 0xffff)
+ return NoCategory;
+
+ return (CharCategory) U_MASK(g_unichar_type(c));
+}
+
+Direction direction(UChar32);
+
+inline bool isLower(UChar32 c)
+{
+ return g_unichar_islower(c);
+}
+
+inline int digitValue(UChar32 c)
+{
+ return g_unichar_digit_value(c);
+}
+
+inline uint8_t combiningClass(UChar32 c)
+{
+ // FIXME
+ // return g_unichar_combining_class(c);
+ return 0;
+}
+
+inline DecompositionType decompositionType(UChar32 c)
+{
+ // FIXME
+ return DecompositionNone;
+}
+
+int umemcasecmp(const UChar*, const UChar*, int len);
+
+}
+}
+
+#endif
+
diff --git a/Source/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp b/Source/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp
new file mode 100644
index 0000000..805b114
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "Collator.h"
+
+#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
+
+#include "Assertions.h"
+#include "Threading.h"
+#include <unicode/ucol.h>
+#include <string.h>
+
+#if OS(DARWIN)
+#include "RetainPtr.h"
+#include <CoreFoundation/CoreFoundation.h>
+#endif
+
+namespace WTF {
+
+static UCollator* cachedCollator;
+static Mutex& cachedCollatorMutex()
+{
+ AtomicallyInitializedStatic(Mutex&, mutex = *new Mutex);
+ return mutex;
+}
+
+Collator::Collator(const char* locale)
+ : m_collator(0)
+ , m_locale(locale ? strdup(locale) : 0)
+ , m_lowerFirst(false)
+{
+}
+
+PassOwnPtr<Collator> Collator::userDefault()
+{
+#if OS(DARWIN) && PLATFORM(CF)
+ // Mac OS X doesn't set UNIX locale to match user-selected one, so ICU default doesn't work.
+#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !OS(IOS)
+ RetainPtr<CFLocaleRef> currentLocale(AdoptCF, CFLocaleCopyCurrent());
+ CFStringRef collationOrder = (CFStringRef)CFLocaleGetValue(currentLocale.get(), kCFLocaleCollatorIdentifier);
+#else
+ RetainPtr<CFStringRef> collationOrderRetainer(AdoptCF, (CFStringRef)CFPreferencesCopyValue(CFSTR("AppleCollationOrder"), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost));
+ CFStringRef collationOrder = collationOrderRetainer.get();
+#endif
+ char buf[256];
+ if (!collationOrder)
+ return adoptPtr(new Collator(""));
+ CFStringGetCString(collationOrder, buf, sizeof(buf), kCFStringEncodingASCII);
+ return adoptPtr(new Collator(buf));
+#else
+ return adoptPtr(new Collator(0));
+#endif
+}
+
+Collator::~Collator()
+{
+ releaseCollator();
+ free(m_locale);
+}
+
+void Collator::setOrderLowerFirst(bool lowerFirst)
+{
+ m_lowerFirst = lowerFirst;
+}
+
+Collator::Result Collator::collate(const UChar* lhs, size_t lhsLength, const UChar* rhs, size_t rhsLength) const
+{
+ if (!m_collator)
+ createCollator();
+
+ return static_cast<Result>(ucol_strcoll(m_collator, lhs, lhsLength, rhs, rhsLength));
+}
+
+void Collator::createCollator() const
+{
+ ASSERT(!m_collator);
+ UErrorCode status = U_ZERO_ERROR;
+
+ {
+ Locker<Mutex> lock(cachedCollatorMutex());
+ if (cachedCollator) {
+ const char* cachedCollatorLocale = ucol_getLocaleByType(cachedCollator, ULOC_REQUESTED_LOCALE, &status);
+ ASSERT(U_SUCCESS(status));
+ ASSERT(cachedCollatorLocale);
+
+ UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status);
+ ASSERT(U_SUCCESS(status));
+
+ // FIXME: default locale is never matched, because ucol_getLocaleByType returns the actual one used, not 0.
+ if (m_locale && 0 == strcmp(cachedCollatorLocale, m_locale)
+ && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) {
+ m_collator = cachedCollator;
+ cachedCollator = 0;
+ return;
+ }
+ }
+ }
+
+ m_collator = ucol_open(m_locale, &status);
+ if (U_FAILURE(status)) {
+ status = U_ZERO_ERROR;
+ m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm.
+ }
+ ASSERT(U_SUCCESS(status));
+
+ ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status);
+ ASSERT(U_SUCCESS(status));
+}
+
+void Collator::releaseCollator()
+{
+ {
+ Locker<Mutex> lock(cachedCollatorMutex());
+ if (cachedCollator)
+ ucol_close(cachedCollator);
+ cachedCollator = m_collator;
+ m_collator = 0;
+ }
+}
+
+}
+
+#endif
diff --git a/Source/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h b/Source/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h
new file mode 100644
index 0000000..a2a5c0a
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef WTF_UNICODE_ICU_H
+#define WTF_UNICODE_ICU_H
+
+#include <stdlib.h>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
+#include <unicode/utf16.h>
+
+namespace WTF {
+namespace Unicode {
+
+enum Direction {
+ LeftToRight = U_LEFT_TO_RIGHT,
+ RightToLeft = U_RIGHT_TO_LEFT,
+ EuropeanNumber = U_EUROPEAN_NUMBER,
+ EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
+ EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
+ ArabicNumber = U_ARABIC_NUMBER,
+ CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
+ BlockSeparator = U_BLOCK_SEPARATOR,
+ SegmentSeparator = U_SEGMENT_SEPARATOR,
+ WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
+ OtherNeutral = U_OTHER_NEUTRAL,
+ LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
+ LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
+ RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
+ RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
+ RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
+ PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
+ NonSpacingMark = U_DIR_NON_SPACING_MARK,
+ BoundaryNeutral = U_BOUNDARY_NEUTRAL
+};
+
+enum DecompositionType {
+ DecompositionNone = U_DT_NONE,
+ DecompositionCanonical = U_DT_CANONICAL,
+ DecompositionCompat = U_DT_COMPAT,
+ DecompositionCircle = U_DT_CIRCLE,
+ DecompositionFinal = U_DT_FINAL,
+ DecompositionFont = U_DT_FONT,
+ DecompositionFraction = U_DT_FRACTION,
+ DecompositionInitial = U_DT_INITIAL,
+ DecompositionIsolated = U_DT_ISOLATED,
+ DecompositionMedial = U_DT_MEDIAL,
+ DecompositionNarrow = U_DT_NARROW,
+ DecompositionNoBreak = U_DT_NOBREAK,
+ DecompositionSmall = U_DT_SMALL,
+ DecompositionSquare = U_DT_SQUARE,
+ DecompositionSub = U_DT_SUB,
+ DecompositionSuper = U_DT_SUPER,
+ DecompositionVertical = U_DT_VERTICAL,
+ DecompositionWide = U_DT_WIDE,
+};
+
+enum CharCategory {
+ NoCategory = 0,
+ Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
+ Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
+ Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
+ Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
+ Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
+ Letter_Other = U_MASK(U_OTHER_LETTER),
+
+ Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
+ Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
+ Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),
+
+ Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
+ Number_Letter = U_MASK(U_LETTER_NUMBER),
+ Number_Other = U_MASK(U_OTHER_NUMBER),
+
+ Separator_Space = U_MASK(U_SPACE_SEPARATOR),
+ Separator_Line = U_MASK(U_LINE_SEPARATOR),
+ Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),
+
+ Other_Control = U_MASK(U_CONTROL_CHAR),
+ Other_Format = U_MASK(U_FORMAT_CHAR),
+ Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
+ Other_Surrogate = U_MASK(U_SURROGATE),
+
+ Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
+ Punctuation_Open = U_MASK(U_START_PUNCTUATION),
+ Punctuation_Close = U_MASK(U_END_PUNCTUATION),
+ Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
+ Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),
+
+ Symbol_Math = U_MASK(U_MATH_SYMBOL),
+ Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
+ Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
+ Symbol_Other = U_MASK(U_OTHER_SYMBOL),
+
+ Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
+ Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
+};
+
+inline UChar32 foldCase(UChar32 c)
+{
+ return u_foldCase(c, U_FOLD_CASE_DEFAULT);
+}
+
+inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
+ *error = !U_SUCCESS(status);
+ return realLength;
+}
+
+inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
+ *error = !!U_FAILURE(status);
+ return realLength;
+}
+
+inline UChar32 toLower(UChar32 c)
+{
+ return u_tolower(c);
+}
+
+inline UChar32 toUpper(UChar32 c)
+{
+ return u_toupper(c);
+}
+
+inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
+ *error = !!U_FAILURE(status);
+ return realLength;
+}
+
+inline UChar32 toTitleCase(UChar32 c)
+{
+ return u_totitle(c);
+}
+
+inline bool isArabicChar(UChar32 c)
+{
+ return ublock_getCode(c) == UBLOCK_ARABIC;
+}
+
+inline bool isAlphanumeric(UChar32 c)
+{
+ return u_isalnum(c);
+}
+
+inline bool isSeparatorSpace(UChar32 c)
+{
+ return u_charType(c) == U_SPACE_SEPARATOR;
+}
+
+inline bool isPrintableChar(UChar32 c)
+{
+ return !!u_isprint(c);
+}
+
+inline bool isPunct(UChar32 c)
+{
+ return !!u_ispunct(c);
+}
+
+inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
+{
+ return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT;
+}
+
+inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
+{
+ int32_t prop = u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
+ return prop == U_LB_COMPLEX_CONTEXT || prop == U_LB_IDEOGRAPHIC;
+}
+
+inline UChar32 mirroredChar(UChar32 c)
+{
+ return u_charMirror(c);
+}
+
+inline CharCategory category(UChar32 c)
+{
+ return static_cast<CharCategory>(U_GET_GC_MASK(c));
+}
+
+inline Direction direction(UChar32 c)
+{
+ return static_cast<Direction>(u_charDirection(c));
+}
+
+inline bool isLower(UChar32 c)
+{
+ return !!u_islower(c);
+}
+
+inline uint8_t combiningClass(UChar32 c)
+{
+ return u_getCombiningClass(c);
+}
+
+inline DecompositionType decompositionType(UChar32 c)
+{
+ return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
+}
+
+inline int umemcasecmp(const UChar* a, const UChar* b, int len)
+{
+ return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);
+}
+
+} }
+
+#endif // WTF_UNICODE_ICU_H
diff --git a/Source/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h b/Source/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h
new file mode 100644
index 0000000..eaa7a07
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h
@@ -0,0 +1,375 @@
+/*
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef WTF_UNICODE_QT4_H
+#define WTF_UNICODE_QT4_H
+
+#include "UnicodeMacrosFromICU.h"
+
+#include <QChar>
+#include <QString>
+
+#include <config.h>
+
+#include <stdint.h>
+#if USE(QT_ICU_TEXT_BREAKING)
+#include <unicode/ubrk.h>
+#endif
+
+QT_BEGIN_NAMESPACE
+namespace QUnicodeTables {
+ struct Properties {
+ ushort category : 8;
+ ushort line_break_class : 8;
+ ushort direction : 8;
+ ushort combiningClass :8;
+ ushort joining : 2;
+ signed short digitValue : 6; /* 5 needed */
+ ushort unicodeVersion : 4;
+ ushort lowerCaseSpecial : 1;
+ ushort upperCaseSpecial : 1;
+ ushort titleCaseSpecial : 1;
+ ushort caseFoldSpecial : 1; /* currently unused */
+ signed short mirrorDiff : 16;
+ signed short lowerCaseDiff : 16;
+ signed short upperCaseDiff : 16;
+ signed short titleCaseDiff : 16;
+ signed short caseFoldDiff : 16;
+ };
+ Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
+ Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
+}
+QT_END_NAMESPACE
+
+// ugly hack to make UChar compatible with JSChar in API/JSStringRef.h
+#if defined(Q_OS_WIN) || COMPILER(WINSCW) || (COMPILER(RVCT) && !OS(LINUX))
+typedef wchar_t UChar;
+#else
+typedef uint16_t UChar;
+#endif
+
+#if !USE(QT_ICU_TEXT_BREAKING)
+typedef uint32_t UChar32;
+#endif
+
+namespace WTF {
+namespace Unicode {
+
+enum Direction {
+ LeftToRight = QChar::DirL,
+ RightToLeft = QChar::DirR,
+ EuropeanNumber = QChar::DirEN,
+ EuropeanNumberSeparator = QChar::DirES,
+ EuropeanNumberTerminator = QChar::DirET,
+ ArabicNumber = QChar::DirAN,
+ CommonNumberSeparator = QChar::DirCS,
+ BlockSeparator = QChar::DirB,
+ SegmentSeparator = QChar::DirS,
+ WhiteSpaceNeutral = QChar::DirWS,
+ OtherNeutral = QChar::DirON,
+ LeftToRightEmbedding = QChar::DirLRE,
+ LeftToRightOverride = QChar::DirLRO,
+ RightToLeftArabic = QChar::DirAL,
+ RightToLeftEmbedding = QChar::DirRLE,
+ RightToLeftOverride = QChar::DirRLO,
+ PopDirectionalFormat = QChar::DirPDF,
+ NonSpacingMark = QChar::DirNSM,
+ BoundaryNeutral = QChar::DirBN
+};
+
+enum DecompositionType {
+ DecompositionNone = QChar::NoDecomposition,
+ DecompositionCanonical = QChar::Canonical,
+ DecompositionCompat = QChar::Compat,
+ DecompositionCircle = QChar::Circle,
+ DecompositionFinal = QChar::Final,
+ DecompositionFont = QChar::Font,
+ DecompositionFraction = QChar::Fraction,
+ DecompositionInitial = QChar::Initial,
+ DecompositionIsolated = QChar::Isolated,
+ DecompositionMedial = QChar::Medial,
+ DecompositionNarrow = QChar::Narrow,
+ DecompositionNoBreak = QChar::NoBreak,
+ DecompositionSmall = QChar::Small,
+ DecompositionSquare = QChar::Square,
+ DecompositionSub = QChar::Sub,
+ DecompositionSuper = QChar::Super,
+ DecompositionVertical = QChar::Vertical,
+ DecompositionWide = QChar::Wide
+};
+
+enum CharCategory {
+ NoCategory = 0,
+ Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
+ Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
+ Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
+ Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
+ Number_Letter = U_MASK(QChar::Number_Letter),
+ Number_Other = U_MASK(QChar::Number_Other),
+ Separator_Space = U_MASK(QChar::Separator_Space),
+ Separator_Line = U_MASK(QChar::Separator_Line),
+ Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
+ Other_Control = U_MASK(QChar::Other_Control),
+ Other_Format = U_MASK(QChar::Other_Format),
+ Other_Surrogate = U_MASK(QChar::Other_Surrogate),
+ Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
+ Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
+ Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
+ Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
+ Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
+ Letter_Modifier = U_MASK(QChar::Letter_Modifier),
+ Letter_Other = U_MASK(QChar::Letter_Other),
+ Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
+ Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
+ Punctuation_Open = U_MASK(QChar::Punctuation_Open),
+ Punctuation_Close = U_MASK(QChar::Punctuation_Close),
+ Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
+ Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
+ Punctuation_Other = U_MASK(QChar::Punctuation_Other),
+ Symbol_Math = U_MASK(QChar::Symbol_Math),
+ Symbol_Currency = U_MASK(QChar::Symbol_Currency),
+ Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
+ Symbol_Other = U_MASK(QChar::Symbol_Other)
+};
+
+
+// FIXME: handle surrogates correctly in all methods
+
+inline UChar32 toLower(UChar32 ch)
+{
+ return QChar::toLower(uint32_t(ch));
+}
+
+inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ const UChar *e = src + srcLength;
+ const UChar *s = src;
+ UChar *r = result;
+ uint rindex = 0;
+
+ // this avoids one out of bounds check in the loop
+ if (s < e && QChar(*s).isLowSurrogate()) {
+ if (r)
+ r[rindex] = *s++;
+ ++rindex;
+ }
+
+ int needed = 0;
+ while (s < e && (rindex < uint(resultLength) || !r)) {
+ uint c = *s;
+ if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
+ c = QChar::surrogateToUcs4(*(s - 1), c);
+ const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
+ if (prop->lowerCaseSpecial) {
+ QString qstring;
+ if (c < 0x10000) {
+ qstring += QChar(c);
+ } else {
+ qstring += QChar(*(s-1));
+ qstring += QChar(*s);
+ }
+ qstring = qstring.toLower();
+ for (int i = 0; i < qstring.length(); ++i) {
+ if (rindex >= uint(resultLength)) {
+ needed += qstring.length() - i;
+ break;
+ }
+ if (r)
+ r[rindex] = qstring.at(i).unicode();
+ ++rindex;
+ }
+ } else {
+ if (r)
+ r[rindex] = *s + prop->lowerCaseDiff;
+ ++rindex;
+ }
+ ++s;
+ }
+ if (s < e)
+ needed += e - s;
+ *error = (needed != 0);
+ if (rindex < uint(resultLength))
+ r[rindex] = 0;
+ return rindex + needed;
+}
+
+inline UChar32 toUpper(UChar32 c)
+{
+ return QChar::toUpper(uint32_t(c));
+}
+
+inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ const UChar *e = src + srcLength;
+ const UChar *s = src;
+ UChar *r = result;
+ int rindex = 0;
+
+ // this avoids one out of bounds check in the loop
+ if (s < e && QChar(*s).isLowSurrogate()) {
+ if (r)
+ r[rindex] = *s++;
+ ++rindex;
+ }
+
+ int needed = 0;
+ while (s < e && (rindex < resultLength || !r)) {
+ uint c = *s;
+ if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
+ c = QChar::surrogateToUcs4(*(s - 1), c);
+ const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
+ if (prop->upperCaseSpecial) {
+ QString qstring;
+ if (c < 0x10000) {
+ qstring += QChar(c);
+ } else {
+ qstring += QChar(*(s-1));
+ qstring += QChar(*s);
+ }
+ qstring = qstring.toUpper();
+ for (int i = 0; i < qstring.length(); ++i) {
+ if (rindex >= resultLength) {
+ needed += qstring.length() - i;
+ break;
+ }
+ if (r)
+ r[rindex] = qstring.at(i).unicode();
+ ++rindex;
+ }
+ } else {
+ if (r)
+ r[rindex] = *s + prop->upperCaseDiff;
+ ++rindex;
+ }
+ ++s;
+ }
+ if (s < e)
+ needed += e - s;
+ *error = (needed != 0);
+ if (rindex < resultLength)
+ r[rindex] = 0;
+ return rindex + needed;
+}
+
+inline int toTitleCase(UChar32 c)
+{
+ return QChar::toTitleCase(uint32_t(c));
+}
+
+inline UChar32 foldCase(UChar32 c)
+{
+ return QChar::toCaseFolded(uint32_t(c));
+}
+
+inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+ // FIXME: handle special casing. Easiest with some low level API in Qt
+ *error = false;
+ if (resultLength < srcLength) {
+ *error = true;
+ return srcLength;
+ }
+ for (int i = 0; i < srcLength; ++i)
+ result[i] = QChar::toCaseFolded(ushort(src[i]));
+ return srcLength;
+}
+
+inline bool isArabicChar(UChar32 c)
+{
+ return c >= 0x0600 && c <= 0x06FF;
+}
+
+inline bool isPrintableChar(UChar32 c)
+{
+ const uint test = U_MASK(QChar::Other_Control) |
+ U_MASK(QChar::Other_NotAssigned);
+ return !(U_MASK(QChar::category(uint32_t(c))) & test);
+}
+
+inline bool isSeparatorSpace(UChar32 c)
+{
+ return QChar::category(uint32_t(c)) == QChar::Separator_Space;
+}
+
+inline bool isPunct(UChar32 c)
+{
+ const uint test = U_MASK(QChar::Punctuation_Connector) |
+ U_MASK(QChar::Punctuation_Dash) |
+ U_MASK(QChar::Punctuation_Open) |
+ U_MASK(QChar::Punctuation_Close) |
+ U_MASK(QChar::Punctuation_InitialQuote) |
+ U_MASK(QChar::Punctuation_FinalQuote) |
+ U_MASK(QChar::Punctuation_Other);
+ return U_MASK(QChar::category(uint32_t(c))) & test;
+}
+
+inline bool isLower(UChar32 c)
+{
+ return QChar::category(uint32_t(c)) == QChar::Letter_Lowercase;
+}
+
+inline bool hasLineBreakingPropertyComplexContext(UChar32)
+{
+ // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
+ return false;
+}
+
+inline UChar32 mirroredChar(UChar32 c)
+{
+ return QChar::mirroredChar(uint32_t(c));
+}
+
+inline uint8_t combiningClass(UChar32 c)
+{
+ return QChar::combiningClass(uint32_t(c));
+}
+
+inline DecompositionType decompositionType(UChar32 c)
+{
+ return (DecompositionType)QChar::decompositionTag(c);
+}
+
+inline int umemcasecmp(const UChar* a, const UChar* b, int len)
+{
+ // handle surrogates correctly
+ for (int i = 0; i < len; ++i) {
+ uint c1 = QChar::toCaseFolded(ushort(a[i]));
+ uint c2 = QChar::toCaseFolded(ushort(b[i]));
+ if (c1 != c2)
+ return c1 - c2;
+ }
+ return 0;
+}
+
+inline Direction direction(UChar32 c)
+{
+ return (Direction)QChar::direction(uint32_t(c));
+}
+
+inline CharCategory category(UChar32 c)
+{
+ return (CharCategory) U_MASK(QChar::category(uint32_t(c)));
+}
+
+} }
+
+#endif // WTF_UNICODE_QT4_H
diff --git a/Source/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp b/Source/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp
new file mode 100644
index 0000000..96dac7d
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "UnicodeWinCE.h"
+
+#include <wchar.h>
+
+namespace WTF {
+namespace Unicode {
+
+UChar toLower(UChar c)
+{
+ return towlower(c);
+}
+
+UChar toUpper(UChar c)
+{
+ return towupper(c);
+}
+
+UChar foldCase(UChar c)
+{
+ return towlower(c);
+}
+
+bool isPrintableChar(UChar c)
+{
+ return !!iswprint(c);
+}
+
+bool isSpace(UChar c)
+{
+ return !!iswspace(c);
+}
+
+bool isLetter(UChar c)
+{
+ return !!iswalpha(c);
+}
+
+bool isUpper(UChar c)
+{
+ return !!iswupper(c);
+}
+
+bool isLower(UChar c)
+{
+ return !!iswlower(c);
+}
+
+bool isDigit(UChar c)
+{
+ return !!iswdigit(c);
+}
+
+bool isPunct(UChar c)
+{
+ return !!iswpunct(c);
+}
+
+bool isAlphanumeric(UChar c)
+{
+ return !!iswalnum(c);
+}
+
+int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError)
+{
+ const UChar* sourceIterator = source;
+ const UChar* sourceEnd = source + sourceLength;
+ UChar* resultIterator = result;
+ UChar* resultEnd = result + resultLength;
+
+ int remainingCharacters = 0;
+ if (sourceLength <= resultLength)
+ while (sourceIterator < sourceEnd)
+ *resultIterator++ = towlower(*sourceIterator++);
+ else
+ while (resultIterator < resultEnd)
+ *resultIterator++ = towlower(*sourceIterator++);
+
+ if (sourceIterator < sourceEnd)
+ remainingCharacters += sourceEnd - sourceIterator;
+ *isError = !!remainingCharacters;
+ if (resultIterator < resultEnd)
+ *resultIterator = 0;
+
+ return (resultIterator - result) + remainingCharacters;
+}
+
+int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError)
+{
+ const UChar* sourceIterator = source;
+ const UChar* sourceEnd = source + sourceLength;
+ UChar* resultIterator = result;
+ UChar* resultEnd = result + resultLength;
+
+ int remainingCharacters = 0;
+ if (sourceLength <= resultLength)
+ while (sourceIterator < sourceEnd)
+ *resultIterator++ = towupper(*sourceIterator++);
+ else
+ while (resultIterator < resultEnd)
+ *resultIterator++ = towupper(*sourceIterator++);
+
+ if (sourceIterator < sourceEnd)
+ remainingCharacters += sourceEnd - sourceIterator;
+ *isError = !!remainingCharacters;
+ if (resultIterator < resultEnd)
+ *resultIterator = 0;
+
+ return (resultIterator - result) + remainingCharacters;
+}
+
+int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError)
+{
+ *isError = false;
+ if (resultLength < sourceLength) {
+ *isError = true;
+ return sourceLength;
+ }
+ for (int i = 0; i < sourceLength; ++i)
+ result[i] = foldCase(source[i]);
+ return sourceLength;
+}
+
+UChar toTitleCase(UChar c)
+{
+ return towupper(c);
+}
+
+Direction direction(UChar32 c)
+{
+ return static_cast<Direction>(UnicodeCE::direction(c));
+}
+
+CharCategory category(unsigned int c)
+{
+ return static_cast<CharCategory>(TO_MASK((__int8) UnicodeCE::category(c)));
+}
+
+DecompositionType decompositionType(UChar32 c)
+{
+ return static_cast<DecompositionType>(UnicodeCE::decompositionType(c));
+}
+
+unsigned char combiningClass(UChar32 c)
+{
+ return UnicodeCE::combiningClass(c);
+}
+
+UChar mirroredChar(UChar32 c)
+{
+ return UnicodeCE::mirroredChar(c);
+}
+
+int digitValue(UChar c)
+{
+ return UnicodeCE::digitValue(c);
+}
+
+} // namespace Unicode
+} // namespace WTF
diff --git a/Source/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h b/Source/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h
new file mode 100644
index 0000000..2688aa9
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/unicode/wince/UnicodeWinCE.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2006 George Staikos <staikos@kde.org>
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef WTF_UnicodeWinCE_h
+#define WTF_UnicodeWinCE_h
+
+#include "UnicodeMacrosFromICU.h"
+
+#include "ce_unicode.h"
+
+#define TO_MASK(x) (1 << (x))
+
+namespace WTF {
+namespace Unicode {
+
+enum Direction {
+ LeftToRight = UnicodeCE::U_LEFT_TO_RIGHT,
+ RightToLeft = UnicodeCE::U_RIGHT_TO_LEFT,
+ EuropeanNumber = UnicodeCE::U_EUROPEAN_NUMBER,
+ EuropeanNumberSeparator = UnicodeCE::U_EUROPEAN_NUMBER_SEPARATOR,
+ EuropeanNumberTerminator = UnicodeCE::U_EUROPEAN_NUMBER_TERMINATOR,
+ ArabicNumber = UnicodeCE::U_ARABIC_NUMBER,
+ CommonNumberSeparator = UnicodeCE::U_COMMON_NUMBER_SEPARATOR,
+ BlockSeparator = UnicodeCE::U_BLOCK_SEPARATOR,
+ SegmentSeparator = UnicodeCE::U_SEGMENT_SEPARATOR,
+ WhiteSpaceNeutral = UnicodeCE::U_WHITE_SPACE_NEUTRAL,
+ OtherNeutral = UnicodeCE::U_OTHER_NEUTRAL,
+ LeftToRightEmbedding = UnicodeCE::U_LEFT_TO_RIGHT_EMBEDDING,
+ LeftToRightOverride = UnicodeCE::U_LEFT_TO_RIGHT_OVERRIDE,
+ RightToLeftArabic = UnicodeCE::U_RIGHT_TO_LEFT_ARABIC,
+ RightToLeftEmbedding = UnicodeCE::U_RIGHT_TO_LEFT_EMBEDDING,
+ RightToLeftOverride = UnicodeCE::U_RIGHT_TO_LEFT_OVERRIDE,
+ PopDirectionalFormat = UnicodeCE::U_POP_DIRECTIONAL_FORMAT,
+ NonSpacingMark = UnicodeCE::U_DIR_NON_SPACING_MARK,
+ BoundaryNeutral = UnicodeCE::U_BOUNDARY_NEUTRAL
+};
+
+enum DecompositionType {
+ DecompositionNone = UnicodeCE::U_DT_NONE,
+ DecompositionCanonical = UnicodeCE::U_DT_CANONICAL,
+ DecompositionCompat = UnicodeCE::U_DT_COMPAT,
+ DecompositionCircle = UnicodeCE::U_DT_CIRCLE,
+ DecompositionFinal = UnicodeCE::U_DT_FINAL,
+ DecompositionFont = UnicodeCE::U_DT_FONT,
+ DecompositionFraction = UnicodeCE::U_DT_FRACTION,
+ DecompositionInitial = UnicodeCE::U_DT_INITIAL,
+ DecompositionIsolated = UnicodeCE::U_DT_ISOLATED,
+ DecompositionMedial = UnicodeCE::U_DT_MEDIAL,
+ DecompositionNarrow = UnicodeCE::U_DT_NARROW,
+ DecompositionNoBreak = UnicodeCE::U_DT_NOBREAK,
+ DecompositionSmall = UnicodeCE::U_DT_SMALL,
+ DecompositionSquare = UnicodeCE::U_DT_SQUARE,
+ DecompositionSub = UnicodeCE::U_DT_SUB,
+ DecompositionSuper = UnicodeCE::U_DT_SUPER,
+ DecompositionVertical = UnicodeCE::U_DT_VERTICAL,
+ DecompositionWide = UnicodeCE::U_DT_WIDE
+};
+
+enum CharCategory {
+ NoCategory = 0,
+ Other_NotAssigned = TO_MASK(UnicodeCE::U_GENERAL_OTHER_TYPES),
+ Letter_Uppercase = TO_MASK(UnicodeCE::U_UPPERCASE_LETTER),
+ Letter_Lowercase = TO_MASK(UnicodeCE::U_LOWERCASE_LETTER),
+ Letter_Titlecase = TO_MASK(UnicodeCE::U_TITLECASE_LETTER),
+ Letter_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_LETTER),
+ Letter_Other = TO_MASK(UnicodeCE::U_OTHER_LETTER),
+
+ Mark_NonSpacing = TO_MASK(UnicodeCE::U_NON_SPACING_MARK),
+ Mark_Enclosing = TO_MASK(UnicodeCE::U_ENCLOSING_MARK),
+ Mark_SpacingCombining = TO_MASK(UnicodeCE::U_COMBINING_SPACING_MARK),
+
+ Number_DecimalDigit = TO_MASK(UnicodeCE::U_DECIMAL_DIGIT_NUMBER),
+ Number_Letter = TO_MASK(UnicodeCE::U_LETTER_NUMBER),
+ Number_Other = TO_MASK(UnicodeCE::U_OTHER_NUMBER),
+
+ Separator_Space = TO_MASK(UnicodeCE::U_SPACE_SEPARATOR),
+ Separator_Line = TO_MASK(UnicodeCE::U_LINE_SEPARATOR),
+ Separator_Paragraph = TO_MASK(UnicodeCE::U_PARAGRAPH_SEPARATOR),
+
+ Other_Control = TO_MASK(UnicodeCE::U_CONTROL_CHAR),
+ Other_Format = TO_MASK(UnicodeCE::U_FORMAT_CHAR),
+ Other_PrivateUse = TO_MASK(UnicodeCE::U_PRIVATE_USE_CHAR),
+ Other_Surrogate = TO_MASK(UnicodeCE::U_SURROGATE),
+
+ Punctuation_Dash = TO_MASK(UnicodeCE::U_DASH_PUNCTUATION),
+ Punctuation_Open = TO_MASK(UnicodeCE::U_START_PUNCTUATION),
+ Punctuation_Close = TO_MASK(UnicodeCE::U_END_PUNCTUATION),
+ Punctuation_Connector = TO_MASK(UnicodeCE::U_CONNECTOR_PUNCTUATION),
+ Punctuation_Other = TO_MASK(UnicodeCE::U_OTHER_PUNCTUATION),
+
+ Symbol_Math = TO_MASK(UnicodeCE::U_MATH_SYMBOL),
+ Symbol_Currency = TO_MASK(UnicodeCE::U_CURRENCY_SYMBOL),
+ Symbol_Modifier = TO_MASK(UnicodeCE::U_MODIFIER_SYMBOL),
+ Symbol_Other = TO_MASK(UnicodeCE::U_OTHER_SYMBOL),
+
+ Punctuation_InitialQuote = TO_MASK(UnicodeCE::U_INITIAL_PUNCTUATION),
+ Punctuation_FinalQuote = TO_MASK(UnicodeCE::U_FINAL_PUNCTUATION)
+};
+
+CharCategory category(unsigned int);
+
+bool isSpace(UChar);
+bool isLetter(UChar);
+bool isPrintableChar(UChar);
+bool isUpper(UChar);
+bool isLower(UChar);
+bool isPunct(UChar);
+bool isDigit(UChar);
+bool isAlphanumeric(UChar);
+inline bool isSeparatorSpace(UChar c) { return category(c) == Separator_Space; }
+inline bool isHighSurrogate(UChar c) { return (c & 0xfc00) == 0xd800; }
+inline bool isLowSurrogate(UChar c) { return (c & 0xfc00) == 0xdc00; }
+
+UChar toLower(UChar);
+UChar toUpper(UChar);
+UChar foldCase(UChar);
+UChar toTitleCase(UChar);
+int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
+int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
+int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
+
+int digitValue(UChar);
+
+UChar mirroredChar(UChar32);
+unsigned char combiningClass(UChar32);
+DecompositionType decompositionType(UChar32);
+Direction direction(UChar32);
+inline bool isArabicChar(UChar32 c)
+{
+ return c >= 0x0600 && c <= 0x06FF;
+}
+
+inline bool hasLineBreakingPropertyComplexContext(UChar32)
+{
+ return false; // FIXME: implement!
+}
+
+inline int umemcasecmp(const UChar* a, const UChar* b, int len)
+{
+ for (int i = 0; i < len; ++i) {
+ UChar c1 = foldCase(a[i]);
+ UChar c2 = foldCase(b[i]);
+ if (c1 != c2)
+ return c1 - c2;
+ }
+ return 0;
+}
+
+inline UChar32 surrogateToUcs4(UChar high, UChar low)
+{
+ return (UChar32(high) << 10) + low - 0x35fdc00;
+}
+
+} // namespace Unicode
+} // namespace WTF
+
+#endif // WTF_UnicodeWinCE_h