summaryrefslogtreecommitdiffstats
path: root/Source/WebCore/icu
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WebCore/icu')
-rw-r--r--Source/WebCore/icu/LICENSE25
-rw-r--r--Source/WebCore/icu/README4
-rw-r--r--Source/WebCore/icu/unicode/parseerr.h88
-rw-r--r--Source/WebCore/icu/unicode/platform.h267
-rw-r--r--Source/WebCore/icu/unicode/putil.h180
-rw-r--r--Source/WebCore/icu/unicode/ubrk.h563
-rw-r--r--Source/WebCore/icu/unicode/uchar.h2798
-rw-r--r--Source/WebCore/icu/unicode/ucnv.h1817
-rw-r--r--Source/WebCore/icu/unicode/ucnv_cb.h162
-rw-r--r--Source/WebCore/icu/unicode/ucnv_err.h456
-rw-r--r--Source/WebCore/icu/unicode/ucol.h1219
-rw-r--r--Source/WebCore/icu/unicode/ucoleitr.h268
-rw-r--r--Source/WebCore/icu/unicode/uconfig.h186
-rw-r--r--Source/WebCore/icu/unicode/ucsdet.h350
-rw-r--r--Source/WebCore/icu/unicode/uenum.h129
-rw-r--r--Source/WebCore/icu/unicode/uidna.h310
-rw-r--r--Source/WebCore/icu/unicode/uiter.h707
-rw-r--r--Source/WebCore/icu/unicode/uloc.h917
-rw-r--r--Source/WebCore/icu/unicode/umachine.h371
-rw-r--r--Source/WebCore/icu/unicode/unorm.h575
-rw-r--r--Source/WebCore/icu/unicode/urename.h1468
-rw-r--r--Source/WebCore/icu/unicode/uscript.h148
-rw-r--r--Source/WebCore/icu/unicode/usearch.h643
-rw-r--r--Source/WebCore/icu/unicode/uset.h745
-rw-r--r--Source/WebCore/icu/unicode/ushape.h234
-rw-r--r--Source/WebCore/icu/unicode/ustring.h1320
-rw-r--r--Source/WebCore/icu/unicode/utf.h221
-rw-r--r--Source/WebCore/icu/unicode/utf16.h605
-rw-r--r--Source/WebCore/icu/unicode/utf8.h627
-rw-r--r--Source/WebCore/icu/unicode/utf_old.h1
-rw-r--r--Source/WebCore/icu/unicode/utypes.h745
-rw-r--r--Source/WebCore/icu/unicode/uversion.h216
32 files changed, 18365 insertions, 0 deletions
diff --git a/Source/WebCore/icu/LICENSE b/Source/WebCore/icu/LICENSE
new file mode 100644
index 0000000..385d130
--- /dev/null
+++ b/Source/WebCore/icu/LICENSE
@@ -0,0 +1,25 @@
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2006 International Business Machines Corporation and others
+
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this
+software and associated documentation files (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use, copy, modify,
+merge, publish, distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above copyright notice(s)
+and this permission notice appear in all copies of the Software and that both the above
+copyright notice(s) and this permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER
+OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall not be used in
+advertising or otherwise to promote the sale, use or other dealings in this Software
+without prior written authorization of the copyright holder.
diff --git a/Source/WebCore/icu/README b/Source/WebCore/icu/README
new file mode 100644
index 0000000..389e2e8
--- /dev/null
+++ b/Source/WebCore/icu/README
@@ -0,0 +1,4 @@
+The headers in this directory are for compiling on Mac OS X 10.4.
+The Mac OS X 10.4 release includes the ICU binary, but not ICU headers.
+For other platforms, installed ICU headers should be used rather than these.
+They are specific to Mac OS X 10.4.
diff --git a/Source/WebCore/icu/unicode/parseerr.h b/Source/WebCore/icu/unicode/parseerr.h
new file mode 100644
index 0000000..d1ba394
--- /dev/null
+++ b/Source/WebCore/icu/unicode/parseerr.h
@@ -0,0 +1,88 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 03/14/00 aliu Creation.
+* 06/27/00 aliu Change from C++ class to C struct
+**********************************************************************
+*/
+#ifndef PARSEERR_H
+#define PARSEERR_H
+
+#include "unicode/utypes.h"
+
+
+/**
+ * The capacity of the context strings in UParseError.
+ * @stable ICU 2.0
+ */
+enum { U_PARSE_CONTEXT_LEN = 16 };
+
+/**
+ * A UParseError struct is used to returned detailed information about
+ * parsing errors. It is used by ICU parsing engines that parse long
+ * rules, patterns, or programs, where the text being parsed is long
+ * enough that more information than a UErrorCode is needed to
+ * localize the error.
+ *
+ * <p>The line, offset, and context fields are optional; parsing
+ * engines may choose not to use to use them.
+ *
+ * <p>The preContext and postContext strings include some part of the
+ * context surrounding the error. If the source text is "let for=7"
+ * and "for" is the error (e.g., because it is a reserved word), then
+ * some examples of what a parser might produce are the following:
+ *
+ * <pre>
+ * preContext postContext
+ * "" "" The parser does not support context
+ * "let " "=7" Pre- and post-context only
+ * "let " "for=7" Pre- and post-context and error text
+ * "" "for" Error text only
+ * </pre>
+ *
+ * <p>Examples of engines which use UParseError (or may use it in the
+ * future) are Transliterator, RuleBasedBreakIterator, and
+ * RegexPattern.
+ *
+ * @stable ICU 2.0
+ */
+typedef struct UParseError {
+
+ /**
+ * The line on which the error occured. If the parser uses this
+ * field, it sets it to the line number of the source text line on
+ * which the error appears, which will be be a value >= 1. If the
+ * parse does not support line numbers, the value will be <= 0.
+ * @stable ICU 2.0
+ */
+ int32_t line;
+
+ /**
+ * The character offset to the error. If the line field is >= 1,
+ * then this is the offset from the start of the line. Otherwise,
+ * this is the offset from the start of the text. If the parser
+ * does not support this field, it will have a value < 0.
+ * @stable ICU 2.0
+ */
+ int32_t offset;
+
+ /**
+ * Textual context before the error. Null-terminated. The empty
+ * string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar preContext[U_PARSE_CONTEXT_LEN];
+
+ /**
+ * The error itself and/or textual context after the error.
+ * Null-terminated. The empty string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar postContext[U_PARSE_CONTEXT_LEN];
+
+} UParseError;
+
+#endif
diff --git a/Source/WebCore/icu/unicode/platform.h b/Source/WebCore/icu/unicode/platform.h
new file mode 100644
index 0000000..9595a26
--- /dev/null
+++ b/Source/WebCore/icu/unicode/platform.h
@@ -0,0 +1,267 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : platform.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+******************************************************************************
+*/
+
+/* Define the platform we're on. */
+#ifndef U_DARWIN
+#define U_DARWIN
+#endif
+
+/* Define whether inttypes.h is available */
+#ifndef U_HAVE_INTTYPES_H
+#define U_HAVE_INTTYPES_H 1
+#endif
+
+/*
+ * Define what support for C++ streams is available.
+ * If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available
+ * (1997711 is the date the ISO/IEC C++ FDIS was published), and then
+ * one should qualify streams using the std namespace in ICU header
+ * files.
+ * If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is
+ * available instead (198506 is the date when Stroustrup published
+ * "An Extensible I/O Facility for C++" at the summer USENIX conference).
+ * If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and
+ * support for them will be silently suppressed in ICU.
+ *
+ */
+
+#ifndef U_IOSTREAM_SOURCE
+#define U_IOSTREAM_SOURCE 199711
+#endif
+
+/* Determines whether specific types are available */
+#ifndef U_HAVE_INT8_T
+#define U_HAVE_INT8_T 1
+#endif
+
+#ifndef U_HAVE_UINT8_T
+#define U_HAVE_UINT8_T 0
+#endif
+
+#ifndef U_HAVE_INT16_T
+#define U_HAVE_INT16_T 1
+#endif
+
+#ifndef U_HAVE_UINT16_T
+#define U_HAVE_UINT16_T 0
+#endif
+
+#ifndef U_HAVE_INT32_T
+#define U_HAVE_INT32_T 1
+#endif
+
+#ifndef U_HAVE_UINT32_T
+#define U_HAVE_UINT32_T 0
+#endif
+
+#ifndef U_HAVE_INT64_T
+#define U_HAVE_INT64_T 1
+#endif
+
+#ifndef U_HAVE_UINT64_T
+#define U_HAVE_UINT64_T 0
+#endif
+
+/*===========================================================================*/
+/* Generic data types */
+/*===========================================================================*/
+
+#include <sys/types.h>
+
+/* If your platform does not have the <inttypes.h> header, you may
+ need to edit the typedefs below. */
+#if U_HAVE_INTTYPES_H
+
+/* autoconf 2.13 sometimes can't properly find the data types in <inttypes.h> */
+/* os/390 needs <inttypes.h>, but it doesn't have int8_t, and it sometimes */
+/* doesn't have uint8_t depending on the OS version. */
+/* So we have this work around. */
+#ifdef OS390
+/* The features header is needed to get (u)int64_t sometimes. */
+#include <features.h>
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+#if !defined(__uint8_t)
+#define __uint8_t 1
+typedef unsigned char uint8_t;
+#endif
+#endif /* OS390 */
+
+#include <inttypes.h>
+
+#else /* U_HAVE_INTTYPES_H */
+
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+ typedef signed long long int64_t;
+/* else we may not have a 64-bit type */
+#endif
+
+#if ! U_HAVE_UINT64_T
+ typedef unsigned long long uint64_t;
+/* else we may not have a 64-bit type */
+#endif
+
+#endif
+
+/*===========================================================================*/
+/* Compiler and environment features */
+/*===========================================================================*/
+
+/* Define whether namespace is supported */
+#ifndef U_HAVE_NAMESPACE
+#define U_HAVE_NAMESPACE 1
+#endif
+
+/* Determines the endianness of the platform
+ It's done this way in case multiple architectures are being built at once.
+ For example, Darwin supports fat binaries, which can be both PPC and x86 based. */
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN)
+#define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
+#else
+#define U_IS_BIG_ENDIAN 1
+#endif
+
+/* 1 or 0 to enable or disable threads. If undefined, default is: enable threads. */
+#define ICU_USE_THREADS 1
+
+#ifndef U_DEBUG
+#define U_DEBUG 0
+#endif
+
+#ifndef U_RELEASE
+#define U_RELEASE 1
+#endif
+
+/* Determine whether to disable renaming or not. This overrides the
+ setting in umachine.h which is for all platforms. */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 1
+#endif
+
+/* Determine whether to override new and delete. */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+/* Determine whether to override placement new and delete for STL. */
+#ifndef U_HAVE_PLACEMENT_NEW
+#define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 1
+#endif
+
+/* Define the library suffix in a C syntax. */
+#define U_HAVE_LIB_SUFFIX 0
+#define U_LIB_SUFFIX_C_NAME
+#define U_LIB_SUFFIX_C_NAME_STRING ""
+
+/*===========================================================================*/
+/* Character data types */
+/*===========================================================================*/
+
+#if defined(OS390) || defined(OS400)
+# define U_CHARSET_FAMILY 1
+#endif
+
+/*===========================================================================*/
+/* Information about wchar support */
+/*===========================================================================*/
+
+#define U_HAVE_WCHAR_H 1
+#define U_SIZEOF_WCHAR_T 4
+
+#define U_HAVE_WCSCPY 1
+
+/*===========================================================================*/
+/* Information about POSIX support */
+/*===========================================================================*/
+
+#define U_HAVE_NL_LANGINFO 1
+#define U_HAVE_NL_LANGINFO_CODESET 1
+#define U_NL_LANGINFO_CODESET CODESET
+
+#if 1
+#define U_TZSET tzset
+#endif
+#if 0
+#define U_TIMEZONE
+#endif
+#if 1
+#define U_TZNAME tzname
+#endif
+
+#define U_HAVE_MMAP 1
+#define U_HAVE_POPEN 1
+
+/*===========================================================================*/
+/* Symbol import-export control */
+/*===========================================================================*/
+
+#define U_EXPORT
+/* U_CALLCONV is releated to U_EXPORT2 */
+#define U_EXPORT2
+
+/* cygwin needs to export/import data */
+#ifdef U_CYGWIN
+#define U_IMPORT __declspec(dllimport)
+#else
+#define U_IMPORT
+#endif
+
+/*===========================================================================*/
+/* Code alignment and C function inlining */
+/*===========================================================================*/
+
+#ifndef U_INLINE
+#define U_INLINE inline
+#endif
+
+#define U_ALIGN_CODE(n)
+
+/*===========================================================================*/
+/* Programs used by ICU code */
+/*===========================================================================*/
+
+#define U_MAKE "/usr/bin/gnumake"
diff --git a/Source/WebCore/icu/unicode/putil.h b/Source/WebCore/icu/unicode/putil.h
new file mode 100644
index 0000000..685df53
--- /dev/null
+++ b/Source/WebCore/icu/unicode/putil.h
@@ -0,0 +1,180 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : putil.h
+*
+* Date Name Description
+* 05/14/98 nos Creation (content moved here from utypes.h).
+* 06/17/99 erm Added IEEE_754
+* 07/22/98 stephen Added IEEEremainder, max, min, trunc
+* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
+* 08/24/98 stephen Added longBitsFromDouble
+* 03/02/99 stephen Removed openFile(). Added AS400 support.
+* 04/15/99 stephen Converted to C
+* 11/15/99 helena Integrated S/390 changes for IEEE support.
+* 01/11/00 helena Added u_getVersion.
+******************************************************************************
+*/
+
+#ifndef PUTIL_H
+#define PUTIL_H
+
+#include "unicode/utypes.h"
+
+/* Define this to 1 if your platform supports IEEE 754 floating point,
+ to 0 if it does not. */
+#ifndef IEEE_754
+# define IEEE_754 1
+#endif
+
+/*==========================================================================*/
+/* Platform utilities */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * libarary. For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Return the ICU data directory.
+ * The data directory is where common format ICU data files (.dat files)
+ * are loaded from. Note that normal use of the built-in ICU
+ * facilities does not require loading of an external data file;
+ * unless you are adding custom data to ICU, the data directory
+ * does not need to be set.
+ *
+ * The data directory is determined as follows:
+ * If u_setDataDirectory() has been called, that is it, otherwise
+ * if the ICU_DATA environment variable is set, use that, otherwise
+ * If a data directory was specifed at ICU build time
+ * (#define ICU_DATA_DIR "path"), use that,
+ * otherwise no data directory is available.
+ *
+ * @return the data directory, or an empty string ("") if no data directory has
+ * been specified.
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2 u_getDataDirectory(void);
+
+/**
+ * Set the ICU data directory.
+ * The data directory is where common format ICU data files (.dat files)
+ * are loaded from. Note that normal use of the built-in ICU
+ * facilities does not require loading of an external data file;
+ * unless you are adding custom data to ICU, the data directory
+ * does not need to be set.
+ *
+ * This function should be called at most once in a process, before the
+ * first ICU operation (e.g., u_init()) that will require the loading of an
+ * ICU data file.
+ * This function is not thread-safe. Use it before calling ICU APIs from
+ * multiple threads.
+ *
+ * @param directory The directory to be set.
+ *
+ * @see u_init
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
+
+/**
+ * Please use ucnv_getDefaultName() instead.
+ * Return the default codepage for this platform and locale.
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default codepage for this platform
+ * @internal
+ */
+U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
+
+/**
+ * Please use uloc_getDefault() instead.
+ * Return the default locale ID string by querying ths system, or
+ * zero if one cannot be found.
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default locale ID string
+ * @internal
+ */
+U_INTERNAL const char* U_EXPORT2 uprv_getDefaultLocaleID(void);
+
+/**
+ * Filesystem file and path separator characters.
+ * Example: '/' and ':' on Unix, '\\' and ';' on Windows.
+ * @stable ICU 2.0
+ */
+#ifdef XP_MAC
+# define U_FILE_SEP_CHAR ':'
+# define U_FILE_ALT_SEP_CHAR ':'
+# define U_PATH_SEP_CHAR ';'
+# define U_FILE_SEP_STRING ":"
+# define U_FILE_ALT_SEP_STRING ":"
+# define U_PATH_SEP_STRING ";"
+#elif defined(WIN32) || defined(OS2)
+# define U_FILE_SEP_CHAR '\\'
+# define U_FILE_ALT_SEP_CHAR '/'
+# define U_PATH_SEP_CHAR ';'
+# define U_FILE_SEP_STRING "\\"
+# define U_FILE_ALT_SEP_STRING "/"
+# define U_PATH_SEP_STRING ";"
+#else
+# define U_FILE_SEP_CHAR '/'
+# define U_FILE_ALT_SEP_CHAR '/'
+# define U_PATH_SEP_CHAR ':'
+# define U_FILE_SEP_STRING "/"
+# define U_FILE_ALT_SEP_STRING "/"
+# define U_PATH_SEP_STRING ":"
+#endif
+
+/**
+ * Convert char characters to UChar characters.
+ * This utility function is useful only for "invariant characters"
+ * that are encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param cs Input string, points to <code>length</code>
+ * character bytes from a subset of the platform encoding.
+ * @param us Output string, points to memory for <code>length</code>
+ * Unicode characters.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length);
+
+/**
+ * Convert UChar characters to char characters.
+ * This utility function is useful only for "invariant characters"
+ * that can be encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param us Input string, points to <code>length</code>
+ * Unicode characters that can be encoded with the
+ * codepage-invariant subset of the platform encoding.
+ * @param cs Output string, points to memory for <code>length</code>
+ * character bytes.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length);
+
+#endif
diff --git a/Source/WebCore/icu/unicode/ubrk.h b/Source/WebCore/icu/unicode/ubrk.h
new file mode 100644
index 0000000..81e12c0
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ubrk.h
@@ -0,0 +1,563 @@
+/*
+* Copyright (C) 1996-2004, International Business Machines Corporation and others. All Rights Reserved.
+*****************************************************************************************
+*/
+
+#ifndef UBRK_H
+#define UBRK_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+
+/**
+ * A text-break iterator.
+ * For usage in C programs.
+ */
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+ /**
+ * Opaque type representing an ICU Break iterator object.
+ * @stable ICU 2.0
+ */
+ typedef void UBreakIterator;
+#endif
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/parseerr.h"
+
+/**
+ * \file
+ * \brief C API: BreakIterator
+ *
+ * <h2> BreakIterator C API </h2>
+ *
+ * The BreakIterator C API defines methods for finding the location
+ * of boundaries in text. Pointer to a UBreakIterator maintain a
+ * current position and scan over text returning the index of characters
+ * where boundaries occur.
+ * <P>
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ * <P>
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ * <P>
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ * <P>
+ * Character boundary analysis allows users to interact with
+ * characters as they expect to, for example, when moving the cursor
+ * through a text string. Character boundary analysis provides correct
+ * navigation of through character strings, regardless of how the
+ * character is stored. For example, an accented character might be
+ * stored as a base character and a diacritical mark. What users
+ * consider to be a character can differ between languages.
+ * <P>
+ * Title boundary analysis locates all positions,
+ * typically starts of words, that should be set to Title Case
+ * when title casing the text.
+ * <P>
+ *
+ * This is the interface for all text boundaries.
+ * <P>
+ * Examples:
+ * <P>
+ * Helper function to output text
+ * <pre>
+ * \code
+ * void printTextRange(UChar* str, int32_t start, int32_t end ) {
+ * UChar* result;
+ * UChar* temp;
+ * const char* res;
+ * temp=(UChar*)malloc(sizeof(UChar) * ((u_strlen(str)-start)+1));
+ * result=(UChar*)malloc(sizeof(UChar) * ((end-start)+1));
+ * u_strcpy(temp, &str[start]);
+ * u_strncpy(result, temp, end-start);
+ * res=(char*)malloc(sizeof(char) * (u_strlen(result)+1));
+ * u_austrcpy(res, result);
+ * printf("%s\n", res);
+ * }
+ * \endcode
+ * </pre>
+ * Print each element in order:
+ * <pre>
+ * \code
+ * void printEachForward( UBreakIterator* boundary, UChar* str) {
+ * int32_t end;
+ * int32_t start = ubrk_first(boundary);
+ * for (end = ubrk_next(boundary)); end != UBRK_DONE; start = end, end = ubrk_next(boundary)) {
+ * printTextRange(str, start, end );
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Print each element in reverse order:
+ * <pre>
+ * \code
+ * void printEachBackward( UBreakIterator* boundary, UChar* str) {
+ * int32_t start;
+ * int32_t end = ubrk_last(boundary);
+ * for (start = ubrk_previous(boundary); start != UBRK_DONE; end = start, start =ubrk_previous(boundary)) {
+ * printTextRange( str, start, end );
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Print first element
+ * <pre>
+ * \code
+ * void printFirst(UBreakIterator* boundary, UChar* str) {
+ * int32_t end;
+ * int32_t start = ubrk_first(boundary);
+ * end = ubrk_next(boundary);
+ * printTextRange( str, start, end );
+ * }
+ * \endcode
+ * </pre>
+ * Print last element
+ * <pre>
+ * \code
+ * void printLast(UBreakIterator* boundary, UChar* str) {
+ * int32_t start;
+ * int32_t end = ubrk_last(boundary);
+ * start = ubrk_previous(boundary);
+ * printTextRange(str, start, end );
+ * }
+ * \endcode
+ * </pre>
+ * Print the element at a specified position
+ * <pre>
+ * \code
+ * void printAt(UBreakIterator* boundary, int32_t pos , UChar* str) {
+ * int32_t start;
+ * int32_t end = ubrk_following(boundary, pos);
+ * start = ubrk_previous(boundary);
+ * printTextRange(str, start, end );
+ * }
+ * \endcode
+ * </pre>
+ * Creating and using text boundaries
+ * <pre>
+ * \code
+ * void BreakIterator_Example( void ) {
+ * UBreakIterator* boundary;
+ * UChar *stringToExamine;
+ * stringToExamine=(UChar*)malloc(sizeof(UChar) * (strlen("Aaa bbb ccc. Ddd eee fff.")+1) );
+ * u_uastrcpy(stringToExamine, "Aaa bbb ccc. Ddd eee fff.");
+ * printf("Examining: "Aaa bbb ccc. Ddd eee fff.");
+ *
+ * //print each sentence in forward and reverse order
+ * boundary = ubrk_open(UBRK_SENTENCE, "en_us", stringToExamine, u_strlen(stringToExamine), &status);
+ * printf("----- forward: -----------\n");
+ * printEachForward(boundary, stringToExamine);
+ * printf("----- backward: ----------\n");
+ * printEachBackward(boundary, stringToExamine);
+ * ubrk_close(boundary);
+ *
+ * //print each word in order
+ * boundary = ubrk_open(UBRK_WORD, "en_us", stringToExamine, u_strlen(stringToExamine), &status);
+ * printf("----- forward: -----------\n");
+ * printEachForward(boundary, stringToExamine);
+ * printf("----- backward: ----------\n");
+ * printEachBackward(boundary, stringToExamine);
+ * //print first element
+ * printf("----- first: -------------\n");
+ * printFirst(boundary, stringToExamine);
+ * //print last element
+ * printf("----- last: --------------\n");
+ * printLast(boundary, stringToExamine);
+ * //print word at charpos 10
+ * printf("----- at pos 10: ---------\n");
+ * printAt(boundary, 10 , stringToExamine);
+ *
+ * ubrk_close(boundary);
+ * }
+ * \endcode
+ * </pre>
+ */
+
+/** The possible types of text boundaries. @stable ICU 2.0 */
+typedef enum UBreakIteratorType {
+ /** Character breaks @stable ICU 2.0 */
+ UBRK_CHARACTER,
+ /** Word breaks @stable ICU 2.0 */
+ UBRK_WORD,
+ /** Line breaks @stable ICU 2.0 */
+ UBRK_LINE,
+ /** Sentence breaks @stable ICU 2.0 */
+ UBRK_SENTENCE,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Title Case breaks
+ * The iterator created using this type locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use Word Boundary iterator.
+ *
+ * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
+ */
+ UBRK_TITLE
+#endif /* U_HIDE_DEPRECATED_API */
+
+} UBreakIteratorType;
+
+/** Value indicating all text boundaries have been returned.
+ * @stable ICU 2.0
+ */
+#define UBRK_DONE ((int32_t) -1)
+
+
+/**
+ * Enum constants for the word break tags returned by
+ * getRuleStatus(). A range of values is defined for each category of
+ * word, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ * @stable ICU 2.2
+*/
+typedef enum UWordBreak {
+ /** Tag value for "words" that do not fit into any of other categories.
+ * Includes spaces and most punctuation. */
+ UBRK_WORD_NONE = 0,
+ /** Upper bound for tags for uncategorized words. */
+ UBRK_WORD_NONE_LIMIT = 100,
+ /** Tag value for words that appear to be numbers, lower limit. */
+ UBRK_WORD_NUMBER = 100,
+ /** Tag value for words that appear to be numbers, upper limit. */
+ UBRK_WORD_NUMBER_LIMIT = 200,
+ /** Tag value for words that contain letters, excluding
+ * hiragana, katakana or ideographic characters, lower limit. */
+ UBRK_WORD_LETTER = 200,
+ /** Tag value for words containing letters, upper limit */
+ UBRK_WORD_LETTER_LIMIT = 300,
+ /** Tag value for words containing kana characters, lower limit */
+ UBRK_WORD_KANA = 300,
+ /** Tag value for words containing kana characters, upper limit */
+ UBRK_WORD_KANA_LIMIT = 400,
+ /** Tag value for words containing ideographic characters, lower limit */
+ UBRK_WORD_IDEO = 400,
+ /** Tag value for words containing ideographic characters, upper limit */
+ UBRK_WORD_IDEO_LIMIT = 500
+} UWordBreak;
+
+/**
+ * Enum constants for the line break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * word, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ * @draft ICU 2.8
+*/
+typedef enum ULineBreakTag {
+ /** Tag value for soft line breaks, positions at which a line break
+ * is acceptable but not required */
+ UBRK_LINE_SOFT = 0,
+ /** Upper bound for soft line breaks. */
+ UBRK_LINE_SOFT_LIMIT = 100,
+ /** Tag value for a hard, or mandatory line break */
+ UBRK_LINE_HARD = 100,
+ /** Upper bound for hard line breaks. */
+ UBRK_LINE_HARD_LIMIT = 200
+} ULineBreakTag;
+
+
+
+/**
+ * Enum constants for the sentence break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * sentence, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ * @draft ICU 2.8
+*/
+typedef enum USentenceBreakTag {
+ /** Tag value for for sentences ending with a sentence terminator
+ * ('.', '?', '!', etc.) character, possibly followed by a
+ * hard separator (CR, LF, PS, etc.)
+ */
+ UBRK_SENTENCE_TERM = 0,
+ /** Upper bound for tags for sentences ended by sentence terminators. */
+ UBRK_SENTENCE_TERM_LIMIT = 100,
+ /** Tag value for for sentences that do not contain an ending
+ * sentence terminator ('.', '?', '!', etc.) character, but
+ * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
+ */
+ UBRK_SENTENCE_SEP = 100,
+ /** Upper bound for tags for sentences ended by a separator. */
+ UBRK_SENTENCE_SEP_LIMIT = 200
+ /** Tag value for a hard, or mandatory line break */
+} USentenceBreakTag;
+
+
+/**
+ * Open a new UBreakIterator for locating text boundaries for a specified locale.
+ * A UBreakIterator may be used for detecting character, line, word,
+ * and sentence breaks in text.
+ * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
+ * UBRK_LINE, UBRK_SENTENCE
+ * @param locale The locale specifying the text-breaking conventions.
+ * @param text The text to be iterated over.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified locale.
+ * @see ubrk_openRules
+ * @stable ICU 2.0
+ */
+U_STABLE UBreakIterator* U_EXPORT2
+ubrk_open(UBreakIteratorType type,
+ const char *locale,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
+ * The rule syntax is ... (TBD)
+ * @param rules A set of rules specifying the text breaking conventions.
+ * @param rulesLength The number of characters in rules, or -1 if null-terminated.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param parseErr Receives position and context information for any syntax errors
+ * detected while parsing the rules.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @stable ICU 2.2
+ */
+U_STABLE UBreakIterator* U_EXPORT2
+ubrk_openRules(const UChar *rules,
+ int32_t rulesLength,
+ const UChar *text,
+ int32_t textLength,
+ UParseError *parseErr,
+ UErrorCode *status);
+
+/**
+ * Thread safe cloning operation
+ * @param bi iterator to be cloned
+ * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize pointer to size of allocated space.
+ * If *pBufferSize == 0, a sufficient size for use in cloning will
+ * be returned ('pre-flighting')
+ * If *pBufferSize is not enough for a stack-based safe clone,
+ * new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_STABLE UBreakIterator * U_EXPORT2
+ubrk_safeClone(
+ const UBreakIterator *bi,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+/**
+ * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone().
+ * @stable ICU 2.0
+ */
+#define U_BRK_SAFECLONE_BUFFERSIZE 512
+
+/**
+* Close a UBreakIterator.
+* Once closed, a UBreakIterator may no longer be used.
+* @param bi The break iterator to close.
+ * @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+ubrk_close(UBreakIterator *bi);
+
+/**
+ * Sets an existing iterator to point to a new piece of text
+ * @param bi The iterator to use
+ * @param text The text to be set
+ * @param textLength The length of the text
+ * @param status The error code
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubrk_setText(UBreakIterator* bi,
+ const UChar* text,
+ int32_t textLength,
+ UErrorCode* status);
+
+/**
+ * Determine the most recently-returned text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
+ * \ref ubrk_first, or \ref ubrk_last.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_current(const UBreakIterator *bi);
+
+/**
+ * Determine the text boundary following the current text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the next text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_previous
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_next(UBreakIterator *bi);
+
+/**
+ * Determine the text boundary preceding the current text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the preceding text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_next
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_previous(UBreakIterator *bi);
+
+/**
+ * Determine the index of the first character in the text being scanned.
+ * This is not always the same as index 0 of the text.
+ * @param bi The break iterator to use.
+ * @return The character index of the first character in the text being scanned.
+ * @see ubrk_last
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_first(UBreakIterator *bi);
+
+/**
+ * Determine the index immediately <EM>beyond</EM> the last character in the text being
+ * scanned.
+ * This is not the same as the last character.
+ * @param bi The break iterator to use.
+ * @return The character offset immediately <EM>beyond</EM> the last character in the
+ * text being scanned.
+ * @see ubrk_first
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_last(UBreakIterator *bi);
+
+/**
+ * Determine the text boundary preceding the specified offset.
+ * The value returned is always smaller than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary preceding offset, or UBRK_DONE.
+ * @see ubrk_following
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_preceding(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+ * Determine the text boundary following the specified offset.
+ * The value returned is always greater than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary following offset, or UBRK_DONE.
+ * @see ubrk_preceding
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_following(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+* Get a locale for which text breaking information is available.
+* A UBreakIterator in a locale returned by this function will perform the correct
+* text breaking for the locale.
+* @param index The index of the desired locale.
+* @return A locale for which number text breaking information is available, or 0 if none.
+* @see ubrk_countAvailable
+* @stable ICU 2.0
+*/
+U_STABLE const char* U_EXPORT2
+ubrk_getAvailable(int32_t index);
+
+/**
+* Determine how many locales have text breaking information available.
+* This function is most useful as determining the loop ending condition for
+* calls to \ref ubrk_getAvailable.
+* @return The number of locales for which text breaking information is available.
+* @see ubrk_getAvailable
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+ubrk_countAvailable(void);
+
+
+/**
+* Returns true if the specfied position is a boundary position. As a side
+* effect, leaves the iterator pointing to the first boundary position at
+* or after "offset".
+* @param bi The break iterator to use.
+* @param offset the offset to check.
+* @return True if "offset" is a boundary position.
+* @stable ICU 2.0
+*/
+U_STABLE UBool U_EXPORT2
+ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
+
+/**
+ * Return the status from the break rule that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. For rules that do not specify a
+ * status, a default value of 0 is returned.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_getRuleStatus(UBreakIterator *bi);
+
+/**
+ * Get the statuses from the break rules that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. The default status value for rules
+ * that do not explicitly provide one is zero.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @param bi The break iterator to use
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attemtping to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from rules that determined
+ * the most recent boundary returned by the break iterator.
+ * @draft ICU 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
+
+/**
+ * Return the locale of the break iterator. You can choose between the valid and
+ * the actual locale.
+ * @param bi break iterator
+ * @param type locale type (valid or actual)
+ * @param status error code
+ * @return locale string
+ * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
+ */
+U_DRAFT const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uchar.h b/Source/WebCore/icu/unicode/uchar.h
new file mode 100644
index 0000000..7fd490c
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uchar.h
@@ -0,0 +1,2798 @@
+/*
+**********************************************************************
+* Copyright (C) 1997-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File UCHAR.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 03/29/99 helena Updated for C APIs.
+* 4/15/99 Madhu Updated for C Implementation and Javadoc
+* 5/20/99 Madhu Added the function u_getVersion()
+* 8/19/1999 srl Upgraded scripts to Unicode 3.0
+* 8/27/1999 schererm UCharDirection constants: U_...
+* 11/11/1999 weiv added u_isalnum(), cleaned comments
+* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
+******************************************************************************
+*/
+
+#ifndef UCHAR_H
+#define UCHAR_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/*==========================================================================*/
+/* Unicode version number */
+/*==========================================================================*/
+/**
+ * Unicode version number, default for the current ICU version.
+ * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * and may be generated from UCD files from a different Unicode version.
+ * Call u_getUnicodeVersion to get the actual Unicode version of the data.
+ *
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.0
+ */
+#define U_UNICODE_VERSION "4.0.1"
+
+/**
+ * \file
+ * \brief C API: Unicode Properties
+ *
+ * This C API provides low-level access to the Unicode Character Database.
+ * In addition to raw property values, some convenience functions calculate
+ * derived properties, for example for Java-style programming.
+ *
+ * Unicode assigns each code point (not just assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ *
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (http://oss.software.ibm.com/icu/userguide/properties.html).
+ *
+ * Many functions are designed to match java.lang.Character functions.
+ * See the individual function documentation,
+ * and see the JDK 1.4.1 java.lang.Character documentation
+ * at http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html
+ *
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ *
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ *
+ * A summary of the behavior of some C/POSIX character classification implementations
+ * for Unicode is available at http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html
+ *
+ * <strong>Important</strong>:
+ * The behavior of the ICU C/POSIX-style character classification
+ * functions is subject to change according to discussion of the above summary.
+ *
+ * Note: There are several ICU whitespace functions.
+ * Comparison:
+ * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ * most of general categories "Z" (separators) + most whitespace ISO controls
+ * (including no-break spaces, but excluding IS1..IS4 and ZWSP)
+ * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
+ * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
+ * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
+ */
+
+/**
+ * Constants.
+ */
+
+/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
+#define UCHAR_MIN_VALUE 0
+
+/**
+ * The highest Unicode code point value (scalar value) according to
+ * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
+ * For a single character, UChar32 is a simple type that can hold any code point value.
+ *
+ * @see UChar32
+ * @stable ICU 2.0
+ */
+#define UCHAR_MAX_VALUE 0x10ffff
+
+/**
+ * Get a single-bit bit set (a flag) from a bit number 0..31.
+ * @stable ICU 2.1
+ */
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+/*
+ * !! Note: Several comments in this file are machine-read by the
+ * genpname tool. These comments describe the correspondence between
+ * icu enum constants and UCD entities. Do not delete them. Update
+ * these comments as needed.
+ *
+ * Any comment of the form "/ *[name]* /" (spaces added) is such
+ * a comment.
+ *
+ * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
+ * name, which must match PropertyValueAliases.txt.
+ */
+
+/**
+ * Selection constants for Unicode properties.
+ * These constants are used in functions like u_hasBinaryProperty to select
+ * one of the Unicode properties.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ * Check u_getUnicodeVersion to be sure.
+ *
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+typedef enum UProperty {
+ /* See note !!. Comments of the form "Binary property Dash",
+ "Enumerated property Script", "Double property Numeric_Value",
+ and "String property Age" are read by genpname. */
+
+ /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
+ debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
+ rather than UCHAR_BINARY_START. Likewise for other *_START
+ identifiers. */
+
+ /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
+ Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
+ UCHAR_ALPHABETIC=0,
+ /** First constant for binary Unicode properties. @stable ICU 2.1 */
+ UCHAR_BINARY_START=UCHAR_ALPHABETIC,
+ /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
+ UCHAR_ASCII_HEX_DIGIT,
+ /** Binary property Bidi_Control.
+ Format controls which have specific functions
+ in the Bidi Algorithm. @stable ICU 2.1 */
+ UCHAR_BIDI_CONTROL,
+ /** Binary property Bidi_Mirrored.
+ Characters that may change display in RTL text.
+ Same as u_isMirrored.
+ See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
+ UCHAR_BIDI_MIRRORED,
+ /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
+ UCHAR_DASH,
+ /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
+ Ignorable in most processing.
+ <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
+ UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
+ /** Binary property Deprecated (new in Unicode 3.2).
+ The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
+ UCHAR_DEPRECATED,
+ /** Binary property Diacritic. Characters that linguistically modify
+ the meaning of another character to which they apply. @stable ICU 2.1 */
+ UCHAR_DIACRITIC,
+ /** Binary property Extender.
+ Extend the value or shape of a preceding alphabetic character,
+ e.g., length and iteration marks. @stable ICU 2.1 */
+ UCHAR_EXTENDER,
+ /** Binary property Full_Composition_Exclusion.
+ CompositionExclusions.txt+Singleton Decompositions+
+ Non-Starter Decompositions. @stable ICU 2.1 */
+ UCHAR_FULL_COMPOSITION_EXCLUSION,
+ /** Binary property Grapheme_Base (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_BASE,
+ /** Binary property Grapheme_Extend (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_EXTEND,
+ /** Binary property Grapheme_Link (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
+ UCHAR_GRAPHEME_LINK,
+ /** Binary property Hex_Digit.
+ Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
+ UCHAR_HEX_DIGIT,
+ /** Binary property Hyphen. Dashes used to mark connections
+ between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
+ UCHAR_HYPHEN,
+ /** Binary property ID_Continue.
+ Characters that can continue an identifier.
+ DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
+ ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
+ UCHAR_ID_CONTINUE,
+ /** Binary property ID_Start.
+ Characters that can start an identifier.
+ Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
+ UCHAR_ID_START,
+ /** Binary property Ideographic.
+ CJKV ideographs. @stable ICU 2.1 */
+ UCHAR_IDEOGRAPHIC,
+ /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_BINARY_OPERATOR,
+ /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_TRINARY_OPERATOR,
+ /** Binary property Join_Control.
+ Format controls for cursive joining and ligation. @stable ICU 2.1 */
+ UCHAR_JOIN_CONTROL,
+ /** Binary property Logical_Order_Exception (new in Unicode 3.2).
+ Characters that do not use logical order and
+ require special handling in most processing. @stable ICU 2.1 */
+ UCHAR_LOGICAL_ORDER_EXCEPTION,
+ /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
+ Ll+Other_Lowercase @stable ICU 2.1 */
+ UCHAR_LOWERCASE,
+ /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
+ UCHAR_MATH,
+ /** Binary property Noncharacter_Code_Point.
+ Code points that are explicitly defined as illegal
+ for the encoding of characters. @stable ICU 2.1 */
+ UCHAR_NONCHARACTER_CODE_POINT,
+ /** Binary property Quotation_Mark. @stable ICU 2.1 */
+ UCHAR_QUOTATION_MARK,
+ /** Binary property Radical (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_RADICAL,
+ /** Binary property Soft_Dotted (new in Unicode 3.2).
+ Characters with a "soft dot", like i or j.
+ An accent placed on these characters causes
+ the dot to disappear. @stable ICU 2.1 */
+ UCHAR_SOFT_DOTTED,
+ /** Binary property Terminal_Punctuation.
+ Punctuation characters that generally mark
+ the end of textual units. @stable ICU 2.1 */
+ UCHAR_TERMINAL_PUNCTUATION,
+ /** Binary property Unified_Ideograph (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_UNIFIED_IDEOGRAPH,
+ /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
+ Lu+Other_Uppercase @stable ICU 2.1 */
+ UCHAR_UPPERCASE,
+ /** Binary property White_Space.
+ Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
+ Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
+ UCHAR_WHITE_SPACE,
+ /** Binary property XID_Continue.
+ ID_Continue modified to allow closure under
+ normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_CONTINUE,
+ /** Binary property XID_Start. ID_Start modified to allow
+ closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_START,
+ /** Binary property Case_Sensitive. Either the source of a case
+ mapping or _in_ the target of a case mapping. Not the same as
+ the general category Cased_Letter. @stable ICU 2.6 */
+ UCHAR_CASE_SENSITIVE,
+ /** Binary property STerm (new in Unicode 4.0.1).
+ Sentence Terminal. Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ @draft ICU 3.0 */
+ UCHAR_S_TERM,
+ /** Binary property Variation_Selector (new in Unicode 4.0.1).
+ Indicates all those characters that qualify as Variation Selectors.
+ For details on the behavior of these characters,
+ see StandardizedVariants.html and 15.6 Variation Selectors.
+ @draft ICU 3.0 */
+ UCHAR_VARIATION_SELECTOR,
+ /** Binary property NFD_Inert.
+ ICU-specific property for characters that are inert under NFD,
+ i.e., they do not interact with adjacent characters.
+ Used for example in normalizing transforms in incremental mode
+ to find the boundary of safely normalizable text despite possible
+ text additions.
+
+ There is one such property per normalization form.
+ These properties are computed as follows - an inert character is:
+ a) unassigned, or ALL of the following:
+ b) of combining class 0.
+ c) not decomposed by this normalization form.
+ AND if NFC or NFKC,
+ d) can never compose with a previous character.
+ e) can never compose with a following character.
+ f) can never change if another character is added.
+ Example: a-breve might satisfy all but f, but if you
+ add an ogonek it changes to a-ogonek + breve
+
+ See also com.ibm.text.UCD.NFSkippable in the ICU4J repository,
+ and icu/source/common/unormimp.h .
+ @draft ICU 3.0 */
+ UCHAR_NFD_INERT,
+ /** Binary property NFKD_Inert.
+ ICU-specific property for characters that are inert under NFKD,
+ i.e., they do not interact with adjacent characters.
+ Used for example in normalizing transforms in incremental mode
+ to find the boundary of safely normalizable text despite possible
+ text additions.
+ @see UCHAR_NFD_INERT
+ @draft ICU 3.0 */
+ UCHAR_NFKD_INERT,
+ /** Binary property NFC_Inert.
+ ICU-specific property for characters that are inert under NFC,
+ i.e., they do not interact with adjacent characters.
+ Used for example in normalizing transforms in incremental mode
+ to find the boundary of safely normalizable text despite possible
+ text additions.
+ @see UCHAR_NFD_INERT
+ @draft ICU 3.0 */
+ UCHAR_NFC_INERT,
+ /** Binary property NFKC_Inert.
+ ICU-specific property for characters that are inert under NFKC,
+ i.e., they do not interact with adjacent characters.
+ Used for example in normalizing transforms in incremental mode
+ to find the boundary of safely normalizable text despite possible
+ text additions.
+ @see UCHAR_NFD_INERT
+ @draft ICU 3.0 */
+ UCHAR_NFKC_INERT,
+ /** Binary Property Segment_Starter.
+ ICU-specific property for characters that are starters in terms of
+ Unicode normalization and combining character sequences.
+ They have ccc=0 and do not occur in non-initial position of the
+ canonical decomposition of any character
+ (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)).
+ ICU uses this property for segmenting a string for generating a set of
+ canonically equivalent strings, e.g. for canonical closure while
+ processing collation tailoring rules.
+ @draft ICU 3.0 */
+ UCHAR_SEGMENT_STARTER,
+ /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
+ UCHAR_BINARY_LIMIT,
+
+ /** Enumerated property Bidi_Class.
+ Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
+ UCHAR_BIDI_CLASS=0x1000,
+ /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+ UCHAR_INT_START=UCHAR_BIDI_CLASS,
+ /** Enumerated property Block.
+ Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
+ UCHAR_BLOCK,
+ /** Enumerated property Canonical_Combining_Class.
+ Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
+ UCHAR_CANONICAL_COMBINING_CLASS,
+ /** Enumerated property Decomposition_Type.
+ Returns UDecompositionType values. @stable ICU 2.2 */
+ UCHAR_DECOMPOSITION_TYPE,
+ /** Enumerated property East_Asian_Width.
+ See http://www.unicode.org/reports/tr11/
+ Returns UEastAsianWidth values. @stable ICU 2.2 */
+ UCHAR_EAST_ASIAN_WIDTH,
+ /** Enumerated property General_Category.
+ Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
+ UCHAR_GENERAL_CATEGORY,
+ /** Enumerated property Joining_Group.
+ Returns UJoiningGroup values. @stable ICU 2.2 */
+ UCHAR_JOINING_GROUP,
+ /** Enumerated property Joining_Type.
+ Returns UJoiningType values. @stable ICU 2.2 */
+ UCHAR_JOINING_TYPE,
+ /** Enumerated property Line_Break.
+ Returns ULineBreak values. @stable ICU 2.2 */
+ UCHAR_LINE_BREAK,
+ /** Enumerated property Numeric_Type.
+ Returns UNumericType values. @stable ICU 2.2 */
+ UCHAR_NUMERIC_TYPE,
+ /** Enumerated property Script.
+ Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
+ UCHAR_SCRIPT,
+ /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
+ Returns UHangulSyllableType values. @stable ICU 2.6 */
+ UCHAR_HANGUL_SYLLABLE_TYPE,
+ /** Enumerated property NFD_Quick_Check.
+ Returns UNormalizationCheckResult values. @draft ICU 3.0 */
+ UCHAR_NFD_QUICK_CHECK,
+ /** Enumerated property NFKD_Quick_Check.
+ Returns UNormalizationCheckResult values. @draft ICU 3.0 */
+ UCHAR_NFKD_QUICK_CHECK,
+ /** Enumerated property NFC_Quick_Check.
+ Returns UNormalizationCheckResult values. @draft ICU 3.0 */
+ UCHAR_NFC_QUICK_CHECK,
+ /** Enumerated property NFKC_Quick_Check.
+ Returns UNormalizationCheckResult values. @draft ICU 3.0 */
+ UCHAR_NFKC_QUICK_CHECK,
+ /** Enumerated property Lead_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the first code point
+ of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */
+ UCHAR_LEAD_CANONICAL_COMBINING_CLASS,
+ /** Enumerated property Trail_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the last code point
+ of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */
+ UCHAR_TRAIL_CANONICAL_COMBINING_CLASS,
+ /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+ UCHAR_INT_LIMIT,
+
+ /** Bitmask property General_Category_Mask.
+ This is the General_Category property returned as a bit mask.
+ When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
+ returns bit masks for UCharCategory values where exactly one bit is set.
+ When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
+ a multi-bit mask is used for sets of categories like "Letters".
+ Mask values should be cast to uint32_t.
+ @stable ICU 2.4 */
+ UCHAR_GENERAL_CATEGORY_MASK=0x2000,
+ /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
+ UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
+ /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */
+ UCHAR_MASK_LIMIT,
+
+ /** Double property Numeric_Value.
+ Corresponds to u_getNumericValue. @stable ICU 2.4 */
+ UCHAR_NUMERIC_VALUE=0x3000,
+ /** First constant for double Unicode properties. @stable ICU 2.4 */
+ UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
+ /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */
+ UCHAR_DOUBLE_LIMIT,
+
+ /** String property Age.
+ Corresponds to u_charAge. @stable ICU 2.4 */
+ UCHAR_AGE=0x4000,
+ /** First constant for string Unicode properties. @stable ICU 2.4 */
+ UCHAR_STRING_START=UCHAR_AGE,
+ /** String property Bidi_Mirroring_Glyph.
+ Corresponds to u_charMirror. @stable ICU 2.4 */
+ UCHAR_BIDI_MIRRORING_GLYPH,
+ /** String property Case_Folding.
+ Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
+ UCHAR_CASE_FOLDING,
+ /** String property ISO_Comment.
+ Corresponds to u_getISOComment. @stable ICU 2.4 */
+ UCHAR_ISO_COMMENT,
+ /** String property Lowercase_Mapping.
+ Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
+ UCHAR_LOWERCASE_MAPPING,
+ /** String property Name.
+ Corresponds to u_charName. @stable ICU 2.4 */
+ UCHAR_NAME,
+ /** String property Simple_Case_Folding.
+ Corresponds to u_foldCase. @stable ICU 2.4 */
+ UCHAR_SIMPLE_CASE_FOLDING,
+ /** String property Simple_Lowercase_Mapping.
+ Corresponds to u_tolower. @stable ICU 2.4 */
+ UCHAR_SIMPLE_LOWERCASE_MAPPING,
+ /** String property Simple_Titlecase_Mapping.
+ Corresponds to u_totitle. @stable ICU 2.4 */
+ UCHAR_SIMPLE_TITLECASE_MAPPING,
+ /** String property Simple_Uppercase_Mapping.
+ Corresponds to u_toupper. @stable ICU 2.4 */
+ UCHAR_SIMPLE_UPPERCASE_MAPPING,
+ /** String property Titlecase_Mapping.
+ Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
+ UCHAR_TITLECASE_MAPPING,
+ /** String property Unicode_1_Name.
+ Corresponds to u_charName. @stable ICU 2.4 */
+ UCHAR_UNICODE_1_NAME,
+ /** String property Uppercase_Mapping.
+ Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
+ UCHAR_UPPERCASE_MAPPING,
+ /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
+ UCHAR_STRING_LIMIT,
+
+ /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
+ UCHAR_INVALID_CODE = -1
+} UProperty;
+
+/**
+ * Data for enumerated Unicode general category types.
+ * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
+ * @stable ICU 2.0
+ */
+typedef enum UCharCategory
+{
+ /** See note !!. Comments of the form "Cn" are read by genpname. */
+
+ /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
+ U_UNASSIGNED = 0,
+ /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
+ U_GENERAL_OTHER_TYPES = 0,
+ /** Lu @stable ICU 2.0 */
+ U_UPPERCASE_LETTER = 1,
+ /** Ll @stable ICU 2.0 */
+ U_LOWERCASE_LETTER = 2,
+ /** Lt @stable ICU 2.0 */
+ U_TITLECASE_LETTER = 3,
+ /** Lm @stable ICU 2.0 */
+ U_MODIFIER_LETTER = 4,
+ /** Lo @stable ICU 2.0 */
+ U_OTHER_LETTER = 5,
+ /** Mn @stable ICU 2.0 */
+ U_NON_SPACING_MARK = 6,
+ /** Me @stable ICU 2.0 */
+ U_ENCLOSING_MARK = 7,
+ /** Mc @stable ICU 2.0 */
+ U_COMBINING_SPACING_MARK = 8,
+ /** Nd @stable ICU 2.0 */
+ U_DECIMAL_DIGIT_NUMBER = 9,
+ /** Nl @stable ICU 2.0 */
+ U_LETTER_NUMBER = 10,
+ /** No @stable ICU 2.0 */
+ U_OTHER_NUMBER = 11,
+ /** Zs @stable ICU 2.0 */
+ U_SPACE_SEPARATOR = 12,
+ /** Zl @stable ICU 2.0 */
+ U_LINE_SEPARATOR = 13,
+ /** Zp @stable ICU 2.0 */
+ U_PARAGRAPH_SEPARATOR = 14,
+ /** Cc @stable ICU 2.0 */
+ U_CONTROL_CHAR = 15,
+ /** Cf @stable ICU 2.0 */
+ U_FORMAT_CHAR = 16,
+ /** Co @stable ICU 2.0 */
+ U_PRIVATE_USE_CHAR = 17,
+ /** Cs @stable ICU 2.0 */
+ U_SURROGATE = 18,
+ /** Pd @stable ICU 2.0 */
+ U_DASH_PUNCTUATION = 19,
+ /** Ps @stable ICU 2.0 */
+ U_START_PUNCTUATION = 20,
+ /** Pe @stable ICU 2.0 */
+ U_END_PUNCTUATION = 21,
+ /** Pc @stable ICU 2.0 */
+ U_CONNECTOR_PUNCTUATION = 22,
+ /** Po @stable ICU 2.0 */
+ U_OTHER_PUNCTUATION = 23,
+ /** Sm @stable ICU 2.0 */
+ U_MATH_SYMBOL = 24,
+ /** Sc @stable ICU 2.0 */
+ U_CURRENCY_SYMBOL = 25,
+ /** Sk @stable ICU 2.0 */
+ U_MODIFIER_SYMBOL = 26,
+ /** So @stable ICU 2.0 */
+ U_OTHER_SYMBOL = 27,
+ /** Pi @stable ICU 2.0 */
+ U_INITIAL_PUNCTUATION = 28,
+ /** Pf @stable ICU 2.0 */
+ U_FINAL_PUNCTUATION = 29,
+ /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
+ U_CHAR_CATEGORY_COUNT
+} UCharCategory;
+
+/**
+ * U_GC_XX_MASK constants are bit flags corresponding to Unicode
+ * general category values.
+ * For each category, the nth bit is set if the numeric value of the
+ * corresponding UCharCategory constant is n.
+ *
+ * There are also some U_GC_Y_MASK constants for groups of general categories
+ * like L for all letter categories.
+ *
+ * @see u_charType
+ * @see U_GET_GC_MASK
+ * @see UCharCategory
+ * @stable ICU 2.1
+ */
+#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CS_MASK U_MASK(U_SURROGATE)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
+
+
+/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
+#define U_GC_L_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
+#define U_GC_LC_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
+
+/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
+#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
+
+/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
+#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
+#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
+
+/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
+#define U_GC_C_MASK \
+ (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
+
+/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
+#define U_GC_P_MASK \
+ (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
+ U_GC_PI_MASK|U_GC_PF_MASK)
+
+/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
+#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
+
+/**
+ * This specifies the language directional property of a character set.
+ * @stable ICU 2.0
+ */
+typedef enum UCharDirection {
+ /** See note !!. Comments of the form "EN" are read by genpname. */
+
+ /** L @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT = 0,
+ /** R @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT = 1,
+ /** EN @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER = 2,
+ /** ES @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_SEPARATOR = 3,
+ /** ET @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_TERMINATOR = 4,
+ /** AN @stable ICU 2.0 */
+ U_ARABIC_NUMBER = 5,
+ /** CS @stable ICU 2.0 */
+ U_COMMON_NUMBER_SEPARATOR = 6,
+ /** B @stable ICU 2.0 */
+ U_BLOCK_SEPARATOR = 7,
+ /** S @stable ICU 2.0 */
+ U_SEGMENT_SEPARATOR = 8,
+ /** WS @stable ICU 2.0 */
+ U_WHITE_SPACE_NEUTRAL = 9,
+ /** ON @stable ICU 2.0 */
+ U_OTHER_NEUTRAL = 10,
+ /** LRE @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_EMBEDDING = 11,
+ /** LRO @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_OVERRIDE = 12,
+ /** AL @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_ARABIC = 13,
+ /** RLE @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_EMBEDDING = 14,
+ /** RLO @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_OVERRIDE = 15,
+ /** PDF @stable ICU 2.0 */
+ U_POP_DIRECTIONAL_FORMAT = 16,
+ /** NSM @stable ICU 2.0 */
+ U_DIR_NON_SPACING_MARK = 17,
+ /** BN @stable ICU 2.0 */
+ U_BOUNDARY_NEUTRAL = 18,
+ /** @stable ICU 2.0 */
+ U_CHAR_DIRECTION_COUNT
+} UCharDirection;
+
+/**
+ * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
+ * @stable ICU 2.0
+ */
+enum UBlockCode {
+
+ /** New No_Block value in Unicode 4. @stable ICU 2.6 */
+ UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Greek and Coptic".
+ * @stable ICU 2.0
+ */
+ UBLOCK_GREEK =8, /*[0370]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CYRILLIC =9, /*[0400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARMENIAN =10, /*[0530]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HEBREW =11, /*[0590]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC =12, /*[0600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SYRIAC =13, /*[0700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAANA =14, /*[0780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DEVANAGARI =15, /*[0900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BENGALI =16, /*[0980]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GURMUKHI =17, /*[0A00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GUJARATI =18, /*[0A80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ORIYA =19, /*[0B00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TAMIL =20, /*[0B80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TELUGU =21, /*[0C00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANNADA =22, /*[0C80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MALAYALAM =23, /*[0D00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SINHALA =24, /*[0D80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAI =25, /*[0E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LAO =26, /*[0E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TIBETAN =27, /*[0F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MYANMAR =28, /*[1000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEORGIAN =29, /*[10A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_JAMO =30, /*[1100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ETHIOPIC =31, /*[1200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CHEROKEE =32, /*[13A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OGHAM =34, /*[1680]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_RUNIC =35, /*[16A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KHMER =36, /*[1780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MONGOLIAN =37, /*[1800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
+ * @stable ICU 2.0
+ */
+ UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_NUMBER_FORMS =45, /*[2150]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARROWS =46, /*[2190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOX_DRAWING =52, /*[2500]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DINGBATS =56, /*[2700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIRAGANA =62, /*[3040]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KATAKANA =63, /*[30A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO =64, /*[3100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANBUN =66, /*[3190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_SYLLABLES =72, /*[A000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_RADICALS =73, /*[A490]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
+
+ /**
+ * Same as UBLOCK_PRIVATE_USE_AREA.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE = 78,
+ /**
+ * Same as UBLOCK_PRIVATE_USE.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPECIALS =86, /*[FFF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
+
+ /* New blocks in Unicode 3.1 */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OLD_ITALIC = 88 , /*[10300]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_GOTHIC = 89 , /*[10330]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_DESERET = 90 , /*[10400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93 , /*[1D400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94 , /*[20000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_TAGS = 96, /*[E0000]*/
+
+ /* New blocks in Unicode 3.2 */
+
+ /**
+ * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
+ * @stable ICU 2.2
+ */
+ UBLOCK_CYRILLIC_SUPPLEMENTARY = 97,
+ /** @draft ICU 3.0 */
+ UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGALOG = 98, /*[1700]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_HANUNOO = 99, /*[1720]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_BUHID = 100, /*[1740]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGBANWA = 101, /*[1760]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
+
+ /* New blocks in Unicode 4 */
+
+ /** @stable ICU 2.6 */
+ UBLOCK_LIMBU = 111, /*[1900]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_LE = 112, /*[1950]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_UGARITIC = 120, /*[10380]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_SHAVIAN = 121, /*[10450]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_OSMANYA = 122, /*[10480]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COUNT,
+
+ /** @stable ICU 2.0 */
+ UBLOCK_INVALID_CODE=-1
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBlockCode UBlockCode;
+
+/**
+ * East Asian Width constants.
+ *
+ * @see UCHAR_EAST_ASIAN_WIDTH
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+typedef enum UEastAsianWidth {
+ U_EA_NEUTRAL, /*[N]*/ /*See note !!*/
+ U_EA_AMBIGUOUS, /*[A]*/
+ U_EA_HALFWIDTH, /*[H]*/
+ U_EA_FULLWIDTH, /*[F]*/
+ U_EA_NARROW, /*[Na]*/
+ U_EA_WIDE, /*[W]*/
+ U_EA_COUNT
+} UEastAsianWidth;
+/*
+ * Implementation note:
+ * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
+ */
+
+/**
+ * Selector constants for u_charName().
+ * u_charName() returns the "modern" name of a
+ * Unicode character; or the name that was defined in
+ * Unicode version 1.0, before the Unicode standard merged
+ * with ISO-10646; or an "extended" name that gives each
+ * Unicode code point a unique name.
+ *
+ * @see u_charName
+ * @stable ICU 2.0
+ */
+typedef enum UCharNameChoice {
+ U_UNICODE_CHAR_NAME,
+ U_UNICODE_10_CHAR_NAME,
+ U_EXTENDED_CHAR_NAME,
+ U_CHAR_NAME_CHOICE_COUNT
+} UCharNameChoice;
+
+/**
+ * Selector constants for u_getPropertyName() and
+ * u_getPropertyValueName(). These selectors are used to choose which
+ * name is returned for a given property or value. All properties and
+ * values have a long name. Most have a short name, but some do not.
+ * Unicode allows for additional names, beyond the long and short
+ * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
+ * i=1, 2,...
+ *
+ * @see u_getPropertyName()
+ * @see u_getPropertyValueName()
+ * @stable ICU 2.4
+ */
+typedef enum UPropertyNameChoice {
+ U_SHORT_PROPERTY_NAME,
+ U_LONG_PROPERTY_NAME,
+ U_PROPERTY_NAME_CHOICE_COUNT
+} UPropertyNameChoice;
+
+/**
+ * Decomposition Type constants.
+ *
+ * @see UCHAR_DECOMPOSITION_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UDecompositionType {
+ U_DT_NONE, /*[none]*/ /*See note !!*/
+ U_DT_CANONICAL, /*[can]*/
+ U_DT_COMPAT, /*[com]*/
+ U_DT_CIRCLE, /*[enc]*/
+ U_DT_FINAL, /*[fin]*/
+ U_DT_FONT, /*[font]*/
+ U_DT_FRACTION, /*[fra]*/
+ U_DT_INITIAL, /*[init]*/
+ U_DT_ISOLATED, /*[iso]*/
+ U_DT_MEDIAL, /*[med]*/
+ U_DT_NARROW, /*[nar]*/
+ U_DT_NOBREAK, /*[nb]*/
+ U_DT_SMALL, /*[sml]*/
+ U_DT_SQUARE, /*[sqr]*/
+ U_DT_SUB, /*[sub]*/
+ U_DT_SUPER, /*[sup]*/
+ U_DT_VERTICAL, /*[vert]*/
+ U_DT_WIDE, /*[wide]*/
+ U_DT_COUNT /* 18 */
+} UDecompositionType;
+
+/**
+ * Joining Type constants.
+ *
+ * @see UCHAR_JOINING_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningType {
+ U_JT_NON_JOINING, /*[U]*/ /*See note !!*/
+ U_JT_JOIN_CAUSING, /*[C]*/
+ U_JT_DUAL_JOINING, /*[D]*/
+ U_JT_LEFT_JOINING, /*[L]*/
+ U_JT_RIGHT_JOINING, /*[R]*/
+ U_JT_TRANSPARENT, /*[T]*/
+ U_JT_COUNT /* 6 */
+} UJoiningType;
+
+/**
+ * Joining Group constants.
+ *
+ * @see UCHAR_JOINING_GROUP
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningGroup {
+ U_JG_NO_JOINING_GROUP,
+ U_JG_AIN,
+ U_JG_ALAPH,
+ U_JG_ALEF,
+ U_JG_BEH,
+ U_JG_BETH,
+ U_JG_DAL,
+ U_JG_DALATH_RISH,
+ U_JG_E,
+ U_JG_FEH,
+ U_JG_FINAL_SEMKATH,
+ U_JG_GAF,
+ U_JG_GAMAL,
+ U_JG_HAH,
+ U_JG_HAMZA_ON_HEH_GOAL,
+ U_JG_HE,
+ U_JG_HEH,
+ U_JG_HEH_GOAL,
+ U_JG_HETH,
+ U_JG_KAF,
+ U_JG_KAPH,
+ U_JG_KNOTTED_HEH,
+ U_JG_LAM,
+ U_JG_LAMADH,
+ U_JG_MEEM,
+ U_JG_MIM,
+ U_JG_NOON,
+ U_JG_NUN,
+ U_JG_PE,
+ U_JG_QAF,
+ U_JG_QAPH,
+ U_JG_REH,
+ U_JG_REVERSED_PE,
+ U_JG_SAD,
+ U_JG_SADHE,
+ U_JG_SEEN,
+ U_JG_SEMKATH,
+ U_JG_SHIN,
+ U_JG_SWASH_KAF,
+ U_JG_SYRIAC_WAW,
+ U_JG_TAH,
+ U_JG_TAW,
+ U_JG_TEH_MARBUTA,
+ U_JG_TETH,
+ U_JG_WAW,
+ U_JG_YEH,
+ U_JG_YEH_BARREE,
+ U_JG_YEH_WITH_TAIL,
+ U_JG_YUDH,
+ U_JG_YUDH_HE,
+ U_JG_ZAIN,
+ U_JG_FE, /**< @stable ICU 2.6 */
+ U_JG_KHAPH, /**< @stable ICU 2.6 */
+ U_JG_ZHAIN, /**< @stable ICU 2.6 */
+ U_JG_COUNT
+} UJoiningGroup;
+
+/**
+ * Line Break constants.
+ *
+ * @see UCHAR_LINE_BREAK
+ * @stable ICU 2.2
+ */
+typedef enum ULineBreak {
+ U_LB_UNKNOWN, /*[XX]*/ /*See note !!*/
+ U_LB_AMBIGUOUS, /*[AI]*/
+ U_LB_ALPHABETIC, /*[AL]*/
+ U_LB_BREAK_BOTH, /*[B2]*/
+ U_LB_BREAK_AFTER, /*[BA]*/
+ U_LB_BREAK_BEFORE, /*[BB]*/
+ U_LB_MANDATORY_BREAK, /*[BK]*/
+ U_LB_CONTINGENT_BREAK, /*[CB]*/
+ U_LB_CLOSE_PUNCTUATION, /*[CL]*/
+ U_LB_COMBINING_MARK, /*[CM]*/
+ U_LB_CARRIAGE_RETURN, /*[CR]*/
+ U_LB_EXCLAMATION, /*[EX]*/
+ U_LB_GLUE, /*[GL]*/
+ U_LB_HYPHEN, /*[HY]*/
+ U_LB_IDEOGRAPHIC, /*[ID]*/
+ U_LB_INSEPERABLE,
+ /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @draft ICU 3.0 */
+ U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
+ U_LB_INFIX_NUMERIC, /*[IS]*/
+ U_LB_LINE_FEED, /*[LF]*/
+ U_LB_NONSTARTER, /*[NS]*/
+ U_LB_NUMERIC, /*[NU]*/
+ U_LB_OPEN_PUNCTUATION, /*[OP]*/
+ U_LB_POSTFIX_NUMERIC, /*[PO]*/
+ U_LB_PREFIX_NUMERIC, /*[PR]*/
+ U_LB_QUOTATION, /*[QU]*/
+ U_LB_COMPLEX_CONTEXT, /*[SA]*/
+ U_LB_SURROGATE, /*[SG]*/
+ U_LB_SPACE, /*[SP]*/
+ U_LB_BREAK_SYMBOLS, /*[SY]*/
+ U_LB_ZWSPACE, /*[ZW]*/
+ U_LB_NEXT_LINE, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
+ U_LB_WORD_JOINER, /*[WJ]*/
+ U_LB_COUNT
+} ULineBreak;
+
+/**
+ * Numeric Type constants.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UNumericType {
+ U_NT_NONE, /*[None]*/ /*See note !!*/
+ U_NT_DECIMAL, /*[de]*/
+ U_NT_DIGIT, /*[di]*/
+ U_NT_NUMERIC, /*[nu]*/
+ U_NT_COUNT
+} UNumericType;
+
+/**
+ * Hangul Syllable Type constants.
+ *
+ * @see UCHAR_HANGUL_SYLLABLE_TYPE
+ * @stable ICU 2.6
+ */
+typedef enum UHangulSyllableType {
+ U_HST_NOT_APPLICABLE, /*[NA]*/ /*See note !!*/
+ U_HST_LEADING_JAMO, /*[L]*/
+ U_HST_VOWEL_JAMO, /*[V]*/
+ U_HST_TRAILING_JAMO, /*[T]*/
+ U_HST_LV_SYLLABLE, /*[LV]*/
+ U_HST_LVT_SYLLABLE, /*[LVT]*/
+ U_HST_COUNT
+} UHangulSyllableType;
+
+/**
+ * Check a binary Unicode property for a code point.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
+ * @return TRUE or FALSE according to the binary Unicode property value for c.
+ * Also FALSE if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_hasBinaryProperty(UChar32 c, UProperty which);
+
+/**
+ * Check if a code point has the Alphabetic Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
+ * This is different from u_isalpha!
+ * @param c Code point to test
+ * @return true if the code point has the Alphabetic Unicode property, false otherwise
+ *
+ * @see UCHAR_ALPHABETIC
+ * @see u_isalpha
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUAlphabetic(UChar32 c);
+
+/**
+ * Check if a code point has the Lowercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
+ * This is different from u_islower!
+ * @param c Code point to test
+ * @return true if the code point has the Lowercase Unicode property, false otherwise
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_islower
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isULowercase(UChar32 c);
+
+/**
+ * Check if a code point has the Uppercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
+ * This is different from u_isupper!
+ * @param c Code point to test
+ * @return true if the code point has the Uppercase Unicode property, false otherwise
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_isupper
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUUppercase(UChar32 c);
+
+/**
+ * Check if a code point has the White_Space Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
+ * This is different from both u_isspace and u_isWhitespace!
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c Code point to test
+ * @return true if the code point has the White_Space Unicode property, false otherwise.
+ *
+ * @see UCHAR_WHITE_SPACE
+ * @see u_isWhitespace
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUWhiteSpace(UChar32 c);
+
+/**
+ * Get the property value for an enumerated or integer Unicode property for a code point.
+ * Also returns binary and mask property values.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Sample usage:
+ * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
+ * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * @return Numeric value that is directly the property value or,
+ * for enumerated properties, corresponds to the numeric value of the enumerated
+ * constant of the respective property value enumeration type
+ * (cast to enum type if necessary).
+ * Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties.
+ * Returns a bit-mask for mask properties.
+ * Returns 0 if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyMinValue
+ * @see u_getIntPropertyMaxValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyValue(UChar32 c, UProperty which);
+
+/**
+ * Get the minimum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMaxValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
+ * 0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyMinValue(UProperty which);
+
+/**
+ * Get the maximum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMinValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * Examples for min/max values (for Unicode 3.2):
+ *
+ * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
+ * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
+ * - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE)
+ *
+ * For undefined UProperty constant values, min/max values will be 0/-1.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
+ * <=0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyMaxValue(UProperty which);
+
+/**
+ * Get the numeric value for a Unicode code point as defined in the
+ * Unicode Character Database.
+ *
+ * A "double" return type is necessary because
+ * some numeric values are fractions, negative, or too large for int32_t.
+ *
+ * For characters without any numeric values in the Unicode Character Database,
+ * this function will return U_NO_NUMERIC_VALUE.
+ *
+ * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
+ * also supports negative values, large values, and fractions,
+ * while Java's getNumericValue() returns values 10..35 for ASCII letters.
+ *
+ * @param c Code point to get the numeric value for.
+ * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
+ *
+ * @see U_NO_NUMERIC_VALUE
+ * @stable ICU 2.2
+ */
+U_STABLE double U_EXPORT2
+u_getNumericValue(UChar32 c);
+
+/**
+ * Special value that is returned by u_getNumericValue when
+ * no numeric value is defined for a code point.
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.2
+ */
+#define U_NO_NUMERIC_VALUE ((double)-123456789.)
+
+/**
+ * Determines whether the specified code point has the general category "Ll"
+ * (lowercase letter).
+ *
+ * Same as java.lang.Character.isLowerCase().
+ *
+ * This misses some characters that are also lowercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_LOWERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Ll lowercase letter
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_isupper
+ * @see u_istitle
+ * @see u_islower
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_islower(UChar32 c);
+
+/**
+ * Determines whether the specified code point has the general category "Lu"
+ * (uppercase letter).
+ *
+ * Same as java.lang.Character.isUpperCase().
+ *
+ * This misses some characters that are also uppercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_UPPERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Lu uppercase letter
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_islower
+ * @see u_istitle
+ * @see u_tolower
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isupper(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a titlecase letter.
+ * True for general category "Lt" (titlecase letter).
+ *
+ * Same as java.lang.Character.isTitleCase().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Lt titlecase letter
+ *
+ * @see u_isupper
+ * @see u_islower
+ * @see u_totitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_istitle(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a digit character according to Java.
+ * True for characters with general category "Nd" (decimal digit numbers).
+ * Beginning with Unicode 4, this is the same as
+ * testing for the Numeric_Type of Decimal.
+ *
+ * Same as java.lang.Character.isDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a digit character according to Character.isDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a letter character.
+ * True for general categories "L" (letters).
+ *
+ * Same as java.lang.Character.isLetter().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a letter character
+ *
+ * @see u_isdigit
+ * @see u_isalnum
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isalpha(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an alphanumeric character
+ * (letter or digit) according to Java.
+ * True for characters with general categories
+ * "L" (letters) and "Nd" (decimal digit numbers).
+ *
+ * Same as java.lang.Character.isLetterOrDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isalnum(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a hexadecimal digit.
+ * This is equivalent to u_digit(c, 16)>=0.
+ * True for characters with general category "Nd" (decimal digit numbers)
+ * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
+ * (That is, for letters with code points
+ * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
+ *
+ * In order to narrow the definition of hexadecimal digits to only ASCII
+ * characters, use (c<=0x7f && u_isxdigit(c)).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a hexadecimal digit
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isxdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a punctuation character.
+ * True for characters with general categories "P" (punctuation).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a punctuation character
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_ispunct(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "graphic" character
+ * (printable, excluding spaces).
+ * TRUE for all characters except those with general categories
+ * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
+ * "Cn" (unassigned), and "Z" (separators).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a "graphic" character
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isgraph(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "blank" or "horizontal space",
+ * a character that visibly separates words on a line.
+ * The following are equivalent definitions:
+ *
+ * TRUE for Unicode White_Space characters except for "vertical space controls"
+ * where "vertical space controls" are the following characters:
+ * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
+ *
+ * same as
+ *
+ * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
+ * except Zero Width Space (ZWSP, U+200B).
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a "blank"
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isblank(UChar32 c);
+
+/**
+ * Determines whether the specified code point is "defined",
+ * which usually means that it is assigned a character.
+ * True for general categories other than "Cn" (other, not assigned),
+ * i.e., true for all code points mentioned in UnicodeData.txt.
+ *
+ * Note that non-character code points (e.g., U+FDD0) are not "defined"
+ * (they are Cn), but surrogate code points are "defined" (Cs).
+ *
+ * Same as java.lang.Character.isDefined().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is assigned a character
+ *
+ * @see u_isdigit
+ * @see u_isalpha
+ * @see u_isalnum
+ * @see u_isupper
+ * @see u_islower
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isdefined(UChar32 c);
+
+/**
+ * Determines if the specified character is a space character or not.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the character to be tested
+ * @return true if the character is a space character; false otherwise.
+ *
+ * @see u_isJavaSpaceChar
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isspace(UChar32 c);
+
+/**
+ * Determine if the specified code point is a space character according to Java.
+ * True for characters with general categories "Z" (separators),
+ * which does not include control codes (e.g., TAB or Line Feed).
+ *
+ * Same as java.lang.Character.isSpaceChar().
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a space character according to Character.isSpaceChar()
+ *
+ * @see u_isspace
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaSpaceChar(UChar32 c);
+
+/**
+ * Determines if the specified code point is a whitespace character according to Java/ICU.
+ * A character is considered to be a Java whitespace character if and only
+ * if it satisfies one of the following criteria:
+ *
+ * - It is a Unicode separator (categories "Z"), but is not
+ * a no-break space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
+ * - It is U+0009 HORIZONTAL TABULATION.
+ * - It is U+000A LINE FEED.
+ * - It is U+000B VERTICAL TABULATION.
+ * - It is U+000C FORM FEED.
+ * - It is U+000D CARRIAGE RETURN.
+ * - It is U+001C FILE SEPARATOR.
+ * - It is U+001D GROUP SEPARATOR.
+ * - It is U+001E RECORD SEPARATOR.
+ * - It is U+001F UNIT SEPARATOR.
+ * - It is U+0085 NEXT LINE.
+ *
+ * Same as java.lang.Character.isWhitespace() except that Java omits U+0085.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a whitespace character according to Java/ICU
+ *
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isWhitespace(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a control character
+ * (as defined by this function).
+ * A control character is one of the following:
+ * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
+ * - U_CONTROL_CHAR (Cc)
+ * - U_FORMAT_CHAR (Cf)
+ * - U_LINE_SEPARATOR (Zl)
+ * - U_PARAGRAPH_SEPARATOR (Zp)
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a control character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isprint
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_iscntrl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an ISO control code.
+ * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
+ *
+ * Same as java.lang.Character.isISOControl().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an ISO control code
+ *
+ * @see u_iscntrl
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isISOControl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a printable character.
+ * True for general categories <em>other</em> than "C" (controls).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a printable character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_iscntrl
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isprint(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a base character.
+ * True for general categories "L" (letters), "N" (numbers),
+ * "Mc" (spacing combining marks), and "Me" (enclosing marks).
+ *
+ * Note that this is different from the Unicode definition in
+ * chapter 3.5, conformance clause D13,
+ * which defines base characters to be all characters (not Cn)
+ * that do not graphically combine with preceding characters (M)
+ * and that are neither control (Cc) or format (Cf) characters.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a base character according to this function
+ *
+ * @see u_isalpha
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isbase(UChar32 c);
+
+/**
+ * Returns the bidirectional category value for the code point,
+ * which is used in the Unicode bidirectional algorithm
+ * (UAX #9 http://www.unicode.org/reports/tr9/).
+ * Note that some <em>unassigned</em> code points have bidi values
+ * of R or AL because they are in blocks that are reserved
+ * for Right-To-Left scripts.
+ *
+ * Same as java.lang.Character.getDirectionality()
+ *
+ * @param c the code point to be tested
+ * @return the bidirectional category (UCharDirection) value
+ *
+ * @see UCharDirection
+ * @stable ICU 2.0
+ */
+U_STABLE UCharDirection U_EXPORT2
+u_charDirection(UChar32 c);
+
+/**
+ * Determines whether the code point has the Bidi_Mirrored property.
+ * This property is set for characters that are commonly used in
+ * Right-To-Left contexts and need to be displayed with a "mirrored"
+ * glyph.
+ *
+ * Same as java.lang.Character.isMirrored().
+ * Same as UCHAR_BIDI_MIRRORED
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the character has the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isMirrored(UChar32 c);
+
+/**
+ * Maps the specified character to a "mirror-image" character.
+ * For characters with the Bidi_Mirrored property, implementations
+ * sometimes need a "poor man's" mapping to another Unicode
+ * character (code point) such that the default glyph may serve
+ * as the mirror-image of the default glyph of the specified
+ * character. This is useful for text conversion to and from
+ * codepages with visual order, and for displays without glyph
+ * selecetion capabilities.
+ *
+ * @param c the code point to be mapped
+ * @return another Unicode code point that may serve as a mirror-image
+ * substitute, or c itself if there is no such mapping or c
+ * does not have the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @see u_isMirrored
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charMirror(UChar32 c);
+
+/**
+ * Returns the general category value for the code point.
+ *
+ * Same as java.lang.Character.getType().
+ *
+ * @param c the code point to be tested
+ * @return the general category (UCharCategory) value
+ *
+ * @see UCharCategory
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+u_charType(UChar32 c);
+
+/**
+ * Get a single-bit bit set for the general category of a character.
+ * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
+ * Same as U_MASK(u_charType(c)).
+ *
+ * @param c the code point to be tested
+ * @return a single-bit mask corresponding to the general category (UCharCategory) value
+ *
+ * @see u_charType
+ * @see UCharCategory
+ * @see U_GC_CN_MASK
+ * @stable ICU 2.1
+ */
+#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
+
+/**
+ * Callback from u_enumCharTypes(), is called for each contiguous range
+ * of code points c (where start<=c<limit)
+ * with the same Unicode general category ("character type").
+ *
+ * The callback function can stop the enumeration by returning FALSE.
+ *
+ * @param context an opaque pointer, as passed into utrie_enum()
+ * @param start the first code point in a contiguous range with value
+ * @param limit one past the last code point in a contiguous range with value
+ * @param type the general category for all code points in [start..limit[
+ * @return FALSE to stop the enumeration
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see u_enumCharTypes
+ */
+typedef UBool U_CALLCONV
+UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
+
+/**
+ * Enumerate efficiently all code points with their Unicode general categories.
+ *
+ * This is useful for building data structures (e.g., UnicodeSet's),
+ * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
+ *
+ * For each contiguous range of code points with a given general category ("character type"),
+ * the UCharEnumTypeRange function is called.
+ * Adjacent ranges have different types.
+ * The Unicode Standard guarantees that the numeric value of the type is 0..31.
+ *
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ * of code points with the same general category
+ * @param context an opaque pointer that is passed on to the callback function
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see UCharEnumTypeRange
+ */
+U_STABLE void U_EXPORT2
+u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns the combining class of the code point as specified in UnicodeData.txt.
+ *
+ * @param c the code point of the character
+ * @return the combining class of the character
+ * @stable ICU 2.0
+ */
+U_STABLE uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c);
+
+#endif
+
+/**
+ * Returns the decimal digit value of a decimal digit character.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ *
+ * Unlike ICU releases before 2.6, no digit values are returned for any
+ * Han characters because Han number characters are often used with a special
+ * Chinese-style number format (with characters for powers of 10 in between)
+ * instead of in decimal-positional notation.
+ * Unicode 4 explicitly assigns Han number characters the Numeric_Type
+ * Numeric instead of Decimal.
+ * See Jitterbug 1483 for more details.
+ *
+ * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
+ * for complete numeric Unicode properties.
+ *
+ * @param c the code point for which to get the decimal digit value
+ * @return the decimal digit value of c,
+ * or -1 if c is not a decimal digit character
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charDigitValue(UChar32 c);
+
+/**
+ * Returns the Unicode allocation block that contains the character.
+ *
+ * @param c the code point to be tested
+ * @return the block value (UBlockCode) for c
+ *
+ * @see UBlockCode
+ * @stable ICU 2.0
+ */
+U_STABLE UBlockCode U_EXPORT2
+ublock_getCode(UChar32 c);
+
+/**
+ * Retrieve the name of a Unicode character.
+ * Depending on <code>nameChoice</code>, the character name written
+ * into the buffer is the "modern" name or the name that was defined
+ * in Unicode version 1.0.
+ * The name contains only "invariant" characters
+ * like A-Z, 0-9, space, and '-'.
+ * Unicode 1.0 names are only retrieved if they are different from the modern
+ * names and if the data file contains the data for them. gennames may or may
+ * not be called with a command line option to include 1.0 names in unames.dat.
+ *
+ * @param code The character (code point) for which to get the name.
+ * It must be <code>0<=code<=0x10ffff</code>.
+ * @param nameChoice Selector for which name to get.
+ * @param buffer Destination address for copying the name.
+ * The name will always be zero-terminated.
+ * If there is no name, then the buffer will be set to the empty string.
+ * @param bufferLength <code>==sizeof(buffer)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
+ * returns.
+ * @return The length of the name, or 0 if there is no name for this character.
+ * If the bufferLength is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @see UCharNameChoice
+ * @see u_charFromName
+ * @see u_enumCharNames
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+ char *buffer, int32_t bufferLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get the ISO 10646 comment for a character.
+ * The ISO 10646 comment is an informative field in the Unicode Character
+ * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
+ *
+ * @param c The character (code point) for which to get the ISO comment.
+ * It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the comment.
+ * The comment will be zero-terminated if possible.
+ * If there is no comment, then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
+ * returns.
+ * @return The length of the comment, or 0 if there is no comment for this character.
+ * If the destCapacity is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getISOComment(UChar32 c,
+ char *dest, int32_t destCapacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Find a Unicode character by its name and return its code point value.
+ * The name is matched exactly and completely.
+ * If the name does not correspond to a code point, <i>pErrorCode</i>
+ * is set to <code>U_INVALID_CHAR_FOUND</code>.
+ * A Unicode 1.0 name is matched only if it differs from the modern name.
+ * Unicode names are all uppercase. Extended names are lowercase followed
+ * by an uppercase hexadecimal number, and within angle brackets.
+ *
+ * @param nameChoice Selector for which name to match.
+ * @param name The name to match.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ * @return The Unicode value of the code point with the given name,
+ * or an undefined value if there is no such code point.
+ *
+ * @see UCharNameChoice
+ * @see u_charName
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+ const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Type of a callback function for u_enumCharNames() that gets called
+ * for each Unicode character with the code point value and
+ * the character name.
+ * If such a function returns FALSE, then the enumeration is stopped.
+ *
+ * @param context The context pointer that was passed to u_enumCharNames().
+ * @param code The Unicode code point for the character with this name.
+ * @param nameChoice Selector for which kind of names is enumerated.
+ * @param name The character's name, zero-terminated.
+ * @param length The length of the name.
+ * @return TRUE if the enumeration should continue, FALSE to stop it.
+ *
+ * @see UCharNameChoice
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+typedef UBool UEnumCharNamesFn(void *context,
+ UChar32 code,
+ UCharNameChoice nameChoice,
+ const char *name,
+ int32_t length);
+
+/**
+ * Enumerate all assigned Unicode characters between the start and limit
+ * code points (start inclusive, limit exclusive) and call a function
+ * for each, passing the code point value and the character name.
+ * For Unicode 1.0 names, only those are enumerated that differ from the
+ * modern names.
+ *
+ * @param start The first code point in the enumeration range.
+ * @param limit One more than the last code point in the enumeration range
+ * (the first one after the range).
+ * @param fn The function that is to be called for each character name.
+ * @param context An arbitrary pointer that is passed to the function.
+ * @param nameChoice Selector for which kind of names to enumerate.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ *
+ * @see UCharNameChoice
+ * @see UEnumCharNamesFn
+ * @see u_charName
+ * @see u_charFromName
+ * @stable ICU 1.7
+ */
+U_STABLE void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn,
+ void *context,
+ UCharNameChoice nameChoice,
+ UErrorCode *pErrorCode);
+
+/**
+ * Return the Unicode name for a given property, as given in the
+ * Unicode database file PropertyAliases.txt.
+ *
+ * In addition, this function maps the property
+ * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
+ * "General_Category_Mask". These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param property UProperty selector other than UCHAR_INVALID_CODE.
+ * If out of range, NULL is returned.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All properties have a long name. Most
+ * have a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the UProperty enum for a given property name, as specified
+ * in the Unicode database file PropertyAliases.txt. Short, long, and
+ * any other variants are recognized.
+ *
+ * In addition, this function maps the synthetic names "gcm" /
+ * "General_Category_Mask" to the property
+ * UCHAR_GENERAL_CATEGORY_MASK. These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param alias the property name to be matched. The name is compared
+ * using "loose matching" as described in PropertyAliases.txt.
+ *
+ * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
+ * does not match any property.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias);
+
+/**
+ * Return the Unicode name for a given property value, as given in the
+ * Unicode database file PropertyValueAliases.txt.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt can only be
+ * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * If out of range, NULL is returned.
+ *
+ * @param value selector for a value for the given property. If out
+ * of range, NULL is returned. In general, valid values range
+ * from 0 up to some maximum. There are a few exceptions:
+ * (1.) UCHAR_BLOCK values begin at the non-zero value
+ * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS
+ * values are not contiguous and range from 0..240. (3.)
+ * UCHAR_GENERAL_CATEGORY_MASK values are not values of
+ * UCharCategory, but rather mask values produced by
+ * U_GET_GC_MASK(). This allows grouped categories such as
+ * [:L:] to be represented. Mask values range
+ * non-contiguously from 1..U_GC_P_MASK.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All values have a long name. Most have
+ * a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyValueName(UProperty property,
+ int32_t value,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the property value integer for a given value name, as
+ * specified in the Unicode database file PropertyValueAliases.txt.
+ * Short, long, and any other variants are recognized.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt will only be
+ * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * If out of range, UCHAR_INVALID_CODE is returned.
+ *
+ * @param alias the value name to be matched. The name is compared
+ * using "loose matching" as described in
+ * PropertyValueAliases.txt.
+ *
+ * @return a value integer or UCHAR_INVALID_CODE if the given name
+ * does not match any value of the given property, or if the
+ * property is invalid. Note: U CHAR_GENERAL_CATEGORY values
+ * are not values of UCharCategory, but rather mask values
+ * produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+u_getPropertyValueEnum(UProperty property,
+ const char* alias);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in an identifier according to Unicode
+ * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
+ * True for characters with general categories "L" (letters) and "Nl" (letter numbers).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierStart().
+ * Same as UCHAR_ID_START
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may start an identifier
+ *
+ * @see UCHAR_ID_START
+ * @see u_isalpha
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in an identifier according to Java.
+ * True for characters with general categories "L" (letters),
+ * "Nl" (letter numbers), "Nd" (decimal digits),
+ * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
+ * u_isIDIgnorable(c).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierPart().
+ * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
+ * except that Unicode recommends to ignore Cf which is less than
+ * u_isIDIgnorable(c).
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may occur in an identifier according to Java
+ *
+ * @see UCHAR_ID_CONTINUE
+ * @see u_isIDStart
+ * @see u_isIDIgnorable
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDPart(UChar32 c);
+
+/**
+ * Determines if the specified character should be regarded
+ * as an ignorable character in an identifier,
+ * according to Java.
+ * True for characters with general category "Cf" (format controls) as well as
+ * non-whitespace ISO controls
+ * (U+0000..U+0008, U+000E..U+001B, U+007F..U+0084, U+0086..U+009F).
+ *
+ * Same as java.lang.Character.isIdentifierIgnorable()
+ * except that Java also returns TRUE for U+0085 Next Line
+ * (it omits U+0085 from whitespace ISO controls).
+ *
+ * Note that Unicode just recommends to ignore Cf (format controls).
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is ignorable in identifiers according to Java
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isIDStart
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDIgnorable(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in a Java identifier.
+ * In addition to u_isIDStart(c), true for characters with
+ * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
+ *
+ * Same as java.lang.Character.isJavaIdentifierStart().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may start a Java identifier
+ *
+ * @see u_isJavaIDPart
+ * @see u_isalpha
+ * @see u_isIDStart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in a Java identifier.
+ * In addition to u_isIDPart(c), true for characters with
+ * general category "Sc" (currency symbols).
+ *
+ * Same as java.lang.Character.isJavaIdentifierPart().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may occur in a Java identifier
+ *
+ * @see u_isIDIgnorable
+ * @see u_isJavaIDStart
+ * @see u_isalpha
+ * @see u_isdigit
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaIDPart(UChar32 c);
+
+/**
+ * The given character is mapped to its lowercase equivalent according to
+ * UnicodeData.txt; if the character has no lowercase equivalent, the character
+ * itself is returned.
+ *
+ * Same as java.lang.Character.toLowerCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings may result in zero, one or more code points and depend
+ * on context or language etc.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Lowercase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_tolower(UChar32 c);
+
+/**
+ * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
+ * if the character has no uppercase equivalent, the character itself is
+ * returned.
+ *
+ * Same as java.lang.Character.toUpperCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings may result in zero, one or more code points and depend
+ * on context or language etc.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Uppercase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_toupper(UChar32 c);
+
+/**
+ * The given character is mapped to its titlecase equivalent
+ * according to UnicodeData.txt;
+ * if none is defined, the character itself is returned.
+ *
+ * Same as java.lang.Character.toTitleCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings may result in zero, one or more code points and depend
+ * on context or language etc.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Titlecase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_totitle(UChar32 c);
+
+/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
+ * are to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * The given character is mapped to its case folding equivalent according to
+ * UnicodeData.txt and CaseFolding.txt;
+ * if the character has no case folding equivalent, the character
+ * itself is returned.
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings may result in zero, one or more code points and depend
+ * on context or language etc.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ *
+ * @param c the code point to be mapped
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return the Simple_Case_Folding of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_foldCase(UChar32 c, uint32_t options);
+
+/**
+ * Returns the decimal digit value of the code point in the
+ * specified radix.
+ *
+ * If the radix is not in the range <code>2<=radix<=36</code> or if the
+ * value of <code>c</code> is not a valid digit in the specified
+ * radix, <code>-1</code> is returned. A character is a valid digit
+ * if at least one of the following is true:
+ * <ul>
+ * <li>The character has a decimal digit value.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ * In this case the value is the character's decimal digit value.</li>
+ * <li>The character is one of the uppercase Latin letters
+ * <code>'A'</code> through <code>'Z'</code>.
+ * In this case the value is <code>c-'A'+10</code>.</li>
+ * <li>The character is one of the lowercase Latin letters
+ * <code>'a'</code> through <code>'z'</code>.
+ * In this case the value is <code>ch-'a'+10</code>.</li>
+ * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
+ * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
+ * are recognized.</li>
+ * </ul>
+ *
+ * Same as java.lang.Character.digit().
+ *
+ * @param ch the code point to be tested.
+ * @param radix the radix.
+ * @return the numeric value represented by the character in the
+ * specified radix,
+ * or -1 if there is no value or if the value exceeds the radix.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @see u_forDigit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix);
+
+/**
+ * Determines the character representation for a specific digit in
+ * the specified radix. If the value of <code>radix</code> is not a
+ * valid radix, or the value of <code>digit</code> is not a valid
+ * digit in the specified radix, the null character
+ * (<code>U+0000</code>) is returned.
+ * <p>
+ * The <code>radix</code> argument is valid if it is greater than or
+ * equal to 2 and less than or equal to 36.
+ * The <code>digit</code> argument is valid if
+ * <code>0 <= digit < radix</code>.
+ * <p>
+ * If the digit is less than 10, then
+ * <code>'0' + digit</code> is returned. Otherwise, the value
+ * <code>'a' + digit - 10</code> is returned.
+ *
+ * Same as java.lang.Character.forDigit().
+ *
+ * @param digit the number to convert to a character.
+ * @param radix the radix.
+ * @return the <code>char</code> representation of the specified digit
+ * in the specified radix.
+ *
+ * @see u_digit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix);
+
+/**
+ * Get the "age" of the code point.
+ * The "age" is the Unicode version when the code point was first
+ * designated (as a non-character or for Private Use)
+ * or assigned a character.
+ * This can be useful to avoid emitting code points to receiving
+ * processes that do not accept newer characters.
+ * The data is from the UCD file DerivedAge.txt.
+ *
+ * @param c The code point.
+ * @param versionArray The Unicode version number array, to be filled in.
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray);
+
+/**
+ * Gets the Unicode version information.
+ * The version array is filled in with the version information
+ * for the Unicode standard that is currently used by ICU.
+ * For example, Unicode version 3.1.1 is represented as an array with
+ * the values { 3, 1, 1, 0 }.
+ *
+ * @param versionArray an output array that will be filled in with
+ * the Unicode version number
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray);
+
+/**
+ * Get the FC_NFKC_Closure property string for a character.
+ * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
+ * or for "FNC": http://www.unicode.org/reports/tr15/
+ *
+ * @param c The character (code point) for which to get the FC_NFKC_Closure string.
+ * It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the string.
+ * The string will be zero-terminated if possible.
+ * If there is no FC_NFKC_Closure string,
+ * then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable.
+ * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
+ * If the destCapacity is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+#endif /*_UCHAR*/
+/*eof*/
diff --git a/Source/WebCore/icu/unicode/ucnv.h b/Source/WebCore/icu/unicode/ucnv.h
new file mode 100644
index 0000000..a042f7a
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucnv.h
@@ -0,0 +1,1817 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv.h:
+ * External APIs for the ICU's codeset conversion library
+ * Bertrand A. Damiba
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 04/04/99 helena Fixed internal header inclusion.
+ * 05/11/00 helena Added setFallback and usesFallback APIs.
+ * 06/29/2000 helena Major rewrite of the callback APIs.
+ * 12/07/2000 srl Update of documentation
+ */
+
+/**
+ * \file
+ * \brief C API: Character conversion
+ *
+ * <h2>Character Conversion C API</h2>
+ *
+ * <p>This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
+ * converter, you can get its properties, set options, convert your data and
+ * close the converter.</p>
+ *
+ * <p>Since many software programs recogize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
+ * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
+ * more frequently used alias functions to get this information.</p>
+ *
+ * <p>When a converter encounters an illegal, irregular, invalid or unmappable character
+ * its default behavior is to use a substitution character to replace the
+ * bad byte sequence. This behavior can be changed by using {@link ucnv_getFromUCallBack() }
+ * or {@link ucnv_getToUCallBack() } on the converter. The header ucnv_err.h defines
+ * many other callback actions that can be used instead of a character substitution.</p>
+ *
+ * <p>More information about this API can be found in our
+ * <a href="http://oss.software.ibm.com/icu/userguide/conversion.html">User's
+ * Guide</a>.</p>
+ */
+
+#ifndef UCNV_H
+#define UCNV_H
+
+#include "unicode/ucnv_err.h"
+#include "unicode/uenum.h"
+
+#ifndef __USET_H__
+
+/**
+ * USet is the C API type for Unicode sets.
+ * It is forward-declared here to avoid including the header file if related
+ * conversion APIs are not used.
+ * See unicode/uset.h
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+struct USet;
+/** @stable ICU 2.6 */
+typedef struct USet USet;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+U_CDECL_BEGIN
+
+/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
+/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
+
+/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define UCNV_SI 0x0F
+/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define UCNV_SO 0x0E
+
+/**
+ * Enum for specifying basic types of converters
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNSUPPORTED_CONVERTER = -1,
+ UCNV_SBCS = 0,
+ UCNV_DBCS = 1,
+ UCNV_MBCS = 2,
+ UCNV_LATIN_1 = 3,
+ UCNV_UTF8 = 4,
+ UCNV_UTF16_BigEndian = 5,
+ UCNV_UTF16_LittleEndian = 6,
+ UCNV_UTF32_BigEndian = 7,
+ UCNV_UTF32_LittleEndian = 8,
+ UCNV_EBCDIC_STATEFUL = 9,
+ UCNV_ISO_2022 = 10,
+
+ UCNV_LMBCS_1 = 11,
+ UCNV_LMBCS_2,
+ UCNV_LMBCS_3,
+ UCNV_LMBCS_4,
+ UCNV_LMBCS_5,
+ UCNV_LMBCS_6,
+ UCNV_LMBCS_8,
+ UCNV_LMBCS_11,
+ UCNV_LMBCS_16,
+ UCNV_LMBCS_17,
+ UCNV_LMBCS_18,
+ UCNV_LMBCS_19,
+ UCNV_LMBCS_LAST = UCNV_LMBCS_19,
+ UCNV_HZ,
+ UCNV_SCSU,
+ UCNV_ISCII,
+ UCNV_US_ASCII,
+ UCNV_UTF7,
+ UCNV_BOCU1,
+ UCNV_UTF16,
+ UCNV_UTF32,
+ UCNV_CESU8,
+ UCNV_IMAP_MAILBOX,
+
+ /* Number of converter types for which we have conversion routines. */
+ UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
+
+} UConverterType;
+
+/**
+ * Enum for specifying which platform a converter ID refers to.
+ * The use of platform/CCSID is not recommended. See ucnv_openCCSID().
+ *
+ * @see ucnv_getPlatform
+ * @see ucnv_openCCSID
+ * @see ucnv_getCCSID
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNKNOWN = -1,
+ UCNV_IBM = 0
+} UConverterPlatform;
+
+/**
+ * Function pointer for error callback in the codepage to unicode direction.
+ * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @see ucnv_setToUCallBack
+ * @see UConverterToUnicodeArgs
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterToUCallback) (
+ const void* context,
+ UConverterToUnicodeArgs *args,
+ const char *codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode *);
+
+/**
+ * Function pointer for error callback in the unicode to codepage direction.
+ * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterFromUCallback) (
+ const void* context,
+ UConverterFromUnicodeArgs *args,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode *);
+
+U_CDECL_END
+
+/**
+ * Character that separates converter names from options and options from each other.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_CHAR ','
+
+/**
+ * String version of UCNV_OPTION_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_STRING ","
+
+/**
+ * Character that separates a converter option from its value.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_CHAR '='
+
+/**
+ * String version of UCNV_VALUE_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_STRING "="
+
+/**
+ * Converter option for specifying a locale.
+ * For example, ucnv_open("SCSU,locale=ja", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_LOCALE_OPTION_STRING ",locale="
+
+/**
+ * Converter option for specifying a version selector (0..9) for some converters.
+ * For example, ucnv_open("UTF-7,version=1", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_VERSION_OPTION_STRING ",version="
+
+/**
+ * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
+ * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
+ * S/390 (z/OS) Unix System Services (Open Edition).
+ * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
+
+/**
+ * Do a fuzzy compare of a two converter/alias names. The comparison
+ * is case-insensitive. It also ignores the characters '-', '_', and
+ * ' ' (dash, underscore, and space). Thus the strings "UTF-8",
+ * "utf_8", and "Utf 8" are exactly equivalent.
+ *
+ * @param name1 a converter name or alias, zero-terminated
+ * @param name2 a converter name or alias, zero-terminated
+ * @return 0 if the names match, or a negative value if the name1
+ * lexically precedes name2, or a positive value if the name1
+ * lexically follows name2.
+ * @stable ICU 2.0
+ */
+U_STABLE int U_EXPORT2
+ucnv_compareNames(const char *name1, const char *name2);
+
+
+/**
+ * Creates a UConverter object with the names specified as a C string.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * the delimiters '-', '_', and ' ' (dash, underscore, and space).
+ * E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent.
+ * If <code>NULL</code> is passed for the converter name, it will create one with the
+ * getDefaultName return value.
+ *
+ * <p>A converter name for ICU 1.5 and above may contain options
+ * like a locale specification to control the specific behavior of
+ * the newly instantiated converter.
+ * The meaning of the options depends on the particular converter.
+ * If an option is not defined for or recognized by a given converter, then it is ignored.</p>
+ *
+ * <p>Options are appended to the converter name string, with a
+ * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
+ * also between adjacent options.</p>
+ *
+ * <p>If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
+ *
+ * <p>The conversion behavior and names can vary between platforms. ICU may
+ * convert some characters differently from other platforms. Details on this topic
+ * are in the <a href="http://oss.software.ibm.com/icu/userguide/conversion.html">User's
+ * Guide</a>.</p>
+ *
+ * @param converterName Name of the uconv table, may have options appended
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @see ucnv_close
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_open(const char *converterName, UErrorCode *err);
+
+
+/**
+ * Creates a Unicode converter with the names specified as unicode string.
+ * The name should be limited to the ASCII-7 alphanumerics range.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * the delimiters '-', '_', and ' ' (dash, underscore, and space).
+ * E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent.
+ * If <TT>NULL</TT> is passed for the converter name, it will create
+ * one with the ucnv_getDefaultName() return value.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param name : name of the uconv table in a zero terminated
+ * Unicode string
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
+ * U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an
+ * error occured
+ * @see ucnv_open
+ * @see ucnv_openCCSID
+ * @see ucnv_close
+ * @see ucnv_getDefaultName
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openU(const UChar *name,
+ UErrorCode *err);
+
+/**
+ * Creates a UConverter object from a CCSID number and platform pair.
+ * Note that the usefulness of this function is limited to platforms with numeric
+ * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
+ * encodings.
+ *
+ * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
+ * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
+ * for some Unicode conversion tables there are multiple CCSIDs.
+ * Some "alternate" Unicode conversion tables are provided by the
+ * IBM CDRA conversion table registry.
+ * The most prominent example of a systematic modification of conversion tables that is
+ * not provided in the form of conversion table files in the repository is
+ * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
+ * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
+ *
+ * Only IBM default conversion tables are accessible with ucnv_openCCSID().
+ * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
+ * with that CCSID.
+ *
+ * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
+ *
+ * In summary, the use of CCSIDs and the associated API functions is not recommended.
+ *
+ * In order to open a converter with the default IBM CDRA Unicode conversion table,
+ * you can use this function or use the prefix "ibm-":
+ * \code
+ * char name[20];
+ * sprintf(name, "ibm-%hu", ccsid);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter with the IBM S/390 Unix System Services variant
+ * of a Unicode/EBCDIC conversion table,
+ * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
+ * \code
+ * char name[20];
+ * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter from a Microsoft codepage number, use the prefix "cp":
+ * \code
+ * char name[20];
+ * sprintf(name, "cp%hu", codepageID);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * @param codepage codepage number to create
+ * @param platform the platform in which the codepage number exists
+ * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error
+ * occured.
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_close
+ * @see ucnv_getCCSID
+ * @see ucnv_getPlatform
+ * @see UConverterPlatform
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openCCSID(int32_t codepage,
+ UConverterPlatform platform,
+ UErrorCode * err);
+
+/**
+ * <p>Creates a UConverter object specified from a packageName and a converterName.</p>
+ *
+ * <p>The packageName and converterName must point to an ICU udata object, as defined by
+ * <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
+ * Typically, packageName will refer to a (.dat) file, or to a package registered with
+ * udata_setAppData().</p>
+ *
+ * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
+ * stored in the converter cache or the alias table. The only way to open further converters
+ * is call this function multiple times, or use the ucnv_safeClone() function to clone a
+ * 'master' converter.</p>
+ *
+ * <p>A future version of ICU may add alias table lookups and/or caching
+ * to this function.</p>
+ *
+ * <p>Example Use:
+ * <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
+ * </p>
+ *
+ * @param packageName name of the package (equivalent to 'path' in udata_open() call)
+ * @param converterName name of the data item to be used, without suffix.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @see udata_open
+ * @see ucnv_open
+ * @see ucnv_safeClone
+ * @see ucnv_close
+ * @stable ICU 2.2
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
+
+/**
+ * Thread safe cloning operation
+ * @param cnv converter to be cloned
+ * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize pointer to size of allocated space.
+ * If *pBufferSize == 0, a sufficient size for use in cloning will
+ * be returned ('pre-flighting')
+ * If *pBufferSize is not enough for a stack-based safe clone,
+ * new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter * U_EXPORT2
+ucnv_safeClone(const UConverter *cnv,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+/**
+ * \def U_CNV_SAFECLONE_BUFFERSIZE
+ * Definition of a buffer size that is designed to be large enough for
+ * converters to be cloned with ucnv_safeClone().
+ * @stable ICU 2.0
+ */
+#define U_CNV_SAFECLONE_BUFFERSIZE 1024
+
+/**
+ * Deletes the unicode converter and releases resources associated
+ * with just this instance.
+ * Does not free up shared converter tables.
+ *
+ * @param converter the converter object to be deleted
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_close(UConverter * converter);
+
+/**
+ * Fills in the output parameter, subChars, with the substitution characters
+ * as multiple bytes.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the subsitution characters
+ * @param len on input the capacity of subChars, on output the number
+ * of bytes copied to it
+ * @param err the outgoing error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getSubstChars(const UConverter *converter,
+ char *subChars,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Sets the substitution chars when converting from unicode to a codepage. The
+ * substitution is specified as a string of 1-4 bytes, and may contain
+ * <TT>NULL</TT> byte.
+ * @param converter the Unicode converter
+ * @param subChars the substitution character byte sequence we want set
+ * @param len the number of bytes in subChars
+ * @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
+ * len is bigger than the maximum number of bytes allowed in subchars
+ * @see ucnv_getSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setSubstChars(UConverter *converter,
+ const char *subChars,
+ int8_t len,
+ UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errBytes, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errBytes the codepage bytes which were in error
+ * @param len on input the capacity of errBytes, on output the number of
+ * bytes which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getInvalidChars(const UConverter *converter,
+ char *errBytes,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errChars, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errUChars the UChars which were in error
+ * @param len on input the capacity of errUChars, on output the number of
+ * UChars which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getInvalidUChars(const UConverter *converter,
+ UChar *errUChars,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Resets the state of a converter to the default state. This is used
+ * in the case of an error, to restart a conversion from a known default state.
+ * It will also empty the internal output buffers.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_reset(UConverter *converter);
+
+/**
+ * Resets the to-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion to
+ * Unicode to a known default state. It will also empty the internal
+ * output buffers used for the conversion to Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_resetToUnicode(UConverter *converter);
+
+/**
+ * Resets the from-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion from
+ * Unicode to a known default state. It will also empty the internal output
+ * buffers used for the conversion from Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_resetFromUnicode(UConverter *converter);
+
+/**
+ * Returns the maximum number of bytes that are output per UChar in conversion
+ * from Unicode using this converter.
+ * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
+ * to calculate the size of a target buffer for conversion from Unicode.
+ *
+ * Note: Before ICU 2.8, this function did not return reliable numbers for
+ * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
+ *
+ * This number may not be the same as the maximum number of bytes per
+ * "conversion unit". In other words, it may not be the intuitively expected
+ * number of bytes per character that would be published for a charset,
+ * and may not fulfill any other purpose than the allocation of an output
+ * buffer of guaranteed sufficient size for a given input length and converter.
+ *
+ * Examples for special cases that are taken into account:
+ * - Supplementary code points may convert to more bytes than BMP code points.
+ * This function returns bytes per UChar (UTF-16 code unit), not per
+ * Unicode code point, for efficient buffer allocation.
+ * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
+ * - When m input UChars are converted to n output bytes, then the maximum m/n
+ * is taken into account.
+ *
+ * The number returned here does not take into account
+ * (see UCNV_GET_MAX_BYTES_FOR_STRING):
+ * - callbacks which output more than one charset character sequence per call,
+ * like escape callbacks
+ * - initial and final non-character bytes that are output by some converters
+ * (automatic BOMs, initial escape sequence, final SI, etc.)
+ *
+ * Examples for returned values:
+ * - SBCS charsets: 1
+ * - Shift-JIS: 2
+ * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+ * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+ * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+ * - ISO-2022: 3 (always outputs UTF-8)
+ * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+ * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
+ *
+ * @param converter The Unicode converter.
+ * @return The maximum number of bytes per UChar that are output by ucnv_fromUnicode(),
+ * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING for buffer allocation.
+ *
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @see ucnv_getMinCharSize
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+ucnv_getMaxCharSize(const UConverter *converter);
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Calculates the size of a buffer for conversion from Unicode to a charset.
+ * The calculated size is guaranteed to be sufficient for this conversion.
+ *
+ * It takes into account initial and final non-character bytes that are output
+ * by some converters.
+ * It does not take into account callbacks which output more than one charset
+ * character sequence per call, like escape callbacks.
+ * The default (substitution) callback only outputs one charset character sequence.
+ *
+ * @param length Number of UChars to be converted.
+ * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
+ * that will be used.
+ * @return Size of a buffer that will be large enough to hold the output bytes of
+ * converting length UChars with the converter that returned the maxCharSize.
+ *
+ * @see ucnv_getMaxCharSize
+ * @draft ICU 2.8
+ */
+#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
+ (((int32_t)(length)+10)*(int32_t)(maxCharSize))
+
+#endif /*U_HIDE_DRAFT_API*/
+
+/**
+ * Returns the minimum byte length for characters in this codepage.
+ * This is usually either 1 or 2.
+ * @param converter the Unicode converter
+ * @return the minimum number of bytes allowed by this particular converter
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+ucnv_getMinCharSize(const UConverter *converter);
+
+/**
+ * Returns the display name of the converter passed in based on the Locale
+ * passed in. If the locale contains no display name, the internal ASCII
+ * name will be filled in.
+ *
+ * @param converter the Unicode converter.
+ * @param displayLocale is the specific Locale we want to localised for
+ * @param displayName user provided buffer to be filled in
+ * @param displayNameCapacity size of displayName Buffer
+ * @param err error status code
+ * @return displayNameLength number of UChar needed in displayName
+ * @see ucnv_getName
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_getDisplayName(const UConverter *converter,
+ const char *displayLocale,
+ UChar *displayName,
+ int32_t displayNameCapacity,
+ UErrorCode *err);
+
+/**
+ * Gets the internal, canonical name of the converter (zero-terminated).
+ * The lifetime of the returned string will be that of the converter
+ * passed to this function.
+ * @param converter the Unicode converter
+ * @param err UErrorCode status
+ * @return the internal name of the converter
+ * @see ucnv_getDisplayName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getName(const UConverter *converter, UErrorCode *err);
+
+/**
+ * Gets a codepage number associated with the converter. This is not guaranteed
+ * to be the one used to create the converter. Some converters do not represent
+ * platform registered codepages and return zero for the codepage number.
+ * The error code fill-in parameter indicates if the codepage number
+ * is available.
+ * Does not check if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ *
+ * Important: The use of CCSIDs is not recommended because it is limited
+ * to only two platforms in principle and only one (UCNV_IBM) in the current
+ * ICU converter API.
+ * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
+ * For more details see ucnv_openCCSID().
+ *
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return If any error occurrs, -1 will be returned otherwise, the codepage number
+ * will be returned
+ * @see ucnv_openCCSID
+ * @see ucnv_getPlatform
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter *converter,
+ UErrorCode *err);
+
+/**
+ * Gets a codepage platform associated with the converter. Currently,
+ * only <TT>UCNV_IBM</TT> will be returned.
+ * Does not test if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return The codepage platform
+ * @stable ICU 2.0
+ */
+U_STABLE UConverterPlatform U_EXPORT2
+ucnv_getPlatform(const UConverter *converter,
+ UErrorCode *err);
+
+/**
+ * Gets the type of the converter
+ * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
+ * EBCDIC_STATEFUL, LATIN_1
+ * @param converter a valid, opened converter
+ * @return the type of the converter
+ * @stable ICU 2.0
+ */
+U_STABLE UConverterType U_EXPORT2
+ucnv_getType(const UConverter * converter);
+
+/**
+ * Gets the "starter" (lead) bytes for converters of type MBCS.
+ * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
+ * is not MBCS. Fills in an array of type UBool, with the value of the byte
+ * as offset to the array. For example, if (starters[0x20] == TRUE) at return,
+ * it means that the byte 0x20 is a starter byte in this converter.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter a valid, opened converter of type MBCS
+ * @param starters an array of size 256 to be filled in
+ * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the
+ * converter is not a type which can return starters.
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getStarters(const UConverter* converter,
+ UBool starters[256],
+ UErrorCode* err);
+
+
+/**
+ * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+typedef enum UConverterUnicodeSet {
+ /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+ UCNV_ROUNDTRIP_SET,
+ /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */
+ UCNV_SET_COUNT
+} UConverterUnicodeSet;
+
+
+/**
+ * Returns the set of Unicode code points that can be converted by an ICU converter.
+ *
+ * The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET):
+ * The set of all Unicode code points that can be roundtrip-converted
+ * (converted without any data loss) with the converter.
+ * This set will not include code points that have fallback mappings
+ * or are only the result of reverse fallback mappings.
+ * See UTR #22 "Character Mapping Markup Language"
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * This is useful for example for
+ * - checking that a string or document can be roundtrip-converted with a converter,
+ * without/before actually performing the conversion
+ * - testing if a converter can be used for text for typical text for a certain locale,
+ * by comparing its roundtrip set with the set of ExemplarCharacters from
+ * ICU's locale data or other sources
+ *
+ * In the future, there may be more UConverterUnicodeSet choices to select
+ * sets with different properties.
+ *
+ * @param cnv The converter for which a set is requested.
+ * @param setFillIn A valid USet *. It will be cleared by this function before
+ * the converter's specific set is filled into the USet.
+ * @param whichSet A UConverterUnicodeSet selector;
+ * currently UCNV_ROUNDTRIP_SET is the only supported value.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ *
+ * @see UConverterUnicodeSet
+ * @see uset_open
+ * @see uset_close
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_getUnicodeSet(const UConverter *cnv,
+ USet *setFillIn,
+ UConverterUnicodeSet whichSet,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the current calback function used by the converter when an illegal
+ * or invalid codepage sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getToUCallBack (const UConverter * converter,
+ UConverterToUCallback *action,
+ const void **context);
+
+/**
+ * Gets the current callback function used by the converter when illegal
+ * or invalid Unicode sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getFromUCallBack (const UConverter * converter,
+ UConverterFromUCallback *action,
+ const void **context);
+
+/**
+ * Changes the callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new toUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setToUCallBack (UConverter * converter,
+ UConverterToUCallback newAction,
+ const void* newContext,
+ UConverterToUCallback *oldAction,
+ const void** oldContext,
+ UErrorCode * err);
+
+/**
+ * Changes the current callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new fromUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getFromUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setFromUCallBack (UConverter * converter,
+ UConverterFromUCallback newAction,
+ const void *newContext,
+ UConverterFromUCallback *oldAction,
+ const void **oldContext,
+ UErrorCode * err);
+
+/**
+ * Converts an array of unicode characters to an array of codepage
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last UChar consumed.
+ *
+ * Target similarly starts out pointer at the first available byte in the output
+ * buffer, and ends up pointing after the last byte written to the output.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * codepage characters to. Output : points to after the last codepage character copied
+ * to <TT>target</TT>.
+ * @param targetLimit the pointer just after last of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
+ * @param sourceLimit the pointer just after the last of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
+ * the source buffer is consumed.
+ * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fromUnicode (UConverter * converter,
+ char **target,
+ const char *targetLimit,
+ const UChar ** source,
+ const UChar * sourceLimit,
+ int32_t* offsets,
+ UBool flush,
+ UErrorCode * err);
+
+/**
+ * Converts a buffer of codepage bytes into an array of unicode UChars
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last byte of source consumed.
+ *
+ * Target similarly starts out pointer at the first available UChar in the output
+ * buffer, and ends up pointing after the last UChar written to the output.
+ * It does NOT necessarily keep UChar sequences together.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * UChars into. Output : points to after the last UChar copied.
+ * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source codepage buffer.
+ * @param sourceLimit the pointer to the byte after the end of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
+ * the source buffer is consumed.
+ * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setFromUCallBack
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_toUnicode(UConverter *converter,
+ UChar **target,
+ const UChar *targetLimit,
+ const char **source,
+ const char *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err);
+
+/**
+ * Convert the Unicode string into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_fromUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
+ *
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
+ * @param src the input Unicode string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_fromUnicode
+ * @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+ char *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert the codepage string into a Unicode string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_toUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * 2*srcLength (each char may be converted into a surrogate pair).
+ *
+ * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
+ * @param src the input codepage string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of UChars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_toUnicode
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+ UChar *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ * it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=TRUE.
+ * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ * that is, for where the input is supplied in multiple buffers,
+ * because ucnv_getNextUChar() will assume the end of the input at the end
+ * of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=FALSE),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
+ *
+ * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
+ * There are two different kinds of codepages that provide mappings for surrogate characters:
+ * <ul>
+ * <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
+ * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
+ * Each valid sequence will result in exactly one returned code point.
+ * If a sequence results in a single surrogate, then that will be returned
+ * by itself, even if a neighboring sequence encodes the matching surrogate.</li>
+ * <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
+ * including surrogates. Code points in supplementary planes are represented with
+ * two sequences, each encoding a surrogate.
+ * For these codepages, matching pairs of surrogates will be combined into single
+ * code points for returning from this function.
+ * (Note that SCSU is actually a mix of these codepage types.)</li>
+ * </ul></p>
+ *
+ * @param converter an open UConverter
+ * @param source the address of a pointer to the codepage buffer, will be
+ * updated to point after the bytes consumed in the conversion call.
+ * @param sourceLimit points to the end of the input buffer
+ * @param err fills in error status (see ucnv_toUnicode)
+ * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input
+ * is empty or does not convert to any output (e.g.: pure state-change
+ * codes SI/SO, escape sequences for ISO 2022,
+ * or if the callback did not output anything, ...).
+ * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
+ * the "buffer" is the return code. However, there might be subsequent output
+ * stored in the converter object
+ * that will be returned in following calls to this function.
+ * @return a UChar32 resulting from the partial conversion of source
+ * @see ucnv_toUnicode
+ * @see ucnv_toUChars
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter * converter,
+ const char **source,
+ const char * sourceLimit,
+ UErrorCode * err);
+
+/**
+ * Convert from one external charset to another using two existing UConverters.
+ * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
+ * are used, "pivoting" through 16-bit Unicode.
+ *
+ * There is a similar function, ucnv_convert(),
+ * which has the following limitations:
+ * - it takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - it does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ *
+ * By contrast, ucnv_convertEx()
+ * - takes UConverter parameters instead of charset names
+ * - fully exposes the pivot buffer for complete error handling
+ *
+ * ucnv_convertEx() also provides further convenience:
+ * - an option to reset the converters at the beginning
+ * (if reset==TRUE, see parameters;
+ * also sets *pivotTarget=*pivotSource=pivotStart)
+ * - allow NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLimit==NULL, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - the pivot buffer can be provided internally;
+ * in this case, the caller will not be able to get details about where an
+ * error occurred
+ * (if pivotStart==NULL, see below)
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * Limitation compared to the direct use of
+ * ucnv_fromUnicode() and ucnv_toUnicode():
+ * ucnv_convertEx() does not provide offset information.
+ *
+ * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
+ * ucnv_convertEx() does not support preflighting directly.
+ *
+ * Sample code for converting a single string from
+ * one external charset to UTF-8, ignoring the location of errors:
+ *
+ * \code
+ * int32_t
+ * myToUTF8(UConverter *cnv,
+ * const char *s, int32_t length,
+ * char *u8, int32_t capacity,
+ * UErrorCode *pErrorCode) {
+ * UConverter *utf8Cnv;
+ * char *target;
+ *
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * target=u8;
+ * ucnv_convertEx(cnv, utf8Cnv,
+ * &target, u8+capacity,
+ * &s, length>=0 ? s+length : NULL,
+ * NULL, NULL, NULL, NULL,
+ * TRUE, TRUE,
+ * pErrorCode);
+ *
+ * myReleaseCachedUTF8Converter(utf8Cnv);
+ *
+ * // return the output string length, but without preflighting
+ * return (int32_t)(target-u8);
+ * }
+ * \endcode
+ *
+ * @param targetCnv Output converter, used to convert from the UTF-16 pivot
+ * to the target using ucnv_fromUnicode().
+ * @param sourceCnv Input converter, used to convert from the source to
+ * the UTF-16 pivot using ucnv_toUnicode().
+ * @param target I/O parameter, same as for ucnv_fromUChars().
+ * Input: *target points to the beginning of the target buffer.
+ * Output: *target points to the first unit after the last char written.
+ * @param targetLimit Pointer to the first unit after the target buffer.
+ * @param source I/O parameter, same as for ucnv_toUChars().
+ * Input: *source points to the beginning of the source buffer.
+ * Output: *source points to the first unit after the last char read.
+ * @param sourceLimit Pointer to the first unit after the source buffer.
+ * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
+ * then an internal buffer is used and the other pivot
+ * arguments are ignored and can be NULL as well.
+ * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for
+ * conversion from the pivot buffer to the target buffer.
+ * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for
+ * conversion from the source buffer to the pivot buffer.
+ * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
+ * and pivotStart<pivotLimit (unless pivotStart==NULL).
+ * @param pivotLimit Pointer to the first unit after the pivot buffer.
+ * @param reset If TRUE, then ucnv_resetToUnicode(sourceCnv) and
+ * ucnv_resetFromUnicode(targetCnv) are called, and the
+ * pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
+ * @param flush If true, indicates the end of the input.
+ * Passed directly to ucnv_toUnicode(), and carried over to
+ * ucnv_fromUnicode() when the source is empty as well.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
+ * because overflows into the pivot buffer are handled internally.
+ * Other conversion errors are from the source-to-pivot
+ * conversion if *pivotSource==pivotStart, otherwise from
+ * the pivot-to-target conversion.
+ *
+ * @see ucnv_convert
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
+ char **target, const char *targetLimit,
+ const char **source, const char *sourceLimit,
+ UChar *pivotStart, UChar **pivotSource,
+ UChar **pivotTarget, const UChar *pivotLimit,
+ UBool reset, UBool flush,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, two converters are opened according to the name arguments,
+ * then the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(), then the converters are closed again.
+ *
+ * This is a convenience function, not an efficient way to convert a lot of text:
+ * ucnv_convert()
+ * - takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ * - allows NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLength==-1, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - a pivot buffer is provided internally
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * and either the target buffer is terminated with a single NUL byte
+ * or the error code is set to U_STRING_NOT_TERMINATED_WARNING
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * and the full output string length is returned ("preflighting")
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * @param toConverterName The name of the converter that is used to convert
+ * from the UTF-16 pivot buffer to the target.
+ * @param fromConverterName The name of the converter that is used to convert
+ * from the source to the UTF-16 pivot buffer.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_convertEx
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_convert(const char *toConverterName,
+ const char *fromConverterName,
+ char *target,
+ int32_t targetCapacity,
+ const char *source,
+ int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses the cnv converter parameter.
+ * The pivot-to-target conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param algorithmicType UConverterType constant identifying the desired target
+ * charset as a purely algorithmic converter.
+ * Those are converters for Unicode charsets like
+ * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ * as well as US-ASCII and ISO-8859-1.
+ * @param cnv The converter that is used to convert
+ * from the source to the UTF-16 pivot buffer.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toAlgorithmic(UConverterType algorithmicType,
+ UConverter *cnv,
+ char *target, int32_t targetCapacity,
+ const char *source, int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ * The pivot-to-target conversion uses the cnv converter parameter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param cnv The converter that is used to convert
+ * from the UTF-16 pivot buffer to the target.
+ * @param algorithmicType UConverterType constant identifying the desired source
+ * charset as a purely algorithmic converter.
+ * Those are converters for Unicode charsets like
+ * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ * as well as US-ASCII and ISO-8859-1.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromAlgorithmic(UConverter *cnv,
+ UConverterType algorithmicType,
+ char *target, int32_t targetCapacity,
+ const char *source, int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Frees up memory occupied by unused, cached converter shared data.
+ *
+ * @return the number of cached converters successfully deleted
+ * @see ucnv_close
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_flushCache(void);
+
+/**
+ * Returns the number of available converters, as per the alias file.
+ *
+ * @return the number of available converters
+ * @see ucnv_getAvailableName
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_countAvailable(void);
+
+/**
+ * Gets the canonical converter name of the specified converter from a list of
+ * all available converters contaied in the alias file. All converters
+ * in this list can be opened.
+ *
+ * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
+ * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
+ * @see ucnv_countAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_getAvailableName(int32_t n);
+
+/**
+ * Returns a UEnumeration to enumerate all of the canonical converter
+ * names, as per the alias file, regardless of the ability to open each
+ * converter.
+ *
+ * @return A UEnumeration object for getting all the recognized canonical
+ * converter names.
+ * @see ucnv_getAvailableName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.4
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openAllNames(UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of aliases for a given converter or alias name.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * This method only enumerates the listed entries in the alias file.
+ * @param alias alias name
+ * @param pErrorCode error status
+ * @return number of names on alias list for given alias
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2
+ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
+
+/**
+ * Gives the name of the alias at given index of alias list.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param n index in alias list
+ * @param pErrorCode result of operation
+ * @return returns the name of the alias at given index
+ * @see ucnv_countAliases
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Fill-up the list of alias names for the given alias.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param aliases fill-in list, aliases is a pointer to an array of
+ * <code>ucnv_countAliases()</code> string-pointers
+ * (<code>const char *</code>) that will be filled in.
+ * The strings themselves are owned by the library.
+ * @param pErrorCode result of operation
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
+
+/**
+ * Return a new UEnumeration object for enumerating all the
+ * alias names for a given converter that are recognized by a standard.
+ * This method only enumerates the listed entries in the alias file.
+ * The convrtrs.txt file can be modified to change the results of
+ * this function.
+ * The first result in this list is the same result given by
+ * <code>ucnv_getStandardName</code>, which is the default alias for
+ * the specified standard name. The returned object must be closed with
+ * <code>uenum_close</code> when you are done with the object.
+ *
+ * @param convName original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode The error code
+ * @return A UEnumeration object for getting all aliases that are recognized
+ * by a standard. If any of the parameters are invalid, NULL
+ * is returned.
+ * @see ucnv_getStandardName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.2
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openStandardNames(const char *convName,
+ const char *standard,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of standards associated to converter names.
+ * @return number of standards
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2
+ucnv_countStandards(void);
+
+/**
+ * Gives the name of the standard at given index of standard list.
+ * @param n index in standard list
+ * @param pErrorCode result of operation
+ * @return returns the name of the standard at given index. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Returns a standard name for a given converter name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("conv", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"alias2"</b>
+ *
+ * @param name original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode result of operation
+ * @return returns the standard converter name;
+ * if a standard converter name cannot be determined,
+ * then <code>NULL</code> is returned. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * This function will return the internal canonical converter name of the
+ * tagged alias. This is the opposite of ucnv_openStandardNames, which
+ * returns the tagged alias given the canonical name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("alias1", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"conv"</b>
+ *
+ * @return returns the canonical converter name;
+ * if a standard or alias name cannot be determined,
+ * then <code>NULL</code> is returned. The returned string is
+ * owned by the library.
+ * @see ucnv_getStandardName
+ * @stable ICU 2.4
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * returns the current default converter name.
+ *
+ * @return returns the current default converter name;
+ * if a default converter name cannot be determined,
+ * then <code>NULL</code> is returned.
+ * Storage owned by the library
+ * @see ucnv_setDefaultName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getDefaultName(void);
+
+/**
+ * sets the current default converter name. Caller must own the storage for 'name'
+ * and preserve it indefinitely.
+ * @param name the converter name to be the default (must exist).
+ * @see ucnv_getDefaultName
+ * @system SYSTEM API
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setDefaultName(const char *name);
+
+/**
+ * Fixes the backslash character mismapping. For example, in SJIS, the backslash
+ * character in the ASCII portion is also used to represent the yen currency sign.
+ * When mapping from Unicode character 0x005C, it's unclear whether to map the
+ * character back to yen or backslash in SJIS. This function will take the input
+ * buffer and replace all the yen sign characters with backslash. This is necessary
+ * when the user tries to open a file with the input buffer on Windows.
+ * This function will test the converter to see whether such mapping is
+ * required. You can sometimes avoid using this function by using the correct version
+ * of Shift-JIS.
+ *
+ * @param cnv The converter representing the target codepage.
+ * @param source the input buffer to be fixed
+ * @param sourceLen the length of the input buffer
+ * @see ucnv_isAmbiguous
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
+
+/**
+ * Determines if the converter contains ambiguous mappings of the same
+ * character or not.
+ * @param cnv the converter to be tested
+ * @return TRUE if the converter contains ambiguous mapping of the same
+ * character, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv);
+
+/**
+ * Sets the converter to use fallback mapping or not.
+ * @param cnv The converter to set the fallback mapping usage on.
+ * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback
+ * mapping, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setFallback(UConverter *cnv, UBool usesFallback);
+
+/**
+ * Determines if the converter uses fallback mappings or not.
+ * @param cnv The converter to be tested
+ * @return TRUE if the converter uses fallback, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_usesFallback(const UConverter *cnv);
+
+/**
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns the charset name of the indicated Unicode charset.
+ * NULL is returned when no Unicode signature is recognized.
+ * The number of bytes in the signature is output as well.
+ *
+ * The caller can ucnv_open() a converter using the charset name.
+ * The first code unit (UChar) from the start of the stream will be U+FEFF
+ * (the Unicode BOM/signature character) and can usually be ignored.
+ *
+ * For most Unicode charsets it is also possible to ignore the indicated
+ * number of initial stream bytes and start converting after them.
+ * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
+ * this will not work. Therefore, it is best to ignore the first output UChar
+ * instead of the input signature bytes.
+ * <p>
+ * Usage:
+ * @code
+ * UErrorCode err = U_ZERO_ERROR;
+ * char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
+ * int32_t signatureLength = 0;
+ * char *encoding = ucnv_detectUnicodeSignatures(input,sizeof(input),&signatureLength,&err);
+ * UConverter *conv = NULL;
+ * UChar output[100];
+ * UChar *target = output, *out;
+ * char *source = input;
+ * if(encoding!=NULL && U_SUCCESS(err)){
+ * // should signature be discarded ?
+ * conv = ucnv_open(encoding, &err);
+ * // do the conversion
+ * ucnv_toUnicode(conv,
+ * target, output + sizeof(output)/U_SIZEOF_UCHAR,
+ * source, input + sizeof(input),
+ * NULL, TRUE, &err);
+ * out = output;
+ * if (discardSignature){
+ * ++out; // ignore initial U+FEFF
+ * }
+ * while(out != target) {
+ * printf("%04x ", *out++);
+ * }
+ * puts("");
+ * }
+ *
+ * @endcode
+ *
+ * @param source The source string in which the signature should be detected.
+ * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte.
+ * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature
+ * of the detected UTF. 0 if not detected.
+ * Can be a NULL pointer.
+ * @param pErrorCode A pointer to receive information about any errors that may occur during detection.
+ * Must be a valid pointer to an error code value, which must not indicate a failure
+ * before the function call.
+ * @return The name of the encoding detected. NULL if encoding is not detected.
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_detectUnicodeSignature(const char* source,
+ int32_t sourceLength,
+ int32_t *signatureLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+#endif
+/*_UCNV*/
diff --git a/Source/WebCore/icu/unicode/ucnv_cb.h b/Source/WebCore/icu/unicode/ucnv_cb.h
new file mode 100644
index 0000000..f0e67ba
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucnv_cb.h
@@ -0,0 +1,162 @@
+/*
+**********************************************************************
+* Copyright (C) 2000-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv_cb.h:
+ * External APIs for the ICU's codeset conversion library
+ * Helena Shih
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ */
+
+/**
+ * \file
+ * \brief C UConverter functions to aid the writers of callbacks
+ *
+ * <h2> Callback API for UConverter </h2>
+ *
+ * These functions are provided here for the convenience of the callback
+ * writer. If you are just looking for callback functions to use, please
+ * see ucnv_err.h. DO NOT call these functions directly when you are
+ * working with converters, unless your code has been called as a callback
+ * via ucnv_setFromUCallback or ucnv_setToUCallback !!
+ *
+ * A note about error codes and overflow. Unlike other ICU functions,
+ * these functions do not expect the error status to be U_ZERO_ERROR.
+ * Callbacks must be much more careful about their error codes.
+ * The error codes used here are in/out parameters, which should be passed
+ * back in the callback's error parameter.
+ *
+ * For example, if you call ucnv_cbfromUWriteBytes to write data out
+ * to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if
+ * the data did not fit in the target. But this isn't a failing error,
+ * in fact, ucnv_cbfromUWriteBytes may be called AGAIN with the error
+ * status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes,
+ * which will also go into the internal overflow buffers.
+ *
+ * Concerning offsets, the 'offset' parameters here are relative to the start
+ * of SOURCE. For example, Suppose the string "ABCD" was being converted
+ * from Unicode into a codepage which doesn't have a mapping for 'B'.
+ * 'A' will be written out correctly, but
+ * The FromU Callback will be called on an unassigned character for 'B'.
+ * At this point, this is the state of the world:
+ * Target: A [..] [points after A]
+ * Source: A B [C] D [points to C - B has been consumed]
+ * 0 1 2 3
+ * codePoint = "B" [the unassigned codepoint]
+ *
+ * Now, suppose a callback wants to write the substitution character '?' to
+ * the target. It calls ucnv_cbFromUWriteBytes() to write the ?.
+ * It should pass ZERO as the offset, because the offset as far as the
+ * callback is concerned is relative to the SOURCE pointer [which points
+ * before 'C'.] If the callback goes into the args and consumes 'C' also,
+ * it would call FromUWriteBytes with an offset of 1 (and advance the source
+ * pointer).
+ *
+ */
+
+#ifndef UCNV_CB_H
+#define UCNV_CB_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+
+/**
+ * ONLY used by FromU callback functions.
+ * Writes out the specified byte output bytes to the target byte buffer or to converter internal buffers.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source source bytes to write
+ * @param length length of bytes to write
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG>
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
+ const char* source,
+ int32_t length,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by FromU callback functions.
+ * This function will write out the correct substitution character sequence
+ * to the target.
+ *
+ * @param args callback fromUnicode arguments
+ * @param offsetIndex the relative offset index from the current source pointer to be used
+ * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG>
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteBytes
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by fromU callback functions.
+ * This function will write out the error character(s) to the target UChar buffer.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed]
+ * @param sourceLimit pointer after last UChar to write
+ * @param offsetIndex the relative offset index from callback which will be set
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
+ const UChar** source,
+ const UChar* sourceLimit,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ * This function will write out the specified characters to the target
+ * UChar buffer.
+ *
+ * @param args callback toUnicode arguments
+ * @param source source string to write
+ * @param length the length of source string
+ * @param offsetIndex the relative offset index which will be written.
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
+ const UChar* source,
+ int32_t length,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ * This function will write out the Unicode substitution character (U+FFFD).
+ *
+ * @param args callback fromUnicode arguments
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteUChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
+ int32_t offsetIndex,
+ UErrorCode * err);
+#endif
+
+#endif
diff --git a/Source/WebCore/icu/unicode/ucnv_err.h b/Source/WebCore/icu/unicode/ucnv_err.h
new file mode 100644
index 0000000..2a6104c
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucnv_err.h
@@ -0,0 +1,456 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ *
+ *
+ * ucnv_err.h:
+ */
+
+/**
+ * \file
+ * \brief C UConverter predefined error callbacks
+ *
+ * <h2>Error Behaviour Functions</h2>
+ * Defines some error behaviour functions called by ucnv_{from,to}Unicode
+ * These are provided as part of ICU and many are stable, but they
+ * can also be considered only as an example of what can be done with
+ * callbacks. You may of course write your own.
+ *
+ * If you want to write your own, you may also find the functions from
+ * ucnv_cb.h useful when writing your own callbacks.
+ *
+ * These functions, although public, should NEVER be called directly.
+ * They should be used as parameters to the ucnv_setFromUCallback
+ * and ucnv_setToUCallback functions, to set the behaviour of a converter
+ * when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
+ *
+ * usage example: 'STOP' doesn't need any context, but newContext
+ * could be set to something other than 'NULL' if needed. The available
+ * contexts in this header can modify the default behavior of the callback.
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterFromUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setFromUCallBack(myConverter,
+ * UCNV_FROM_U_CALLBACK_STOP,
+ * NULL,
+ * &oldAction,
+ * &oldContext,
+ * &status);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters an
+ * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
+ * and ucnv_setToUCallBack would need to be called in order to change
+ * that behavior too.
+ *
+ * Here is an example with a context:
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterFromUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setToUCallBack(myConverter,
+ * UCNV_TO_U_CALLBACK_SUBSTITUTE,
+ * UCNV_SUB_STOP_ON_ILLEGAL,
+ * &oldAction,
+ * &oldContext,
+ * &status);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters an
+ * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ * Codepage -> Unicode. Any unmapped and legal characters will be
+ * substituted to be the default substitution character.
+ */
+
+#ifndef UCNV_ERR_H
+#define UCNV_ERR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+/** Forward declaring the UConverter structure. @stable ICU 2.0 */
+struct UConverter;
+
+/** @stable ICU 2.0 */
+typedef struct UConverter UConverter;
+
+/**
+ * FROM_U, TO_U context options for sub callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SUB_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U, TO_U context options for skip callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_ICU NULL
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_JAVA "J"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_C "C"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape (&amp;#DDDD;)
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape (&amp;#DDDD;)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_DEC "D"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape (&amp;#xXXXX;)
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape (&amp;#xXXXX;)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_HEX "X"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape teh code unit according to Unicode (U+XXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_UNICODE "U"
+
+/**
+ * The process condition code to be used with the callbacks.
+ * Codes which are greater than UCNV_IRREGULAR should be
+ * passed on to any chained callbacks.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
+ \\x81\\x2E is illegal in SJIS because \\x2E
+ is not a valid trail byte for the \\x81
+ lead byte.
+ Also, starting with Unicode 3.0.1, non-shortest byte sequences
+ in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
+ are also illegal, not just irregular.
+ The error code U_ILLEGAL_CHAR_FOUND will be set. */
+ UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
+ the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
+ are irregular UTF-8 byte sequences for single surrogate
+ code points.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_RESET = 3, /**< The callback is called with this reason when a
+ 'reset' has occured. Callback should reset all
+ state. */
+ UCNV_CLOSE = 4, /**< Called when the converter is closed. The
+ callback should release any allocated memory.*/
+ UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
+ converter. the pointer available as the
+ 'context' is an alias to the original converters'
+ context pointer. If the context must be owned
+ by the new converter, the callback must clone
+ the data and call ucnv_setFromUCallback
+ (or setToUCallback) with the correct pointer.
+ @stable ICU 2.2
+ */
+} UConverterCallbackReason;
+
+
+/**
+ * The structure for the fromUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
+ const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterFromUnicodeArgs;
+
+
+/**
+ * The structure for the toUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
+ const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterToUnicodeArgs;
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSINGED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * current substitution string for the converter. This is the default
+ * callback.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal codepoints
+ *
+ * @param context The function currently recognizes the callback options:
+ * <ul>
+ * <li>UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE").
+ * In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * %UD84D%UDC56</li>
+ * <li>UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
+ * In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \\uD84D\\uDC56</li>
+ * <li>UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
+ * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \\U00023456</li>
+ * <li>UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal
+ * representation in the format &amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;").
+ * In the Event the converter doesn't support the characters {&amp;,#}[0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * &amp;#144470; and Zero padding is ignored.</li>
+ * <li>UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal
+ * representation in the format &#xXXXX, e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;").
+ * In the Event the converter doesn't support the characters {&,#,x}[0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * &amp;#x23456;</li>
+ * </ul>
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSINGED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * Unicode substitution character, U+FFFD.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal bytes
+ * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
+ *
+ * @param context This function currently recognizes the callback options:
+ * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
+ * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+#endif
+
+#endif
+
+/*UCNV_ERR_H*/
diff --git a/Source/WebCore/icu/unicode/ucol.h b/Source/WebCore/icu/unicode/ucol.h
new file mode 100644
index 0000000..cbfb1c3
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucol.h
@@ -0,0 +1,1219 @@
+/*
+*******************************************************************************
+* Copyright (c) 1996-2005, International Business Machines Corporation and others.
+* All Rights Reserved.
+*******************************************************************************
+*/
+
+#ifndef UCOL_H
+#define UCOL_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/unorm.h"
+#include "unicode/parseerr.h"
+#include "unicode/uloc.h"
+#include "unicode/uset.h"
+
+/**
+ * \file
+ * \brief C API: Collator
+ *
+ * <h2> Collator C API </h2>
+ *
+ * The C API for Collator performs locale-sensitive
+ * string comparison. You use this service to build
+ * searching and sorting routines for natural language text.
+ * <em>Important: </em>The ICU collation service has been reimplemented
+ * in order to achieve better performance and UCA compliance.
+ * For details, see the
+ * <a href="http://icu.sourceforge.net/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm">
+ * collation design document</a>.
+ * <p>
+ * For more information about the collation service see
+ * <a href="http://icu.sourceforge.net/icu/userguide/Collate_Intro.html">the users guide</a>.
+ * <p>
+ * Collation service provides correct sorting orders for most locales supported in ICU.
+ * If specific data for a locale is not available, the orders eventually falls back
+ * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
+ * <p>
+ * Sort ordering may be customized by providing your own set of rules. For more on
+ * this subject see the
+ * <a href="http://icu.sourceforge.net/icu/userguide/Collate_Customization.html">
+ * Collation customization</a> section of the users guide.
+ * <p>
+ * @see UCollationResult
+ * @see UNormalizationMode
+ * @see UCollationStrength
+ * @see UCollationElements
+ */
+
+/** A collation element iterator.
+* For usage in C programs.
+*/
+struct collIterate;
+/** structure representing collation element iterator instance
+ * @stable ICU 2.0
+ */
+typedef struct collIterate collIterate;
+
+/** A collator.
+* For usage in C programs.
+*/
+struct UCollator;
+/** structure representing a collator object instance
+ * @stable ICU 2.0
+ */
+typedef struct UCollator UCollator;
+
+
+/**
+ * UCOL_LESS is returned if source string is compared to be less than target
+ * string in the u_strcoll() method.
+ * UCOL_EQUAL is returned if source string is compared to be equal to target
+ * string in the u_strcoll() method.
+ * UCOL_GREATER is returned if source string is compared to be greater than
+ * target string in the u_strcoll() method.
+ * @see u_strcoll()
+ * <p>
+ * Possible values for a comparison result
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** string a == string b */
+ UCOL_EQUAL = 0,
+ /** string a > string b */
+ UCOL_GREATER = 1,
+ /** string a < string b */
+ UCOL_LESS = -1
+} UCollationResult ;
+
+
+/** Enum containing attribute values for controling collation behavior.
+ * Here are all the allowable values. Not every attribute can take every value. The only
+ * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined
+ * value for that locale
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** accepted by most attributes */
+ UCOL_DEFAULT = -1,
+
+ /** Primary collation strength */
+ UCOL_PRIMARY = 0,
+ /** Secondary collation strength */
+ UCOL_SECONDARY = 1,
+ /** Tertiary collation strength */
+ UCOL_TERTIARY = 2,
+ /** Default collation strength */
+ UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
+ UCOL_CE_STRENGTH_LIMIT,
+ /** Quaternary collation strength */
+ UCOL_QUATERNARY=3,
+ /** Identical collation strength */
+ UCOL_IDENTICAL=15,
+ UCOL_STRENGTH_LIMIT,
+
+ /** Turn the feature off - works for UCOL_FRENCH_COLLATION,
+ UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
+ & UCOL_DECOMPOSITION_MODE*/
+ UCOL_OFF = 16,
+ /** Turn the feature on - works for UCOL_FRENCH_COLLATION,
+ UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
+ & UCOL_DECOMPOSITION_MODE*/
+ UCOL_ON = 17,
+
+ /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
+ UCOL_SHIFTED = 20,
+ /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
+ UCOL_NON_IGNORABLE = 21,
+
+ /** Valid for UCOL_CASE_FIRST -
+ lower case sorts before upper case */
+ UCOL_LOWER_FIRST = 24,
+ /** upper case sorts before lower case */
+ UCOL_UPPER_FIRST = 25,
+
+ UCOL_ATTRIBUTE_VALUE_COUNT
+
+} UColAttributeValue;
+
+/**
+ * Base letter represents a primary difference. Set comparison
+ * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
+ * Use this to set the strength of a Collator object.
+ * Example of primary difference, "abc" &lt; "abd"
+ *
+ * Diacritical differences on the same base letter represent a secondary
+ * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary
+ * differences. Use this to set the strength of a Collator object.
+ * Example of secondary difference, "ä" >> "a".
+ *
+ * Uppercase and lowercase versions of the same character represents a
+ * tertiary difference. Set comparison level to UCOL_TERTIARY to include
+ * all comparison differences. Use this to set the strength of a Collator
+ * object.
+ * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
+ *
+ * Two characters are considered "identical" when they have the same
+ * unicode spellings. UCOL_IDENTICAL.
+ * For example, "ä" == "ä".
+ *
+ * UCollationStrength is also used to determine the strength of sort keys
+ * generated from UCollator objects
+ * These values can be now found in the UColAttributeValue enum.
+ * @stable ICU 2.0
+ **/
+typedef UColAttributeValue UCollationStrength;
+
+/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
+ * value, as well as the values specific to each one.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** Attribute for direction of secondary weights - used in French.\
+ * Acceptable values are UCOL_ON, which results in secondary weights
+ * being considered backwards and UCOL_OFF which treats secondary
+ * weights in the order they appear.*/
+ UCOL_FRENCH_COLLATION,
+ /** Attribute for handling variable elements.\
+ * Acceptable values are UCOL_NON_IGNORABLE (default)
+ * which treats all the codepoints with non-ignorable
+ * primary weights in the same way,
+ * and UCOL_SHIFTED which causes codepoints with primary
+ * weights that are equal or below the variable top value
+ * to be ignored on primary level and moved to the quaternary
+ * level.*/
+ UCOL_ALTERNATE_HANDLING,
+ /** Controls the ordering of upper and lower case letters.\
+ * Acceptable values are UCOL_OFF (default), which orders
+ * upper and lower case letters in accordance to their tertiary
+ * weights, UCOL_UPPER_FIRST which forces upper case letters to
+ * sort before lower case letters, and UCOL_LOWER_FIRST which does
+ * the opposite. */
+ UCOL_CASE_FIRST,
+ /** Controls whether an extra case level (positioned before the third
+ * level) is generated or not.\ Acceptable values are UCOL_OFF (default),
+ * when case level is not generated, and UCOL_ON which causes the case
+ * level to be generated.\ Contents of the case level are affected by
+ * the value of UCOL_CASE_FIRST attribute.\ A simple way to ignore
+ * accent differences in a string is to set the strength to UCOL_PRIMARY
+ * and enable case level. */
+ UCOL_CASE_LEVEL,
+ /** Controls whether the normalization check and necessary normalizations
+ * are performed.\ When set to UCOL_OFF (default) no normalization check
+ * is performed.\ The correctness of the result is guaranteed only if the
+ * input data is in so-called FCD form (see users manual for more info).\
+ * When set to UCOL_ON, an incremental check is performed to see whether the input data
+ * is in the FCD form.\ If the data is not in the FCD form, incremental
+ * NFD normalization is performed. */
+ UCOL_NORMALIZATION_MODE,
+ /** An alias for UCOL_NORMALIZATION_MODE attribute */
+ UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
+ /** The strength attribute.\ Can be either UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL.\ The usual strength
+ * for most locales (except Japanese) is tertiary.\ Quaternary strength
+ * is useful when combined with shifted setting for alternate handling
+ * attribute and for JIS x 4061 collation, when it is used to distinguish
+ * between Katakana and Hiragana (this is achieved by setting the
+ * UCOL_HIRAGANA_QUATERNARY mode to on.\ Otherwise, quaternary level
+ * is affected only by the number of non ignorable code points in
+ * the string.\ Identical strength is rarely useful, as it amounts
+ * to codepoints of the NFD form of the string. */
+ UCOL_STRENGTH,
+ /** when turned on, this attribute
+ * positions Hiragana before all
+ * non-ignorables on quaternary level
+ * This is a sneaky way to produce JIS
+ * sort order */
+ UCOL_HIRAGANA_QUATERNARY_MODE,
+ /** when turned on, this attribute
+ * generates a collation key
+ * for the numeric value of substrings
+ * of digits. This is a way to get '100'
+ * to sort AFTER '2'.*/
+ UCOL_NUMERIC_COLLATION,
+ UCOL_ATTRIBUTE_COUNT
+} UColAttribute;
+
+/** Options for retrieving the rule string
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** Retrieve tailoring only */
+ UCOL_TAILORING_ONLY,
+ /** Retrieve UCA rules and tailoring */
+ UCOL_FULL_RULES
+} UColRuleOption ;
+
+/**
+ * Open a UCollator for comparing strings.
+ * The UCollator pointer is used in all the calls to the Collation
+ * service. After finished, collator must be disposed of by calling
+ * {@link #ucol_close }.
+ * @param loc The locale containing the required collation rules.
+ * Special values for locales can be passed in -
+ * if NULL is passed for the locale, the default locale
+ * collation rules will be used. If empty string ("") or
+ * "root" are passed, UCA rules will be used.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return A pointer to a UCollator, or 0 if an error occurred.
+ * @see ucol_openRules
+ * @see ucol_safeClone
+ * @see ucol_close
+ * @stable ICU 2.0
+ */
+U_STABLE UCollator* U_EXPORT2
+ucol_open(const char *loc, UErrorCode *status);
+
+/**
+ * Produce an UCollator instance according to the rules supplied.
+ * The rules are used to change the default ordering, defined in the
+ * UCA in a process called tailoring. The resulting UCollator pointer
+ * can be used in the same way as the one obtained by {@link #ucol_strcoll }.
+ * @param rules A string describing the collation rules. For the syntax
+ * of the rules please see users guide.
+ * @param rulesLength The length of rules, or -1 if null-terminated.
+ * @param normalizationMode The normalization mode: One of
+ * UCOL_OFF (expect the text to not need normalization),
+ * UCOL_ON (normalize), or
+ * UCOL_DEFAULT (set the mode according to the rules)
+ * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
+ * @param parseError A pointer to UParseError to recieve information about errors
+ * occurred during parsing. This argument can currently be set
+ * to NULL, but at users own risk. Please provide a real structure.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return A pointer to a UCollator.\ It is not guaranteed that NULL be returned in case
+ * of error - please use status argument to check for errors.
+ * @see ucol_open
+ * @see ucol_safeClone
+ * @see ucol_close
+ * @stable ICU 2.0
+ */
+U_STABLE UCollator* U_EXPORT2
+ucol_openRules( const UChar *rules,
+ int32_t rulesLength,
+ UColAttributeValue normalizationMode,
+ UCollationStrength strength,
+ UParseError *parseError,
+ UErrorCode *status);
+
+/**
+ * Open a collator defined by a short form string.
+ * The structure and the syntax of the string is defined in the "Naming collators"
+ * section of the users guide:
+ * http://icu.sourceforge.net/icu/userguide/Collate_Concepts.html#Naming_Collators
+ * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final
+ * strength will be 3. 3066bis locale overrides individual locale parts.
+ * The call to this function is equivalent to a call to ucol_open, followed by a
+ * series of calls to ucol_setAttribute and ucol_setVariableTop.
+ * @param definition A short string containing a locale and a set of attributes.
+ * Attributes not explicitly mentioned are left at the default
+ * state for a locale.
+ * @param parseError if not NULL, structure that will get filled with error's pre
+ * and post context in case of error.
+ * @param forceDefaults if FALSE, the settings that are the same as the collator
+ * default settings will not be applied (for example, setting
+ * French secondary on a French collator would not be executed).
+ * If TRUE, all the settings will be applied regardless of the
+ * collator default value. If the definition
+ * strings are to be cached, should be set to FALSE.
+ * @param status Error code. Apart from regular error conditions connected to
+ * instantiating collators (like out of memory or similar), this
+ * API will return an error if an invalid attribute or attribute/value
+ * combination is specified.
+ * @return A pointer to a UCollator or 0 if an error occured (including an
+ * invalid attribute).
+ * @see ucol_open
+ * @see ucol_setAttribute
+ * @see ucol_setVariableTop
+ * @see ucol_getShortDefinitionString
+ * @see ucol_normalizeShortDefinitionString
+ * @draft ICU 3.0
+ *
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_openFromShortString( const char *definition,
+ UBool forceDefaults,
+ UParseError *parseError,
+ UErrorCode *status);
+
+/**
+ * Get a set containing the contractions defined by the collator. The set includes
+ * both the UCA contractions and the contractions defined by the collator. This set
+ * will contain only strings. If a tailoring explicitly suppresses contractions from
+ * the UCA (like Russian), removed contractions will not be in the resulting set.
+ * @param coll collator
+ * @param conts the set to hold the result. It gets emptied before
+ * contractions are added.
+ * @param status to hold the error code
+ * @return the size of the contraction set
+ *
+ * @draft ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getContractions( const UCollator *coll,
+ USet *conts,
+ UErrorCode *status);
+
+
+/**
+ * Close a UCollator.
+ * Once closed, a UCollator should not be used.\ Every open collator should
+ * be closed.\ Otherwise, a memory leak will result.
+ * @param coll The UCollator to close.
+ * @see ucol_open
+ * @see ucol_openRules
+ * @see ucol_safeClone
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_close(UCollator *coll);
+
+/**
+ * Compare two strings.
+ * The strings will be compared using the options already specified.
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return The result of comparing the strings; one of UCOL_EQUAL,
+ * UCOL_GREATER, UCOL_LESS
+ * @see ucol_greater
+ * @see ucol_greaterOrEqual
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_STABLE UCollationResult U_EXPORT2
+ucol_strcoll( const UCollator *coll,
+ const UChar *source,
+ int32_t sourceLength,
+ const UChar *target,
+ int32_t targetLength);
+
+/**
+ * Determine if one string is greater than another.
+ * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return TRUE if source is greater than target, FALSE otherwise.
+ * @see ucol_strcoll
+ * @see ucol_greaterOrEqual
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucol_greater(const UCollator *coll,
+ const UChar *source, int32_t sourceLength,
+ const UChar *target, int32_t targetLength);
+
+/**
+ * Determine if one string is greater than or equal to another.
+ * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return TRUE if source is greater than or equal to target, FALSE otherwise.
+ * @see ucol_strcoll
+ * @see ucol_greater
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucol_greaterOrEqual(const UCollator *coll,
+ const UChar *source, int32_t sourceLength,
+ const UChar *target, int32_t targetLength);
+
+/**
+ * Compare two strings for equality.
+ * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return TRUE if source is equal to target, FALSE otherwise
+ * @see ucol_strcoll
+ * @see ucol_greater
+ * @see ucol_greaterOrEqual
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucol_equal(const UCollator *coll,
+ const UChar *source, int32_t sourceLength,
+ const UChar *target, int32_t targetLength);
+
+/**
+ * Compare two UTF-8 encoded trings.
+ * The strings will be compared using the options already specified.
+ * @param coll The UCollator containing the comparison rules.
+ * @param sIter The source string iterator.
+ * @param tIter The target string iterator.
+ * @return The result of comparing the strings; one of UCOL_EQUAL,
+ * UCOL_GREATER, UCOL_LESS
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucol_strcoll
+ * @stable ICU 2.6
+ */
+U_STABLE UCollationResult U_EXPORT2
+ucol_strcollIter( const UCollator *coll,
+ UCharIterator *sIter,
+ UCharIterator *tIter,
+ UErrorCode *status);
+
+/**
+ * Get the collation strength used in a UCollator.
+ * The strength influences how strings are compared.
+ * @param coll The UCollator to query.
+ * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
+ * @see ucol_setStrength
+ * @stable ICU 2.0
+ */
+U_STABLE UCollationStrength U_EXPORT2
+ucol_getStrength(const UCollator *coll);
+
+/**
+ * Set the collation strength used in a UCollator.
+ * The strength influences how strings are compared.
+ * @param coll The UCollator to set.
+ * @param strength The desired collation strength; one of UCOL_PRIMARY,
+ * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
+ * @see ucol_getStrength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_setStrength(UCollator *coll,
+ UCollationStrength strength);
+
+/**
+ * Get the display name for a UCollator.
+ * The display name is suitable for presentation to a user.
+ * @param objLoc The locale of the collator in question.
+ * @param dispLoc The locale for display.
+ * @param result A pointer to a buffer to receive the attribute.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getDisplayName( const char *objLoc,
+ const char *dispLoc,
+ UChar *result,
+ int32_t resultLength,
+ UErrorCode *status);
+
+/**
+ * Get a locale for which collation rules are available.
+ * A UCollator in a locale returned by this function will perform the correct
+ * collation for the locale.
+ * @param index The index of the desired locale.
+ * @return A locale for which collation rules are available, or 0 if none.
+ * @see ucol_countAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+ucol_getAvailable(int32_t index);
+
+/**
+ * Determine how many locales have collation rules available.
+ * This function is most useful as determining the loop ending condition for
+ * calls to {@link #ucol_getAvailable }.
+ * @return The number of locales for which collation rules are available.
+ * @see ucol_getAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_countAvailable(void);
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * Create a string enumerator of all locales for which a valid
+ * collator may be opened.
+ * @param status input-output error code
+ * @return a string enumeration over locale strings. The caller is
+ * responsible for closing the result.
+ * @draft ICU 3.0
+ */
+U_DRAFT UEnumeration* U_EXPORT2
+ucol_openAvailableLocales(UErrorCode *status);
+#endif
+
+/**
+ * Create a string enumerator of all possible keywords that are relevant to
+ * collation. At this point, the only recognized keyword for this
+ * service is "collation".
+ * @param status input-output error code
+ * @return a string enumeration over locale strings. The caller is
+ * responsible for closing the result.
+ * @draft ICU 3.0
+ */
+U_DRAFT UEnumeration* U_EXPORT2
+ucol_getKeywords(UErrorCode *status);
+
+/**
+ * Given a keyword, create a string enumeration of all values
+ * for that keyword that are currently in use.
+ * @param keyword a particular keyword as enumerated by
+ * ucol_getKeywords. If any other keyword is passed in, *status is set
+ * to U_ILLEGAL_ARGUMENT_ERROR.
+ * @param status input-output error code
+ * @return a string enumeration over collation keyword values, or NULL
+ * upon error. The caller is responsible for closing the result.
+ * @draft ICU 3.0
+ */
+U_DRAFT UEnumeration* U_EXPORT2
+ucol_getKeywordValues(const char *keyword, UErrorCode *status);
+
+/**
+ * Return the functionally equivalent locale for the given
+ * requested locale, with respect to given keyword, for the
+ * collation service. If two locales return the same result, then
+ * collators instantiated for these locales will behave
+ * equivalently. The converse is not always true; two collators
+ * may in fact be equivalent, but return different results, due to
+ * internal details. The return result has no other meaning than
+ * that stated above, and implies nothing as to the relationship
+ * between the two locales. This is intended for use by
+ * applications who wish to cache collators, or otherwise reuse
+ * collators when possible. The functional equivalent may change
+ * over time. For more information, please see the <a
+ * href="http://icu.sourceforge.net/icu/userguide/locale.html#services">
+ * Locales and Services</a> section of the ICU User Guide.
+ * @param result fillin for the functionally equivalent locale
+ * @param resultCapacity capacity of the fillin buffer
+ * @param keyword a particular keyword as enumerated by
+ * ucol_getKeywords.
+ * @param locale the requested locale
+ * @param isAvailable if non-NULL, pointer to a fillin parameter that
+ * indicates whether the requested locale was 'available' to the
+ * collation service. A locale is defined as 'available' if it
+ * physically exists within the collation locale data.
+ * @param status pointer to input-output error code
+ * @return the actual buffer size needed for the locale. If greater
+ * than resultCapacity, the returned full name will be truncated and
+ * an error code will be returned.
+ * @draft ICU 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
+ const char* keyword, const char* locale,
+ UBool* isAvailable, UErrorCode* status);
+
+/**
+ * Get the collation rules from a UCollator.
+ * The rules will follow the rule syntax.
+ * @param coll The UCollator to query.
+ * @param length
+ * @return The collation rules.
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2
+ucol_getRules( const UCollator *coll,
+ int32_t *length);
+
+/** Get the short definition string for a collator. This API harvests the collator's
+ * locale and the attribute set and produces a string that can be used for opening
+ * a collator with the same properties using the ucol_openFromShortString API.
+ * This string will be normalized.
+ * The structure and the syntax of the string is defined in the "Naming collators"
+ * section of the users guide:
+ * http://icu.sourceforge.net/icu/userguide/Collate_Concepts.html#Naming_Collators
+ * This API supports preflighting.
+ * @param coll a collator
+ * @param locale a locale that will appear as a collators locale in the resulting
+ * short string definition. If NULL, the locale will be harvested
+ * from the collator.
+ * @param buffer space to hold the resulting string
+ * @param capacity capacity of the buffer
+ * @param status for returning errors. All the preflighting errors are featured
+ * @return length of the resulting string
+ * @see ucol_openFromShortString
+ * @see ucol_normalizeShortDefinitionString
+ * @draft ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getShortDefinitionString(const UCollator *coll,
+ const char *locale,
+ char *buffer,
+ int32_t capacity,
+ UErrorCode *status);
+
+/** Verifies and normalizes short definition string.
+ * Normalized short definition string has all the option sorted by the argument name,
+ * so that equivalent definition strings are the same.
+ * This API supports preflighting.
+ * @param source definition string
+ * @param destination space to hold the resulting string
+ * @param capacity capacity of the buffer
+ * @param parseError if not NULL, structure that will get filled with error's pre
+ * and post context in case of error.
+ * @param status Error code. This API will return an error if an invalid attribute
+ * or attribute/value combination is specified. All the preflighting
+ * errors are also featured
+ * @return length of the resulting normalized string.
+ *
+ * @see ucol_openFromShortString
+ * @see ucol_getShortDefinitionString
+ *
+ * @draft ICU 3.0
+ */
+
+U_CAPI int32_t U_EXPORT2
+ucol_normalizeShortDefinitionString(const char *source,
+ char *destination,
+ int32_t capacity,
+ UParseError *parseError,
+ UErrorCode *status);
+
+
+/**
+ * Get a sort key for a string from a UCollator.
+ * Sort keys may be compared using <TT>strcmp</TT>.
+ * @param coll The UCollator containing the collation rules.
+ * @param source The string to transform.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param result A pointer to a buffer to receive the attribute.
+ * @param resultLength The maximum size of result.
+ * @return The size needed to fully store the sort key..
+ * @see ucol_keyHashCode
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getSortKey(const UCollator *coll,
+ const UChar *source,
+ int32_t sourceLength,
+ uint8_t *result,
+ int32_t resultLength);
+
+
+/** Gets the next count bytes of a sort key. Caller needs
+ * to preserve state array between calls and to provide
+ * the same type of UCharIterator set with the same string.
+ * The destination buffer provided must be big enough to store
+ * the number of requested bytes. Generated sortkey is not
+ * compatible with sortkeys generated using ucol_getSortKey
+ * API, since we don't do any compression. If uncompressed
+ * sortkeys are required, this API can be used.
+ * @param coll The UCollator containing the collation rules.
+ * @param iter UCharIterator containing the string we need
+ * the sort key to be calculated for.
+ * @param state Opaque state of sortkey iteration.
+ * @param dest Buffer to hold the resulting sortkey part
+ * @param count number of sort key bytes required.
+ * @param status error code indicator.
+ * @return the actual number of bytes of a sortkey. It can be
+ * smaller than count if we have reached the end of
+ * the sort key.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_nextSortKeyPart(const UCollator *coll,
+ UCharIterator *iter,
+ uint32_t state[2],
+ uint8_t *dest, int32_t count,
+ UErrorCode *status);
+
+/** enum that is taken by ucol_getBound API
+ * See below for explanation
+ * do not change the values assigned to the
+ * members of this enum. Underlying code
+ * depends on them having these numbers
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** lower bound */
+ UCOL_BOUND_LOWER = 0,
+ /** upper bound that will match strings of exact size */
+ UCOL_BOUND_UPPER = 1,
+ /** upper bound that will match all the strings that have the same initial substring as the given string */
+ UCOL_BOUND_UPPER_LONG = 2,
+ UCOL_BOUND_VALUE_COUNT
+} UColBoundMode;
+
+/**
+ * Produce a bound for a given sortkey and a number of levels.
+ * Return value is always the number of bytes needed, regardless of
+ * whether the result buffer was big enough or even valid.<br>
+ * Resulting bounds can be used to produce a range of strings that are
+ * between upper and lower bounds. For example, if bounds are produced
+ * for a sortkey of string "smith", strings between upper and lower
+ * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
+ * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
+ * is produced, strings matched would be as above. However, if bound
+ * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
+ * also match "Smithsonian" and similar.<br>
+ * For more on usage, see example in cintltst/capitst.c in procedure
+ * TestBounds.
+ * Sort keys may be compared using <TT>strcmp</TT>.
+ * @param source The source sortkey.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * (If an unmodified sortkey is passed, it is always null
+ * terminated).
+ * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
+ * produces a lower inclusive bound, UCOL_BOUND_UPPER, that
+ * produces upper bound that matches strings of the same length
+ * or UCOL_BOUND_UPPER_LONG that matches strings that have the
+ * same starting substring as the source string.
+ * @param noOfLevels Number of levels required in the resulting bound (for most
+ * uses, the recommended value is 1). See users guide for
+ * explanation on number of levels a sortkey can have.
+ * @param result A pointer to a buffer to receive the resulting sortkey.
+ * @param resultLength The maximum size of result.
+ * @param status Used for returning error code if something went wrong. If the
+ * number of levels requested is higher than the number of levels
+ * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
+ * issued.
+ * @return The size needed to fully store the bound.
+ * @see ucol_keyHashCode
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getBound(const uint8_t *source,
+ int32_t sourceLength,
+ UColBoundMode boundType,
+ uint32_t noOfLevels,
+ uint8_t *result,
+ int32_t resultLength,
+ UErrorCode *status);
+
+/**
+ * Gets the version information for a Collator. Version is currently
+ * an opaque 32-bit number which depends, among other things, on major
+ * versions of the collator tailoring and UCA.
+ * @param coll The UCollator to query.
+ * @param info the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_getVersion(const UCollator* coll, UVersionInfo info);
+
+/**
+ * Gets the UCA version information for a Collator. Version is the
+ * UCA version number (3.1.1, 4.0).
+ * @param coll The UCollator to query.
+ * @param info the version # information, the result will be filled in
+ * @draft ICU 2.8
+ */
+U_DRAFT void U_EXPORT2
+ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
+
+/**
+ * Merge two sort keys. The levels are merged with their corresponding counterparts
+ * (primaries with primaries, secondaries with secondaries etc.). Between the values
+ * from the same level a separator is inserted.
+ * example (uncompressed):
+ * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
+ * will be merged as
+ * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
+ * This allows for concatenating of first and last names for sorting, among other things.
+ * If the destination buffer is not big enough, the results are undefined.
+ * If any of source lengths are zero or any of source pointers are NULL/undefined,
+ * result is of size zero.
+ * @param src1 pointer to the first sortkey
+ * @param src1Length length of the first sortkey
+ * @param src2 pointer to the second sortkey
+ * @param src2Length length of the second sortkey
+ * @param dest buffer to hold the result
+ * @param destCapacity size of the buffer for the result
+ * @return size of the result. If the buffer is big enough size is always
+ * src1Length+src2Length-1
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
+ const uint8_t *src2, int32_t src2Length,
+ uint8_t *dest, int32_t destCapacity);
+
+/**
+ * Universal attribute setter
+ * @param coll collator which attributes are to be changed
+ * @param attr attribute type
+ * @param value attribute value
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @see UColAttribute
+ * @see UColAttributeValue
+ * @see ucol_getAttribute
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
+
+/**
+ * Universal attribute getter
+ * @param coll collator which attributes are to be changed
+ * @param attr attribute type
+ * @return attribute value
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @see UColAttribute
+ * @see UColAttributeValue
+ * @see ucol_setAttribute
+ * @stable ICU 2.0
+ */
+U_STABLE UColAttributeValue U_EXPORT2
+ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
+
+/** Variable top
+ * is a two byte primary value which causes all the codepoints with primary values that
+ * are less or equal than the variable top to be shifted when alternate handling is set
+ * to UCOL_SHIFTED.
+ * Sets the variable top to a collation element value of a string supplied.
+ * @param coll collator which variable top needs to be changed
+ * @param varTop one or more (if contraction) UChars to which the variable top should be set
+ * @param len length of variable top string. If -1 it is considered to be zero terminated.
+ * @param status error code. If error code is set, the return value is undefined.
+ * Errors set by this function are: <br>
+ * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such
+ * a contraction<br>
+ * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+ * @return a 32 bit value containing the value of the variable top in upper 16 bits.
+ * Lower 16 bits are undefined
+ * @see ucol_getVariableTop
+ * @see ucol_restoreVariableTop
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2
+ucol_setVariableTop(UCollator *coll,
+ const UChar *varTop, int32_t len,
+ UErrorCode *status);
+
+/**
+ * Gets the variable top value of a Collator.
+ * Lower 16 bits are undefined and should be ignored.
+ * @param coll collator which variable top needs to be retrieved
+ * @param status error code (not changed by function). If error code is set,
+ * the return value is undefined.
+ * @return the variable top value of a Collator.
+ * @see ucol_setVariableTop
+ * @see ucol_restoreVariableTop
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
+
+/**
+ * Sets the variable top to a collation element value supplied. Variable top is
+ * set to the upper 16 bits.
+ * Lower 16 bits are ignored.
+ * @param coll collator which variable top needs to be changed
+ * @param varTop CE value, as returned by ucol_setVariableTop or ucol)getVariableTop
+ * @param status error code (not changed by function)
+ * @see ucol_getVariableTop
+ * @see ucol_setVariableTop
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
+
+/**
+ * Thread safe cloning operation. The result is a clone of a given collator.
+ * @param coll collator to be cloned
+ * @param stackBuffer user allocated space for the new clone.
+ * If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
+ * This will probably be enough to avoid memory allocations.
+ * @param pBufferSize pointer to size of allocated space.
+ * If *pBufferSize == 0, a sufficient size for use in cloning will
+ * be returned ('pre-flighting')
+ * If *pBufferSize is not enough for a stack-based safe clone,
+ * new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
+ * allocations were necessary.
+ * @return pointer to the new clone
+ * @see ucol_open
+ * @see ucol_openRules
+ * @see ucol_close
+ * @stable ICU 2.0
+ */
+U_STABLE UCollator* U_EXPORT2
+ucol_safeClone(const UCollator *coll,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+/** default memory size for the new clone. It needs to be this large for os/400 large pointers
+ * @stable ICU 2.0
+ */
+#define U_COL_SAFECLONE_BUFFERSIZE 512
+
+/**
+ * Returns current rules. Delta defines whether full rules are returned or just the tailoring.
+ * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough
+ * to store rules, will store up to available space.
+ * @param coll collator to get the rules from
+ * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
+ * @param buffer buffer to store the result in. If NULL, you'll get no rules.
+ * @param bufferLen lenght of buffer to store rules in. If less then needed you'll get only the part that fits in.
+ * @return current rules
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
+
+/**
+ * gets the locale name of the collator. If the collator
+ * is instantiated from the rules, then this function returns
+ * NULL.
+ * @param coll The UCollator for which the locale is needed
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status error code of the operation
+ * @return real locale name from which the collation data comes.
+ * If the collator was instantiated from rules, returns
+ * NULL.
+ * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
+ */
+U_DEPRECATED const char * U_EXPORT2
+ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
+
+
+/**
+ * gets the locale name of the collator. If the collator
+ * is instantiated from the rules, then this function returns
+ * NULL.
+ * @param coll The UCollator for which the locale is needed
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status error code of the operation
+ * @return real locale name from which the collation data comes.
+ * If the collator was instantiated from rules, returns
+ * NULL.
+ * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
+ */
+U_DRAFT const char * U_EXPORT2
+ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
+
+/**
+ * Get an Unicode set that contains all the characters and sequences tailored in
+ * this collator. The result must be disposed of by using uset_close.
+ * @param coll The UCollator for which we want to get tailored chars
+ * @param status error code of the operation
+ * @return a pointer to newly created USet. Must be be disposed by using uset_close
+ * @see ucol_openRules
+ * @see uset_close
+ * @stable ICU 2.4
+ */
+U_STABLE USet * U_EXPORT2
+ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
+
+/**
+ * Returned by ucol_collatorToIdentifier to signify that collator is
+ * not encodable as an identifier.
+ * @internal ICU 3.0
+ */
+#define UCOL_SIT_COLLATOR_NOT_ENCODABLE 0x80000000
+
+/**
+ * Get a 31-bit identifier given a collator.
+ * @param coll UCollator
+ * @param locale a locale that will appear as a collators locale in the resulting
+ * short string definition. If NULL, the locale will be harvested
+ * from the collator.
+ * @param status holds error messages
+ * @return 31-bit identifier. MSB is used if the collator cannot be encoded. In that
+ * case UCOL_SIT_COLLATOR_NOT_ENCODABLE is returned
+ * @see ucol_openFromIdentifier
+ * @see ucol_identifierToShortString
+ * @internal ICU 3.0
+ */
+U_INTERNAL uint32_t U_EXPORT2
+ucol_collatorToIdentifier(const UCollator *coll,
+ const char *locale,
+ UErrorCode *status);
+
+/**
+ * Open a collator given a 31-bit identifier
+ * @param identifier 31-bit identifier, encoded by calling ucol_collatorToIdentifier
+ * @param forceDefaults if FALSE, the settings that are the same as the collator
+ * default settings will not be applied (for example, setting
+ * French secondary on a French collator would not be executed).
+ * If TRUE, all the settings will be applied regardless of the
+ * collator default value. If the definition
+ * strings that can be produced from a collator instantiated by
+ * calling this API are to be cached, should be set to FALSE.
+ * @param status for returning errors
+ * @return UCollator object
+ * @see ucol_collatorToIdentifier
+ * @see ucol_identifierToShortString
+ * @internal ICU 3.0
+ */
+U_INTERNAL UCollator* U_EXPORT2
+ucol_openFromIdentifier(uint32_t identifier,
+ UBool forceDefaults,
+ UErrorCode *status);
+
+
+/**
+ * Calculate the short definition string given an identifier. Supports preflighting.
+ * @param identifier 31-bit identifier, encoded by calling ucol_collatorToIdentifier
+ * @param buffer buffer to store the result
+ * @param capacity buffer capacity
+ * @param forceDefaults whether the settings that are the same as the default setting
+ * should be forced anyway. Setting this argument to FALSE reduces
+ * the number of different configurations, but decreases performace
+ * as a collator has to be instantiated.
+ * @param status for returning errors
+ * @return length of the short definition string
+ * @see ucol_collatorToIdentifier
+ * @see ucol_openFromIdentifier
+ * @see ucol_shortStringToIdentifier
+ * @internal ICU 3.0
+ */
+U_INTERNAL int32_t U_EXPORT2
+ucol_identifierToShortString(uint32_t identifier,
+ char *buffer,
+ int32_t capacity,
+ UBool forceDefaults,
+ UErrorCode *status);
+
+/**
+ * Calculate the identifier given a short definition string. Supports preflighting.
+ * @param definition short string definition
+ * @param forceDefaults whether the settings that are the same as the default setting
+ * should be forced anyway. Setting this argument to FALSE reduces
+ * the number of different configurations, but decreases performace
+ * as a collator has to be instantiated.
+ * @param status for returning errors
+ * @return identifier
+ * @see ucol_collatorToIdentifier
+ * @see ucol_openFromIdentifier
+ * @see ucol_identifierToShortString
+ * @internal ICU 3.0
+ */
+U_INTERNAL uint32_t U_EXPORT2
+ucol_shortStringToIdentifier(const char *definition,
+ UBool forceDefaults,
+ UErrorCode *status);
+
+
+
+/**
+ * Universal attribute getter that returns UCOL_DEFAULT if the value is default
+ * @param coll collator which attributes are to be changed
+ * @param attr attribute type
+ * @return attribute value or UCOL_DEFAULT if the value is default
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @see UColAttribute
+ * @see UColAttributeValue
+ * @see ucol_setAttribute
+ * @internal ICU 3.0
+ */
+U_INTERNAL UColAttributeValue U_EXPORT2
+ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status);
+
+/** Check whether two collators are equal. Collators are considered equal if they
+ * will sort strings the same. This means that both the current attributes and the
+ * rules must be equivalent. Currently used for RuleBasedCollator::operator==.
+ * @param source first collator
+ * @param target second collator
+ * @return TRUE or FALSE
+ * @internal ICU 3.0
+ */
+U_INTERNAL UBool U_EXPORT2
+ucol_equals(const UCollator *source, const UCollator *target);
+
+/** Calculates the set of unsafe code points, given a collator.
+ * A character is unsafe if you could append any character and cause the ordering to alter significantly.
+ * Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
+ * Thus if you have a character like a_umlaut, and you add a lower_dot to it,
+ * then it normalizes to a_lower_dot + umlaut, and sorts differently.
+ * @param coll Collator
+ * @param unsafe a fill-in set to receive the unsafe points
+ * @param status for catching errors
+ * @return number of elements in the set
+ * @internal ICU 3.0
+ */
+U_INTERNAL int32_t U_EXPORT2
+ucol_getUnsafeSet( const UCollator *coll,
+ USet *unsafe,
+ UErrorCode *status);
+
+/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away.
+ * @internal ICU 3.2.1
+ */
+U_INTERNAL void U_EXPORT2
+ucol_forgetUCA(void);
+
+/** Touches all resources needed for instantiating a collator from a short string definition,
+ * thus filling up the cache.
+ * @param definition A short string containing a locale and a set of attributes.
+ * Attributes not explicitly mentioned are left at the default
+ * state for a locale.
+ * @param parseError if not NULL, structure that will get filled with error's pre
+ * and post context in case of error.
+ * @param forceDefaults if FALSE, the settings that are the same as the collator
+ * default settings will not be applied (for example, setting
+ * French secondary on a French collator would not be executed).
+ * If TRUE, all the settings will be applied regardless of the
+ * collator default value. If the definition
+ * strings are to be cached, should be set to FALSE.
+ * @param status Error code. Apart from regular error conditions connected to
+ * instantiating collators (like out of memory or similar), this
+ * API will return an error if an invalid attribute or attribute/value
+ * combination is specified.
+ * @see ucol_openFromShortString
+ * @internal ICU 3.2.1
+ */
+U_INTERNAL void U_EXPORT2
+ucol_prepareShortStringOpen( const char *definition,
+ UBool forceDefaults,
+ UParseError *parseError,
+ UErrorCode *status);
+
+/** Creates a binary image of a collator. This binary image can be stored and
+ * later used to instantiate a collator using ucol_openBinary.
+ * This API supports preflighting.
+ * @param coll Collator
+ * @param buffer a fill-in buffer to receive the binary image
+ * @param capacity capacity of the destination buffer
+ * @param status for catching errors
+ * @return size of the image
+ * @see ucol_openBinary
+ * @draft ICU 3.2
+ */
+U_DRAFT int32_t U_EXPORT2
+ucol_cloneBinary(const UCollator *coll,
+ uint8_t *buffer, int32_t capacity,
+ UErrorCode *status);
+
+/** Opens a collator from a collator binary image created using
+ * ucol_cloneBinary. Binary image used in instantiation of the
+ * collator remains owned by the user and should stay around for
+ * the lifetime of the collator. The API also takes a base collator
+ * which usualy should be UCA.
+ * @param bin binary image owned by the user and required through the
+ * lifetime of the collator
+ * @param length size of the image. If negative, the API will try to
+ * figure out the length of the image
+ * @param base fallback collator, usually UCA. Base is required to be
+ * present through the lifetime of the collator. Currently
+ * it cannot be NULL.
+ * @param status for catching errors
+ * @return newly created collator
+ * @see ucol_cloneBinary
+ * @draft ICU 3.2
+ */
+U_DRAFT UCollator* U_EXPORT2
+ucol_openBinary(const uint8_t *bin, int32_t length,
+ const UCollator *base,
+ UErrorCode *status);
+
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
+
diff --git a/Source/WebCore/icu/unicode/ucoleitr.h b/Source/WebCore/icu/unicode/ucoleitr.h
new file mode 100644
index 0000000..7184e96
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucoleitr.h
@@ -0,0 +1,268 @@
+/*
+*******************************************************************************
+* Copyright (C) 2001-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File ucoleitr.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 02/15/2001 synwee Modified all methods to process its own function
+* instead of calling the equivalent c++ api (coleitr.h)
+*******************************************************************************/
+
+#ifndef UCOLEITR_H
+#define UCOLEITR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+/**
+ * This indicates an error has occured during processing or if no more CEs is
+ * to be returned.
+ * @stable ICU 2.0
+ */
+#define UCOL_NULLORDER ((int32_t)0xFFFFFFFF)
+
+#include "unicode/ucol.h"
+
+/**
+ * The UCollationElements struct.
+ * For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef struct UCollationElements UCollationElements;
+
+/**
+ * \file
+ * \brief C API: UCollationElements
+ *
+ * The UCollationElements API is used as an iterator to walk through each
+ * character of an international string. Use the iterator to return the
+ * ordering priority of the positioned character. The ordering priority of a
+ * character, which we refer to as a key, defines how a character is collated
+ * in the given collation object.
+ * For example, consider the following in Spanish:
+ * <pre>
+ * . "ca" -> the first key is key('c') and second key is key('a').
+ * . "cha" -> the first key is key('ch') and second key is key('a').
+ * </pre>
+ * And in German,
+ * <pre>
+ * . "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and
+ * . the third key is key('b').
+ * </pre>
+ * <p>Example of the iterator usage: (without error checking)
+ * <pre>
+ * . void CollationElementIterator_Example()
+ * . {
+ * . UChar *s;
+ * . t_int32 order, primaryOrder;
+ * . UCollationElements *c;
+ * . UCollatorOld *coll;
+ * . UErrorCode success = U_ZERO_ERROR;
+ * . s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
+ * . u_uastrcpy(s, "This is a test");
+ * . coll = ucol_open(NULL, &success);
+ * . c = ucol_openElements(coll, str, u_strlen(str), &status);
+ * . order = ucol_next(c, &success);
+ * . ucol_reset(c);
+ * . order = ucol_prev(c, &success);
+ * . free(s);
+ * . ucol_close(coll);
+ * . ucol_closeElements(c);
+ * . }
+ * </pre>
+ * <p>
+ * ucol_next() returns the collation order of the next.
+ * ucol_prev() returns the collation order of the previous character.
+ * The Collation Element Iterator moves only in one direction between calls to
+ * ucol_reset. That is, ucol_next() and ucol_prev can not be inter-used.
+ * Whenever ucol_prev is to be called after ucol_next() or vice versa,
+ * ucol_reset has to be called first to reset the status, shifting pointers to
+ * either the end or the start of the string. Hence at the next call of
+ * ucol_prev or ucol_next, the first or last collation order will be returned.
+ * If a change of direction is done without a ucol_reset, the result is
+ * undefined.
+ * The result of a forward iterate (ucol_next) and reversed result of the
+ * backward iterate (ucol_prev) on the same string are equivalent, if
+ * collation orders with the value UCOL_IGNORABLE are ignored.
+ * Character based on the comparison level of the collator. A collation order
+ * consists of primary order, secondary order and tertiary order. The data
+ * type of the collation order is <strong>t_int32</strong>.
+ *
+ * @see UCollator
+ */
+
+/**
+ * Open the collation elements for a string.
+ *
+ * @param coll The collator containing the desired collation rules.
+ * @param text The text to iterate over.
+ * @param textLength The number of characters in text, or -1 if null-terminated
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return a struct containing collation element information
+ * @stable ICU 2.0
+ */
+U_STABLE UCollationElements* U_EXPORT2
+ucol_openElements(const UCollator *coll,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+/**
+ * get a hash code for a key... Not very useful!
+ * @param key the given key.
+ * @param length the size of the key array.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_keyHashCode(const uint8_t* key, int32_t length);
+
+/**
+ * Close a UCollationElements.
+ * Once closed, a UCollationElements may no longer be used.
+ * @param elems The UCollationElements to close.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_closeElements(UCollationElements *elems);
+
+/**
+ * Reset the collation elements to their initial state.
+ * This will move the 'cursor' to the beginning of the text.
+ * Property settings for collation will be reset to the current status.
+ * @param elems The UCollationElements to reset.
+ * @see ucol_next
+ * @see ucol_previous
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_reset(UCollationElements *elems);
+
+/**
+ * Get the ordering priority of the next collation element in the text.
+ * A single character may contain more than one collation element.
+ * @param elems The UCollationElements containing the text.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The next collation elements ordering, otherwise returns NULLORDER
+ * if an error has occured or if the end of string has been reached
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_next(UCollationElements *elems, UErrorCode *status);
+
+/**
+ * Get the ordering priority of the previous collation element in the text.
+ * A single character may contain more than one collation element.
+ * Note that internally a stack is used to store buffered collation elements.
+ * It is very rare that the stack will overflow, however if such a case is
+ * encountered, the problem can be solved by increasing the size
+ * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
+ * @param elems The UCollationElements containing the text.
+ * @param status A pointer to an UErrorCode to receive any errors. Noteably
+ * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
+ * buffer has been exhausted.
+ * @return The previous collation elements ordering, otherwise returns
+ * NULLORDER if an error has occured or if the start of string has
+ * been reached.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_previous(UCollationElements *elems, UErrorCode *status);
+
+/**
+ * Get the maximum length of any expansion sequences that end with the
+ * specified comparison order.
+ * This is useful for .... ?
+ * @param elems The UCollationElements containing the text.
+ * @param order A collation order returned by previous or next.
+ * @return maximum size of the expansion sequences ending with the collation
+ * element or 1 if collation element does not occur at the end of any
+ * expansion sequence
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getMaxExpansion(const UCollationElements *elems, int32_t order);
+
+/**
+ * Set the text containing the collation elements.
+ * Property settings for collation will remain the same.
+ * In order to reset the iterator to the current collation property settings,
+ * the API reset() has to be called.
+ * @param elems The UCollationElements to set.
+ * @param text The source text containing the collation elements.
+ * @param textLength The length of text, or -1 if null-terminated.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @see ucol_getText
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_setText( UCollationElements *elems,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+/**
+ * Get the offset of the current source character.
+ * This is an offset into the text of the character containing the current
+ * collation elements.
+ * @param elems The UCollationElements to query.
+ * @return The offset of the current source character.
+ * @see ucol_setOffset
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getOffset(const UCollationElements *elems);
+
+/**
+ * Set the offset of the current source character.
+ * This is an offset into the text of the character to be processed.
+ * Property settings for collation will remain the same.
+ * In order to reset the iterator to the current collation property settings,
+ * the API reset() has to be called.
+ * @param elems The UCollationElements to set.
+ * @param offset The desired character offset.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @see ucol_getOffset
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_setOffset(UCollationElements *elems,
+ int32_t offset,
+ UErrorCode *status);
+
+/**
+* Get the primary order of a collation order.
+* @param order the collation order
+* @return the primary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_primaryOrder (int32_t order);
+
+/**
+* Get the secondary order of a collation order.
+* @param order the collation order
+* @return the secondary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_secondaryOrder (int32_t order);
+
+/**
+* Get the tertiary order of a collation order.
+* @param order the collation order
+* @return the tertiary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_tertiaryOrder (int32_t order);
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uconfig.h b/Source/WebCore/icu/unicode/uconfig.h
new file mode 100644
index 0000000..997cf68
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uconfig.h
@@ -0,0 +1,186 @@
+/*
+**********************************************************************
+* Copyright (C) 2002-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: uconfig.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCONFIG_H__
+#define __UCONFIG_H__
+
+/*!
+ * \file
+ * \brief Switches for excluding parts of ICU library code modules.
+ *
+ * Allows to build partial, smaller libraries for special purposes.
+ * By default, all modules are built.
+ * The switches are fairly coarse, controlling large modules.
+ * Basic services cannot be turned off.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def UCONFIG_ONLY_COLLATION
+ * This switch turns off modules that are not needed for collation.
+ *
+ * It does not turn off legacy conversion because that is necessary
+ * for ICU to work on EBCDIC platforms (for the default converter).
+ * If you want "only collation" and do not build for EBCDIC,
+ * then you can #define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_ONLY_COLLATION
+# define UCONFIG_ONLY_COLLATION 0
+#endif
+
+#if UCONFIG_ONLY_COLLATION
+ /* common library */
+# define UCONFIG_NO_BREAK_ITERATION 1
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_NO_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_FORMATTING 1
+# define UCONFIG_NO_TRANSLITERATION 1
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
+#endif
+
+/* common library switches -------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_CONVERSION
+ * ICU will not completely build with this switch turned on.
+ * This switch turns off all converters.
+ *
+ * @draft ICU 3.2
+ */
+#ifndef UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_CONVERSION 0
+#endif
+
+#if UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_LEGACY_CONVERSION
+ * This switch turns off all converters except for
+ * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
+ * - US-ASCII
+ * - ISO-8859-1
+ *
+ * Turning off legacy conversion is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_LEGACY_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_NORMALIZATION
+ * This switch turns off normalization.
+ * It implies turning off several other services as well, for example
+ * collation and IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_NORMALIZATION
+# define UCONFIG_NO_NORMALIZATION 0
+#elif UCONFIG_NO_NORMALIZATION
+ /* common library */
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_ONLY_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_COLLATION 1
+# define UCONFIG_NO_TRANSLITERATION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_BREAK_ITERATION
+ * This switch turns off break iteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_BREAK_ITERATION
+# define UCONFIG_NO_BREAK_ITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_IDNA
+ * This switch turns off IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_IDNA
+# define UCONFIG_NO_IDNA 0
+#endif
+
+/* i18n library switches ---------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_COLLATION
+ * This switch turns off collation and collation-based string search.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_COLLATION
+# define UCONFIG_NO_COLLATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_FORMATTING
+ * This switch turns off formatting and calendar/timezone services.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_FORMATTING
+# define UCONFIG_NO_FORMATTING 0
+#endif
+
+/**
+ * \def UCONFIG_NO_TRANSLITERATION
+ * This switch turns off transliteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_TRANSLITERATION
+# define UCONFIG_NO_TRANSLITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_REGULAR_EXPRESSIONS
+ * This switch turns off regular expressions.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
+#endif
+
+/**
+ * \def UCONFIG_NO_SERVICE
+ * This switch turns off service registration.
+ *
+ * @draft ICU 3.2
+ */
+#ifndef UCONFIG_NO_SERVICE
+# define UCONFIG_NO_SERVICE 0
+#endif
+
+#endif
diff --git a/Source/WebCore/icu/unicode/ucsdet.h b/Source/WebCore/icu/unicode/ucsdet.h
new file mode 100644
index 0000000..27e2e34
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucsdet.h
@@ -0,0 +1,350 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2006, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ * file name: ucsdet.h
+ * encoding: US-ASCII
+ * indentation:4
+ *
+ * created on: 2005Aug04
+ * created by: Andy Heninger
+ *
+ * ICU Character Set Detection, API for C
+ *
+ * Draft version 18 Oct 2005
+ *
+ */
+
+#ifndef __UCSDET_H
+#define __UCSDET_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Charset Detection API
+ *
+ * This API provides a facility for detecting the
+ * charset or encoding of character data in an unknown text format.
+ * The input data can be from an array of bytes.
+ * <p>
+ * Character set detection is at best an imprecise operation. The detection
+ * process will attempt to identify the charset that best matches the characteristics
+ * of the byte data, but the process is partly statistical in nature, and
+ * the results can not be guaranteed to always be correct.
+ * <p>
+ * For best accuracy in charset detection, the input data should be primarily
+ * in a single language, and a minimum of a few hundred bytes worth of plain text
+ * in the language are needed. The detection process will attempt to
+ * ignore html or xml style markup that could otherwise obscure the content.
+ */
+
+
+struct UCharsetDetector;
+/**
+ * Structure representing a charset detector
+ * @draft ICU 3.6
+ */
+typedef struct UCharsetDetector UCharsetDetector;
+
+struct UCharsetMatch;
+/**
+ * Opaque structure representing a match that was identified
+ * from a charset detection operation.
+ * @draft ICU 3.6
+ */
+typedef struct UCharsetMatch UCharsetMatch;
+
+/**
+ * Open a charset detector.
+ *
+ * @param status Any error conditions occurring during the open
+ * operation are reported back in this variable.
+ * @return the newly opened charset detector.
+ * @draft ICU 3.6
+ */
+U_DRAFT UCharsetDetector * U_EXPORT2
+ucsdet_open(UErrorCode *status);
+
+/**
+ * Close a charset detector. All storage and any other resources
+ * owned by this charset detector will be released. Failure to
+ * close a charset detector when finished with it can result in
+ * memory leaks in the application.
+ *
+ * @param ucsd The charset detector to be closed.
+ * @draft ICU 3.6
+ */
+U_DRAFT void U_EXPORT2
+ucsdet_close(UCharsetDetector *ucsd);
+
+/**
+ * Set the input byte data whose charset is to detected.
+ *
+ * Ownership of the input text byte array remains with the caller.
+ * The input string must not be altered or deleted until the charset
+ * detector is either closed or reset to refer to different input text.
+ *
+ * @param ucsd the charset detector to be used.
+ * @param textIn the input text of unknown encoding. .
+ * @param len the length of the input text, or -1 if the text
+ * is NUL terminated.
+ * @param status any error conditions are reported back in this variable.
+ *
+ * @draft ICU 3.6
+ */
+U_DRAFT void U_EXPORT2
+ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
+
+
+/** Set the declared encoding for charset detection.
+ * The declared encoding of an input text is an encoding obtained
+ * by the user from an http header or xml declaration or similar source that
+ * can be provided as an additional hint to the charset detector.
+ *
+ * How and whether the declared encoding will be used during the
+ * detection process is TBD.
+ *
+ * @param ucsd the charset detector to be used.
+ * @param encoding an encoding for the current data obtained from
+ * a header or declaration or other source outside
+ * of the byte data itself.
+ * @param length the length of the encoding name, or -1 if the name string
+ * is NUL terminated.
+ * @param status any error conditions are reported back in this variable.
+ *
+ * @draft ICU 3.6
+ */
+U_DRAFT void U_EXPORT2
+ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
+
+
+/**
+ * Return the charset that best matches the supplied input data.
+ *
+ * Note though, that because the detection
+ * only looks at the start of the input data,
+ * there is a possibility that the returned charset will fail to handle
+ * the full set of input data.
+ * <p>
+ * The returned UCharsetMatch object is owned by the UCharsetDetector.
+ * It will remain valid until the detector input is reset, or until
+ * the detector is closed.
+ * <p>
+ * The function will fail if
+ * <ul>
+ * <li>no charset appears to match the data.</li>
+ * <li>no input text has been provided</li>
+ * </ul>
+ *
+ * @param ucsd the charset detector to be used.
+ * @param status any error conditions are reported back in this variable.
+ * @return a UCharsetMatch representing the best matching charset,
+ * or NULL if no charset matches the byte data.
+ *
+ * @draft ICU 3.6
+ */
+U_DRAFT const UCharsetMatch * U_EXPORT2
+ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status);
+
+
+/**
+ * Find all charset matches that appear to be consistent with the input,
+ * returning an array of results. The results are ordered with the
+ * best quality match first.
+ *
+ * Because the detection only looks at a limited amount of the
+ * input byte data, some of the returned charsets may fail to handle
+ * the all of input data.
+ * <p>
+ * The returned UCharsetMatch objects are owned by the UCharsetDetector.
+ * They will remain valid until the detector is closed or modified
+ *
+ * <p>
+ * Return an error if
+ * <ul>
+ * <li>no charsets appear to match the input data.</li>
+ * <li>no input text has been provided</li>
+ * </ul>
+ *
+ * @param ucsd the charset detector to be used.
+ * @param matchesFound pointer to a variable that will be set to the
+ * number of charsets identified that are consistent with
+ * the input data. Output only.
+ * @param status any error conditions are reported back in this variable.
+ * @return A pointer to an array of pointers to UCharSetMatch objects.
+ * This array, and the UCharSetMatch instances to which it refers,
+ * are owned by the UCharsetDetector, and will remain valid until
+ * the detector is closed or modified.
+ * @draft ICU 3.4
+ */
+U_DRAFT const UCharsetMatch ** U_EXPORT2
+ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
+
+
+
+/**
+ * Get the name of the charset represented by a UCharsetMatch.
+ *
+ * The storage for the returned name string is owned by the
+ * UCharsetMatch, and will remain valid while the UCharsetMatch
+ * is valid.
+ *
+ * The name returned is suitable for use with the ICU conversion APIs.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The name of the matching charset.
+ *
+ * @draft ICU 3.6
+ */
+U_DRAFT const char * U_EXPORT2
+ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ * Get a confidence number for the quality of the match of the byte
+ * data with the charset. Confidence numbers range from zero to 100,
+ * with 100 representing complete confidence and zero representing
+ * no confidence.
+ *
+ * The confidence values are somewhat arbitrary. They define an
+ * an ordering within the results for any single detection operation
+ * but are not generally comparable between the results for different input.
+ *
+ * A confidence value of ten does have a general meaning - it is used
+ * for charsets that can represent the input data, but for which there
+ * is no other indication that suggests that the charset is the correct one.
+ * Pure 7 bit ASCII data, for example, is compatible with a
+ * great many charsets, most of which will appear as possible matches
+ * with a confidence of 10.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return A confidence number for the charset match.
+ *
+ * @draft ICU 3.6
+ */
+U_DRAFT int32_t U_EXPORT2
+ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ * Get the RFC 3066 code for the language of the input data.
+ *
+ * The Charset Detection service is intended primarily for detecting
+ * charsets, not language. For some, but not all, charsets, a language is
+ * identified as a byproduct of the detection process, and that is what
+ * is returned by this function.
+ *
+ * CAUTION:
+ * 1. Language information is not available for input data encoded in
+ * all charsets. In particular, no language is identified
+ * for UTF-8 input data.
+ *
+ * 2. Closely related languages may sometimes be confused.
+ *
+ * If more accurate language detection is required, a linguistic
+ * analysis package should be used.
+ *
+ * The storage for the returned name string is owned by the
+ * UCharsetMatch, and will remain valid while the UCharsetMatch
+ * is valid.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The RFC 3066 code for the language of the input data, or
+ * an empty string if the language could not be determined.
+ *
+ * @draft ICU 3.6
+ */
+U_DRAFT const char * U_EXPORT2
+ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
+
+
+/**
+ * Get the entire input text as a UChar string, placing it into
+ * a caller-supplied buffer. A terminating
+ * NUL character will be appended to the buffer if space is available.
+ *
+ * The number of UChars in the output string, not including the terminating
+ * NUL, is returned.
+ *
+ * If the supplied buffer is smaller than required to hold the output,
+ * the contents of the buffer are undefined. The full output string length
+ * (in UChars) is returned as always, and can be used to allocate a buffer
+ * of the correct size.
+ *
+ *
+ * @param ucsm The charset match object.
+ * @param buf A UChar buffer to be filled with the converted text data.
+ * @param cap The capacity of the buffer in UChars.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The number of UChars in the output string.
+ *
+ * @draft ICU 3.6
+ */
+U_DRAFT int32_t U_EXPORT2
+ucsdet_getUChars(const UCharsetMatch *ucsm,
+ UChar *buf, int32_t cap, UErrorCode *status);
+
+
+
+/**
+ * Get an iterator over the set of all detectable charsets -
+ * over the charsets that are known to the charset detection
+ * service.
+ *
+ * The returned UEnumeration provides access to the names of
+ * the charsets.
+ *
+ * The state of the Charset detector that is passed in does not
+ * affect the result of this function, but requiring a valid, open
+ * charset detector as a parameter insures that the charset detection
+ * service has been safely initialized and that the required detection
+ * data is available.
+ *
+ * @param ucsd a Charset detector.
+ * @param status Any error conditions are reported back in this variable.
+ * @return an iterator providing access to the detectable charset names.
+ * @draft ICU 3.6
+ */
+
+U_DRAFT UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
+
+
+/**
+ * Test whether input filtering is enabled for this charset detector.
+ * Input filtering removes text that appears to be HTML or xml
+ * markup from the input before applying the code page detection
+ * heuristics.
+ *
+ * @param ucsd The charset detector to check.
+ * @return TRUE if filtering is enabled.
+ * @draft ICU 3.4
+ */
+U_DRAFT UBool U_EXPORT2
+ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
+
+
+/**
+ * Enable filtering of input text. If filtering is enabled,
+ * text within angle brackets ("<" and ">") will be removed
+ * before detection, which will remove most HTML or xml markup.
+ *
+ * @param ucsd the charset detector to be modified.
+ * @param filter <code>true</code> to enable input text filtering.
+ * @return The previous setting.
+ *
+ * @draft ICU 3.6
+ */
+U_DRAFT UBool U_EXPORT2
+ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
+
+#endif
+#endif /* __UCSDET_H */
+
+
diff --git a/Source/WebCore/icu/unicode/uenum.h b/Source/WebCore/icu/unicode/uenum.h
new file mode 100644
index 0000000..63690f8
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uenum.h
@@ -0,0 +1,129 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenum.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUM_H
+#define __UENUM_H
+
+#include "unicode/utypes.h"
+
+/**
+ * An enumeration object.
+ * For usage in C programs.
+ * @stable ICU 2.2
+ */
+struct UEnumeration;
+/** structure representing an enumeration object instance @stable ICU 2.2 */
+typedef struct UEnumeration UEnumeration;
+
+/**
+ * Disposes of resources in use by the iterator. If en is NULL,
+ * does nothing. After this call, any char* or UChar* pointer
+ * returned by uenum_unext() or uenum_next() is invalid.
+ * @param en UEnumeration structure pointer
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_close(UEnumeration* en);
+
+/**
+ * Returns the number of elements that the iterator traverses. If
+ * the iterator is out-of-sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR.
+ * This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched (depending
+ * on the type of data being traversed). Use with caution and only
+ * when necessary.
+ * @param en UEnumeration structure pointer
+ * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
+ * iterator is out of sync.
+ * @return number of elements in the iterator
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a char* string,
+ * it is converted to UChar* with the invariant converter.
+ * The result is terminated by (UChar)0.
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a UChar*
+ * string, it is converted to char* with the invariant converter.
+ * The result is terminated by (char)0. If the conversion fails
+ * (because a character cannot be converted) then status is set to
+ * U_INVARIANT_CONVERSION_ERROR and the return value is undefined
+ * (but non-NULL).
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service. Set to
+ * U_INVARIANT_CONVERSION_ERROR if the underlying native string is
+ * UChar* and conversion to char* with the invariant converter
+ * fails. This error pertains only to current string, so iteration
+ * might be able to continue successfully.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Resets the iterator to the current list of service IDs. This
+ * re-establishes sync with the service and rewinds the iterator
+ * to start at the first element.
+ * @param en the iterator object
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status);
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uidna.h b/Source/WebCore/icu/unicode/uidna.h
new file mode 100644
index 0000000..7b1dd0b
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uidna.h
@@ -0,0 +1,310 @@
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2004, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: uidna.h
+ * encoding: US-ASCII
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003feb1
+ * created by: Ram Viswanadha
+ */
+
+#ifndef __UIDNA_H__
+#define __UIDNA_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/parseerr.h"
+
+/**
+ *\file
+ * UIDNA API implements the IDNA protocol as defined in the IDNA RFC
+ * (http://www.ietf.org/rfc/rfc3490.txt).
+ * The RFC defines 2 operations: ToASCII and ToUnicode. Domain labels
+ * containing non-ASCII code points are required to be processed by
+ * ToASCII operation before passing it to resolver libraries. Domain names
+ * that are obtained from resolver libraries are required to be processed by
+ * ToUnicode operation before displaying the domain name to the user.
+ * IDNA requires that implementations process input strings with Nameprep
+ * (http://www.ietf.org/rfc/rfc3491.txt),
+ * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
+ * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
+ * Implementations of IDNA MUST fully implement Nameprep and Punycode;
+ * neither Nameprep nor Punycode are optional.
+ * The input and output of ToASCII and ToUnicode operations are Unicode
+ * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
+ * multiple times to an input string will yield the same result as applying the operation
+ * once.
+ * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
+ * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
+ *
+ */
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Option to prohibit processing of unassigned codepoints in the input and
+ * do not check if the input conforms to STD-3 ASCII rules.
+ *
+ * @see uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_DEFAULT 0x0000
+/**
+ * Option to allow processing of unassigned codepoints in the input
+ *
+ * @see uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_ALLOW_UNASSIGNED 0x0001
+/**
+ * Option to check if input conforms to STD-3 ASCII rules
+ *
+ * @see uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_USE_STD3_RULES 0x0002
+
+#endif /*U_HIDE_DRAFT_API*/
+
+/**
+ * This function implements the ToASCII operation as defined in the IDNA RFC.
+ * This operation is done on <b>single labels</b> before sending it to something that expects
+ * ASCII names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; e.g." "www.example.com" is composed of 3 labels
+ * "www","example", and "com".
+ *
+ *
+ * @param src Input UChar array containing label in Unicode.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array with ASCII (ACE encoded) label.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return Number of ASCII characters converted.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_toASCII(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+
+/**
+ * This function implements the ToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on <b>single labels</b> before sending it to something that expects
+ * Unicode names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; for e.g." "www.example.com" is composed of 3 labels
+ * "www","example", and "com".
+ *
+ * @param src Input UChar array containing ASCII (ACE encoded) label.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output Converted UChar array containing Unicode equivalent of label.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points. <b> Note: </b> This option is
+ * required on toUnicode operation because the RFC mandates
+ * verification of decoded ACE input by applying toASCII and comparing
+ * its output with source
+ *
+ *
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return Number of Unicode characters converted.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_toUnicode(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+
+/**
+ * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ * It is important to note that this operation can fail. If it fails, then the input
+ * domain name cannot be used as an Internationalized Domain Name and the application
+ * should have methods defined to deal with the failure.
+ *
+ * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
+ * set will apply to all labels in the domain name
+ *
+ * @param src Input UChar array containing IDN in Unicode.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array with ASCII (ACE encoded) IDN.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return Number of ASCII characters converted.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_IDNToASCII( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+/**
+ * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ *
+ * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
+ * set will apply to all labels in the domain name
+ *
+ * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array containing Unicode equivalent of source IDN.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return Number of ASCII characters converted.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+/**
+ * Compare two IDN strings for equivalence.
+ * This function splits the domain names into labels and compares them.
+ * According to IDN RFC, whenever two labels are compared, they are
+ * considered equal if and only if their ASCII forms (obtained by
+ * applying toASCII) match using an case-insensitive ASCII comparison.
+ * Two domain names are considered a match if and only if all labels
+ * match regardless of whether label separators match.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_compare( const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ int32_t options,
+ UErrorCode* status);
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uiter.h b/Source/WebCore/icu/unicode/uiter.h
new file mode 100644
index 0000000..963df5c
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uiter.h
@@ -0,0 +1,707 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uiter.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan18
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UITER_H__
+#define __UITER_H__
+
+/**
+ * \file
+ * \brief C API: Unicode Character Iteration
+ *
+ * @see UCharIterator
+ */
+
+#include "unicode/utypes.h"
+
+#ifdef XP_CPLUSPLUS
+ U_NAMESPACE_BEGIN
+
+ class CharacterIterator;
+ class Replaceable;
+
+ U_NAMESPACE_END
+#endif
+
+U_CDECL_BEGIN
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+/**
+ * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
+ * @see UCharIteratorMove
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef enum UCharIteratorOrigin {
+ UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
+} UCharIteratorOrigin;
+
+/** Constants for UCharIterator. @stable ICU 2.6 */
+enum {
+ /**
+ * Constant value that may be returned by UCharIteratorMove
+ * indicating that the final UTF-16 index is not known, but that the move succeeded.
+ * This can occur when moving relative to limit or length, or
+ * when moving relative to the current index after a setState()
+ * when the current UTF-16 index is not known.
+ *
+ * It would be very inefficient to have to count from the beginning of the text
+ * just to get the current/limit/length index after moving relative to it.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ *
+ * @stable ICU 2.6
+ */
+ UITER_UNKNOWN_INDEX=-2
+};
+
+
+/**
+ * Constant for UCharIterator getState() indicating an error or
+ * an unknown state.
+ * Returned by uiter_getState()/UCharIteratorGetState
+ * when an error occurs.
+ * Also, some UCharIterator implementations may not be able to return
+ * a valid state for each position. This will be clearly documented
+ * for each such iterator (none of the public ones here).
+ *
+ * @stable ICU 2.6
+ */
+#define UITER_NO_STATE ((uint32_t)0xffffffff)
+
+/**
+ * Function type declaration for UCharIterator.getIndex().
+ *
+ * Gets the current position, or the start or limit of the
+ * iteration range.
+ *
+ * This function may perform slowly for UITER_CURRENT after setState() was called,
+ * or for UITER_LENGTH, because an iterator implementation may have to count
+ * UChars if the underlying storage is not UTF-16.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param origin get the 0, start, limit, length, or current index
+ * @return the requested index, or U_SENTINEL in an error condition
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.move().
+ *
+ * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
+ *
+ * Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * Out of bounds movement will be pinned to the start or limit.
+ *
+ * This function may perform slowly for moving relative to UITER_LENGTH
+ * because an iterator implementation may have to count the rest of the
+ * UChars if the native storage is not UTF-16.
+ *
+ * When moving relative to the limit or length, or
+ * relative to the current position after setState() was called,
+ * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
+ * determination of the actual UTF-16 index.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ * See UITER_UNKNOWN_INDEX for details.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param delta can be positive, zero, or negative
+ * @param origin move relative to the 0, start, limit, length, or current index
+ * @return the new index, or U_SENTINEL on an error condition,
+ * or UITER_UNKNOWN_INDEX when the index is not known.
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @see UITER_UNKNOWN_INDEX
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.hasNext().
+ *
+ * Check if current() and next() can still
+ * return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether current() and next() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.hasPrevious().
+ *
+ * Check if previous() can still return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether previous() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.current().
+ *
+ * Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorCurrent(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.next().
+ *
+ * Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.previous().
+ *
+ * Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code unit (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.reservedFn().
+ * Reserved for future use.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param something some integer argument
+ * @return some integer
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorReserved(UCharIterator *iter, int32_t something);
+
+/**
+ * Function type declaration for UCharIterator.getState().
+ *
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * It is recommended that the state value be calculated to be as small as
+ * is feasible. For strings with limited lengths, fewer than 32 bits may
+ * be sufficient.
+ *
+ * This is used together with setState()/UCharIteratorSetState
+ * to save and restore the iterator position more efficiently than with
+ * getIndex()/move().
+ *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
+ * With some UCharIterator implementations (e.g., UTF-8),
+ * getting and setting the UTF-16 index with existing functions
+ * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
+ * relatively slow because the iterator has to "walk" from a known index
+ * to the requested one.
+ * This takes more time the farther it needs to go.
+ *
+ * An opaque state value allows an iterator implementation to provide
+ * an internal index (UTF-8: the source byte array index) for
+ * fast, constant-time restoration.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+typedef uint32_t U_CALLCONV
+UCharIteratorGetState(const UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.setState().
+ *
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * The iterator object need not be the same one as for which getState() was called,
+ * but it must be of the same type (set up using the same uiter_setXYZ function)
+ * and it must iterate over the same string
+ * (binary identical regardless of memory address).
+ * For more about the state word see UCharIteratorGetState.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+typedef void U_CALLCONV
+UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+
+/**
+ * C API for code unit iteration.
+ * This can be used as a C wrapper around
+ * CharacterIterator, Replaceable, or implemented using simple strings, etc.
+ *
+ * There are two roles for using UCharIterator:
+ *
+ * A "provider" sets the necessary function pointers and controls the "protected"
+ * fields of the UCharIterator structure. A "provider" passes a UCharIterator
+ * into C APIs that need a UCharIterator as an abstract, flexible string interface.
+ *
+ * Implementations of such C APIs are "callers" of UCharIterator functions;
+ * they only use the "public" function pointers and never access the "protected"
+ * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
+ *
+ * UCharIterator functions return code unit values 0..0xffff,
+ * or U_SENTINEL if the iteration bounds are reached.
+ *
+ * @stable ICU 2.1
+ */
+struct UCharIterator {
+ /**
+ * (protected) Pointer to string or wrapped object or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ const void *context;
+
+ /**
+ * (protected) Length of string or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t length;
+
+ /**
+ * (protected) Start index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t start;
+
+ /**
+ * (protected) Current index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t index;
+
+ /**
+ * (protected) Limit index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t limit;
+
+ /**
+ * (protected) Used by UTF-8 iterators and possibly others.
+ * @stable ICU 2.1
+ */
+ int32_t reservedField;
+
+ /**
+ * (public) Returns the current position or the
+ * start or limit index of the iteration range.
+ *
+ * @see UCharIteratorGetIndex
+ * @stable ICU 2.1
+ */
+ UCharIteratorGetIndex *getIndex;
+
+ /**
+ * (public) Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ *
+ * @see UCharIteratorMove
+ * @stable ICU 2.1
+ */
+ UCharIteratorMove *move;
+
+ /**
+ * (public) Check if current() and next() can still
+ * return another code unit.
+ *
+ * @see UCharIteratorHasNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasNext *hasNext;
+
+ /**
+ * (public) Check if previous() can still return another code unit.
+ *
+ * @see UCharIteratorHasPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasPrevious *hasPrevious;
+
+ /**
+ * (public) Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorCurrent
+ * @stable ICU 2.1
+ */
+ UCharIteratorCurrent *current;
+
+ /**
+ * (public) Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorNext *next;
+
+ /**
+ * (public) Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @see UCharIteratorPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorPrevious *previous;
+
+ /**
+ * (public) Reserved for future use. Currently NULL.
+ *
+ * @see UCharIteratorReserved
+ * @stable ICU 2.1
+ */
+ UCharIteratorReserved *reservedFn;
+
+ /**
+ * (public) Return the state of the iterator, to be restored later with setState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorGet
+ * @stable ICU 2.6
+ */
+ UCharIteratorGetState *getState;
+
+ /**
+ * (public) Restore the iterator state from the state word from a call
+ * to getState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorSet
+ * @stable ICU 2.6
+ */
+ UCharIteratorSetState *setState;
+};
+
+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then the following or preceding surrogate is used to form
+ * the code point value.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see U16_GET
+ * @see UnicodeString::char32At()
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see U16_NEXT
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Decrement the index and return the code point from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see U16_PREV
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
+/**
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * This is a convenience function that calls iter->getState(iter)
+ * if iter->getState is not NULL;
+ * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+U_STABLE uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter);
+
+/**
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
+ * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+/**
+ * Set up a UCharIterator to iterate over a string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the string s
+ * with iteration boundaries start=index=0 and length=limit=string length.
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length.
+ * The length field will be ignored.
+ *
+ * The string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s String to iterate over
+ * @param length Length of s, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-16BE string
+ * (byte vector with a big-endian pair of bytes per UChar).
+ *
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs,
+ * and that the length argument here indicates an even number of bytes.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-16BE string to iterate over
+ * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
+ * (NUL means pair of 0 bytes at even index from s)
+ *
+ * @see UCharIterator
+ * @see uiter_setString
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-8 string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
+ * with UTF-8 iteration boundaries 0 and length.
+ * The implementation counts the UTF-16 index on the fly and
+ * lazily evaluates the UTF-16 length of the text.
+ *
+ * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
+ * When the reservedField is not 0, then it contains a supplementary code point
+ * and the UTF-16 index is between the two corresponding surrogates.
+ * At that point, the UTF-8 index is behind that code point.
+ *
+ * The UTF-8 string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() returns a state value consisting of
+ * - the current UTF-8 source byte index (bits 31..1)
+ * - a flag (bit 0) that indicates whether the UChar position is in the middle
+ * of a surrogate pair
+ * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
+ *
+ * getState() cannot also encode the UTF-16 index in the state value.
+ * move(relative to limit or length), or
+ * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-8 string to iterate over
+ * @param length Length of s in bytes, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
+
+#ifdef XP_CPLUSPLUS
+
+/**
+ * Set up a UCharIterator to wrap around a C++ CharacterIterator.
+ *
+ * Sets the UCharIterator function pointers for iteration using the
+ * CharacterIterator charIter.
+ *
+ * The CharacterIterator pointer charIter is set into UCharIterator.context
+ * without copying or cloning the CharacterIterator object.
+ * The other "protected" UCharIterator fields are set to 0 and will be ignored.
+ * The iteration index and boundaries are controlled by the CharacterIterator.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param charIter CharacterIterator to wrap
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter);
+
+/**
+ * Set up a UCharIterator to iterate over a C++ Replaceable.
+ *
+ * Sets the UCharIterator function pointers for iteration over the
+ * Replaceable rep with iteration boundaries start=index=0 and
+ * length=limit=rep->length().
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length=rep->length().
+ * The length field will be ignored.
+ *
+ * The Replaceable pointer rep is set into UCharIterator.context without copying
+ * or cloning/reallocating the Replaceable object.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param rep Replaceable to iterate over
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep);
+
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uloc.h b/Source/WebCore/icu/unicode/uloc.h
new file mode 100644
index 0000000..f584ec1
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uloc.h
@@ -0,0 +1,917 @@
+/*
+**********************************************************************
+* Copyright (C) 1997-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ULOC.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 08/22/98 stephen JDK 1.2 sync.
+* 12/08/98 rtg New C API for Locale
+* 03/30/99 damiba overhaul
+* 03/31/99 helena Javadoc for uloc functions.
+* 04/15/99 Madhu Updated Javadoc
+********************************************************************************
+*/
+
+#ifndef ULOC_H
+#define ULOC_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Locale
+ *
+ * <h2> ULoc C API for Locale </h2>
+ * A <code>Locale</code> represents a specific geographical, political,
+ * or cultural region. An operation that requires a <code>Locale</code> to perform
+ * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture. In the C APIs, a locales is simply a const char string.
+ *
+ * <P>
+ * You create a <code>Locale</code> with one of the three options listed below.
+ * Each of the component is separated by '_' in the locale string.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * newLanguage
+ *
+ * newLanguage + newCountry
+ *
+ * newLanguage + newCountry + newVariant
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * The first option is a valid <STRONG>ISO
+ * Language Code.</STRONG> These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
+ * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</a>
+ *
+ * <P>
+ * The second option includes an additonal <STRONG>ISO Country
+ * Code.</STRONG> These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
+ * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</a>
+ *
+ * <P>
+ * The third option requires another additonal information--the
+ * <STRONG>Variant.</STRONG>
+ * The Variant codes are vendor and browser-specific.
+ * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "ES", "ES", "Traditional_WIN".
+ *
+ * <P>
+ * Because a <code>Locale</code> is just an identifier for a region,
+ * no validity check is performed when you specify a <code>Locale</code>.
+ * If you want to see whether particular resources are available for the
+ * <code>Locale</code> you asked for, you must query those resources. For
+ * example, ask the <code>UNumberFormat</code> for the locales it supports
+ * using its <code>getAvailable</code> method.
+ * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * <code>UResourceBundle</code>.
+ *
+ * <P>
+ * The <code>Locale</code> provides a number of convenient constants
+ * that you can use to specify the commonly used
+ * locales. For example, the following refers to a locale
+ * for the United States:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * ULOC_US
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <P>
+ * Once you've specified a locale you can query it for information about
+ * itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
+ * <code>uloc_getLanguage</code> to get the ISO Language Code. You can
+ * use <code>uloc_getDisplayCountry</code> to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use <code>uloc_getDisplayLanguage</code> to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * <P>
+ * The ICU provides a number of services that perform locale-sensitive
+ * operations. For example, the <code>unum_xxx</code> functions format
+ * numbers, currency, or percentages in a locale-sensitive manner.
+ * </P>
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * UErrorCode success = U_ZERO_ERROR;
+ * UNumberFormat *nf;
+ * const char* myLocale = "fr_FR";
+ *
+ * nf = unum_open( UNUM_DEFAULT, NULL, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, NULL, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, NULL, success );
+ * unum_close(nf);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ *
+ * nf = unum_open( UNUM_DEFAULT, myLocale, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, myLocale, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, myLocale, success );
+ * unum_close(nf);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * A <code>Locale</code> is the mechanism for identifying the kind of services
+ * (<code>UNumberFormat</code>) that you would like to get. The locale is
+ * <STRONG>just</STRONG> a mechanism for identifying these services.
+ *
+ * <P>
+ * Each international serivce that performs locale-sensitive operations
+ * allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * const char* uloc_getAvailable(int32_t index);
+ * int32_t uloc_countAvailable();
+ * int32_t
+ * uloc_getDisplayName(const char* localeID,
+ * const char* inLocaleID,
+ * UChar* result,
+ * int32_t maxResultSize,
+ * UErrorCode* err);
+ *
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * <P>
+ * Concerning POSIX/RFC1766 Locale IDs,
+ * the getLanguage/getCountry/getVariant/getName functions do understand
+ * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT
+ * and if there is not an ICU-stype variant, uloc_getVariant() for example
+ * will return the one listed after the \@at sign. As well, the hyphen
+ * "-" is recognized as a country/variant separator similarly to RFC1766.
+ * So for example, "en-us" will be interpreted as en_US.
+ * As a result, uloc_getName() is far from a no-op, and will have the
+ * effect of converting POSIX/RFC1766 IDs into ICU form, although it does
+ * NOT map any of the actual codes (i.e. russian->ru) in any way.
+ * Applications should call uloc_getName() at the point where a locale ID
+ * is coming from an external source (user entry, OS, web browser)
+ * and pass the resulting string to other ICU functions. For example,
+ * don't use de-de\@EURO as an argument to resourcebundle.
+ *
+ * @see UResourceBundle
+ */
+
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_CHINESE "zh"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ENGLISH "en"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_FRENCH "fr"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_GERMAN "de"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ITALIAN "it"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_JAPANESE "ja"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_KOREAN "ko"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_TRADITIONAL_CHINESE "zh_TW"
+
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA "en_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA_FRENCH "fr_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CHINA "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_PRC "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_FRANCE "fr_FR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_GERMANY "de_DE"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_ITALY "it_IT"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_JAPAN "ja_JP"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_KOREA "ko_KR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_TAIWAN "zh_TW"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_UK "en_GB"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_US "en_US"
+
+/**
+ * Useful constant for the maximum size of the language part of a locale ID.
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_LANG_CAPACITY 12
+
+/**
+ * Useful constant for the maximum size of the country part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_COUNTRY_CAPACITY 4
+/**
+ * Useful constant for the maximum size of the whole locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_FULLNAME_CAPACITY 56
+
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Useful constant for the maximum size of the script part of a locale ID
+ * (including the terminating NULL).
+ * @draft ICU 2.8
+ */
+#define ULOC_SCRIPT_CAPACITY 6
+
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORDS_CAPACITY 50
+
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
+
+/**
+ * Character separating keywords from the locale string
+ * different for EBCDIC - TODO
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_SEPARATOR '@'
+/**
+ * Character for assigning value to a keyword
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_ASSIGN '='
+/**
+ * Character separating keywords
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+
+#endif /*U_HIDE_DRAFT_API*/
+
+/**
+ * Constants for *_getLocale()
+ * Allow user to select whether she wants information on
+ * requested, valid or actual locale.
+ * For example, a collator for "en_US_CALIFORNIA" was
+ * requested. In the current state of ICU (2.0),
+ * the requested locale is "en_US_CALIFORNIA",
+ * the valid locale is "en_US" (most specific locale supported by ICU)
+ * and the actual locale is "root" (the collation data comes unmodified
+ * from the UCA)
+ * The locale is considered supported by ICU if there is a core ICU bundle
+ * for that locale (although it may be empty).
+ * @stable ICU 2.1
+ */
+typedef enum {
+ /** This is locale the data actually comes from
+ * @stable ICU 2.1
+ */
+ ULOC_ACTUAL_LOCALE = 0,
+ /** This is the most specific locale supported by ICU
+ * @stable ICU 2.1
+ */
+ ULOC_VALID_LOCALE = 1,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /** This is the requested locale
+ * @deprecated ICU 2.8
+ */
+ ULOC_REQUESTED_LOCALE = 2,
+#endif /* U_HIDE_DEPRECATED_API */
+
+ ULOC_DATA_LOCALE_TYPE_LIMIT
+} ULocDataLocaleType ;
+
+
+/**
+ * Gets ICU's default locale.
+ * The returned string is a snapshot in time, and will remain valid
+ * and unchanged even when uloc_setDefault() is called.
+ * The returned storage is owned by ICU, and must not be altered or deleted
+ * by the caller.
+ *
+ * @return the ICU default locale
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getDefault(void);
+
+/**
+ * Sets ICU's default locale.
+ * By default (without calling this function), ICU's default locale will be based
+ * on information obtained from the underlying system environment.
+ * <p>
+ * Changes to ICU's default locale do not propagate back to the
+ * system environment.
+ * <p>
+ * Changes to ICU's default locale to not affect any ICU services that
+ * may already be open based on the previous default locale value.
+ *
+ * @param localeID the new ICU default locale. A value of NULL will try to get
+ * the system's default locale.
+ * @param status the error information if the setting of default locale fails
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+uloc_setDefault(const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Gets the language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param language the language code for localeID
+ * @param languageCapacity the size of the language buffer to store the
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code. If it's greater
+ * than languageCapacity, the returned language code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getLanguage(const char* localeID,
+ char* language,
+ int32_t languageCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the script code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param script the language code for localeID
+ * @param scriptCapacity the size of the language buffer to store the
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code. If it's greater
+ * than scriptCapacity, the returned language code will be truncated.
+ * @draft ICU 2.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getScript(const char* localeID,
+ char* script,
+ int32_t scriptCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the country code for the specified locale.
+ *
+ * @param localeID the locale to get the country code with
+ * @param country the country code for localeID
+ * @param countryCapacity the size of the country buffer to store the
+ * country code with
+ * @param err error information if retrieving the country code failed
+ * @return the actual buffer size needed for the country code. If it's greater
+ * than countryCapacity, the returned country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getCountry(const char* localeID,
+ char* country,
+ int32_t countryCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the variant code for the specified locale.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param variant the variant code for localeID
+ * @param variantCapacity the size of the variant buffer to store the
+ * variant code with
+ * @param err error information if retrieving the variant code failed
+ * @return the actual buffer size needed for the variant code. If it's greater
+ * than variantCapacity, the returned variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getVariant(const char* localeID,
+ char* variant,
+ int32_t variantCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the ICU locale ID to
+ * a certain extent. Upper and lower case are set as needed.
+ * It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name the full name for localeID
+ * @param nameCapacity the size of the name buffer to store the
+ * full name with
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @draft ICU 2.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_canonicalize(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the ISO language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @return language the ISO language code for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getISO3Language(const char* localeID);
+
+
+/**
+ * Gets the ISO country code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO country code with
+ * @return country the ISO country code for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getISO3Country(const char* localeID);
+
+/**
+ * Gets the Win32 LCID value for the specified locale.
+ * If the ICU locale is not recognized by Windows, 0 will be returned.
+ *
+ * @param localeID the locale to get the Win32 LCID value with
+ * @return country the Win32 LCID for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2
+uloc_getLCID(const char* localeID);
+
+/**
+ * Gets the language name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the ISO language code with
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch".
+ * @param language the displayable language code for localeID
+ * @param languageCapacity the size of the language buffer to store the
+ * displayable language code with
+ * @param status error information if retrieving the displayable language code failed
+ * @return the actual buffer size needed for the displayable language code. If it's greater
+ * than languageCapacity, the returned language code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayLanguage(const char* locale,
+ const char* displayLocale,
+ UChar* language,
+ int32_t languageCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the script name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable script code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "", while passing Locale::getGerman()
+ * for inLocale would result in "". NULL may be used to specify the default.
+ * @param script the displayable country code for localeID
+ * @param scriptCapacity the size of the script buffer to store the
+ * displayable script code with
+ * @param status error information if retrieving the displayable script code failed
+ * @return the actual buffer size needed for the displayable script code. If it's greater
+ * than scriptCapacity, the returned displayable script code will be truncated.
+ * @draft ICU 2.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+ const char* displayLocale,
+ UChar* script,
+ int32_t scriptCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the country name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable country code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param country the displayable country code for localeID
+ * @param countryCapacity the size of the country buffer to store the
+ * displayable country code with
+ * @param status error information if retrieving the displayable country code failed
+ * @return the actual buffer size needed for the displayable country code. If it's greater
+ * than countryCapacity, the returned displayable country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayCountry(const char* locale,
+ const char* displayLocale,
+ UChar* country,
+ int32_t countryCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Gets the variant name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param variant the displayable variant code for localeID
+ * @param variantCapacity the size of the variant buffer to store the
+ * displayable variant code with
+ * @param status error information if retrieving the displayable variant code failed
+ * @return the actual buffer size needed for the displayable variant code. If it's greater
+ * than variantCapacity, the returned displayable variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayVariant(const char* locale,
+ const char* displayLocale,
+ UChar* variant,
+ int32_t variantCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the keyword name suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for the keyword collation.
+ * Usage:
+ * <code>
+ * UErrorCode status = U_ZERO_ERROR;
+ * const char* keyword =NULL;
+ * int32_t keywordLen = 0;
+ * int32_t keywordCount = 0;
+ * UChar displayKeyword[256];
+ * int32_t displayKeywordLen = 0;
+ * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
+ * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
+ * if(U_FAILURE(status)){
+ * ...something went wrong so handle the error...
+ * break;
+ * }
+ * // the uenum_next returns NUL terminated string
+ * keyword = uenum_next(keywordEnum, &keywordLen, &status);
+ * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256);
+ * ... do something interesting .....
+ * }
+ * uenum_close(keywordEnum);
+ * </code>
+ * @param keyword The keyword whose display string needs to be returned.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and should not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable variant code.
+ * @see #uloc_openKeywords
+ * @draft ICU 2.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the value of the keyword suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
+ *
+ * @param locale The locale to get the displayable variant code with. NULL may be used to specify the default.
+ * @param keyword The keyword for whose value should be used.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and must not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable variant code.
+ * @draft ICU 2.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getDisplayKeywordValue( const char* locale,
+ const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the full name suitable for display for the specified locale.
+ *
+ * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
+ * @param inLocaleID Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param result the displayable name for localeID
+ * @param maxResultSize the size of the name buffer to store the
+ * displayable full name with
+ * @param err error information if retrieving the displayable name failed
+ * @return the actual buffer size needed for the displayable name. If it's greater
+ * than maxResultSize, the returned displayable name will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayName(const char* localeID,
+ const char* inLocaleID,
+ UChar* result,
+ int32_t maxResultSize,
+ UErrorCode* err);
+
+
+/**
+ * Gets the specified locale from a list of all available locales.
+ * The return value is a pointer to an item of
+ * a locale name array. Both this array and the pointers
+ * it contains are owned by ICU and should not be deleted or written through
+ * by the caller. The locale name is terminated by a null pointer.
+ * @param n the specific locale name index of the available locale list
+ * @return a specified locale name of all available locales
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getAvailable(int32_t n);
+
+/**
+ * Gets the size of the all available locale list.
+ *
+ * @return the size of the locale list
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void);
+
+/**
+ *
+ * Gets a list of all available language codes defined in ISO 639. This is a pointer
+ * to an array of pointers to arrays of char. All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available language codes
+ * @stable ICU 2.0
+ */
+U_STABLE const char* const* U_EXPORT2
+uloc_getISOLanguages(void);
+
+/**
+ *
+ * Gets a list of all available 2-letter country codes defined in ISO 639. This is a
+ * pointer to an array of pointers to arrays of char. All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available country codes
+ * @stable ICU 2.0
+ */
+U_STABLE const char* const* U_EXPORT2
+uloc_getISOCountries(void);
+
+/**
+ * Truncate the locale ID string to get the parent locale ID.
+ * Copies the part of the string before the last underscore.
+ * The parent locale ID will be an empty string if there is no
+ * underscore, or if there is only one underscore at localeID[0].
+ *
+ * @param localeID Input locale ID string.
+ * @param parent Output string buffer for the parent locale ID.
+ * @param parentCapacity Size of the output buffer.
+ * @param err A UErrorCode value.
+ * @return The length of the parent locale ID.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getParent(const char* localeID,
+ char* parent,
+ int32_t parentCapacity,
+ UErrorCode* err);
+
+
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API strips off the keyword part, so "de_DE\@collation=phonebook"
+ * will become "de_DE".
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @draft ICU 2.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getBaseName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets an enumeration of keywords for the specified locale. Enumeration
+ * must get disposed of by the client using uenum_close function.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param status error information if retrieving the keywords failed
+ * @return enumeration of keywords or NULL if there are no keywords.
+ * @draft ICU 2.8
+ */
+U_DRAFT UEnumeration* U_EXPORT2
+uloc_openKeywords(const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Get the value for a keyword. Locale name does not need to be normalized.
+ *
+ * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
+ * @param keywordName name of the keyword for which we want the value. Case insensitive.
+ * @param buffer receiving buffer
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code - buffer not big enough.
+ * @return the length of keyword value
+ * @draft ICU 2.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+ const char* keywordName,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Set the value of the specified keyword.
+ * NOTE: Unlike almost every other ICU function which takes a
+ * buffer, this function will NOT truncate the output text. If a
+ * BUFFER_OVERFLOW_ERROR is received, it means that the original
+ * buffer is untouched. This is done to prevent incorrect or possibly
+ * even malformed locales from being generated and used.
+ *
+ * @param keywordName name of the keyword to be set. Case insensitive.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * NULL, will result in the keyword being removed. No error is given if
+ * that keyword does not exist.
+ * @param buffer input buffer containing locale to be modified.
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code - buffer not big enough.
+ * @return the length needed for the buffer
+ * @see uloc_getKeywordValue
+ * @draft ICU 3.2
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_setKeywordValue(const char* keywordName,
+ const char* keywordValue,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+/**
+ * enums for the 'outResult' parameter return value
+ * @see uloc_acceptLanguageFromHTTP
+ * @see uloc_acceptLanguage
+ * @draft ICU 3.2
+ */
+typedef enum {
+ ULOC_ACCEPT_FAILED = 0, /* No exact match was found. */
+ ULOC_ACCEPT_VALID = 1, /* An exact match was found. */
+ ULOC_ACCEPT_FALLBACK = 2 /* A fallback was found, for example,
+ Accept list contained 'ja_JP'
+ which matched available locale 'ja'. */
+} UAcceptResult;
+
+
+/**
+ * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP.
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param availableLocales - list of available locales to match
+ * @param status Error status, may be BUFFER_OVERFLOW_ERROR
+ * @return length needed for the locale.
+ * @draft ICU 3.2
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult,
+ const char *httpAcceptLanguage,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+/**
+ * @param acceptList -list of acceptable languages
+ * @param acceptListCount - count of acceptList items
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param availableLocales - list of available locales to match
+ * @param status Error status, may be BUFFER_OVERFLOW_ERROR
+ * @return length needed for the locale.
+ * @draft ICU 3.2
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_acceptLanguage(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult, const char **acceptList,
+ int32_t acceptListCount,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+/*eof*/
+
+
+#endif /*_ULOC*/
+
+
+
diff --git a/Source/WebCore/icu/unicode/umachine.h b/Source/WebCore/icu/unicode/umachine.h
new file mode 100644
index 0000000..d841f53
--- /dev/null
+++ b/Source/WebCore/icu/unicode/umachine.h
@@ -0,0 +1,371 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: umachine.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*
+* This file defines basic types and constants for utf.h to be
+* platform-independent. umachine.h and utf.h are included into
+* utypes.h to provide all the general definitions for ICU.
+* All of these definitions used to be in utypes.h before
+* the UTF-handling macros made this unmaintainable.
+*/
+
+#ifndef __UMACHINE_H__
+#define __UMACHINE_H__
+
+
+/**
+ * \file
+ * \brief Basic types and constants for UTF
+ *
+ * <h2> Basic types and constants for UTF </h2>
+ * This file defines basic types and constants for utf.h to be
+ * platform-independent. umachine.h and utf.h are included into
+ * utypes.h to provide all the general definitions for ICU.
+ * All of these definitions used to be in utypes.h before
+ * the UTF-handling macros made this unmaintainable.
+ *
+ */
+/*==========================================================================*/
+/* Include platform-dependent definitions */
+/* which are contained in the platform-specific file platform.h */
+/*==========================================================================*/
+
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+# include "unicode/pwin32.h"
+#else
+# include "unicode/platform.h"
+#endif
+
+/*
+ * ANSI C headers:
+ * stddef.h defines wchar_t
+ */
+#include <stddef.h>
+
+/*==========================================================================*/
+/* XP_CPLUSPLUS is a cross-platform symbol which should be defined when */
+/* using C++. It should not be defined when compiling under C. */
+/*==========================================================================*/
+
+#ifdef __cplusplus
+# ifndef XP_CPLUSPLUS
+# define XP_CPLUSPLUS
+# endif
+#else
+# undef XP_CPLUSPLUS
+#endif
+
+/*==========================================================================*/
+/* For C wrappers, we use the symbol U_STABLE. */
+/* This works properly if the includer is C or C++. */
+/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
+/*==========================================================================*/
+
+/**
+ * \def U_CFUNC
+ * This is used in a declaration of a library private ICU C function.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_BEGIN
+ * This is used to begin a declaration of a library private ICU C API.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_END
+ * This is used to end a declaration of a library private ICU C API
+ * @stable ICU 2.4
+ */
+
+#ifdef XP_CPLUSPLUS
+# define U_CFUNC extern "C"
+# define U_CDECL_BEGIN extern "C" {
+# define U_CDECL_END }
+#else
+# define U_CFUNC extern
+# define U_CDECL_BEGIN
+# define U_CDECL_END
+#endif
+
+/**
+ * \def U_NAMESPACE_BEGIN
+ * This is used to begin a declaration of a public ICU C++ API.
+ * If the compiler doesn't support namespaces, this does nothing.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_END
+ * This is used to end a declaration of a public ICU C++ API
+ * If the compiler doesn't support namespaces, this does nothing.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_USE
+ * This is used to specify that the rest of the code uses the
+ * public ICU C++ API namespace.
+ * If the compiler doesn't support namespaces, this does nothing.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_QUALIFIER
+ * This is used to qualify that a function or class is part of
+ * the public ICU C++ API namespace.
+ * If the compiler doesn't support namespaces, this does nothing.
+ * @stable ICU 2.4
+ */
+
+/* Define namespace symbols if the compiler supports it. */
+#if U_HAVE_NAMESPACE
+# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
+# define U_NAMESPACE_END }
+# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
+# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
+#else
+# define U_NAMESPACE_BEGIN
+# define U_NAMESPACE_END
+# define U_NAMESPACE_USE
+# define U_NAMESPACE_QUALIFIER
+#endif
+
+/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
+#define U_CAPI U_CFUNC U_EXPORT
+#define U_STABLE U_CAPI
+#define U_DRAFT U_CAPI
+#define U_DEPRECATED U_CAPI
+#define U_OBSOLETE U_CAPI
+#define U_INTERNAL U_CAPI
+
+/*==========================================================================*/
+/* limits for int32_t etc., like in POSIX inttypes.h */
+/*==========================================================================*/
+
+#ifndef INT8_MIN
+/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
+# define INT8_MIN ((int8_t)(-128))
+#endif
+#ifndef INT16_MIN
+/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
+# define INT16_MIN ((int16_t)(-32767-1))
+#endif
+#ifndef INT32_MIN
+/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
+# define INT32_MIN ((int32_t)(-2147483647-1))
+#endif
+
+#ifndef INT8_MAX
+/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
+# define INT8_MAX ((int8_t)(127))
+#endif
+#ifndef INT16_MAX
+/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
+# define INT16_MAX ((int16_t)(32767))
+#endif
+#ifndef INT32_MAX
+/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
+# define INT32_MAX ((int32_t)(2147483647))
+#endif
+
+#ifndef UINT8_MAX
+/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT8_MAX ((uint8_t)(255U))
+#endif
+#ifndef UINT16_MAX
+/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT16_MAX ((uint16_t)(65535U))
+#endif
+#ifndef UINT32_MAX
+/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT32_MAX ((uint32_t)(4294967295U))
+#endif
+
+#if defined(U_INT64_T_UNAVAILABLE)
+# error int64_t is required for decimal format and rule-based number format.
+#else
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * @draft ICU 2.8
+ */
+# define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * @draft ICU 2.8
+ */
+# define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
+# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
+#endif
+
+/*==========================================================================*/
+/* Boolean data type */
+/*==========================================================================*/
+
+/** The ICU boolean type @stable ICU 2.0 */
+typedef int8_t UBool;
+
+#ifndef TRUE
+/** The TRUE value of a UBool @stable ICU 2.0 */
+# define TRUE 1
+#endif
+#ifndef FALSE
+/** The FALSE value of a UBool @stable ICU 2.0 */
+# define FALSE 0
+#endif
+
+
+/*==========================================================================*/
+/* Unicode data types */
+/*==========================================================================*/
+
+/* wchar_t-related definitions -------------------------------------------- */
+
+/**
+ * \def U_HAVE_WCHAR_H
+ * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
+ *
+ * @stable ICU 2.0
+ */
+#ifndef U_HAVE_WCHAR_H
+# define U_HAVE_WCHAR_H 1
+#endif
+
+/**
+ * \def U_SIZEOF_WCHAR_T
+ * U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it)
+ *
+ * @stable ICU 2.0
+ */
+#if U_SIZEOF_WCHAR_T==0
+# undef U_SIZEOF_WCHAR_T
+# define U_SIZEOF_WCHAR_T 4
+#endif
+
+/*
+ * \def U_WCHAR_IS_UTF16
+ * Defined if wchar_t uses UTF-16.
+ *
+ * @stable ICU 2.0
+ */
+/*
+ * \def U_WCHAR_IS_UTF32
+ * Defined if wchar_t uses UTF-32.
+ *
+ * @stable ICU 2.0
+ */
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+# ifdef __STDC_ISO_10646__
+# if (U_SIZEOF_WCHAR_T==2)
+# define U_WCHAR_IS_UTF16
+# elif (U_SIZEOF_WCHAR_T==4)
+# define U_WCHAR_IS_UTF32
+# endif
+# elif defined __UCS2__
+# if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2)
+# define U_WCHAR_IS_UTF16
+# endif
+# elif defined __UCS4__
+# if (U_SIZEOF_WCHAR_T==4)
+# define U_WCHAR_IS_UTF32
+# endif
+# elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+# define U_WCHAR_IS_UTF16
+# endif
+#endif
+
+/* UChar and UChar32 definitions -------------------------------------------- */
+
+/** Number of bytes in a UChar. @stable ICU 2.0 */
+#define U_SIZEOF_UCHAR 2
+
+/**
+ * \var UChar
+ * Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
+ * If wchar_t is not 16 bits wide, then define UChar to be uint16_t.
+ * This makes the definition of UChar platform-dependent
+ * but allows direct string type compatibility with platforms with
+ * 16-bit wchar_t types.
+ *
+ * @stable ICU 2.0
+ */
+
+/* Define UChar to be compatible with wchar_t if possible. */
+#if U_SIZEOF_WCHAR_T==2
+ typedef wchar_t UChar;
+#else
+ typedef uint16_t UChar;
+#endif
+
+/**
+ * Define UChar32 as a type for single Unicode code points.
+ * UChar32 is a signed 32-bit integer (same as int32_t).
+ *
+ * The Unicode code point range is 0..0x10ffff.
+ * All other values (negative or >=0x110000) are illegal as Unicode code points.
+ * They may be used as sentinel values to indicate "done", "error"
+ * or similar non-code point conditions.
+ *
+ * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
+ * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
+ * or else to be uint32_t.
+ * That is, the definition of UChar32 was platform-dependent.
+ *
+ * @see U_SENTINEL
+ * @stable ICU 2.4
+ */
+typedef int32_t UChar32;
+
+/*==========================================================================*/
+/* U_INLINE and U_ALIGN_CODE Set default values if these are not already */
+/* defined. Definitions normally are in */
+/* platform.h or the corresponding file for */
+/* the OS in use. */
+/*==========================================================================*/
+
+/**
+ * \def U_ALIGN_CODE
+ * This is used to align code fragments to a specific byte boundary.
+ * This is useful for getting consistent performance test results.
+ * @internal
+ */
+#ifndef U_ALIGN_CODE
+# define U_ALIGN_CODE(n)
+#endif
+
+#ifndef U_INLINE
+# define U_INLINE
+#endif
+
+#include "unicode/urename.h"
+
+#endif
diff --git a/Source/WebCore/icu/unicode/unorm.h b/Source/WebCore/icu/unicode/unorm.h
new file mode 100644
index 0000000..8bdbee7
--- /dev/null
+++ b/Source/WebCore/icu/unicode/unorm.h
@@ -0,0 +1,575 @@
+/*
+*******************************************************************************
+* Copyright (c) 1996-2004, International Business Machines Corporation
+* and others. All Rights Reserved.
+*******************************************************************************
+* File unorm.h
+*
+* Created by: Vladimir Weinstein 12052000
+*
+* Modification history :
+*
+* Date Name Description
+* 02/01/01 synwee Added normalization quickcheck enum and method.
+*/
+#ifndef UNORM_H
+#define UNORM_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uiter.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Normalization
+ *
+ * <h2>Unicode normalization API</h2>
+ *
+ * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
+ * decomposed form, allowing for easier sorting and searching of text.
+ * <code>unorm_normalize</code> supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15 &#8212; Unicode Normalization Forms</a>.
+ *
+ * Characters with accents or other adornments can be encoded in
+ * several different ways in Unicode. For example, take the character A-acute.
+ * In Unicode, this can be encoded as a single character (the
+ * "composed" form):
+ *
+ * \code
+ * 00C1 LATIN CAPITAL LETTER A WITH ACUTE
+ * \endcode
+ *
+ * or as two separate characters (the "decomposed" form):
+ *
+ * \code
+ * 0041 LATIN CAPITAL LETTER A
+ * 0301 COMBINING ACUTE ACCENT
+ * \endcode
+ *
+ * To a user of your program, however, both of these sequences should be
+ * treated as the same "user-level" character "A with acute accent". When you are searching or
+ * comparing text, you must ensure that these two sequences are treated
+ * equivalently. In addition, you must handle characters with more than one
+ * accent. Sometimes the order of a character's combining accents is
+ * significant, while in other cases accent sequences in different orders are
+ * really equivalent.
+ *
+ * Similarly, the string "ffi" can be encoded as three separate letters:
+ *
+ * \code
+ * 0066 LATIN SMALL LETTER F
+ * 0066 LATIN SMALL LETTER F
+ * 0069 LATIN SMALL LETTER I
+ * \endcode
+ *
+ * or as the single character
+ *
+ * \code
+ * FB03 LATIN SMALL LIGATURE FFI
+ * \endcode
+ *
+ * The ffi ligature is not a distinct semantic character, and strictly speaking
+ * it shouldn't be in Unicode at all, but it was included for compatibility
+ * with existing character sets that already provided it. The Unicode standard
+ * identifies such characters by giving them "compatibility" decompositions
+ * into the corresponding semantic characters. When sorting and searching, you
+ * will often want to use these mappings.
+ *
+ * <code>unorm_normalize</code> helps solve these problems by transforming text into the
+ * canonical composed and decomposed forms as shown in the first example above.
+ * In addition, you can have it perform compatibility decompositions so that
+ * you can treat compatibility characters the same as their equivalents.
+ * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical
+ * order, so that you do not have to worry about accent rearrangement on your
+ * own.
+ *
+ * Form FCD, "Fast C or D", is also designed for collation.
+ * It allows to work on strings that are not necessarily normalized
+ * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed
+ * characters and their decomposed equivalents the same.
+ *
+ * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings
+ * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical
+ * themselves.
+ *
+ * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character,
+ * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long
+ * as their decompositions do not need canonical reordering.
+ *
+ * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts -
+ * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will
+ * return UNORM_YES for most strings in practice.
+ *
+ * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
+ *
+ * For more details on FCD see the collation design document:
+ * http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm
+ *
+ * ICU collation performs either NFD or FCD normalization automatically if normalization
+ * is turned on for the collator object.
+ * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons,
+ * transliteration/transcription, unique representations, etc.
+ *
+ * The W3C generally recommends to exchange texts in NFC.
+ * Note also that most legacy character encodings use only precomposed forms and often do not
+ * encode any combining marks by themselves. For conversion to such character encodings the
+ * Unicode text needs to be normalized to NFC.
+ * For more usage examples, see the Unicode Standard Annex.
+ */
+
+/**
+ * Constants for normalization modes.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** No decomposition/composition. @stable ICU 2.0 */
+ UNORM_NONE = 1,
+ /** Canonical decomposition. @stable ICU 2.0 */
+ UNORM_NFD = 2,
+ /** Compatibility decomposition. @stable ICU 2.0 */
+ UNORM_NFKD = 3,
+ /** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */
+ UNORM_NFC = 4,
+ /** Default normalization. @stable ICU 2.0 */
+ UNORM_DEFAULT = UNORM_NFC,
+ /** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */
+ UNORM_NFKC =5,
+ /** "Fast C or D" form. @stable ICU 2.0 */
+ UNORM_FCD = 6,
+
+ /** One more than the highest normalization mode constant. @stable ICU 2.0 */
+ UNORM_MODE_COUNT
+} UNormalizationMode;
+
+/**
+ * Constants for options flags for normalization.
+ * Use 0 for default options,
+ * including normalization according to the Unicode version
+ * that is currently supported by ICU (see u_getUnicodeVersion).
+ * @stable ICU 2.6
+ */
+enum {
+ /**
+ * Options bit set value to select Unicode 3.2 normalization
+ * (except NormalizationCorrections).
+ * At most one Unicode version can be selected at a time.
+ * @stable ICU 2.6
+ */
+ UNORM_UNICODE_3_2=0x20
+};
+
+/**
+ * Lowest-order bit number of unorm_compare() options bits corresponding to
+ * normalization options bits.
+ *
+ * The options parameter for unorm_compare() uses most bits for
+ * itself and for various comparison and folding flags.
+ * The most significant bits, however, are shifted down and passed on
+ * to the normalization implementation.
+ * (That is, from unorm_compare(..., options, ...),
+ * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
+ * internal normalization functions.)
+ *
+ * @see unorm_compare
+ * @stable ICU 2.6
+ */
+#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+
+/**
+ * Normalize a string.
+ * The string will be normalized according the specified normalization mode
+ * and options.
+ *
+ * @param source The string to normalize.
+ * @param sourceLength The length of source, or -1 if NUL-terminated.
+ * @param mode The normalization mode; one of UNORM_NONE,
+ * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param result A pointer to a buffer to receive the result string.
+ * The result string is NUL-terminated if possible.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_normalize(const UChar *source, int32_t sourceLength,
+ UNormalizationMode mode, int32_t options,
+ UChar *result, int32_t resultLength,
+ UErrorCode *status);
+#endif
+/**
+ * Result values for unorm_quickCheck().
+ * For details see Unicode Technical Report 15.
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+ /**
+ * Indicates that string is not in the normalized format
+ */
+ UNORM_NO,
+ /**
+ * Indicates that string is in the normalized format
+ */
+ UNORM_YES,
+ /**
+ * Indicates that string cannot be determined if it is in the normalized
+ * format without further thorough checks.
+ */
+ UNORM_MAYBE
+} UNormalizationCheckResult;
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Performing quick check on a string, to quickly determine if the string is
+ * in a particular normalization format.
+ * Three types of result can be returned UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format, UNORM_NO determines that
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare the
+ * results.
+ *
+ * @param source string for determining if it is in a normalized format
+ * @param sourcelength length of source to test, or -1 if NUL-terminated
+ * @param mode which normalization form to test for
+ * @param status a pointer to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_isNormalized
+ * @stable ICU 2.0
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm_quickCheck(const UChar *source, int32_t sourcelength,
+ UNormalizationMode mode,
+ UErrorCode *status);
+
+/**
+ * Performing quick check on a string; same as unorm_quickCheck but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @stable ICU 2.6
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a TRUE/FALSE result.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+unorm_isNormalized(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form; same as unorm_isNormalized but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode/options" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization forward.
+ * This function (together with unorm_previous) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ *
+ * Iterative normalization is useful when only a small portion of a longer
+ * string/text needs to be processed.
+ *
+ * For example, the likelihood may be high that processing the first 10% of some
+ * text will be sufficient to find certain data.
+ * Another example: When one wants to concatenate two normalized strings and get a
+ * normalized result, it is much more efficient to normalize just a small part of
+ * the result around the concatenation place instead of re-normalizing everything.
+ *
+ * The input text is an instance of the C character iteration API UCharIterator.
+ * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any
+ * other kind of text object.
+ *
+ * If a buffer overflow occurs, then the caller needs to reset the iterator to the
+ * old index and call the function again with a larger buffer - if the caller cares
+ * for the actual output.
+ * Regardless of the output buffer, the iterator will always be moved to the next
+ * normalization boundary.
+ *
+ * This function (like unorm_previous) serves two purposes:
+ *
+ * 1) To find the next boundary so that the normalization of the part of the text
+ * from the current position to that boundary does not affect and is not affected
+ * by the part of the text beyond that boundary.
+ *
+ * 2) To normalize the text up to the boundary.
+ *
+ * The second step is optional, per the doNormalize parameter.
+ * It is omitted for operations like string concatenation, where the two adjacent
+ * string ends need to be normalized together.
+ * In such a case, the output buffer will just contain a copy of the text up to the
+ * boundary.
+ *
+ * pNeededToNormalize is an output-only parameter. Its output value is only defined
+ * if normalization was requested (doNormalize) and successful (especially, no
+ * buffer overflow).
+ * It is useful for operations like a normalizing transliterator, where one would
+ * not want to replace a piece of text if it is not modified.
+ *
+ * If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeeded... is set TRUE
+ * if the normalization was necessary.
+ *
+ * If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE.
+ *
+ * If the buffer overflows, then *pNeededToNormalize will be undefined;
+ * essentially, whenever U_FAILURE is true (like in buffer overflows), this result
+ * will be undefined.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (TRUE) or just copied (FALSE).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always FALSE if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_previous
+ * @see unorm_normalize
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_next(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization backward.
+ * This function (together with unorm_next) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ * For all details see unorm_next.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (TRUE) or just copied (FALSE).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always FALSE if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_next
+ * @see unorm_normalize
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_previous(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ * dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * With the input strings already being normalized,
+ * this function will use unorm_next() and unorm_previous()
+ * to find the adjacent end pieces of the input strings.
+ * Only the concatenation of these end pieces will be normalized and
+ * then concatenated with the remaining parts of the input strings.
+ *
+ * It is allowed to have dest==left to avoid copying the entire left string.
+ *
+ * @param left Left source string, may be same as dest.
+ * @param leftLength Length of left source string, or -1 if NUL-terminated.
+ * @param right Right source string.
+ * @param rightLength Length of right source string, or -1 if NUL-terminated.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_concatenate(const UChar *left, int32_t leftLength,
+ const UChar *right, int32_t rightLength,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD 0x20000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE 0x10000
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and ustring.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+/**
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif
diff --git a/Source/WebCore/icu/unicode/urename.h b/Source/WebCore/icu/unicode/urename.h
new file mode 100644
index 0000000..5562592
--- /dev/null
+++ b/Source/WebCore/icu/unicode/urename.h
@@ -0,0 +1,1468 @@
+/*
+*******************************************************************************
+* Copyright (C) 2002-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: urename.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Perl script written by Vladimir Weinstein
+*
+* Contains data for renaming ICU exports.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef URENAME_H
+#define URENAME_H
+
+/* Uncomment the following line to disable renaming on platforms
+ that do not use Autoconf. */
+/* #define U_DISABLE_RENAMING 1 */
+
+#if !U_DISABLE_RENAMING
+
+/* C exports renaming data */
+
+#define T_CString_int64ToString T_CString_int64ToString_3_2
+#define T_CString_integerToString T_CString_integerToString_3_2
+#define T_CString_stricmp T_CString_stricmp_3_2
+#define T_CString_stringToInteger T_CString_stringToInteger_3_2
+#define T_CString_strnicmp T_CString_strnicmp_3_2
+#define T_CString_toLowerCase T_CString_toLowerCase_3_2
+#define T_CString_toUpperCase T_CString_toUpperCase_3_2
+#define T_FileStream_close T_FileStream_close_3_2
+#define T_FileStream_eof T_FileStream_eof_3_2
+#define T_FileStream_error T_FileStream_error_3_2
+#define T_FileStream_file_exists T_FileStream_file_exists_3_2
+#define T_FileStream_getc T_FileStream_getc_3_2
+#define T_FileStream_open T_FileStream_open_3_2
+#define T_FileStream_peek T_FileStream_peek_3_2
+#define T_FileStream_putc T_FileStream_putc_3_2
+#define T_FileStream_read T_FileStream_read_3_2
+#define T_FileStream_readLine T_FileStream_readLine_3_2
+#define T_FileStream_remove T_FileStream_remove_3_2
+#define T_FileStream_rewind T_FileStream_rewind_3_2
+#define T_FileStream_size T_FileStream_size_3_2
+#define T_FileStream_stderr T_FileStream_stderr_3_2
+#define T_FileStream_stdin T_FileStream_stdin_3_2
+#define T_FileStream_stdout T_FileStream_stdout_3_2
+#define T_FileStream_ungetc T_FileStream_ungetc_3_2
+#define T_FileStream_write T_FileStream_write_3_2
+#define T_FileStream_writeLine T_FileStream_writeLine_3_2
+#define UCNV_FROM_U_CALLBACK_ESCAPE UCNV_FROM_U_CALLBACK_ESCAPE_3_2
+#define UCNV_FROM_U_CALLBACK_SKIP UCNV_FROM_U_CALLBACK_SKIP_3_2
+#define UCNV_FROM_U_CALLBACK_STOP UCNV_FROM_U_CALLBACK_STOP_3_2
+#define UCNV_FROM_U_CALLBACK_SUBSTITUTE UCNV_FROM_U_CALLBACK_SUBSTITUTE_3_2
+#define UCNV_TO_U_CALLBACK_ESCAPE UCNV_TO_U_CALLBACK_ESCAPE_3_2
+#define UCNV_TO_U_CALLBACK_SKIP UCNV_TO_U_CALLBACK_SKIP_3_2
+#define UCNV_TO_U_CALLBACK_STOP UCNV_TO_U_CALLBACK_STOP_3_2
+#define UCNV_TO_U_CALLBACK_SUBSTITUTE UCNV_TO_U_CALLBACK_SUBSTITUTE_3_2
+#define UDataMemory_createNewInstance UDataMemory_createNewInstance_3_2
+#define UDataMemory_init UDataMemory_init_3_2
+#define UDataMemory_isLoaded UDataMemory_isLoaded_3_2
+#define UDataMemory_normalizeDataPointer UDataMemory_normalizeDataPointer_3_2
+#define UDataMemory_setData UDataMemory_setData_3_2
+#define UDatamemory_assign UDatamemory_assign_3_2
+#define _ASCIIData _ASCIIData_3_2
+#define _Bocu1Data _Bocu1Data_3_2
+#define _CESU8Data _CESU8Data_3_2
+#define _HZData _HZData_3_2
+#define _IMAPData _IMAPData_3_2
+#define _ISCIIData _ISCIIData_3_2
+#define _ISO2022Data _ISO2022Data_3_2
+#define _LMBCSData1 _LMBCSData1_3_2
+#define _LMBCSData11 _LMBCSData11_3_2
+#define _LMBCSData16 _LMBCSData16_3_2
+#define _LMBCSData17 _LMBCSData17_3_2
+#define _LMBCSData18 _LMBCSData18_3_2
+#define _LMBCSData19 _LMBCSData19_3_2
+#define _LMBCSData2 _LMBCSData2_3_2
+#define _LMBCSData3 _LMBCSData3_3_2
+#define _LMBCSData4 _LMBCSData4_3_2
+#define _LMBCSData5 _LMBCSData5_3_2
+#define _LMBCSData6 _LMBCSData6_3_2
+#define _LMBCSData8 _LMBCSData8_3_2
+#define _Latin1Data _Latin1Data_3_2
+#define _MBCSData _MBCSData_3_2
+#define _SCSUData _SCSUData_3_2
+#define _UTF16BEData _UTF16BEData_3_2
+#define _UTF16Data _UTF16Data_3_2
+#define _UTF16LEData _UTF16LEData_3_2
+#define _UTF32BEData _UTF32BEData_3_2
+#define _UTF32Data _UTF32Data_3_2
+#define _UTF32LEData _UTF32LEData_3_2
+#define _UTF7Data _UTF7Data_3_2
+#define _UTF8Data _UTF8Data_3_2
+#define cmemory_cleanup cmemory_cleanup_3_2
+#define cmemory_inUse cmemory_inUse_3_2
+#define locale_getKeywords locale_getKeywords_3_2
+#define locale_get_default locale_get_default_3_2
+#define locale_set_default locale_set_default_3_2
+#define res_countArrayItems res_countArrayItems_3_2
+#define res_findResource res_findResource_3_2
+#define res_getAlias res_getAlias_3_2
+#define res_getArrayItem res_getArrayItem_3_2
+#define res_getBinary res_getBinary_3_2
+#define res_getIntVector res_getIntVector_3_2
+#define res_getResource res_getResource_3_2
+#define res_getString res_getString_3_2
+#define res_getTableItemByIndex res_getTableItemByIndex_3_2
+#define res_getTableItemByKey res_getTableItemByKey_3_2
+#define res_load res_load_3_2
+#define res_unload res_unload_3_2
+#define transliterator_cleanup transliterator_cleanup_3_2
+#define u_UCharsToChars u_UCharsToChars_3_2
+#define u_austrcpy u_austrcpy_3_2
+#define u_austrncpy u_austrncpy_3_2
+#define u_catclose u_catclose_3_2
+#define u_catgets u_catgets_3_2
+#define u_catopen u_catopen_3_2
+#define u_charAge u_charAge_3_2
+#define u_charDigitValue u_charDigitValue_3_2
+#define u_charDirection u_charDirection_3_2
+#define u_charFromName u_charFromName_3_2
+#define u_charMirror u_charMirror_3_2
+#define u_charName u_charName_3_2
+#define u_charType u_charType_3_2
+#define u_charsToUChars u_charsToUChars_3_2
+#define u_cleanup u_cleanup_3_2
+#define u_countChar32 u_countChar32_3_2
+#define u_digit u_digit_3_2
+#define u_enumCharNames u_enumCharNames_3_2
+#define u_enumCharTypes u_enumCharTypes_3_2
+#define u_errorName u_errorName_3_2
+#define u_fclose u_fclose_3_2
+#define u_feof u_feof_3_2
+#define u_fflush u_fflush_3_2
+#define u_fgetConverter u_fgetConverter_3_2
+#define u_fgetc u_fgetc_3_2
+#define u_fgetcodepage u_fgetcodepage_3_2
+#define u_fgetcx u_fgetcx_3_2
+#define u_fgetfile u_fgetfile_3_2
+#define u_fgetlocale u_fgetlocale_3_2
+#define u_fgets u_fgets_3_2
+#define u_file_read u_file_read_3_2
+#define u_file_write u_file_write_3_2
+#define u_file_write_flush u_file_write_flush_3_2
+#define u_finit u_finit_3_2
+#define u_foldCase u_foldCase_3_2
+#define u_fopen u_fopen_3_2
+#define u_forDigit u_forDigit_3_2
+#define u_formatMessage u_formatMessage_3_2
+#define u_formatMessageWithError u_formatMessageWithError_3_2
+#define u_fprintf u_fprintf_3_2
+#define u_fprintf_u u_fprintf_u_3_2
+#define u_fputc u_fputc_3_2
+#define u_fputs u_fputs_3_2
+#define u_frewind u_frewind_3_2
+#define u_fscanf u_fscanf_3_2
+#define u_fscanf_u u_fscanf_u_3_2
+#define u_fsetcodepage u_fsetcodepage_3_2
+#define u_fsetlocale u_fsetlocale_3_2
+#define u_fsettransliterator u_fsettransliterator_3_2
+#define u_fstropen u_fstropen_3_2
+#define u_fungetc u_fungetc_3_2
+#define u_getCombiningClass u_getCombiningClass_3_2
+#define u_getDataDirectory u_getDataDirectory_3_2
+#define u_getDefaultConverter u_getDefaultConverter_3_2
+#define u_getFC_NFKC_Closure u_getFC_NFKC_Closure_3_2
+#define u_getISOComment u_getISOComment_3_2
+#define u_getIntPropertyMaxValue u_getIntPropertyMaxValue_3_2
+#define u_getIntPropertyMinValue u_getIntPropertyMinValue_3_2
+#define u_getIntPropertyValue u_getIntPropertyValue_3_2
+#define u_getNumericValue u_getNumericValue_3_2
+#define u_getPropertyEnum u_getPropertyEnum_3_2
+#define u_getPropertyName u_getPropertyName_3_2
+#define u_getPropertyValueEnum u_getPropertyValueEnum_3_2
+#define u_getPropertyValueName u_getPropertyValueName_3_2
+#define u_getUnicodeProperties u_getUnicodeProperties_3_2
+#define u_getUnicodeVersion u_getUnicodeVersion_3_2
+#define u_getVersion u_getVersion_3_2
+#define u_growBufferFromStatic u_growBufferFromStatic_3_2
+#define u_hasBinaryProperty u_hasBinaryProperty_3_2
+#define u_init u_init_3_2
+#define u_isIDIgnorable u_isIDIgnorable_3_2
+#define u_isIDPart u_isIDPart_3_2
+#define u_isIDStart u_isIDStart_3_2
+#define u_isISOControl u_isISOControl_3_2
+#define u_isJavaIDPart u_isJavaIDPart_3_2
+#define u_isJavaIDStart u_isJavaIDStart_3_2
+#define u_isJavaSpaceChar u_isJavaSpaceChar_3_2
+#define u_isMirrored u_isMirrored_3_2
+#define u_isUAlphabetic u_isUAlphabetic_3_2
+#define u_isULowercase u_isULowercase_3_2
+#define u_isUUppercase u_isUUppercase_3_2
+#define u_isUWhiteSpace u_isUWhiteSpace_3_2
+#define u_isWhitespace u_isWhitespace_3_2
+#define u_isalnum u_isalnum_3_2
+#define u_isalpha u_isalpha_3_2
+#define u_isbase u_isbase_3_2
+#define u_isblank u_isblank_3_2
+#define u_iscntrl u_iscntrl_3_2
+#define u_isdefined u_isdefined_3_2
+#define u_isdigit u_isdigit_3_2
+#define u_isgraph u_isgraph_3_2
+#define u_islower u_islower_3_2
+#define u_isprint u_isprint_3_2
+#define u_ispunct u_ispunct_3_2
+#define u_isspace u_isspace_3_2
+#define u_istitle u_istitle_3_2
+#define u_isupper u_isupper_3_2
+#define u_isxdigit u_isxdigit_3_2
+#define u_lengthOfIdenticalLevelRun u_lengthOfIdenticalLevelRun_3_2
+#define u_locbund_close u_locbund_close_3_2
+#define u_locbund_getNumberFormat u_locbund_getNumberFormat_3_2
+#define u_locbund_init u_locbund_init_3_2
+#define u_memcasecmp u_memcasecmp_3_2
+#define u_memchr u_memchr_3_2
+#define u_memchr32 u_memchr32_3_2
+#define u_memcmp u_memcmp_3_2
+#define u_memcmpCodePointOrder u_memcmpCodePointOrder_3_2
+#define u_memcpy u_memcpy_3_2
+#define u_memmove u_memmove_3_2
+#define u_memrchr u_memrchr_3_2
+#define u_memrchr32 u_memrchr32_3_2
+#define u_memset u_memset_3_2
+#define u_parseMessage u_parseMessage_3_2
+#define u_parseMessageWithError u_parseMessageWithError_3_2
+#define u_printf_parse u_printf_parse_3_2
+#define u_releaseDefaultConverter u_releaseDefaultConverter_3_2
+#define u_scanf_parse u_scanf_parse_3_2
+#define u_setAtomicIncDecFunctions u_setAtomicIncDecFunctions_3_2
+#define u_setDataDirectory u_setDataDirectory_3_2
+#define u_setMemoryFunctions u_setMemoryFunctions_3_2
+#define u_setMutexFunctions u_setMutexFunctions_3_2
+#define u_shapeArabic u_shapeArabic_3_2
+#define u_snprintf u_snprintf_3_2
+#define u_snprintf_u u_snprintf_u_3_2
+#define u_sprintf u_sprintf_3_2
+#define u_sprintf_u u_sprintf_u_3_2
+#define u_sscanf u_sscanf_3_2
+#define u_sscanf_u u_sscanf_u_3_2
+#define u_strCaseCompare u_strCaseCompare_3_2
+#define u_strCompare u_strCompare_3_2
+#define u_strCompareIter u_strCompareIter_3_2
+#define u_strFindFirst u_strFindFirst_3_2
+#define u_strFindLast u_strFindLast_3_2
+#define u_strFoldCase u_strFoldCase_3_2
+#define u_strFromPunycode u_strFromPunycode_3_2
+#define u_strFromUTF32 u_strFromUTF32_3_2
+#define u_strFromUTF8 u_strFromUTF8_3_2
+#define u_strFromWCS u_strFromWCS_3_2
+#define u_strHasMoreChar32Than u_strHasMoreChar32Than_3_2
+#define u_strToLower u_strToLower_3_2
+#define u_strToPunycode u_strToPunycode_3_2
+#define u_strToTitle u_strToTitle_3_2
+#define u_strToUTF32 u_strToUTF32_3_2
+#define u_strToUTF8 u_strToUTF8_3_2
+#define u_strToUpper u_strToUpper_3_2
+#define u_strToWCS u_strToWCS_3_2
+#define u_strcasecmp u_strcasecmp_3_2
+#define u_strcat u_strcat_3_2
+#define u_strchr u_strchr_3_2
+#define u_strchr32 u_strchr32_3_2
+#define u_strcmp u_strcmp_3_2
+#define u_strcmpCodePointOrder u_strcmpCodePointOrder_3_2
+#define u_strcmpFold u_strcmpFold_3_2
+#define u_strcpy u_strcpy_3_2
+#define u_strcspn u_strcspn_3_2
+#define u_strlen u_strlen_3_2
+#define u_strncasecmp u_strncasecmp_3_2
+#define u_strncat u_strncat_3_2
+#define u_strncmp u_strncmp_3_2
+#define u_strncmpCodePointOrder u_strncmpCodePointOrder_3_2
+#define u_strncpy u_strncpy_3_2
+#define u_strpbrk u_strpbrk_3_2
+#define u_strrchr u_strrchr_3_2
+#define u_strrchr32 u_strrchr32_3_2
+#define u_strrstr u_strrstr_3_2
+#define u_strspn u_strspn_3_2
+#define u_strstr u_strstr_3_2
+#define u_strtok_r u_strtok_r_3_2
+#define u_terminateChars u_terminateChars_3_2
+#define u_terminateUChar32s u_terminateUChar32s_3_2
+#define u_terminateUChars u_terminateUChars_3_2
+#define u_terminateWChars u_terminateWChars_3_2
+#define u_tolower u_tolower_3_2
+#define u_totitle u_totitle_3_2
+#define u_toupper u_toupper_3_2
+#define u_uastrcpy u_uastrcpy_3_2
+#define u_uastrncpy u_uastrncpy_3_2
+#define u_unescape u_unescape_3_2
+#define u_unescapeAt u_unescapeAt_3_2
+#define u_versionFromString u_versionFromString_3_2
+#define u_versionToString u_versionToString_3_2
+#define u_vformatMessage u_vformatMessage_3_2
+#define u_vformatMessageWithError u_vformatMessageWithError_3_2
+#define u_vfprintf u_vfprintf_3_2
+#define u_vfprintf_u u_vfprintf_u_3_2
+#define u_vfscanf u_vfscanf_3_2
+#define u_vfscanf_u u_vfscanf_u_3_2
+#define u_vparseMessage u_vparseMessage_3_2
+#define u_vparseMessageWithError u_vparseMessageWithError_3_2
+#define u_vsnprintf u_vsnprintf_3_2
+#define u_vsnprintf_u u_vsnprintf_u_3_2
+#define u_vsprintf u_vsprintf_3_2
+#define u_vsprintf_u u_vsprintf_u_3_2
+#define u_vsscanf u_vsscanf_3_2
+#define u_vsscanf_u u_vsscanf_u_3_2
+#define u_writeDiff u_writeDiff_3_2
+#define u_writeIdenticalLevelRun u_writeIdenticalLevelRun_3_2
+#define u_writeIdenticalLevelRunTwoChars u_writeIdenticalLevelRunTwoChars_3_2
+#define ubidi_close ubidi_close_3_2
+#define ubidi_countRuns ubidi_countRuns_3_2
+#define ubidi_getDirection ubidi_getDirection_3_2
+#define ubidi_getLength ubidi_getLength_3_2
+#define ubidi_getLevelAt ubidi_getLevelAt_3_2
+#define ubidi_getLevels ubidi_getLevels_3_2
+#define ubidi_getLogicalIndex ubidi_getLogicalIndex_3_2
+#define ubidi_getLogicalMap ubidi_getLogicalMap_3_2
+#define ubidi_getLogicalRun ubidi_getLogicalRun_3_2
+#define ubidi_getMemory ubidi_getMemory_3_2
+#define ubidi_getParaLevel ubidi_getParaLevel_3_2
+#define ubidi_getRuns ubidi_getRuns_3_2
+#define ubidi_getText ubidi_getText_3_2
+#define ubidi_getVisualIndex ubidi_getVisualIndex_3_2
+#define ubidi_getVisualMap ubidi_getVisualMap_3_2
+#define ubidi_getVisualRun ubidi_getVisualRun_3_2
+#define ubidi_invertMap ubidi_invertMap_3_2
+#define ubidi_isInverse ubidi_isInverse_3_2
+#define ubidi_open ubidi_open_3_2
+#define ubidi_openSized ubidi_openSized_3_2
+#define ubidi_reorderLogical ubidi_reorderLogical_3_2
+#define ubidi_reorderVisual ubidi_reorderVisual_3_2
+#define ubidi_setInverse ubidi_setInverse_3_2
+#define ubidi_setLine ubidi_setLine_3_2
+#define ubidi_setPara ubidi_setPara_3_2
+#define ubidi_writeReordered ubidi_writeReordered_3_2
+#define ubidi_writeReverse ubidi_writeReverse_3_2
+#define ublock_getCode ublock_getCode_3_2
+#define ubrk_close ubrk_close_3_2
+#define ubrk_countAvailable ubrk_countAvailable_3_2
+#define ubrk_current ubrk_current_3_2
+#define ubrk_first ubrk_first_3_2
+#define ubrk_following ubrk_following_3_2
+#define ubrk_getAvailable ubrk_getAvailable_3_2
+#define ubrk_getLocaleByType ubrk_getLocaleByType_3_2
+#define ubrk_getRuleStatus ubrk_getRuleStatus_3_2
+#define ubrk_getRuleStatusVec ubrk_getRuleStatusVec_3_2
+#define ubrk_isBoundary ubrk_isBoundary_3_2
+#define ubrk_last ubrk_last_3_2
+#define ubrk_next ubrk_next_3_2
+#define ubrk_open ubrk_open_3_2
+#define ubrk_openRules ubrk_openRules_3_2
+#define ubrk_preceding ubrk_preceding_3_2
+#define ubrk_previous ubrk_previous_3_2
+#define ubrk_safeClone ubrk_safeClone_3_2
+#define ubrk_setText ubrk_setText_3_2
+#define ubrk_swap ubrk_swap_3_2
+#define ucal_add ucal_add_3_2
+#define ucal_clear ucal_clear_3_2
+#define ucal_clearField ucal_clearField_3_2
+#define ucal_close ucal_close_3_2
+#define ucal_countAvailable ucal_countAvailable_3_2
+#define ucal_equivalentTo ucal_equivalentTo_3_2
+#define ucal_get ucal_get_3_2
+#define ucal_getAttribute ucal_getAttribute_3_2
+#define ucal_getAvailable ucal_getAvailable_3_2
+#define ucal_getDSTSavings ucal_getDSTSavings_3_2
+#define ucal_getDefaultTimeZone ucal_getDefaultTimeZone_3_2
+#define ucal_getLimit ucal_getLimit_3_2
+#define ucal_getLocaleByType ucal_getLocaleByType_3_2
+#define ucal_getMillis ucal_getMillis_3_2
+#define ucal_getNow ucal_getNow_3_2
+#define ucal_getTimeZoneDisplayName ucal_getTimeZoneDisplayName_3_2
+#define ucal_inDaylightTime ucal_inDaylightTime_3_2
+#define ucal_isSet ucal_isSet_3_2
+#define ucal_open ucal_open_3_2
+#define ucal_openCountryTimeZones ucal_openCountryTimeZones_3_2
+#define ucal_openTimeZones ucal_openTimeZones_3_2
+#define ucal_roll ucal_roll_3_2
+#define ucal_set ucal_set_3_2
+#define ucal_setAttribute ucal_setAttribute_3_2
+#define ucal_setDate ucal_setDate_3_2
+#define ucal_setDateTime ucal_setDateTime_3_2
+#define ucal_setDefaultTimeZone ucal_setDefaultTimeZone_3_2
+#define ucal_setMillis ucal_setMillis_3_2
+#define ucal_setTimeZone ucal_setTimeZone_3_2
+#define ucase_addPropertyStarts ucase_addPropertyStarts_3_2
+#define ucase_close ucase_close_3_2
+#define ucase_fold ucase_fold_3_2
+#define ucase_getSingleton ucase_getSingleton_3_2
+#define ucase_getType ucase_getType_3_2
+#define ucase_getTypeOrIgnorable ucase_getTypeOrIgnorable_3_2
+#define ucase_isCaseSensitive ucase_isCaseSensitive_3_2
+#define ucase_isSoftDotted ucase_isSoftDotted_3_2
+#define ucase_open ucase_open_3_2
+#define ucase_openBinary ucase_openBinary_3_2
+#define ucase_swap ucase_swap_3_2
+#define ucase_toFullFolding ucase_toFullFolding_3_2
+#define ucase_toFullLower ucase_toFullLower_3_2
+#define ucase_toFullTitle ucase_toFullTitle_3_2
+#define ucase_toFullUpper ucase_toFullUpper_3_2
+#define ucase_tolower ucase_tolower_3_2
+#define ucase_totitle ucase_totitle_3_2
+#define ucase_toupper ucase_toupper_3_2
+#define uchar_addPropertyStarts uchar_addPropertyStarts_3_2
+#define uchar_getHST uchar_getHST_3_2
+#define uchar_swapNames uchar_swapNames_3_2
+#define ucln_common_lib_cleanup ucln_common_lib_cleanup_3_2
+#define ucln_common_registerCleanup ucln_common_registerCleanup_3_2
+#define ucln_i18n_registerCleanup ucln_i18n_registerCleanup_3_2
+#define ucln_registerCleanup ucln_registerCleanup_3_2
+#define ucmp8_close ucmp8_close_3_2
+#define ucmp8_compact ucmp8_compact_3_2
+#define ucmp8_expand ucmp8_expand_3_2
+#define ucmp8_flattenMem ucmp8_flattenMem_3_2
+#define ucmp8_getArray ucmp8_getArray_3_2
+#define ucmp8_getCount ucmp8_getCount_3_2
+#define ucmp8_getIndex ucmp8_getIndex_3_2
+#define ucmp8_getkBlockCount ucmp8_getkBlockCount_3_2
+#define ucmp8_getkUnicodeCount ucmp8_getkUnicodeCount_3_2
+#define ucmp8_init ucmp8_init_3_2
+#define ucmp8_initAdopt ucmp8_initAdopt_3_2
+#define ucmp8_initAlias ucmp8_initAlias_3_2
+#define ucmp8_initBogus ucmp8_initBogus_3_2
+#define ucmp8_initFromData ucmp8_initFromData_3_2
+#define ucmp8_isBogus ucmp8_isBogus_3_2
+#define ucmp8_open ucmp8_open_3_2
+#define ucmp8_openAdopt ucmp8_openAdopt_3_2
+#define ucmp8_openAlias ucmp8_openAlias_3_2
+#define ucmp8_set ucmp8_set_3_2
+#define ucmp8_setRange ucmp8_setRange_3_2
+#define ucnv_MBCSFromUChar32 ucnv_MBCSFromUChar32_3_2
+#define ucnv_MBCSFromUnicodeWithOffsets ucnv_MBCSFromUnicodeWithOffsets_3_2
+#define ucnv_MBCSGetType ucnv_MBCSGetType_3_2
+#define ucnv_MBCSGetUnicodeSetForBytes ucnv_MBCSGetUnicodeSetForBytes_3_2
+#define ucnv_MBCSGetUnicodeSetForUnicode ucnv_MBCSGetUnicodeSetForUnicode_3_2
+#define ucnv_MBCSIsLeadByte ucnv_MBCSIsLeadByte_3_2
+#define ucnv_MBCSSimpleGetNextUChar ucnv_MBCSSimpleGetNextUChar_3_2
+#define ucnv_MBCSToUnicodeWithOffsets ucnv_MBCSToUnicodeWithOffsets_3_2
+#define ucnv_cbFromUWriteBytes ucnv_cbFromUWriteBytes_3_2
+#define ucnv_cbFromUWriteSub ucnv_cbFromUWriteSub_3_2
+#define ucnv_cbFromUWriteUChars ucnv_cbFromUWriteUChars_3_2
+#define ucnv_cbToUWriteSub ucnv_cbToUWriteSub_3_2
+#define ucnv_cbToUWriteUChars ucnv_cbToUWriteUChars_3_2
+#define ucnv_close ucnv_close_3_2
+#define ucnv_compareNames ucnv_compareNames_3_2
+#define ucnv_convert ucnv_convert_3_2
+#define ucnv_convertEx ucnv_convertEx_3_2
+#define ucnv_copyPlatformString ucnv_copyPlatformString_3_2
+#define ucnv_countAliases ucnv_countAliases_3_2
+#define ucnv_countAvailable ucnv_countAvailable_3_2
+#define ucnv_countStandards ucnv_countStandards_3_2
+#define ucnv_createAlgorithmicConverter ucnv_createAlgorithmicConverter_3_2
+#define ucnv_createConverter ucnv_createConverter_3_2
+#define ucnv_createConverterFromPackage ucnv_createConverterFromPackage_3_2
+#define ucnv_createConverterFromSharedData ucnv_createConverterFromSharedData_3_2
+#define ucnv_detectUnicodeSignature ucnv_detectUnicodeSignature_3_2
+#define ucnv_extContinueMatchFromU ucnv_extContinueMatchFromU_3_2
+#define ucnv_extContinueMatchToU ucnv_extContinueMatchToU_3_2
+#define ucnv_extGetUnicodeSet ucnv_extGetUnicodeSet_3_2
+#define ucnv_extInitialMatchFromU ucnv_extInitialMatchFromU_3_2
+#define ucnv_extInitialMatchToU ucnv_extInitialMatchToU_3_2
+#define ucnv_extSimpleMatchFromU ucnv_extSimpleMatchFromU_3_2
+#define ucnv_extSimpleMatchToU ucnv_extSimpleMatchToU_3_2
+#define ucnv_fixFileSeparator ucnv_fixFileSeparator_3_2
+#define ucnv_flushCache ucnv_flushCache_3_2
+#define ucnv_fromAlgorithmic ucnv_fromAlgorithmic_3_2
+#define ucnv_fromUChars ucnv_fromUChars_3_2
+#define ucnv_fromUWriteBytes ucnv_fromUWriteBytes_3_2
+#define ucnv_fromUnicode ucnv_fromUnicode_3_2
+#define ucnv_fromUnicode_UTF8 ucnv_fromUnicode_UTF8_3_2
+#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC ucnv_fromUnicode_UTF8_OFFSETS_LOGIC_3_2
+#define ucnv_getAlias ucnv_getAlias_3_2
+#define ucnv_getAliases ucnv_getAliases_3_2
+#define ucnv_getAvailableName ucnv_getAvailableName_3_2
+#define ucnv_getCCSID ucnv_getCCSID_3_2
+#define ucnv_getCanonicalName ucnv_getCanonicalName_3_2
+#define ucnv_getCompleteUnicodeSet ucnv_getCompleteUnicodeSet_3_2
+#define ucnv_getDefaultName ucnv_getDefaultName_3_2
+#define ucnv_getDisplayName ucnv_getDisplayName_3_2
+#define ucnv_getFromUCallBack ucnv_getFromUCallBack_3_2
+#define ucnv_getInvalidChars ucnv_getInvalidChars_3_2
+#define ucnv_getInvalidUChars ucnv_getInvalidUChars_3_2
+#define ucnv_getMaxCharSize ucnv_getMaxCharSize_3_2
+#define ucnv_getMinCharSize ucnv_getMinCharSize_3_2
+#define ucnv_getName ucnv_getName_3_2
+#define ucnv_getNextUChar ucnv_getNextUChar_3_2
+#define ucnv_getNonSurrogateUnicodeSet ucnv_getNonSurrogateUnicodeSet_3_2
+#define ucnv_getPlatform ucnv_getPlatform_3_2
+#define ucnv_getStandard ucnv_getStandard_3_2
+#define ucnv_getStandardName ucnv_getStandardName_3_2
+#define ucnv_getStarters ucnv_getStarters_3_2
+#define ucnv_getSubstChars ucnv_getSubstChars_3_2
+#define ucnv_getToUCallBack ucnv_getToUCallBack_3_2
+#define ucnv_getType ucnv_getType_3_2
+#define ucnv_getUnicodeSet ucnv_getUnicodeSet_3_2
+#define ucnv_incrementRefCount ucnv_incrementRefCount_3_2
+#define ucnv_io_countAliases ucnv_io_countAliases_3_2
+#define ucnv_io_countAvailableAliases ucnv_io_countAvailableAliases_3_2
+#define ucnv_io_countAvailableConverters ucnv_io_countAvailableConverters_3_2
+#define ucnv_io_countStandards ucnv_io_countStandards_3_2
+#define ucnv_io_flushAvailableConverterCache ucnv_io_flushAvailableConverterCache_3_2
+#define ucnv_io_getAlias ucnv_io_getAlias_3_2
+#define ucnv_io_getAliases ucnv_io_getAliases_3_2
+#define ucnv_io_getAvailableConverter ucnv_io_getAvailableConverter_3_2
+#define ucnv_io_getConverterName ucnv_io_getConverterName_3_2
+#define ucnv_io_getDefaultConverterName ucnv_io_getDefaultConverterName_3_2
+#define ucnv_io_setDefaultConverterName ucnv_io_setDefaultConverterName_3_2
+#define ucnv_io_stripASCIIForCompare ucnv_io_stripASCIIForCompare_3_2
+#define ucnv_io_stripEBCDICForCompare ucnv_io_stripEBCDICForCompare_3_2
+#define ucnv_isAmbiguous ucnv_isAmbiguous_3_2
+#define ucnv_load ucnv_load_3_2
+#define ucnv_loadSharedData ucnv_loadSharedData_3_2
+#define ucnv_open ucnv_open_3_2
+#define ucnv_openAllNames ucnv_openAllNames_3_2
+#define ucnv_openCCSID ucnv_openCCSID_3_2
+#define ucnv_openPackage ucnv_openPackage_3_2
+#define ucnv_openStandardNames ucnv_openStandardNames_3_2
+#define ucnv_openU ucnv_openU_3_2
+#define ucnv_reset ucnv_reset_3_2
+#define ucnv_resetFromUnicode ucnv_resetFromUnicode_3_2
+#define ucnv_resetToUnicode ucnv_resetToUnicode_3_2
+#define ucnv_safeClone ucnv_safeClone_3_2
+#define ucnv_setDefaultName ucnv_setDefaultName_3_2
+#define ucnv_setFallback ucnv_setFallback_3_2
+#define ucnv_setFromUCallBack ucnv_setFromUCallBack_3_2
+#define ucnv_setSubstChars ucnv_setSubstChars_3_2
+#define ucnv_setToUCallBack ucnv_setToUCallBack_3_2
+#define ucnv_swap ucnv_swap_3_2
+#define ucnv_swapAliases ucnv_swapAliases_3_2
+#define ucnv_toAlgorithmic ucnv_toAlgorithmic_3_2
+#define ucnv_toUChars ucnv_toUChars_3_2
+#define ucnv_toUWriteCodePoint ucnv_toUWriteCodePoint_3_2
+#define ucnv_toUWriteUChars ucnv_toUWriteUChars_3_2
+#define ucnv_toUnicode ucnv_toUnicode_3_2
+#define ucnv_unload ucnv_unload_3_2
+#define ucnv_unloadSharedDataIfReady ucnv_unloadSharedDataIfReady_3_2
+#define ucnv_usesFallback ucnv_usesFallback_3_2
+#define ucol_allocWeights ucol_allocWeights_3_2
+#define ucol_assembleTailoringTable ucol_assembleTailoringTable_3_2
+#define ucol_calcSortKey ucol_calcSortKey_3_2
+#define ucol_calcSortKeySimpleTertiary ucol_calcSortKeySimpleTertiary_3_2
+#define ucol_cloneBinary ucol_cloneBinary_3_2
+#define ucol_cloneRuleData ucol_cloneRuleData_3_2
+#define ucol_close ucol_close_3_2
+#define ucol_closeElements ucol_closeElements_3_2
+#define ucol_collatorToIdentifier ucol_collatorToIdentifier_3_2
+#define ucol_countAvailable ucol_countAvailable_3_2
+#define ucol_createElements ucol_createElements_3_2
+#define ucol_doCE ucol_doCE_3_2
+#define ucol_equal ucol_equal_3_2
+#define ucol_equals ucol_equals_3_2
+#define ucol_getAttribute ucol_getAttribute_3_2
+#define ucol_getAttributeOrDefault ucol_getAttributeOrDefault_3_2
+#define ucol_getAvailable ucol_getAvailable_3_2
+#define ucol_getBound ucol_getBound_3_2
+#define ucol_getCEGenerator ucol_getCEGenerator_3_2
+#define ucol_getCEStrengthDifference ucol_getCEStrengthDifference_3_2
+#define ucol_getContractions ucol_getContractions_3_2
+#define ucol_getDisplayName ucol_getDisplayName_3_2
+#define ucol_getFirstCE ucol_getFirstCE_3_2
+#define ucol_getFunctionalEquivalent ucol_getFunctionalEquivalent_3_2
+#define ucol_getKeywordValues ucol_getKeywordValues_3_2
+#define ucol_getKeywords ucol_getKeywords_3_2
+#define ucol_getLocale ucol_getLocale_3_2
+#define ucol_getLocaleByType ucol_getLocaleByType_3_2
+#define ucol_getMaxExpansion ucol_getMaxExpansion_3_2
+#define ucol_getNextCE ucol_getNextCE_3_2
+#define ucol_getNextGenerated ucol_getNextGenerated_3_2
+#define ucol_getOffset ucol_getOffset_3_2
+#define ucol_getPrevCE ucol_getPrevCE_3_2
+#define ucol_getRules ucol_getRules_3_2
+#define ucol_getRulesEx ucol_getRulesEx_3_2
+#define ucol_getShortDefinitionString ucol_getShortDefinitionString_3_2
+#define ucol_getSimpleCEGenerator ucol_getSimpleCEGenerator_3_2
+#define ucol_getSortKey ucol_getSortKey_3_2
+#define ucol_getSortKeySize ucol_getSortKeySize_3_2
+#define ucol_getSortKeyWithAllocation ucol_getSortKeyWithAllocation_3_2
+#define ucol_getStrength ucol_getStrength_3_2
+#define ucol_getTailoredSet ucol_getTailoredSet_3_2
+#define ucol_getUCAVersion ucol_getUCAVersion_3_2
+#define ucol_getUnsafeSet ucol_getUnsafeSet_3_2
+#define ucol_getVariableTop ucol_getVariableTop_3_2
+#define ucol_getVersion ucol_getVersion_3_2
+#define ucol_greater ucol_greater_3_2
+#define ucol_greaterOrEqual ucol_greaterOrEqual_3_2
+#define ucol_identifierToShortString ucol_identifierToShortString_3_2
+#define ucol_initBuffers ucol_initBuffers_3_2
+#define ucol_initCollator ucol_initCollator_3_2
+#define ucol_initInverseUCA ucol_initInverseUCA_3_2
+#define ucol_initUCA ucol_initUCA_3_2
+#define ucol_inv_getGapPositions ucol_inv_getGapPositions_3_2
+#define ucol_inv_getNextCE ucol_inv_getNextCE_3_2
+#define ucol_inv_getPrevCE ucol_inv_getPrevCE_3_2
+#define ucol_isTailored ucol_isTailored_3_2
+#define ucol_keyHashCode ucol_keyHashCode_3_2
+#define ucol_mergeSortkeys ucol_mergeSortkeys_3_2
+#define ucol_next ucol_next_3_2
+#define ucol_nextSortKeyPart ucol_nextSortKeyPart_3_2
+#define ucol_nextWeight ucol_nextWeight_3_2
+#define ucol_normalizeShortDefinitionString ucol_normalizeShortDefinitionString_3_2
+#define ucol_open ucol_open_3_2
+#define ucol_openAvailableLocales ucol_openAvailableLocales_3_2
+#define ucol_openBinary ucol_openBinary_3_2
+#define ucol_openElements ucol_openElements_3_2
+#define ucol_openFromIdentifier ucol_openFromIdentifier_3_2
+#define ucol_openFromShortString ucol_openFromShortString_3_2
+#define ucol_openRules ucol_openRules_3_2
+#define ucol_open_internal ucol_open_internal_3_2
+#define ucol_previous ucol_previous_3_2
+#define ucol_primaryOrder ucol_primaryOrder_3_2
+#define ucol_prv_getSpecialCE ucol_prv_getSpecialCE_3_2
+#define ucol_prv_getSpecialPrevCE ucol_prv_getSpecialPrevCE_3_2
+#define ucol_reset ucol_reset_3_2
+#define ucol_restoreVariableTop ucol_restoreVariableTop_3_2
+#define ucol_safeClone ucol_safeClone_3_2
+#define ucol_secondaryOrder ucol_secondaryOrder_3_2
+#define ucol_setAttribute ucol_setAttribute_3_2
+#define ucol_setOffset ucol_setOffset_3_2
+#define ucol_setOptionsFromHeader ucol_setOptionsFromHeader_3_2
+#define ucol_setReqValidLocales ucol_setReqValidLocales_3_2
+#define ucol_setStrength ucol_setStrength_3_2
+#define ucol_setText ucol_setText_3_2
+#define ucol_setVariableTop ucol_setVariableTop_3_2
+#define ucol_shortStringToIdentifier ucol_shortStringToIdentifier_3_2
+#define ucol_sortKeyToString ucol_sortKeyToString_3_2
+#define ucol_strcoll ucol_strcoll_3_2
+#define ucol_strcollIter ucol_strcollIter_3_2
+#define ucol_swap ucol_swap_3_2
+#define ucol_swapBinary ucol_swapBinary_3_2
+#define ucol_swapInverseUCA ucol_swapInverseUCA_3_2
+#define ucol_tertiaryOrder ucol_tertiaryOrder_3_2
+#define ucol_tok_assembleTokenList ucol_tok_assembleTokenList_3_2
+#define ucol_tok_closeTokenList ucol_tok_closeTokenList_3_2
+#define ucol_tok_getNextArgument ucol_tok_getNextArgument_3_2
+#define ucol_tok_initTokenList ucol_tok_initTokenList_3_2
+#define ucol_tok_parseNextToken ucol_tok_parseNextToken_3_2
+#define ucol_updateInternalState ucol_updateInternalState_3_2
+#define ucurr_forLocale ucurr_forLocale_3_2
+#define ucurr_getDefaultFractionDigits ucurr_getDefaultFractionDigits_3_2
+#define ucurr_getName ucurr_getName_3_2
+#define ucurr_getRoundingIncrement ucurr_getRoundingIncrement_3_2
+#define ucurr_register ucurr_register_3_2
+#define ucurr_unregister ucurr_unregister_3_2
+#define udat_applyPattern udat_applyPattern_3_2
+#define udat_clone udat_clone_3_2
+#define udat_close udat_close_3_2
+#define udat_countAvailable udat_countAvailable_3_2
+#define udat_countSymbols udat_countSymbols_3_2
+#define udat_format udat_format_3_2
+#define udat_get2DigitYearStart udat_get2DigitYearStart_3_2
+#define udat_getAvailable udat_getAvailable_3_2
+#define udat_getCalendar udat_getCalendar_3_2
+#define udat_getLocaleByType udat_getLocaleByType_3_2
+#define udat_getNumberFormat udat_getNumberFormat_3_2
+#define udat_getSymbols udat_getSymbols_3_2
+#define udat_isLenient udat_isLenient_3_2
+#define udat_open udat_open_3_2
+#define udat_parse udat_parse_3_2
+#define udat_parseCalendar udat_parseCalendar_3_2
+#define udat_set2DigitYearStart udat_set2DigitYearStart_3_2
+#define udat_setCalendar udat_setCalendar_3_2
+#define udat_setLenient udat_setLenient_3_2
+#define udat_setNumberFormat udat_setNumberFormat_3_2
+#define udat_setSymbols udat_setSymbols_3_2
+#define udat_toPattern udat_toPattern_3_2
+#define udata_checkCommonData udata_checkCommonData_3_2
+#define udata_close udata_close_3_2
+#define udata_closeSwapper udata_closeSwapper_3_2
+#define udata_getHeaderSize udata_getHeaderSize_3_2
+#define udata_getInfo udata_getInfo_3_2
+#define udata_getInfoSize udata_getInfoSize_3_2
+#define udata_getLength udata_getLength_3_2
+#define udata_getMemory udata_getMemory_3_2
+#define udata_getRawMemory udata_getRawMemory_3_2
+#define udata_open udata_open_3_2
+#define udata_openChoice udata_openChoice_3_2
+#define udata_openSwapper udata_openSwapper_3_2
+#define udata_openSwapperForInputData udata_openSwapperForInputData_3_2
+#define udata_printError udata_printError_3_2
+#define udata_readInt16 udata_readInt16_3_2
+#define udata_readInt32 udata_readInt32_3_2
+#define udata_setAppData udata_setAppData_3_2
+#define udata_setCommonData udata_setCommonData_3_2
+#define udata_swapDataHeader udata_swapDataHeader_3_2
+#define udata_swapInvStringBlock udata_swapInvStringBlock_3_2
+#define uenum_close uenum_close_3_2
+#define uenum_count uenum_count_3_2
+#define uenum_next uenum_next_3_2
+#define uenum_nextDefault uenum_nextDefault_3_2
+#define uenum_openCharStringsEnumeration uenum_openCharStringsEnumeration_3_2
+#define uenum_openStringEnumeration uenum_openStringEnumeration_3_2
+#define uenum_reset uenum_reset_3_2
+#define uenum_unext uenum_unext_3_2
+#define uenum_unextDefault uenum_unextDefault_3_2
+#define ufile_close_translit ufile_close_translit_3_2
+#define ufile_fill_uchar_buffer ufile_fill_uchar_buffer_3_2
+#define ufile_flush_translit ufile_flush_translit_3_2
+#define ufile_getch ufile_getch_3_2
+#define ufile_getch32 ufile_getch32_3_2
+#define ufmt_64tou ufmt_64tou_3_2
+#define ufmt_defaultCPToUnicode ufmt_defaultCPToUnicode_3_2
+#define ufmt_digitvalue ufmt_digitvalue_3_2
+#define ufmt_isdigit ufmt_isdigit_3_2
+#define ufmt_ptou ufmt_ptou_3_2
+#define ufmt_uto64 ufmt_uto64_3_2
+#define ufmt_utop ufmt_utop_3_2
+#define uhash_close uhash_close_3_2
+#define uhash_compareCaselessUnicodeString uhash_compareCaselessUnicodeString_3_2
+#define uhash_compareChars uhash_compareChars_3_2
+#define uhash_compareIChars uhash_compareIChars_3_2
+#define uhash_compareLong uhash_compareLong_3_2
+#define uhash_compareUChars uhash_compareUChars_3_2
+#define uhash_compareUnicodeString uhash_compareUnicodeString_3_2
+#define uhash_count uhash_count_3_2
+#define uhash_deleteHashtable uhash_deleteHashtable_3_2
+#define uhash_deleteUVector uhash_deleteUVector_3_2
+#define uhash_deleteUnicodeString uhash_deleteUnicodeString_3_2
+#define uhash_find uhash_find_3_2
+#define uhash_freeBlock uhash_freeBlock_3_2
+#define uhash_get uhash_get_3_2
+#define uhash_geti uhash_geti_3_2
+#define uhash_hashCaselessUnicodeString uhash_hashCaselessUnicodeString_3_2
+#define uhash_hashChars uhash_hashChars_3_2
+#define uhash_hashIChars uhash_hashIChars_3_2
+#define uhash_hashLong uhash_hashLong_3_2
+#define uhash_hashUChars uhash_hashUChars_3_2
+#define uhash_hashUCharsN uhash_hashUCharsN_3_2
+#define uhash_hashUnicodeString uhash_hashUnicodeString_3_2
+#define uhash_iget uhash_iget_3_2
+#define uhash_igeti uhash_igeti_3_2
+#define uhash_iput uhash_iput_3_2
+#define uhash_iputi uhash_iputi_3_2
+#define uhash_iremove uhash_iremove_3_2
+#define uhash_iremovei uhash_iremovei_3_2
+#define uhash_nextElement uhash_nextElement_3_2
+#define uhash_open uhash_open_3_2
+#define uhash_openSize uhash_openSize_3_2
+#define uhash_put uhash_put_3_2
+#define uhash_puti uhash_puti_3_2
+#define uhash_remove uhash_remove_3_2
+#define uhash_removeAll uhash_removeAll_3_2
+#define uhash_removeElement uhash_removeElement_3_2
+#define uhash_removei uhash_removei_3_2
+#define uhash_setKeyComparator uhash_setKeyComparator_3_2
+#define uhash_setKeyDeleter uhash_setKeyDeleter_3_2
+#define uhash_setKeyHasher uhash_setKeyHasher_3_2
+#define uhash_setResizePolicy uhash_setResizePolicy_3_2
+#define uhash_setValueDeleter uhash_setValueDeleter_3_2
+#define uhash_toki uhash_toki_3_2
+#define uhash_tokp uhash_tokp_3_2
+#define uhst_addPropertyStarts uhst_addPropertyStarts_3_2
+#define uidna_IDNToASCII uidna_IDNToASCII_3_2
+#define uidna_IDNToUnicode uidna_IDNToUnicode_3_2
+#define uidna_compare uidna_compare_3_2
+#define uidna_toASCII uidna_toASCII_3_2
+#define uidna_toUnicode uidna_toUnicode_3_2
+#define uiter_current32 uiter_current32_3_2
+#define uiter_getState uiter_getState_3_2
+#define uiter_next32 uiter_next32_3_2
+#define uiter_previous32 uiter_previous32_3_2
+#define uiter_setCharacterIterator uiter_setCharacterIterator_3_2
+#define uiter_setReplaceable uiter_setReplaceable_3_2
+#define uiter_setState uiter_setState_3_2
+#define uiter_setString uiter_setString_3_2
+#define uiter_setUTF16BE uiter_setUTF16BE_3_2
+#define uiter_setUTF8 uiter_setUTF8_3_2
+#define uloc_acceptLanguage uloc_acceptLanguage_3_2
+#define uloc_acceptLanguageFromHTTP uloc_acceptLanguageFromHTTP_3_2
+#define uloc_canonicalize uloc_canonicalize_3_2
+#define uloc_countAvailable uloc_countAvailable_3_2
+#define uloc_getAvailable uloc_getAvailable_3_2
+#define uloc_getBaseName uloc_getBaseName_3_2
+#define uloc_getCountry uloc_getCountry_3_2
+#define uloc_getDefault uloc_getDefault_3_2
+#define uloc_getDisplayCountry uloc_getDisplayCountry_3_2
+#define uloc_getDisplayKeyword uloc_getDisplayKeyword_3_2
+#define uloc_getDisplayKeywordValue uloc_getDisplayKeywordValue_3_2
+#define uloc_getDisplayLanguage uloc_getDisplayLanguage_3_2
+#define uloc_getDisplayName uloc_getDisplayName_3_2
+#define uloc_getDisplayScript uloc_getDisplayScript_3_2
+#define uloc_getDisplayVariant uloc_getDisplayVariant_3_2
+#define uloc_getISO3Country uloc_getISO3Country_3_2
+#define uloc_getISO3Language uloc_getISO3Language_3_2
+#define uloc_getISOCountries uloc_getISOCountries_3_2
+#define uloc_getISOLanguages uloc_getISOLanguages_3_2
+#define uloc_getKeywordValue uloc_getKeywordValue_3_2
+#define uloc_getLCID uloc_getLCID_3_2
+#define uloc_getLanguage uloc_getLanguage_3_2
+#define uloc_getName uloc_getName_3_2
+#define uloc_getParent uloc_getParent_3_2
+#define uloc_getScript uloc_getScript_3_2
+#define uloc_getVariant uloc_getVariant_3_2
+#define uloc_openKeywordList uloc_openKeywordList_3_2
+#define uloc_openKeywords uloc_openKeywords_3_2
+#define uloc_setDefault uloc_setDefault_3_2
+#define uloc_setKeywordValue uloc_setKeywordValue_3_2
+#define ulocdata_getExemplarSet ulocdata_getExemplarSet_3_2
+#define ulocdata_getMeasurementSystem ulocdata_getMeasurementSystem_3_2
+#define ulocdata_getPaperSize ulocdata_getPaperSize_3_2
+#define umsg_applyPattern umsg_applyPattern_3_2
+#define umsg_clone umsg_clone_3_2
+#define umsg_close umsg_close_3_2
+#define umsg_format umsg_format_3_2
+#define umsg_getLocale umsg_getLocale_3_2
+#define umsg_getLocaleByType umsg_getLocaleByType_3_2
+#define umsg_open umsg_open_3_2
+#define umsg_parse umsg_parse_3_2
+#define umsg_setLocale umsg_setLocale_3_2
+#define umsg_toPattern umsg_toPattern_3_2
+#define umsg_vformat umsg_vformat_3_2
+#define umsg_vparse umsg_vparse_3_2
+#define umtx_atomic_dec umtx_atomic_dec_3_2
+#define umtx_atomic_inc umtx_atomic_inc_3_2
+#define umtx_cleanup umtx_cleanup_3_2
+#define umtx_destroy umtx_destroy_3_2
+#define umtx_init umtx_init_3_2
+#define umtx_lock umtx_lock_3_2
+#define umtx_unlock umtx_unlock_3_2
+#define unorm_addPropertyStarts unorm_addPropertyStarts_3_2
+#define unorm_closeIter unorm_closeIter_3_2
+#define unorm_compare unorm_compare_3_2
+#define unorm_compose unorm_compose_3_2
+#define unorm_concatenate unorm_concatenate_3_2
+#define unorm_decompose unorm_decompose_3_2
+#define unorm_getCanonStartSet unorm_getCanonStartSet_3_2
+#define unorm_getCanonicalDecomposition unorm_getCanonicalDecomposition_3_2
+#define unorm_getDecomposition unorm_getDecomposition_3_2
+#define unorm_getFCD16FromCodePoint unorm_getFCD16FromCodePoint_3_2
+#define unorm_getFCDTrie unorm_getFCDTrie_3_2
+#define unorm_getNX unorm_getNX_3_2
+#define unorm_getQuickCheck unorm_getQuickCheck_3_2
+#define unorm_getUnicodeVersion unorm_getUnicodeVersion_3_2
+#define unorm_haveData unorm_haveData_3_2
+#define unorm_internalIsFullCompositionExclusion unorm_internalIsFullCompositionExclusion_3_2
+#define unorm_internalNormalize unorm_internalNormalize_3_2
+#define unorm_internalNormalizeWithNX unorm_internalNormalizeWithNX_3_2
+#define unorm_internalQuickCheck unorm_internalQuickCheck_3_2
+#define unorm_isCanonSafeStart unorm_isCanonSafeStart_3_2
+#define unorm_isNFSkippable unorm_isNFSkippable_3_2
+#define unorm_isNormalized unorm_isNormalized_3_2
+#define unorm_isNormalizedWithOptions unorm_isNormalizedWithOptions_3_2
+#define unorm_next unorm_next_3_2
+#define unorm_normalize unorm_normalize_3_2
+#define unorm_openIter unorm_openIter_3_2
+#define unorm_previous unorm_previous_3_2
+#define unorm_quickCheck unorm_quickCheck_3_2
+#define unorm_quickCheckWithOptions unorm_quickCheckWithOptions_3_2
+#define unorm_setIter unorm_setIter_3_2
+#define unorm_swap unorm_swap_3_2
+#define unum_applyPattern unum_applyPattern_3_2
+#define unum_clone unum_clone_3_2
+#define unum_close unum_close_3_2
+#define unum_countAvailable unum_countAvailable_3_2
+#define unum_format unum_format_3_2
+#define unum_formatDouble unum_formatDouble_3_2
+#define unum_formatDoubleCurrency unum_formatDoubleCurrency_3_2
+#define unum_formatInt64 unum_formatInt64_3_2
+#define unum_getAttribute unum_getAttribute_3_2
+#define unum_getAvailable unum_getAvailable_3_2
+#define unum_getDoubleAttribute unum_getDoubleAttribute_3_2
+#define unum_getLocaleByType unum_getLocaleByType_3_2
+#define unum_getSymbol unum_getSymbol_3_2
+#define unum_getTextAttribute unum_getTextAttribute_3_2
+#define unum_open unum_open_3_2
+#define unum_parse unum_parse_3_2
+#define unum_parseDouble unum_parseDouble_3_2
+#define unum_parseDoubleCurrency unum_parseDoubleCurrency_3_2
+#define unum_parseInt64 unum_parseInt64_3_2
+#define unum_setAttribute unum_setAttribute_3_2
+#define unum_setDoubleAttribute unum_setDoubleAttribute_3_2
+#define unum_setSymbol unum_setSymbol_3_2
+#define unum_setTextAttribute unum_setTextAttribute_3_2
+#define unum_toPattern unum_toPattern_3_2
+#define upname_swap upname_swap_3_2
+#define uprops_getSource uprops_getSource_3_2
+#define uprops_swap uprops_swap_3_2
+#define uprv_asciiFromEbcdic uprv_asciiFromEbcdic_3_2
+#define uprv_asciitolower uprv_asciitolower_3_2
+#define uprv_ceil uprv_ceil_3_2
+#define uprv_cnttab_addContraction uprv_cnttab_addContraction_3_2
+#define uprv_cnttab_changeContraction uprv_cnttab_changeContraction_3_2
+#define uprv_cnttab_changeLastCE uprv_cnttab_changeLastCE_3_2
+#define uprv_cnttab_clone uprv_cnttab_clone_3_2
+#define uprv_cnttab_close uprv_cnttab_close_3_2
+#define uprv_cnttab_constructTable uprv_cnttab_constructTable_3_2
+#define uprv_cnttab_findCE uprv_cnttab_findCE_3_2
+#define uprv_cnttab_findCP uprv_cnttab_findCP_3_2
+#define uprv_cnttab_getCE uprv_cnttab_getCE_3_2
+#define uprv_cnttab_insertContraction uprv_cnttab_insertContraction_3_2
+#define uprv_cnttab_isTailored uprv_cnttab_isTailored_3_2
+#define uprv_cnttab_open uprv_cnttab_open_3_2
+#define uprv_cnttab_setContraction uprv_cnttab_setContraction_3_2
+#define uprv_compareASCIIPropertyNames uprv_compareASCIIPropertyNames_3_2
+#define uprv_compareEBCDICPropertyNames uprv_compareEBCDICPropertyNames_3_2
+#define uprv_compareInvAscii uprv_compareInvAscii_3_2
+#define uprv_compareInvEbcdic uprv_compareInvEbcdic_3_2
+#define uprv_convertToLCID uprv_convertToLCID_3_2
+#define uprv_convertToPosix uprv_convertToPosix_3_2
+#define uprv_copyAscii uprv_copyAscii_3_2
+#define uprv_copyEbcdic uprv_copyEbcdic_3_2
+#define uprv_dtostr uprv_dtostr_3_2
+#define uprv_ebcdicFromAscii uprv_ebcdicFromAscii_3_2
+#define uprv_ebcdictolower uprv_ebcdictolower_3_2
+#define uprv_fabs uprv_fabs_3_2
+#define uprv_floor uprv_floor_3_2
+#define uprv_fmax uprv_fmax_3_2
+#define uprv_fmin uprv_fmin_3_2
+#define uprv_fmod uprv_fmod_3_2
+#define uprv_free uprv_free_3_2
+#define uprv_getCharNameCharacters uprv_getCharNameCharacters_3_2
+#define uprv_getDefaultCodepage uprv_getDefaultCodepage_3_2
+#define uprv_getDefaultLocaleID uprv_getDefaultLocaleID_3_2
+#define uprv_getInfinity uprv_getInfinity_3_2
+#define uprv_getMaxCharNameLength uprv_getMaxCharNameLength_3_2
+#define uprv_getMaxValues uprv_getMaxValues_3_2
+#define uprv_getNaN uprv_getNaN_3_2
+#define uprv_getStaticCurrencyName uprv_getStaticCurrencyName_3_2
+#define uprv_getUTCtime uprv_getUTCtime_3_2
+#define uprv_haveProperties uprv_haveProperties_3_2
+#define uprv_init_collIterate uprv_init_collIterate_3_2
+#define uprv_int32Comparator uprv_int32Comparator_3_2
+#define uprv_isInfinite uprv_isInfinite_3_2
+#define uprv_isInvariantString uprv_isInvariantString_3_2
+#define uprv_isInvariantUString uprv_isInvariantUString_3_2
+#define uprv_isNaN uprv_isNaN_3_2
+#define uprv_isNegativeInfinity uprv_isNegativeInfinity_3_2
+#define uprv_isPositiveInfinity uprv_isPositiveInfinity_3_2
+#define uprv_isRuleWhiteSpace uprv_isRuleWhiteSpace_3_2
+#define uprv_itou uprv_itou_3_2
+#define uprv_loadPropsData uprv_loadPropsData_3_2
+#define uprv_log uprv_log_3_2
+#define uprv_log10 uprv_log10_3_2
+#define uprv_malloc uprv_malloc_3_2
+#define uprv_mapFile uprv_mapFile_3_2
+#define uprv_max uprv_max_3_2
+#define uprv_maxMantissa uprv_maxMantissa_3_2
+#define uprv_min uprv_min_3_2
+#define uprv_modf uprv_modf_3_2
+#define uprv_openRuleWhiteSpaceSet uprv_openRuleWhiteSpaceSet_3_2
+#define uprv_pathIsAbsolute uprv_pathIsAbsolute_3_2
+#define uprv_pow uprv_pow_3_2
+#define uprv_pow10 uprv_pow10_3_2
+#define uprv_realloc uprv_realloc_3_2
+#define uprv_round uprv_round_3_2
+#define uprv_sortArray uprv_sortArray_3_2
+#define uprv_strCompare uprv_strCompare_3_2
+#define uprv_strdup uprv_strdup_3_2
+#define uprv_strndup uprv_strndup_3_2
+#define uprv_syntaxError uprv_syntaxError_3_2
+#define uprv_timezone uprv_timezone_3_2
+#define uprv_toupper uprv_toupper_3_2
+#define uprv_trunc uprv_trunc_3_2
+#define uprv_tzname uprv_tzname_3_2
+#define uprv_tzset uprv_tzset_3_2
+#define uprv_uca_addAnElement uprv_uca_addAnElement_3_2
+#define uprv_uca_assembleTable uprv_uca_assembleTable_3_2
+#define uprv_uca_canonicalClosure uprv_uca_canonicalClosure_3_2
+#define uprv_uca_cloneTempTable uprv_uca_cloneTempTable_3_2
+#define uprv_uca_closeTempTable uprv_uca_closeTempTable_3_2
+#define uprv_uca_getCodePointFromRaw uprv_uca_getCodePointFromRaw_3_2
+#define uprv_uca_getImplicitFromRaw uprv_uca_getImplicitFromRaw_3_2
+#define uprv_uca_getImplicitPrimary uprv_uca_getImplicitPrimary_3_2
+#define uprv_uca_getRawFromCodePoint uprv_uca_getRawFromCodePoint_3_2
+#define uprv_uca_getRawFromImplicit uprv_uca_getRawFromImplicit_3_2
+#define uprv_uca_initImplicitConstants uprv_uca_initImplicitConstants_3_2
+#define uprv_uca_initTempTable uprv_uca_initTempTable_3_2
+#define uprv_uint16Comparator uprv_uint16Comparator_3_2
+#define uprv_uint32Comparator uprv_uint32Comparator_3_2
+#define uprv_unmapFile uprv_unmapFile_3_2
+#define uregex_appendReplacement uregex_appendReplacement_3_2
+#define uregex_appendTail uregex_appendTail_3_2
+#define uregex_clone uregex_clone_3_2
+#define uregex_close uregex_close_3_2
+#define uregex_end uregex_end_3_2
+#define uregex_find uregex_find_3_2
+#define uregex_findNext uregex_findNext_3_2
+#define uregex_flags uregex_flags_3_2
+#define uregex_getText uregex_getText_3_2
+#define uregex_group uregex_group_3_2
+#define uregex_groupCount uregex_groupCount_3_2
+#define uregex_lookingAt uregex_lookingAt_3_2
+#define uregex_matches uregex_matches_3_2
+#define uregex_open uregex_open_3_2
+#define uregex_openC uregex_openC_3_2
+#define uregex_pattern uregex_pattern_3_2
+#define uregex_replaceAll uregex_replaceAll_3_2
+#define uregex_replaceFirst uregex_replaceFirst_3_2
+#define uregex_reset uregex_reset_3_2
+#define uregex_setText uregex_setText_3_2
+#define uregex_split uregex_split_3_2
+#define uregex_start uregex_start_3_2
+#define ures_appendResPath ures_appendResPath_3_2
+#define ures_close ures_close_3_2
+#define ures_copyResb ures_copyResb_3_2
+#define ures_countArrayItems ures_countArrayItems_3_2
+#define ures_findResource ures_findResource_3_2
+#define ures_findSubResource ures_findSubResource_3_2
+#define ures_freeResPath ures_freeResPath_3_2
+#define ures_getBinary ures_getBinary_3_2
+#define ures_getByIndex ures_getByIndex_3_2
+#define ures_getByKey ures_getByKey_3_2
+#define ures_getByKeyWithFallback ures_getByKeyWithFallback_3_2
+#define ures_getFunctionalEquivalent ures_getFunctionalEquivalent_3_2
+#define ures_getInt ures_getInt_3_2
+#define ures_getIntVector ures_getIntVector_3_2
+#define ures_getKey ures_getKey_3_2
+#define ures_getKeywordValues ures_getKeywordValues_3_2
+#define ures_getLocale ures_getLocale_3_2
+#define ures_getLocaleByType ures_getLocaleByType_3_2
+#define ures_getName ures_getName_3_2
+#define ures_getNextResource ures_getNextResource_3_2
+#define ures_getNextString ures_getNextString_3_2
+#define ures_getPath ures_getPath_3_2
+#define ures_getSize ures_getSize_3_2
+#define ures_getString ures_getString_3_2
+#define ures_getStringByIndex ures_getStringByIndex_3_2
+#define ures_getStringByKey ures_getStringByKey_3_2
+#define ures_getType ures_getType_3_2
+#define ures_getUInt ures_getUInt_3_2
+#define ures_getVersion ures_getVersion_3_2
+#define ures_getVersionNumber ures_getVersionNumber_3_2
+#define ures_hasNext ures_hasNext_3_2
+#define ures_initStackObject ures_initStackObject_3_2
+#define ures_open ures_open_3_2
+#define ures_openAvailableLocales ures_openAvailableLocales_3_2
+#define ures_openDirect ures_openDirect_3_2
+#define ures_openFillIn ures_openFillIn_3_2
+#define ures_openU ures_openU_3_2
+#define ures_resetIterator ures_resetIterator_3_2
+#define ures_swap ures_swap_3_2
+#define uscript_closeRun uscript_closeRun_3_2
+#define uscript_getCode uscript_getCode_3_2
+#define uscript_getName uscript_getName_3_2
+#define uscript_getScript uscript_getScript_3_2
+#define uscript_getShortName uscript_getShortName_3_2
+#define uscript_nextRun uscript_nextRun_3_2
+#define uscript_openRun uscript_openRun_3_2
+#define uscript_resetRun uscript_resetRun_3_2
+#define uscript_setRunText uscript_setRunText_3_2
+#define usearch_close usearch_close_3_2
+#define usearch_first usearch_first_3_2
+#define usearch_following usearch_following_3_2
+#define usearch_getAttribute usearch_getAttribute_3_2
+#define usearch_getBreakIterator usearch_getBreakIterator_3_2
+#define usearch_getCollator usearch_getCollator_3_2
+#define usearch_getMatchedLength usearch_getMatchedLength_3_2
+#define usearch_getMatchedStart usearch_getMatchedStart_3_2
+#define usearch_getMatchedText usearch_getMatchedText_3_2
+#define usearch_getOffset usearch_getOffset_3_2
+#define usearch_getPattern usearch_getPattern_3_2
+#define usearch_getText usearch_getText_3_2
+#define usearch_handleNextCanonical usearch_handleNextCanonical_3_2
+#define usearch_handleNextExact usearch_handleNextExact_3_2
+#define usearch_handlePreviousCanonical usearch_handlePreviousCanonical_3_2
+#define usearch_handlePreviousExact usearch_handlePreviousExact_3_2
+#define usearch_last usearch_last_3_2
+#define usearch_next usearch_next_3_2
+#define usearch_open usearch_open_3_2
+#define usearch_openFromCollator usearch_openFromCollator_3_2
+#define usearch_preceding usearch_preceding_3_2
+#define usearch_previous usearch_previous_3_2
+#define usearch_reset usearch_reset_3_2
+#define usearch_setAttribute usearch_setAttribute_3_2
+#define usearch_setBreakIterator usearch_setBreakIterator_3_2
+#define usearch_setCollator usearch_setCollator_3_2
+#define usearch_setOffset usearch_setOffset_3_2
+#define usearch_setPattern usearch_setPattern_3_2
+#define usearch_setText usearch_setText_3_2
+#define userv_deleteStringPair userv_deleteStringPair_3_2
+#define uset_add uset_add_3_2
+#define uset_addAll uset_addAll_3_2
+#define uset_addRange uset_addRange_3_2
+#define uset_addString uset_addString_3_2
+#define uset_applyIntPropertyValue uset_applyIntPropertyValue_3_2
+#define uset_applyPattern uset_applyPattern_3_2
+#define uset_applyPropertyAlias uset_applyPropertyAlias_3_2
+#define uset_charAt uset_charAt_3_2
+#define uset_clear uset_clear_3_2
+#define uset_close uset_close_3_2
+#define uset_compact uset_compact_3_2
+#define uset_complement uset_complement_3_2
+#define uset_complementAll uset_complementAll_3_2
+#define uset_contains uset_contains_3_2
+#define uset_containsAll uset_containsAll_3_2
+#define uset_containsNone uset_containsNone_3_2
+#define uset_containsRange uset_containsRange_3_2
+#define uset_containsSome uset_containsSome_3_2
+#define uset_containsString uset_containsString_3_2
+#define uset_equals uset_equals_3_2
+#define uset_getItem uset_getItem_3_2
+#define uset_getItemCount uset_getItemCount_3_2
+#define uset_getSerializedRange uset_getSerializedRange_3_2
+#define uset_getSerializedRangeCount uset_getSerializedRangeCount_3_2
+#define uset_getSerializedSet uset_getSerializedSet_3_2
+#define uset_indexOf uset_indexOf_3_2
+#define uset_isEmpty uset_isEmpty_3_2
+#define uset_open uset_open_3_2
+#define uset_openPattern uset_openPattern_3_2
+#define uset_openPatternOptions uset_openPatternOptions_3_2
+#define uset_remove uset_remove_3_2
+#define uset_removeAll uset_removeAll_3_2
+#define uset_removeRange uset_removeRange_3_2
+#define uset_removeString uset_removeString_3_2
+#define uset_resemblesPattern uset_resemblesPattern_3_2
+#define uset_retain uset_retain_3_2
+#define uset_retainAll uset_retainAll_3_2
+#define uset_serialize uset_serialize_3_2
+#define uset_serializedContains uset_serializedContains_3_2
+#define uset_set uset_set_3_2
+#define uset_setSerializedToOne uset_setSerializedToOne_3_2
+#define uset_size uset_size_3_2
+#define uset_toPattern uset_toPattern_3_2
+#define usprep_close usprep_close_3_2
+#define usprep_open usprep_open_3_2
+#define usprep_prepare usprep_prepare_3_2
+#define usprep_swap usprep_swap_3_2
+#define ustr_foldCase ustr_foldCase_3_2
+#define ustr_toLower ustr_toLower_3_2
+#define ustr_toTitle ustr_toTitle_3_2
+#define ustr_toUpper ustr_toUpper_3_2
+#define utf8_appendCharSafeBody utf8_appendCharSafeBody_3_2
+#define utf8_back1SafeBody utf8_back1SafeBody_3_2
+#define utf8_countTrailBytes utf8_countTrailBytes_3_2
+#define utf8_nextCharSafeBody utf8_nextCharSafeBody_3_2
+#define utf8_prevCharSafeBody utf8_prevCharSafeBody_3_2
+#define utmscale_fromInt64 utmscale_fromInt64_3_2
+#define utmscale_getTimeScaleValue utmscale_getTimeScaleValue_3_2
+#define utmscale_toInt64 utmscale_toInt64_3_2
+#define utrace_cleanup utrace_cleanup_3_2
+#define utrace_data utrace_data_3_2
+#define utrace_entry utrace_entry_3_2
+#define utrace_exit utrace_exit_3_2
+#define utrace_format utrace_format_3_2
+#define utrace_functionName utrace_functionName_3_2
+#define utrace_getFunctions utrace_getFunctions_3_2
+#define utrace_getLevel utrace_getLevel_3_2
+#define utrace_level utrace_level_3_2
+#define utrace_setFunctions utrace_setFunctions_3_2
+#define utrace_setLevel utrace_setLevel_3_2
+#define utrace_vformat utrace_vformat_3_2
+#define utrans_clone utrans_clone_3_2
+#define utrans_close utrans_close_3_2
+#define utrans_countAvailableIDs utrans_countAvailableIDs_3_2
+#define utrans_getAvailableID utrans_getAvailableID_3_2
+#define utrans_getID utrans_getID_3_2
+#define utrans_getUnicodeID utrans_getUnicodeID_3_2
+#define utrans_open utrans_open_3_2
+#define utrans_openIDs utrans_openIDs_3_2
+#define utrans_openInverse utrans_openInverse_3_2
+#define utrans_openU utrans_openU_3_2
+#define utrans_register utrans_register_3_2
+#define utrans_rep_caseContextIterator utrans_rep_caseContextIterator_3_2
+#define utrans_setFilter utrans_setFilter_3_2
+#define utrans_trans utrans_trans_3_2
+#define utrans_transIncremental utrans_transIncremental_3_2
+#define utrans_transIncrementalUChars utrans_transIncrementalUChars_3_2
+#define utrans_transUChars utrans_transUChars_3_2
+#define utrans_unregister utrans_unregister_3_2
+#define utrans_unregisterID utrans_unregisterID_3_2
+#define utrie_clone utrie_clone_3_2
+#define utrie_close utrie_close_3_2
+#define utrie_enum utrie_enum_3_2
+#define utrie_get32 utrie_get32_3_2
+#define utrie_getData utrie_getData_3_2
+#define utrie_open utrie_open_3_2
+#define utrie_serialize utrie_serialize_3_2
+#define utrie_set32 utrie_set32_3_2
+#define utrie_setRange32 utrie_setRange32_3_2
+#define utrie_swap utrie_swap_3_2
+#define utrie_unserialize utrie_unserialize_3_2
+/* C++ class names renaming defines */
+
+#ifdef XP_CPLUSPLUS
+#if !U_HAVE_NAMESPACE
+
+#define AbsoluteValueSubstitution AbsoluteValueSubstitution_3_2
+#define AlternateSubstitutionSubtable AlternateSubstitutionSubtable_3_2
+#define AnchorTable AnchorTable_3_2
+#define AnyTransliterator AnyTransliterator_3_2
+#define ArabicOpenTypeLayoutEngine ArabicOpenTypeLayoutEngine_3_2
+#define ArabicShaping ArabicShaping_3_2
+#define BasicCalendarFactory BasicCalendarFactory_3_2
+#define BinarySearchLookupTable BinarySearchLookupTable_3_2
+#define BreakDictionary BreakDictionary_3_2
+#define BreakIterator BreakIterator_3_2
+#define BuddhistCalendar BuddhistCalendar_3_2
+#define CFactory CFactory_3_2
+#define Calendar Calendar_3_2
+#define CalendarAstronomer CalendarAstronomer_3_2
+#define CalendarCache CalendarCache_3_2
+#define CalendarData CalendarData_3_2
+#define CalendarService CalendarService_3_2
+#define CanonShaping CanonShaping_3_2
+#define CanonicalIterator CanonicalIterator_3_2
+#define CaseMapTransliterator CaseMapTransliterator_3_2
+#define ChainingContextualSubstitutionFormat1Subtable ChainingContextualSubstitutionFormat1Subtable_3_2
+#define ChainingContextualSubstitutionFormat2Subtable ChainingContextualSubstitutionFormat2Subtable_3_2
+#define ChainingContextualSubstitutionFormat3Subtable ChainingContextualSubstitutionFormat3Subtable_3_2
+#define ChainingContextualSubstitutionSubtable ChainingContextualSubstitutionSubtable_3_2
+#define CharSubstitutionFilter CharSubstitutionFilter_3_2
+#define CharacterIterator CharacterIterator_3_2
+#define ChoiceFormat ChoiceFormat_3_2
+#define ClassDefFormat1Table ClassDefFormat1Table_3_2
+#define ClassDefFormat2Table ClassDefFormat2Table_3_2
+#define ClassDefinitionTable ClassDefinitionTable_3_2
+#define CollationElementIterator CollationElementIterator_3_2
+#define CollationKey CollationKey_3_2
+#define Collator Collator_3_2
+#define CollatorFactory CollatorFactory_3_2
+#define CompoundTransliterator CompoundTransliterator_3_2
+#define ContextualGlyphSubstitutionProcessor ContextualGlyphSubstitutionProcessor_3_2
+#define ContextualSubstitutionBase ContextualSubstitutionBase_3_2
+#define ContextualSubstitutionFormat1Subtable ContextualSubstitutionFormat1Subtable_3_2
+#define ContextualSubstitutionFormat2Subtable ContextualSubstitutionFormat2Subtable_3_2
+#define ContextualSubstitutionFormat3Subtable ContextualSubstitutionFormat3Subtable_3_2
+#define ContextualSubstitutionSubtable ContextualSubstitutionSubtable_3_2
+#define CoverageFormat1Table CoverageFormat1Table_3_2
+#define CoverageFormat2Table CoverageFormat2Table_3_2
+#define CoverageTable CoverageTable_3_2
+#define CurrencyAmount CurrencyAmount_3_2
+#define CurrencyFormat CurrencyFormat_3_2
+#define CurrencyUnit CurrencyUnit_3_2
+#define CursiveAttachmentSubtable CursiveAttachmentSubtable_3_2
+#define DateFormat DateFormat_3_2
+#define DateFormatSymbols DateFormatSymbols_3_2
+#define DecimalFormat DecimalFormat_3_2
+#define DecimalFormatSymbols DecimalFormatSymbols_3_2
+#define DefaultCalendarFactory DefaultCalendarFactory_3_2
+#define DefaultCharMapper DefaultCharMapper_3_2
+#define DeviceTable DeviceTable_3_2
+#define DictionaryBasedBreakIterator DictionaryBasedBreakIterator_3_2
+#define DictionaryBasedBreakIteratorTables DictionaryBasedBreakIteratorTables_3_2
+#define DigitList DigitList_3_2
+#define Entry Entry_3_2
+#define EnumToOffset EnumToOffset_3_2
+#define EscapeTransliterator EscapeTransliterator_3_2
+#define EventListener EventListener_3_2
+#define ExtensionSubtable ExtensionSubtable_3_2
+#define FeatureListTable FeatureListTable_3_2
+#define FieldPosition FieldPosition_3_2
+#define FontRuns FontRuns_3_2
+#define Format Format_3_2
+#define Format1AnchorTable Format1AnchorTable_3_2
+#define Format2AnchorTable Format2AnchorTable_3_2
+#define Format3AnchorTable Format3AnchorTable_3_2
+#define Formattable Formattable_3_2
+#define ForwardCharacterIterator ForwardCharacterIterator_3_2
+#define FractionalPartSubstitution FractionalPartSubstitution_3_2
+#define FunctionReplacer FunctionReplacer_3_2
+#define GDEFMarkFilter GDEFMarkFilter_3_2
+#define GXLayoutEngine GXLayoutEngine_3_2
+#define GlyphDefinitionTableHeader GlyphDefinitionTableHeader_3_2
+#define GlyphIterator GlyphIterator_3_2
+#define GlyphLookupTableHeader GlyphLookupTableHeader_3_2
+#define GlyphPositioningLookupProcessor GlyphPositioningLookupProcessor_3_2
+#define GlyphPositioningTableHeader GlyphPositioningTableHeader_3_2
+#define GlyphSubstitutionLookupProcessor GlyphSubstitutionLookupProcessor_3_2
+#define GlyphSubstitutionTableHeader GlyphSubstitutionTableHeader_3_2
+#define Grego Grego_3_2
+#define GregorianCalendar GregorianCalendar_3_2
+#define HanOpenTypeLayoutEngine HanOpenTypeLayoutEngine_3_2
+#define HebrewCalendar HebrewCalendar_3_2
+#define ICUBreakIteratorFactory ICUBreakIteratorFactory_3_2
+#define ICUBreakIteratorService ICUBreakIteratorService_3_2
+#define ICUCollatorFactory ICUCollatorFactory_3_2
+#define ICUCollatorService ICUCollatorService_3_2
+#define ICULayoutEngine ICULayoutEngine_3_2
+#define ICULocaleService ICULocaleService_3_2
+#define ICUNotifier ICUNotifier_3_2
+#define ICUNumberFormatFactory ICUNumberFormatFactory_3_2
+#define ICUNumberFormatService ICUNumberFormatService_3_2
+#define ICUResourceBundleFactory ICUResourceBundleFactory_3_2
+#define ICUService ICUService_3_2
+#define ICUServiceFactory ICUServiceFactory_3_2
+#define ICUServiceKey ICUServiceKey_3_2
+#define ICU_Utility ICU_Utility_3_2
+#define IndicClassTable IndicClassTable_3_2
+#define IndicOpenTypeLayoutEngine IndicOpenTypeLayoutEngine_3_2
+#define IndicRearrangementProcessor IndicRearrangementProcessor_3_2
+#define IndicReordering IndicReordering_3_2
+#define IntegralPartSubstitution IntegralPartSubstitution_3_2
+#define IslamicCalendar IslamicCalendar_3_2
+#define JapaneseCalendar JapaneseCalendar_3_2
+#define KeywordEnumeration KeywordEnumeration_3_2
+#define LECharMapper LECharMapper_3_2
+#define LEFontInstance LEFontInstance_3_2
+#define LEGlyphFilter LEGlyphFilter_3_2
+#define LEGlyphStorage LEGlyphStorage_3_2
+#define LEInsertionCallback LEInsertionCallback_3_2
+#define LEInsertionList LEInsertionList_3_2
+#define LXUtilities LXUtilities_3_2
+#define LayoutEngine LayoutEngine_3_2
+#define LigatureSubstitutionProcessor LigatureSubstitutionProcessor_3_2
+#define LigatureSubstitutionSubtable LigatureSubstitutionSubtable_3_2
+#define LocDataParser LocDataParser_3_2
+#define Locale Locale_3_2
+#define LocaleBased LocaleBased_3_2
+#define LocaleKey LocaleKey_3_2
+#define LocaleKeyFactory LocaleKeyFactory_3_2
+#define LocaleRuns LocaleRuns_3_2
+#define LocaleUtility LocaleUtility_3_2
+#define LocalizationInfo LocalizationInfo_3_2
+#define LookupListTable LookupListTable_3_2
+#define LookupProcessor LookupProcessor_3_2
+#define LookupSubtable LookupSubtable_3_2
+#define LookupTable LookupTable_3_2
+#define LowercaseTransliterator LowercaseTransliterator_3_2
+#define MPreFixups MPreFixups_3_2
+#define MarkArray MarkArray_3_2
+#define MarkToBasePositioningSubtable MarkToBasePositioningSubtable_3_2
+#define MarkToLigaturePositioningSubtable MarkToLigaturePositioningSubtable_3_2
+#define MarkToMarkPositioningSubtable MarkToMarkPositioningSubtable_3_2
+#define Math Math_3_2
+#define Measure Measure_3_2
+#define MeasureFormat MeasureFormat_3_2
+#define MeasureUnit MeasureUnit_3_2
+#define MessageFormat MessageFormat_3_2
+#define MessageFormatAdapter MessageFormatAdapter_3_2
+#define ModulusSubstitution ModulusSubstitution_3_2
+#define MoonRiseSetCoordFunc MoonRiseSetCoordFunc_3_2
+#define MoonTimeAngleFunc MoonTimeAngleFunc_3_2
+#define MorphSubtableHeader MorphSubtableHeader_3_2
+#define MorphTableHeader MorphTableHeader_3_2
+#define MultipleSubstitutionSubtable MultipleSubstitutionSubtable_3_2
+#define MultiplierSubstitution MultiplierSubstitution_3_2
+#define NFFactory NFFactory_3_2
+#define NFRule NFRule_3_2
+#define NFRuleSet NFRuleSet_3_2
+#define NFSubstitution NFSubstitution_3_2
+#define NameToEnum NameToEnum_3_2
+#define NameUnicodeTransliterator NameUnicodeTransliterator_3_2
+#define NonContextualGlyphSubstitutionProcessor NonContextualGlyphSubstitutionProcessor_3_2
+#define NonContiguousEnumToOffset NonContiguousEnumToOffset_3_2
+#define NormalizationTransliterator NormalizationTransliterator_3_2
+#define Normalizer Normalizer_3_2
+#define NullSubstitution NullSubstitution_3_2
+#define NullTransliterator NullTransliterator_3_2
+#define NumberFormat NumberFormat_3_2
+#define NumberFormatFactory NumberFormatFactory_3_2
+#define NumeratorSubstitution NumeratorSubstitution_3_2
+#define OlsonTimeZone OlsonTimeZone_3_2
+#define OpenTypeLayoutEngine OpenTypeLayoutEngine_3_2
+#define OpenTypeUtilities OpenTypeUtilities_3_2
+#define PairPositioningFormat1Subtable PairPositioningFormat1Subtable_3_2
+#define PairPositioningFormat2Subtable PairPositioningFormat2Subtable_3_2
+#define PairPositioningSubtable PairPositioningSubtable_3_2
+#define ParagraphLayout ParagraphLayout_3_2
+#define ParseData ParseData_3_2
+#define ParsePosition ParsePosition_3_2
+#define PropertyAliases PropertyAliases_3_2
+#define Quantifier Quantifier_3_2
+#define RBBIDataWrapper RBBIDataWrapper_3_2
+#define RBBINode RBBINode_3_2
+#define RBBIRuleBuilder RBBIRuleBuilder_3_2
+#define RBBIRuleScanner RBBIRuleScanner_3_2
+#define RBBISetBuilder RBBISetBuilder_3_2
+#define RBBIStateDescriptor RBBIStateDescriptor_3_2
+#define RBBISymbolTable RBBISymbolTable_3_2
+#define RBBISymbolTableEntry RBBISymbolTableEntry_3_2
+#define RBBITableBuilder RBBITableBuilder_3_2
+#define RangeDescriptor RangeDescriptor_3_2
+#define RegexCompile RegexCompile_3_2
+#define RegexMatcher RegexMatcher_3_2
+#define RegexPattern RegexPattern_3_2
+#define RegexStaticSets RegexStaticSets_3_2
+#define RemoveTransliterator RemoveTransliterator_3_2
+#define Replaceable Replaceable_3_2
+#define ReplaceableGlue ReplaceableGlue_3_2
+#define ResourceBundle ResourceBundle_3_2
+#define RiseSetCoordFunc RiseSetCoordFunc_3_2
+#define RuleBasedBreakIterator RuleBasedBreakIterator_3_2
+#define RuleBasedCollator RuleBasedCollator_3_2
+#define RuleBasedNumberFormat RuleBasedNumberFormat_3_2
+#define RuleBasedTransliterator RuleBasedTransliterator_3_2
+#define RuleCharacterIterator RuleCharacterIterator_3_2
+#define RuleHalf RuleHalf_3_2
+#define RunArray RunArray_3_2
+#define SameValueSubstitution SameValueSubstitution_3_2
+#define ScriptListTable ScriptListTable_3_2
+#define ScriptRunIterator ScriptRunIterator_3_2
+#define ScriptTable ScriptTable_3_2
+#define SearchIterator SearchIterator_3_2
+#define SegmentArrayProcessor SegmentArrayProcessor_3_2
+#define SegmentSingleProcessor SegmentSingleProcessor_3_2
+#define ServiceEnumeration ServiceEnumeration_3_2
+#define ServiceListener ServiceListener_3_2
+#define SimpleArrayProcessor SimpleArrayProcessor_3_2
+#define SimpleDateFormat SimpleDateFormat_3_2
+#define SimpleFactory SimpleFactory_3_2
+#define SimpleLocaleKeyFactory SimpleLocaleKeyFactory_3_2
+#define SimpleNumberFormatFactory SimpleNumberFormatFactory_3_2
+#define SimpleTimeZone SimpleTimeZone_3_2
+#define SinglePositioningFormat1Subtable SinglePositioningFormat1Subtable_3_2
+#define SinglePositioningFormat2Subtable SinglePositioningFormat2Subtable_3_2
+#define SinglePositioningSubtable SinglePositioningSubtable_3_2
+#define SingleSubstitutionFormat1Subtable SingleSubstitutionFormat1Subtable_3_2
+#define SingleSubstitutionFormat2Subtable SingleSubstitutionFormat2Subtable_3_2
+#define SingleSubstitutionSubtable SingleSubstitutionSubtable_3_2
+#define SingleTableProcessor SingleTableProcessor_3_2
+#define Spec Spec_3_2
+#define StateTableProcessor StateTableProcessor_3_2
+#define StringCharacterIterator StringCharacterIterator_3_2
+#define StringEnumeration StringEnumeration_3_2
+#define StringLocalizationInfo StringLocalizationInfo_3_2
+#define StringMatcher StringMatcher_3_2
+#define StringPair StringPair_3_2
+#define StringReplacer StringReplacer_3_2
+#define StringSearch StringSearch_3_2
+#define StyleRuns StyleRuns_3_2
+#define SubstitutionLookup SubstitutionLookup_3_2
+#define SubtableProcessor SubtableProcessor_3_2
+#define SunTimeAngleFunc SunTimeAngleFunc_3_2
+#define SymbolTable SymbolTable_3_2
+#define TZEnumeration TZEnumeration_3_2
+#define ThaiLayoutEngine ThaiLayoutEngine_3_2
+#define ThaiShaping ThaiShaping_3_2
+#define TimeZone TimeZone_3_2
+#define TitlecaseTransliterator TitlecaseTransliterator_3_2
+#define TransliterationRule TransliterationRule_3_2
+#define TransliterationRuleData TransliterationRuleData_3_2
+#define TransliterationRuleSet TransliterationRuleSet_3_2
+#define Transliterator Transliterator_3_2
+#define TransliteratorAlias TransliteratorAlias_3_2
+#define TransliteratorIDParser TransliteratorIDParser_3_2
+#define TransliteratorParser TransliteratorParser_3_2
+#define TransliteratorRegistry TransliteratorRegistry_3_2
+#define TrimmedArrayProcessor TrimmedArrayProcessor_3_2
+#define UCharCharacterIterator UCharCharacterIterator_3_2
+#define UMemory UMemory_3_2
+#define UObject UObject_3_2
+#define UStack UStack_3_2
+#define UStringEnumeration UStringEnumeration_3_2
+#define UVector UVector_3_2
+#define UVector32 UVector32_3_2
+#define UnescapeTransliterator UnescapeTransliterator_3_2
+#define UnicodeArabicOpenTypeLayoutEngine UnicodeArabicOpenTypeLayoutEngine_3_2
+#define UnicodeFilter UnicodeFilter_3_2
+#define UnicodeFunctor UnicodeFunctor_3_2
+#define UnicodeMatcher UnicodeMatcher_3_2
+#define UnicodeNameTransliterator UnicodeNameTransliterator_3_2
+#define UnicodeReplacer UnicodeReplacer_3_2
+#define UnicodeSet UnicodeSet_3_2
+#define UnicodeSetIterator UnicodeSetIterator_3_2
+#define UnicodeString UnicodeString_3_2
+#define UppercaseTransliterator UppercaseTransliterator_3_2
+#define ValueRecord ValueRecord_3_2
+#define ValueRuns ValueRuns_3_2
+#define locale_set_default_internal locale_set_default_internal_3_2
+#define uprv_parseCurrency uprv_parseCurrency_3_2
+#define util64_fromDouble util64_fromDouble_3_2
+#define util64_pow util64_pow_3_2
+#define util64_tou util64_tou_3_2
+#define util64_utoi util64_utoi_3_2
+
+#endif
+#endif
+
+#endif
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uscript.h b/Source/WebCore/icu/unicode/uscript.h
new file mode 100644
index 0000000..f31d748
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uscript.h
@@ -0,0 +1,148 @@
+/*
+**********************************************************************
+* Copyright (C) 1997-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File USCRIPT.H
+*
+* Modification History:
+*
+* Date Name Description
+* 07/06/2001 Ram Creation.
+******************************************************************************
+*/
+#ifndef USCRIPT_H
+#define USCRIPT_H
+#include "unicode/utypes.h"
+
+/**
+ * Constants for Unicode script values from ScriptNames.txt .
+ *
+ * @stable ICU 2.2
+ */
+typedef enum UScriptCode {
+ USCRIPT_INVALID_CODE = -1,
+ USCRIPT_COMMON = 0 , /* Zyyy */
+ USCRIPT_INHERITED = 1, /* Qaai */
+ USCRIPT_ARABIC = 2, /* Arab */
+ USCRIPT_ARMENIAN = 3, /* Armn */
+ USCRIPT_BENGALI = 4, /* Beng */
+ USCRIPT_BOPOMOFO = 5, /* Bopo */
+ USCRIPT_CHEROKEE = 6, /* Cher */
+ USCRIPT_COPTIC = 7, /* Copt */
+ USCRIPT_CYRILLIC = 8, /* Cyrl (Cyrs) */
+ USCRIPT_DESERET = 9, /* Dsrt */
+ USCRIPT_DEVANAGARI = 10, /* Deva */
+ USCRIPT_ETHIOPIC = 11, /* Ethi */
+ USCRIPT_GEORGIAN = 12, /* Geor (Geon, Geoa) */
+ USCRIPT_GOTHIC = 13, /* Goth */
+ USCRIPT_GREEK = 14, /* Grek */
+ USCRIPT_GUJARATI = 15, /* Gujr */
+ USCRIPT_GURMUKHI = 16, /* Guru */
+ USCRIPT_HAN = 17, /* Hani */
+ USCRIPT_HANGUL = 18, /* Hang */
+ USCRIPT_HEBREW = 19, /* Hebr */
+ USCRIPT_HIRAGANA = 20, /* Hira */
+ USCRIPT_KANNADA = 21, /* Knda */
+ USCRIPT_KATAKANA = 22, /* Kana */
+ USCRIPT_KHMER = 23, /* Khmr */
+ USCRIPT_LAO = 24, /* Laoo */
+ USCRIPT_LATIN = 25, /* Latn (Latf, Latg) */
+ USCRIPT_MALAYALAM = 26, /* Mlym */
+ USCRIPT_MONGOLIAN = 27, /* Mong */
+ USCRIPT_MYANMAR = 28, /* Mymr */
+ USCRIPT_OGHAM = 29, /* Ogam */
+ USCRIPT_OLD_ITALIC = 30, /* Ital */
+ USCRIPT_ORIYA = 31, /* Orya */
+ USCRIPT_RUNIC = 32, /* Runr */
+ USCRIPT_SINHALA = 33, /* Sinh */
+ USCRIPT_SYRIAC = 34, /* Syrc (Syrj, Syrn, Syre) */
+ USCRIPT_TAMIL = 35, /* Taml */
+ USCRIPT_TELUGU = 36, /* Telu */
+ USCRIPT_THAANA = 37, /* Thaa */
+ USCRIPT_THAI = 38, /* Thai */
+ USCRIPT_TIBETAN = 39, /* Tibt */
+ /** Canadian_Aboriginal script. @stable ICU 2.6 */
+ USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
+ /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
+ USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
+ USCRIPT_YI = 41, /* Yiii */
+ USCRIPT_TAGALOG = 42, /* Tglg */
+ USCRIPT_HANUNOO = 43, /* Hano */
+ USCRIPT_BUHID = 44, /* Buhd */
+ USCRIPT_TAGBANWA = 45, /* Tagb */
+
+ /* New scripts in Unicode 4 @stable ICU 2.6 */
+ USCRIPT_BRAILLE, /* Brai */
+ USCRIPT_CYPRIOT, /* Cprt */
+ USCRIPT_LIMBU, /* Limb */
+ USCRIPT_LINEAR_B, /* Linb */
+ USCRIPT_OSMANYA, /* Osma */
+ USCRIPT_SHAVIAN, /* Shaw */
+ USCRIPT_TAI_LE, /* Tale */
+ USCRIPT_UGARITIC, /* Ugar */
+
+ /** New script code in Unicode 4.0.1 @draft ICU 3.0 */
+ USCRIPT_KATAKANA_OR_HIRAGANA,/*Hrkt */
+
+ USCRIPT_CODE_LIMIT
+} UScriptCode;
+
+/**
+ * Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
+ * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym".
+ * Fills in USCRIPT_LATIN given "en" OR "en_US"
+ * If required capacity is greater than capacity of the destination buffer then the error code
+ * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned
+ *
+ * <p>Note: To search by short or long script alias only, use
+ * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does
+ * a fast lookup with no access of the locale data.
+ * @param nameOrAbbrOrLocale name of the script, as given in
+ * PropertyValueAliases.txt, or ISO 15924 code or locale
+ * @param fillIn the UScriptCode buffer to fill in the script code
+ * @param capacity the capacity (size) fo UScriptCode buffer passed in.
+ * @param err the error status code.
+ * @return The number of script codes filled in the buffer passed in
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
+
+/**
+ * Gets a script name associated with the given script code.
+ * Returns "Malayam" given USCRIPT_MALAYALAM
+ * @param scriptCode UScriptCode enum
+ * @return script long name as given in
+ * PropertyValueAliases.txt, or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+uscript_getName(UScriptCode scriptCode);
+
+/**
+ * Gets a script name associated with the given script code.
+ * Returns "Mlym" given USCRIPT_MALAYALAM
+ * @param scriptCode UScriptCode enum
+ * @return script abbreviated name as given in
+ * PropertyValueAliases.txt, or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode);
+
+/**
+ * Gets the script code associated with the given codepoint.
+ * Returns USCRIPT_MALAYALAM given 0x0D02
+ * @param codepoint UChar32 codepoint
+ * @param err the error status code.
+ * @return The UScriptCode, or 0 if codepoint is invalid
+ * @stable ICU 2.4
+ */
+U_STABLE UScriptCode U_EXPORT2
+uscript_getScript(UChar32 codepoint, UErrorCode *err);
+
+#endif
+
+
diff --git a/Source/WebCore/icu/unicode/usearch.h b/Source/WebCore/icu/unicode/usearch.h
new file mode 100644
index 0000000..64b9e45
--- /dev/null
+++ b/Source/WebCore/icu/unicode/usearch.h
@@ -0,0 +1,643 @@
+/*
+**********************************************************************
+* Copyright (C) 2001-2005 IBM and others. All rights reserved.
+**********************************************************************
+* Date Name Description
+* 06/28/2001 synwee Creation.
+**********************************************************************
+*/
+#ifndef USEARCH_H
+#define USEARCH_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/ucol.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ubrk.h"
+
+/**
+ * \file
+ * \brief C API: StringSearch
+ *
+ * C Apis for an engine that provides language-sensitive text searching based
+ * on the comparison rules defined in a <tt>UCollator</tt> data struct,
+ * see <tt>ucol.h</tt>. This ensures that language eccentricity can be
+ * handled, e.g. for the German collator, characters &szlig; and SS will be matched
+ * if case is chosen to be ignored.
+ * See the <a href="http://dev.icu-project.org/cgi-bin/viewcvs.cgi/~checkout~/icuhtml/design/collation/ICU_collation_design.htm">
+ * "ICU Collation Design Document"</a> for more information.
+ * <p>
+ * The algorithm implemented is a modified form of the Boyer Moore's search.
+ * For more information see
+ * <a href="http://icu.sourceforge.net/docs/papers/efficient_text_searching_in_java.html">
+ * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i>
+ * in February, 1999, for further information on the algorithm.
+ * <p>
+ * There are 2 match options for selection:<br>
+ * Let S' be the sub-string of a text string S between the offsets start and
+ * end <start, end>.
+ * <br>
+ * A pattern string P matches a text string S at the offsets <start, end>
+ * if
+ * <pre>
+ * option 1. Some canonical equivalent of P matches some canonical equivalent
+ * of S'
+ * option 2. P matches S' and if P starts or ends with a combining mark,
+ * there exists no non-ignorable combining mark before or after S'
+ * in S respectively.
+ * </pre>
+ * Option 2. will be the default.
+ * <p>
+ * This search has APIs similar to that of other text iteration mechanisms
+ * such as the break iterators in <tt>ubrk.h</tt>. Using these
+ * APIs, it is easy to scan through text looking for all occurances of
+ * a given pattern. This search iterator allows changing of direction by
+ * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>.
+ * Though a direction change can occur without calling <tt>reset</tt> first,
+ * this operation comes with some speed penalty.
+ * Generally, match results in the forward direction will match the result
+ * matches in the backwards direction in the reverse order
+ * <p>
+ * <tt>usearch.h</tt> provides APIs to specify the starting position
+ * within the text string to be searched, e.g. <tt>usearch_setOffset</tt>,
+ * <tt>usearch_preceding</tt> and <tt>usearch_following</tt>. Since the
+ * starting position will be set as it is specified, please take note that
+ * there are some dangerous positions which the search may render incorrect
+ * results:
+ * <ul>
+ * <li> The midst of a substring that requires normalization.
+ * <li> If the following match is to be found, the position should not be the
+ * second character which requires to be swapped with the preceding
+ * character. Vice versa, if the preceding match is to be found,
+ * position to search from should not be the first character which
+ * requires to be swapped with the next character. E.g certain Thai and
+ * Lao characters require swapping.
+ * <li> If a following pattern match is to be found, any position within a
+ * contracting sequence except the first will fail. Vice versa if a
+ * preceding pattern match is to be found, a invalid starting point
+ * would be any character within a contracting sequence except the last.
+ * </ul>
+ * <p>
+ * A breakiterator can be used if only matches at logical breaks are desired.
+ * Using a breakiterator will only give you results that exactly matches the
+ * boundaries given by the breakiterator. For instance the pattern "e" will
+ * not be found in the string "\u00e9" if a character break iterator is used.
+ * <p>
+ * Options are provided to handle overlapping matches.
+ * E.g. In English, overlapping matches produces the result 0 and 2
+ * for the pattern "abab" in the text "ababab", where else mutually
+ * exclusive matches only produce the result of 0.
+ * <p>
+ * Though collator attributes will be taken into consideration while
+ * performing matches, there are no APIs here for setting and getting the
+ * attributes. These attributes can be set by getting the collator
+ * from <tt>usearch_getCollator</tt> and using the APIs in <tt>ucol.h</tt>.
+ * Lastly to update String Search to the new collator attributes,
+ * usearch_reset() has to be called.
+ * <p>
+ * Restriction: <br>
+ * Currently there are no composite characters that consists of a
+ * character with combining class > 0 before a character with combining
+ * class == 0. However, if such a character exists in the future, the
+ * search mechanism does not guarantee the results for option 1.
+ *
+ * <p>
+ * Example of use:<br>
+ * <pre><code>
+ * char *tgtstr = "The quick brown fox jumped over the lazy fox";
+ * char *patstr = "fox";
+ * UChar target[64];
+ * UChar pattern[16];
+ * UErrorCode status = U_ZERO_ERROR;
+ * u_uastrcpy(target, tgtstr);
+ * u_uastrcpy(pattern, patstr);
+ *
+ * UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US",
+ * &status);
+ * if (U_SUCCESS(status)) {
+ * for (int pos = usearch_first(search, &status);
+ * pos != USEARCH_DONE;
+ * pos = usearch_next(search, &status)) {
+ * printf("Found match at %d pos, length is %d\n", pos,
+ * usearch_getMatchLength(search));
+ * }
+ * }
+ * </code></pre>
+ * @stable ICU 2.4
+ */
+
+/**
+* DONE is returned by previous() and next() after all valid matches have
+* been returned, and by first() and last() if there are no matches at all.
+* @stable ICU 2.4
+*/
+#define USEARCH_DONE -1
+
+/**
+* Data structure for searching
+* @stable ICU 2.4
+*/
+struct UStringSearch;
+/**
+* Data structure for searching
+* @stable ICU 2.4
+*/
+typedef struct UStringSearch UStringSearch;
+
+/**
+* @stable ICU 2.4
+*/
+typedef enum {
+ /** Option for overlapping matches */
+ USEARCH_OVERLAP,
+ /**
+ Option for canonical matches. option 1 in header documentation.
+ The default value will be USEARCH_OFF
+ */
+ USEARCH_CANONICAL_MATCH,
+ USEARCH_ATTRIBUTE_COUNT
+} USearchAttribute;
+
+/**
+* @stable ICU 2.4
+*/
+typedef enum {
+ /** default value for any USearchAttribute */
+ USEARCH_DEFAULT = -1,
+ /** value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
+ USEARCH_OFF,
+ /** value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
+ USEARCH_ON,
+ USEARCH_ATTRIBUTE_VALUE_COUNT
+} USearchAttributeValue;
+
+/* open and close ------------------------------------------------------ */
+
+/**
+* Creating a search iterator data struct using the argument locale language
+* rule set. A collator will be created in the process, which will be owned by
+* this search and will be deleted in <tt>usearch_close</tt>.
+* @param pattern for matching
+* @param patternlength length of the pattern, -1 for null-termination
+* @param text text string
+* @param textlength length of the text string, -1 for null-termination
+* @param locale name of locale for the rules to be used
+* @param breakiter A BreakIterator that will be used to restrict the points
+* at which matches are detected. If a match is found, but
+* the match's start or end index is not a boundary as
+* determined by the <tt>BreakIterator</tt>, the match will
+* be rejected and another will be searched for.
+* If this parameter is <tt>NULL</tt>, no break detection is
+* attempted.
+* @param status for errors if it occurs. If pattern or text is NULL, or if
+* patternlength or textlength is 0 then an
+* U_ILLEGAL_ARGUMENT_ERROR is returned.
+* @return search iterator data structure, or NULL if there is an error.
+* @stable ICU 2.4
+*/
+U_STABLE UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern,
+ int32_t patternlength,
+ const UChar *text,
+ int32_t textlength,
+ const char *locale,
+ UBreakIterator *breakiter,
+ UErrorCode *status);
+
+/**
+* Creating a search iterator data struct using the argument collator language
+* rule set. Note, user retains the ownership of this collator, thus the
+* responsibility of deletion lies with the user.
+* NOTE: string search cannot be instantiated from a collator that has
+* collate digits as numbers (CODAN) turned on.
+* @param pattern for matching
+* @param patternlength length of the pattern, -1 for null-termination
+* @param text text string
+* @param textlength length of the text string, -1 for null-termination
+* @param collator used for the language rules
+* @param breakiter A BreakIterator that will be used to restrict the points
+* at which matches are detected. If a match is found, but
+* the match's start or end index is not a boundary as
+* determined by the <tt>BreakIterator</tt>, the match will
+* be rejected and another will be searched for.
+* If this parameter is <tt>NULL</tt>, no break detection is
+* attempted.
+* @param status for errors if it occurs. If collator, pattern or text is NULL,
+* or if patternlength or textlength is 0 then an
+* U_ILLEGAL_ARGUMENT_ERROR is returned.
+* @return search iterator data structure, or NULL if there is an error.
+* @stable ICU 2.4
+*/
+U_STABLE UStringSearch * U_EXPORT2 usearch_openFromCollator(
+ const UChar *pattern,
+ int32_t patternlength,
+ const UChar *text,
+ int32_t textlength,
+ const UCollator *collator,
+ UBreakIterator *breakiter,
+ UErrorCode *status);
+
+/**
+* Destroying and cleaning up the search iterator data struct.
+* If a collator is created in <tt>usearch_open</tt>, it will be destroyed here.
+* @param searchiter data struct to clean up
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_close(UStringSearch *searchiter);
+
+/* get and set methods -------------------------------------------------- */
+
+/**
+* Sets the current position in the text string which the next search will
+* start from. Clears previous states.
+* This method takes the argument index and sets the position in the text
+* string accordingly without checking if the index is pointing to a
+* valid starting point to begin searching.
+* Search positions that may render incorrect results are highlighted in the
+* header comments
+* @param strsrch search iterator data struct
+* @param position position to start next search from. If position is less
+* than or greater than the text range for searching,
+* an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+* @param status error status if any.
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
+ int32_t position,
+ UErrorCode *status);
+
+/**
+* Return the current index in the string text being searched.
+* If the iteration has gone past the end of the text (or past the beginning
+* for a backwards search), <tt>USEARCH_DONE</tt> is returned.
+* @param strsrch search iterator data struct
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch);
+
+/**
+* Sets the text searching attributes located in the enum USearchAttribute
+* with values from the enum USearchAttributeValue.
+* <tt>USEARCH_DEFAULT</tt> can be used for all attributes for resetting.
+* @param strsrch search iterator data struct
+* @param attribute text attribute to be set
+* @param value text attribute value
+* @param status for errors if it occurs
+* @see #usearch_getAttribute
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
+ USearchAttribute attribute,
+ USearchAttributeValue value,
+ UErrorCode *status);
+
+/**
+* Gets the text searching attributes.
+* @param strsrch search iterator data struct
+* @param attribute text attribute to be retrieve
+* @return text attribute value
+* @see #usearch_setAttribute
+* @stable ICU 2.4
+*/
+U_STABLE USearchAttributeValue U_EXPORT2 usearch_getAttribute(
+ const UStringSearch *strsrch,
+ USearchAttribute attribute);
+
+/**
+* Returns the index to the match in the text string that was searched.
+* This call returns a valid result only after a successful call to
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
+* or <tt>usearch_last</tt>.
+* Just after construction, or after a searching method returns
+* <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
+* <p>
+* Use <tt>usearch_getMatchedLength</tt> to get the matched string length.
+* @param strsrch search iterator data struct
+* @return index to a substring within the text string that is being
+* searched.
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedStart(
+ const UStringSearch *strsrch);
+
+/**
+* Returns the length of text in the string which matches the search pattern.
+* This call returns a valid result only after a successful call to
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
+* or <tt>usearch_last</tt>.
+* Just after construction, or after a searching method returns
+* <tt>USEARCH_DONE</tt>, this method will return 0.
+* @param strsrch search iterator data struct
+* @return The length of the match in the string text, or 0 if there is no
+* match currently.
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
+ const UStringSearch *strsrch);
+
+/**
+* Returns the text that was matched by the most recent call to
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
+* or <tt>usearch_last</tt>.
+* If the iterator is not pointing at a valid match (e.g. just after
+* construction or after <tt>USEARCH_DONE</tt> has been returned, returns
+* an empty string. If result is not large enough to store the matched text,
+* result will be filled with the partial text and an U_BUFFER_OVERFLOW_ERROR
+* will be returned in status. result will be null-terminated whenever
+* possible. If the buffer fits the matched text exactly, a null-termination
+* is not possible, then a U_STRING_NOT_TERMINATED_ERROR set in status.
+* Pre-flighting can be either done with length = 0 or the API
+* <tt>usearch_getMatchLength</tt>.
+* @param strsrch search iterator data struct
+* @param result UChar buffer to store the matched string
+* @param resultCapacity length of the result buffer
+* @param status error returned if result is not large enough
+* @return exact length of the matched text, not counting the null-termination
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
+ UChar *result,
+ int32_t resultCapacity,
+ UErrorCode *status);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+* Set the BreakIterator that will be used to restrict the points at which
+* matches are detected.
+* @param strsrch search iterator data struct
+* @param breakiter A BreakIterator that will be used to restrict the points
+* at which matches are detected. If a match is found, but
+* the match's start or end index is not a boundary as
+* determined by the <tt>BreakIterator</tt>, the match will
+* be rejected and another will be searched for.
+* If this parameter is <tt>NULL</tt>, no break detection is
+* attempted.
+* @param status for errors if it occurs
+* @see #usearch_getBreakIterator
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
+ UBreakIterator *breakiter,
+ UErrorCode *status);
+
+/**
+* Returns the BreakIterator that is used to restrict the points at which
+* matches are detected. This will be the same object that was passed to the
+* constructor or to <tt>usearch_setBreakIterator</tt>. Note that
+* <tt>NULL</tt>
+* is a legal value; it means that break detection should not be attempted.
+* @param strsrch search iterator data struct
+* @return break iterator used
+* @see #usearch_setBreakIterator
+* @stable ICU 2.4
+*/
+U_STABLE const UBreakIterator * U_EXPORT2 usearch_getBreakIterator(
+ const UStringSearch *strsrch);
+
+#endif
+
+/**
+* Set the string text to be searched. Text iteration will hence begin at the
+* start of the text string. This method is useful if you want to re-use an
+* iterator to search for the same pattern within a different body of text.
+* @param strsrch search iterator data struct
+* @param text new string to look for match
+* @param textlength length of the new string, -1 for null-termination
+* @param status for errors if it occurs. If text is NULL, or textlength is 0
+* then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
+* done to strsrch.
+* @see #usearch_getText
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
+ const UChar *text,
+ int32_t textlength,
+ UErrorCode *status);
+
+/**
+* Return the string text to be searched.
+* @param strsrch search iterator data struct
+* @param length returned string text length
+* @return string text
+* @see #usearch_setText
+* @stable ICU 2.4
+*/
+U_STABLE const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
+ int32_t *length);
+
+/**
+* Gets the collator used for the language rules.
+* <p>
+* Deleting the returned <tt>UCollator</tt> before calling
+* <tt>usearch_close</tt> would cause the string search to fail.
+* <tt>usearch_close</tt> will delete the collator if this search owns it.
+* @param strsrch search iterator data struct
+* @return collator
+* @stable ICU 2.4
+*/
+U_STABLE UCollator * U_EXPORT2 usearch_getCollator(
+ const UStringSearch *strsrch);
+
+/**
+* Sets the collator used for the language rules. User retains the ownership
+* of this collator, thus the responsibility of deletion lies with the user.
+* This method causes internal data such as Boyer-Moore shift tables to
+* be recalculated, but the iterator's position is unchanged.
+* @param strsrch search iterator data struct
+* @param collator to be used
+* @param status for errors if it occurs
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
+ const UCollator *collator,
+ UErrorCode *status);
+
+/**
+* Sets the pattern used for matching.
+* Internal data like the Boyer Moore table will be recalculated, but the
+* iterator's position is unchanged.
+* @param strsrch search iterator data struct
+* @param pattern string
+* @param patternlength pattern length, -1 for null-terminated string
+* @param status for errors if it occurs. If text is NULL, or textlength is 0
+* then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
+* done to strsrch.
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
+ const UChar *pattern,
+ int32_t patternlength,
+ UErrorCode *status);
+
+/**
+* Gets the search pattern
+* @param strsrch search iterator data struct
+* @param length return length of the pattern, -1 indicates that the pattern
+* is null-terminated
+* @return pattern string
+* @stable ICU 2.4
+*/
+U_STABLE const UChar * U_EXPORT2 usearch_getPattern(
+ const UStringSearch *strsrch,
+ int32_t *length);
+
+/* methods ------------------------------------------------------------- */
+
+/**
+* Returns the first index at which the string text matches the search
+* pattern.
+* The iterator is adjusted so that its current index (as returned by
+* <tt>usearch_getOffset</tt>) is the match position if one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The character index of the first match, or
+* <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
+ UErrorCode *status);
+
+/**
+* Returns the first index greater than <tt>position</tt> at which the string
+* text
+* matches the search pattern. The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* <p>
+* Search positions that may render incorrect results are highlighted in the
+* header comments. If position is less than or greater than the text range
+* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+* @param strsrch search iterator data struct
+* @param position to start the search at
+* @param status for errors if it occurs
+* @return The character index of the first match following <tt>pos</tt>,
+* or <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
+ int32_t position,
+ UErrorCode *status);
+
+/**
+* Returns the last index in the target text at which it matches the search
+* pattern. The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the first match, or <tt>USEARCH_DONE</tt> if there
+* are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
+ UErrorCode *status);
+
+/**
+* Returns the first index less than <tt>position</tt> at which the string text
+* matches the search pattern. The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* <p>
+* Search positions that may render incorrect results are highlighted in the
+* header comments. If position is less than or greater than the text range
+* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+* @param strsrch search iterator data struct
+* @param position index position the search is to begin at
+* @param status for errors if it occurs
+* @return The character index of the first match preceding <tt>pos</tt>,
+* or <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
+ int32_t position,
+ UErrorCode *status);
+
+/**
+* Returns the index of the next point at which the string text matches the
+* search pattern, starting from the current position.
+* The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the next match after the current position, or
+* <tt>USEARCH_DONE</tt> if there are no more matches.
+* @see #usearch_first
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
+ UErrorCode *status);
+
+/**
+* Returns the index of the previous point at which the string text matches
+* the search pattern, starting at the current position.
+* The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the previous match before the current position,
+* or <tt>USEARCH_DONE</tt> if there are no more matches.
+* @see #usearch_last
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
+ UErrorCode *status);
+
+/**
+* Reset the iteration.
+* Search will begin at the start of the text string if a forward iteration
+* is initiated before a backwards iteration. Otherwise if a backwards
+* iteration is initiated before a forwards iteration, the search will begin
+* at the end of the text string.
+* @param strsrch search iterator data struct
+* @see #usearch_first
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uset.h b/Source/WebCore/icu/unicode/uset.h
new file mode 100644
index 0000000..b82ceb8
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uset.h
@@ -0,0 +1,745 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar07
+* created by: Markus W. Scherer
+*
+* C version of UnicodeSet.
+*/
+
+
+/**
+ * \file
+ * \brief C API: Unicode Set
+ *
+ * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
+ */
+
+#ifndef __USET_H__
+#define __USET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+#ifndef UCNV_H
+struct USet;
+/**
+ * A UnicodeSet. Use the uset_* API to manipulate. Create with
+ * uset_open*, and destroy with uset_close.
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+#endif
+
+/**
+ * Bitmask values to be passed to uset_openPatternOptions() or
+ * uset_applyPattern() taking an option parameter.
+ * @stable ICU 2.4
+ */
+enum {
+ /**
+ * Ignore white space within patterns unless quoted or escaped.
+ * @stable ICU 2.4
+ */
+ USET_IGNORE_SPACE = 1,
+
+ /**
+ * Enable case insensitive matching. E.g., "[ab]" with this flag
+ * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
+ * match all except 'a', 'A', 'b', and 'B'. This performs a full
+ * closure over case mappings, e.g. U+017F for s.
+ * @stable ICU 2.4
+ */
+ USET_CASE_INSENSITIVE = 2,
+
+ /**
+ * Bitmask for UnicodeSet::closeOver() indicating letter case.
+ * This may be ORed together with other selectors.
+ * @internal
+ */
+ USET_CASE = 2,
+
+ /**
+ * Enable case insensitive matching. E.g., "[ab]" with this flag
+ * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
+ * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
+ * title-, and uppercase mappings as well as the case folding
+ * of each existing element in the set.
+ * @draft ICU 3.2
+ */
+ USET_ADD_CASE_MAPPINGS = 4,
+
+ /**
+ * Enough for any single-code point set
+ * @internal
+ */
+ USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+};
+
+/**
+ * A serialized form of a Unicode set. Limited manipulations are
+ * possible directly on a serialized set. See below.
+ * @stable ICU 2.4
+ */
+typedef struct USerializedSet {
+ /**
+ * The serialized Unicode Set.
+ * @stable ICU 2.4
+ */
+ const uint16_t *array;
+ /**
+ * The length of the array that contains BMP characters.
+ * @stable ICU 2.4
+ */
+ int32_t bmpLength;
+ /**
+ * The total length of the array.
+ * @stable ICU 2.4
+ */
+ int32_t length;
+ /**
+ * A small buffer for the array to reduce memory allocations.
+ * @stable ICU 2.4
+ */
+ uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
+} USerializedSet;
+
+/*********************************************************************
+ * USet API
+ *********************************************************************/
+
+/**
+ * Creates a USet object that contains the range of characters
+ * start..end, inclusive.
+ * @param start first character of the range, inclusive
+ * @param end last character of the range, inclusive
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_open(UChar32 start, UChar32 end);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_openPattern(const UChar* pattern, int32_t patternLength,
+ UErrorCode* ec);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode* ec);
+
+/**
+ * Disposes of the storage used by a USet object. This function should
+ * be called exactly once for objects returned by uset_open().
+ * @param set the object to dispose of
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_close(USet* set);
+
+/**
+ * Causes the USet object to represent the range <code>start - end</code>.
+ * If <code>start > end</code> then this USet is set to an empty range.
+ * @param set the object to set to the given range
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @draft ICU 3.2
+ */
+U_DRAFT void U_EXPORT2
+uset_set(USet* set,
+ UChar32 start, UChar32 end);
+
+/**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * <em>Empties the set passed before applying the pattern.</em>
+ * @param set The set to which the pattern is to be applied.
+ * @param pattern A pointer to UChar string specifying what characters are in the set.
+ * The character at pattern[0] must be a '['.
+ * @param patternLength The length of the UChar string. -1 if NUL terminated.
+ * @param options A bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param status Returns an error if the pattern cannot be parsed.
+ * @return Upon successful parse, the value is either
+ * the index of the character after the closing ']'
+ * of the parsed pattern.
+ * If the status code indicates failure, then the return value
+ * is the index of the error in the source.
+ *
+ * @draft ICU 2.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_applyPattern(USet *set,
+ const UChar *pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode *status);
+
+/**
+ * Modifies the set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue. Prior contents of this set are lost.
+ *
+ * @param set the object to contain the code points defined by the property
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @draft ICU 3.2
+ */
+U_DRAFT void U_EXPORT2
+uset_applyIntPropertyValue(USet* set,
+ UProperty prop, int32_t value, UErrorCode* ec);
+
+/**
+ * Modifies the set to contain those code points which have the
+ * given value for the given property. Prior contents of this
+ * set are lost.
+ *
+ * @param set the object to contain the code points defined by the given
+ * property and value alias
+ *
+ * @param prop a string specifying a property alias, either short or long.
+ * The name is matched loosely. See PropertyAliases.txt for names and a
+ * description of loose matching. If the value string is empty, then this
+ * string is interpreted as either a General_Category value alias, a Script
+ * value alias, a binary property alias, or a special ID. Special IDs are
+ * matched loosely and correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F].
+ *
+ * @param propLength the length of the prop, or -1 if NULL
+ *
+ * @param value a string specifying a value alias, either short or long.
+ * The name is matched loosely. See PropertyValueAliases.txt for names
+ * and a description of loose matching. In addition to aliases listed,
+ * numeric values and canonical combining classes may be expressed
+ * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string
+ * may also be empty.
+ *
+ * @param valueLength the length of the value, or -1 if NULL
+ *
+ * @param ec error code input/output parameter
+ *
+ * @draft ICU 3.2
+ */
+U_DRAFT void U_EXPORT2
+uset_applyPropertyAlias(USet* set,
+ const UChar *prop, int32_t propLength,
+ const UChar *value, int32_t valueLength,
+ UErrorCode* ec);
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ *
+ * @param pattern a string specifying the pattern
+ * @param patternLength the length of the pattern, or -1 if NULL
+ * @param pos the given position
+ * @draft ICU 3.2
+ */
+U_DRAFT UBool U_EXPORT2
+uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
+ int32_t pos);
+
+/**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a uset_openPattern(), it
+ * will produce another set that is equal to this one.
+ * @param set the set
+ * @param result the string to receive the rules, may be NULL
+ * @param resultCapacity the capacity of result, may be 0 if result is NULL
+ * @param escapeUnprintable if TRUE then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param ec error code.
+ * @return length of string, possibly larger than resultCapacity
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+ UChar* result, int32_t resultCapacity,
+ UBool escapeUnprintable,
+ UErrorCode* ec);
+
+/**
+ * Adds the given character to the given USet. After this call,
+ * uset_contains(set, c) will return TRUE.
+ * @param set the object to which to add the character
+ * @param c the character to add
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_add(USet* set, UChar32 c);
+
+/**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ *
+ * @param set the object to which to add the set
+ * @param additionalSet the source set whose elements are to be added to this set.
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uset_addAll(USet* set, const USet *additionalSet);
+
+/**
+ * Adds the given range of characters to the given USet. After this call,
+ * uset_contains(set, start, end) will return TRUE.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to add, inclusive
+ * @param end the last character of the range to add, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_addRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Adds the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return TRUE.
+ * @param set the object to which to add the character
+ * @param str the string to add
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes the given character from the given USet. After this call,
+ * uset_contains(set, c) will return FALSE.
+ * @param set the object from which to remove the character
+ * @param c the character to remove
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_remove(USet* set, UChar32 c);
+
+/**
+ * Removes the given range of characters from the given USet. After this call,
+ * uset_contains(set, start, end) will return FALSE.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to remove, inclusive
+ * @param end the last character of the range to remove, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Removes the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return FALSE.
+ * @param set the object to which to add the character
+ * @param str the string to remove
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * @param set the object from which the elements are to be removed
+ * @param removeSet the object that defines which elements will be
+ * removed from this set
+ * @draft ICU 3.2
+ */
+U_DRAFT void U_EXPORT2
+uset_removeAll(USet* set, const USet* removeSet);
+
+/**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If <code>start > end</code> then an empty range is
+ * retained, leaving the set empty. This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ *
+ * @param set the object for which to retain only the specified range
+ * @param start first character, inclusive, of range to be retained
+ * to this set.
+ * @param end last character, inclusive, of range to be retained
+ * to this set.
+ * @draft ICU 3.2
+ */
+U_DRAFT void U_EXPORT2
+uset_retain(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ *
+ * @param set the object on which to perform the retain
+ * @param retain set that defines which elements this set will retain
+ * @draft ICU 3.2
+ */
+U_DRAFT void U_EXPORT2
+uset_retainAll(USet* set, const USet* retain);
+
+/**
+ * Reallocate this objects internal structures to take up the least
+ * possible space, without changing this object's value.
+ *
+ * @param set the object on which to perfrom the compact
+ * @draft ICU 3.2
+ */
+U_DRAFT void U_EXPORT2
+uset_compact(USet* set);
+
+/**
+ * Inverts this set. This operation modifies this set so that
+ * its value is its complement. This operation does not affect
+ * the multicharacter strings, if any.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_complement(USet* set);
+
+/**
+ * Complements in this set all elements contained in the specified
+ * set. Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ *
+ * @param set the set with which to complement
+ * @param complement set that defines which elements will be xor'ed
+ * from this set.
+ * @draft ICU 3.2
+ */
+U_DRAFT void U_EXPORT2
+uset_complementAll(USet* set, const USet* complement);
+
+/**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_clear(USet* set);
+
+/**
+ * Returns TRUE if the given USet contains no characters and no
+ * strings.
+ * @param set the set
+ * @return true if set is empty
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_isEmpty(const USet* set);
+
+/**
+ * Returns TRUE if the given USet contains the given character.
+ * @param set the set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c);
+
+/**
+ * Returns TRUE if the given USet contains all characters c
+ * where start <= c && c <= end.
+ * @param set the set
+ * @param start the first character of the range to test, inclusive
+ * @param end the last character of the range to test, inclusive
+ * @return TRUE if set contains the range
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsRange(const USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Returns TRUE if the given USet contains the given string.
+ * @param set the set
+ * @param str the string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if set contains str
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point. If the character
+ * is not in this set, return -1. The inverse of this method is
+ * <code>charAt()</code>.
+ * @param set the set
+ * @param c the character to obtain the index for
+ * @return an index from 0..size()-1, or -1
+ * @draft ICU 3.2
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_indexOf(const USet* set, UChar32 c);
+
+/**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point. If the index is
+ * out of range, return (UChar32)-1. The inverse of this method is
+ * <code>indexOf()</code>.
+ * @param set the set
+ * @param index an index from 0..size()-1 to obtain the char for
+ * @return the character at the given index, or (UChar32)-1.
+ * @draft ICU 3.2
+ */
+U_DRAFT UChar32 U_EXPORT2
+uset_charAt(const USet* set, int32_t index);
+
+/**
+ * Returns the number of characters and strings contained in the given
+ * USet.
+ * @param set the set
+ * @return a non-negative integer counting the characters and strings
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_size(const USet* set);
+
+/**
+ * Returns the number of items in this set. An item is either a range
+ * of characters or a single multicharacter string.
+ * @param set the set
+ * @return a non-negative integer counting the character ranges
+ * and/or strings contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItemCount(const USet* set);
+
+/**
+ * Returns an item of this set. An item is either a range of
+ * characters or a single multicharacter string.
+ * @param set the set
+ * @param itemIndex a non-negative integer in the range 0..
+ * uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character
+ * in range, inclusive
+ * @param end pointer to variable to receive last character in range,
+ * inclusive
+ * @param str buffer to receive the string, may be NULL
+ * @param strCapacity capacity of str, or 0 if str is NULL
+ * @param ec error code
+ * @return the length of the string (>= 2), or 0 if the item is a
+ * range, in which case it is the range *start..*end, or -1 if
+ * itemIndex is out of range
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItem(const USet* set, int32_t itemIndex,
+ UChar32* start, UChar32* end,
+ UChar* str, int32_t strCapacity,
+ UErrorCode* ec);
+
+/**
+ * Returns true if set1 contains all the characters and strings
+ * of set2. It answers the question, 'Is set1 a subset of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @draft ICU 3.2
+ */
+U_DRAFT UBool U_EXPORT2
+uset_containsAll(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains none of the characters and strings
+ * of set2. It answers the question, 'Is set1 a disjoint set of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @draft ICU 3.2
+ */
+U_DRAFT UBool U_EXPORT2
+uset_containsNone(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains some of the characters and strings
+ * of set2. It answers the question, 'Does set1 and set2 have an intersection?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @draft ICU 3.2
+ */
+U_DRAFT UBool U_EXPORT2
+uset_containsSome(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains all of the characters and strings
+ * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @draft ICU 3.2
+ */
+U_DRAFT UBool U_EXPORT2
+uset_equals(const USet* set1, const USet* set2);
+
+/*********************************************************************
+ * Serialized set API
+ *********************************************************************/
+
+/**
+ * Serializes this set into an array of 16-bit integers. Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array
+ * has following format (each line is one 16-bit integer):
+ *
+ * length = (n+2*m) | (m!=0?0x8000:0)
+ * bmpLength = n; present if m!=0
+ * bmp[0]
+ * bmp[1]
+ * ...
+ * bmp[n-1]
+ * supp-high[0]
+ * supp-low[0]
+ * supp-high[1]
+ * supp-low[1]
+ * ...
+ * supp-high[m-1]
+ * supp-low[m-1]
+ *
+ * The array starts with a header. After the header are n bmp
+ * code points, then m supplementary code points. Either n or m
+ * or both may be zero. n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers. The first, 'length', has value
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param set the set
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero. Must not be negative.
+ * @param pErrorCode pointer to the error code. Will be set to
+ * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to
+ * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
+
+/**
+ * Given a serialized array, fill in the given serialized set object.
+ * @param fillSet pointer to result
+ * @param src pointer to start of array
+ * @param srcLength length of array
+ * @return true if the given array is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
+
+/**
+ * Set the USerializedSet to contain the given character (and nothing
+ * else).
+ * @param fillSet pointer to result
+ * @param c The codepoint to set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
+
+/**
+ * Returns TRUE if the given USerializedSet contains the given
+ * character.
+ * @param set the serialized set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_serializedContains(const USerializedSet* set, UChar32 c);
+
+/**
+ * Returns the number of disjoint ranges of characters contained in
+ * the given serialized set. Ignores any strings contained in the
+ * set.
+ * @param set the serialized set
+ * @return a non-negative integer counting the character ranges
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getSerializedRangeCount(const USerializedSet* set);
+
+/**
+ * Returns a range of characters contained in the given serialized
+ * set.
+ * @param set the serialized set
+ * @param rangeIndex a non-negative integer in the range 0..
+ * uset_getSerializedRangeCount(set)-1
+ * @param pStart pointer to variable to receive first character
+ * in range, inclusive
+ * @param pEnd pointer to variable to receive last character in range,
+ * inclusive
+ * @return true if rangeIndex is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
+ UChar32* pStart, UChar32* pEnd);
+
+#endif
diff --git a/Source/WebCore/icu/unicode/ushape.h b/Source/WebCore/icu/unicode/ushape.h
new file mode 100644
index 0000000..34e81e3
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ushape.h
@@ -0,0 +1,234 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ushape.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000jun29
+* created by: Markus W. Scherer
+*/
+
+#ifndef __USHAPE_H__
+#define __USHAPE_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Arabic shaping
+ *
+ */
+
+/**
+ * Shape Arabic text on a character basis.
+ *
+ * <p>This function performs basic operations for "shaping" Arabic text. It is most
+ * useful for use with legacy data formats and legacy display technology
+ * (simple terminals). All operations are performed on Unicode characters.</p>
+ *
+ * <p>Text-based shaping means that some character code points in the text are
+ * replaced by others depending on the context. It transforms one kind of text
+ * into another. In comparison, modern displays for Arabic text select
+ * appropriate, context-dependent font glyphs for each text element, which means
+ * that they transform text into a glyph vector.</p>
+ *
+ * <p>Text transformations are necessary when modern display technology is not
+ * available or when text needs to be transformed to or from legacy formats that
+ * use "shaped" characters. Since the Arabic script is cursive, connecting
+ * adjacent letters to each other, computers select images for each letter based
+ * on the surrounding letters. This usually results in four images per Arabic
+ * letter: initial, middle, final, and isolated forms. In Unicode, on the other
+ * hand, letters are normally stored abstract, and a display system is expected
+ * to select the necessary glyphs. (This makes searching and other text
+ * processing easier because the same letter has only one code.) It is possible
+ * to mimic this with text transformations because there are characters in
+ * Unicode that are rendered as letters with a specific shape
+ * (or cursive connectivity). They were included for interoperability with
+ * legacy systems and codepages, and for unsophisticated display systems.</p>
+ *
+ * <p>A second kind of text transformations is supported for Arabic digits:
+ * For compatibility with legacy codepages that only include European digits,
+ * it is possible to replace one set of digits by another, changing the
+ * character code points. These operations can be performed for either
+ * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
+ * digits (U+06f0...U+06f9).</p>
+ *
+ * <p>Some replacements may result in more or fewer characters (code points).
+ * By default, this means that the destination buffer may receive text with a
+ * length different from the source length. Some legacy systems rely on the
+ * length of the text to be constant. They expect extra spaces to be added
+ * or consumed either next to the affected character or at the end of the
+ * text.</p>
+ *
+ * <p>For details about the available operations, see the description of the
+ * <code>U_SHAPE_...</code> options.</p>
+ *
+ * @param source The input text.
+ *
+ * @param sourceLength The number of UChars in <code>source</code>.
+ *
+ * @param dest The destination buffer that will receive the results of the
+ * requested operations. It may be <code>NULL</code> only if
+ * <code>destSize</code> is 0. The source and destination must not
+ * overlap.
+ *
+ * @param destSize The size (capacity) of the destination buffer in UChars.
+ * If <code>destSize</code> is 0, then no output is produced,
+ * but the necessary buffer size is returned ("preflighting").
+ *
+ * @param options This is a 32-bit set of flags that specify the operations
+ * that are performed on the input text. If no error occurs,
+ * then the result will always be written to the destination
+ * buffer.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return The number of UChars written to the destination buffer.
+ * If an error occured, then no output was written, or it may be
+ * incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
+ * the return value indicates the necessary destination buffer size.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_shapeArabic(const UChar *source, int32_t sourceLength,
+ UChar *dest, int32_t destSize,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_GROW_SHRINK 0
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces next to modified characters.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the end of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the beginning of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
+
+/** Bit mask for memory options. @stable ICU 2.0 */
+#define U_SHAPE_LENGTH_MASK 3
+
+
+/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0
+
+/**
+ * Direction indicator:
+ * the source is in visual LTR order,
+ * the leftmost displayed character stored first.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4
+
+/** Bit mask for direction indicators. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_MASK 4
+
+
+/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_NOOP 0
+
+/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_SHAPE 8
+
+/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_UNSHAPE 0x10
+
+/**
+ * Letter shaping option: replace abstract letter characters by "shaped" ones.
+ * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
+ * are always "shaped" into the isolated form instead of the medial form
+ * (selecting code points from the Arabic Presentation Forms-B block).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
+
+/** Bit mask for letter shaping options. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_MASK 0x18
+
+
+/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_NOOP 0
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_EN2AN 0x20
+
+/**
+ * Digit shaping option:
+ * Replace Arabic-Indic digits by European digits (U+0030...).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_AN2EN 0x40
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be not an Arabic letter
+ * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be an Arabic letter.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_RESERVED 0xa0
+
+/** Bit mask for digit shaping options. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_MASK 0xe0
+
+
+/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN 0
+
+/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200
+
+/** Bit mask for digit type options. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_MASK 0x3f00
+
+#endif
diff --git a/Source/WebCore/icu/unicode/ustring.h b/Source/WebCore/icu/unicode/ustring.h
new file mode 100644
index 0000000..6ebb6fb
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ustring.h
@@ -0,0 +1,1320 @@
+/*
+**********************************************************************
+* Copyright (C) 1998-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ustring.h
+*
+* Modification History:
+*
+* Date Name Description
+* 12/07/98 bertrand Creation.
+******************************************************************************
+*/
+
+#ifndef USTRING_H
+#define USTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uiter.h"
+
+/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+ typedef void UBreakIterator;
+#endif
+
+/**
+ * \file
+ * \brief C API: Unicode string handling functions
+ *
+ * These C API functions provide general Unicode string handling.
+ *
+ * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
+ * functions. (For example, they do not check for bad arguments like NULL string pointers.)
+ * In some cases, only the thread-safe variant of such a function is implemented here
+ * (see u_strtok_r()).
+ *
+ * Other functions provide more Unicode-specific functionality like locale-specific
+ * upper/lower-casing and string comparison in code point order.
+ *
+ * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
+ * UTF-16 encodes each Unicode code point with either one or two UChar code units.
+ * (This is the default form of Unicode, and a forward-compatible extension of the original,
+ * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
+ * in 1996.)
+ *
+ * Some APIs accept a 32-bit UChar32 value for a single code point.
+ *
+ * ICU also handles 16-bit Unicode text with unpaired surrogates.
+ * Such text is not well-formed UTF-16.
+ * Code-point-related functions treat unpaired surrogates as surrogate code points,
+ * i.e., as separate units.
+ *
+ * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
+ * it is much more efficient even for random access because the code unit values
+ * for single-unit characters vs. lead units vs. trail units are completely disjoint.
+ * This means that it is easy to determine character (code point) boundaries from
+ * random offsets in the string.
+ *
+ * Unicode (UTF-16) string processing is optimized for the single-unit case.
+ * Although it is important to support supplementary characters
+ * (which use pairs of lead/trail code units called "surrogates"),
+ * their occurrence is rare. Almost all characters in modern use require only
+ * a single UChar code unit (i.e., their code point values are <=0xffff).
+ *
+ * For more details see the User Guide Strings chapter (http://oss.software.ibm.com/icu/userguide/strings.html).
+ * For a discussion of the handling of unpaired surrogates see also
+ * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
+ */
+
+/**
+ * Determine the length of an array of UChar.
+ *
+ * @param s The array of UChars, NULL (U+0000) terminated.
+ * @return The number of UChars in <code>chars</code>, minus the terminator.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+
+/**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h).
+ *
+ * @param s The input string.
+ * @param length The number of UChar code units to be checked, or -1 to count all
+ * code points before the first NUL (U+0000).
+ * @return The number of code points in the specified code units.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length);
+
+/**
+ * Check if the string contains more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in the entire string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length is known
+ * (not -1 for NUL-termination) and falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (u_countChar32(s, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param s The input string.
+ * @param length The length of the string, or -1 if it is NUL-terminated.
+ * @param number The number of code points in the string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
+
+/**
+ * Concatenate two ustrings. Appends a copy of <code>src</code>,
+ * including the null terminator, to <code>dst</code>. The initial copied
+ * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcat(UChar *dst,
+ const UChar *src);
+
+/**
+ * Concatenate two ustrings.
+ * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
+ * Adds a terminating NUL.
+ * If src is too long, then only <code>n-1</code> characters will be copied
+ * before the terminating NUL.
+ * If <code>n&lt;=0</code> then dst is not modified.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to compare.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncat(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memchr32
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c);
+
+/**
+ * Locates the first occurrence in the string <code>string</code> of any of the characters
+ * in the string <code>matchSet</code>.
+ * Works just like C's strpbrk but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return A pointer to the character in <code>string</code> that matches one of the
+ * characters in <code>matchSet</code>, or NULL if no such character is found.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
+ * Works just like C's strcspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do not
+ * occur in <code>matchSet</code>.
+ * @see u_strspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that occur somewhere in <code>matchSet</code>.
+ * Works just like C's strspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do
+ * occur in <code>matchSet</code>.
+ * @see u_strcspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * The string tokenizer API allows an application to break a string into
+ * tokens. Unlike strtok(), the saveState (the current pointer within the
+ * original string) is maintained in saveState. In the first call, the
+ * argument src is a pointer to the string. In subsequent calls to
+ * return successive tokens of that string, src must be specified as
+ * NULL. The value saveState is set by this function to maintain the
+ * function's position within the string, and on each subsequent call
+ * you must give this argument the same variable. This function does
+ * handle surrogate pairs. This function is similar to the strtok_r()
+ * the POSIX Threads Extension (1003.1c-1995) version.
+ *
+ * @param src String containing token(s). This string will be modified.
+ * After the first call to u_strtok_r(), this argument must
+ * be NULL to get to the next token.
+ * @param delim Set of delimiter characters (Unicode code points).
+ * @param saveState The current pointer within the original string,
+ * which is set by this function. The saveState
+ * parameter should the address of a local variable of type
+ * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use
+ * &myLocalSaveState for this parameter).
+ * @return A pointer to the next token found in src, or NULL
+ * when there are no more tokens.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strtok_r(UChar *src,
+ const UChar *delim,
+ UChar **saveState);
+
+/**
+ * Compare two Unicode strings for bitwise equality (code unit order).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcmp(const UChar *s1,
+ const UChar *s2);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * See u_strCompare for details.
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
+
+/**
+ * Compare two Unicode strings (binary order).
+ *
+ * The comparison can be done in code unit order or in code point order.
+ * They differ only in UTF-16 when
+ * comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param codePointOrder Choose between code unit order (FALSE)
+ * and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool codePointOrder);
+
+/**
+ * Compare two Unicode strings (binary order)
+ * as presented by UCharIterator objects.
+ * Works otherwise just like u_strCompare().
+ *
+ * Both iterators are reset to their start positions.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ *
+ * @param iter1 First source string iterator.
+ * @param iter2 Second source string iterator.
+ * @param codePointOrder Choose between code unit order (FALSE)
+ * and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see u_strCompare
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to
+ * u_strCompare(u_strFoldCase(s1, options),
+ * u_strFoldCase(s2, options),
+ * (options&U_COMPARE_CODE_POINT_ORDER)!=0).
+ *
+ * The comparison can be done in UTF-16 code unit order or in code point order.
+ * They differ only when comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCaseCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Compare two ustrings for bitwise equality.
+ * Compares at most <code>n</code> characters.
+ *
+ * @param ucs1 A string to compare.
+ * @param ucs2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmp(const UChar *ucs1,
+ const UChar *ucs2,
+ int32_t n);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
+ * u_strFoldCase(s2, at most n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
+ * u_strFoldCase(s2, n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param length The number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
+
+/**
+ * Copy a ustring. Adds a null terminator.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcpy(UChar *dst,
+ const UChar *src);
+
+/**
+ * Copy a ustring.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncpy(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+#if !UCONFIG_NO_CONVERSION
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Adds a null terminator.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
+ const char *src );
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
+ const char *src,
+ int32_t n);
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Adds a null terminator.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
+ const UChar *src );
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
+ const UChar *src,
+ int32_t n );
+
+#endif
+
+/**
+ * Synonym for memcpy(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string
+ * @param count The number of characters to copy
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Synonym for memmove(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string
+ * @param count The number of characters to move
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
+ *
+ * @param dest The destination string.
+ * @param c The character to initialize the string.
+ * @param count The maximum number of characters to set.
+ * @return A pointer to <code>dest</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count);
+
+/**
+ * Compare the first <code>count</code> UChars of each buffer.
+ *
+ * @param buf1 The first string to compare.
+ * @param buf2 The second string to compare.
+ * @param count The maximum number of UChars to compare.
+ * @return When buf1 < buf2, a negative number is returned.
+ * When buf1 == buf2, 0 is returned.
+ * When buf1 > buf2, a positive number is returned.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param count The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Unicode String literals in C.
+ * We need one macro to declare a variable for the string
+ * and to statically preinitialize it if possible,
+ * and a second macro to dynamically intialize such a string variable if necessary.
+ *
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * A pair of macros for a single string must be used with the same
+ * parameters.
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ *
+ * Usage:
+ * <pre>
+ * &#32; U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ * &#32; U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ * &#32; static UBool didInit=FALSE;
+ * &#32;
+ * &#32; int32_t function() {
+ * &#32; if(!didInit) {
+ * &#32; U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ * &#32; U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ * &#32; didInit=TRUE;
+ * &#32; }
+ * &#32; return u_strcmp(ustringVar1, ustringVar2);
+ * &#32; }
+ * </pre>
+ * @stable ICU 2.0
+ */
+#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define U_STRING_DECL(var, cs, length) static const wchar_t var[(length)+1]={ L ## cs }
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)cs }
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#else
+# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
+#endif
+
+/**
+ * Unescape a string of characters and write the resulting
+ * Unicode characters to the destination buffer. The following escape
+ * sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\x{h...} 1-8 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * The above characters are recognized in the compiler's codepage,
+ * that is, they are coded as 'u', '\\', etc. Characters that are
+ * not parts of escape sequences are converted using u_charsToUChars().
+ *
+ * This function is similar to UnicodeString::unescape() but not
+ * identical to it. The latter takes a source UnicodeString, so it
+ * does escape recognition but no conversion.
+ *
+ * @param src a zero-terminated string of invariant characters
+ * @param dest pointer to buffer to receive converted and unescaped
+ * text and, if there is room, a zero terminator. May be NULL for
+ * preflighting, in which case no UChars will be written, but the
+ * return value will still be valid. On error, an empty string is
+ * stored here (if possible).
+ * @param destCapacity the number of UChars that may be written at
+ * dest. Ignored if dest == NULL.
+ * @return the length of unescaped string.
+ * @see u_unescapeAt
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_unescape(const char *src,
+ UChar *dest, int32_t destCapacity);
+
+U_CDECL_BEGIN
+/**
+ * Callback function for u_unescapeAt() that returns a character of
+ * the source text given an offset and a context pointer. The context
+ * pointer will be whatever is passed into u_unescapeAt().
+ *
+ * @param offset pointer to the offset that will be passed to u_unescapeAt().
+ * @param context an opaque pointer passed directly into u_unescapeAt()
+ * @return the character represented by the escape sequence at
+ * offset
+ * @see u_unescapeAt
+ * @stable ICU 2.0
+ */
+typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
+U_CDECL_END
+
+/**
+ * Unescape a single sequence. The character at offset-1 is assumed
+ * (without checking) to be a backslash. This method takes a callback
+ * pointer to a function that returns the UChar at a given offset. By
+ * varying this callback, ICU functions are able to unescape char*
+ * strings, UnicodeString objects, and UFILE pointers.
+ *
+ * If offset is out of range, or if the escape sequence is ill-formed,
+ * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape()
+ * for a list of recognized sequences.
+ *
+ * @param charAt callback function that returns a UChar of the source
+ * text given an offset and a context pointer.
+ * @param offset pointer to the offset that will be passed to charAt.
+ * The offset value will be updated upon return to point after the
+ * last parsed character of the escape sequence. On error the offset
+ * is unchanged.
+ * @param length the number of characters in the source text. The
+ * last character of the source text is considered to be at offset
+ * length-1.
+ * @param context an opaque pointer passed directly into charAt.
+ * @return the character represented by the escape sequence at
+ * offset, or (UChar32)0xFFFFFFFF on error.
+ * @see u_unescape()
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+ int32_t *offset,
+ int32_t length,
+ void *context);
+
+/**
+ * Uppercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+/**
+ * Lowercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (NULL), then a standard titlecase
+ * break iterator is opened.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UBreakIterator *titleIter,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-fold the characters in a string.
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'I' in CaseFolding.txt.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strFoldCase(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UChars to wchar_t units.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+/**
+ * Converts a sequence of wchar_t units to UChars
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromWCS(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const wchar_t *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+/**
+ * Converts a sequence of UChars (UTF-16) to UTF-8 bytes
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2
+u_strToUTF8(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UChars (UTF-16) to UTF32 units.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32* U_EXPORT2
+u_strToUTF32(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF32 units to UChars (UTF-16)
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF32(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/Source/WebCore/icu/unicode/utf.h b/Source/WebCore/icu/unicode/utf.h
new file mode 100644
index 0000000..201691d
--- /dev/null
+++ b/Source/WebCore/icu/unicode/utf.h
@@ -0,0 +1,221 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Code point macros
+ *
+ * This file defines macros for checking whether a code point is
+ * a surrogate or a non-character etc.
+ *
+ * The UChar and UChar32 data types for Unicode code units and code points
+ * are defined in umachines.h because they can be machine-dependent.
+ *
+ * utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some
+ * common definitions. Those files define macros for efficiently getting code points
+ * in and out of UTF-8/16 strings.
+ * utf16.h macros have "U16_" prefixes.
+ * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
+ *
+ * ICU processes 16-bit Unicode strings.
+ * Most of the time, such strings are well-formed UTF-16.
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU
+ * like regular code points where possible.
+ * (Pairs of surrogate code points are indistinguishable from supplementary
+ * code points encoded as pairs of supplementary code units.)
+ *
+ * In fact, almost all Unicode code points in normal text (>99%)
+ * are on the BMP (<=U+ffff) and even <=U+d7ff.
+ * ICU functions handle supplementary code points (U+10000..U+10ffff)
+ * but are optimized for the much more frequently occurring BMP code points.
+ *
+ * utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.
+ *
+ * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.
+ *
+ * utf.h also defines a small number of C macros for single Unicode code points.
+ * These are simple checks for surrogates and non-characters.
+ * For actual Unicode character properties see uchar.h.
+ *
+ * By default, string operations must be done with error checking in case
+ * a string is not well-formed UTF-16.
+ * The macros will detect if a surrogate code unit is unpaired
+ * (lead unit without trail unit or vice versa) and just return the unit itself
+ * as the code point.
+ * (It is an accidental property of Unicode and UTF-16 that all
+ * malformed sequences can be expressed unambiguously with a distinct subrange
+ * of Unicode code points.)
+ *
+ * When it is safe to assume that text is well-formed UTF-16
+ * (does not contain single, unpaired surrogates), then one can use
+ * U16_..._UNSAFE macros.
+ * These do not check for proper code unit sequences or truncated text and may
+ * yield wrong results or even cause a crash if they are used with "malformed"
+ * text.
+ * In practice, U16_..._UNSAFE macros will produce slightly less code but
+ * should not be faster because the processing is only different when a
+ * surrogate code unit is detected, which will be rare.
+ *
+ * Similarly for UTF-8, there are "safe" macros without a suffix,
+ * and U8_..._UNSAFE versions.
+ * The performance differences are much larger here because UTF-8 provides so
+ * many opportunities for malformed sequences.
+ * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
+ * and are fast, while the safe UTF-8 macros call functions for all but the
+ * trivial (ASCII) cases.
+ *
+ * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
+ * code point values (0..U+10ffff). They are indicated with negative values instead.
+ *
+ * For more information see the ICU User Guide Strings chapter
+ * (http://oss.software.ibm.com/icu/userguide/).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ *
+ * @stable ICU 2.4
+ */
+
+#ifndef __UTF_H__
+#define __UTF_H__
+
+#include "unicode/utypes.h"
+/* include the utfXX.h after the following definitions */
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ *
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define U_SENTINEL (-1)
+
+/**
+ * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_NONCHAR(c) \
+ ((c)>=0xfdd0 && \
+ ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+ (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_CHAR(c) \
+ ((uint32_t)(c)<0xd800 || \
+ ((uint32_t)(c)>0xdfff && \
+ (uint32_t)(c)<=0x10ffff && \
+ !U_IS_UNICODE_NONCHAR(c)))
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Is this code point a BMP code point (U+0000..U+ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.8
+ */
+#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
+
+/**
+ * Is this code point a supplementary code point (U+10000..U+10ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.8
+ */
+#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
+
+#endif /*U_HIDE_DRAFT_API*/
+
+/**
+ * Is this code point a lead surrogate (U+d800..U+dbff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code point a trail surrogate (U+dc00..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code point a surrogate (U+d800..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/* include the utfXX.h ------------------------------------------------------ */
+
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
+#include "unicode/utf_old.h"
+
+#endif
diff --git a/Source/WebCore/icu/unicode/utf16.h b/Source/WebCore/icu/unicode/utf16.h
new file mode 100644
index 0000000..7bf3872
--- /dev/null
+++ b/Source/WebCore/icu/unicode/utf16.h
@@ -0,0 +1,605 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf16.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 16-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
+ * utf16.h is included by utf.h after unicode/umachine.h
+ * and some common definitions.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://oss.software.ibm.com/icu/userguide/).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF16_H__
+#define __UTF16_H__
+
+/* utf.h must be included first. */
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Helper constant for U16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
+ *
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+ (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define U16_MAX_LENGTH 2
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * The result is undefined if the offset points to a single, unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_GET
+ * @stable ICU 2.4
+ */
+#define U16_GET_UNSAFE(s, i, c) { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
+ } else { \
+ (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
+ } \
+ } \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * If the offset points to a single, unpaired surrogate, then that itself
+ * will be returned as the code point.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_GET(s, start, i, length, c) { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } else { \
+ if((i)-1>=(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+ } \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset points to a single, unpaired lead surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_NEXT
+ * @stable ICU 2.4
+ */
+#define U16_NEXT_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
+ } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_NEXT(s, i, length, c) { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ uint16_t __c2; \
+ if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U16_APPEND
+ * @stable ICU 2.4
+ */
+#define U16_APPEND_UNSAFE(s, i, c) { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a surrogate pair is written, checks for sufficient space in the string.
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset, i<length
+ * @param capacity size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U16_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_APPEND(s, i, capacity, c, isError) { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* c>0x10ffff or not enough space */ { \
+ (isError)=TRUE; \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_FWD_1
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1_UNSAFE(s, i) { \
+ if(U16_IS_LEAD((s)[(i)++])) { \
+ ++(i); \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @see U16_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1(s, i, length) { \
+ if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_FWD_N
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U16_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param n number of code points to skip
+ * @see U16_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N(s, i, length, n) { \
+ int32_t __N=(n); \
+ while(__N>0 && (i)<(length)) { \
+ U16_FWD_1(s, i, length); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START_UNSAFE(s, i) { \
+ if(U16_IS_TRAIL((s)[i])) { \
+ --(i); \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @see U16_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START(s, start, i) { \
+ if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind a single, unpaired trail surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_PREV
+ * @stable ICU 2.4
+ */
+#define U16_PREV_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_PREV(s, start, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_BACK_1
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1_UNSAFE(s, i) { \
+ if(U16_IS_TRAIL((s)[--(i)])) { \
+ --(i); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @see U16_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1(s, start, i) { \
+ if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_BACK_N
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U16_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start start of string
+ * @param i string offset, i<length
+ * @param n number of code points to skip
+ * @see U16_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N(s, start, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0 && (i)>(start)) { \
+ U16_BACK_1(s, start, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
+ if(U16_IS_LEAD((s)[(i)-1])) { \
+ ++(i); \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i<=length
+ * @param length string length
+ * @see U16_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT(s, start, i, length) { \
+ if((start)<(i) && (i)<(length) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+}
+
+#endif
diff --git a/Source/WebCore/icu/unicode/utf8.h b/Source/WebCore/icu/unicode/utf8.h
new file mode 100644
index 0000000..f83662b
--- /dev/null
+++ b/Source/WebCore/icu/unicode/utf8.h
@@ -0,0 +1,627 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf8.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 8-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
+ * utf8.h is included by utf.h after unicode/umachine.h
+ * and some common definitions.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://oss.software.ibm.com/icu/userguide/).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF8_H__
+#define __UTF8_H__
+
+/* utf.h must be included first. */
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* internal definitions ----------------------------------------------------- */
+
+/**
+ * \var utf8_countTrailBytes
+ * Internal array with numbers of trail bytes for any given byte used in
+ * lead byte position.
+ * @internal
+ */
+#ifdef U_UTF8_IMPL
+U_INTERNAL const uint8_t
+#elif defined(U_STATIC_IMPLEMENTATION)
+U_CFUNC const uint8_t
+#else
+U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/
+#endif
+utf8_countTrailBytes[256];
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @internal
+ */
+#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/**
+ * Function for handling "next code point" with error-checking.
+ * @internal
+ */
+U_INTERNAL UChar32 U_EXPORT2
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "append code point" with error-checking.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
+
+/**
+ * Function for handling "previous code point" with error-checking.
+ * @internal
+ */
+U_INTERNAL UChar32 U_EXPORT2
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "skip backward one code point" with error-checking.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2
+utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_SINGLE(c) (((c)&0x80)==0)
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define U8_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)(c)<=0xd7ff ? 3 : \
+ ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
+ ((uint32_t)(c)<=0xffff ? 3 : 4)\
+ ) \
+ ) \
+ ) \
+ )
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define U8_MAX_LENGTH 4
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * The result is undefined if the offset points to an illegal UTF-8
+ * byte sequence.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_GET
+ * @stable ICU 2.4
+ */
+#define U8_GET_UNSAFE(s, i, c) { \
+ int32_t _u8_get_unsafe_index=(int32_t)(i); \
+ U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
+ U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to a negative value.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset
+ * @param i string offset, start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_GET(s, start, i, length, c) { \
+ int32_t _u8_get_index=(int32_t)(i); \
+ U8_SET_CP_START(s, start, _u8_get_index); \
+ U8_NEXT(s, _u8_get_index, length, c); \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * The result is undefined if the offset points to a trail byte
+ * or an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_NEXT
+ * @stable ICU 2.4
+ */
+#define U8_NEXT_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+ if((uint8_t)((c)-0xc0)<0x35) { \
+ uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \
+ U8_MASK_LEAD_BYTE(c, __count); \
+ switch(__count) { \
+ /* each following branch falls through to the next one */ \
+ case 3: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 2: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 1: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ /* no other branches to optimize switch() */ \
+ break; \
+ } \
+ } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_NEXT(s, i, length, c) { \
+ (c)=(s)[(i)++]; \
+ if(((uint8_t)(c))>=0x80) { \
+ if(U8_IS_LEAD(c)) { \
+ (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -1); \
+ } else { \
+ (c)=U_SENTINEL; \
+ } \
+ } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U8_APPEND
+ * @stable ICU 2.4
+ */
+#define U8_APPEND_UNSAFE(s, i, c) { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ if((uint32_t)(c)<=0x7ff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+ } else { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+ } else { \
+ (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a non-ASCII code point is written, checks for sufficient space in the string.
+ * If the code point is not valid or trail bytes do not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i string offset, i<length
+ * @param length size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U8_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_APPEND(s, i, length, c, isError) { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, &(isError)); \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_FWD_1
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1_UNSAFE(s, i) { \
+ (i)+=1+U8_COUNT_TRAIL_BYTES((s)[i]); \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @see U8_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1(s, i, length) { \
+ uint8_t __b=(s)[(i)++]; \
+ if(U8_IS_LEAD(__b)) { \
+ uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
+ if((i)+__count>(length)) { \
+ __count=(uint8_t)((length)-(i)); \
+ } \
+ while(__count>0 && U8_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ --__count; \
+ } \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_FWD_N
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U8_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param n number of code points to skip
+ * @see U8_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N(s, i, length, n) { \
+ int32_t __N=(n); \
+ while(__N>0 && (i)<(length)) { \
+ U8_FWD_1(s, i, length); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START_UNSAFE(s, i) { \
+ while(U8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @see U8_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START(s, start, i) { \
+ if(U8_IS_TRAIL((s)[(i)])) { \
+ (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
+ } \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_PREV
+ * @stable ICU 2.4
+ */
+#define U8_PREV_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(U8_IS_TRAIL(c)) { \
+ uint8_t __b, __count=1, __shift=6; \
+\
+ /* c is a trail byte */ \
+ (c)&=0x3f; \
+ for(;;) { \
+ __b=(s)[--(i)]; \
+ if(__b>=0xc0) { \
+ U8_MASK_LEAD_BYTE(__b, __count); \
+ (c)|=(UChar32)__b<<__shift; \
+ break; \
+ } else { \
+ (c)|=(UChar32)(__b&0x3f)<<__shift; \
+ ++__count; \
+ __shift+=6; \
+ } \
+ } \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_PREV(s, start, i, c) { \
+ (c)=(s)[--(i)]; \
+ if((c)>=0x80) { \
+ if((c)<=0xbf) { \
+ (c)=utf8_prevCharSafeBody(s, start, &(i), c, -1); \
+ } else { \
+ (c)=U_SENTINEL; \
+ } \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_BACK_1
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1_UNSAFE(s, i) { \
+ while(U8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i
+ * @see U8_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1(s, start, i) { \
+ if(U8_IS_TRAIL((s)[--(i)])) { \
+ (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_BACK_N
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U8_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start index of the start of the string
+ * @param i string offset, i<length
+ * @param n number of code points to skip
+ * @see U8_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N(s, start, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0 && (i)>(start)) { \
+ U8_BACK_1(s, start, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
+ U8_BACK_1_UNSAFE(s, i); \
+ U8_FWD_1_UNSAFE(s, i); \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i<=length
+ * @param length string length
+ * @see U8_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT(s, start, i, length) { \
+ if((start)<(i) && (i)<(length)) { \
+ U8_BACK_1(s, start, i); \
+ U8_FWD_1(s, i, length); \
+ } \
+}
+
+#endif
diff --git a/Source/WebCore/icu/unicode/utf_old.h b/Source/WebCore/icu/unicode/utf_old.h
new file mode 100644
index 0000000..8504a03
--- /dev/null
+++ b/Source/WebCore/icu/unicode/utf_old.h
@@ -0,0 +1 @@
+/* This file is intentionally left blank. */
diff --git a/Source/WebCore/icu/unicode/utypes.h b/Source/WebCore/icu/unicode/utypes.h
new file mode 100644
index 0000000..e20cd79
--- /dev/null
+++ b/Source/WebCore/icu/unicode/utypes.h
@@ -0,0 +1,745 @@
+/*
+**********************************************************************
+* Copyright (C) 1996-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* FILE NAME : UTYPES.H (formerly ptypes.h)
+*
+* Date Name Description
+* 12/11/96 helena Creation.
+* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
+* uint8, uint16, and uint32.
+* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
+* well as C++.
+* Modified to use memcpy() for uprv_arrayCopy() fns.
+* 04/14/97 aliu Added TPlatformUtilities.
+* 05/07/97 aliu Added import/export specifiers (replacing the old
+* broken EXT_CLASS). Added version number for our
+* code. Cleaned up header.
+* 6/20/97 helena Java class name change.
+* 08/11/98 stephen UErrorCode changed from typedef to enum
+* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
+* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
+* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
+* 04/20/99 stephen Cleaned up & reworked for autoconf.
+* Renamed to utypes.h.
+* 05/05/99 stephen Changed to use <inttypes.h>
+* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
+*******************************************************************************
+*/
+
+#ifndef UTYPES_H
+#define UTYPES_H
+
+
+#include "unicode/umachine.h"
+#include "unicode/utf.h"
+#include "unicode/uversion.h"
+#include "unicode/uconfig.h"
+
+#ifdef U_HIDE_DRAFT_API
+#include "unicode/udraft.h"
+#endif
+
+#ifdef U_HIDE_DEPRECATED_API
+#include "unicode/udeprctd.h"
+#endif
+
+#ifdef U_HIDE_DEPRECATED_API
+#include "unicode/uobslete.h"
+#endif
+
+
+/*!
+ * \file
+ * \brief Basic definitions for ICU, for both C and C++ APIs
+ *
+ * This file defines basic types, constants, and enumerations directly or
+ * indirectly by including other header files, especially utf.h for the
+ * basic character and string definitions and umachine.h for consistent
+ * integer and other types.
+ */
+
+/*===========================================================================*/
+/* char Character set family */
+/*===========================================================================*/
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
+ * @stable ICU 2.0
+ */
+#define U_ASCII_FAMILY 0
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
+ * @stable ICU 2.0
+ */
+#define U_EBCDIC_FAMILY 1
+
+/**
+ * \def U_CHARSET_FAMILY
+ *
+ * <p>These definitions allow to specify the encoding of text
+ * in the char data type as defined by the platform and the compiler.
+ * It is enough to determine the code point values of "invariant characters",
+ * which are the ones shared by all encodings that are in use
+ * on a given platform.</p>
+ *
+ * <p>Those "invariant characters" should be all the uppercase and lowercase
+ * latin letters, the digits, the space, and "basic punctuation".
+ * Also, '\\n', '\\r', '\\t' should be available.</p>
+ *
+ * <p>The list of "invariant characters" is:<br>
+ * \code
+ * A-Z a-z 0-9 SPACE " % &amp; ' ( ) * + , - . / : ; < = > ? _
+ * \endcode
+ * <br>
+ * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
+ *
+ * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
+ *
+ * <p>In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:</p>
+ *
+ * \code
+ * '\' <backslash>
+ * '[' <left bracket>
+ * ']' <right bracket>
+ * '{' <left brace>
+ * '}' <right brace>
+ * '^' <circumflex>
+ * '~' <tilde>
+ * '!' <exclamation mark>
+ * '#' <number sign>
+ * '|' <vertical line>
+ * '$' <dollar sign>
+ * '@' <commercial at>
+ * '`' <grave accent>
+ * \endcode
+ * @stable ICU 2.0
+ */
+
+#ifndef U_CHARSET_FAMILY
+# define U_CHARSET_FAMILY 0
+#endif
+
+/*===========================================================================*/
+/* ICUDATA naming scheme */
+/*===========================================================================*/
+
+/**
+ * \def U_ICUDATA_TYPE_LETTER
+ *
+ * This is a platform-dependent string containing one letter:
+ * - b for big-endian, ASCII-family platforms
+ * - l for little-endian, ASCII-family platforms
+ * - e for big-endian, EBCDIC-family platforms
+ * This letter is part of the common data file name.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_ICUDATA_TYPE_LITLETTER
+ * The non-string form of U_ICUDATA_TYPE_LETTER
+ * @stable ICU 2.0
+ */
+#if U_CHARSET_FAMILY
+# if U_IS_BIG_ENDIAN
+ /* EBCDIC - should always be BE */
+# define U_ICUDATA_TYPE_LETTER "e"
+# define U_ICUDATA_TYPE_LITLETTER e
+# else
+# error "Don't know what to do with little endian EBCDIC!"
+# define U_ICUDATA_TYPE_LETTER "x"
+# define U_ICUDATA_TYPE_LITLETTER x
+# endif
+#else
+# if U_IS_BIG_ENDIAN
+ /* Big-endian ASCII */
+# define U_ICUDATA_TYPE_LETTER "b"
+# define U_ICUDATA_TYPE_LITLETTER b
+# else
+ /* Little-endian ASCII */
+# define U_ICUDATA_TYPE_LETTER "l"
+# define U_ICUDATA_TYPE_LITLETTER l
+# endif
+#endif
+
+/**
+ * A single string literal containing the icudata stub name. i.e. 'icudt18e' for
+ * ICU 1.8.x on EBCDIC, etc..
+ * @stable ICU 2.0
+ */
+#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
+
+
+/**
+ * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
+ * Defined as a literal, not a string.
+ * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
+ * from the corresponding macro invocation, _before_ other macro substitutions.
+ * Need a nested #defines to get the actual version numbers rather than
+ * the literal text U_ICU_VERSION_MAJOR_NUM into the name.
+ * The net result will be something of the form
+ * #define U_ICU_ENTRY_POINT icudt19_dat
+ * @stable ICU 2.4
+ */
+#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM)
+/**
+ * @internal
+ */
+#define U_DEF2_ICUDATA_ENTRY_POINT(major, minor) U_DEF_ICUDATA_ENTRY_POINT(major, minor)
+/**
+ * @internal
+ */
+#define U_DEF_ICUDATA_ENTRY_POINT(major, minor) icudt##major##minor##_dat
+
+/**
+ * \def U_CALLCONV
+ * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
+ * in callback function typedefs to make sure that the calling convention
+ * is compatible.
+ *
+ * This is only used for non-ICU-API functions.
+ * When a function is a public ICU API,
+ * you must use the U_CAPI and U_EXPORT2 qualifiers.
+ * @stable ICU 2.0
+ */
+#if defined(OS390) && (__COMPILER_VER__ < 0x41020000) && defined(XP_CPLUSPLUS)
+# define U_CALLCONV __cdecl
+#else
+# define U_CALLCONV U_EXPORT2
+#endif
+
+/**
+ * \def NULL
+ * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
+ * @stable ICU 2.0
+ */
+#ifndef NULL
+#ifdef XP_CPLUSPLUS
+#define NULL 0
+#else
+#define NULL ((void *)0)
+#endif
+#endif
+
+/*===========================================================================*/
+/* Calendar/TimeZone data types */
+/*===========================================================================*/
+
+/**
+ * Date and Time data type.
+ * This is a primitive data type that holds the date and time
+ * as the number of milliseconds since 1970-jan-01, 00:00 UTC.
+ * UTC leap seconds are ignored.
+ * @stable ICU 2.0
+ */
+typedef double UDate;
+
+/** The number of milliseconds per second @stable ICU 2.0 */
+#define U_MILLIS_PER_SECOND (1000)
+/** The number of milliseconds per minute @stable ICU 2.0 */
+#define U_MILLIS_PER_MINUTE (60000)
+/** The number of milliseconds per hour @stable ICU 2.0 */
+#define U_MILLIS_PER_HOUR (3600000)
+/** The number of milliseconds per day @stable ICU 2.0 */
+#define U_MILLIS_PER_DAY (86400000)
+
+
+/*===========================================================================*/
+/* UClassID-based RTTI */
+/*===========================================================================*/
+
+/**
+ * UClassID is used to identify classes without using RTTI, since RTTI
+ * is not yet supported by all C++ compilers. Each class hierarchy which needs
+ * to implement polymorphic clone() or operator==() defines two methods,
+ * described in detail below. UClassID values can be compared using
+ * operator==(). Nothing else should be done with them.
+ *
+ * \par
+ * getDynamicClassID() is declared in the base class of the hierarchy as
+ * a pure virtual. Each concrete subclass implements it in the same way:
+ *
+ * \code
+ * class Base {
+ * public:
+ * virtual UClassID getDynamicClassID() const = 0;
+ * }
+ *
+ * class Derived {
+ * public:
+ * virtual UClassID getDynamicClassID() const
+ * { return Derived::getStaticClassID(); }
+ * }
+ * \endcode
+ *
+ * Each concrete class implements getStaticClassID() as well, which allows
+ * clients to test for a specific type.
+ *
+ * \code
+ * class Derived {
+ * public:
+ * static UClassID U_EXPORT2 getStaticClassID();
+ * private:
+ * static char fgClassID;
+ * }
+ *
+ * // In Derived.cpp:
+ * UClassID Derived::getStaticClassID()
+ * { return (UClassID)&Derived::fgClassID; }
+ * char Derived::fgClassID = 0; // Value is irrelevant
+ * \endcode
+ * @stable ICU 2.0
+ */
+typedef void* UClassID;
+
+/*===========================================================================*/
+/* Shared library/DLL import-export API control */
+/*===========================================================================*/
+
+/*
+ * Control of symbol import/export.
+ * ICU is separated into three libraries.
+ */
+
+/*
+ * \def U_COMBINED_IMPLEMENTATION
+ * Set to export library symbols from inside the ICU library
+ * when all of ICU is in a single library.
+ * This can be set as a compiler option while building ICU, and it
+ * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_DATA_API
+ * Set to export library symbols from inside the stubdata library,
+ * and to import them from outside.
+ * @draft ICU 3.0
+ */
+
+/**
+ * \def U_COMMON_API
+ * Set to export library symbols from inside the common library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_I18N_API
+ * Set to export library symbols from inside the i18n library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUT_API
+ * Set to export library symbols from inside the layout engine library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUTEX_API
+ * Set to export library symbols from inside the layout extensions library,
+ * and to import them from outside.
+ * @stable ICU 2.6
+ */
+
+/**
+ * \def U_IO_API
+ * Set to export library symbols from inside the ustdio library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+#if defined(U_COMBINED_IMPLEMENTATION)
+#define U_DATA_API U_EXPORT
+#define U_COMMON_API U_EXPORT
+#define U_I18N_API U_EXPORT
+#define U_LAYOUT_API U_EXPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API U_EXPORT
+#elif defined(U_STATIC_IMPLEMENTATION)
+#define U_DATA_API
+#define U_COMMON_API
+#define U_I18N_API
+#define U_LAYOUT_API
+#define U_LAYOUTEX_API
+#define U_IO_API
+#elif defined(U_COMMON_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_EXPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#elif defined(U_I18N_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_EXPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#elif defined(U_LAYOUT_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_EXPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#elif defined(U_LAYOUTEX_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API U_IMPORT
+#elif defined(U_IO_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_EXPORT
+#else
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#endif
+
+/**
+ * \def U_STANDARD_CPP_NAMESPACE
+ * Control of C++ Namespace
+ * @stable ICU 2.0
+ */
+#ifdef __cplusplus
+#define U_STANDARD_CPP_NAMESPACE ::
+#else
+#define U_STANDARD_CPP_NAMESPACE
+#endif
+
+
+/*===========================================================================*/
+/* Global delete operator */
+/*===========================================================================*/
+
+/*
+ * The ICU4C library must not use the global new and delete operators.
+ * These operators here are defined to enable testing for this.
+ * See Jitterbug 2581 for details of why this is necessary.
+ *
+ * Verification that ICU4C's memory usage is correct, i.e.,
+ * that global new/delete are not used:
+ *
+ * a) Check for imports of global new/delete (see uobject.cpp for details)
+ * b) Verify that new is never imported.
+ * c) Verify that delete is only imported from object code for interface/mixin classes.
+ * d) Add global delete and delete[] only for the ICU4C library itself
+ * and define them in a way that crashes or otherwise easily shows a problem.
+ *
+ * The following implements d).
+ * The operator implementations crash; this is intentional and used for library debugging.
+ *
+ * Note: This is currently only done on Windows because
+ * some Linux/Unix compilers have problems with defining global new/delete.
+ * On Windows, WIN32 is defined, and it is _MSC_Ver>=1200 for MSVC 6.0 and higher.
+ */
+#if defined(XP_CPLUSPLUS) && defined(WIN32) && (_MSC_Ver>=1200) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_USTDIO_IMPLEMENTATION))
+
+/**
+ * Global operator new, defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void *
+operator new(size_t /*size*/) {
+ char *q=NULL;
+ *q=5; /* break it */
+ return q;
+}
+
+/**
+ * Global operator new[], defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void *
+operator new[](size_t /*size*/) {
+ char *q=NULL;
+ *q=5; /* break it */
+ return q;
+}
+
+/**
+ * Global operator delete, defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void
+operator delete(void * /*p*/) {
+ char *q=NULL;
+ *q=5; /* break it */
+}
+
+/**
+ * Global operator delete[], defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void
+operator delete[](void * /*p*/) {
+ char *q=NULL;
+ *q=5; /* break it */
+}
+
+#endif
+
+/*===========================================================================*/
+/* UErrorCode */
+/*===========================================================================*/
+
+/**
+ * Error code to replace exception handling, so that the code is compatible with all C++ compilers,
+ * and to use the same mechanism for C and C++.
+ *
+ * \par
+ * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode
+ * first test if(U_FAILURE(errorCode)) { return immediately; }
+ * so that in a chain of such functions the first one that sets an error code
+ * causes the following ones to not perform any operations.
+ *
+ * \par
+ * Error codes should be tested using U_FAILURE() and U_SUCCESS().
+ * @stable ICU 2.0
+ */
+typedef enum UErrorCode {
+ /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
+ * and is that way because VC++ debugger displays first encountered constant,
+ * which is not the what the code is used for
+ */
+
+ U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
+
+ U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
+
+ U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
+
+ U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
+
+ U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
+
+ U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
+
+ U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
+
+ U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
+
+ U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
+
+ U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
+
+
+ U_ZERO_ERROR = 0, /**< No error, no warning. */
+
+ U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
+ U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
+ U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
+ U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
+ U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
+ U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
+ U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
+ U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
+ U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
+ U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
+ U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
+ U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units.. */
+ U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
+ U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
+ U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
+ U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
+ U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
+ U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */
+ U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
+ U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
+ U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
+ U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
+ U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
+ U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
+ It is very possible that a circular alias definition has occured */
+ U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
+ U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
+ U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
+ U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
+ U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
+
+ U_STANDARD_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for standard errors */
+ /*
+ * the error code range 0x10000 0x10100 are reserved for Transliterator
+ */
+ U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
+ U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
+ U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
+ U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
+ U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
+ U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
+ U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
+ U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
+ U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
+ U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
+ U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
+ U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
+ U_MISSING_OPERATOR, /**< A rule contains no operator */
+ U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
+ U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
+ U_MULTIPLE_CURSORS, /**< More than one cursor */
+ U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
+ U_TRAILING_BACKSLASH, /**< A dangling backslash */
+ U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
+ U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
+ U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
+ U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
+ U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
+ U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
+ U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
+ U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
+ U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
+ U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */
+ U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
+ U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
+ U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
+ U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
+ U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
+ U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
+ U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
+ U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
+ U_PARSE_ERROR_LIMIT, /**< The limit for Transliterator errors */
+
+ /*
+ * the error code range 0x10100 0x10200 are reserved for formatting API parsing error
+ */
+ U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
+ U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
+ U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
+ U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
+ U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
+ U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
+ U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
+ U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
+ U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
+ U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
+ U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
+ U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
+ U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
+ U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
+ U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */
+
+ /*
+ * the error code range 0x10200 0x102ff are reserved for Break Iterator related error
+ */
+ U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
+ U_BRK_INTERNAL_ERROR, /**< An internal error (bug) was detected. */
+ U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
+ U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
+ U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
+ U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */
+ U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
+ U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
+ U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
+ U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
+ U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
+ U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
+ U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
+ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
+ U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */
+ U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */
+
+ /*
+ * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs
+ */
+ U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
+ U_REGEX_INTERNAL_ERROR, /**< An internal error (bug) was detected. */
+ U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
+ U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
+ U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
+ U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
+ U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
+ U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
+ U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
+ U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
+ U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
+ U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
+ U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
+ U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
+ U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
+ U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
+
+ /*
+ * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
+ */
+ U_IDNA_ERROR_START=0x10400,
+ U_IDNA_PROHIBITED_ERROR,
+ U_IDNA_UNASSIGNED_ERROR,
+ U_IDNA_CHECK_BIDI_ERROR,
+ U_IDNA_STD3_ASCII_RULES_ERROR,
+ U_IDNA_ACE_PREFIX_ERROR,
+ U_IDNA_VERIFICATION_ERROR,
+ U_IDNA_LABEL_TOO_LONG_ERROR,
+ U_IDNA_ERROR_LIMIT,
+ /*
+ * Aliases for StringPrep
+ */
+ U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
+ U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
+ U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
+
+
+ U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
+} UErrorCode;
+
+/* Use the following to determine if an UErrorCode represents */
+/* operational success or failure. */
+
+#ifdef XP_CPLUSPLUS
+ /**
+ * Does the error code indicate success?
+ * @stable ICU 2.0
+ */
+ static
+ inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
+ /**
+ * Does the error code indicate a failure?
+ * @stable ICU 2.0
+ */
+ static
+ inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
+#else
+ /**
+ * Does the error code indicate success?
+ * @stable ICU 2.0
+ */
+# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
+ /**
+ * Does the error code indicate a failure?
+ * @stable ICU 2.0
+ */
+# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
+#endif
+
+/**
+ * Return a string for a UErrorCode value.
+ * The string will be the same as the name of the error code constant
+ * in the UErrorCode enum above.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+u_errorName(UErrorCode code);
+
+
+#endif /* _UTYPES */
diff --git a/Source/WebCore/icu/unicode/uversion.h b/Source/WebCore/icu/unicode/uversion.h
new file mode 100644
index 0000000..f9a7755
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uversion.h
@@ -0,0 +1,216 @@
+/*
+*******************************************************************************
+* Copyright (C) 2000-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uversion.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Vladimir Weinstein
+*
+* Contains all the important version numbers for ICU.
+* Gets included by utypes.h and Windows .rc files
+*/
+
+/*===========================================================================*/
+/* Main ICU version information */
+/*===========================================================================*/
+
+#ifndef UVERSION_H
+#define UVERSION_H
+
+/** IMPORTANT: When updating version, the following things need to be done: */
+/** source/common/unicode/uversion.h - this file: update major, minor, */
+/** patchlevel, suffix, version, short version constants, namespace, */
+/** and copyright */
+/** source/common/common.dsp - update 'Output file name' on the link tab so */
+/** that it contains the new major/minor combination */
+/** source/i18n/i18n.dsp - same as for the common.dsp */
+/** source/layout/layout.dsp - same as for the common.dsp */
+/** source/stubdata/stubdata.dsp - same as for the common.dsp */
+/** source/extra/ustdio/ustdio.dsp - same as for the common.dsp */
+/** source/data/makedata.mak - change U_ICUDATA_NAME so that it contains */
+/** the new major/minor combination */
+/** source/tools/genren/genren.pl - use this script according to the README */
+/** in that folder */
+
+#include "unicode/umachine.h"
+
+/** The standard copyright notice that gets compiled into each library.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING \
+ " Copyright (C) 2004, International Business Machines Corporation and others. All Rights Reserved. "
+
+/** Maximum length of the copyright string.
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING_LENGTH 128
+
+/** The current ICU major version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_MAJOR_NUM 3
+
+/** The current ICU minor version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_MINOR_NUM 2
+
+/** The current ICU patchlevel version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_PATCHLEVEL_NUM 0
+
+/** Glued version suffix for renamers
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SUFFIX _3_2
+
+/** The current ICU library version as a dotted-decimal string. The patchlevel
+ * only appears in this string if it non-zero.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION "3.2"
+
+/** The current ICU library major/minor version as a string without dots, for library name suffixes.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SHORT "32"
+
+/** An ICU version consists of up to 4 numbers from 0..255.
+ * @stable ICU 2.4
+ */
+#define U_MAX_VERSION_LENGTH 4
+
+/** In a string, ICU version fields are delimited by dots.
+ * @stable ICU 2.4
+ */
+#define U_VERSION_DELIMITER '.'
+
+/** The maximum length of an ICU version string.
+ * @stable ICU 2.4
+ */
+#define U_MAX_VERSION_STRING_LENGTH 20
+
+/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
+ * @stable ICU 2.4
+ */
+typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
+
+#if U_HAVE_NAMESPACE && defined(XP_CPLUSPLUS)
+#if U_DISABLE_RENAMING
+#define U_ICU_NAMESPACE icu
+namespace U_ICU_NAMESPACE { }
+#else
+#define U_ICU_NAMESPACE icu_3_2
+namespace U_ICU_NAMESPACE { }
+namespace icu = U_ICU_NAMESPACE;
+#endif
+U_NAMESPACE_USE
+#endif
+
+
+/*===========================================================================*/
+/* General version helper functions. Definitions in putil.c */
+/*===========================================================================*/
+
+/**
+ * Parse a string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A string with dotted-decimal version information,
+ * with up to four non-negative number fields with
+ * values of up to 255 each.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+u_versionFromString(UVersionInfo versionArray, const char *versionString);
+
+/**
+ * Write a string with dotted-decimal version information according
+ * to the input UVersionInfo.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The version information to be written as a string.
+ * @param versionString A string buffer that will be filled in with
+ * a string corresponding to the numeric version
+ * information in versionArray.
+ * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+u_versionToString(UVersionInfo versionArray, char *versionString);
+
+/**
+ * Gets the ICU release version. The version array stores the version information
+ * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getVersion(UVersionInfo versionArray);
+
+
+/*===========================================================================
+ * ICU collation framework version information
+ * Version info that can be obtained from a collator is affected by these
+ * numbers in a secret and magic way. Please use collator version as whole
+ *===========================================================================
+ */
+
+/** Collation runtime version (sort key generator, strcoll).
+ * If the version is different, sortkeys for the same string could be different
+ * version 2 was in ICU 1.8.1. changed is: compression intervals, French secondary
+ * compression, generating quad level always when strength is quad or more
+ * version 4 - ICU 2.2 - tracking UCA changes, ignore completely ignorables
+ * in contractions, ignore primary ignorables after shifted
+ * version 5 - ICU 2.8 - changed implicit generation code
+ * This value may change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define UCOL_RUNTIME_VERSION 5
+
+/** Builder code version. When this is different, same tailoring might result
+ * in assigning different collation elements to code points
+ * version 2 was in ICU 1.8.1. added support for prefixes, tweaked canonical
+ * closure. However, the tailorings should probably get same CEs assigned
+ * version 5 - ICU 2.2 - fixed some bugs, renamed some indirect values.
+ * version 6 - ICU 2.8 - fixed bug in builder that allowed 0xFF in primary values
+ * Backward compatible with the old rules.
+ * This value may change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define UCOL_BUILDER_VERSION 6
+
+/** *** Removed *** Instead we use the data we read from FractionalUCA.txt
+ * This is the version of FractionalUCA.txt tailoring rules
+ * Version 1 was in ICU 1.8.1. Version two contains canonical closure for
+ * supplementary code points
+ * Version 4 in ICU 2.2, following UCA=3.1.1d6, UCD=3.2.0
+ * This value may change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+/*#define UCOL_FRACTIONAL_UCA_VERSION 4*/
+
+/** This is the version of the tailorings
+ * This value may change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define UCOL_TAILORINGS_VERSION 1
+
+#endif