diff options
author | Upstream <upstream-import@none> | 1970-01-12 13:46:40 +0000 |
---|---|---|
committer | Upstream <upstream-import@none> | 1970-01-12 13:46:40 +0000 |
commit | d8543bb6618c17b12da906afa77d216f58cf4058 (patch) | |
tree | c58dc05ed86825bd0ef8d305d58c8205106b540f /JavaScriptGlue/icu/unicode | |
download | external_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.zip external_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.tar.gz external_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.tar.bz2 |
external/webkit r30707
Diffstat (limited to 'JavaScriptGlue/icu/unicode')
-rw-r--r-- | JavaScriptGlue/icu/unicode/platform.h | 267 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/putil.h | 180 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/uchar.h | 2798 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/ucnv.h | 1817 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/ucnv_err.h | 456 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/uconfig.h | 186 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/uenum.h | 129 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/uiter.h | 707 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/umachine.h | 371 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/urename.h | 1468 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/ustring.h | 1320 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/utf.h | 221 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/utf16.h | 605 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/utf8.h | 627 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/utf_old.h | 0 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/utypes.h | 745 | ||||
-rw-r--r-- | JavaScriptGlue/icu/unicode/uversion.h | 216 |
17 files changed, 12113 insertions, 0 deletions
diff --git a/JavaScriptGlue/icu/unicode/platform.h b/JavaScriptGlue/icu/unicode/platform.h new file mode 100644 index 0000000..9595a26 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/platform.h @@ -0,0 +1,267 @@ +/* +****************************************************************************** +* +* Copyright (C) 1997-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : platform.h +* +* Date Name Description +* 05/13/98 nos Creation (content moved here from ptypes.h). +* 03/02/99 stephen Added AS400 support. +* 03/30/99 stephen Added Linux support. +* 04/13/99 stephen Reworked for autoconf. +****************************************************************************** +*/ + +/* Define the platform we're on. */ +#ifndef U_DARWIN +#define U_DARWIN +#endif + +/* Define whether inttypes.h is available */ +#ifndef U_HAVE_INTTYPES_H +#define U_HAVE_INTTYPES_H 1 +#endif + +/* + * Define what support for C++ streams is available. + * If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available + * (1997711 is the date the ISO/IEC C++ FDIS was published), and then + * one should qualify streams using the std namespace in ICU header + * files. + * If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is + * available instead (198506 is the date when Stroustrup published + * "An Extensible I/O Facility for C++" at the summer USENIX conference). + * If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and + * support for them will be silently suppressed in ICU. + * + */ + +#ifndef U_IOSTREAM_SOURCE +#define U_IOSTREAM_SOURCE 199711 +#endif + +/* Determines whether specific types are available */ +#ifndef U_HAVE_INT8_T +#define U_HAVE_INT8_T 1 +#endif + +#ifndef U_HAVE_UINT8_T +#define U_HAVE_UINT8_T 0 +#endif + +#ifndef U_HAVE_INT16_T +#define U_HAVE_INT16_T 1 +#endif + +#ifndef U_HAVE_UINT16_T +#define U_HAVE_UINT16_T 0 +#endif + +#ifndef U_HAVE_INT32_T +#define U_HAVE_INT32_T 1 +#endif + +#ifndef U_HAVE_UINT32_T +#define U_HAVE_UINT32_T 0 +#endif + +#ifndef U_HAVE_INT64_T +#define U_HAVE_INT64_T 1 +#endif + +#ifndef U_HAVE_UINT64_T +#define U_HAVE_UINT64_T 0 +#endif + +/*===========================================================================*/ +/* Generic data types */ +/*===========================================================================*/ + +#include <sys/types.h> + +/* If your platform does not have the <inttypes.h> header, you may + need to edit the typedefs below. */ +#if U_HAVE_INTTYPES_H + +/* autoconf 2.13 sometimes can't properly find the data types in <inttypes.h> */ +/* os/390 needs <inttypes.h>, but it doesn't have int8_t, and it sometimes */ +/* doesn't have uint8_t depending on the OS version. */ +/* So we have this work around. */ +#ifdef OS390 +/* The features header is needed to get (u)int64_t sometimes. */ +#include <features.h> +#if ! U_HAVE_INT8_T +typedef signed char int8_t; +#endif +#if !defined(__uint8_t) +#define __uint8_t 1 +typedef unsigned char uint8_t; +#endif +#endif /* OS390 */ + +#include <inttypes.h> + +#else /* U_HAVE_INTTYPES_H */ + +#if ! U_HAVE_INT8_T +typedef signed char int8_t; +#endif + +#if ! U_HAVE_UINT8_T +typedef unsigned char uint8_t; +#endif + +#if ! U_HAVE_INT16_T +typedef signed short int16_t; +#endif + +#if ! U_HAVE_UINT16_T +typedef unsigned short uint16_t; +#endif + +#if ! U_HAVE_INT32_T +typedef signed int int32_t; +#endif + +#if ! U_HAVE_UINT32_T +typedef unsigned int uint32_t; +#endif + +#if ! U_HAVE_INT64_T + typedef signed long long int64_t; +/* else we may not have a 64-bit type */ +#endif + +#if ! U_HAVE_UINT64_T + typedef unsigned long long uint64_t; +/* else we may not have a 64-bit type */ +#endif + +#endif + +/*===========================================================================*/ +/* Compiler and environment features */ +/*===========================================================================*/ + +/* Define whether namespace is supported */ +#ifndef U_HAVE_NAMESPACE +#define U_HAVE_NAMESPACE 1 +#endif + +/* Determines the endianness of the platform + It's done this way in case multiple architectures are being built at once. + For example, Darwin supports fat binaries, which can be both PPC and x86 based. */ +#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) +#define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN) +#else +#define U_IS_BIG_ENDIAN 1 +#endif + +/* 1 or 0 to enable or disable threads. If undefined, default is: enable threads. */ +#define ICU_USE_THREADS 1 + +#ifndef U_DEBUG +#define U_DEBUG 0 +#endif + +#ifndef U_RELEASE +#define U_RELEASE 1 +#endif + +/* Determine whether to disable renaming or not. This overrides the + setting in umachine.h which is for all platforms. */ +#ifndef U_DISABLE_RENAMING +#define U_DISABLE_RENAMING 1 +#endif + +/* Determine whether to override new and delete. */ +#ifndef U_OVERRIDE_CXX_ALLOCATION +#define U_OVERRIDE_CXX_ALLOCATION 1 +#endif +/* Determine whether to override placement new and delete for STL. */ +#ifndef U_HAVE_PLACEMENT_NEW +#define U_HAVE_PLACEMENT_NEW 1 +#endif + +/* Determine whether to enable tracing. */ +#ifndef U_ENABLE_TRACING +#define U_ENABLE_TRACING 1 +#endif + +/* Define the library suffix in a C syntax. */ +#define U_HAVE_LIB_SUFFIX 0 +#define U_LIB_SUFFIX_C_NAME +#define U_LIB_SUFFIX_C_NAME_STRING "" + +/*===========================================================================*/ +/* Character data types */ +/*===========================================================================*/ + +#if defined(OS390) || defined(OS400) +# define U_CHARSET_FAMILY 1 +#endif + +/*===========================================================================*/ +/* Information about wchar support */ +/*===========================================================================*/ + +#define U_HAVE_WCHAR_H 1 +#define U_SIZEOF_WCHAR_T 4 + +#define U_HAVE_WCSCPY 1 + +/*===========================================================================*/ +/* Information about POSIX support */ +/*===========================================================================*/ + +#define U_HAVE_NL_LANGINFO 1 +#define U_HAVE_NL_LANGINFO_CODESET 1 +#define U_NL_LANGINFO_CODESET CODESET + +#if 1 +#define U_TZSET tzset +#endif +#if 0 +#define U_TIMEZONE +#endif +#if 1 +#define U_TZNAME tzname +#endif + +#define U_HAVE_MMAP 1 +#define U_HAVE_POPEN 1 + +/*===========================================================================*/ +/* Symbol import-export control */ +/*===========================================================================*/ + +#define U_EXPORT +/* U_CALLCONV is releated to U_EXPORT2 */ +#define U_EXPORT2 + +/* cygwin needs to export/import data */ +#ifdef U_CYGWIN +#define U_IMPORT __declspec(dllimport) +#else +#define U_IMPORT +#endif + +/*===========================================================================*/ +/* Code alignment and C function inlining */ +/*===========================================================================*/ + +#ifndef U_INLINE +#define U_INLINE inline +#endif + +#define U_ALIGN_CODE(n) + +/*===========================================================================*/ +/* Programs used by ICU code */ +/*===========================================================================*/ + +#define U_MAKE "/usr/bin/gnumake" diff --git a/JavaScriptGlue/icu/unicode/putil.h b/JavaScriptGlue/icu/unicode/putil.h new file mode 100644 index 0000000..685df53 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/putil.h @@ -0,0 +1,180 @@ +/* +****************************************************************************** +* +* Copyright (C) 1997-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : putil.h +* +* Date Name Description +* 05/14/98 nos Creation (content moved here from utypes.h). +* 06/17/99 erm Added IEEE_754 +* 07/22/98 stephen Added IEEEremainder, max, min, trunc +* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity +* 08/24/98 stephen Added longBitsFromDouble +* 03/02/99 stephen Removed openFile(). Added AS400 support. +* 04/15/99 stephen Converted to C +* 11/15/99 helena Integrated S/390 changes for IEEE support. +* 01/11/00 helena Added u_getVersion. +****************************************************************************** +*/ + +#ifndef PUTIL_H +#define PUTIL_H + +#include "unicode/utypes.h" + +/* Define this to 1 if your platform supports IEEE 754 floating point, + to 0 if it does not. */ +#ifndef IEEE_754 +# define IEEE_754 1 +#endif + +/*==========================================================================*/ +/* Platform utilities */ +/*==========================================================================*/ + +/** + * Platform utilities isolates the platform dependencies of the + * libarary. For each platform which this code is ported to, these + * functions may have to be re-implemented. + */ + +/** + * Return the ICU data directory. + * The data directory is where common format ICU data files (.dat files) + * are loaded from. Note that normal use of the built-in ICU + * facilities does not require loading of an external data file; + * unless you are adding custom data to ICU, the data directory + * does not need to be set. + * + * The data directory is determined as follows: + * If u_setDataDirectory() has been called, that is it, otherwise + * if the ICU_DATA environment variable is set, use that, otherwise + * If a data directory was specifed at ICU build time + * (#define ICU_DATA_DIR "path"), use that, + * otherwise no data directory is available. + * + * @return the data directory, or an empty string ("") if no data directory has + * been specified. + * + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 u_getDataDirectory(void); + +/** + * Set the ICU data directory. + * The data directory is where common format ICU data files (.dat files) + * are loaded from. Note that normal use of the built-in ICU + * facilities does not require loading of an external data file; + * unless you are adding custom data to ICU, the data directory + * does not need to be set. + * + * This function should be called at most once in a process, before the + * first ICU operation (e.g., u_init()) that will require the loading of an + * ICU data file. + * This function is not thread-safe. Use it before calling ICU APIs from + * multiple threads. + * + * @param directory The directory to be set. + * + * @see u_init + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); + +/** + * Please use ucnv_getDefaultName() instead. + * Return the default codepage for this platform and locale. + * This function can call setlocale() on Unix platforms. Please read the + * platform documentation on setlocale() before calling this function. + * @return the default codepage for this platform + * @internal + */ +U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void); + +/** + * Please use uloc_getDefault() instead. + * Return the default locale ID string by querying ths system, or + * zero if one cannot be found. + * This function can call setlocale() on Unix platforms. Please read the + * platform documentation on setlocale() before calling this function. + * @return the default locale ID string + * @internal + */ +U_INTERNAL const char* U_EXPORT2 uprv_getDefaultLocaleID(void); + +/** + * Filesystem file and path separator characters. + * Example: '/' and ':' on Unix, '\\' and ';' on Windows. + * @stable ICU 2.0 + */ +#ifdef XP_MAC +# define U_FILE_SEP_CHAR ':' +# define U_FILE_ALT_SEP_CHAR ':' +# define U_PATH_SEP_CHAR ';' +# define U_FILE_SEP_STRING ":" +# define U_FILE_ALT_SEP_STRING ":" +# define U_PATH_SEP_STRING ";" +#elif defined(WIN32) || defined(OS2) +# define U_FILE_SEP_CHAR '\\' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ';' +# define U_FILE_SEP_STRING "\\" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ";" +#else +# define U_FILE_SEP_CHAR '/' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ':' +# define U_FILE_SEP_STRING "/" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ":" +#endif + +/** + * Convert char characters to UChar characters. + * This utility function is useful only for "invariant characters" + * that are encoded in the platform default encoding. + * They are a small, constant subset of the encoding and include + * just the latin letters, digits, and some punctuation. + * For details, see U_CHARSET_FAMILY. + * + * @param cs Input string, points to <code>length</code> + * character bytes from a subset of the platform encoding. + * @param us Output string, points to memory for <code>length</code> + * Unicode characters. + * @param length The number of characters to convert; this may + * include the terminating <code>NUL</code>. + * + * @see U_CHARSET_FAMILY + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_charsToUChars(const char *cs, UChar *us, int32_t length); + +/** + * Convert UChar characters to char characters. + * This utility function is useful only for "invariant characters" + * that can be encoded in the platform default encoding. + * They are a small, constant subset of the encoding and include + * just the latin letters, digits, and some punctuation. + * For details, see U_CHARSET_FAMILY. + * + * @param us Input string, points to <code>length</code> + * Unicode characters that can be encoded with the + * codepage-invariant subset of the platform encoding. + * @param cs Output string, points to memory for <code>length</code> + * character bytes. + * @param length The number of characters to convert; this may + * include the terminating <code>NUL</code>. + * + * @see U_CHARSET_FAMILY + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_UCharsToChars(const UChar *us, char *cs, int32_t length); + +#endif diff --git a/JavaScriptGlue/icu/unicode/uchar.h b/JavaScriptGlue/icu/unicode/uchar.h new file mode 100644 index 0000000..7fd490c --- /dev/null +++ b/JavaScriptGlue/icu/unicode/uchar.h @@ -0,0 +1,2798 @@ +/* +********************************************************************** +* Copyright (C) 1997-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File UCHAR.H +* +* Modification History: +* +* Date Name Description +* 04/02/97 aliu Creation. +* 03/29/99 helena Updated for C APIs. +* 4/15/99 Madhu Updated for C Implementation and Javadoc +* 5/20/99 Madhu Added the function u_getVersion() +* 8/19/1999 srl Upgraded scripts to Unicode 3.0 +* 8/27/1999 schererm UCharDirection constants: U_... +* 11/11/1999 weiv added u_isalnum(), cleaned comments +* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). +****************************************************************************** +*/ + +#ifndef UCHAR_H +#define UCHAR_H + +#include "unicode/utypes.h" + +U_CDECL_BEGIN + +/*==========================================================================*/ +/* Unicode version number */ +/*==========================================================================*/ +/** + * Unicode version number, default for the current ICU version. + * The actual Unicode Character Database (UCD) data is stored in uprops.dat + * and may be generated from UCD files from a different Unicode version. + * Call u_getUnicodeVersion to get the actual Unicode version of the data. + * + * @see u_getUnicodeVersion + * @stable ICU 2.0 + */ +#define U_UNICODE_VERSION "4.0.1" + +/** + * \file + * \brief C API: Unicode Properties + * + * This C API provides low-level access to the Unicode Character Database. + * In addition to raw property values, some convenience functions calculate + * derived properties, for example for Java-style programming. + * + * Unicode assigns each code point (not just assigned character) values for + * many properties. + * Most of them are simple boolean flags, or constants from a small enumerated list. + * For some properties, values are strings or other relatively more complex types. + * + * For more information see + * "About the Unicode Character Database" (http://www.unicode.org/ucd/) + * and the ICU User Guide chapter on Properties (http://oss.software.ibm.com/icu/userguide/properties.html). + * + * Many functions are designed to match java.lang.Character functions. + * See the individual function documentation, + * and see the JDK 1.4.1 java.lang.Character documentation + * at http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html + * + * There are also functions that provide easy migration from C/POSIX functions + * like isblank(). Their use is generally discouraged because the C/POSIX + * standards do not define their semantics beyond the ASCII range, which means + * that different implementations exhibit very different behavior. + * Instead, Unicode properties should be used directly. + * + * There are also only a few, broad C/POSIX character classes, and they tend + * to be used for conflicting purposes. For example, the "isalpha()" class + * is sometimes used to determine word boundaries, while a more sophisticated + * approach would at least distinguish initial letters from continuation + * characters (the latter including combining marks). + * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) + * Another example: There is no "istitle()" class for titlecase characters. + * + * A summary of the behavior of some C/POSIX character classification implementations + * for Unicode is available at http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html + * + * <strong>Important</strong>: + * The behavior of the ICU C/POSIX-style character classification + * functions is subject to change according to discussion of the above summary. + * + * Note: There are several ICU whitespace functions. + * Comparison: + * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; + * most of general categories "Z" (separators) + most whitespace ISO controls + * (including no-break spaces, but excluding IS1..IS4 and ZWSP) + * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces + * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) + * - u_isspace: Z + whitespace ISO controls (including no-break spaces) + * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP + */ + +/** + * Constants. + */ + +/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */ +#define UCHAR_MIN_VALUE 0 + +/** + * The highest Unicode code point value (scalar value) according to + * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up). + * For a single character, UChar32 is a simple type that can hold any code point value. + * + * @see UChar32 + * @stable ICU 2.0 + */ +#define UCHAR_MAX_VALUE 0x10ffff + +/** + * Get a single-bit bit set (a flag) from a bit number 0..31. + * @stable ICU 2.1 + */ +#define U_MASK(x) ((uint32_t)1<<(x)) + +/* + * !! Note: Several comments in this file are machine-read by the + * genpname tool. These comments describe the correspondence between + * icu enum constants and UCD entities. Do not delete them. Update + * these comments as needed. + * + * Any comment of the form "/ *[name]* /" (spaces added) is such + * a comment. + * + * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic + * name, which must match PropertyValueAliases.txt. + */ + +/** + * Selection constants for Unicode properties. + * These constants are used in functions like u_hasBinaryProperty to select + * one of the Unicode properties. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ucd/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, + * then properties marked with "new in Unicode 3.2" are not or not fully available. + * Check u_getUnicodeVersion to be sure. + * + * @see u_hasBinaryProperty + * @see u_getIntPropertyValue + * @see u_getUnicodeVersion + * @stable ICU 2.1 + */ +typedef enum UProperty { + /* See note !!. Comments of the form "Binary property Dash", + "Enumerated property Script", "Double property Numeric_Value", + and "String property Age" are read by genpname. */ + + /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that + debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, + rather than UCHAR_BINARY_START. Likewise for other *_START + identifiers. */ + + /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. + Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ + UCHAR_ALPHABETIC=0, + /** First constant for binary Unicode properties. @stable ICU 2.1 */ + UCHAR_BINARY_START=UCHAR_ALPHABETIC, + /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */ + UCHAR_ASCII_HEX_DIGIT, + /** Binary property Bidi_Control. + Format controls which have specific functions + in the Bidi Algorithm. @stable ICU 2.1 */ + UCHAR_BIDI_CONTROL, + /** Binary property Bidi_Mirrored. + Characters that may change display in RTL text. + Same as u_isMirrored. + See Bidi Algorithm, UTR 9. @stable ICU 2.1 */ + UCHAR_BIDI_MIRRORED, + /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */ + UCHAR_DASH, + /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). + Ignorable in most processing. + <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ + UCHAR_DEFAULT_IGNORABLE_CODE_POINT, + /** Binary property Deprecated (new in Unicode 3.2). + The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ + UCHAR_DEPRECATED, + /** Binary property Diacritic. Characters that linguistically modify + the meaning of another character to which they apply. @stable ICU 2.1 */ + UCHAR_DIACRITIC, + /** Binary property Extender. + Extend the value or shape of a preceding alphabetic character, + e.g., length and iteration marks. @stable ICU 2.1 */ + UCHAR_EXTENDER, + /** Binary property Full_Composition_Exclusion. + CompositionExclusions.txt+Singleton Decompositions+ + Non-Starter Decompositions. @stable ICU 2.1 */ + UCHAR_FULL_COMPOSITION_EXCLUSION, + /** Binary property Grapheme_Base (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ + UCHAR_GRAPHEME_BASE, + /** Binary property Grapheme_Extend (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ + UCHAR_GRAPHEME_EXTEND, + /** Binary property Grapheme_Link (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ + UCHAR_GRAPHEME_LINK, + /** Binary property Hex_Digit. + Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ + UCHAR_HEX_DIGIT, + /** Binary property Hyphen. Dashes used to mark connections + between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */ + UCHAR_HYPHEN, + /** Binary property ID_Continue. + Characters that can continue an identifier. + DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." + ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ + UCHAR_ID_CONTINUE, + /** Binary property ID_Start. + Characters that can start an identifier. + Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ + UCHAR_ID_START, + /** Binary property Ideographic. + CJKV ideographs. @stable ICU 2.1 */ + UCHAR_IDEOGRAPHIC, + /** Binary property IDS_Binary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_IDS_BINARY_OPERATOR, + /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_IDS_TRINARY_OPERATOR, + /** Binary property Join_Control. + Format controls for cursive joining and ligation. @stable ICU 2.1 */ + UCHAR_JOIN_CONTROL, + /** Binary property Logical_Order_Exception (new in Unicode 3.2). + Characters that do not use logical order and + require special handling in most processing. @stable ICU 2.1 */ + UCHAR_LOGICAL_ORDER_EXCEPTION, + /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. + Ll+Other_Lowercase @stable ICU 2.1 */ + UCHAR_LOWERCASE, + /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */ + UCHAR_MATH, + /** Binary property Noncharacter_Code_Point. + Code points that are explicitly defined as illegal + for the encoding of characters. @stable ICU 2.1 */ + UCHAR_NONCHARACTER_CODE_POINT, + /** Binary property Quotation_Mark. @stable ICU 2.1 */ + UCHAR_QUOTATION_MARK, + /** Binary property Radical (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_RADICAL, + /** Binary property Soft_Dotted (new in Unicode 3.2). + Characters with a "soft dot", like i or j. + An accent placed on these characters causes + the dot to disappear. @stable ICU 2.1 */ + UCHAR_SOFT_DOTTED, + /** Binary property Terminal_Punctuation. + Punctuation characters that generally mark + the end of textual units. @stable ICU 2.1 */ + UCHAR_TERMINAL_PUNCTUATION, + /** Binary property Unified_Ideograph (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_UNIFIED_IDEOGRAPH, + /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. + Lu+Other_Uppercase @stable ICU 2.1 */ + UCHAR_UPPERCASE, + /** Binary property White_Space. + Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. + Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ + UCHAR_WHITE_SPACE, + /** Binary property XID_Continue. + ID_Continue modified to allow closure under + normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCHAR_XID_CONTINUE, + /** Binary property XID_Start. ID_Start modified to allow + closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCHAR_XID_START, + /** Binary property Case_Sensitive. Either the source of a case + mapping or _in_ the target of a case mapping. Not the same as + the general category Cased_Letter. @stable ICU 2.6 */ + UCHAR_CASE_SENSITIVE, + /** Binary property STerm (new in Unicode 4.0.1). + Sentence Terminal. Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + @draft ICU 3.0 */ + UCHAR_S_TERM, + /** Binary property Variation_Selector (new in Unicode 4.0.1). + Indicates all those characters that qualify as Variation Selectors. + For details on the behavior of these characters, + see StandardizedVariants.html and 15.6 Variation Selectors. + @draft ICU 3.0 */ + UCHAR_VARIATION_SELECTOR, + /** Binary property NFD_Inert. + ICU-specific property for characters that are inert under NFD, + i.e., they do not interact with adjacent characters. + Used for example in normalizing transforms in incremental mode + to find the boundary of safely normalizable text despite possible + text additions. + + There is one such property per normalization form. + These properties are computed as follows - an inert character is: + a) unassigned, or ALL of the following: + b) of combining class 0. + c) not decomposed by this normalization form. + AND if NFC or NFKC, + d) can never compose with a previous character. + e) can never compose with a following character. + f) can never change if another character is added. + Example: a-breve might satisfy all but f, but if you + add an ogonek it changes to a-ogonek + breve + + See also com.ibm.text.UCD.NFSkippable in the ICU4J repository, + and icu/source/common/unormimp.h . + @draft ICU 3.0 */ + UCHAR_NFD_INERT, + /** Binary property NFKD_Inert. + ICU-specific property for characters that are inert under NFKD, + i.e., they do not interact with adjacent characters. + Used for example in normalizing transforms in incremental mode + to find the boundary of safely normalizable text despite possible + text additions. + @see UCHAR_NFD_INERT + @draft ICU 3.0 */ + UCHAR_NFKD_INERT, + /** Binary property NFC_Inert. + ICU-specific property for characters that are inert under NFC, + i.e., they do not interact with adjacent characters. + Used for example in normalizing transforms in incremental mode + to find the boundary of safely normalizable text despite possible + text additions. + @see UCHAR_NFD_INERT + @draft ICU 3.0 */ + UCHAR_NFC_INERT, + /** Binary property NFKC_Inert. + ICU-specific property for characters that are inert under NFKC, + i.e., they do not interact with adjacent characters. + Used for example in normalizing transforms in incremental mode + to find the boundary of safely normalizable text despite possible + text additions. + @see UCHAR_NFD_INERT + @draft ICU 3.0 */ + UCHAR_NFKC_INERT, + /** Binary Property Segment_Starter. + ICU-specific property for characters that are starters in terms of + Unicode normalization and combining character sequences. + They have ccc=0 and do not occur in non-initial position of the + canonical decomposition of any character + (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)). + ICU uses this property for segmenting a string for generating a set of + canonically equivalent strings, e.g. for canonical closure while + processing collation tailoring rules. + @draft ICU 3.0 */ + UCHAR_SEGMENT_STARTER, + /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */ + UCHAR_BINARY_LIMIT, + + /** Enumerated property Bidi_Class. + Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ + UCHAR_BIDI_CLASS=0x1000, + /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ + UCHAR_INT_START=UCHAR_BIDI_CLASS, + /** Enumerated property Block. + Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */ + UCHAR_BLOCK, + /** Enumerated property Canonical_Combining_Class. + Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ + UCHAR_CANONICAL_COMBINING_CLASS, + /** Enumerated property Decomposition_Type. + Returns UDecompositionType values. @stable ICU 2.2 */ + UCHAR_DECOMPOSITION_TYPE, + /** Enumerated property East_Asian_Width. + See http://www.unicode.org/reports/tr11/ + Returns UEastAsianWidth values. @stable ICU 2.2 */ + UCHAR_EAST_ASIAN_WIDTH, + /** Enumerated property General_Category. + Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ + UCHAR_GENERAL_CATEGORY, + /** Enumerated property Joining_Group. + Returns UJoiningGroup values. @stable ICU 2.2 */ + UCHAR_JOINING_GROUP, + /** Enumerated property Joining_Type. + Returns UJoiningType values. @stable ICU 2.2 */ + UCHAR_JOINING_TYPE, + /** Enumerated property Line_Break. + Returns ULineBreak values. @stable ICU 2.2 */ + UCHAR_LINE_BREAK, + /** Enumerated property Numeric_Type. + Returns UNumericType values. @stable ICU 2.2 */ + UCHAR_NUMERIC_TYPE, + /** Enumerated property Script. + Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ + UCHAR_SCRIPT, + /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. + Returns UHangulSyllableType values. @stable ICU 2.6 */ + UCHAR_HANGUL_SYLLABLE_TYPE, + /** Enumerated property NFD_Quick_Check. + Returns UNormalizationCheckResult values. @draft ICU 3.0 */ + UCHAR_NFD_QUICK_CHECK, + /** Enumerated property NFKD_Quick_Check. + Returns UNormalizationCheckResult values. @draft ICU 3.0 */ + UCHAR_NFKD_QUICK_CHECK, + /** Enumerated property NFC_Quick_Check. + Returns UNormalizationCheckResult values. @draft ICU 3.0 */ + UCHAR_NFC_QUICK_CHECK, + /** Enumerated property NFKC_Quick_Check. + Returns UNormalizationCheckResult values. @draft ICU 3.0 */ + UCHAR_NFKC_QUICK_CHECK, + /** Enumerated property Lead_Canonical_Combining_Class. + ICU-specific property for the ccc of the first code point + of the decomposition, or lccc(c)=ccc(NFD(c)[0]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */ + UCHAR_LEAD_CANONICAL_COMBINING_CLASS, + /** Enumerated property Trail_Canonical_Combining_Class. + ICU-specific property for the ccc of the last code point + of the decomposition, or tccc(c)=ccc(NFD(c)[last]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */ + UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, + /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ + UCHAR_INT_LIMIT, + + /** Bitmask property General_Category_Mask. + This is the General_Category property returned as a bit mask. + When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), + returns bit masks for UCharCategory values where exactly one bit is set. + When used with u_getPropertyValueName() and u_getPropertyValueEnum(), + a multi-bit mask is used for sets of categories like "Letters". + Mask values should be cast to uint32_t. + @stable ICU 2.4 */ + UCHAR_GENERAL_CATEGORY_MASK=0x2000, + /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */ + UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, + /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */ + UCHAR_MASK_LIMIT, + + /** Double property Numeric_Value. + Corresponds to u_getNumericValue. @stable ICU 2.4 */ + UCHAR_NUMERIC_VALUE=0x3000, + /** First constant for double Unicode properties. @stable ICU 2.4 */ + UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, + /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */ + UCHAR_DOUBLE_LIMIT, + + /** String property Age. + Corresponds to u_charAge. @stable ICU 2.4 */ + UCHAR_AGE=0x4000, + /** First constant for string Unicode properties. @stable ICU 2.4 */ + UCHAR_STRING_START=UCHAR_AGE, + /** String property Bidi_Mirroring_Glyph. + Corresponds to u_charMirror. @stable ICU 2.4 */ + UCHAR_BIDI_MIRRORING_GLYPH, + /** String property Case_Folding. + Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ + UCHAR_CASE_FOLDING, + /** String property ISO_Comment. + Corresponds to u_getISOComment. @stable ICU 2.4 */ + UCHAR_ISO_COMMENT, + /** String property Lowercase_Mapping. + Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ + UCHAR_LOWERCASE_MAPPING, + /** String property Name. + Corresponds to u_charName. @stable ICU 2.4 */ + UCHAR_NAME, + /** String property Simple_Case_Folding. + Corresponds to u_foldCase. @stable ICU 2.4 */ + UCHAR_SIMPLE_CASE_FOLDING, + /** String property Simple_Lowercase_Mapping. + Corresponds to u_tolower. @stable ICU 2.4 */ + UCHAR_SIMPLE_LOWERCASE_MAPPING, + /** String property Simple_Titlecase_Mapping. + Corresponds to u_totitle. @stable ICU 2.4 */ + UCHAR_SIMPLE_TITLECASE_MAPPING, + /** String property Simple_Uppercase_Mapping. + Corresponds to u_toupper. @stable ICU 2.4 */ + UCHAR_SIMPLE_UPPERCASE_MAPPING, + /** String property Titlecase_Mapping. + Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ + UCHAR_TITLECASE_MAPPING, + /** String property Unicode_1_Name. + Corresponds to u_charName. @stable ICU 2.4 */ + UCHAR_UNICODE_1_NAME, + /** String property Uppercase_Mapping. + Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ + UCHAR_UPPERCASE_MAPPING, + /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */ + UCHAR_STRING_LIMIT, + + /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ + UCHAR_INVALID_CODE = -1 +} UProperty; + +/** + * Data for enumerated Unicode general category types. + * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html . + * @stable ICU 2.0 + */ +typedef enum UCharCategory +{ + /** See note !!. Comments of the form "Cn" are read by genpname. */ + + /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */ + U_UNASSIGNED = 0, + /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */ + U_GENERAL_OTHER_TYPES = 0, + /** Lu @stable ICU 2.0 */ + U_UPPERCASE_LETTER = 1, + /** Ll @stable ICU 2.0 */ + U_LOWERCASE_LETTER = 2, + /** Lt @stable ICU 2.0 */ + U_TITLECASE_LETTER = 3, + /** Lm @stable ICU 2.0 */ + U_MODIFIER_LETTER = 4, + /** Lo @stable ICU 2.0 */ + U_OTHER_LETTER = 5, + /** Mn @stable ICU 2.0 */ + U_NON_SPACING_MARK = 6, + /** Me @stable ICU 2.0 */ + U_ENCLOSING_MARK = 7, + /** Mc @stable ICU 2.0 */ + U_COMBINING_SPACING_MARK = 8, + /** Nd @stable ICU 2.0 */ + U_DECIMAL_DIGIT_NUMBER = 9, + /** Nl @stable ICU 2.0 */ + U_LETTER_NUMBER = 10, + /** No @stable ICU 2.0 */ + U_OTHER_NUMBER = 11, + /** Zs @stable ICU 2.0 */ + U_SPACE_SEPARATOR = 12, + /** Zl @stable ICU 2.0 */ + U_LINE_SEPARATOR = 13, + /** Zp @stable ICU 2.0 */ + U_PARAGRAPH_SEPARATOR = 14, + /** Cc @stable ICU 2.0 */ + U_CONTROL_CHAR = 15, + /** Cf @stable ICU 2.0 */ + U_FORMAT_CHAR = 16, + /** Co @stable ICU 2.0 */ + U_PRIVATE_USE_CHAR = 17, + /** Cs @stable ICU 2.0 */ + U_SURROGATE = 18, + /** Pd @stable ICU 2.0 */ + U_DASH_PUNCTUATION = 19, + /** Ps @stable ICU 2.0 */ + U_START_PUNCTUATION = 20, + /** Pe @stable ICU 2.0 */ + U_END_PUNCTUATION = 21, + /** Pc @stable ICU 2.0 */ + U_CONNECTOR_PUNCTUATION = 22, + /** Po @stable ICU 2.0 */ + U_OTHER_PUNCTUATION = 23, + /** Sm @stable ICU 2.0 */ + U_MATH_SYMBOL = 24, + /** Sc @stable ICU 2.0 */ + U_CURRENCY_SYMBOL = 25, + /** Sk @stable ICU 2.0 */ + U_MODIFIER_SYMBOL = 26, + /** So @stable ICU 2.0 */ + U_OTHER_SYMBOL = 27, + /** Pi @stable ICU 2.0 */ + U_INITIAL_PUNCTUATION = 28, + /** Pf @stable ICU 2.0 */ + U_FINAL_PUNCTUATION = 29, + /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */ + U_CHAR_CATEGORY_COUNT +} UCharCategory; + +/** + * U_GC_XX_MASK constants are bit flags corresponding to Unicode + * general category values. + * For each category, the nth bit is set if the numeric value of the + * corresponding UCharCategory constant is n. + * + * There are also some U_GC_Y_MASK constants for groups of general categories + * like L for all letter categories. + * + * @see u_charType + * @see U_GET_GC_MASK + * @see UCharCategory + * @stable ICU 2.1 + */ +#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CS_MASK U_MASK(U_SURROGATE) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) + + +/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ +#define U_GC_L_MASK \ + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) + +/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ +#define U_GC_LC_MASK \ + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) + +/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */ +#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) + +/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */ +#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) + +/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */ +#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) + +/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */ +#define U_GC_C_MASK \ + (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) + +/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */ +#define U_GC_P_MASK \ + (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ + U_GC_PI_MASK|U_GC_PF_MASK) + +/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */ +#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) + +/** + * This specifies the language directional property of a character set. + * @stable ICU 2.0 + */ +typedef enum UCharDirection { + /** See note !!. Comments of the form "EN" are read by genpname. */ + + /** L @stable ICU 2.0 */ + U_LEFT_TO_RIGHT = 0, + /** R @stable ICU 2.0 */ + U_RIGHT_TO_LEFT = 1, + /** EN @stable ICU 2.0 */ + U_EUROPEAN_NUMBER = 2, + /** ES @stable ICU 2.0 */ + U_EUROPEAN_NUMBER_SEPARATOR = 3, + /** ET @stable ICU 2.0 */ + U_EUROPEAN_NUMBER_TERMINATOR = 4, + /** AN @stable ICU 2.0 */ + U_ARABIC_NUMBER = 5, + /** CS @stable ICU 2.0 */ + U_COMMON_NUMBER_SEPARATOR = 6, + /** B @stable ICU 2.0 */ + U_BLOCK_SEPARATOR = 7, + /** S @stable ICU 2.0 */ + U_SEGMENT_SEPARATOR = 8, + /** WS @stable ICU 2.0 */ + U_WHITE_SPACE_NEUTRAL = 9, + /** ON @stable ICU 2.0 */ + U_OTHER_NEUTRAL = 10, + /** LRE @stable ICU 2.0 */ + U_LEFT_TO_RIGHT_EMBEDDING = 11, + /** LRO @stable ICU 2.0 */ + U_LEFT_TO_RIGHT_OVERRIDE = 12, + /** AL @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_ARABIC = 13, + /** RLE @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_EMBEDDING = 14, + /** RLO @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_OVERRIDE = 15, + /** PDF @stable ICU 2.0 */ + U_POP_DIRECTIONAL_FORMAT = 16, + /** NSM @stable ICU 2.0 */ + U_DIR_NON_SPACING_MARK = 17, + /** BN @stable ICU 2.0 */ + U_BOUNDARY_NEUTRAL = 18, + /** @stable ICU 2.0 */ + U_CHAR_DIRECTION_COUNT +} UCharDirection; + +/** + * Constants for Unicode blocks, see the Unicode Data file Blocks.txt + * @stable ICU 2.0 + */ +enum UBlockCode { + + /** New No_Block value in Unicode 4. @stable ICU 2.6 */ + UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ + + /** @stable ICU 2.0 */ + UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ + + /** @stable ICU 2.0 */ + UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ + + /** + * Unicode 3.2 renames this block to "Greek and Coptic". + * @stable ICU 2.0 + */ + UBLOCK_GREEK =8, /*[0370]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CYRILLIC =9, /*[0400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARMENIAN =10, /*[0530]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HEBREW =11, /*[0590]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC =12, /*[0600]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SYRIAC =13, /*[0700]*/ + + /** @stable ICU 2.0 */ + UBLOCK_THAANA =14, /*[0780]*/ + + /** @stable ICU 2.0 */ + UBLOCK_DEVANAGARI =15, /*[0900]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BENGALI =16, /*[0980]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GURMUKHI =17, /*[0A00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GUJARATI =18, /*[0A80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ORIYA =19, /*[0B00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TAMIL =20, /*[0B80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TELUGU =21, /*[0C00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANNADA =22, /*[0C80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MALAYALAM =23, /*[0D00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SINHALA =24, /*[0D80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_THAI =25, /*[0E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LAO =26, /*[0E80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TIBETAN =27, /*[0F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MYANMAR =28, /*[1000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GEORGIAN =29, /*[10A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_JAMO =30, /*[1100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ETHIOPIC =31, /*[1200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CHEROKEE =32, /*[13A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_OGHAM =34, /*[1680]*/ + + /** @stable ICU 2.0 */ + UBLOCK_RUNIC =35, /*[16A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KHMER =36, /*[1780]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MONGOLIAN =37, /*[1800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ + + /** + * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". + * @stable ICU 2.0 + */ + UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_NUMBER_FORMS =45, /*[2150]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARROWS =46, /*[2190]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOX_DRAWING =52, /*[2500]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ + + /** @stable ICU 2.0 */ + UBLOCK_DINGBATS =56, /*[2700]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIRAGANA =62, /*[3040]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KATAKANA =63, /*[30A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOPOMOFO =64, /*[3100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANBUN =66, /*[3190]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_YI_SYLLABLES =72, /*[A000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_YI_RADICALS =73, /*[A490]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ + + /** + * Same as UBLOCK_PRIVATE_USE_AREA. + * Until Unicode 3.1.1, the corresponding block name was "Private Use", + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and + * adds separate blocks for the supplementary PUAs. + * + * @stable ICU 2.0 + */ + UBLOCK_PRIVATE_USE = 78, + /** + * Same as UBLOCK_PRIVATE_USE. + * Until Unicode 3.1.1, the corresponding block name was "Private Use", + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and + * adds separate blocks for the supplementary PUAs. + * + * @stable ICU 2.0 + */ + UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SPECIALS =86, /*[FFF0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ + + /* New blocks in Unicode 3.1 */ + + /** @stable ICU 2.0 */ + UBLOCK_OLD_ITALIC = 88 , /*[10300]*/ + /** @stable ICU 2.0 */ + UBLOCK_GOTHIC = 89 , /*[10330]*/ + /** @stable ICU 2.0 */ + UBLOCK_DESERET = 90 , /*[10400]*/ + /** @stable ICU 2.0 */ + UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/ + /** @stable ICU 2.0 */ + UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/ + /** @stable ICU 2.0 */ + UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93 , /*[1D400]*/ + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94 , /*[20000]*/ + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/ + /** @stable ICU 2.0 */ + UBLOCK_TAGS = 96, /*[E0000]*/ + + /* New blocks in Unicode 3.2 */ + + /** + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". + * @stable ICU 2.2 + */ + UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, + /** @draft ICU 3.0 */ + UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/ + /** @stable ICU 2.2 */ + UBLOCK_TAGALOG = 98, /*[1700]*/ + /** @stable ICU 2.2 */ + UBLOCK_HANUNOO = 99, /*[1720]*/ + /** @stable ICU 2.2 */ + UBLOCK_BUHID = 100, /*[1740]*/ + /** @stable ICU 2.2 */ + UBLOCK_TAGBANWA = 101, /*[1760]*/ + /** @stable ICU 2.2 */ + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ + /** @stable ICU 2.2 */ + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ + /** @stable ICU 2.2 */ + UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ + /** @stable ICU 2.2 */ + UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ + + /* New blocks in Unicode 4 */ + + /** @stable ICU 2.6 */ + UBLOCK_LIMBU = 111, /*[1900]*/ + /** @stable ICU 2.6 */ + UBLOCK_TAI_LE = 112, /*[1950]*/ + /** @stable ICU 2.6 */ + UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ + /** @stable ICU 2.6 */ + UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ + /** @stable ICU 2.6 */ + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ + /** @stable ICU 2.6 */ + UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ + /** @stable ICU 2.6 */ + UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ + /** @stable ICU 2.6 */ + UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ + /** @stable ICU 2.6 */ + UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ + /** @stable ICU 2.6 */ + UBLOCK_UGARITIC = 120, /*[10380]*/ + /** @stable ICU 2.6 */ + UBLOCK_SHAVIAN = 121, /*[10450]*/ + /** @stable ICU 2.6 */ + UBLOCK_OSMANYA = 122, /*[10480]*/ + /** @stable ICU 2.6 */ + UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ + /** @stable ICU 2.6 */ + UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ + /** @stable ICU 2.6 */ + UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COUNT, + + /** @stable ICU 2.0 */ + UBLOCK_INVALID_CODE=-1 +}; + +/** @stable ICU 2.0 */ +typedef enum UBlockCode UBlockCode; + +/** + * East Asian Width constants. + * + * @see UCHAR_EAST_ASIAN_WIDTH + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +typedef enum UEastAsianWidth { + U_EA_NEUTRAL, /*[N]*/ /*See note !!*/ + U_EA_AMBIGUOUS, /*[A]*/ + U_EA_HALFWIDTH, /*[H]*/ + U_EA_FULLWIDTH, /*[F]*/ + U_EA_NARROW, /*[Na]*/ + U_EA_WIDE, /*[W]*/ + U_EA_COUNT +} UEastAsianWidth; +/* + * Implementation note: + * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c. + */ + +/** + * Selector constants for u_charName(). + * u_charName() returns the "modern" name of a + * Unicode character; or the name that was defined in + * Unicode version 1.0, before the Unicode standard merged + * with ISO-10646; or an "extended" name that gives each + * Unicode code point a unique name. + * + * @see u_charName + * @stable ICU 2.0 + */ +typedef enum UCharNameChoice { + U_UNICODE_CHAR_NAME, + U_UNICODE_10_CHAR_NAME, + U_EXTENDED_CHAR_NAME, + U_CHAR_NAME_CHOICE_COUNT +} UCharNameChoice; + +/** + * Selector constants for u_getPropertyName() and + * u_getPropertyValueName(). These selectors are used to choose which + * name is returned for a given property or value. All properties and + * values have a long name. Most have a short name, but some do not. + * Unicode allows for additional names, beyond the long and short + * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where + * i=1, 2,... + * + * @see u_getPropertyName() + * @see u_getPropertyValueName() + * @stable ICU 2.4 + */ +typedef enum UPropertyNameChoice { + U_SHORT_PROPERTY_NAME, + U_LONG_PROPERTY_NAME, + U_PROPERTY_NAME_CHOICE_COUNT +} UPropertyNameChoice; + +/** + * Decomposition Type constants. + * + * @see UCHAR_DECOMPOSITION_TYPE + * @stable ICU 2.2 + */ +typedef enum UDecompositionType { + U_DT_NONE, /*[none]*/ /*See note !!*/ + U_DT_CANONICAL, /*[can]*/ + U_DT_COMPAT, /*[com]*/ + U_DT_CIRCLE, /*[enc]*/ + U_DT_FINAL, /*[fin]*/ + U_DT_FONT, /*[font]*/ + U_DT_FRACTION, /*[fra]*/ + U_DT_INITIAL, /*[init]*/ + U_DT_ISOLATED, /*[iso]*/ + U_DT_MEDIAL, /*[med]*/ + U_DT_NARROW, /*[nar]*/ + U_DT_NOBREAK, /*[nb]*/ + U_DT_SMALL, /*[sml]*/ + U_DT_SQUARE, /*[sqr]*/ + U_DT_SUB, /*[sub]*/ + U_DT_SUPER, /*[sup]*/ + U_DT_VERTICAL, /*[vert]*/ + U_DT_WIDE, /*[wide]*/ + U_DT_COUNT /* 18 */ +} UDecompositionType; + +/** + * Joining Type constants. + * + * @see UCHAR_JOINING_TYPE + * @stable ICU 2.2 + */ +typedef enum UJoiningType { + U_JT_NON_JOINING, /*[U]*/ /*See note !!*/ + U_JT_JOIN_CAUSING, /*[C]*/ + U_JT_DUAL_JOINING, /*[D]*/ + U_JT_LEFT_JOINING, /*[L]*/ + U_JT_RIGHT_JOINING, /*[R]*/ + U_JT_TRANSPARENT, /*[T]*/ + U_JT_COUNT /* 6 */ +} UJoiningType; + +/** + * Joining Group constants. + * + * @see UCHAR_JOINING_GROUP + * @stable ICU 2.2 + */ +typedef enum UJoiningGroup { + U_JG_NO_JOINING_GROUP, + U_JG_AIN, + U_JG_ALAPH, + U_JG_ALEF, + U_JG_BEH, + U_JG_BETH, + U_JG_DAL, + U_JG_DALATH_RISH, + U_JG_E, + U_JG_FEH, + U_JG_FINAL_SEMKATH, + U_JG_GAF, + U_JG_GAMAL, + U_JG_HAH, + U_JG_HAMZA_ON_HEH_GOAL, + U_JG_HE, + U_JG_HEH, + U_JG_HEH_GOAL, + U_JG_HETH, + U_JG_KAF, + U_JG_KAPH, + U_JG_KNOTTED_HEH, + U_JG_LAM, + U_JG_LAMADH, + U_JG_MEEM, + U_JG_MIM, + U_JG_NOON, + U_JG_NUN, + U_JG_PE, + U_JG_QAF, + U_JG_QAPH, + U_JG_REH, + U_JG_REVERSED_PE, + U_JG_SAD, + U_JG_SADHE, + U_JG_SEEN, + U_JG_SEMKATH, + U_JG_SHIN, + U_JG_SWASH_KAF, + U_JG_SYRIAC_WAW, + U_JG_TAH, + U_JG_TAW, + U_JG_TEH_MARBUTA, + U_JG_TETH, + U_JG_WAW, + U_JG_YEH, + U_JG_YEH_BARREE, + U_JG_YEH_WITH_TAIL, + U_JG_YUDH, + U_JG_YUDH_HE, + U_JG_ZAIN, + U_JG_FE, /**< @stable ICU 2.6 */ + U_JG_KHAPH, /**< @stable ICU 2.6 */ + U_JG_ZHAIN, /**< @stable ICU 2.6 */ + U_JG_COUNT +} UJoiningGroup; + +/** + * Line Break constants. + * + * @see UCHAR_LINE_BREAK + * @stable ICU 2.2 + */ +typedef enum ULineBreak { + U_LB_UNKNOWN, /*[XX]*/ /*See note !!*/ + U_LB_AMBIGUOUS, /*[AI]*/ + U_LB_ALPHABETIC, /*[AL]*/ + U_LB_BREAK_BOTH, /*[B2]*/ + U_LB_BREAK_AFTER, /*[BA]*/ + U_LB_BREAK_BEFORE, /*[BB]*/ + U_LB_MANDATORY_BREAK, /*[BK]*/ + U_LB_CONTINGENT_BREAK, /*[CB]*/ + U_LB_CLOSE_PUNCTUATION, /*[CL]*/ + U_LB_COMBINING_MARK, /*[CM]*/ + U_LB_CARRIAGE_RETURN, /*[CR]*/ + U_LB_EXCLAMATION, /*[EX]*/ + U_LB_GLUE, /*[GL]*/ + U_LB_HYPHEN, /*[HY]*/ + U_LB_IDEOGRAPHIC, /*[ID]*/ + U_LB_INSEPERABLE, + /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @draft ICU 3.0 */ + U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/ + U_LB_INFIX_NUMERIC, /*[IS]*/ + U_LB_LINE_FEED, /*[LF]*/ + U_LB_NONSTARTER, /*[NS]*/ + U_LB_NUMERIC, /*[NU]*/ + U_LB_OPEN_PUNCTUATION, /*[OP]*/ + U_LB_POSTFIX_NUMERIC, /*[PO]*/ + U_LB_PREFIX_NUMERIC, /*[PR]*/ + U_LB_QUOTATION, /*[QU]*/ + U_LB_COMPLEX_CONTEXT, /*[SA]*/ + U_LB_SURROGATE, /*[SG]*/ + U_LB_SPACE, /*[SP]*/ + U_LB_BREAK_SYMBOLS, /*[SY]*/ + U_LB_ZWSPACE, /*[ZW]*/ + U_LB_NEXT_LINE, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ + U_LB_WORD_JOINER, /*[WJ]*/ + U_LB_COUNT +} ULineBreak; + +/** + * Numeric Type constants. + * + * @see UCHAR_NUMERIC_TYPE + * @stable ICU 2.2 + */ +typedef enum UNumericType { + U_NT_NONE, /*[None]*/ /*See note !!*/ + U_NT_DECIMAL, /*[de]*/ + U_NT_DIGIT, /*[di]*/ + U_NT_NUMERIC, /*[nu]*/ + U_NT_COUNT +} UNumericType; + +/** + * Hangul Syllable Type constants. + * + * @see UCHAR_HANGUL_SYLLABLE_TYPE + * @stable ICU 2.6 + */ +typedef enum UHangulSyllableType { + U_HST_NOT_APPLICABLE, /*[NA]*/ /*See note !!*/ + U_HST_LEADING_JAMO, /*[L]*/ + U_HST_VOWEL_JAMO, /*[V]*/ + U_HST_TRAILING_JAMO, /*[T]*/ + U_HST_LV_SYLLABLE, /*[LV]*/ + U_HST_LVT_SYLLABLE, /*[LVT]*/ + U_HST_COUNT +} UHangulSyllableType; + +/** + * Check a binary Unicode property for a code point. + * + * Unicode, especially in version 3.2, defines many more properties than the + * original set in UnicodeData.txt. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ucd/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Important: If ICU is built with UCD files from Unicode versions below 3.2, + * then properties marked with "new in Unicode 3.2" are not or not fully available. + * + * @param c Code point to test. + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT. + * @return TRUE or FALSE according to the binary Unicode property value for c. + * Also FALSE if 'which' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code point. + * + * @see UProperty + * @see u_getIntPropertyValue + * @see u_getUnicodeVersion + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_hasBinaryProperty(UChar32 c, UProperty which); + +/** + * Check if a code point has the Alphabetic Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). + * This is different from u_isalpha! + * @param c Code point to test + * @return true if the code point has the Alphabetic Unicode property, false otherwise + * + * @see UCHAR_ALPHABETIC + * @see u_isalpha + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUAlphabetic(UChar32 c); + +/** + * Check if a code point has the Lowercase Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). + * This is different from u_islower! + * @param c Code point to test + * @return true if the code point has the Lowercase Unicode property, false otherwise + * + * @see UCHAR_LOWERCASE + * @see u_islower + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isULowercase(UChar32 c); + +/** + * Check if a code point has the Uppercase Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). + * This is different from u_isupper! + * @param c Code point to test + * @return true if the code point has the Uppercase Unicode property, false otherwise + * + * @see UCHAR_UPPERCASE + * @see u_isupper + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUUppercase(UChar32 c); + +/** + * Check if a code point has the White_Space Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). + * This is different from both u_isspace and u_isWhitespace! + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c Code point to test + * @return true if the code point has the White_Space Unicode property, false otherwise. + * + * @see UCHAR_WHITE_SPACE + * @see u_isWhitespace + * @see u_isspace + * @see u_isJavaSpaceChar + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUWhiteSpace(UChar32 c); + +/** + * Get the property value for an enumerated or integer Unicode property for a code point. + * Also returns binary and mask property values. + * + * Unicode, especially in version 3.2, defines many more properties than the + * original set in UnicodeData.txt. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Sample usage: + * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); + * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC); + * + * @param c Code point to test. + * @param which UProperty selector constant, identifies which property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * @return Numeric value that is directly the property value or, + * for enumerated properties, corresponds to the numeric value of the enumerated + * constant of the respective property value enumeration type + * (cast to enum type if necessary). + * Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties. + * Returns a bit-mask for mask properties. + * Returns 0 if 'which' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code point. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getIntPropertyMinValue + * @see u_getIntPropertyMaxValue + * @see u_getUnicodeVersion + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyValue(UChar32 c, UProperty which); + +/** + * Get the minimum value for an enumerated/integer/binary Unicode property. + * Can be used together with u_getIntPropertyMaxValue + * to allocate arrays of UnicodeSet or similar. + * + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. + * @return Minimum value returned by u_getIntPropertyValue for a Unicode property. + * 0 if the property selector is out of range. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getUnicodeVersion + * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyMinValue(UProperty which); + +/** + * Get the maximum value for an enumerated/integer/binary Unicode property. + * Can be used together with u_getIntPropertyMinValue + * to allocate arrays of UnicodeSet or similar. + * + * Examples for min/max values (for Unicode 3.2): + * + * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL) + * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA) + * - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE) + * + * For undefined UProperty constant values, min/max values will be 0/-1. + * + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. + * @return Maximum value returned by u_getIntPropertyValue for a Unicode property. + * <=0 if the property selector is out of range. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getUnicodeVersion + * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyMaxValue(UProperty which); + +/** + * Get the numeric value for a Unicode code point as defined in the + * Unicode Character Database. + * + * A "double" return type is necessary because + * some numeric values are fractions, negative, or too large for int32_t. + * + * For characters without any numeric values in the Unicode Character Database, + * this function will return U_NO_NUMERIC_VALUE. + * + * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() + * also supports negative values, large values, and fractions, + * while Java's getNumericValue() returns values 10..35 for ASCII letters. + * + * @param c Code point to get the numeric value for. + * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined. + * + * @see U_NO_NUMERIC_VALUE + * @stable ICU 2.2 + */ +U_STABLE double U_EXPORT2 +u_getNumericValue(UChar32 c); + +/** + * Special value that is returned by u_getNumericValue when + * no numeric value is defined for a code point. + * + * @see u_getNumericValue + * @stable ICU 2.2 + */ +#define U_NO_NUMERIC_VALUE ((double)-123456789.) + +/** + * Determines whether the specified code point has the general category "Ll" + * (lowercase letter). + * + * Same as java.lang.Character.isLowerCase(). + * + * This misses some characters that are also lowercase but + * have a different general category value. + * In order to include those, use UCHAR_LOWERCASE. + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an Ll lowercase letter + * + * @see UCHAR_LOWERCASE + * @see u_isupper + * @see u_istitle + * @see u_islower + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_islower(UChar32 c); + +/** + * Determines whether the specified code point has the general category "Lu" + * (uppercase letter). + * + * Same as java.lang.Character.isUpperCase(). + * + * This misses some characters that are also uppercase but + * have a different general category value. + * In order to include those, use UCHAR_UPPERCASE. + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an Lu uppercase letter + * + * @see UCHAR_UPPERCASE + * @see u_islower + * @see u_istitle + * @see u_tolower + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isupper(UChar32 c); + +/** + * Determines whether the specified code point is a titlecase letter. + * True for general category "Lt" (titlecase letter). + * + * Same as java.lang.Character.isTitleCase(). + * + * @param c the code point to be tested + * @return TRUE if the code point is an Lt titlecase letter + * + * @see u_isupper + * @see u_islower + * @see u_totitle + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_istitle(UChar32 c); + +/** + * Determines whether the specified code point is a digit character according to Java. + * True for characters with general category "Nd" (decimal digit numbers). + * Beginning with Unicode 4, this is the same as + * testing for the Numeric_Type of Decimal. + * + * Same as java.lang.Character.isDigit(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a digit character according to Character.isDigit() + * + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isdigit(UChar32 c); + +/** + * Determines whether the specified code point is a letter character. + * True for general categories "L" (letters). + * + * Same as java.lang.Character.isLetter(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a letter character + * + * @see u_isdigit + * @see u_isalnum + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isalpha(UChar32 c); + +/** + * Determines whether the specified code point is an alphanumeric character + * (letter or digit) according to Java. + * True for characters with general categories + * "L" (letters) and "Nd" (decimal digit numbers). + * + * Same as java.lang.Character.isLetterOrDigit(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit() + * + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isalnum(UChar32 c); + +/** + * Determines whether the specified code point is a hexadecimal digit. + * This is equivalent to u_digit(c, 16)>=0. + * True for characters with general category "Nd" (decimal digit numbers) + * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. + * (That is, for letters with code points + * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) + * + * In order to narrow the definition of hexadecimal digits to only ASCII + * characters, use (c<=0x7f && u_isxdigit(c)). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a hexadecimal digit + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isxdigit(UChar32 c); + +/** + * Determines whether the specified code point is a punctuation character. + * True for characters with general categories "P" (punctuation). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a punctuation character + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_ispunct(UChar32 c); + +/** + * Determines whether the specified code point is a "graphic" character + * (printable, excluding spaces). + * TRUE for all characters except those with general categories + * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), + * "Cn" (unassigned), and "Z" (separators). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a "graphic" character + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isgraph(UChar32 c); + +/** + * Determines whether the specified code point is a "blank" or "horizontal space", + * a character that visibly separates words on a line. + * The following are equivalent definitions: + * + * TRUE for Unicode White_Space characters except for "vertical space controls" + * where "vertical space controls" are the following characters: + * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) + * + * same as + * + * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators) + * except Zero Width Space (ZWSP, U+200B). + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a "blank" + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isblank(UChar32 c); + +/** + * Determines whether the specified code point is "defined", + * which usually means that it is assigned a character. + * True for general categories other than "Cn" (other, not assigned), + * i.e., true for all code points mentioned in UnicodeData.txt. + * + * Note that non-character code points (e.g., U+FDD0) are not "defined" + * (they are Cn), but surrogate code points are "defined" (Cs). + * + * Same as java.lang.Character.isDefined(). + * + * @param c the code point to be tested + * @return TRUE if the code point is assigned a character + * + * @see u_isdigit + * @see u_isalpha + * @see u_isalnum + * @see u_isupper + * @see u_islower + * @see u_istitle + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isdefined(UChar32 c); + +/** + * Determines if the specified character is a space character or not. + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the character to be tested + * @return true if the character is a space character; false otherwise. + * + * @see u_isJavaSpaceChar + * @see u_isWhitespace + * @see u_isUWhiteSpace + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isspace(UChar32 c); + +/** + * Determine if the specified code point is a space character according to Java. + * True for characters with general categories "Z" (separators), + * which does not include control codes (e.g., TAB or Line Feed). + * + * Same as java.lang.Character.isSpaceChar(). + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c the code point to be tested + * @return TRUE if the code point is a space character according to Character.isSpaceChar() + * + * @see u_isspace + * @see u_isWhitespace + * @see u_isUWhiteSpace + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaSpaceChar(UChar32 c); + +/** + * Determines if the specified code point is a whitespace character according to Java/ICU. + * A character is considered to be a Java whitespace character if and only + * if it satisfies one of the following criteria: + * + * - It is a Unicode separator (categories "Z"), but is not + * a no-break space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). + * - It is U+0009 HORIZONTAL TABULATION. + * - It is U+000A LINE FEED. + * - It is U+000B VERTICAL TABULATION. + * - It is U+000C FORM FEED. + * - It is U+000D CARRIAGE RETURN. + * - It is U+001C FILE SEPARATOR. + * - It is U+001D GROUP SEPARATOR. + * - It is U+001E RECORD SEPARATOR. + * - It is U+001F UNIT SEPARATOR. + * - It is U+0085 NEXT LINE. + * + * Same as java.lang.Character.isWhitespace() except that Java omits U+0085. + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c the code point to be tested + * @return TRUE if the code point is a whitespace character according to Java/ICU + * + * @see u_isspace + * @see u_isJavaSpaceChar + * @see u_isUWhiteSpace + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isWhitespace(UChar32 c); + +/** + * Determines whether the specified code point is a control character + * (as defined by this function). + * A control character is one of the following: + * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) + * - U_CONTROL_CHAR (Cc) + * - U_FORMAT_CHAR (Cf) + * - U_LINE_SEPARATOR (Zl) + * - U_PARAGRAPH_SEPARATOR (Zp) + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a control character + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_isprint + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_iscntrl(UChar32 c); + +/** + * Determines whether the specified code point is an ISO control code. + * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). + * + * Same as java.lang.Character.isISOControl(). + * + * @param c the code point to be tested + * @return TRUE if the code point is an ISO control code + * + * @see u_iscntrl + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isISOControl(UChar32 c); + +/** + * Determines whether the specified code point is a printable character. + * True for general categories <em>other</em> than "C" (controls). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a printable character + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_iscntrl + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isprint(UChar32 c); + +/** + * Determines whether the specified code point is a base character. + * True for general categories "L" (letters), "N" (numbers), + * "Mc" (spacing combining marks), and "Me" (enclosing marks). + * + * Note that this is different from the Unicode definition in + * chapter 3.5, conformance clause D13, + * which defines base characters to be all characters (not Cn) + * that do not graphically combine with preceding characters (M) + * and that are neither control (Cc) or format (Cf) characters. + * + * @param c the code point to be tested + * @return TRUE if the code point is a base character according to this function + * + * @see u_isalpha + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isbase(UChar32 c); + +/** + * Returns the bidirectional category value for the code point, + * which is used in the Unicode bidirectional algorithm + * (UAX #9 http://www.unicode.org/reports/tr9/). + * Note that some <em>unassigned</em> code points have bidi values + * of R or AL because they are in blocks that are reserved + * for Right-To-Left scripts. + * + * Same as java.lang.Character.getDirectionality() + * + * @param c the code point to be tested + * @return the bidirectional category (UCharDirection) value + * + * @see UCharDirection + * @stable ICU 2.0 + */ +U_STABLE UCharDirection U_EXPORT2 +u_charDirection(UChar32 c); + +/** + * Determines whether the code point has the Bidi_Mirrored property. + * This property is set for characters that are commonly used in + * Right-To-Left contexts and need to be displayed with a "mirrored" + * glyph. + * + * Same as java.lang.Character.isMirrored(). + * Same as UCHAR_BIDI_MIRRORED + * + * @param c the code point to be tested + * @return TRUE if the character has the Bidi_Mirrored property + * + * @see UCHAR_BIDI_MIRRORED + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isMirrored(UChar32 c); + +/** + * Maps the specified character to a "mirror-image" character. + * For characters with the Bidi_Mirrored property, implementations + * sometimes need a "poor man's" mapping to another Unicode + * character (code point) such that the default glyph may serve + * as the mirror-image of the default glyph of the specified + * character. This is useful for text conversion to and from + * codepages with visual order, and for displays without glyph + * selecetion capabilities. + * + * @param c the code point to be mapped + * @return another Unicode code point that may serve as a mirror-image + * substitute, or c itself if there is no such mapping or c + * does not have the Bidi_Mirrored property + * + * @see UCHAR_BIDI_MIRRORED + * @see u_isMirrored + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_charMirror(UChar32 c); + +/** + * Returns the general category value for the code point. + * + * Same as java.lang.Character.getType(). + * + * @param c the code point to be tested + * @return the general category (UCharCategory) value + * + * @see UCharCategory + * @stable ICU 2.0 + */ +U_STABLE int8_t U_EXPORT2 +u_charType(UChar32 c); + +/** + * Get a single-bit bit set for the general category of a character. + * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. + * Same as U_MASK(u_charType(c)). + * + * @param c the code point to be tested + * @return a single-bit mask corresponding to the general category (UCharCategory) value + * + * @see u_charType + * @see UCharCategory + * @see U_GC_CN_MASK + * @stable ICU 2.1 + */ +#define U_GET_GC_MASK(c) U_MASK(u_charType(c)) + +/** + * Callback from u_enumCharTypes(), is called for each contiguous range + * of code points c (where start<=c<limit) + * with the same Unicode general category ("character type"). + * + * The callback function can stop the enumeration by returning FALSE. + * + * @param context an opaque pointer, as passed into utrie_enum() + * @param start the first code point in a contiguous range with value + * @param limit one past the last code point in a contiguous range with value + * @param type the general category for all code points in [start..limit[ + * @return FALSE to stop the enumeration + * + * @stable ICU 2.1 + * @see UCharCategory + * @see u_enumCharTypes + */ +typedef UBool U_CALLCONV +UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); + +/** + * Enumerate efficiently all code points with their Unicode general categories. + * + * This is useful for building data structures (e.g., UnicodeSet's), + * for enumerating all assigned code points (type!=U_UNASSIGNED), etc. + * + * For each contiguous range of code points with a given general category ("character type"), + * the UCharEnumTypeRange function is called. + * Adjacent ranges have different types. + * The Unicode Standard guarantees that the numeric value of the type is 0..31. + * + * @param enumRange a pointer to a function that is called for each contiguous range + * of code points with the same general category + * @param context an opaque pointer that is passed on to the callback function + * + * @stable ICU 2.1 + * @see UCharCategory + * @see UCharEnumTypeRange + */ +U_STABLE void U_EXPORT2 +u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context); + +#if !UCONFIG_NO_NORMALIZATION + +/** + * Returns the combining class of the code point as specified in UnicodeData.txt. + * + * @param c the code point of the character + * @return the combining class of the character + * @stable ICU 2.0 + */ +U_STABLE uint8_t U_EXPORT2 +u_getCombiningClass(UChar32 c); + +#endif + +/** + * Returns the decimal digit value of a decimal digit character. + * Such characters have the general category "Nd" (decimal digit numbers) + * and a Numeric_Type of Decimal. + * + * Unlike ICU releases before 2.6, no digit values are returned for any + * Han characters because Han number characters are often used with a special + * Chinese-style number format (with characters for powers of 10 in between) + * instead of in decimal-positional notation. + * Unicode 4 explicitly assigns Han number characters the Numeric_Type + * Numeric instead of Decimal. + * See Jitterbug 1483 for more details. + * + * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() + * for complete numeric Unicode properties. + * + * @param c the code point for which to get the decimal digit value + * @return the decimal digit value of c, + * or -1 if c is not a decimal digit character + * + * @see u_getNumericValue + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_charDigitValue(UChar32 c); + +/** + * Returns the Unicode allocation block that contains the character. + * + * @param c the code point to be tested + * @return the block value (UBlockCode) for c + * + * @see UBlockCode + * @stable ICU 2.0 + */ +U_STABLE UBlockCode U_EXPORT2 +ublock_getCode(UChar32 c); + +/** + * Retrieve the name of a Unicode character. + * Depending on <code>nameChoice</code>, the character name written + * into the buffer is the "modern" name or the name that was defined + * in Unicode version 1.0. + * The name contains only "invariant" characters + * like A-Z, 0-9, space, and '-'. + * Unicode 1.0 names are only retrieved if they are different from the modern + * names and if the data file contains the data for them. gennames may or may + * not be called with a command line option to include 1.0 names in unames.dat. + * + * @param code The character (code point) for which to get the name. + * It must be <code>0<=code<=0x10ffff</code>. + * @param nameChoice Selector for which name to get. + * @param buffer Destination address for copying the name. + * The name will always be zero-terminated. + * If there is no name, then the buffer will be set to the empty string. + * @param bufferLength <code>==sizeof(buffer)</code> + * @param pErrorCode Pointer to a UErrorCode variable; + * check for <code>U_SUCCESS()</code> after <code>u_charName()</code> + * returns. + * @return The length of the name, or 0 if there is no name for this character. + * If the bufferLength is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @see UCharNameChoice + * @see u_charFromName + * @see u_enumCharNames + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_charName(UChar32 code, UCharNameChoice nameChoice, + char *buffer, int32_t bufferLength, + UErrorCode *pErrorCode); + +/** + * Get the ISO 10646 comment for a character. + * The ISO 10646 comment is an informative field in the Unicode Character + * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list. + * + * @param c The character (code point) for which to get the ISO comment. + * It must be <code>0<=c<=0x10ffff</code>. + * @param dest Destination address for copying the comment. + * The comment will be zero-terminated if possible. + * If there is no comment, then the buffer will be set to the empty string. + * @param destCapacity <code>==sizeof(dest)</code> + * @param pErrorCode Pointer to a UErrorCode variable; + * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code> + * returns. + * @return The length of the comment, or 0 if there is no comment for this character. + * If the destCapacity is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getISOComment(UChar32 c, + char *dest, int32_t destCapacity, + UErrorCode *pErrorCode); + +/** + * Find a Unicode character by its name and return its code point value. + * The name is matched exactly and completely. + * If the name does not correspond to a code point, <i>pErrorCode</i> + * is set to <code>U_INVALID_CHAR_FOUND</code>. + * A Unicode 1.0 name is matched only if it differs from the modern name. + * Unicode names are all uppercase. Extended names are lowercase followed + * by an uppercase hexadecimal number, and within angle brackets. + * + * @param nameChoice Selector for which name to match. + * @param name The name to match. + * @param pErrorCode Pointer to a UErrorCode variable + * @return The Unicode value of the code point with the given name, + * or an undefined value if there is no such code point. + * + * @see UCharNameChoice + * @see u_charName + * @see u_enumCharNames + * @stable ICU 1.7 + */ +U_STABLE UChar32 U_EXPORT2 +u_charFromName(UCharNameChoice nameChoice, + const char *name, + UErrorCode *pErrorCode); + +/** + * Type of a callback function for u_enumCharNames() that gets called + * for each Unicode character with the code point value and + * the character name. + * If such a function returns FALSE, then the enumeration is stopped. + * + * @param context The context pointer that was passed to u_enumCharNames(). + * @param code The Unicode code point for the character with this name. + * @param nameChoice Selector for which kind of names is enumerated. + * @param name The character's name, zero-terminated. + * @param length The length of the name. + * @return TRUE if the enumeration should continue, FALSE to stop it. + * + * @see UCharNameChoice + * @see u_enumCharNames + * @stable ICU 1.7 + */ +typedef UBool UEnumCharNamesFn(void *context, + UChar32 code, + UCharNameChoice nameChoice, + const char *name, + int32_t length); + +/** + * Enumerate all assigned Unicode characters between the start and limit + * code points (start inclusive, limit exclusive) and call a function + * for each, passing the code point value and the character name. + * For Unicode 1.0 names, only those are enumerated that differ from the + * modern names. + * + * @param start The first code point in the enumeration range. + * @param limit One more than the last code point in the enumeration range + * (the first one after the range). + * @param fn The function that is to be called for each character name. + * @param context An arbitrary pointer that is passed to the function. + * @param nameChoice Selector for which kind of names to enumerate. + * @param pErrorCode Pointer to a UErrorCode variable + * + * @see UCharNameChoice + * @see UEnumCharNamesFn + * @see u_charName + * @see u_charFromName + * @stable ICU 1.7 + */ +U_STABLE void U_EXPORT2 +u_enumCharNames(UChar32 start, UChar32 limit, + UEnumCharNamesFn *fn, + void *context, + UCharNameChoice nameChoice, + UErrorCode *pErrorCode); + +/** + * Return the Unicode name for a given property, as given in the + * Unicode database file PropertyAliases.txt. + * + * In addition, this function maps the property + * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / + * "General_Category_Mask". These names are not in + * PropertyAliases.txt. + * + * @param property UProperty selector other than UCHAR_INVALID_CODE. + * If out of range, NULL is returned. + * + * @param nameChoice selector for which name to get. If out of range, + * NULL is returned. All properties have a long name. Most + * have a short name, but some do not. Unicode allows for + * additional names; if present these will be returned by + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... + * + * @return a pointer to the name, or NULL if either the + * property or the nameChoice is out of range. If a given + * nameChoice returns NULL, then all larger values of + * nameChoice will return NULL, with one exception: if NULL is + * returned for U_SHORT_PROPERTY_NAME, then + * U_LONG_PROPERTY_NAME (and higher) may still return a + * non-NULL value. The returned pointer is valid until + * u_cleanup() is called. + * + * @see UProperty + * @see UPropertyNameChoice + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +u_getPropertyName(UProperty property, + UPropertyNameChoice nameChoice); + +/** + * Return the UProperty enum for a given property name, as specified + * in the Unicode database file PropertyAliases.txt. Short, long, and + * any other variants are recognized. + * + * In addition, this function maps the synthetic names "gcm" / + * "General_Category_Mask" to the property + * UCHAR_GENERAL_CATEGORY_MASK. These names are not in + * PropertyAliases.txt. + * + * @param alias the property name to be matched. The name is compared + * using "loose matching" as described in PropertyAliases.txt. + * + * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name + * does not match any property. + * + * @see UProperty + * @stable ICU 2.4 + */ +U_STABLE UProperty U_EXPORT2 +u_getPropertyEnum(const char* alias); + +/** + * Return the Unicode name for a given property value, as given in the + * Unicode database file PropertyValueAliases.txt. + * + * Note: Some of the names in PropertyValueAliases.txt can only be + * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * If out of range, NULL is returned. + * + * @param value selector for a value for the given property. If out + * of range, NULL is returned. In general, valid values range + * from 0 up to some maximum. There are a few exceptions: + * (1.) UCHAR_BLOCK values begin at the non-zero value + * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS + * values are not contiguous and range from 0..240. (3.) + * UCHAR_GENERAL_CATEGORY_MASK values are not values of + * UCharCategory, but rather mask values produced by + * U_GET_GC_MASK(). This allows grouped categories such as + * [:L:] to be represented. Mask values range + * non-contiguously from 1..U_GC_P_MASK. + * + * @param nameChoice selector for which name to get. If out of range, + * NULL is returned. All values have a long name. Most have + * a short name, but some do not. Unicode allows for + * additional names; if present these will be returned by + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... + + * @return a pointer to the name, or NULL if either the + * property or the nameChoice is out of range. If a given + * nameChoice returns NULL, then all larger values of + * nameChoice will return NULL, with one exception: if NULL is + * returned for U_SHORT_PROPERTY_NAME, then + * U_LONG_PROPERTY_NAME (and higher) may still return a + * non-NULL value. The returned pointer is valid until + * u_cleanup() is called. + * + * @see UProperty + * @see UPropertyNameChoice + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +u_getPropertyValueName(UProperty property, + int32_t value, + UPropertyNameChoice nameChoice); + +/** + * Return the property value integer for a given value name, as + * specified in the Unicode database file PropertyValueAliases.txt. + * Short, long, and any other variants are recognized. + * + * Note: Some of the names in PropertyValueAliases.txt will only be + * recognized with UCHAR_GENERAL_CATEGORY_MASK, not + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * If out of range, UCHAR_INVALID_CODE is returned. + * + * @param alias the value name to be matched. The name is compared + * using "loose matching" as described in + * PropertyValueAliases.txt. + * + * @return a value integer or UCHAR_INVALID_CODE if the given name + * does not match any value of the given property, or if the + * property is invalid. Note: U CHAR_GENERAL_CATEGORY values + * are not values of UCharCategory, but rather mask values + * produced by U_GET_GC_MASK(). This allows grouped + * categories such as [:L:] to be represented. + * + * @see UProperty + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +u_getPropertyValueEnum(UProperty property, + const char* alias); + +/** + * Determines if the specified character is permissible as the + * first character in an identifier according to Unicode + * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers). + * True for characters with general categories "L" (letters) and "Nl" (letter numbers). + * + * Same as java.lang.Character.isUnicodeIdentifierStart(). + * Same as UCHAR_ID_START + * + * @param c the code point to be tested + * @return TRUE if the code point may start an identifier + * + * @see UCHAR_ID_START + * @see u_isalpha + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDStart(UChar32 c); + +/** + * Determines if the specified character is permissible + * in an identifier according to Java. + * True for characters with general categories "L" (letters), + * "Nl" (letter numbers), "Nd" (decimal digits), + * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and + * u_isIDIgnorable(c). + * + * Same as java.lang.Character.isUnicodeIdentifierPart(). + * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE) + * except that Unicode recommends to ignore Cf which is less than + * u_isIDIgnorable(c). + * + * @param c the code point to be tested + * @return TRUE if the code point may occur in an identifier according to Java + * + * @see UCHAR_ID_CONTINUE + * @see u_isIDStart + * @see u_isIDIgnorable + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDPart(UChar32 c); + +/** + * Determines if the specified character should be regarded + * as an ignorable character in an identifier, + * according to Java. + * True for characters with general category "Cf" (format controls) as well as + * non-whitespace ISO controls + * (U+0000..U+0008, U+000E..U+001B, U+007F..U+0084, U+0086..U+009F). + * + * Same as java.lang.Character.isIdentifierIgnorable() + * except that Java also returns TRUE for U+0085 Next Line + * (it omits U+0085 from whitespace ISO controls). + * + * Note that Unicode just recommends to ignore Cf (format controls). + * + * @param c the code point to be tested + * @return TRUE if the code point is ignorable in identifiers according to Java + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_isIDStart + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDIgnorable(UChar32 c); + +/** + * Determines if the specified character is permissible as the + * first character in a Java identifier. + * In addition to u_isIDStart(c), true for characters with + * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). + * + * Same as java.lang.Character.isJavaIdentifierStart(). + * + * @param c the code point to be tested + * @return TRUE if the code point may start a Java identifier + * + * @see u_isJavaIDPart + * @see u_isalpha + * @see u_isIDStart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaIDStart(UChar32 c); + +/** + * Determines if the specified character is permissible + * in a Java identifier. + * In addition to u_isIDPart(c), true for characters with + * general category "Sc" (currency symbols). + * + * Same as java.lang.Character.isJavaIdentifierPart(). + * + * @param c the code point to be tested + * @return TRUE if the code point may occur in a Java identifier + * + * @see u_isIDIgnorable + * @see u_isJavaIDStart + * @see u_isalpha + * @see u_isdigit + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaIDPart(UChar32 c); + +/** + * The given character is mapped to its lowercase equivalent according to + * UnicodeData.txt; if the character has no lowercase equivalent, the character + * itself is returned. + * + * Same as java.lang.Character.toLowerCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings may result in zero, one or more code points and depend + * on context or language etc. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * + * @param c the code point to be mapped + * @return the Simple_Lowercase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_tolower(UChar32 c); + +/** + * The given character is mapped to its uppercase equivalent according to UnicodeData.txt; + * if the character has no uppercase equivalent, the character itself is + * returned. + * + * Same as java.lang.Character.toUpperCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings may result in zero, one or more code points and depend + * on context or language etc. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * + * @param c the code point to be mapped + * @return the Simple_Uppercase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_toupper(UChar32 c); + +/** + * The given character is mapped to its titlecase equivalent + * according to UnicodeData.txt; + * if none is defined, the character itself is returned. + * + * Same as java.lang.Character.toTitleCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings may result in zero, one or more code points and depend + * on context or language etc. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * + * @param c the code point to be mapped + * @return the Simple_Titlecase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_totitle(UChar32 c); + +/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */ +#define U_FOLD_CASE_DEFAULT 0 + +/** + * Option value for case folding: + * + * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I + * and dotless i appropriately for Turkic languages (tr, az). + * + * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that + * are to be included for default mappings and + * excluded for the Turkic-specific mappings. + * + * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that + * are to be excluded for default mappings and + * included for the Turkic-specific mappings. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 + +/** + * The given character is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; + * if the character has no case folding equivalent, the character + * itself is returned. + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings may result in zero, one or more code points and depend + * on context or language etc. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * + * @param c the code point to be mapped + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @return the Simple_Case_Folding of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_foldCase(UChar32 c, uint32_t options); + +/** + * Returns the decimal digit value of the code point in the + * specified radix. + * + * If the radix is not in the range <code>2<=radix<=36</code> or if the + * value of <code>c</code> is not a valid digit in the specified + * radix, <code>-1</code> is returned. A character is a valid digit + * if at least one of the following is true: + * <ul> + * <li>The character has a decimal digit value. + * Such characters have the general category "Nd" (decimal digit numbers) + * and a Numeric_Type of Decimal. + * In this case the value is the character's decimal digit value.</li> + * <li>The character is one of the uppercase Latin letters + * <code>'A'</code> through <code>'Z'</code>. + * In this case the value is <code>c-'A'+10</code>.</li> + * <li>The character is one of the lowercase Latin letters + * <code>'a'</code> through <code>'z'</code>. + * In this case the value is <code>ch-'a'+10</code>.</li> + * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A) + * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) + * are recognized.</li> + * </ul> + * + * Same as java.lang.Character.digit(). + * + * @param ch the code point to be tested. + * @param radix the radix. + * @return the numeric value represented by the character in the + * specified radix, + * or -1 if there is no value or if the value exceeds the radix. + * + * @see UCHAR_NUMERIC_TYPE + * @see u_forDigit + * @see u_charDigitValue + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_digit(UChar32 ch, int8_t radix); + +/** + * Determines the character representation for a specific digit in + * the specified radix. If the value of <code>radix</code> is not a + * valid radix, or the value of <code>digit</code> is not a valid + * digit in the specified radix, the null character + * (<code>U+0000</code>) is returned. + * <p> + * The <code>radix</code> argument is valid if it is greater than or + * equal to 2 and less than or equal to 36. + * The <code>digit</code> argument is valid if + * <code>0 <= digit < radix</code>. + * <p> + * If the digit is less than 10, then + * <code>'0' + digit</code> is returned. Otherwise, the value + * <code>'a' + digit - 10</code> is returned. + * + * Same as java.lang.Character.forDigit(). + * + * @param digit the number to convert to a character. + * @param radix the radix. + * @return the <code>char</code> representation of the specified digit + * in the specified radix. + * + * @see u_digit + * @see u_charDigitValue + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_forDigit(int32_t digit, int8_t radix); + +/** + * Get the "age" of the code point. + * The "age" is the Unicode version when the code point was first + * designated (as a non-character or for Private Use) + * or assigned a character. + * This can be useful to avoid emitting code points to receiving + * processes that do not accept newer characters. + * The data is from the UCD file DerivedAge.txt. + * + * @param c The code point. + * @param versionArray The Unicode version number array, to be filled in. + * + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +u_charAge(UChar32 c, UVersionInfo versionArray); + +/** + * Gets the Unicode version information. + * The version array is filled in with the version information + * for the Unicode standard that is currently used by ICU. + * For example, Unicode version 3.1.1 is represented as an array with + * the values { 3, 1, 1, 0 }. + * + * @param versionArray an output array that will be filled in with + * the Unicode version number + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_getUnicodeVersion(UVersionInfo versionArray); + +/** + * Get the FC_NFKC_Closure property string for a character. + * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" + * or for "FNC": http://www.unicode.org/reports/tr15/ + * + * @param c The character (code point) for which to get the FC_NFKC_Closure string. + * It must be <code>0<=c<=0x10ffff</code>. + * @param dest Destination address for copying the string. + * The string will be zero-terminated if possible. + * If there is no FC_NFKC_Closure string, + * then the buffer will be set to the empty string. + * @param destCapacity <code>==sizeof(dest)</code> + * @param pErrorCode Pointer to a UErrorCode variable. + * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. + * If the destCapacity is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); + +U_CDECL_END + +#endif /*_UCHAR*/ +/*eof*/ diff --git a/JavaScriptGlue/icu/unicode/ucnv.h b/JavaScriptGlue/icu/unicode/ucnv.h new file mode 100644 index 0000000..a042f7a --- /dev/null +++ b/JavaScriptGlue/icu/unicode/ucnv.h @@ -0,0 +1,1817 @@ +/* +********************************************************************** +* Copyright (C) 1999-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** + * ucnv.h: + * External APIs for the ICU's codeset conversion library + * Bertrand A. Damiba + * + * Modification History: + * + * Date Name Description + * 04/04/99 helena Fixed internal header inclusion. + * 05/11/00 helena Added setFallback and usesFallback APIs. + * 06/29/2000 helena Major rewrite of the callback APIs. + * 12/07/2000 srl Update of documentation + */ + +/** + * \file + * \brief C API: Character conversion + * + * <h2>Character Conversion C API</h2> + * + * <p>This API is used to convert codepage or character encoded data to and + * from UTF-16. You can open a converter with {@link ucnv_open() }. With that + * converter, you can get its properties, set options, convert your data and + * close the converter.</p> + * + * <p>Since many software programs recogize different converter names for + * different types of converters, there are other functions in this API to + * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() }, + * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the + * more frequently used alias functions to get this information.</p> + * + * <p>When a converter encounters an illegal, irregular, invalid or unmappable character + * its default behavior is to use a substitution character to replace the + * bad byte sequence. This behavior can be changed by using {@link ucnv_getFromUCallBack() } + * or {@link ucnv_getToUCallBack() } on the converter. The header ucnv_err.h defines + * many other callback actions that can be used instead of a character substitution.</p> + * + * <p>More information about this API can be found in our + * <a href="http://oss.software.ibm.com/icu/userguide/conversion.html">User's + * Guide</a>.</p> + */ + +#ifndef UCNV_H +#define UCNV_H + +#include "unicode/ucnv_err.h" +#include "unicode/uenum.h" + +#ifndef __USET_H__ + +/** + * USet is the C API type for Unicode sets. + * It is forward-declared here to avoid including the header file if related + * conversion APIs are not used. + * See unicode/uset.h + * + * @see ucnv_getUnicodeSet + * @stable ICU 2.6 + */ +struct USet; +/** @stable ICU 2.6 */ +typedef struct USet USet; + +#endif + +#if !UCONFIG_NO_CONVERSION + +U_CDECL_BEGIN + +/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */ +#define UCNV_MAX_CONVERTER_NAME_LENGTH 60 +/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */ +#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH) + +/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ +#define UCNV_SI 0x0F +/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ +#define UCNV_SO 0x0E + +/** + * Enum for specifying basic types of converters + * @see ucnv_getType + * @stable ICU 2.0 + */ +typedef enum { + UCNV_UNSUPPORTED_CONVERTER = -1, + UCNV_SBCS = 0, + UCNV_DBCS = 1, + UCNV_MBCS = 2, + UCNV_LATIN_1 = 3, + UCNV_UTF8 = 4, + UCNV_UTF16_BigEndian = 5, + UCNV_UTF16_LittleEndian = 6, + UCNV_UTF32_BigEndian = 7, + UCNV_UTF32_LittleEndian = 8, + UCNV_EBCDIC_STATEFUL = 9, + UCNV_ISO_2022 = 10, + + UCNV_LMBCS_1 = 11, + UCNV_LMBCS_2, + UCNV_LMBCS_3, + UCNV_LMBCS_4, + UCNV_LMBCS_5, + UCNV_LMBCS_6, + UCNV_LMBCS_8, + UCNV_LMBCS_11, + UCNV_LMBCS_16, + UCNV_LMBCS_17, + UCNV_LMBCS_18, + UCNV_LMBCS_19, + UCNV_LMBCS_LAST = UCNV_LMBCS_19, + UCNV_HZ, + UCNV_SCSU, + UCNV_ISCII, + UCNV_US_ASCII, + UCNV_UTF7, + UCNV_BOCU1, + UCNV_UTF16, + UCNV_UTF32, + UCNV_CESU8, + UCNV_IMAP_MAILBOX, + + /* Number of converter types for which we have conversion routines. */ + UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES + +} UConverterType; + +/** + * Enum for specifying which platform a converter ID refers to. + * The use of platform/CCSID is not recommended. See ucnv_openCCSID(). + * + * @see ucnv_getPlatform + * @see ucnv_openCCSID + * @see ucnv_getCCSID + * @stable ICU 2.0 + */ +typedef enum { + UCNV_UNKNOWN = -1, + UCNV_IBM = 0 +} UConverterPlatform; + +/** + * Function pointer for error callback in the codepage to unicode direction. + * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason). + * @param context Pointer to the callback's private data + * @param args Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @see ucnv_setToUCallBack + * @see UConverterToUnicodeArgs + * @stable ICU 2.0 + */ +typedef void (U_EXPORT2 *UConverterToUCallback) ( + const void* context, + UConverterToUnicodeArgs *args, + const char *codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode *); + +/** + * Function pointer for error callback in the unicode to codepage direction. + * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason). + * @param context Pointer to the callback's private data + * @param args Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @see ucnv_setFromUCallBack + * @stable ICU 2.0 + */ +typedef void (U_EXPORT2 *UConverterFromUCallback) ( + const void* context, + UConverterFromUnicodeArgs *args, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode *); + +U_CDECL_END + +/** + * Character that separates converter names from options and options from each other. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_OPTION_SEP_CHAR ',' + +/** + * String version of UCNV_OPTION_SEP_CHAR. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_OPTION_SEP_STRING "," + +/** + * Character that separates a converter option from its value. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_VALUE_SEP_CHAR '=' + +/** + * String version of UCNV_VALUE_SEP_CHAR. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_VALUE_SEP_STRING "=" + +/** + * Converter option for specifying a locale. + * For example, ucnv_open("SCSU,locale=ja", &errorCode); + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_LOCALE_OPTION_STRING ",locale=" + +/** + * Converter option for specifying a version selector (0..9) for some converters. + * For example, ucnv_open("UTF-7,version=1", &errorCode); + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.4 + */ +#define UCNV_VERSION_OPTION_STRING ",version=" + +/** + * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages. + * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on + * S/390 (z/OS) Unix System Services (Open Edition). + * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode); + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.4 + */ +#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl" + +/** + * Do a fuzzy compare of a two converter/alias names. The comparison + * is case-insensitive. It also ignores the characters '-', '_', and + * ' ' (dash, underscore, and space). Thus the strings "UTF-8", + * "utf_8", and "Utf 8" are exactly equivalent. + * + * @param name1 a converter name or alias, zero-terminated + * @param name2 a converter name or alias, zero-terminated + * @return 0 if the names match, or a negative value if the name1 + * lexically precedes name2, or a positive value if the name1 + * lexically follows name2. + * @stable ICU 2.0 + */ +U_STABLE int U_EXPORT2 +ucnv_compareNames(const char *name1, const char *name2); + + +/** + * Creates a UConverter object with the names specified as a C string. + * The actual name will be resolved with the alias file + * using a case-insensitive string comparison that ignores + * the delimiters '-', '_', and ' ' (dash, underscore, and space). + * E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent. + * If <code>NULL</code> is passed for the converter name, it will create one with the + * getDefaultName return value. + * + * <p>A converter name for ICU 1.5 and above may contain options + * like a locale specification to control the specific behavior of + * the newly instantiated converter. + * The meaning of the options depends on the particular converter. + * If an option is not defined for or recognized by a given converter, then it is ignored.</p> + * + * <p>Options are appended to the converter name string, with a + * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and + * also between adjacent options.</p> + * + * <p>If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p> + * + * <p>The conversion behavior and names can vary between platforms. ICU may + * convert some characters differently from other platforms. Details on this topic + * are in the <a href="http://oss.software.ibm.com/icu/userguide/conversion.html">User's + * Guide</a>.</p> + * + * @param converterName Name of the uconv table, may have options appended + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured + * @see ucnv_openU + * @see ucnv_openCCSID + * @see ucnv_close + * @stable ICU 2.0 + */ +U_STABLE UConverter* U_EXPORT2 +ucnv_open(const char *converterName, UErrorCode *err); + + +/** + * Creates a Unicode converter with the names specified as unicode string. + * The name should be limited to the ASCII-7 alphanumerics range. + * The actual name will be resolved with the alias file + * using a case-insensitive string comparison that ignores + * the delimiters '-', '_', and ' ' (dash, underscore, and space). + * E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent. + * If <TT>NULL</TT> is passed for the converter name, it will create + * one with the ucnv_getDefaultName() return value. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * @param name : name of the uconv table in a zero terminated + * Unicode string + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, + * U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an + * error occured + * @see ucnv_open + * @see ucnv_openCCSID + * @see ucnv_close + * @see ucnv_getDefaultName + * @stable ICU 2.0 + */ +U_STABLE UConverter* U_EXPORT2 +ucnv_openU(const UChar *name, + UErrorCode *err); + +/** + * Creates a UConverter object from a CCSID number and platform pair. + * Note that the usefulness of this function is limited to platforms with numeric + * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for + * encodings. + * + * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related. + * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and + * for some Unicode conversion tables there are multiple CCSIDs. + * Some "alternate" Unicode conversion tables are provided by the + * IBM CDRA conversion table registry. + * The most prominent example of a systematic modification of conversion tables that is + * not provided in the form of conversion table files in the repository is + * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all + * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well. + * + * Only IBM default conversion tables are accessible with ucnv_openCCSID(). + * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated + * with that CCSID. + * + * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM. + * + * In summary, the use of CCSIDs and the associated API functions is not recommended. + * + * In order to open a converter with the default IBM CDRA Unicode conversion table, + * you can use this function or use the prefix "ibm-": + * \code + * char name[20]; + * sprintf(name, "ibm-%hu", ccsid); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * In order to open a converter with the IBM S/390 Unix System Services variant + * of a Unicode/EBCDIC conversion table, + * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING: + * \code + * char name[20]; + * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * In order to open a converter from a Microsoft codepage number, use the prefix "cp": + * \code + * char name[20]; + * sprintf(name, "cp%hu", codepageID); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * + * @param codepage codepage number to create + * @param platform the platform in which the codepage number exists + * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error + * occured. + * @see ucnv_open + * @see ucnv_openU + * @see ucnv_close + * @see ucnv_getCCSID + * @see ucnv_getPlatform + * @see UConverterPlatform + * @stable ICU 2.0 + */ +U_STABLE UConverter* U_EXPORT2 +ucnv_openCCSID(int32_t codepage, + UConverterPlatform platform, + UErrorCode * err); + +/** + * <p>Creates a UConverter object specified from a packageName and a converterName.</p> + * + * <p>The packageName and converterName must point to an ICU udata object, as defined by + * <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent. + * Typically, packageName will refer to a (.dat) file, or to a package registered with + * udata_setAppData().</p> + * + * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be + * stored in the converter cache or the alias table. The only way to open further converters + * is call this function multiple times, or use the ucnv_safeClone() function to clone a + * 'master' converter.</p> + * + * <p>A future version of ICU may add alias table lookups and/or caching + * to this function.</p> + * + * <p>Example Use: + * <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code> + * </p> + * + * @param packageName name of the package (equivalent to 'path' in udata_open() call) + * @param converterName name of the data item to be used, without suffix. + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured + * @see udata_open + * @see ucnv_open + * @see ucnv_safeClone + * @see ucnv_close + * @stable ICU 2.2 + */ +U_STABLE UConverter* U_EXPORT2 +ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err); + +/** + * Thread safe cloning operation + * @param cnv converter to be cloned + * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. + * If buffer is not large enough, new memory will be allocated. + * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations. + * @param pBufferSize pointer to size of allocated space. + * If *pBufferSize == 0, a sufficient size for use in cloning will + * be returned ('pre-flighting') + * If *pBufferSize is not enough for a stack-based safe clone, + * new memory will be allocated. + * @param status to indicate whether the operation went on smoothly or there were errors + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary. + * @return pointer to the new clone + * @stable ICU 2.0 + */ +U_STABLE UConverter * U_EXPORT2 +ucnv_safeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status); + +/** + * \def U_CNV_SAFECLONE_BUFFERSIZE + * Definition of a buffer size that is designed to be large enough for + * converters to be cloned with ucnv_safeClone(). + * @stable ICU 2.0 + */ +#define U_CNV_SAFECLONE_BUFFERSIZE 1024 + +/** + * Deletes the unicode converter and releases resources associated + * with just this instance. + * Does not free up shared converter tables. + * + * @param converter the converter object to be deleted + * @see ucnv_open + * @see ucnv_openU + * @see ucnv_openCCSID + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_close(UConverter * converter); + +/** + * Fills in the output parameter, subChars, with the substitution characters + * as multiple bytes. + * + * @param converter the Unicode converter + * @param subChars the subsitution characters + * @param len on input the capacity of subChars, on output the number + * of bytes copied to it + * @param err the outgoing error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @see ucnv_setSubstChars + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getSubstChars(const UConverter *converter, + char *subChars, + int8_t *len, + UErrorCode *err); + +/** + * Sets the substitution chars when converting from unicode to a codepage. The + * substitution is specified as a string of 1-4 bytes, and may contain + * <TT>NULL</TT> byte. + * @param converter the Unicode converter + * @param subChars the substitution character byte sequence we want set + * @param len the number of bytes in subChars + * @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if + * len is bigger than the maximum number of bytes allowed in subchars + * @see ucnv_getSubstChars + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setSubstChars(UConverter *converter, + const char *subChars, + int8_t len, + UErrorCode *err); + +/** + * Fills in the output parameter, errBytes, with the error characters from the + * last failing conversion. + * + * @param converter the Unicode converter + * @param errBytes the codepage bytes which were in error + * @param len on input the capacity of errBytes, on output the number of + * bytes which were copied to it + * @param err the error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getInvalidChars(const UConverter *converter, + char *errBytes, + int8_t *len, + UErrorCode *err); + +/** + * Fills in the output parameter, errChars, with the error characters from the + * last failing conversion. + * + * @param converter the Unicode converter + * @param errUChars the UChars which were in error + * @param len on input the capacity of errUChars, on output the number of + * UChars which were copied to it + * @param err the error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getInvalidUChars(const UConverter *converter, + UChar *errUChars, + int8_t *len, + UErrorCode *err); + +/** + * Resets the state of a converter to the default state. This is used + * in the case of an error, to restart a conversion from a known default state. + * It will also empty the internal output buffers. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_reset(UConverter *converter); + +/** + * Resets the to-Unicode part of a converter state to the default state. + * This is used in the case of an error to restart a conversion to + * Unicode to a known default state. It will also empty the internal + * output buffers used for the conversion to Unicode codepoints. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_resetToUnicode(UConverter *converter); + +/** + * Resets the from-Unicode part of a converter state to the default state. + * This is used in the case of an error to restart a conversion from + * Unicode to a known default state. It will also empty the internal output + * buffers used for the conversion from Unicode codepoints. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_resetFromUnicode(UConverter *converter); + +/** + * Returns the maximum number of bytes that are output per UChar in conversion + * from Unicode using this converter. + * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING + * to calculate the size of a target buffer for conversion from Unicode. + * + * Note: Before ICU 2.8, this function did not return reliable numbers for + * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS. + * + * This number may not be the same as the maximum number of bytes per + * "conversion unit". In other words, it may not be the intuitively expected + * number of bytes per character that would be published for a charset, + * and may not fulfill any other purpose than the allocation of an output + * buffer of guaranteed sufficient size for a given input length and converter. + * + * Examples for special cases that are taken into account: + * - Supplementary code points may convert to more bytes than BMP code points. + * This function returns bytes per UChar (UTF-16 code unit), not per + * Unicode code point, for efficient buffer allocation. + * - State-shifting output (SI/SO, escapes, etc.) from stateful converters. + * - When m input UChars are converted to n output bytes, then the maximum m/n + * is taken into account. + * + * The number returned here does not take into account + * (see UCNV_GET_MAX_BYTES_FOR_STRING): + * - callbacks which output more than one charset character sequence per call, + * like escape callbacks + * - initial and final non-character bytes that are output by some converters + * (automatic BOMs, initial escape sequence, final SI, etc.) + * + * Examples for returned values: + * - SBCS charsets: 1 + * - Shift-JIS: 2 + * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted) + * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_) + * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS) + * - ISO-2022: 3 (always outputs UTF-8) + * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS) + * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS) + * + * @param converter The Unicode converter. + * @return The maximum number of bytes per UChar that are output by ucnv_fromUnicode(), + * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING for buffer allocation. + * + * @see UCNV_GET_MAX_BYTES_FOR_STRING + * @see ucnv_getMinCharSize + * @stable ICU 2.0 + */ +U_STABLE int8_t U_EXPORT2 +ucnv_getMaxCharSize(const UConverter *converter); + +#ifndef U_HIDE_DRAFT_API + +/** + * Calculates the size of a buffer for conversion from Unicode to a charset. + * The calculated size is guaranteed to be sufficient for this conversion. + * + * It takes into account initial and final non-character bytes that are output + * by some converters. + * It does not take into account callbacks which output more than one charset + * character sequence per call, like escape callbacks. + * The default (substitution) callback only outputs one charset character sequence. + * + * @param length Number of UChars to be converted. + * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter + * that will be used. + * @return Size of a buffer that will be large enough to hold the output bytes of + * converting length UChars with the converter that returned the maxCharSize. + * + * @see ucnv_getMaxCharSize + * @draft ICU 2.8 + */ +#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \ + (((int32_t)(length)+10)*(int32_t)(maxCharSize)) + +#endif /*U_HIDE_DRAFT_API*/ + +/** + * Returns the minimum byte length for characters in this codepage. + * This is usually either 1 or 2. + * @param converter the Unicode converter + * @return the minimum number of bytes allowed by this particular converter + * @see ucnv_getMaxCharSize + * @stable ICU 2.0 + */ +U_STABLE int8_t U_EXPORT2 +ucnv_getMinCharSize(const UConverter *converter); + +/** + * Returns the display name of the converter passed in based on the Locale + * passed in. If the locale contains no display name, the internal ASCII + * name will be filled in. + * + * @param converter the Unicode converter. + * @param displayLocale is the specific Locale we want to localised for + * @param displayName user provided buffer to be filled in + * @param displayNameCapacity size of displayName Buffer + * @param err error status code + * @return displayNameLength number of UChar needed in displayName + * @see ucnv_getName + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_getDisplayName(const UConverter *converter, + const char *displayLocale, + UChar *displayName, + int32_t displayNameCapacity, + UErrorCode *err); + +/** + * Gets the internal, canonical name of the converter (zero-terminated). + * The lifetime of the returned string will be that of the converter + * passed to this function. + * @param converter the Unicode converter + * @param err UErrorCode status + * @return the internal name of the converter + * @see ucnv_getDisplayName + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getName(const UConverter *converter, UErrorCode *err); + +/** + * Gets a codepage number associated with the converter. This is not guaranteed + * to be the one used to create the converter. Some converters do not represent + * platform registered codepages and return zero for the codepage number. + * The error code fill-in parameter indicates if the codepage number + * is available. + * Does not check if the converter is <TT>NULL</TT> or if converter's data + * table is <TT>NULL</TT>. + * + * Important: The use of CCSIDs is not recommended because it is limited + * to only two platforms in principle and only one (UCNV_IBM) in the current + * ICU converter API. + * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely. + * For more details see ucnv_openCCSID(). + * + * @param converter the Unicode converter + * @param err the error status code. + * @return If any error occurrs, -1 will be returned otherwise, the codepage number + * will be returned + * @see ucnv_openCCSID + * @see ucnv_getPlatform + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_getCCSID(const UConverter *converter, + UErrorCode *err); + +/** + * Gets a codepage platform associated with the converter. Currently, + * only <TT>UCNV_IBM</TT> will be returned. + * Does not test if the converter is <TT>NULL</TT> or if converter's data + * table is <TT>NULL</TT>. + * @param converter the Unicode converter + * @param err the error status code. + * @return The codepage platform + * @stable ICU 2.0 + */ +U_STABLE UConverterPlatform U_EXPORT2 +ucnv_getPlatform(const UConverter *converter, + UErrorCode *err); + +/** + * Gets the type of the converter + * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, + * EBCDIC_STATEFUL, LATIN_1 + * @param converter a valid, opened converter + * @return the type of the converter + * @stable ICU 2.0 + */ +U_STABLE UConverterType U_EXPORT2 +ucnv_getType(const UConverter * converter); + +/** + * Gets the "starter" (lead) bytes for converters of type MBCS. + * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in + * is not MBCS. Fills in an array of type UBool, with the value of the byte + * as offset to the array. For example, if (starters[0x20] == TRUE) at return, + * it means that the byte 0x20 is a starter byte in this converter. + * Context pointers are always owned by the caller. + * + * @param converter a valid, opened converter of type MBCS + * @param starters an array of size 256 to be filled in + * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the + * converter is not a type which can return starters. + * @see ucnv_getType + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getStarters(const UConverter* converter, + UBool starters[256], + UErrorCode* err); + + +/** + * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet(). + * @see ucnv_getUnicodeSet + * @stable ICU 2.6 + */ +typedef enum UConverterUnicodeSet { + /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */ + UCNV_ROUNDTRIP_SET, + /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */ + UCNV_SET_COUNT +} UConverterUnicodeSet; + + +/** + * Returns the set of Unicode code points that can be converted by an ICU converter. + * + * The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET): + * The set of all Unicode code points that can be roundtrip-converted + * (converted without any data loss) with the converter. + * This set will not include code points that have fallback mappings + * or are only the result of reverse fallback mappings. + * See UTR #22 "Character Mapping Markup Language" + * at http://www.unicode.org/reports/tr22/ + * + * This is useful for example for + * - checking that a string or document can be roundtrip-converted with a converter, + * without/before actually performing the conversion + * - testing if a converter can be used for text for typical text for a certain locale, + * by comparing its roundtrip set with the set of ExemplarCharacters from + * ICU's locale data or other sources + * + * In the future, there may be more UConverterUnicodeSet choices to select + * sets with different properties. + * + * @param cnv The converter for which a set is requested. + * @param setFillIn A valid USet *. It will be cleared by this function before + * the converter's specific set is filled into the USet. + * @param whichSet A UConverterUnicodeSet selector; + * currently UCNV_ROUNDTRIP_SET is the only supported value. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * + * @see UConverterUnicodeSet + * @see uset_open + * @see uset_close + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +ucnv_getUnicodeSet(const UConverter *cnv, + USet *setFillIn, + UConverterUnicodeSet whichSet, + UErrorCode *pErrorCode); + +/** + * Gets the current calback function used by the converter when an illegal + * or invalid codepage sequence is found. + * Context pointers are always owned by the caller. + * + * @param converter the unicode converter + * @param action fillin: returns the callback function pointer + * @param context fillin: returns the callback's private void* context + * @see ucnv_setToUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getToUCallBack (const UConverter * converter, + UConverterToUCallback *action, + const void **context); + +/** + * Gets the current callback function used by the converter when illegal + * or invalid Unicode sequence is found. + * Context pointers are always owned by the caller. + * + * @param converter the unicode converter + * @param action fillin: returns the callback function pointer + * @param context fillin: returns the callback's private void* context + * @see ucnv_setFromUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getFromUCallBack (const UConverter * converter, + UConverterFromUCallback *action, + const void **context); + +/** + * Changes the callback function used by the converter when + * an illegal or invalid sequence is found. + * Context pointers are always owned by the caller. + * Predefined actions and contexts can be found in the ucnv_err.h header. + * + * @param converter the unicode converter + * @param newAction the new callback function + * @param newContext the new toUnicode callback context pointer. This can be NULL. + * @param oldAction fillin: returns the old callback function pointer. This can be NULL. + * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. + * @param err The error code status + * @see ucnv_getToUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setToUCallBack (UConverter * converter, + UConverterToUCallback newAction, + const void* newContext, + UConverterToUCallback *oldAction, + const void** oldContext, + UErrorCode * err); + +/** + * Changes the current callback function used by the converter when + * an illegal or invalid sequence is found. + * Context pointers are always owned by the caller. + * Predefined actions and contexts can be found in the ucnv_err.h header. + * + * @param converter the unicode converter + * @param newAction the new callback function + * @param newContext the new fromUnicode callback context pointer. This can be NULL. + * @param oldAction fillin: returns the old callback function pointer. This can be NULL. + * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. + * @param err The error code status + * @see ucnv_getFromUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setFromUCallBack (UConverter * converter, + UConverterFromUCallback newAction, + const void *newContext, + UConverterFromUCallback *oldAction, + const void **oldContext, + UErrorCode * err); + +/** + * Converts an array of unicode characters to an array of codepage + * characters. This function is optimized for converting a continuous + * stream of data in buffer-sized chunks, where the entire source and + * target does not fit in available buffers. + * + * The source pointer is an in/out parameter. It starts out pointing where the + * conversion is to begin, and ends up pointing after the last UChar consumed. + * + * Target similarly starts out pointer at the first available byte in the output + * buffer, and ends up pointing after the last byte written to the output. + * + * The converter always attempts to consume the entire source buffer, unless + * (1.) the target buffer is full, or (2.) a failing error is returned from the + * current callback function. When a successful error status has been + * returned, it means that all of the source buffer has been + * consumed. At that point, the caller should reset the source and + * sourceLimit pointers to point to the next chunk. + * + * At the end of the stream (flush==TRUE), the input is completely consumed + * when *source==sourceLimit and no error code is set. + * The converter object is then automatically reset by this function. + * (This means that a converter need not be reset explicitly between data + * streams if it finishes the previous stream without errors.) + * + * This is a <I>stateful</I> conversion. Additionally, even when all source data has + * been consumed, some data may be in the converters' internal state. + * Call this function repeatedly, updating the target pointers with + * the next empty chunk of target in case of a + * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers + * with the next chunk of source when a successful error status is + * returned, until there are no more chunks of source data. + * @param converter the Unicode converter + * @param target I/O parameter. Input : Points to the beginning of the buffer to copy + * codepage characters to. Output : points to after the last codepage character copied + * to <TT>target</TT>. + * @param targetLimit the pointer just after last of the <TT>target</TT> buffer + * @param source I/O parameter, pointer to pointer to the source Unicode character buffer. + * @param sourceLimit the pointer just after the last of the source buffer + * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number + * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer + * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT> + * For output data carried across calls, and other data without a specific source character + * (such as from escape sequences or callbacks) -1 will be placed for offsets. + * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available + * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned, + * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until + * the source buffer is consumed. + * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the + * converter is <TT>NULL</TT>. + * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is + * still data to be written to the target. + * @see ucnv_fromUChars + * @see ucnv_convert + * @see ucnv_getMinCharSize + * @see ucnv_setToUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_fromUnicode (UConverter * converter, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t* offsets, + UBool flush, + UErrorCode * err); + +/** + * Converts a buffer of codepage bytes into an array of unicode UChars + * characters. This function is optimized for converting a continuous + * stream of data in buffer-sized chunks, where the entire source and + * target does not fit in available buffers. + * + * The source pointer is an in/out parameter. It starts out pointing where the + * conversion is to begin, and ends up pointing after the last byte of source consumed. + * + * Target similarly starts out pointer at the first available UChar in the output + * buffer, and ends up pointing after the last UChar written to the output. + * It does NOT necessarily keep UChar sequences together. + * + * The converter always attempts to consume the entire source buffer, unless + * (1.) the target buffer is full, or (2.) a failing error is returned from the + * current callback function. When a successful error status has been + * returned, it means that all of the source buffer has been + * consumed. At that point, the caller should reset the source and + * sourceLimit pointers to point to the next chunk. + * + * At the end of the stream (flush==TRUE), the input is completely consumed + * when *source==sourceLimit and no error code is set + * The converter object is then automatically reset by this function. + * (This means that a converter need not be reset explicitly between data + * streams if it finishes the previous stream without errors.) + * + * This is a <I>stateful</I> conversion. Additionally, even when all source data has + * been consumed, some data may be in the converters' internal state. + * Call this function repeatedly, updating the target pointers with + * the next empty chunk of target in case of a + * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers + * with the next chunk of source when a successful error status is + * returned, until there are no more chunks of source data. + * @param converter the Unicode converter + * @param target I/O parameter. Input : Points to the beginning of the buffer to copy + * UChars into. Output : points to after the last UChar copied. + * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer + * @param source I/O parameter, pointer to pointer to the source codepage buffer. + * @param sourceLimit the pointer to the byte after the end of the source buffer + * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number + * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer + * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT> + * For output data carried across calls, and other data without a specific source character + * (such as from escape sequences or callbacks) -1 will be placed for offsets. + * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available + * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned, + * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until + * the source buffer is consumed. + * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the + * converter is <TT>NULL</TT>. + * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is + * still data to be written to the target. + * @see ucnv_fromUChars + * @see ucnv_convert + * @see ucnv_getMinCharSize + * @see ucnv_setFromUCallBack + * @see ucnv_getNextUChar + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_toUnicode(UConverter *converter, + UChar **target, + const UChar *targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err); + +/** + * Convert the Unicode string into a codepage string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function is a more convenient but less powerful version of ucnv_fromUnicode(). + * It is only useful for whole strings, not for streaming conversion. + * + * The maximum output buffer capacity required (barring output from callbacks) will be + * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)). + * + * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called) + * @param src the input Unicode string + * @param srcLength the input string length, or -1 if NUL-terminated + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of chars available at dest + * @param pErrorCode normal ICU error code; + * common error codes that may be set by this function include + * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, + * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @see ucnv_fromUnicode + * @see ucnv_convert + * @see UCNV_GET_MAX_BYTES_FOR_STRING + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_fromUChars(UConverter *cnv, + char *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert the codepage string into a Unicode string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function is a more convenient but less powerful version of ucnv_toUnicode(). + * It is only useful for whole strings, not for streaming conversion. + * + * The maximum output buffer capacity required (barring output from callbacks) will be + * 2*srcLength (each char may be converted into a surrogate pair). + * + * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called) + * @param src the input codepage string + * @param srcLength the input string length, or -1 if NUL-terminated + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of UChars available at dest + * @param pErrorCode normal ICU error code; + * common error codes that may be set by this function include + * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, + * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @see ucnv_toUnicode + * @see ucnv_convert + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_toUChars(UConverter *cnv, + UChar *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a codepage buffer into Unicode one character at a time. + * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set. + * + * Advantage compared to ucnv_toUnicode() or ucnv_toUChars(): + * - Faster for small amounts of data, for most converters, e.g., + * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets. + * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants, + * it uses ucnv_toUnicode() internally.) + * - Convenient. + * + * Limitations compared to ucnv_toUnicode(): + * - Always assumes flush=TRUE. + * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion, + * that is, for where the input is supplied in multiple buffers, + * because ucnv_getNextUChar() will assume the end of the input at the end + * of the first buffer. + * - Does not provide offset output. + * + * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because + * ucnv_getNextUChar() uses the current state of the converter + * (unlike ucnv_toUChars() which always resets first). + * However, if ucnv_getNextUChar() is called after ucnv_toUnicode() + * stopped in the middle of a character sequence (with flush=FALSE), + * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode() + * internally until the next character boundary. + * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to + * start at a character boundary.) + * + * Instead of using ucnv_getNextUChar(), it is recommended + * to convert using ucnv_toUnicode() or ucnv_toUChars() + * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h) + * or a C++ CharacterIterator or similar. + * This allows streaming conversion and offset output, for example. + * + * <p>Handling of surrogate pairs and supplementary-plane code points:<br> + * There are two different kinds of codepages that provide mappings for surrogate characters: + * <ul> + * <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode + * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff. + * Each valid sequence will result in exactly one returned code point. + * If a sequence results in a single surrogate, then that will be returned + * by itself, even if a neighboring sequence encodes the matching surrogate.</li> + * <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points + * including surrogates. Code points in supplementary planes are represented with + * two sequences, each encoding a surrogate. + * For these codepages, matching pairs of surrogates will be combined into single + * code points for returning from this function. + * (Note that SCSU is actually a mix of these codepage types.)</li> + * </ul></p> + * + * @param converter an open UConverter + * @param source the address of a pointer to the codepage buffer, will be + * updated to point after the bytes consumed in the conversion call. + * @param sourceLimit points to the end of the input buffer + * @param err fills in error status (see ucnv_toUnicode) + * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input + * is empty or does not convert to any output (e.g.: pure state-change + * codes SI/SO, escape sequences for ISO 2022, + * or if the callback did not output anything, ...). + * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because + * the "buffer" is the return code. However, there might be subsequent output + * stored in the converter object + * that will be returned in following calls to this function. + * @return a UChar32 resulting from the partial conversion of source + * @see ucnv_toUnicode + * @see ucnv_toUChars + * @see ucnv_convert + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +ucnv_getNextUChar(UConverter * converter, + const char **source, + const char * sourceLimit, + UErrorCode * err); + +/** + * Convert from one external charset to another using two existing UConverters. + * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() - + * are used, "pivoting" through 16-bit Unicode. + * + * There is a similar function, ucnv_convert(), + * which has the following limitations: + * - it takes charset names, not converter objects, so that + * - two converters are opened for each call + * - only single-string conversion is possible, not streaming operation + * - it does not provide enough information to find out, + * in case of failure, whether the toUnicode or + * the fromUnicode conversion failed + * + * By contrast, ucnv_convertEx() + * - takes UConverter parameters instead of charset names + * - fully exposes the pivot buffer for complete error handling + * + * ucnv_convertEx() also provides further convenience: + * - an option to reset the converters at the beginning + * (if reset==TRUE, see parameters; + * also sets *pivotTarget=*pivotSource=pivotStart) + * - allow NUL-terminated input + * (only a single NUL byte, will not work for charsets with multi-byte NULs) + * (if sourceLimit==NULL, see parameters) + * - terminate with a NUL on output + * (only a single NUL byte, not useful for charsets with multi-byte NULs), + * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills + * the target buffer + * - the pivot buffer can be provided internally; + * in this case, the caller will not be able to get details about where an + * error occurred + * (if pivotStart==NULL, see below) + * + * The function returns when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) + * - a conversion error occurred + * (other U_FAILURE(), see description of pErrorCode) + * + * Limitation compared to the direct use of + * ucnv_fromUnicode() and ucnv_toUnicode(): + * ucnv_convertEx() does not provide offset information. + * + * Limitation compared to ucnv_fromUChars() and ucnv_toUChars(): + * ucnv_convertEx() does not support preflighting directly. + * + * Sample code for converting a single string from + * one external charset to UTF-8, ignoring the location of errors: + * + * \code + * int32_t + * myToUTF8(UConverter *cnv, + * const char *s, int32_t length, + * char *u8, int32_t capacity, + * UErrorCode *pErrorCode) { + * UConverter *utf8Cnv; + * char *target; + * + * if(U_FAILURE(*pErrorCode)) { + * return 0; + * } + * + * utf8Cnv=myGetCachedUTF8Converter(pErrorCode); + * if(U_FAILURE(*pErrorCode)) { + * return 0; + * } + * + * target=u8; + * ucnv_convertEx(cnv, utf8Cnv, + * &target, u8+capacity, + * &s, length>=0 ? s+length : NULL, + * NULL, NULL, NULL, NULL, + * TRUE, TRUE, + * pErrorCode); + * + * myReleaseCachedUTF8Converter(utf8Cnv); + * + * // return the output string length, but without preflighting + * return (int32_t)(target-u8); + * } + * \endcode + * + * @param targetCnv Output converter, used to convert from the UTF-16 pivot + * to the target using ucnv_fromUnicode(). + * @param sourceCnv Input converter, used to convert from the source to + * the UTF-16 pivot using ucnv_toUnicode(). + * @param target I/O parameter, same as for ucnv_fromUChars(). + * Input: *target points to the beginning of the target buffer. + * Output: *target points to the first unit after the last char written. + * @param targetLimit Pointer to the first unit after the target buffer. + * @param source I/O parameter, same as for ucnv_toUChars(). + * Input: *source points to the beginning of the source buffer. + * Output: *source points to the first unit after the last char read. + * @param sourceLimit Pointer to the first unit after the source buffer. + * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL, + * then an internal buffer is used and the other pivot + * arguments are ignored and can be NULL as well. + * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for + * conversion from the pivot buffer to the target buffer. + * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for + * conversion from the source buffer to the pivot buffer. + * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit + * and pivotStart<pivotLimit (unless pivotStart==NULL). + * @param pivotLimit Pointer to the first unit after the pivot buffer. + * @param reset If TRUE, then ucnv_resetToUnicode(sourceCnv) and + * ucnv_resetFromUnicode(targetCnv) are called, and the + * pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart). + * @param flush If true, indicates the end of the input. + * Passed directly to ucnv_toUnicode(), and carried over to + * ucnv_fromUnicode() when the source is empty as well. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * U_BUFFER_OVERFLOW_ERROR always refers to the target buffer + * because overflows into the pivot buffer are handled internally. + * Other conversion errors are from the source-to-pivot + * conversion if *pivotSource==pivotStart, otherwise from + * the pivot-to-target conversion. + * + * @see ucnv_convert + * @see ucnv_fromAlgorithmic + * @see ucnv_toAlgorithmic + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, + char **target, const char *targetLimit, + const char **source, const char *sourceLimit, + UChar *pivotStart, UChar **pivotSource, + UChar **pivotTarget, const UChar *pivotLimit, + UBool reset, UBool flush, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, two converters are opened according to the name arguments, + * then the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(), then the converters are closed again. + * + * This is a convenience function, not an efficient way to convert a lot of text: + * ucnv_convert() + * - takes charset names, not converter objects, so that + * - two converters are opened for each call + * - only single-string conversion is possible, not streaming operation + * - does not provide enough information to find out, + * in case of failure, whether the toUnicode or + * the fromUnicode conversion failed + * - allows NUL-terminated input + * (only a single NUL byte, will not work for charsets with multi-byte NULs) + * (if sourceLength==-1, see parameters) + * - terminate with a NUL on output + * (only a single NUL byte, not useful for charsets with multi-byte NULs), + * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills + * the target buffer + * - a pivot buffer is provided internally + * + * The function returns when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * and either the target buffer is terminated with a single NUL byte + * or the error code is set to U_STRING_NOT_TERMINATED_WARNING + * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) + * and the full output string length is returned ("preflighting") + * - a conversion error occurred + * (other U_FAILURE(), see description of pErrorCode) + * + * @param toConverterName The name of the converter that is used to convert + * from the UTF-16 pivot buffer to the target. + * @param fromConverterName The name of the converter that is used to convert + * from the source to the UTF-16 pivot buffer. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_convertEx + * @see ucnv_fromAlgorithmic + * @see ucnv_toAlgorithmic + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @see ucnv_getNextUChar + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_convert(const char *toConverterName, + const char *fromConverterName, + char *target, + int32_t targetCapacity, + const char *source, + int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert() + * except that the two converters need not be looked up and opened completely. + * + * The source-to-pivot conversion uses the cnv converter parameter. + * The pivot-to-target conversion uses a purely algorithmic converter + * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter. + * + * Internally, the algorithmic converter is opened and closed for each + * function call, which is more efficient than using the public ucnv_open() + * but somewhat less efficient than only resetting an existing converter + * and using ucnv_convertEx(). + * + * This function is more convenient than ucnv_convertEx() for single-string + * conversions, especially when "preflighting" is desired (returning the length + * of the complete output even if it does not fit into the target buffer; + * see the User Guide Strings chapter). See ucnv_convert() for details. + * + * @param algorithmicType UConverterType constant identifying the desired target + * charset as a purely algorithmic converter. + * Those are converters for Unicode charsets like + * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc., + * as well as US-ASCII and ISO-8859-1. + * @param cnv The converter that is used to convert + * from the source to the UTF-16 pivot buffer. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_fromAlgorithmic + * @see ucnv_convert + * @see ucnv_convertEx + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_toAlgorithmic(UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert() + * except that the two converters need not be looked up and opened completely. + * + * The source-to-pivot conversion uses a purely algorithmic converter + * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter. + * The pivot-to-target conversion uses the cnv converter parameter. + * + * Internally, the algorithmic converter is opened and closed for each + * function call, which is more efficient than using the public ucnv_open() + * but somewhat less efficient than only resetting an existing converter + * and using ucnv_convertEx(). + * + * This function is more convenient than ucnv_convertEx() for single-string + * conversions, especially when "preflighting" is desired (returning the length + * of the complete output even if it does not fit into the target buffer; + * see the User Guide Strings chapter). See ucnv_convert() for details. + * + * @param cnv The converter that is used to convert + * from the UTF-16 pivot buffer to the target. + * @param algorithmicType UConverterType constant identifying the desired source + * charset as a purely algorithmic converter. + * Those are converters for Unicode charsets like + * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc., + * as well as US-ASCII and ISO-8859-1. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_fromAlgorithmic + * @see ucnv_convert + * @see ucnv_convertEx + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_fromAlgorithmic(UConverter *cnv, + UConverterType algorithmicType, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Frees up memory occupied by unused, cached converter shared data. + * + * @return the number of cached converters successfully deleted + * @see ucnv_close + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_flushCache(void); + +/** + * Returns the number of available converters, as per the alias file. + * + * @return the number of available converters + * @see ucnv_getAvailableName + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_countAvailable(void); + +/** + * Gets the canonical converter name of the specified converter from a list of + * all available converters contaied in the alias file. All converters + * in this list can be opened. + * + * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>) + * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds. + * @see ucnv_countAvailable + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +ucnv_getAvailableName(int32_t n); + +/** + * Returns a UEnumeration to enumerate all of the canonical converter + * names, as per the alias file, regardless of the ability to open each + * converter. + * + * @return A UEnumeration object for getting all the recognized canonical + * converter names. + * @see ucnv_getAvailableName + * @see uenum_close + * @see uenum_next + * @stable ICU 2.4 + */ +U_STABLE UEnumeration * U_EXPORT2 +ucnv_openAllNames(UErrorCode *pErrorCode); + +/** + * Gives the number of aliases for a given converter or alias name. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * This method only enumerates the listed entries in the alias file. + * @param alias alias name + * @param pErrorCode error status + * @return number of names on alias list for given alias + * @stable ICU 2.0 + */ +U_STABLE uint16_t U_EXPORT2 +ucnv_countAliases(const char *alias, UErrorCode *pErrorCode); + +/** + * Gives the name of the alias at given index of alias list. + * This method only enumerates the listed entries in the alias file. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * @param alias alias name + * @param n index in alias list + * @param pErrorCode result of operation + * @return returns the name of the alias at given index + * @see ucnv_countAliases + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode); + +/** + * Fill-up the list of alias names for the given alias. + * This method only enumerates the listed entries in the alias file. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * @param alias alias name + * @param aliases fill-in list, aliases is a pointer to an array of + * <code>ucnv_countAliases()</code> string-pointers + * (<code>const char *</code>) that will be filled in. + * The strings themselves are owned by the library. + * @param pErrorCode result of operation + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode); + +/** + * Return a new UEnumeration object for enumerating all the + * alias names for a given converter that are recognized by a standard. + * This method only enumerates the listed entries in the alias file. + * The convrtrs.txt file can be modified to change the results of + * this function. + * The first result in this list is the same result given by + * <code>ucnv_getStandardName</code>, which is the default alias for + * the specified standard name. The returned object must be closed with + * <code>uenum_close</code> when you are done with the object. + * + * @param convName original converter name + * @param standard name of the standard governing the names; MIME and IANA + * are such standards + * @param pErrorCode The error code + * @return A UEnumeration object for getting all aliases that are recognized + * by a standard. If any of the parameters are invalid, NULL + * is returned. + * @see ucnv_getStandardName + * @see uenum_close + * @see uenum_next + * @stable ICU 2.2 + */ +U_STABLE UEnumeration * U_EXPORT2 +ucnv_openStandardNames(const char *convName, + const char *standard, + UErrorCode *pErrorCode); + +/** + * Gives the number of standards associated to converter names. + * @return number of standards + * @stable ICU 2.0 + */ +U_STABLE uint16_t U_EXPORT2 +ucnv_countStandards(void); + +/** + * Gives the name of the standard at given index of standard list. + * @param n index in standard list + * @param pErrorCode result of operation + * @return returns the name of the standard at given index. Owned by the library. + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode); + +/** + * Returns a standard name for a given converter name. + * <p> + * Example alias table:<br> + * conv alias1 { STANDARD1 } alias2 { STANDARD1* } + * <p> + * Result of ucnv_getStandardName("conv", "STANDARD1") from example + * alias table:<br> + * <b>"alias2"</b> + * + * @param name original converter name + * @param standard name of the standard governing the names; MIME and IANA + * are such standards + * @param pErrorCode result of operation + * @return returns the standard converter name; + * if a standard converter name cannot be determined, + * then <code>NULL</code> is returned. Owned by the library. + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode); + +/** + * This function will return the internal canonical converter name of the + * tagged alias. This is the opposite of ucnv_openStandardNames, which + * returns the tagged alias given the canonical name. + * <p> + * Example alias table:<br> + * conv alias1 { STANDARD1 } alias2 { STANDARD1* } + * <p> + * Result of ucnv_getStandardName("alias1", "STANDARD1") from example + * alias table:<br> + * <b>"conv"</b> + * + * @return returns the canonical converter name; + * if a standard or alias name cannot be determined, + * then <code>NULL</code> is returned. The returned string is + * owned by the library. + * @see ucnv_getStandardName + * @stable ICU 2.4 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode); + +/** + * returns the current default converter name. + * + * @return returns the current default converter name; + * if a default converter name cannot be determined, + * then <code>NULL</code> is returned. + * Storage owned by the library + * @see ucnv_setDefaultName + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getDefaultName(void); + +/** + * sets the current default converter name. Caller must own the storage for 'name' + * and preserve it indefinitely. + * @param name the converter name to be the default (must exist). + * @see ucnv_getDefaultName + * @system SYSTEM API + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setDefaultName(const char *name); + +/** + * Fixes the backslash character mismapping. For example, in SJIS, the backslash + * character in the ASCII portion is also used to represent the yen currency sign. + * When mapping from Unicode character 0x005C, it's unclear whether to map the + * character back to yen or backslash in SJIS. This function will take the input + * buffer and replace all the yen sign characters with backslash. This is necessary + * when the user tries to open a file with the input buffer on Windows. + * This function will test the converter to see whether such mapping is + * required. You can sometimes avoid using this function by using the correct version + * of Shift-JIS. + * + * @param cnv The converter representing the target codepage. + * @param source the input buffer to be fixed + * @param sourceLen the length of the input buffer + * @see ucnv_isAmbiguous + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen); + +/** + * Determines if the converter contains ambiguous mappings of the same + * character or not. + * @param cnv the converter to be tested + * @return TRUE if the converter contains ambiguous mapping of the same + * character, FALSE otherwise. + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucnv_isAmbiguous(const UConverter *cnv); + +/** + * Sets the converter to use fallback mapping or not. + * @param cnv The converter to set the fallback mapping usage on. + * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback + * mapping, FALSE otherwise. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setFallback(UConverter *cnv, UBool usesFallback); + +/** + * Determines if the converter uses fallback mappings or not. + * @param cnv The converter to be tested + * @return TRUE if the converter uses fallback, FALSE otherwise. + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucnv_usesFallback(const UConverter *cnv); + +/** + * Detects Unicode signature byte sequences at the start of the byte stream + * and returns the charset name of the indicated Unicode charset. + * NULL is returned when no Unicode signature is recognized. + * The number of bytes in the signature is output as well. + * + * The caller can ucnv_open() a converter using the charset name. + * The first code unit (UChar) from the start of the stream will be U+FEFF + * (the Unicode BOM/signature character) and can usually be ignored. + * + * For most Unicode charsets it is also possible to ignore the indicated + * number of initial stream bytes and start converting after them. + * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which + * this will not work. Therefore, it is best to ignore the first output UChar + * instead of the input signature bytes. + * <p> + * Usage: + * @code + * UErrorCode err = U_ZERO_ERROR; + * char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' }; + * int32_t signatureLength = 0; + * char *encoding = ucnv_detectUnicodeSignatures(input,sizeof(input),&signatureLength,&err); + * UConverter *conv = NULL; + * UChar output[100]; + * UChar *target = output, *out; + * char *source = input; + * if(encoding!=NULL && U_SUCCESS(err)){ + * // should signature be discarded ? + * conv = ucnv_open(encoding, &err); + * // do the conversion + * ucnv_toUnicode(conv, + * target, output + sizeof(output)/U_SIZEOF_UCHAR, + * source, input + sizeof(input), + * NULL, TRUE, &err); + * out = output; + * if (discardSignature){ + * ++out; // ignore initial U+FEFF + * } + * while(out != target) { + * printf("%04x ", *out++); + * } + * puts(""); + * } + * + * @endcode + * + * @param source The source string in which the signature should be detected. + * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte. + * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature + * of the detected UTF. 0 if not detected. + * Can be a NULL pointer. + * @param pErrorCode A pointer to receive information about any errors that may occur during detection. + * Must be a valid pointer to an error code value, which must not indicate a failure + * before the function call. + * @return The name of the encoding detected. NULL if encoding is not detected. + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +ucnv_detectUnicodeSignature(const char* source, + int32_t sourceLength, + int32_t *signatureLength, + UErrorCode *pErrorCode); + +#endif + +#endif +/*_UCNV*/ diff --git a/JavaScriptGlue/icu/unicode/ucnv_err.h b/JavaScriptGlue/icu/unicode/ucnv_err.h new file mode 100644 index 0000000..2a6104c --- /dev/null +++ b/JavaScriptGlue/icu/unicode/ucnv_err.h @@ -0,0 +1,456 @@ +/* +********************************************************************** +* Copyright (C) 1999-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** + * + * + * ucnv_err.h: + */ + +/** + * \file + * \brief C UConverter predefined error callbacks + * + * <h2>Error Behaviour Functions</h2> + * Defines some error behaviour functions called by ucnv_{from,to}Unicode + * These are provided as part of ICU and many are stable, but they + * can also be considered only as an example of what can be done with + * callbacks. You may of course write your own. + * + * If you want to write your own, you may also find the functions from + * ucnv_cb.h useful when writing your own callbacks. + * + * These functions, although public, should NEVER be called directly. + * They should be used as parameters to the ucnv_setFromUCallback + * and ucnv_setToUCallback functions, to set the behaviour of a converter + * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. + * + * usage example: 'STOP' doesn't need any context, but newContext + * could be set to something other than 'NULL' if needed. The available + * contexts in this header can modify the default behavior of the callback. + * + * \code + * UErrorCode err = U_ZERO_ERROR; + * UConverter *myConverter = ucnv_open("ibm-949", &err); + * const void *oldContext; + * UConverterFromUCallback oldAction; + * + * + * if (U_SUCCESS(err)) + * { + * ucnv_setFromUCallBack(myConverter, + * UCNV_FROM_U_CALLBACK_STOP, + * NULL, + * &oldAction, + * &oldContext, + * &status); + * } + * \endcode + * + * The code above tells "myConverter" to stop when it encounters an + * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from + * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, + * and ucnv_setToUCallBack would need to be called in order to change + * that behavior too. + * + * Here is an example with a context: + * + * \code + * UErrorCode err = U_ZERO_ERROR; + * UConverter *myConverter = ucnv_open("ibm-949", &err); + * const void *oldContext; + * UConverterFromUCallback oldAction; + * + * + * if (U_SUCCESS(err)) + * { + * ucnv_setToUCallBack(myConverter, + * UCNV_TO_U_CALLBACK_SUBSTITUTE, + * UCNV_SUB_STOP_ON_ILLEGAL, + * &oldAction, + * &oldContext, + * &status); + * } + * \endcode + * + * The code above tells "myConverter" to stop when it encounters an + * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from + * Codepage -> Unicode. Any unmapped and legal characters will be + * substituted to be the default substitution character. + */ + +#ifndef UCNV_ERR_H +#define UCNV_ERR_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +/** Forward declaring the UConverter structure. @stable ICU 2.0 */ +struct UConverter; + +/** @stable ICU 2.0 */ +typedef struct UConverter UConverter; + +/** + * FROM_U, TO_U context options for sub callback + * @stable ICU 2.0 + */ +#define UCNV_SUB_STOP_ON_ILLEGAL "i" + +/** + * FROM_U, TO_U context options for skip callback + * @stable ICU 2.0 + */ +#define UCNV_SKIP_STOP_ON_ILLEGAL "i" + +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_ICU NULL +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_JAVA "J" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) + * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_C "C" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape (&#DDDD;) + * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape (&#DDDD;) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_XML_DEC "D" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape (&#xXXXX;) + * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape (&#xXXXX;) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_XML_HEX "X" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape teh code unit according to Unicode (U+XXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_UNICODE "U" + +/** + * The process condition code to be used with the callbacks. + * Codes which are greater than UCNV_IRREGULAR should be + * passed on to any chained callbacks. + * @stable ICU 2.0 + */ +typedef enum { + UCNV_UNASSIGNED = 0, /**< The code point is unassigned. + The error code U_INVALID_CHAR_FOUND will be set. */ + UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, + \\x81\\x2E is illegal in SJIS because \\x2E + is not a valid trail byte for the \\x81 + lead byte. + Also, starting with Unicode 3.0.1, non-shortest byte sequences + in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) + are also illegal, not just irregular. + The error code U_ILLEGAL_CHAR_FOUND will be set. */ + UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in + the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF + are irregular UTF-8 byte sequences for single surrogate + code points. + The error code U_INVALID_CHAR_FOUND will be set. */ + UCNV_RESET = 3, /**< The callback is called with this reason when a + 'reset' has occured. Callback should reset all + state. */ + UCNV_CLOSE = 4, /**< Called when the converter is closed. The + callback should release any allocated memory.*/ + UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the + converter. the pointer available as the + 'context' is an alias to the original converters' + context pointer. If the context must be owned + by the new converter, the callback must clone + the data and call ucnv_setFromUCallback + (or setToUCallback) with the correct pointer. + @stable ICU 2.2 + */ +} UConverterCallbackReason; + + +/** + * The structure for the fromUnicode callback function parameter. + * @stable ICU 2.0 + */ +typedef struct { + uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ + UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ + UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ + const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ + const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ + char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ + const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ +} UConverterFromUnicodeArgs; + + +/** + * The structure for the toUnicode callback function parameter. + * @stable ICU 2.0 + */ +typedef struct { + uint16_t size; /**< The size of this struct @stable ICU 2.0 */ + UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ + UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ + const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ + const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ + UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ + const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ +} UConverterToUnicodeArgs; + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * + * @param context Pointer to the callback's private data + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err This should always be set to a failure status prior to calling. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * + * @param context Pointer to the callback's private data + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err This should always be set to a failure status prior to calling. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback skips any ILLEGAL_SEQUENCE, or + * skips only UNASSINGED_SEQUENCE depending on the context parameter + * simply ignoring those characters. + * + * @param context The function currently recognizes the callback options: + * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Skips any ILLEGAL_SEQUENCE + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or + * UNASSIGNED_SEQUENCE depending on context parameter, with the + * current substitution string for the converter. This is the default + * callback. + * + * @param context The function currently recognizes the callback options: + * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Substitutes any ILLEGAL_SEQUENCE + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @see ucnv_setSubstChars + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the + * hexadecimal representation of the illegal codepoints + * + * @param context The function currently recognizes the callback options: + * <ul> + * <li>UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). + * In the Event the converter doesn't support the characters {%,U}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * %UD84D%UDC56</li> + * <li>UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). + * In the Event the converter doesn't support the characters {\,u}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * \\uD84D\\uDC56</li> + * <li>UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). + * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * \\U00023456</li> + * <li>UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal + * representation in the format &#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;"). + * In the Event the converter doesn't support the characters {&,#}[0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * &#144470; and Zero padding is ignored.</li> + * <li>UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal + * representation in the format &#xXXXX, e.g. "&#xFFFE;&#x00AC;&#xC8FE;"). + * In the Event the converter doesn't support the characters {&,#,x}[0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * &#x23456;</li> + * </ul> + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback skips any ILLEGAL_SEQUENCE, or + * skips only UNASSINGED_SEQUENCE depending on the context parameter + * simply ignoring those characters. + * + * @param context The function currently recognizes the callback options: + * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Skips any ILLEGAL_SEQUENCE + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or + * UNASSIGNED_SEQUENCE depending on context parameter, with the + * Unicode substitution character, U+FFFD. + * + * @param context The function currently recognizes the callback options: + * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Substitutes any ILLEGAL_SEQUENCE + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the + * hexadecimal representation of the illegal bytes + * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). + * + * @param context This function currently recognizes the callback options: + * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, + * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ + +U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +#endif + +#endif + +/*UCNV_ERR_H*/ diff --git a/JavaScriptGlue/icu/unicode/uconfig.h b/JavaScriptGlue/icu/unicode/uconfig.h new file mode 100644 index 0000000..997cf68 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/uconfig.h @@ -0,0 +1,186 @@ +/* +********************************************************************** +* Copyright (C) 2002-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: uconfig.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002sep19 +* created by: Markus W. Scherer +*/ + +#ifndef __UCONFIG_H__ +#define __UCONFIG_H__ + +/*! + * \file + * \brief Switches for excluding parts of ICU library code modules. + * + * Allows to build partial, smaller libraries for special purposes. + * By default, all modules are built. + * The switches are fairly coarse, controlling large modules. + * Basic services cannot be turned off. + * + * @stable ICU 2.4 + */ + +/** + * \def UCONFIG_ONLY_COLLATION + * This switch turns off modules that are not needed for collation. + * + * It does not turn off legacy conversion because that is necessary + * for ICU to work on EBCDIC platforms (for the default converter). + * If you want "only collation" and do not build for EBCDIC, + * then you can #define UCONFIG_NO_LEGACY_CONVERSION 1 as well. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_ONLY_COLLATION +# define UCONFIG_ONLY_COLLATION 0 +#endif + +#if UCONFIG_ONLY_COLLATION + /* common library */ +# define UCONFIG_NO_BREAK_ITERATION 1 +# define UCONFIG_NO_IDNA 1 + + /* i18n library */ +# if UCONFIG_NO_COLLATION +# error Contradictory collation switches in uconfig.h. +# endif +# define UCONFIG_NO_FORMATTING 1 +# define UCONFIG_NO_TRANSLITERATION 1 +# define UCONFIG_NO_REGULAR_EXPRESSIONS 1 +#endif + +/* common library switches -------------------------------------------------- */ + +/** + * \def UCONFIG_NO_CONVERSION + * ICU will not completely build with this switch turned on. + * This switch turns off all converters. + * + * @draft ICU 3.2 + */ +#ifndef UCONFIG_NO_CONVERSION +# define UCONFIG_NO_CONVERSION 0 +#endif + +#if UCONFIG_NO_CONVERSION +# define UCONFIG_NO_LEGACY_CONVERSION 1 +#endif + +/** + * \def UCONFIG_NO_LEGACY_CONVERSION + * This switch turns off all converters except for + * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) + * - US-ASCII + * - ISO-8859-1 + * + * Turning off legacy conversion is not possible on EBCDIC platforms + * because they need ibm-37 or ibm-1047 default converters. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_LEGACY_CONVERSION +# define UCONFIG_NO_LEGACY_CONVERSION 0 +#endif + +/** + * \def UCONFIG_NO_NORMALIZATION + * This switch turns off normalization. + * It implies turning off several other services as well, for example + * collation and IDNA. + * + * @stable ICU 2.6 + */ +#ifndef UCONFIG_NO_NORMALIZATION +# define UCONFIG_NO_NORMALIZATION 0 +#elif UCONFIG_NO_NORMALIZATION + /* common library */ +# define UCONFIG_NO_IDNA 1 + + /* i18n library */ +# if UCONFIG_ONLY_COLLATION +# error Contradictory collation switches in uconfig.h. +# endif +# define UCONFIG_NO_COLLATION 1 +# define UCONFIG_NO_TRANSLITERATION 1 +#endif + +/** + * \def UCONFIG_NO_BREAK_ITERATION + * This switch turns off break iteration. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_BREAK_ITERATION +# define UCONFIG_NO_BREAK_ITERATION 0 +#endif + +/** + * \def UCONFIG_NO_IDNA + * This switch turns off IDNA. + * + * @stable ICU 2.6 + */ +#ifndef UCONFIG_NO_IDNA +# define UCONFIG_NO_IDNA 0 +#endif + +/* i18n library switches ---------------------------------------------------- */ + +/** + * \def UCONFIG_NO_COLLATION + * This switch turns off collation and collation-based string search. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_COLLATION +# define UCONFIG_NO_COLLATION 0 +#endif + +/** + * \def UCONFIG_NO_FORMATTING + * This switch turns off formatting and calendar/timezone services. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_FORMATTING +# define UCONFIG_NO_FORMATTING 0 +#endif + +/** + * \def UCONFIG_NO_TRANSLITERATION + * This switch turns off transliteration. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_TRANSLITERATION +# define UCONFIG_NO_TRANSLITERATION 0 +#endif + +/** + * \def UCONFIG_NO_REGULAR_EXPRESSIONS + * This switch turns off regular expressions. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS +# define UCONFIG_NO_REGULAR_EXPRESSIONS 0 +#endif + +/** + * \def UCONFIG_NO_SERVICE + * This switch turns off service registration. + * + * @draft ICU 3.2 + */ +#ifndef UCONFIG_NO_SERVICE +# define UCONFIG_NO_SERVICE 0 +#endif + +#endif diff --git a/JavaScriptGlue/icu/unicode/uenum.h b/JavaScriptGlue/icu/unicode/uenum.h new file mode 100644 index 0000000..63690f8 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/uenum.h @@ -0,0 +1,129 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uenum.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2002jul08 +* created by: Vladimir Weinstein +*/ + +#ifndef __UENUM_H +#define __UENUM_H + +#include "unicode/utypes.h" + +/** + * An enumeration object. + * For usage in C programs. + * @stable ICU 2.2 + */ +struct UEnumeration; +/** structure representing an enumeration object instance @stable ICU 2.2 */ +typedef struct UEnumeration UEnumeration; + +/** + * Disposes of resources in use by the iterator. If en is NULL, + * does nothing. After this call, any char* or UChar* pointer + * returned by uenum_unext() or uenum_next() is invalid. + * @param en UEnumeration structure pointer + * @stable ICU 2.2 + */ +U_STABLE void U_EXPORT2 +uenum_close(UEnumeration* en); + +/** + * Returns the number of elements that the iterator traverses. If + * the iterator is out-of-sync with its service, status is set to + * U_ENUM_OUT_OF_SYNC_ERROR. + * This is a convenience function. It can end up being very + * expensive as all the items might have to be pre-fetched (depending + * on the type of data being traversed). Use with caution and only + * when necessary. + * @param en UEnumeration structure pointer + * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the + * iterator is out of sync. + * @return number of elements in the iterator + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +uenum_count(UEnumeration* en, UErrorCode* status); + +/** + * Returns the next element in the iterator's list. If there are + * no more elements, returns NULL. If the iterator is out-of-sync + * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and + * NULL is returned. If the native service string is a char* string, + * it is converted to UChar* with the invariant converter. + * The result is terminated by (UChar)0. + * @param en the iterator object + * @param resultLength pointer to receive the length of the result + * (not including the terminating \\0). + * If the pointer is NULL it is ignored. + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. + * @return a pointer to the string. The string will be + * zero-terminated. The return pointer is owned by this iterator + * and must not be deleted by the caller. The pointer is valid + * until the next call to any uenum_... method, including + * uenum_next() or uenum_unext(). When all strings have been + * traversed, returns NULL. + * @stable ICU 2.2 + */ +U_STABLE const UChar* U_EXPORT2 +uenum_unext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/** + * Returns the next element in the iterator's list. If there are + * no more elements, returns NULL. If the iterator is out-of-sync + * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and + * NULL is returned. If the native service string is a UChar* + * string, it is converted to char* with the invariant converter. + * The result is terminated by (char)0. If the conversion fails + * (because a character cannot be converted) then status is set to + * U_INVARIANT_CONVERSION_ERROR and the return value is undefined + * (but non-NULL). + * @param en the iterator object + * @param resultLength pointer to receive the length of the result + * (not including the terminating \\0). + * If the pointer is NULL it is ignored. + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. Set to + * U_INVARIANT_CONVERSION_ERROR if the underlying native string is + * UChar* and conversion to char* with the invariant converter + * fails. This error pertains only to current string, so iteration + * might be able to continue successfully. + * @return a pointer to the string. The string will be + * zero-terminated. The return pointer is owned by this iterator + * and must not be deleted by the caller. The pointer is valid + * until the next call to any uenum_... method, including + * uenum_next() or uenum_unext(). When all strings have been + * traversed, returns NULL. + * @stable ICU 2.2 + */ +U_STABLE const char* U_EXPORT2 +uenum_next(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/** + * Resets the iterator to the current list of service IDs. This + * re-establishes sync with the service and rewinds the iterator + * to start at the first element. + * @param en the iterator object + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. + * @stable ICU 2.2 + */ +U_STABLE void U_EXPORT2 +uenum_reset(UEnumeration* en, UErrorCode* status); + +#endif diff --git a/JavaScriptGlue/icu/unicode/uiter.h b/JavaScriptGlue/icu/unicode/uiter.h new file mode 100644 index 0000000..963df5c --- /dev/null +++ b/JavaScriptGlue/icu/unicode/uiter.h @@ -0,0 +1,707 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uiter.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jan18 +* created by: Markus W. Scherer +*/ + +#ifndef __UITER_H__ +#define __UITER_H__ + +/** + * \file + * \brief C API: Unicode Character Iteration + * + * @see UCharIterator + */ + +#include "unicode/utypes.h" + +#ifdef XP_CPLUSPLUS + U_NAMESPACE_BEGIN + + class CharacterIterator; + class Replaceable; + + U_NAMESPACE_END +#endif + +U_CDECL_BEGIN + +struct UCharIterator; +typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ + +/** + * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). + * @see UCharIteratorMove + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef enum UCharIteratorOrigin { + UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH +} UCharIteratorOrigin; + +/** Constants for UCharIterator. @stable ICU 2.6 */ +enum { + /** + * Constant value that may be returned by UCharIteratorMove + * indicating that the final UTF-16 index is not known, but that the move succeeded. + * This can occur when moving relative to limit or length, or + * when moving relative to the current index after a setState() + * when the current UTF-16 index is not known. + * + * It would be very inefficient to have to count from the beginning of the text + * just to get the current/limit/length index after moving relative to it. + * The actual index can be determined with getIndex(UITER_CURRENT) + * which will count the UChars if necessary. + * + * @stable ICU 2.6 + */ + UITER_UNKNOWN_INDEX=-2 +}; + + +/** + * Constant for UCharIterator getState() indicating an error or + * an unknown state. + * Returned by uiter_getState()/UCharIteratorGetState + * when an error occurs. + * Also, some UCharIterator implementations may not be able to return + * a valid state for each position. This will be clearly documented + * for each such iterator (none of the public ones here). + * + * @stable ICU 2.6 + */ +#define UITER_NO_STATE ((uint32_t)0xffffffff) + +/** + * Function type declaration for UCharIterator.getIndex(). + * + * Gets the current position, or the start or limit of the + * iteration range. + * + * This function may perform slowly for UITER_CURRENT after setState() was called, + * or for UITER_LENGTH, because an iterator implementation may have to count + * UChars if the underlying storage is not UTF-16. + * + * @param iter the UCharIterator structure ("this pointer") + * @param origin get the 0, start, limit, length, or current index + * @return the requested index, or U_SENTINEL in an error condition + * + * @see UCharIteratorOrigin + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); + +/** + * Function type declaration for UCharIterator.move(). + * + * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). + * + * Moves the current position relative to the start or limit of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * Out of bounds movement will be pinned to the start or limit. + * + * This function may perform slowly for moving relative to UITER_LENGTH + * because an iterator implementation may have to count the rest of the + * UChars if the native storage is not UTF-16. + * + * When moving relative to the limit or length, or + * relative to the current position after setState() was called, + * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient + * determination of the actual UTF-16 index. + * The actual index can be determined with getIndex(UITER_CURRENT) + * which will count the UChars if necessary. + * See UITER_UNKNOWN_INDEX for details. + * + * @param iter the UCharIterator structure ("this pointer") + * @param delta can be positive, zero, or negative + * @param origin move relative to the 0, start, limit, length, or current index + * @return the new index, or U_SENTINEL on an error condition, + * or UITER_UNKNOWN_INDEX when the index is not known. + * + * @see UCharIteratorOrigin + * @see UCharIterator + * @see UITER_UNKNOWN_INDEX + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); + +/** + * Function type declaration for UCharIterator.hasNext(). + * + * Check if current() and next() can still + * return another code unit. + * + * @param iter the UCharIterator structure ("this pointer") + * @return boolean value for whether current() and next() can still return another code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UBool U_CALLCONV +UCharIteratorHasNext(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.hasPrevious(). + * + * Check if previous() can still return another code unit. + * + * @param iter the UCharIterator structure ("this pointer") + * @return boolean value for whether previous() can still return another code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UBool U_CALLCONV +UCharIteratorHasPrevious(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.current(). + * + * Return the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorCurrent(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.next(). + * + * Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code unit (and post-increment the current index) + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorNext(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.previous(). + * + * Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code unit (after pre-decrementing the current index) + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorPrevious(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.reservedFn(). + * Reserved for future use. + * + * @param iter the UCharIterator structure ("this pointer") + * @param something some integer argument + * @return some integer + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorReserved(UCharIterator *iter, int32_t something); + +/** + * Function type declaration for UCharIterator.getState(). + * + * Get the "state" of the iterator in the form of a single 32-bit word. + * It is recommended that the state value be calculated to be as small as + * is feasible. For strings with limited lengths, fewer than 32 bits may + * be sufficient. + * + * This is used together with setState()/UCharIteratorSetState + * to save and restore the iterator position more efficiently than with + * getIndex()/move(). + * + * The iterator state is defined as a uint32_t value because it is designed + * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state + * of the character iterator. + * + * With some UCharIterator implementations (e.g., UTF-8), + * getting and setting the UTF-16 index with existing functions + * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but + * relatively slow because the iterator has to "walk" from a known index + * to the requested one. + * This takes more time the farther it needs to go. + * + * An opaque state value allows an iterator implementation to provide + * an internal index (UTF-8: the source byte array index) for + * fast, constant-time restoration. + * + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because + * the UTF-16 index may not be restored as well, but the iterator can deliver + * the correct text contents and move relative to the current position + * without performance degradation. + * + * Some UCharIterator implementations may not be able to return + * a valid state for each position, in which case they return UITER_NO_STATE instead. + * This will be clearly documented for each such iterator (none of the public ones here). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the state word + * + * @see UCharIterator + * @see UCharIteratorSetState + * @see UITER_NO_STATE + * @stable ICU 2.6 + */ +typedef uint32_t U_CALLCONV +UCharIteratorGetState(const UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.setState(). + * + * Restore the "state" of the iterator using a state word from a getState() call. + * The iterator object need not be the same one as for which getState() was called, + * but it must be of the same type (set up using the same uiter_setXYZ function) + * and it must iterate over the same string + * (binary identical regardless of memory address). + * For more about the state word see UCharIteratorGetState. + * + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because + * the UTF-16 index may not be restored as well, but the iterator can deliver + * the correct text contents and move relative to the current position + * without performance degradation. + * + * @param iter the UCharIterator structure ("this pointer") + * @param state the state word from a getState() call + * on a same-type, same-string iterator + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see UCharIterator + * @see UCharIteratorGetState + * @stable ICU 2.6 + */ +typedef void U_CALLCONV +UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); + + +/** + * C API for code unit iteration. + * This can be used as a C wrapper around + * CharacterIterator, Replaceable, or implemented using simple strings, etc. + * + * There are two roles for using UCharIterator: + * + * A "provider" sets the necessary function pointers and controls the "protected" + * fields of the UCharIterator structure. A "provider" passes a UCharIterator + * into C APIs that need a UCharIterator as an abstract, flexible string interface. + * + * Implementations of such C APIs are "callers" of UCharIterator functions; + * they only use the "public" function pointers and never access the "protected" + * fields directly. + * + * The current() and next() functions only check the current index against the + * limit, and previous() only checks the current index against the start, + * to see if the iterator already reached the end of the iteration range. + * + * The assumption - in all iterators - is that the index is moved via the API, + * which means it won't go out of bounds, or the index is modified by + * user code that knows enough about the iterator implementation to set valid + * index values. + * + * UCharIterator functions return code unit values 0..0xffff, + * or U_SENTINEL if the iteration bounds are reached. + * + * @stable ICU 2.1 + */ +struct UCharIterator { + /** + * (protected) Pointer to string or wrapped object or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + const void *context; + + /** + * (protected) Length of string or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t length; + + /** + * (protected) Start index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t start; + + /** + * (protected) Current index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t index; + + /** + * (protected) Limit index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t limit; + + /** + * (protected) Used by UTF-8 iterators and possibly others. + * @stable ICU 2.1 + */ + int32_t reservedField; + + /** + * (public) Returns the current position or the + * start or limit index of the iteration range. + * + * @see UCharIteratorGetIndex + * @stable ICU 2.1 + */ + UCharIteratorGetIndex *getIndex; + + /** + * (public) Moves the current position relative to the start or limit of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * + * @see UCharIteratorMove + * @stable ICU 2.1 + */ + UCharIteratorMove *move; + + /** + * (public) Check if current() and next() can still + * return another code unit. + * + * @see UCharIteratorHasNext + * @stable ICU 2.1 + */ + UCharIteratorHasNext *hasNext; + + /** + * (public) Check if previous() can still return another code unit. + * + * @see UCharIteratorHasPrevious + * @stable ICU 2.1 + */ + UCharIteratorHasPrevious *hasPrevious; + + /** + * (public) Return the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * + * @see UCharIteratorCurrent + * @stable ICU 2.1 + */ + UCharIteratorCurrent *current; + + /** + * (public) Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @see UCharIteratorNext + * @stable ICU 2.1 + */ + UCharIteratorNext *next; + + /** + * (public) Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @see UCharIteratorPrevious + * @stable ICU 2.1 + */ + UCharIteratorPrevious *previous; + + /** + * (public) Reserved for future use. Currently NULL. + * + * @see UCharIteratorReserved + * @stable ICU 2.1 + */ + UCharIteratorReserved *reservedFn; + + /** + * (public) Return the state of the iterator, to be restored later with setState(). + * This function pointer is NULL if the iterator does not implement it. + * + * @see UCharIteratorGet + * @stable ICU 2.6 + */ + UCharIteratorGetState *getState; + + /** + * (public) Restore the iterator state from the state word from a call + * to getState(). + * This function pointer is NULL if the iterator does not implement it. + * + * @see UCharIteratorSet + * @stable ICU 2.6 + */ + UCharIteratorSetState *setState; +}; + +/** + * Helper function for UCharIterator to get the code point + * at the current index. + * + * Return the code point that includes the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * If the current code unit is a lead or trail surrogate, + * then the following or preceding surrogate is used to form + * the code point value. + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point + * + * @see UCharIterator + * @see U16_GET + * @see UnicodeString::char32At() + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_current32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the next code point. + * + * Return the code point at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point (and post-increment the current index) + * + * @see UCharIterator + * @see U16_NEXT + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_next32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the previous code point. + * + * Decrement the index and return the code point from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code point (after pre-decrementing the current index) + * + * @see UCharIterator + * @see U16_PREV + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_previous32(UCharIterator *iter); + +/** + * Get the "state" of the iterator in the form of a single 32-bit word. + * This is a convenience function that calls iter->getState(iter) + * if iter->getState is not NULL; + * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. + * + * Some UCharIterator implementations may not be able to return + * a valid state for each position, in which case they return UITER_NO_STATE instead. + * This will be clearly documented for each such iterator (none of the public ones here). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the state word + * + * @see UCharIterator + * @see UCharIteratorGetState + * @see UITER_NO_STATE + * @stable ICU 2.6 + */ +U_STABLE uint32_t U_EXPORT2 +uiter_getState(const UCharIterator *iter); + +/** + * Restore the "state" of the iterator using a state word from a getState() call. + * This is a convenience function that calls iter->setState(iter, state, pErrorCode) + * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. + * + * @param iter the UCharIterator structure ("this pointer") + * @param state the state word from a getState() call + * on a same-type, same-string iterator + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see UCharIterator + * @see UCharIteratorSetState + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); + +/** + * Set up a UCharIterator to iterate over a string. + * + * Sets the UCharIterator function pointers for iteration over the string s + * with iteration boundaries start=index=0 and length=limit=string length. + * The "provider" may set the start, index, and limit values at any time + * within the range 0..length. + * The length field will be ignored. + * + * The string pointer s is set into UCharIterator.context without copying + * or reallocating the string contents. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param s String to iterate over + * @param length Length of s, or -1 if NUL-terminated + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); + +/** + * Set up a UCharIterator to iterate over a UTF-16BE string + * (byte vector with a big-endian pair of bytes per UChar). + * + * Everything works just like with a normal UChar iterator (uiter_setString), + * except that UChars are assembled from byte pairs, + * and that the length argument here indicates an even number of bytes. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param s UTF-16BE string to iterate over + * @param length Length of s as an even number of bytes, or -1 if NUL-terminated + * (NUL means pair of 0 bytes at even index from s) + * + * @see UCharIterator + * @see uiter_setString + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); + +/** + * Set up a UCharIterator to iterate over a UTF-8 string. + * + * Sets the UCharIterator function pointers for iteration over the UTF-8 string s + * with UTF-8 iteration boundaries 0 and length. + * The implementation counts the UTF-16 index on the fly and + * lazily evaluates the UTF-16 length of the text. + * + * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. + * When the reservedField is not 0, then it contains a supplementary code point + * and the UTF-16 index is between the two corresponding surrogates. + * At that point, the UTF-8 index is behind that code point. + * + * The UTF-8 string pointer s is set into UCharIterator.context without copying + * or reallocating the string contents. + * + * getState() returns a state value consisting of + * - the current UTF-8 source byte index (bits 31..1) + * - a flag (bit 0) that indicates whether the UChar position is in the middle + * of a surrogate pair + * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) + * + * getState() cannot also encode the UTF-16 index in the state value. + * move(relative to limit or length), or + * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. + * + * @param iter UCharIterator structure to be set for iteration + * @param s UTF-8 string to iterate over + * @param length Length of s in bytes, or -1 if NUL-terminated + * + * @see UCharIterator + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); + +#ifdef XP_CPLUSPLUS + +/** + * Set up a UCharIterator to wrap around a C++ CharacterIterator. + * + * Sets the UCharIterator function pointers for iteration using the + * CharacterIterator charIter. + * + * The CharacterIterator pointer charIter is set into UCharIterator.context + * without copying or cloning the CharacterIterator object. + * The other "protected" UCharIterator fields are set to 0 and will be ignored. + * The iteration index and boundaries are controlled by the CharacterIterator. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param charIter CharacterIterator to wrap + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter); + +/** + * Set up a UCharIterator to iterate over a C++ Replaceable. + * + * Sets the UCharIterator function pointers for iteration over the + * Replaceable rep with iteration boundaries start=index=0 and + * length=limit=rep->length(). + * The "provider" may set the start, index, and limit values at any time + * within the range 0..length=rep->length(). + * The length field will be ignored. + * + * The Replaceable pointer rep is set into UCharIterator.context without copying + * or cloning/reallocating the Replaceable object. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param rep Replaceable to iterate over + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep); + +#endif + +U_CDECL_END + +#endif diff --git a/JavaScriptGlue/icu/unicode/umachine.h b/JavaScriptGlue/icu/unicode/umachine.h new file mode 100644 index 0000000..d841f53 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/umachine.h @@ -0,0 +1,371 @@ +/* +****************************************************************************** +* +* Copyright (C) 1999-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: umachine.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +* +* This file defines basic types and constants for utf.h to be +* platform-independent. umachine.h and utf.h are included into +* utypes.h to provide all the general definitions for ICU. +* All of these definitions used to be in utypes.h before +* the UTF-handling macros made this unmaintainable. +*/ + +#ifndef __UMACHINE_H__ +#define __UMACHINE_H__ + + +/** + * \file + * \brief Basic types and constants for UTF + * + * <h2> Basic types and constants for UTF </h2> + * This file defines basic types and constants for utf.h to be + * platform-independent. umachine.h and utf.h are included into + * utypes.h to provide all the general definitions for ICU. + * All of these definitions used to be in utypes.h before + * the UTF-handling macros made this unmaintainable. + * + */ +/*==========================================================================*/ +/* Include platform-dependent definitions */ +/* which are contained in the platform-specific file platform.h */ +/*==========================================================================*/ + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +# include "unicode/pwin32.h" +#else +# include "unicode/platform.h" +#endif + +/* + * ANSI C headers: + * stddef.h defines wchar_t + */ +#include <stddef.h> + +/*==========================================================================*/ +/* XP_CPLUSPLUS is a cross-platform symbol which should be defined when */ +/* using C++. It should not be defined when compiling under C. */ +/*==========================================================================*/ + +#ifdef __cplusplus +# ifndef XP_CPLUSPLUS +# define XP_CPLUSPLUS +# endif +#else +# undef XP_CPLUSPLUS +#endif + +/*==========================================================================*/ +/* For C wrappers, we use the symbol U_STABLE. */ +/* This works properly if the includer is C or C++. */ +/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */ +/*==========================================================================*/ + +/** + * \def U_CFUNC + * This is used in a declaration of a library private ICU C function. + * @stable ICU 2.4 + */ + +/** + * \def U_CDECL_BEGIN + * This is used to begin a declaration of a library private ICU C API. + * @stable ICU 2.4 + */ + +/** + * \def U_CDECL_END + * This is used to end a declaration of a library private ICU C API + * @stable ICU 2.4 + */ + +#ifdef XP_CPLUSPLUS +# define U_CFUNC extern "C" +# define U_CDECL_BEGIN extern "C" { +# define U_CDECL_END } +#else +# define U_CFUNC extern +# define U_CDECL_BEGIN +# define U_CDECL_END +#endif + +/** + * \def U_NAMESPACE_BEGIN + * This is used to begin a declaration of a public ICU C++ API. + * If the compiler doesn't support namespaces, this does nothing. + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_END + * This is used to end a declaration of a public ICU C++ API + * If the compiler doesn't support namespaces, this does nothing. + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_USE + * This is used to specify that the rest of the code uses the + * public ICU C++ API namespace. + * If the compiler doesn't support namespaces, this does nothing. + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_QUALIFIER + * This is used to qualify that a function or class is part of + * the public ICU C++ API namespace. + * If the compiler doesn't support namespaces, this does nothing. + * @stable ICU 2.4 + */ + +/* Define namespace symbols if the compiler supports it. */ +#if U_HAVE_NAMESPACE +# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE { +# define U_NAMESPACE_END } +# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE; +# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: +#else +# define U_NAMESPACE_BEGIN +# define U_NAMESPACE_END +# define U_NAMESPACE_USE +# define U_NAMESPACE_QUALIFIER +#endif + +/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ +#define U_CAPI U_CFUNC U_EXPORT +#define U_STABLE U_CAPI +#define U_DRAFT U_CAPI +#define U_DEPRECATED U_CAPI +#define U_OBSOLETE U_CAPI +#define U_INTERNAL U_CAPI + +/*==========================================================================*/ +/* limits for int32_t etc., like in POSIX inttypes.h */ +/*==========================================================================*/ + +#ifndef INT8_MIN +/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ +# define INT8_MIN ((int8_t)(-128)) +#endif +#ifndef INT16_MIN +/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ +# define INT16_MIN ((int16_t)(-32767-1)) +#endif +#ifndef INT32_MIN +/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ +# define INT32_MIN ((int32_t)(-2147483647-1)) +#endif + +#ifndef INT8_MAX +/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ +# define INT8_MAX ((int8_t)(127)) +#endif +#ifndef INT16_MAX +/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ +# define INT16_MAX ((int16_t)(32767)) +#endif +#ifndef INT32_MAX +/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ +# define INT32_MAX ((int32_t)(2147483647)) +#endif + +#ifndef UINT8_MAX +/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT8_MAX ((uint8_t)(255U)) +#endif +#ifndef UINT16_MAX +/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT16_MAX ((uint16_t)(65535U)) +#endif +#ifndef UINT32_MAX +/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT32_MAX ((uint32_t)(4294967295U)) +#endif + +#if defined(U_INT64_T_UNAVAILABLE) +# error int64_t is required for decimal format and rule-based number format. +#else +# ifndef INT64_C +/** + * Provides a platform independent way to specify a signed 64-bit integer constant. + * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C + * @draft ICU 2.8 + */ +# define INT64_C(c) c ## LL +# endif +# ifndef UINT64_C +/** + * Provides a platform independent way to specify an unsigned 64-bit integer constant. + * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C + * @draft ICU 2.8 + */ +# define UINT64_C(c) c ## ULL +# endif +# ifndef U_INT64_MIN +/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ +# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) +# endif +# ifndef U_INT64_MAX +/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ +# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) +# endif +# ifndef U_UINT64_MAX +/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ +# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) +# endif +#endif + +/*==========================================================================*/ +/* Boolean data type */ +/*==========================================================================*/ + +/** The ICU boolean type @stable ICU 2.0 */ +typedef int8_t UBool; + +#ifndef TRUE +/** The TRUE value of a UBool @stable ICU 2.0 */ +# define TRUE 1 +#endif +#ifndef FALSE +/** The FALSE value of a UBool @stable ICU 2.0 */ +# define FALSE 0 +#endif + + +/*==========================================================================*/ +/* Unicode data types */ +/*==========================================================================*/ + +/* wchar_t-related definitions -------------------------------------------- */ + +/** + * \def U_HAVE_WCHAR_H + * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default. + * + * @stable ICU 2.0 + */ +#ifndef U_HAVE_WCHAR_H +# define U_HAVE_WCHAR_H 1 +#endif + +/** + * \def U_SIZEOF_WCHAR_T + * U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it) + * + * @stable ICU 2.0 + */ +#if U_SIZEOF_WCHAR_T==0 +# undef U_SIZEOF_WCHAR_T +# define U_SIZEOF_WCHAR_T 4 +#endif + +/* + * \def U_WCHAR_IS_UTF16 + * Defined if wchar_t uses UTF-16. + * + * @stable ICU 2.0 + */ +/* + * \def U_WCHAR_IS_UTF32 + * Defined if wchar_t uses UTF-32. + * + * @stable ICU 2.0 + */ +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) +# ifdef __STDC_ISO_10646__ +# if (U_SIZEOF_WCHAR_T==2) +# define U_WCHAR_IS_UTF16 +# elif (U_SIZEOF_WCHAR_T==4) +# define U_WCHAR_IS_UTF32 +# endif +# elif defined __UCS2__ +# if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2) +# define U_WCHAR_IS_UTF16 +# endif +# elif defined __UCS4__ +# if (U_SIZEOF_WCHAR_T==4) +# define U_WCHAR_IS_UTF32 +# endif +# elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +# define U_WCHAR_IS_UTF16 +# endif +#endif + +/* UChar and UChar32 definitions -------------------------------------------- */ + +/** Number of bytes in a UChar. @stable ICU 2.0 */ +#define U_SIZEOF_UCHAR 2 + +/** + * \var UChar + * Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned. + * If wchar_t is not 16 bits wide, then define UChar to be uint16_t. + * This makes the definition of UChar platform-dependent + * but allows direct string type compatibility with platforms with + * 16-bit wchar_t types. + * + * @stable ICU 2.0 + */ + +/* Define UChar to be compatible with wchar_t if possible. */ +#if U_SIZEOF_WCHAR_T==2 + typedef wchar_t UChar; +#else + typedef uint16_t UChar; +#endif + +/** + * Define UChar32 as a type for single Unicode code points. + * UChar32 is a signed 32-bit integer (same as int32_t). + * + * The Unicode code point range is 0..0x10ffff. + * All other values (negative or >=0x110000) are illegal as Unicode code points. + * They may be used as sentinel values to indicate "done", "error" + * or similar non-code point conditions. + * + * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined + * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) + * or else to be uint32_t. + * That is, the definition of UChar32 was platform-dependent. + * + * @see U_SENTINEL + * @stable ICU 2.4 + */ +typedef int32_t UChar32; + +/*==========================================================================*/ +/* U_INLINE and U_ALIGN_CODE Set default values if these are not already */ +/* defined. Definitions normally are in */ +/* platform.h or the corresponding file for */ +/* the OS in use. */ +/*==========================================================================*/ + +/** + * \def U_ALIGN_CODE + * This is used to align code fragments to a specific byte boundary. + * This is useful for getting consistent performance test results. + * @internal + */ +#ifndef U_ALIGN_CODE +# define U_ALIGN_CODE(n) +#endif + +#ifndef U_INLINE +# define U_INLINE +#endif + +#include "unicode/urename.h" + +#endif diff --git a/JavaScriptGlue/icu/unicode/urename.h b/JavaScriptGlue/icu/unicode/urename.h new file mode 100644 index 0000000..5562592 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/urename.h @@ -0,0 +1,1468 @@ +/* +******************************************************************************* +* Copyright (C) 2002-2004, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: urename.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Perl script written by Vladimir Weinstein +* +* Contains data for renaming ICU exports. +* Gets included by umachine.h +* +* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT +* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN! +*/ + +#ifndef URENAME_H +#define URENAME_H + +/* Uncomment the following line to disable renaming on platforms + that do not use Autoconf. */ +/* #define U_DISABLE_RENAMING 1 */ + +#if !U_DISABLE_RENAMING + +/* C exports renaming data */ + +#define T_CString_int64ToString T_CString_int64ToString_3_2 +#define T_CString_integerToString T_CString_integerToString_3_2 +#define T_CString_stricmp T_CString_stricmp_3_2 +#define T_CString_stringToInteger T_CString_stringToInteger_3_2 +#define T_CString_strnicmp T_CString_strnicmp_3_2 +#define T_CString_toLowerCase T_CString_toLowerCase_3_2 +#define T_CString_toUpperCase T_CString_toUpperCase_3_2 +#define T_FileStream_close T_FileStream_close_3_2 +#define T_FileStream_eof T_FileStream_eof_3_2 +#define T_FileStream_error T_FileStream_error_3_2 +#define T_FileStream_file_exists T_FileStream_file_exists_3_2 +#define T_FileStream_getc T_FileStream_getc_3_2 +#define T_FileStream_open T_FileStream_open_3_2 +#define T_FileStream_peek T_FileStream_peek_3_2 +#define T_FileStream_putc T_FileStream_putc_3_2 +#define T_FileStream_read T_FileStream_read_3_2 +#define T_FileStream_readLine T_FileStream_readLine_3_2 +#define T_FileStream_remove T_FileStream_remove_3_2 +#define T_FileStream_rewind T_FileStream_rewind_3_2 +#define T_FileStream_size T_FileStream_size_3_2 +#define T_FileStream_stderr T_FileStream_stderr_3_2 +#define T_FileStream_stdin T_FileStream_stdin_3_2 +#define T_FileStream_stdout T_FileStream_stdout_3_2 +#define T_FileStream_ungetc T_FileStream_ungetc_3_2 +#define T_FileStream_write T_FileStream_write_3_2 +#define T_FileStream_writeLine T_FileStream_writeLine_3_2 +#define UCNV_FROM_U_CALLBACK_ESCAPE UCNV_FROM_U_CALLBACK_ESCAPE_3_2 +#define UCNV_FROM_U_CALLBACK_SKIP UCNV_FROM_U_CALLBACK_SKIP_3_2 +#define UCNV_FROM_U_CALLBACK_STOP UCNV_FROM_U_CALLBACK_STOP_3_2 +#define UCNV_FROM_U_CALLBACK_SUBSTITUTE UCNV_FROM_U_CALLBACK_SUBSTITUTE_3_2 +#define UCNV_TO_U_CALLBACK_ESCAPE UCNV_TO_U_CALLBACK_ESCAPE_3_2 +#define UCNV_TO_U_CALLBACK_SKIP UCNV_TO_U_CALLBACK_SKIP_3_2 +#define UCNV_TO_U_CALLBACK_STOP UCNV_TO_U_CALLBACK_STOP_3_2 +#define UCNV_TO_U_CALLBACK_SUBSTITUTE UCNV_TO_U_CALLBACK_SUBSTITUTE_3_2 +#define UDataMemory_createNewInstance UDataMemory_createNewInstance_3_2 +#define UDataMemory_init UDataMemory_init_3_2 +#define UDataMemory_isLoaded UDataMemory_isLoaded_3_2 +#define UDataMemory_normalizeDataPointer UDataMemory_normalizeDataPointer_3_2 +#define UDataMemory_setData UDataMemory_setData_3_2 +#define UDatamemory_assign UDatamemory_assign_3_2 +#define _ASCIIData _ASCIIData_3_2 +#define _Bocu1Data _Bocu1Data_3_2 +#define _CESU8Data _CESU8Data_3_2 +#define _HZData _HZData_3_2 +#define _IMAPData _IMAPData_3_2 +#define _ISCIIData _ISCIIData_3_2 +#define _ISO2022Data _ISO2022Data_3_2 +#define _LMBCSData1 _LMBCSData1_3_2 +#define _LMBCSData11 _LMBCSData11_3_2 +#define _LMBCSData16 _LMBCSData16_3_2 +#define _LMBCSData17 _LMBCSData17_3_2 +#define _LMBCSData18 _LMBCSData18_3_2 +#define _LMBCSData19 _LMBCSData19_3_2 +#define _LMBCSData2 _LMBCSData2_3_2 +#define _LMBCSData3 _LMBCSData3_3_2 +#define _LMBCSData4 _LMBCSData4_3_2 +#define _LMBCSData5 _LMBCSData5_3_2 +#define _LMBCSData6 _LMBCSData6_3_2 +#define _LMBCSData8 _LMBCSData8_3_2 +#define _Latin1Data _Latin1Data_3_2 +#define _MBCSData _MBCSData_3_2 +#define _SCSUData _SCSUData_3_2 +#define _UTF16BEData _UTF16BEData_3_2 +#define _UTF16Data _UTF16Data_3_2 +#define _UTF16LEData _UTF16LEData_3_2 +#define _UTF32BEData _UTF32BEData_3_2 +#define _UTF32Data _UTF32Data_3_2 +#define _UTF32LEData _UTF32LEData_3_2 +#define _UTF7Data _UTF7Data_3_2 +#define _UTF8Data _UTF8Data_3_2 +#define cmemory_cleanup cmemory_cleanup_3_2 +#define cmemory_inUse cmemory_inUse_3_2 +#define locale_getKeywords locale_getKeywords_3_2 +#define locale_get_default locale_get_default_3_2 +#define locale_set_default locale_set_default_3_2 +#define res_countArrayItems res_countArrayItems_3_2 +#define res_findResource res_findResource_3_2 +#define res_getAlias res_getAlias_3_2 +#define res_getArrayItem res_getArrayItem_3_2 +#define res_getBinary res_getBinary_3_2 +#define res_getIntVector res_getIntVector_3_2 +#define res_getResource res_getResource_3_2 +#define res_getString res_getString_3_2 +#define res_getTableItemByIndex res_getTableItemByIndex_3_2 +#define res_getTableItemByKey res_getTableItemByKey_3_2 +#define res_load res_load_3_2 +#define res_unload res_unload_3_2 +#define transliterator_cleanup transliterator_cleanup_3_2 +#define u_UCharsToChars u_UCharsToChars_3_2 +#define u_austrcpy u_austrcpy_3_2 +#define u_austrncpy u_austrncpy_3_2 +#define u_catclose u_catclose_3_2 +#define u_catgets u_catgets_3_2 +#define u_catopen u_catopen_3_2 +#define u_charAge u_charAge_3_2 +#define u_charDigitValue u_charDigitValue_3_2 +#define u_charDirection u_charDirection_3_2 +#define u_charFromName u_charFromName_3_2 +#define u_charMirror u_charMirror_3_2 +#define u_charName u_charName_3_2 +#define u_charType u_charType_3_2 +#define u_charsToUChars u_charsToUChars_3_2 +#define u_cleanup u_cleanup_3_2 +#define u_countChar32 u_countChar32_3_2 +#define u_digit u_digit_3_2 +#define u_enumCharNames u_enumCharNames_3_2 +#define u_enumCharTypes u_enumCharTypes_3_2 +#define u_errorName u_errorName_3_2 +#define u_fclose u_fclose_3_2 +#define u_feof u_feof_3_2 +#define u_fflush u_fflush_3_2 +#define u_fgetConverter u_fgetConverter_3_2 +#define u_fgetc u_fgetc_3_2 +#define u_fgetcodepage u_fgetcodepage_3_2 +#define u_fgetcx u_fgetcx_3_2 +#define u_fgetfile u_fgetfile_3_2 +#define u_fgetlocale u_fgetlocale_3_2 +#define u_fgets u_fgets_3_2 +#define u_file_read u_file_read_3_2 +#define u_file_write u_file_write_3_2 +#define u_file_write_flush u_file_write_flush_3_2 +#define u_finit u_finit_3_2 +#define u_foldCase u_foldCase_3_2 +#define u_fopen u_fopen_3_2 +#define u_forDigit u_forDigit_3_2 +#define u_formatMessage u_formatMessage_3_2 +#define u_formatMessageWithError u_formatMessageWithError_3_2 +#define u_fprintf u_fprintf_3_2 +#define u_fprintf_u u_fprintf_u_3_2 +#define u_fputc u_fputc_3_2 +#define u_fputs u_fputs_3_2 +#define u_frewind u_frewind_3_2 +#define u_fscanf u_fscanf_3_2 +#define u_fscanf_u u_fscanf_u_3_2 +#define u_fsetcodepage u_fsetcodepage_3_2 +#define u_fsetlocale u_fsetlocale_3_2 +#define u_fsettransliterator u_fsettransliterator_3_2 +#define u_fstropen u_fstropen_3_2 +#define u_fungetc u_fungetc_3_2 +#define u_getCombiningClass u_getCombiningClass_3_2 +#define u_getDataDirectory u_getDataDirectory_3_2 +#define u_getDefaultConverter u_getDefaultConverter_3_2 +#define u_getFC_NFKC_Closure u_getFC_NFKC_Closure_3_2 +#define u_getISOComment u_getISOComment_3_2 +#define u_getIntPropertyMaxValue u_getIntPropertyMaxValue_3_2 +#define u_getIntPropertyMinValue u_getIntPropertyMinValue_3_2 +#define u_getIntPropertyValue u_getIntPropertyValue_3_2 +#define u_getNumericValue u_getNumericValue_3_2 +#define u_getPropertyEnum u_getPropertyEnum_3_2 +#define u_getPropertyName u_getPropertyName_3_2 +#define u_getPropertyValueEnum u_getPropertyValueEnum_3_2 +#define u_getPropertyValueName u_getPropertyValueName_3_2 +#define u_getUnicodeProperties u_getUnicodeProperties_3_2 +#define u_getUnicodeVersion u_getUnicodeVersion_3_2 +#define u_getVersion u_getVersion_3_2 +#define u_growBufferFromStatic u_growBufferFromStatic_3_2 +#define u_hasBinaryProperty u_hasBinaryProperty_3_2 +#define u_init u_init_3_2 +#define u_isIDIgnorable u_isIDIgnorable_3_2 +#define u_isIDPart u_isIDPart_3_2 +#define u_isIDStart u_isIDStart_3_2 +#define u_isISOControl u_isISOControl_3_2 +#define u_isJavaIDPart u_isJavaIDPart_3_2 +#define u_isJavaIDStart u_isJavaIDStart_3_2 +#define u_isJavaSpaceChar u_isJavaSpaceChar_3_2 +#define u_isMirrored u_isMirrored_3_2 +#define u_isUAlphabetic u_isUAlphabetic_3_2 +#define u_isULowercase u_isULowercase_3_2 +#define u_isUUppercase u_isUUppercase_3_2 +#define u_isUWhiteSpace u_isUWhiteSpace_3_2 +#define u_isWhitespace u_isWhitespace_3_2 +#define u_isalnum u_isalnum_3_2 +#define u_isalpha u_isalpha_3_2 +#define u_isbase u_isbase_3_2 +#define u_isblank u_isblank_3_2 +#define u_iscntrl u_iscntrl_3_2 +#define u_isdefined u_isdefined_3_2 +#define u_isdigit u_isdigit_3_2 +#define u_isgraph u_isgraph_3_2 +#define u_islower u_islower_3_2 +#define u_isprint u_isprint_3_2 +#define u_ispunct u_ispunct_3_2 +#define u_isspace u_isspace_3_2 +#define u_istitle u_istitle_3_2 +#define u_isupper u_isupper_3_2 +#define u_isxdigit u_isxdigit_3_2 +#define u_lengthOfIdenticalLevelRun u_lengthOfIdenticalLevelRun_3_2 +#define u_locbund_close u_locbund_close_3_2 +#define u_locbund_getNumberFormat u_locbund_getNumberFormat_3_2 +#define u_locbund_init u_locbund_init_3_2 +#define u_memcasecmp u_memcasecmp_3_2 +#define u_memchr u_memchr_3_2 +#define u_memchr32 u_memchr32_3_2 +#define u_memcmp u_memcmp_3_2 +#define u_memcmpCodePointOrder u_memcmpCodePointOrder_3_2 +#define u_memcpy u_memcpy_3_2 +#define u_memmove u_memmove_3_2 +#define u_memrchr u_memrchr_3_2 +#define u_memrchr32 u_memrchr32_3_2 +#define u_memset u_memset_3_2 +#define u_parseMessage u_parseMessage_3_2 +#define u_parseMessageWithError u_parseMessageWithError_3_2 +#define u_printf_parse u_printf_parse_3_2 +#define u_releaseDefaultConverter u_releaseDefaultConverter_3_2 +#define u_scanf_parse u_scanf_parse_3_2 +#define u_setAtomicIncDecFunctions u_setAtomicIncDecFunctions_3_2 +#define u_setDataDirectory u_setDataDirectory_3_2 +#define u_setMemoryFunctions u_setMemoryFunctions_3_2 +#define u_setMutexFunctions u_setMutexFunctions_3_2 +#define u_shapeArabic u_shapeArabic_3_2 +#define u_snprintf u_snprintf_3_2 +#define u_snprintf_u u_snprintf_u_3_2 +#define u_sprintf u_sprintf_3_2 +#define u_sprintf_u u_sprintf_u_3_2 +#define u_sscanf u_sscanf_3_2 +#define u_sscanf_u u_sscanf_u_3_2 +#define u_strCaseCompare u_strCaseCompare_3_2 +#define u_strCompare u_strCompare_3_2 +#define u_strCompareIter u_strCompareIter_3_2 +#define u_strFindFirst u_strFindFirst_3_2 +#define u_strFindLast u_strFindLast_3_2 +#define u_strFoldCase u_strFoldCase_3_2 +#define u_strFromPunycode u_strFromPunycode_3_2 +#define u_strFromUTF32 u_strFromUTF32_3_2 +#define u_strFromUTF8 u_strFromUTF8_3_2 +#define u_strFromWCS u_strFromWCS_3_2 +#define u_strHasMoreChar32Than u_strHasMoreChar32Than_3_2 +#define u_strToLower u_strToLower_3_2 +#define u_strToPunycode u_strToPunycode_3_2 +#define u_strToTitle u_strToTitle_3_2 +#define u_strToUTF32 u_strToUTF32_3_2 +#define u_strToUTF8 u_strToUTF8_3_2 +#define u_strToUpper u_strToUpper_3_2 +#define u_strToWCS u_strToWCS_3_2 +#define u_strcasecmp u_strcasecmp_3_2 +#define u_strcat u_strcat_3_2 +#define u_strchr u_strchr_3_2 +#define u_strchr32 u_strchr32_3_2 +#define u_strcmp u_strcmp_3_2 +#define u_strcmpCodePointOrder u_strcmpCodePointOrder_3_2 +#define u_strcmpFold u_strcmpFold_3_2 +#define u_strcpy u_strcpy_3_2 +#define u_strcspn u_strcspn_3_2 +#define u_strlen u_strlen_3_2 +#define u_strncasecmp u_strncasecmp_3_2 +#define u_strncat u_strncat_3_2 +#define u_strncmp u_strncmp_3_2 +#define u_strncmpCodePointOrder u_strncmpCodePointOrder_3_2 +#define u_strncpy u_strncpy_3_2 +#define u_strpbrk u_strpbrk_3_2 +#define u_strrchr u_strrchr_3_2 +#define u_strrchr32 u_strrchr32_3_2 +#define u_strrstr u_strrstr_3_2 +#define u_strspn u_strspn_3_2 +#define u_strstr u_strstr_3_2 +#define u_strtok_r u_strtok_r_3_2 +#define u_terminateChars u_terminateChars_3_2 +#define u_terminateUChar32s u_terminateUChar32s_3_2 +#define u_terminateUChars u_terminateUChars_3_2 +#define u_terminateWChars u_terminateWChars_3_2 +#define u_tolower u_tolower_3_2 +#define u_totitle u_totitle_3_2 +#define u_toupper u_toupper_3_2 +#define u_uastrcpy u_uastrcpy_3_2 +#define u_uastrncpy u_uastrncpy_3_2 +#define u_unescape u_unescape_3_2 +#define u_unescapeAt u_unescapeAt_3_2 +#define u_versionFromString u_versionFromString_3_2 +#define u_versionToString u_versionToString_3_2 +#define u_vformatMessage u_vformatMessage_3_2 +#define u_vformatMessageWithError u_vformatMessageWithError_3_2 +#define u_vfprintf u_vfprintf_3_2 +#define u_vfprintf_u u_vfprintf_u_3_2 +#define u_vfscanf u_vfscanf_3_2 +#define u_vfscanf_u u_vfscanf_u_3_2 +#define u_vparseMessage u_vparseMessage_3_2 +#define u_vparseMessageWithError u_vparseMessageWithError_3_2 +#define u_vsnprintf u_vsnprintf_3_2 +#define u_vsnprintf_u u_vsnprintf_u_3_2 +#define u_vsprintf u_vsprintf_3_2 +#define u_vsprintf_u u_vsprintf_u_3_2 +#define u_vsscanf u_vsscanf_3_2 +#define u_vsscanf_u u_vsscanf_u_3_2 +#define u_writeDiff u_writeDiff_3_2 +#define u_writeIdenticalLevelRun u_writeIdenticalLevelRun_3_2 +#define u_writeIdenticalLevelRunTwoChars u_writeIdenticalLevelRunTwoChars_3_2 +#define ubidi_close ubidi_close_3_2 +#define ubidi_countRuns ubidi_countRuns_3_2 +#define ubidi_getDirection ubidi_getDirection_3_2 +#define ubidi_getLength ubidi_getLength_3_2 +#define ubidi_getLevelAt ubidi_getLevelAt_3_2 +#define ubidi_getLevels ubidi_getLevels_3_2 +#define ubidi_getLogicalIndex ubidi_getLogicalIndex_3_2 +#define ubidi_getLogicalMap ubidi_getLogicalMap_3_2 +#define ubidi_getLogicalRun ubidi_getLogicalRun_3_2 +#define ubidi_getMemory ubidi_getMemory_3_2 +#define ubidi_getParaLevel ubidi_getParaLevel_3_2 +#define ubidi_getRuns ubidi_getRuns_3_2 +#define ubidi_getText ubidi_getText_3_2 +#define ubidi_getVisualIndex ubidi_getVisualIndex_3_2 +#define ubidi_getVisualMap ubidi_getVisualMap_3_2 +#define ubidi_getVisualRun ubidi_getVisualRun_3_2 +#define ubidi_invertMap ubidi_invertMap_3_2 +#define ubidi_isInverse ubidi_isInverse_3_2 +#define ubidi_open ubidi_open_3_2 +#define ubidi_openSized ubidi_openSized_3_2 +#define ubidi_reorderLogical ubidi_reorderLogical_3_2 +#define ubidi_reorderVisual ubidi_reorderVisual_3_2 +#define ubidi_setInverse ubidi_setInverse_3_2 +#define ubidi_setLine ubidi_setLine_3_2 +#define ubidi_setPara ubidi_setPara_3_2 +#define ubidi_writeReordered ubidi_writeReordered_3_2 +#define ubidi_writeReverse ubidi_writeReverse_3_2 +#define ublock_getCode ublock_getCode_3_2 +#define ubrk_close ubrk_close_3_2 +#define ubrk_countAvailable ubrk_countAvailable_3_2 +#define ubrk_current ubrk_current_3_2 +#define ubrk_first ubrk_first_3_2 +#define ubrk_following ubrk_following_3_2 +#define ubrk_getAvailable ubrk_getAvailable_3_2 +#define ubrk_getLocaleByType ubrk_getLocaleByType_3_2 +#define ubrk_getRuleStatus ubrk_getRuleStatus_3_2 +#define ubrk_getRuleStatusVec ubrk_getRuleStatusVec_3_2 +#define ubrk_isBoundary ubrk_isBoundary_3_2 +#define ubrk_last ubrk_last_3_2 +#define ubrk_next ubrk_next_3_2 +#define ubrk_open ubrk_open_3_2 +#define ubrk_openRules ubrk_openRules_3_2 +#define ubrk_preceding ubrk_preceding_3_2 +#define ubrk_previous ubrk_previous_3_2 +#define ubrk_safeClone ubrk_safeClone_3_2 +#define ubrk_setText ubrk_setText_3_2 +#define ubrk_swap ubrk_swap_3_2 +#define ucal_add ucal_add_3_2 +#define ucal_clear ucal_clear_3_2 +#define ucal_clearField ucal_clearField_3_2 +#define ucal_close ucal_close_3_2 +#define ucal_countAvailable ucal_countAvailable_3_2 +#define ucal_equivalentTo ucal_equivalentTo_3_2 +#define ucal_get ucal_get_3_2 +#define ucal_getAttribute ucal_getAttribute_3_2 +#define ucal_getAvailable ucal_getAvailable_3_2 +#define ucal_getDSTSavings ucal_getDSTSavings_3_2 +#define ucal_getDefaultTimeZone ucal_getDefaultTimeZone_3_2 +#define ucal_getLimit ucal_getLimit_3_2 +#define ucal_getLocaleByType ucal_getLocaleByType_3_2 +#define ucal_getMillis ucal_getMillis_3_2 +#define ucal_getNow ucal_getNow_3_2 +#define ucal_getTimeZoneDisplayName ucal_getTimeZoneDisplayName_3_2 +#define ucal_inDaylightTime ucal_inDaylightTime_3_2 +#define ucal_isSet ucal_isSet_3_2 +#define ucal_open ucal_open_3_2 +#define ucal_openCountryTimeZones ucal_openCountryTimeZones_3_2 +#define ucal_openTimeZones ucal_openTimeZones_3_2 +#define ucal_roll ucal_roll_3_2 +#define ucal_set ucal_set_3_2 +#define ucal_setAttribute ucal_setAttribute_3_2 +#define ucal_setDate ucal_setDate_3_2 +#define ucal_setDateTime ucal_setDateTime_3_2 +#define ucal_setDefaultTimeZone ucal_setDefaultTimeZone_3_2 +#define ucal_setMillis ucal_setMillis_3_2 +#define ucal_setTimeZone ucal_setTimeZone_3_2 +#define ucase_addPropertyStarts ucase_addPropertyStarts_3_2 +#define ucase_close ucase_close_3_2 +#define ucase_fold ucase_fold_3_2 +#define ucase_getSingleton ucase_getSingleton_3_2 +#define ucase_getType ucase_getType_3_2 +#define ucase_getTypeOrIgnorable ucase_getTypeOrIgnorable_3_2 +#define ucase_isCaseSensitive ucase_isCaseSensitive_3_2 +#define ucase_isSoftDotted ucase_isSoftDotted_3_2 +#define ucase_open ucase_open_3_2 +#define ucase_openBinary ucase_openBinary_3_2 +#define ucase_swap ucase_swap_3_2 +#define ucase_toFullFolding ucase_toFullFolding_3_2 +#define ucase_toFullLower ucase_toFullLower_3_2 +#define ucase_toFullTitle ucase_toFullTitle_3_2 +#define ucase_toFullUpper ucase_toFullUpper_3_2 +#define ucase_tolower ucase_tolower_3_2 +#define ucase_totitle ucase_totitle_3_2 +#define ucase_toupper ucase_toupper_3_2 +#define uchar_addPropertyStarts uchar_addPropertyStarts_3_2 +#define uchar_getHST uchar_getHST_3_2 +#define uchar_swapNames uchar_swapNames_3_2 +#define ucln_common_lib_cleanup ucln_common_lib_cleanup_3_2 +#define ucln_common_registerCleanup ucln_common_registerCleanup_3_2 +#define ucln_i18n_registerCleanup ucln_i18n_registerCleanup_3_2 +#define ucln_registerCleanup ucln_registerCleanup_3_2 +#define ucmp8_close ucmp8_close_3_2 +#define ucmp8_compact ucmp8_compact_3_2 +#define ucmp8_expand ucmp8_expand_3_2 +#define ucmp8_flattenMem ucmp8_flattenMem_3_2 +#define ucmp8_getArray ucmp8_getArray_3_2 +#define ucmp8_getCount ucmp8_getCount_3_2 +#define ucmp8_getIndex ucmp8_getIndex_3_2 +#define ucmp8_getkBlockCount ucmp8_getkBlockCount_3_2 +#define ucmp8_getkUnicodeCount ucmp8_getkUnicodeCount_3_2 +#define ucmp8_init ucmp8_init_3_2 +#define ucmp8_initAdopt ucmp8_initAdopt_3_2 +#define ucmp8_initAlias ucmp8_initAlias_3_2 +#define ucmp8_initBogus ucmp8_initBogus_3_2 +#define ucmp8_initFromData ucmp8_initFromData_3_2 +#define ucmp8_isBogus ucmp8_isBogus_3_2 +#define ucmp8_open ucmp8_open_3_2 +#define ucmp8_openAdopt ucmp8_openAdopt_3_2 +#define ucmp8_openAlias ucmp8_openAlias_3_2 +#define ucmp8_set ucmp8_set_3_2 +#define ucmp8_setRange ucmp8_setRange_3_2 +#define ucnv_MBCSFromUChar32 ucnv_MBCSFromUChar32_3_2 +#define ucnv_MBCSFromUnicodeWithOffsets ucnv_MBCSFromUnicodeWithOffsets_3_2 +#define ucnv_MBCSGetType ucnv_MBCSGetType_3_2 +#define ucnv_MBCSGetUnicodeSetForBytes ucnv_MBCSGetUnicodeSetForBytes_3_2 +#define ucnv_MBCSGetUnicodeSetForUnicode ucnv_MBCSGetUnicodeSetForUnicode_3_2 +#define ucnv_MBCSIsLeadByte ucnv_MBCSIsLeadByte_3_2 +#define ucnv_MBCSSimpleGetNextUChar ucnv_MBCSSimpleGetNextUChar_3_2 +#define ucnv_MBCSToUnicodeWithOffsets ucnv_MBCSToUnicodeWithOffsets_3_2 +#define ucnv_cbFromUWriteBytes ucnv_cbFromUWriteBytes_3_2 +#define ucnv_cbFromUWriteSub ucnv_cbFromUWriteSub_3_2 +#define ucnv_cbFromUWriteUChars ucnv_cbFromUWriteUChars_3_2 +#define ucnv_cbToUWriteSub ucnv_cbToUWriteSub_3_2 +#define ucnv_cbToUWriteUChars ucnv_cbToUWriteUChars_3_2 +#define ucnv_close ucnv_close_3_2 +#define ucnv_compareNames ucnv_compareNames_3_2 +#define ucnv_convert ucnv_convert_3_2 +#define ucnv_convertEx ucnv_convertEx_3_2 +#define ucnv_copyPlatformString ucnv_copyPlatformString_3_2 +#define ucnv_countAliases ucnv_countAliases_3_2 +#define ucnv_countAvailable ucnv_countAvailable_3_2 +#define ucnv_countStandards ucnv_countStandards_3_2 +#define ucnv_createAlgorithmicConverter ucnv_createAlgorithmicConverter_3_2 +#define ucnv_createConverter ucnv_createConverter_3_2 +#define ucnv_createConverterFromPackage ucnv_createConverterFromPackage_3_2 +#define ucnv_createConverterFromSharedData ucnv_createConverterFromSharedData_3_2 +#define ucnv_detectUnicodeSignature ucnv_detectUnicodeSignature_3_2 +#define ucnv_extContinueMatchFromU ucnv_extContinueMatchFromU_3_2 +#define ucnv_extContinueMatchToU ucnv_extContinueMatchToU_3_2 +#define ucnv_extGetUnicodeSet ucnv_extGetUnicodeSet_3_2 +#define ucnv_extInitialMatchFromU ucnv_extInitialMatchFromU_3_2 +#define ucnv_extInitialMatchToU ucnv_extInitialMatchToU_3_2 +#define ucnv_extSimpleMatchFromU ucnv_extSimpleMatchFromU_3_2 +#define ucnv_extSimpleMatchToU ucnv_extSimpleMatchToU_3_2 +#define ucnv_fixFileSeparator ucnv_fixFileSeparator_3_2 +#define ucnv_flushCache ucnv_flushCache_3_2 +#define ucnv_fromAlgorithmic ucnv_fromAlgorithmic_3_2 +#define ucnv_fromUChars ucnv_fromUChars_3_2 +#define ucnv_fromUWriteBytes ucnv_fromUWriteBytes_3_2 +#define ucnv_fromUnicode ucnv_fromUnicode_3_2 +#define ucnv_fromUnicode_UTF8 ucnv_fromUnicode_UTF8_3_2 +#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC ucnv_fromUnicode_UTF8_OFFSETS_LOGIC_3_2 +#define ucnv_getAlias ucnv_getAlias_3_2 +#define ucnv_getAliases ucnv_getAliases_3_2 +#define ucnv_getAvailableName ucnv_getAvailableName_3_2 +#define ucnv_getCCSID ucnv_getCCSID_3_2 +#define ucnv_getCanonicalName ucnv_getCanonicalName_3_2 +#define ucnv_getCompleteUnicodeSet ucnv_getCompleteUnicodeSet_3_2 +#define ucnv_getDefaultName ucnv_getDefaultName_3_2 +#define ucnv_getDisplayName ucnv_getDisplayName_3_2 +#define ucnv_getFromUCallBack ucnv_getFromUCallBack_3_2 +#define ucnv_getInvalidChars ucnv_getInvalidChars_3_2 +#define ucnv_getInvalidUChars ucnv_getInvalidUChars_3_2 +#define ucnv_getMaxCharSize ucnv_getMaxCharSize_3_2 +#define ucnv_getMinCharSize ucnv_getMinCharSize_3_2 +#define ucnv_getName ucnv_getName_3_2 +#define ucnv_getNextUChar ucnv_getNextUChar_3_2 +#define ucnv_getNonSurrogateUnicodeSet ucnv_getNonSurrogateUnicodeSet_3_2 +#define ucnv_getPlatform ucnv_getPlatform_3_2 +#define ucnv_getStandard ucnv_getStandard_3_2 +#define ucnv_getStandardName ucnv_getStandardName_3_2 +#define ucnv_getStarters ucnv_getStarters_3_2 +#define ucnv_getSubstChars ucnv_getSubstChars_3_2 +#define ucnv_getToUCallBack ucnv_getToUCallBack_3_2 +#define ucnv_getType ucnv_getType_3_2 +#define ucnv_getUnicodeSet ucnv_getUnicodeSet_3_2 +#define ucnv_incrementRefCount ucnv_incrementRefCount_3_2 +#define ucnv_io_countAliases ucnv_io_countAliases_3_2 +#define ucnv_io_countAvailableAliases ucnv_io_countAvailableAliases_3_2 +#define ucnv_io_countAvailableConverters ucnv_io_countAvailableConverters_3_2 +#define ucnv_io_countStandards ucnv_io_countStandards_3_2 +#define ucnv_io_flushAvailableConverterCache ucnv_io_flushAvailableConverterCache_3_2 +#define ucnv_io_getAlias ucnv_io_getAlias_3_2 +#define ucnv_io_getAliases ucnv_io_getAliases_3_2 +#define ucnv_io_getAvailableConverter ucnv_io_getAvailableConverter_3_2 +#define ucnv_io_getConverterName ucnv_io_getConverterName_3_2 +#define ucnv_io_getDefaultConverterName ucnv_io_getDefaultConverterName_3_2 +#define ucnv_io_setDefaultConverterName ucnv_io_setDefaultConverterName_3_2 +#define ucnv_io_stripASCIIForCompare ucnv_io_stripASCIIForCompare_3_2 +#define ucnv_io_stripEBCDICForCompare ucnv_io_stripEBCDICForCompare_3_2 +#define ucnv_isAmbiguous ucnv_isAmbiguous_3_2 +#define ucnv_load ucnv_load_3_2 +#define ucnv_loadSharedData ucnv_loadSharedData_3_2 +#define ucnv_open ucnv_open_3_2 +#define ucnv_openAllNames ucnv_openAllNames_3_2 +#define ucnv_openCCSID ucnv_openCCSID_3_2 +#define ucnv_openPackage ucnv_openPackage_3_2 +#define ucnv_openStandardNames ucnv_openStandardNames_3_2 +#define ucnv_openU ucnv_openU_3_2 +#define ucnv_reset ucnv_reset_3_2 +#define ucnv_resetFromUnicode ucnv_resetFromUnicode_3_2 +#define ucnv_resetToUnicode ucnv_resetToUnicode_3_2 +#define ucnv_safeClone ucnv_safeClone_3_2 +#define ucnv_setDefaultName ucnv_setDefaultName_3_2 +#define ucnv_setFallback ucnv_setFallback_3_2 +#define ucnv_setFromUCallBack ucnv_setFromUCallBack_3_2 +#define ucnv_setSubstChars ucnv_setSubstChars_3_2 +#define ucnv_setToUCallBack ucnv_setToUCallBack_3_2 +#define ucnv_swap ucnv_swap_3_2 +#define ucnv_swapAliases ucnv_swapAliases_3_2 +#define ucnv_toAlgorithmic ucnv_toAlgorithmic_3_2 +#define ucnv_toUChars ucnv_toUChars_3_2 +#define ucnv_toUWriteCodePoint ucnv_toUWriteCodePoint_3_2 +#define ucnv_toUWriteUChars ucnv_toUWriteUChars_3_2 +#define ucnv_toUnicode ucnv_toUnicode_3_2 +#define ucnv_unload ucnv_unload_3_2 +#define ucnv_unloadSharedDataIfReady ucnv_unloadSharedDataIfReady_3_2 +#define ucnv_usesFallback ucnv_usesFallback_3_2 +#define ucol_allocWeights ucol_allocWeights_3_2 +#define ucol_assembleTailoringTable ucol_assembleTailoringTable_3_2 +#define ucol_calcSortKey ucol_calcSortKey_3_2 +#define ucol_calcSortKeySimpleTertiary ucol_calcSortKeySimpleTertiary_3_2 +#define ucol_cloneBinary ucol_cloneBinary_3_2 +#define ucol_cloneRuleData ucol_cloneRuleData_3_2 +#define ucol_close ucol_close_3_2 +#define ucol_closeElements ucol_closeElements_3_2 +#define ucol_collatorToIdentifier ucol_collatorToIdentifier_3_2 +#define ucol_countAvailable ucol_countAvailable_3_2 +#define ucol_createElements ucol_createElements_3_2 +#define ucol_doCE ucol_doCE_3_2 +#define ucol_equal ucol_equal_3_2 +#define ucol_equals ucol_equals_3_2 +#define ucol_getAttribute ucol_getAttribute_3_2 +#define ucol_getAttributeOrDefault ucol_getAttributeOrDefault_3_2 +#define ucol_getAvailable ucol_getAvailable_3_2 +#define ucol_getBound ucol_getBound_3_2 +#define ucol_getCEGenerator ucol_getCEGenerator_3_2 +#define ucol_getCEStrengthDifference ucol_getCEStrengthDifference_3_2 +#define ucol_getContractions ucol_getContractions_3_2 +#define ucol_getDisplayName ucol_getDisplayName_3_2 +#define ucol_getFirstCE ucol_getFirstCE_3_2 +#define ucol_getFunctionalEquivalent ucol_getFunctionalEquivalent_3_2 +#define ucol_getKeywordValues ucol_getKeywordValues_3_2 +#define ucol_getKeywords ucol_getKeywords_3_2 +#define ucol_getLocale ucol_getLocale_3_2 +#define ucol_getLocaleByType ucol_getLocaleByType_3_2 +#define ucol_getMaxExpansion ucol_getMaxExpansion_3_2 +#define ucol_getNextCE ucol_getNextCE_3_2 +#define ucol_getNextGenerated ucol_getNextGenerated_3_2 +#define ucol_getOffset ucol_getOffset_3_2 +#define ucol_getPrevCE ucol_getPrevCE_3_2 +#define ucol_getRules ucol_getRules_3_2 +#define ucol_getRulesEx ucol_getRulesEx_3_2 +#define ucol_getShortDefinitionString ucol_getShortDefinitionString_3_2 +#define ucol_getSimpleCEGenerator ucol_getSimpleCEGenerator_3_2 +#define ucol_getSortKey ucol_getSortKey_3_2 +#define ucol_getSortKeySize ucol_getSortKeySize_3_2 +#define ucol_getSortKeyWithAllocation ucol_getSortKeyWithAllocation_3_2 +#define ucol_getStrength ucol_getStrength_3_2 +#define ucol_getTailoredSet ucol_getTailoredSet_3_2 +#define ucol_getUCAVersion ucol_getUCAVersion_3_2 +#define ucol_getUnsafeSet ucol_getUnsafeSet_3_2 +#define ucol_getVariableTop ucol_getVariableTop_3_2 +#define ucol_getVersion ucol_getVersion_3_2 +#define ucol_greater ucol_greater_3_2 +#define ucol_greaterOrEqual ucol_greaterOrEqual_3_2 +#define ucol_identifierToShortString ucol_identifierToShortString_3_2 +#define ucol_initBuffers ucol_initBuffers_3_2 +#define ucol_initCollator ucol_initCollator_3_2 +#define ucol_initInverseUCA ucol_initInverseUCA_3_2 +#define ucol_initUCA ucol_initUCA_3_2 +#define ucol_inv_getGapPositions ucol_inv_getGapPositions_3_2 +#define ucol_inv_getNextCE ucol_inv_getNextCE_3_2 +#define ucol_inv_getPrevCE ucol_inv_getPrevCE_3_2 +#define ucol_isTailored ucol_isTailored_3_2 +#define ucol_keyHashCode ucol_keyHashCode_3_2 +#define ucol_mergeSortkeys ucol_mergeSortkeys_3_2 +#define ucol_next ucol_next_3_2 +#define ucol_nextSortKeyPart ucol_nextSortKeyPart_3_2 +#define ucol_nextWeight ucol_nextWeight_3_2 +#define ucol_normalizeShortDefinitionString ucol_normalizeShortDefinitionString_3_2 +#define ucol_open ucol_open_3_2 +#define ucol_openAvailableLocales ucol_openAvailableLocales_3_2 +#define ucol_openBinary ucol_openBinary_3_2 +#define ucol_openElements ucol_openElements_3_2 +#define ucol_openFromIdentifier ucol_openFromIdentifier_3_2 +#define ucol_openFromShortString ucol_openFromShortString_3_2 +#define ucol_openRules ucol_openRules_3_2 +#define ucol_open_internal ucol_open_internal_3_2 +#define ucol_previous ucol_previous_3_2 +#define ucol_primaryOrder ucol_primaryOrder_3_2 +#define ucol_prv_getSpecialCE ucol_prv_getSpecialCE_3_2 +#define ucol_prv_getSpecialPrevCE ucol_prv_getSpecialPrevCE_3_2 +#define ucol_reset ucol_reset_3_2 +#define ucol_restoreVariableTop ucol_restoreVariableTop_3_2 +#define ucol_safeClone ucol_safeClone_3_2 +#define ucol_secondaryOrder ucol_secondaryOrder_3_2 +#define ucol_setAttribute ucol_setAttribute_3_2 +#define ucol_setOffset ucol_setOffset_3_2 +#define ucol_setOptionsFromHeader ucol_setOptionsFromHeader_3_2 +#define ucol_setReqValidLocales ucol_setReqValidLocales_3_2 +#define ucol_setStrength ucol_setStrength_3_2 +#define ucol_setText ucol_setText_3_2 +#define ucol_setVariableTop ucol_setVariableTop_3_2 +#define ucol_shortStringToIdentifier ucol_shortStringToIdentifier_3_2 +#define ucol_sortKeyToString ucol_sortKeyToString_3_2 +#define ucol_strcoll ucol_strcoll_3_2 +#define ucol_strcollIter ucol_strcollIter_3_2 +#define ucol_swap ucol_swap_3_2 +#define ucol_swapBinary ucol_swapBinary_3_2 +#define ucol_swapInverseUCA ucol_swapInverseUCA_3_2 +#define ucol_tertiaryOrder ucol_tertiaryOrder_3_2 +#define ucol_tok_assembleTokenList ucol_tok_assembleTokenList_3_2 +#define ucol_tok_closeTokenList ucol_tok_closeTokenList_3_2 +#define ucol_tok_getNextArgument ucol_tok_getNextArgument_3_2 +#define ucol_tok_initTokenList ucol_tok_initTokenList_3_2 +#define ucol_tok_parseNextToken ucol_tok_parseNextToken_3_2 +#define ucol_updateInternalState ucol_updateInternalState_3_2 +#define ucurr_forLocale ucurr_forLocale_3_2 +#define ucurr_getDefaultFractionDigits ucurr_getDefaultFractionDigits_3_2 +#define ucurr_getName ucurr_getName_3_2 +#define ucurr_getRoundingIncrement ucurr_getRoundingIncrement_3_2 +#define ucurr_register ucurr_register_3_2 +#define ucurr_unregister ucurr_unregister_3_2 +#define udat_applyPattern udat_applyPattern_3_2 +#define udat_clone udat_clone_3_2 +#define udat_close udat_close_3_2 +#define udat_countAvailable udat_countAvailable_3_2 +#define udat_countSymbols udat_countSymbols_3_2 +#define udat_format udat_format_3_2 +#define udat_get2DigitYearStart udat_get2DigitYearStart_3_2 +#define udat_getAvailable udat_getAvailable_3_2 +#define udat_getCalendar udat_getCalendar_3_2 +#define udat_getLocaleByType udat_getLocaleByType_3_2 +#define udat_getNumberFormat udat_getNumberFormat_3_2 +#define udat_getSymbols udat_getSymbols_3_2 +#define udat_isLenient udat_isLenient_3_2 +#define udat_open udat_open_3_2 +#define udat_parse udat_parse_3_2 +#define udat_parseCalendar udat_parseCalendar_3_2 +#define udat_set2DigitYearStart udat_set2DigitYearStart_3_2 +#define udat_setCalendar udat_setCalendar_3_2 +#define udat_setLenient udat_setLenient_3_2 +#define udat_setNumberFormat udat_setNumberFormat_3_2 +#define udat_setSymbols udat_setSymbols_3_2 +#define udat_toPattern udat_toPattern_3_2 +#define udata_checkCommonData udata_checkCommonData_3_2 +#define udata_close udata_close_3_2 +#define udata_closeSwapper udata_closeSwapper_3_2 +#define udata_getHeaderSize udata_getHeaderSize_3_2 +#define udata_getInfo udata_getInfo_3_2 +#define udata_getInfoSize udata_getInfoSize_3_2 +#define udata_getLength udata_getLength_3_2 +#define udata_getMemory udata_getMemory_3_2 +#define udata_getRawMemory udata_getRawMemory_3_2 +#define udata_open udata_open_3_2 +#define udata_openChoice udata_openChoice_3_2 +#define udata_openSwapper udata_openSwapper_3_2 +#define udata_openSwapperForInputData udata_openSwapperForInputData_3_2 +#define udata_printError udata_printError_3_2 +#define udata_readInt16 udata_readInt16_3_2 +#define udata_readInt32 udata_readInt32_3_2 +#define udata_setAppData udata_setAppData_3_2 +#define udata_setCommonData udata_setCommonData_3_2 +#define udata_swapDataHeader udata_swapDataHeader_3_2 +#define udata_swapInvStringBlock udata_swapInvStringBlock_3_2 +#define uenum_close uenum_close_3_2 +#define uenum_count uenum_count_3_2 +#define uenum_next uenum_next_3_2 +#define uenum_nextDefault uenum_nextDefault_3_2 +#define uenum_openCharStringsEnumeration uenum_openCharStringsEnumeration_3_2 +#define uenum_openStringEnumeration uenum_openStringEnumeration_3_2 +#define uenum_reset uenum_reset_3_2 +#define uenum_unext uenum_unext_3_2 +#define uenum_unextDefault uenum_unextDefault_3_2 +#define ufile_close_translit ufile_close_translit_3_2 +#define ufile_fill_uchar_buffer ufile_fill_uchar_buffer_3_2 +#define ufile_flush_translit ufile_flush_translit_3_2 +#define ufile_getch ufile_getch_3_2 +#define ufile_getch32 ufile_getch32_3_2 +#define ufmt_64tou ufmt_64tou_3_2 +#define ufmt_defaultCPToUnicode ufmt_defaultCPToUnicode_3_2 +#define ufmt_digitvalue ufmt_digitvalue_3_2 +#define ufmt_isdigit ufmt_isdigit_3_2 +#define ufmt_ptou ufmt_ptou_3_2 +#define ufmt_uto64 ufmt_uto64_3_2 +#define ufmt_utop ufmt_utop_3_2 +#define uhash_close uhash_close_3_2 +#define uhash_compareCaselessUnicodeString uhash_compareCaselessUnicodeString_3_2 +#define uhash_compareChars uhash_compareChars_3_2 +#define uhash_compareIChars uhash_compareIChars_3_2 +#define uhash_compareLong uhash_compareLong_3_2 +#define uhash_compareUChars uhash_compareUChars_3_2 +#define uhash_compareUnicodeString uhash_compareUnicodeString_3_2 +#define uhash_count uhash_count_3_2 +#define uhash_deleteHashtable uhash_deleteHashtable_3_2 +#define uhash_deleteUVector uhash_deleteUVector_3_2 +#define uhash_deleteUnicodeString uhash_deleteUnicodeString_3_2 +#define uhash_find uhash_find_3_2 +#define uhash_freeBlock uhash_freeBlock_3_2 +#define uhash_get uhash_get_3_2 +#define uhash_geti uhash_geti_3_2 +#define uhash_hashCaselessUnicodeString uhash_hashCaselessUnicodeString_3_2 +#define uhash_hashChars uhash_hashChars_3_2 +#define uhash_hashIChars uhash_hashIChars_3_2 +#define uhash_hashLong uhash_hashLong_3_2 +#define uhash_hashUChars uhash_hashUChars_3_2 +#define uhash_hashUCharsN uhash_hashUCharsN_3_2 +#define uhash_hashUnicodeString uhash_hashUnicodeString_3_2 +#define uhash_iget uhash_iget_3_2 +#define uhash_igeti uhash_igeti_3_2 +#define uhash_iput uhash_iput_3_2 +#define uhash_iputi uhash_iputi_3_2 +#define uhash_iremove uhash_iremove_3_2 +#define uhash_iremovei uhash_iremovei_3_2 +#define uhash_nextElement uhash_nextElement_3_2 +#define uhash_open uhash_open_3_2 +#define uhash_openSize uhash_openSize_3_2 +#define uhash_put uhash_put_3_2 +#define uhash_puti uhash_puti_3_2 +#define uhash_remove uhash_remove_3_2 +#define uhash_removeAll uhash_removeAll_3_2 +#define uhash_removeElement uhash_removeElement_3_2 +#define uhash_removei uhash_removei_3_2 +#define uhash_setKeyComparator uhash_setKeyComparator_3_2 +#define uhash_setKeyDeleter uhash_setKeyDeleter_3_2 +#define uhash_setKeyHasher uhash_setKeyHasher_3_2 +#define uhash_setResizePolicy uhash_setResizePolicy_3_2 +#define uhash_setValueDeleter uhash_setValueDeleter_3_2 +#define uhash_toki uhash_toki_3_2 +#define uhash_tokp uhash_tokp_3_2 +#define uhst_addPropertyStarts uhst_addPropertyStarts_3_2 +#define uidna_IDNToASCII uidna_IDNToASCII_3_2 +#define uidna_IDNToUnicode uidna_IDNToUnicode_3_2 +#define uidna_compare uidna_compare_3_2 +#define uidna_toASCII uidna_toASCII_3_2 +#define uidna_toUnicode uidna_toUnicode_3_2 +#define uiter_current32 uiter_current32_3_2 +#define uiter_getState uiter_getState_3_2 +#define uiter_next32 uiter_next32_3_2 +#define uiter_previous32 uiter_previous32_3_2 +#define uiter_setCharacterIterator uiter_setCharacterIterator_3_2 +#define uiter_setReplaceable uiter_setReplaceable_3_2 +#define uiter_setState uiter_setState_3_2 +#define uiter_setString uiter_setString_3_2 +#define uiter_setUTF16BE uiter_setUTF16BE_3_2 +#define uiter_setUTF8 uiter_setUTF8_3_2 +#define uloc_acceptLanguage uloc_acceptLanguage_3_2 +#define uloc_acceptLanguageFromHTTP uloc_acceptLanguageFromHTTP_3_2 +#define uloc_canonicalize uloc_canonicalize_3_2 +#define uloc_countAvailable uloc_countAvailable_3_2 +#define uloc_getAvailable uloc_getAvailable_3_2 +#define uloc_getBaseName uloc_getBaseName_3_2 +#define uloc_getCountry uloc_getCountry_3_2 +#define uloc_getDefault uloc_getDefault_3_2 +#define uloc_getDisplayCountry uloc_getDisplayCountry_3_2 +#define uloc_getDisplayKeyword uloc_getDisplayKeyword_3_2 +#define uloc_getDisplayKeywordValue uloc_getDisplayKeywordValue_3_2 +#define uloc_getDisplayLanguage uloc_getDisplayLanguage_3_2 +#define uloc_getDisplayName uloc_getDisplayName_3_2 +#define uloc_getDisplayScript uloc_getDisplayScript_3_2 +#define uloc_getDisplayVariant uloc_getDisplayVariant_3_2 +#define uloc_getISO3Country uloc_getISO3Country_3_2 +#define uloc_getISO3Language uloc_getISO3Language_3_2 +#define uloc_getISOCountries uloc_getISOCountries_3_2 +#define uloc_getISOLanguages uloc_getISOLanguages_3_2 +#define uloc_getKeywordValue uloc_getKeywordValue_3_2 +#define uloc_getLCID uloc_getLCID_3_2 +#define uloc_getLanguage uloc_getLanguage_3_2 +#define uloc_getName uloc_getName_3_2 +#define uloc_getParent uloc_getParent_3_2 +#define uloc_getScript uloc_getScript_3_2 +#define uloc_getVariant uloc_getVariant_3_2 +#define uloc_openKeywordList uloc_openKeywordList_3_2 +#define uloc_openKeywords uloc_openKeywords_3_2 +#define uloc_setDefault uloc_setDefault_3_2 +#define uloc_setKeywordValue uloc_setKeywordValue_3_2 +#define ulocdata_getExemplarSet ulocdata_getExemplarSet_3_2 +#define ulocdata_getMeasurementSystem ulocdata_getMeasurementSystem_3_2 +#define ulocdata_getPaperSize ulocdata_getPaperSize_3_2 +#define umsg_applyPattern umsg_applyPattern_3_2 +#define umsg_clone umsg_clone_3_2 +#define umsg_close umsg_close_3_2 +#define umsg_format umsg_format_3_2 +#define umsg_getLocale umsg_getLocale_3_2 +#define umsg_getLocaleByType umsg_getLocaleByType_3_2 +#define umsg_open umsg_open_3_2 +#define umsg_parse umsg_parse_3_2 +#define umsg_setLocale umsg_setLocale_3_2 +#define umsg_toPattern umsg_toPattern_3_2 +#define umsg_vformat umsg_vformat_3_2 +#define umsg_vparse umsg_vparse_3_2 +#define umtx_atomic_dec umtx_atomic_dec_3_2 +#define umtx_atomic_inc umtx_atomic_inc_3_2 +#define umtx_cleanup umtx_cleanup_3_2 +#define umtx_destroy umtx_destroy_3_2 +#define umtx_init umtx_init_3_2 +#define umtx_lock umtx_lock_3_2 +#define umtx_unlock umtx_unlock_3_2 +#define unorm_addPropertyStarts unorm_addPropertyStarts_3_2 +#define unorm_closeIter unorm_closeIter_3_2 +#define unorm_compare unorm_compare_3_2 +#define unorm_compose unorm_compose_3_2 +#define unorm_concatenate unorm_concatenate_3_2 +#define unorm_decompose unorm_decompose_3_2 +#define unorm_getCanonStartSet unorm_getCanonStartSet_3_2 +#define unorm_getCanonicalDecomposition unorm_getCanonicalDecomposition_3_2 +#define unorm_getDecomposition unorm_getDecomposition_3_2 +#define unorm_getFCD16FromCodePoint unorm_getFCD16FromCodePoint_3_2 +#define unorm_getFCDTrie unorm_getFCDTrie_3_2 +#define unorm_getNX unorm_getNX_3_2 +#define unorm_getQuickCheck unorm_getQuickCheck_3_2 +#define unorm_getUnicodeVersion unorm_getUnicodeVersion_3_2 +#define unorm_haveData unorm_haveData_3_2 +#define unorm_internalIsFullCompositionExclusion unorm_internalIsFullCompositionExclusion_3_2 +#define unorm_internalNormalize unorm_internalNormalize_3_2 +#define unorm_internalNormalizeWithNX unorm_internalNormalizeWithNX_3_2 +#define unorm_internalQuickCheck unorm_internalQuickCheck_3_2 +#define unorm_isCanonSafeStart unorm_isCanonSafeStart_3_2 +#define unorm_isNFSkippable unorm_isNFSkippable_3_2 +#define unorm_isNormalized unorm_isNormalized_3_2 +#define unorm_isNormalizedWithOptions unorm_isNormalizedWithOptions_3_2 +#define unorm_next unorm_next_3_2 +#define unorm_normalize unorm_normalize_3_2 +#define unorm_openIter unorm_openIter_3_2 +#define unorm_previous unorm_previous_3_2 +#define unorm_quickCheck unorm_quickCheck_3_2 +#define unorm_quickCheckWithOptions unorm_quickCheckWithOptions_3_2 +#define unorm_setIter unorm_setIter_3_2 +#define unorm_swap unorm_swap_3_2 +#define unum_applyPattern unum_applyPattern_3_2 +#define unum_clone unum_clone_3_2 +#define unum_close unum_close_3_2 +#define unum_countAvailable unum_countAvailable_3_2 +#define unum_format unum_format_3_2 +#define unum_formatDouble unum_formatDouble_3_2 +#define unum_formatDoubleCurrency unum_formatDoubleCurrency_3_2 +#define unum_formatInt64 unum_formatInt64_3_2 +#define unum_getAttribute unum_getAttribute_3_2 +#define unum_getAvailable unum_getAvailable_3_2 +#define unum_getDoubleAttribute unum_getDoubleAttribute_3_2 +#define unum_getLocaleByType unum_getLocaleByType_3_2 +#define unum_getSymbol unum_getSymbol_3_2 +#define unum_getTextAttribute unum_getTextAttribute_3_2 +#define unum_open unum_open_3_2 +#define unum_parse unum_parse_3_2 +#define unum_parseDouble unum_parseDouble_3_2 +#define unum_parseDoubleCurrency unum_parseDoubleCurrency_3_2 +#define unum_parseInt64 unum_parseInt64_3_2 +#define unum_setAttribute unum_setAttribute_3_2 +#define unum_setDoubleAttribute unum_setDoubleAttribute_3_2 +#define unum_setSymbol unum_setSymbol_3_2 +#define unum_setTextAttribute unum_setTextAttribute_3_2 +#define unum_toPattern unum_toPattern_3_2 +#define upname_swap upname_swap_3_2 +#define uprops_getSource uprops_getSource_3_2 +#define uprops_swap uprops_swap_3_2 +#define uprv_asciiFromEbcdic uprv_asciiFromEbcdic_3_2 +#define uprv_asciitolower uprv_asciitolower_3_2 +#define uprv_ceil uprv_ceil_3_2 +#define uprv_cnttab_addContraction uprv_cnttab_addContraction_3_2 +#define uprv_cnttab_changeContraction uprv_cnttab_changeContraction_3_2 +#define uprv_cnttab_changeLastCE uprv_cnttab_changeLastCE_3_2 +#define uprv_cnttab_clone uprv_cnttab_clone_3_2 +#define uprv_cnttab_close uprv_cnttab_close_3_2 +#define uprv_cnttab_constructTable uprv_cnttab_constructTable_3_2 +#define uprv_cnttab_findCE uprv_cnttab_findCE_3_2 +#define uprv_cnttab_findCP uprv_cnttab_findCP_3_2 +#define uprv_cnttab_getCE uprv_cnttab_getCE_3_2 +#define uprv_cnttab_insertContraction uprv_cnttab_insertContraction_3_2 +#define uprv_cnttab_isTailored uprv_cnttab_isTailored_3_2 +#define uprv_cnttab_open uprv_cnttab_open_3_2 +#define uprv_cnttab_setContraction uprv_cnttab_setContraction_3_2 +#define uprv_compareASCIIPropertyNames uprv_compareASCIIPropertyNames_3_2 +#define uprv_compareEBCDICPropertyNames uprv_compareEBCDICPropertyNames_3_2 +#define uprv_compareInvAscii uprv_compareInvAscii_3_2 +#define uprv_compareInvEbcdic uprv_compareInvEbcdic_3_2 +#define uprv_convertToLCID uprv_convertToLCID_3_2 +#define uprv_convertToPosix uprv_convertToPosix_3_2 +#define uprv_copyAscii uprv_copyAscii_3_2 +#define uprv_copyEbcdic uprv_copyEbcdic_3_2 +#define uprv_dtostr uprv_dtostr_3_2 +#define uprv_ebcdicFromAscii uprv_ebcdicFromAscii_3_2 +#define uprv_ebcdictolower uprv_ebcdictolower_3_2 +#define uprv_fabs uprv_fabs_3_2 +#define uprv_floor uprv_floor_3_2 +#define uprv_fmax uprv_fmax_3_2 +#define uprv_fmin uprv_fmin_3_2 +#define uprv_fmod uprv_fmod_3_2 +#define uprv_free uprv_free_3_2 +#define uprv_getCharNameCharacters uprv_getCharNameCharacters_3_2 +#define uprv_getDefaultCodepage uprv_getDefaultCodepage_3_2 +#define uprv_getDefaultLocaleID uprv_getDefaultLocaleID_3_2 +#define uprv_getInfinity uprv_getInfinity_3_2 +#define uprv_getMaxCharNameLength uprv_getMaxCharNameLength_3_2 +#define uprv_getMaxValues uprv_getMaxValues_3_2 +#define uprv_getNaN uprv_getNaN_3_2 +#define uprv_getStaticCurrencyName uprv_getStaticCurrencyName_3_2 +#define uprv_getUTCtime uprv_getUTCtime_3_2 +#define uprv_haveProperties uprv_haveProperties_3_2 +#define uprv_init_collIterate uprv_init_collIterate_3_2 +#define uprv_int32Comparator uprv_int32Comparator_3_2 +#define uprv_isInfinite uprv_isInfinite_3_2 +#define uprv_isInvariantString uprv_isInvariantString_3_2 +#define uprv_isInvariantUString uprv_isInvariantUString_3_2 +#define uprv_isNaN uprv_isNaN_3_2 +#define uprv_isNegativeInfinity uprv_isNegativeInfinity_3_2 +#define uprv_isPositiveInfinity uprv_isPositiveInfinity_3_2 +#define uprv_isRuleWhiteSpace uprv_isRuleWhiteSpace_3_2 +#define uprv_itou uprv_itou_3_2 +#define uprv_loadPropsData uprv_loadPropsData_3_2 +#define uprv_log uprv_log_3_2 +#define uprv_log10 uprv_log10_3_2 +#define uprv_malloc uprv_malloc_3_2 +#define uprv_mapFile uprv_mapFile_3_2 +#define uprv_max uprv_max_3_2 +#define uprv_maxMantissa uprv_maxMantissa_3_2 +#define uprv_min uprv_min_3_2 +#define uprv_modf uprv_modf_3_2 +#define uprv_openRuleWhiteSpaceSet uprv_openRuleWhiteSpaceSet_3_2 +#define uprv_pathIsAbsolute uprv_pathIsAbsolute_3_2 +#define uprv_pow uprv_pow_3_2 +#define uprv_pow10 uprv_pow10_3_2 +#define uprv_realloc uprv_realloc_3_2 +#define uprv_round uprv_round_3_2 +#define uprv_sortArray uprv_sortArray_3_2 +#define uprv_strCompare uprv_strCompare_3_2 +#define uprv_strdup uprv_strdup_3_2 +#define uprv_strndup uprv_strndup_3_2 +#define uprv_syntaxError uprv_syntaxError_3_2 +#define uprv_timezone uprv_timezone_3_2 +#define uprv_toupper uprv_toupper_3_2 +#define uprv_trunc uprv_trunc_3_2 +#define uprv_tzname uprv_tzname_3_2 +#define uprv_tzset uprv_tzset_3_2 +#define uprv_uca_addAnElement uprv_uca_addAnElement_3_2 +#define uprv_uca_assembleTable uprv_uca_assembleTable_3_2 +#define uprv_uca_canonicalClosure uprv_uca_canonicalClosure_3_2 +#define uprv_uca_cloneTempTable uprv_uca_cloneTempTable_3_2 +#define uprv_uca_closeTempTable uprv_uca_closeTempTable_3_2 +#define uprv_uca_getCodePointFromRaw uprv_uca_getCodePointFromRaw_3_2 +#define uprv_uca_getImplicitFromRaw uprv_uca_getImplicitFromRaw_3_2 +#define uprv_uca_getImplicitPrimary uprv_uca_getImplicitPrimary_3_2 +#define uprv_uca_getRawFromCodePoint uprv_uca_getRawFromCodePoint_3_2 +#define uprv_uca_getRawFromImplicit uprv_uca_getRawFromImplicit_3_2 +#define uprv_uca_initImplicitConstants uprv_uca_initImplicitConstants_3_2 +#define uprv_uca_initTempTable uprv_uca_initTempTable_3_2 +#define uprv_uint16Comparator uprv_uint16Comparator_3_2 +#define uprv_uint32Comparator uprv_uint32Comparator_3_2 +#define uprv_unmapFile uprv_unmapFile_3_2 +#define uregex_appendReplacement uregex_appendReplacement_3_2 +#define uregex_appendTail uregex_appendTail_3_2 +#define uregex_clone uregex_clone_3_2 +#define uregex_close uregex_close_3_2 +#define uregex_end uregex_end_3_2 +#define uregex_find uregex_find_3_2 +#define uregex_findNext uregex_findNext_3_2 +#define uregex_flags uregex_flags_3_2 +#define uregex_getText uregex_getText_3_2 +#define uregex_group uregex_group_3_2 +#define uregex_groupCount uregex_groupCount_3_2 +#define uregex_lookingAt uregex_lookingAt_3_2 +#define uregex_matches uregex_matches_3_2 +#define uregex_open uregex_open_3_2 +#define uregex_openC uregex_openC_3_2 +#define uregex_pattern uregex_pattern_3_2 +#define uregex_replaceAll uregex_replaceAll_3_2 +#define uregex_replaceFirst uregex_replaceFirst_3_2 +#define uregex_reset uregex_reset_3_2 +#define uregex_setText uregex_setText_3_2 +#define uregex_split uregex_split_3_2 +#define uregex_start uregex_start_3_2 +#define ures_appendResPath ures_appendResPath_3_2 +#define ures_close ures_close_3_2 +#define ures_copyResb ures_copyResb_3_2 +#define ures_countArrayItems ures_countArrayItems_3_2 +#define ures_findResource ures_findResource_3_2 +#define ures_findSubResource ures_findSubResource_3_2 +#define ures_freeResPath ures_freeResPath_3_2 +#define ures_getBinary ures_getBinary_3_2 +#define ures_getByIndex ures_getByIndex_3_2 +#define ures_getByKey ures_getByKey_3_2 +#define ures_getByKeyWithFallback ures_getByKeyWithFallback_3_2 +#define ures_getFunctionalEquivalent ures_getFunctionalEquivalent_3_2 +#define ures_getInt ures_getInt_3_2 +#define ures_getIntVector ures_getIntVector_3_2 +#define ures_getKey ures_getKey_3_2 +#define ures_getKeywordValues ures_getKeywordValues_3_2 +#define ures_getLocale ures_getLocale_3_2 +#define ures_getLocaleByType ures_getLocaleByType_3_2 +#define ures_getName ures_getName_3_2 +#define ures_getNextResource ures_getNextResource_3_2 +#define ures_getNextString ures_getNextString_3_2 +#define ures_getPath ures_getPath_3_2 +#define ures_getSize ures_getSize_3_2 +#define ures_getString ures_getString_3_2 +#define ures_getStringByIndex ures_getStringByIndex_3_2 +#define ures_getStringByKey ures_getStringByKey_3_2 +#define ures_getType ures_getType_3_2 +#define ures_getUInt ures_getUInt_3_2 +#define ures_getVersion ures_getVersion_3_2 +#define ures_getVersionNumber ures_getVersionNumber_3_2 +#define ures_hasNext ures_hasNext_3_2 +#define ures_initStackObject ures_initStackObject_3_2 +#define ures_open ures_open_3_2 +#define ures_openAvailableLocales ures_openAvailableLocales_3_2 +#define ures_openDirect ures_openDirect_3_2 +#define ures_openFillIn ures_openFillIn_3_2 +#define ures_openU ures_openU_3_2 +#define ures_resetIterator ures_resetIterator_3_2 +#define ures_swap ures_swap_3_2 +#define uscript_closeRun uscript_closeRun_3_2 +#define uscript_getCode uscript_getCode_3_2 +#define uscript_getName uscript_getName_3_2 +#define uscript_getScript uscript_getScript_3_2 +#define uscript_getShortName uscript_getShortName_3_2 +#define uscript_nextRun uscript_nextRun_3_2 +#define uscript_openRun uscript_openRun_3_2 +#define uscript_resetRun uscript_resetRun_3_2 +#define uscript_setRunText uscript_setRunText_3_2 +#define usearch_close usearch_close_3_2 +#define usearch_first usearch_first_3_2 +#define usearch_following usearch_following_3_2 +#define usearch_getAttribute usearch_getAttribute_3_2 +#define usearch_getBreakIterator usearch_getBreakIterator_3_2 +#define usearch_getCollator usearch_getCollator_3_2 +#define usearch_getMatchedLength usearch_getMatchedLength_3_2 +#define usearch_getMatchedStart usearch_getMatchedStart_3_2 +#define usearch_getMatchedText usearch_getMatchedText_3_2 +#define usearch_getOffset usearch_getOffset_3_2 +#define usearch_getPattern usearch_getPattern_3_2 +#define usearch_getText usearch_getText_3_2 +#define usearch_handleNextCanonical usearch_handleNextCanonical_3_2 +#define usearch_handleNextExact usearch_handleNextExact_3_2 +#define usearch_handlePreviousCanonical usearch_handlePreviousCanonical_3_2 +#define usearch_handlePreviousExact usearch_handlePreviousExact_3_2 +#define usearch_last usearch_last_3_2 +#define usearch_next usearch_next_3_2 +#define usearch_open usearch_open_3_2 +#define usearch_openFromCollator usearch_openFromCollator_3_2 +#define usearch_preceding usearch_preceding_3_2 +#define usearch_previous usearch_previous_3_2 +#define usearch_reset usearch_reset_3_2 +#define usearch_setAttribute usearch_setAttribute_3_2 +#define usearch_setBreakIterator usearch_setBreakIterator_3_2 +#define usearch_setCollator usearch_setCollator_3_2 +#define usearch_setOffset usearch_setOffset_3_2 +#define usearch_setPattern usearch_setPattern_3_2 +#define usearch_setText usearch_setText_3_2 +#define userv_deleteStringPair userv_deleteStringPair_3_2 +#define uset_add uset_add_3_2 +#define uset_addAll uset_addAll_3_2 +#define uset_addRange uset_addRange_3_2 +#define uset_addString uset_addString_3_2 +#define uset_applyIntPropertyValue uset_applyIntPropertyValue_3_2 +#define uset_applyPattern uset_applyPattern_3_2 +#define uset_applyPropertyAlias uset_applyPropertyAlias_3_2 +#define uset_charAt uset_charAt_3_2 +#define uset_clear uset_clear_3_2 +#define uset_close uset_close_3_2 +#define uset_compact uset_compact_3_2 +#define uset_complement uset_complement_3_2 +#define uset_complementAll uset_complementAll_3_2 +#define uset_contains uset_contains_3_2 +#define uset_containsAll uset_containsAll_3_2 +#define uset_containsNone uset_containsNone_3_2 +#define uset_containsRange uset_containsRange_3_2 +#define uset_containsSome uset_containsSome_3_2 +#define uset_containsString uset_containsString_3_2 +#define uset_equals uset_equals_3_2 +#define uset_getItem uset_getItem_3_2 +#define uset_getItemCount uset_getItemCount_3_2 +#define uset_getSerializedRange uset_getSerializedRange_3_2 +#define uset_getSerializedRangeCount uset_getSerializedRangeCount_3_2 +#define uset_getSerializedSet uset_getSerializedSet_3_2 +#define uset_indexOf uset_indexOf_3_2 +#define uset_isEmpty uset_isEmpty_3_2 +#define uset_open uset_open_3_2 +#define uset_openPattern uset_openPattern_3_2 +#define uset_openPatternOptions uset_openPatternOptions_3_2 +#define uset_remove uset_remove_3_2 +#define uset_removeAll uset_removeAll_3_2 +#define uset_removeRange uset_removeRange_3_2 +#define uset_removeString uset_removeString_3_2 +#define uset_resemblesPattern uset_resemblesPattern_3_2 +#define uset_retain uset_retain_3_2 +#define uset_retainAll uset_retainAll_3_2 +#define uset_serialize uset_serialize_3_2 +#define uset_serializedContains uset_serializedContains_3_2 +#define uset_set uset_set_3_2 +#define uset_setSerializedToOne uset_setSerializedToOne_3_2 +#define uset_size uset_size_3_2 +#define uset_toPattern uset_toPattern_3_2 +#define usprep_close usprep_close_3_2 +#define usprep_open usprep_open_3_2 +#define usprep_prepare usprep_prepare_3_2 +#define usprep_swap usprep_swap_3_2 +#define ustr_foldCase ustr_foldCase_3_2 +#define ustr_toLower ustr_toLower_3_2 +#define ustr_toTitle ustr_toTitle_3_2 +#define ustr_toUpper ustr_toUpper_3_2 +#define utf8_appendCharSafeBody utf8_appendCharSafeBody_3_2 +#define utf8_back1SafeBody utf8_back1SafeBody_3_2 +#define utf8_countTrailBytes utf8_countTrailBytes_3_2 +#define utf8_nextCharSafeBody utf8_nextCharSafeBody_3_2 +#define utf8_prevCharSafeBody utf8_prevCharSafeBody_3_2 +#define utmscale_fromInt64 utmscale_fromInt64_3_2 +#define utmscale_getTimeScaleValue utmscale_getTimeScaleValue_3_2 +#define utmscale_toInt64 utmscale_toInt64_3_2 +#define utrace_cleanup utrace_cleanup_3_2 +#define utrace_data utrace_data_3_2 +#define utrace_entry utrace_entry_3_2 +#define utrace_exit utrace_exit_3_2 +#define utrace_format utrace_format_3_2 +#define utrace_functionName utrace_functionName_3_2 +#define utrace_getFunctions utrace_getFunctions_3_2 +#define utrace_getLevel utrace_getLevel_3_2 +#define utrace_level utrace_level_3_2 +#define utrace_setFunctions utrace_setFunctions_3_2 +#define utrace_setLevel utrace_setLevel_3_2 +#define utrace_vformat utrace_vformat_3_2 +#define utrans_clone utrans_clone_3_2 +#define utrans_close utrans_close_3_2 +#define utrans_countAvailableIDs utrans_countAvailableIDs_3_2 +#define utrans_getAvailableID utrans_getAvailableID_3_2 +#define utrans_getID utrans_getID_3_2 +#define utrans_getUnicodeID utrans_getUnicodeID_3_2 +#define utrans_open utrans_open_3_2 +#define utrans_openIDs utrans_openIDs_3_2 +#define utrans_openInverse utrans_openInverse_3_2 +#define utrans_openU utrans_openU_3_2 +#define utrans_register utrans_register_3_2 +#define utrans_rep_caseContextIterator utrans_rep_caseContextIterator_3_2 +#define utrans_setFilter utrans_setFilter_3_2 +#define utrans_trans utrans_trans_3_2 +#define utrans_transIncremental utrans_transIncremental_3_2 +#define utrans_transIncrementalUChars utrans_transIncrementalUChars_3_2 +#define utrans_transUChars utrans_transUChars_3_2 +#define utrans_unregister utrans_unregister_3_2 +#define utrans_unregisterID utrans_unregisterID_3_2 +#define utrie_clone utrie_clone_3_2 +#define utrie_close utrie_close_3_2 +#define utrie_enum utrie_enum_3_2 +#define utrie_get32 utrie_get32_3_2 +#define utrie_getData utrie_getData_3_2 +#define utrie_open utrie_open_3_2 +#define utrie_serialize utrie_serialize_3_2 +#define utrie_set32 utrie_set32_3_2 +#define utrie_setRange32 utrie_setRange32_3_2 +#define utrie_swap utrie_swap_3_2 +#define utrie_unserialize utrie_unserialize_3_2 +/* C++ class names renaming defines */ + +#ifdef XP_CPLUSPLUS +#if !U_HAVE_NAMESPACE + +#define AbsoluteValueSubstitution AbsoluteValueSubstitution_3_2 +#define AlternateSubstitutionSubtable AlternateSubstitutionSubtable_3_2 +#define AnchorTable AnchorTable_3_2 +#define AnyTransliterator AnyTransliterator_3_2 +#define ArabicOpenTypeLayoutEngine ArabicOpenTypeLayoutEngine_3_2 +#define ArabicShaping ArabicShaping_3_2 +#define BasicCalendarFactory BasicCalendarFactory_3_2 +#define BinarySearchLookupTable BinarySearchLookupTable_3_2 +#define BreakDictionary BreakDictionary_3_2 +#define BreakIterator BreakIterator_3_2 +#define BuddhistCalendar BuddhistCalendar_3_2 +#define CFactory CFactory_3_2 +#define Calendar Calendar_3_2 +#define CalendarAstronomer CalendarAstronomer_3_2 +#define CalendarCache CalendarCache_3_2 +#define CalendarData CalendarData_3_2 +#define CalendarService CalendarService_3_2 +#define CanonShaping CanonShaping_3_2 +#define CanonicalIterator CanonicalIterator_3_2 +#define CaseMapTransliterator CaseMapTransliterator_3_2 +#define ChainingContextualSubstitutionFormat1Subtable ChainingContextualSubstitutionFormat1Subtable_3_2 +#define ChainingContextualSubstitutionFormat2Subtable ChainingContextualSubstitutionFormat2Subtable_3_2 +#define ChainingContextualSubstitutionFormat3Subtable ChainingContextualSubstitutionFormat3Subtable_3_2 +#define ChainingContextualSubstitutionSubtable ChainingContextualSubstitutionSubtable_3_2 +#define CharSubstitutionFilter CharSubstitutionFilter_3_2 +#define CharacterIterator CharacterIterator_3_2 +#define ChoiceFormat ChoiceFormat_3_2 +#define ClassDefFormat1Table ClassDefFormat1Table_3_2 +#define ClassDefFormat2Table ClassDefFormat2Table_3_2 +#define ClassDefinitionTable ClassDefinitionTable_3_2 +#define CollationElementIterator CollationElementIterator_3_2 +#define CollationKey CollationKey_3_2 +#define Collator Collator_3_2 +#define CollatorFactory CollatorFactory_3_2 +#define CompoundTransliterator CompoundTransliterator_3_2 +#define ContextualGlyphSubstitutionProcessor ContextualGlyphSubstitutionProcessor_3_2 +#define ContextualSubstitutionBase ContextualSubstitutionBase_3_2 +#define ContextualSubstitutionFormat1Subtable ContextualSubstitutionFormat1Subtable_3_2 +#define ContextualSubstitutionFormat2Subtable ContextualSubstitutionFormat2Subtable_3_2 +#define ContextualSubstitutionFormat3Subtable ContextualSubstitutionFormat3Subtable_3_2 +#define ContextualSubstitutionSubtable ContextualSubstitutionSubtable_3_2 +#define CoverageFormat1Table CoverageFormat1Table_3_2 +#define CoverageFormat2Table CoverageFormat2Table_3_2 +#define CoverageTable CoverageTable_3_2 +#define CurrencyAmount CurrencyAmount_3_2 +#define CurrencyFormat CurrencyFormat_3_2 +#define CurrencyUnit CurrencyUnit_3_2 +#define CursiveAttachmentSubtable CursiveAttachmentSubtable_3_2 +#define DateFormat DateFormat_3_2 +#define DateFormatSymbols DateFormatSymbols_3_2 +#define DecimalFormat DecimalFormat_3_2 +#define DecimalFormatSymbols DecimalFormatSymbols_3_2 +#define DefaultCalendarFactory DefaultCalendarFactory_3_2 +#define DefaultCharMapper DefaultCharMapper_3_2 +#define DeviceTable DeviceTable_3_2 +#define DictionaryBasedBreakIterator DictionaryBasedBreakIterator_3_2 +#define DictionaryBasedBreakIteratorTables DictionaryBasedBreakIteratorTables_3_2 +#define DigitList DigitList_3_2 +#define Entry Entry_3_2 +#define EnumToOffset EnumToOffset_3_2 +#define EscapeTransliterator EscapeTransliterator_3_2 +#define EventListener EventListener_3_2 +#define ExtensionSubtable ExtensionSubtable_3_2 +#define FeatureListTable FeatureListTable_3_2 +#define FieldPosition FieldPosition_3_2 +#define FontRuns FontRuns_3_2 +#define Format Format_3_2 +#define Format1AnchorTable Format1AnchorTable_3_2 +#define Format2AnchorTable Format2AnchorTable_3_2 +#define Format3AnchorTable Format3AnchorTable_3_2 +#define Formattable Formattable_3_2 +#define ForwardCharacterIterator ForwardCharacterIterator_3_2 +#define FractionalPartSubstitution FractionalPartSubstitution_3_2 +#define FunctionReplacer FunctionReplacer_3_2 +#define GDEFMarkFilter GDEFMarkFilter_3_2 +#define GXLayoutEngine GXLayoutEngine_3_2 +#define GlyphDefinitionTableHeader GlyphDefinitionTableHeader_3_2 +#define GlyphIterator GlyphIterator_3_2 +#define GlyphLookupTableHeader GlyphLookupTableHeader_3_2 +#define GlyphPositioningLookupProcessor GlyphPositioningLookupProcessor_3_2 +#define GlyphPositioningTableHeader GlyphPositioningTableHeader_3_2 +#define GlyphSubstitutionLookupProcessor GlyphSubstitutionLookupProcessor_3_2 +#define GlyphSubstitutionTableHeader GlyphSubstitutionTableHeader_3_2 +#define Grego Grego_3_2 +#define GregorianCalendar GregorianCalendar_3_2 +#define HanOpenTypeLayoutEngine HanOpenTypeLayoutEngine_3_2 +#define HebrewCalendar HebrewCalendar_3_2 +#define ICUBreakIteratorFactory ICUBreakIteratorFactory_3_2 +#define ICUBreakIteratorService ICUBreakIteratorService_3_2 +#define ICUCollatorFactory ICUCollatorFactory_3_2 +#define ICUCollatorService ICUCollatorService_3_2 +#define ICULayoutEngine ICULayoutEngine_3_2 +#define ICULocaleService ICULocaleService_3_2 +#define ICUNotifier ICUNotifier_3_2 +#define ICUNumberFormatFactory ICUNumberFormatFactory_3_2 +#define ICUNumberFormatService ICUNumberFormatService_3_2 +#define ICUResourceBundleFactory ICUResourceBundleFactory_3_2 +#define ICUService ICUService_3_2 +#define ICUServiceFactory ICUServiceFactory_3_2 +#define ICUServiceKey ICUServiceKey_3_2 +#define ICU_Utility ICU_Utility_3_2 +#define IndicClassTable IndicClassTable_3_2 +#define IndicOpenTypeLayoutEngine IndicOpenTypeLayoutEngine_3_2 +#define IndicRearrangementProcessor IndicRearrangementProcessor_3_2 +#define IndicReordering IndicReordering_3_2 +#define IntegralPartSubstitution IntegralPartSubstitution_3_2 +#define IslamicCalendar IslamicCalendar_3_2 +#define JapaneseCalendar JapaneseCalendar_3_2 +#define KeywordEnumeration KeywordEnumeration_3_2 +#define LECharMapper LECharMapper_3_2 +#define LEFontInstance LEFontInstance_3_2 +#define LEGlyphFilter LEGlyphFilter_3_2 +#define LEGlyphStorage LEGlyphStorage_3_2 +#define LEInsertionCallback LEInsertionCallback_3_2 +#define LEInsertionList LEInsertionList_3_2 +#define LXUtilities LXUtilities_3_2 +#define LayoutEngine LayoutEngine_3_2 +#define LigatureSubstitutionProcessor LigatureSubstitutionProcessor_3_2 +#define LigatureSubstitutionSubtable LigatureSubstitutionSubtable_3_2 +#define LocDataParser LocDataParser_3_2 +#define Locale Locale_3_2 +#define LocaleBased LocaleBased_3_2 +#define LocaleKey LocaleKey_3_2 +#define LocaleKeyFactory LocaleKeyFactory_3_2 +#define LocaleRuns LocaleRuns_3_2 +#define LocaleUtility LocaleUtility_3_2 +#define LocalizationInfo LocalizationInfo_3_2 +#define LookupListTable LookupListTable_3_2 +#define LookupProcessor LookupProcessor_3_2 +#define LookupSubtable LookupSubtable_3_2 +#define LookupTable LookupTable_3_2 +#define LowercaseTransliterator LowercaseTransliterator_3_2 +#define MPreFixups MPreFixups_3_2 +#define MarkArray MarkArray_3_2 +#define MarkToBasePositioningSubtable MarkToBasePositioningSubtable_3_2 +#define MarkToLigaturePositioningSubtable MarkToLigaturePositioningSubtable_3_2 +#define MarkToMarkPositioningSubtable MarkToMarkPositioningSubtable_3_2 +#define Math Math_3_2 +#define Measure Measure_3_2 +#define MeasureFormat MeasureFormat_3_2 +#define MeasureUnit MeasureUnit_3_2 +#define MessageFormat MessageFormat_3_2 +#define MessageFormatAdapter MessageFormatAdapter_3_2 +#define ModulusSubstitution ModulusSubstitution_3_2 +#define MoonRiseSetCoordFunc MoonRiseSetCoordFunc_3_2 +#define MoonTimeAngleFunc MoonTimeAngleFunc_3_2 +#define MorphSubtableHeader MorphSubtableHeader_3_2 +#define MorphTableHeader MorphTableHeader_3_2 +#define MultipleSubstitutionSubtable MultipleSubstitutionSubtable_3_2 +#define MultiplierSubstitution MultiplierSubstitution_3_2 +#define NFFactory NFFactory_3_2 +#define NFRule NFRule_3_2 +#define NFRuleSet NFRuleSet_3_2 +#define NFSubstitution NFSubstitution_3_2 +#define NameToEnum NameToEnum_3_2 +#define NameUnicodeTransliterator NameUnicodeTransliterator_3_2 +#define NonContextualGlyphSubstitutionProcessor NonContextualGlyphSubstitutionProcessor_3_2 +#define NonContiguousEnumToOffset NonContiguousEnumToOffset_3_2 +#define NormalizationTransliterator NormalizationTransliterator_3_2 +#define Normalizer Normalizer_3_2 +#define NullSubstitution NullSubstitution_3_2 +#define NullTransliterator NullTransliterator_3_2 +#define NumberFormat NumberFormat_3_2 +#define NumberFormatFactory NumberFormatFactory_3_2 +#define NumeratorSubstitution NumeratorSubstitution_3_2 +#define OlsonTimeZone OlsonTimeZone_3_2 +#define OpenTypeLayoutEngine OpenTypeLayoutEngine_3_2 +#define OpenTypeUtilities OpenTypeUtilities_3_2 +#define PairPositioningFormat1Subtable PairPositioningFormat1Subtable_3_2 +#define PairPositioningFormat2Subtable PairPositioningFormat2Subtable_3_2 +#define PairPositioningSubtable PairPositioningSubtable_3_2 +#define ParagraphLayout ParagraphLayout_3_2 +#define ParseData ParseData_3_2 +#define ParsePosition ParsePosition_3_2 +#define PropertyAliases PropertyAliases_3_2 +#define Quantifier Quantifier_3_2 +#define RBBIDataWrapper RBBIDataWrapper_3_2 +#define RBBINode RBBINode_3_2 +#define RBBIRuleBuilder RBBIRuleBuilder_3_2 +#define RBBIRuleScanner RBBIRuleScanner_3_2 +#define RBBISetBuilder RBBISetBuilder_3_2 +#define RBBIStateDescriptor RBBIStateDescriptor_3_2 +#define RBBISymbolTable RBBISymbolTable_3_2 +#define RBBISymbolTableEntry RBBISymbolTableEntry_3_2 +#define RBBITableBuilder RBBITableBuilder_3_2 +#define RangeDescriptor RangeDescriptor_3_2 +#define RegexCompile RegexCompile_3_2 +#define RegexMatcher RegexMatcher_3_2 +#define RegexPattern RegexPattern_3_2 +#define RegexStaticSets RegexStaticSets_3_2 +#define RemoveTransliterator RemoveTransliterator_3_2 +#define Replaceable Replaceable_3_2 +#define ReplaceableGlue ReplaceableGlue_3_2 +#define ResourceBundle ResourceBundle_3_2 +#define RiseSetCoordFunc RiseSetCoordFunc_3_2 +#define RuleBasedBreakIterator RuleBasedBreakIterator_3_2 +#define RuleBasedCollator RuleBasedCollator_3_2 +#define RuleBasedNumberFormat RuleBasedNumberFormat_3_2 +#define RuleBasedTransliterator RuleBasedTransliterator_3_2 +#define RuleCharacterIterator RuleCharacterIterator_3_2 +#define RuleHalf RuleHalf_3_2 +#define RunArray RunArray_3_2 +#define SameValueSubstitution SameValueSubstitution_3_2 +#define ScriptListTable ScriptListTable_3_2 +#define ScriptRunIterator ScriptRunIterator_3_2 +#define ScriptTable ScriptTable_3_2 +#define SearchIterator SearchIterator_3_2 +#define SegmentArrayProcessor SegmentArrayProcessor_3_2 +#define SegmentSingleProcessor SegmentSingleProcessor_3_2 +#define ServiceEnumeration ServiceEnumeration_3_2 +#define ServiceListener ServiceListener_3_2 +#define SimpleArrayProcessor SimpleArrayProcessor_3_2 +#define SimpleDateFormat SimpleDateFormat_3_2 +#define SimpleFactory SimpleFactory_3_2 +#define SimpleLocaleKeyFactory SimpleLocaleKeyFactory_3_2 +#define SimpleNumberFormatFactory SimpleNumberFormatFactory_3_2 +#define SimpleTimeZone SimpleTimeZone_3_2 +#define SinglePositioningFormat1Subtable SinglePositioningFormat1Subtable_3_2 +#define SinglePositioningFormat2Subtable SinglePositioningFormat2Subtable_3_2 +#define SinglePositioningSubtable SinglePositioningSubtable_3_2 +#define SingleSubstitutionFormat1Subtable SingleSubstitutionFormat1Subtable_3_2 +#define SingleSubstitutionFormat2Subtable SingleSubstitutionFormat2Subtable_3_2 +#define SingleSubstitutionSubtable SingleSubstitutionSubtable_3_2 +#define SingleTableProcessor SingleTableProcessor_3_2 +#define Spec Spec_3_2 +#define StateTableProcessor StateTableProcessor_3_2 +#define StringCharacterIterator StringCharacterIterator_3_2 +#define StringEnumeration StringEnumeration_3_2 +#define StringLocalizationInfo StringLocalizationInfo_3_2 +#define StringMatcher StringMatcher_3_2 +#define StringPair StringPair_3_2 +#define StringReplacer StringReplacer_3_2 +#define StringSearch StringSearch_3_2 +#define StyleRuns StyleRuns_3_2 +#define SubstitutionLookup SubstitutionLookup_3_2 +#define SubtableProcessor SubtableProcessor_3_2 +#define SunTimeAngleFunc SunTimeAngleFunc_3_2 +#define SymbolTable SymbolTable_3_2 +#define TZEnumeration TZEnumeration_3_2 +#define ThaiLayoutEngine ThaiLayoutEngine_3_2 +#define ThaiShaping ThaiShaping_3_2 +#define TimeZone TimeZone_3_2 +#define TitlecaseTransliterator TitlecaseTransliterator_3_2 +#define TransliterationRule TransliterationRule_3_2 +#define TransliterationRuleData TransliterationRuleData_3_2 +#define TransliterationRuleSet TransliterationRuleSet_3_2 +#define Transliterator Transliterator_3_2 +#define TransliteratorAlias TransliteratorAlias_3_2 +#define TransliteratorIDParser TransliteratorIDParser_3_2 +#define TransliteratorParser TransliteratorParser_3_2 +#define TransliteratorRegistry TransliteratorRegistry_3_2 +#define TrimmedArrayProcessor TrimmedArrayProcessor_3_2 +#define UCharCharacterIterator UCharCharacterIterator_3_2 +#define UMemory UMemory_3_2 +#define UObject UObject_3_2 +#define UStack UStack_3_2 +#define UStringEnumeration UStringEnumeration_3_2 +#define UVector UVector_3_2 +#define UVector32 UVector32_3_2 +#define UnescapeTransliterator UnescapeTransliterator_3_2 +#define UnicodeArabicOpenTypeLayoutEngine UnicodeArabicOpenTypeLayoutEngine_3_2 +#define UnicodeFilter UnicodeFilter_3_2 +#define UnicodeFunctor UnicodeFunctor_3_2 +#define UnicodeMatcher UnicodeMatcher_3_2 +#define UnicodeNameTransliterator UnicodeNameTransliterator_3_2 +#define UnicodeReplacer UnicodeReplacer_3_2 +#define UnicodeSet UnicodeSet_3_2 +#define UnicodeSetIterator UnicodeSetIterator_3_2 +#define UnicodeString UnicodeString_3_2 +#define UppercaseTransliterator UppercaseTransliterator_3_2 +#define ValueRecord ValueRecord_3_2 +#define ValueRuns ValueRuns_3_2 +#define locale_set_default_internal locale_set_default_internal_3_2 +#define uprv_parseCurrency uprv_parseCurrency_3_2 +#define util64_fromDouble util64_fromDouble_3_2 +#define util64_pow util64_pow_3_2 +#define util64_tou util64_tou_3_2 +#define util64_utoi util64_utoi_3_2 + +#endif +#endif + +#endif + +#endif diff --git a/JavaScriptGlue/icu/unicode/ustring.h b/JavaScriptGlue/icu/unicode/ustring.h new file mode 100644 index 0000000..6ebb6fb --- /dev/null +++ b/JavaScriptGlue/icu/unicode/ustring.h @@ -0,0 +1,1320 @@ +/* +********************************************************************** +* Copyright (C) 1998-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File ustring.h +* +* Modification History: +* +* Date Name Description +* 12/07/98 bertrand Creation. +****************************************************************************** +*/ + +#ifndef USTRING_H +#define USTRING_H + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/uiter.h" + +/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/ +#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR +# define UBRK_TYPEDEF_UBREAK_ITERATOR + typedef void UBreakIterator; +#endif + +/** + * \file + * \brief C API: Unicode string handling functions + * + * These C API functions provide general Unicode string handling. + * + * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h> + * functions. (For example, they do not check for bad arguments like NULL string pointers.) + * In some cases, only the thread-safe variant of such a function is implemented here + * (see u_strtok_r()). + * + * Other functions provide more Unicode-specific functionality like locale-specific + * upper/lower-casing and string comparison in code point order. + * + * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. + * UTF-16 encodes each Unicode code point with either one or two UChar code units. + * (This is the default form of Unicode, and a forward-compatible extension of the original, + * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 + * in 1996.) + * + * Some APIs accept a 32-bit UChar32 value for a single code point. + * + * ICU also handles 16-bit Unicode text with unpaired surrogates. + * Such text is not well-formed UTF-16. + * Code-point-related functions treat unpaired surrogates as surrogate code points, + * i.e., as separate units. + * + * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), + * it is much more efficient even for random access because the code unit values + * for single-unit characters vs. lead units vs. trail units are completely disjoint. + * This means that it is easy to determine character (code point) boundaries from + * random offsets in the string. + * + * Unicode (UTF-16) string processing is optimized for the single-unit case. + * Although it is important to support supplementary characters + * (which use pairs of lead/trail code units called "surrogates"), + * their occurrence is rare. Almost all characters in modern use require only + * a single UChar code unit (i.e., their code point values are <=0xffff). + * + * For more details see the User Guide Strings chapter (http://oss.software.ibm.com/icu/userguide/strings.html). + * For a discussion of the handling of unpaired surrogates see also + * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18. + */ + +/** + * Determine the length of an array of UChar. + * + * @param s The array of UChars, NULL (U+0000) terminated. + * @return The number of UChars in <code>chars</code>, minus the terminator. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strlen(const UChar *s); + +/** + * Count Unicode code points in the length UChar code units of the string. + * A code point may occupy either one or two UChar code units. + * Counting code points involves reading all code units. + * + * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h). + * + * @param s The input string. + * @param length The number of UChar code units to be checked, or -1 to count all + * code points before the first NUL (U+0000). + * @return The number of code points in the specified code units. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_countChar32(const UChar *s, int32_t length); + +/** + * Check if the string contains more Unicode code points than a certain number. + * This is more efficient than counting all code points in the entire string + * and comparing that number with a threshold. + * This function may not need to scan the string at all if the length is known + * (not -1 for NUL-termination) and falls within a certain range, and + * never needs to count more than 'number+1' code points. + * Logically equivalent to (u_countChar32(s, length)>number). + * A Unicode code point may occupy either one or two UChar code units. + * + * @param s The input string. + * @param length The length of the string, or -1 if it is NUL-terminated. + * @param number The number of code points in the string is compared against + * the 'number' parameter. + * @return Boolean value for whether the string contains more Unicode code points + * than 'number'. Same as (u_countChar32(s, length)>number). + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number); + +/** + * Concatenate two ustrings. Appends a copy of <code>src</code>, + * including the null terminator, to <code>dst</code>. The initial copied + * character from <code>src</code> overwrites the null terminator in <code>dst</code>. + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strcat(UChar *dst, + const UChar *src); + +/** + * Concatenate two ustrings. + * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>. + * Adds a terminating NUL. + * If src is too long, then only <code>n-1</code> characters will be copied + * before the terminating NUL. + * If <code>n<=0</code> then dst is not modified. + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to compare. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strncat(UChar *dst, + const UChar *src, + int32_t n); + +/** + * Find the first occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search (NUL-terminated). + * @param substring The substring to find (NUL-terminated). + * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strrstr + * @see u_strFindFirst + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strstr(const UChar *s, const UChar *substring); + +/** + * Find the first occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search. + * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. + * @param substring The substring to find (NUL-terminated). + * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. + * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); + +/** + * Find the first occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The BMP code point to find. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr32 + * @see u_memchr + * @see u_strstr + * @see u_strFindFirst + */ +U_STABLE UChar * U_EXPORT2 +u_strchr(const UChar *s, UChar c); + +/** + * Find the first occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The code point to find. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr + * @see u_memchr32 + * @see u_strstr + * @see u_strFindFirst + */ +U_STABLE UChar * U_EXPORT2 +u_strchr32(const UChar *s, UChar32 c); + +/** + * Find the last occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search (NUL-terminated). + * @param substring The substring to find (NUL-terminated). + * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindFirst + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrstr(const UChar *s, const UChar *substring); + +/** + * Find the last occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search. + * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. + * @param substring The substring to find (NUL-terminated). + * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. + * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); + +/** + * Find the last occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The BMP code point to find. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr32 + * @see u_memrchr + * @see u_strrstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrchr(const UChar *s, UChar c); + +/** + * Find the last occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The code point to find. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr + * @see u_memchr32 + * @see u_strrstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrchr32(const UChar *s, UChar32 c); + +/** + * Locates the first occurrence in the string <code>string</code> of any of the characters + * in the string <code>matchSet</code>. + * Works just like C's strpbrk but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return A pointer to the character in <code>string</code> that matches one of the + * characters in <code>matchSet</code>, or NULL if no such character is found. + * @stable ICU 2.0 + */ +U_STABLE UChar * U_EXPORT2 +u_strpbrk(const UChar *string, const UChar *matchSet); + +/** + * Returns the number of consecutive characters in <code>string</code>, + * beginning with the first, that do not occur somewhere in <code>matchSet</code>. + * Works just like C's strcspn but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return The number of initial characters in <code>string</code> that do not + * occur in <code>matchSet</code>. + * @see u_strspn + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcspn(const UChar *string, const UChar *matchSet); + +/** + * Returns the number of consecutive characters in <code>string</code>, + * beginning with the first, that occur somewhere in <code>matchSet</code>. + * Works just like C's strspn but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return The number of initial characters in <code>string</code> that do + * occur in <code>matchSet</code>. + * @see u_strcspn + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strspn(const UChar *string, const UChar *matchSet); + +/** + * The string tokenizer API allows an application to break a string into + * tokens. Unlike strtok(), the saveState (the current pointer within the + * original string) is maintained in saveState. In the first call, the + * argument src is a pointer to the string. In subsequent calls to + * return successive tokens of that string, src must be specified as + * NULL. The value saveState is set by this function to maintain the + * function's position within the string, and on each subsequent call + * you must give this argument the same variable. This function does + * handle surrogate pairs. This function is similar to the strtok_r() + * the POSIX Threads Extension (1003.1c-1995) version. + * + * @param src String containing token(s). This string will be modified. + * After the first call to u_strtok_r(), this argument must + * be NULL to get to the next token. + * @param delim Set of delimiter characters (Unicode code points). + * @param saveState The current pointer within the original string, + * which is set by this function. The saveState + * parameter should the address of a local variable of type + * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use + * &myLocalSaveState for this parameter). + * @return A pointer to the next token found in src, or NULL + * when there are no more tokens. + * @stable ICU 2.0 + */ +U_STABLE UChar * U_EXPORT2 +u_strtok_r(UChar *src, + const UChar *delim, + UChar **saveState); + +/** + * Compare two Unicode strings for bitwise equality (code unit order). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative + * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive + * value if <code>s1</code> is bitwise greater than <code>s2</code>. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcmp(const UChar *s1, + const UChar *s2); + +/** + * Compare two Unicode strings in code point order. + * See u_strCompare for details. + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcmpCodePointOrder(const UChar *s1, const UChar *s2); + +/** + * Compare two Unicode strings (binary order). + * + * The comparison can be done in code unit order or in code point order. + * They differ only in UTF-16 when + * comparing supplementary code points (U+10000..U+10ffff) + * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). + * In code unit order, high BMP code points sort after supplementary code points + * because they are stored as pairs of surrogates which are at U+d800..U+dfff. + * + * This functions works with strings of different explicitly specified lengths + * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. + * NUL-terminated strings are possible with length arguments of -1. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param codePointOrder Choose between code unit order (FALSE) + * and code point order (TRUE). + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_strCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + UBool codePointOrder); + +/** + * Compare two Unicode strings (binary order) + * as presented by UCharIterator objects. + * Works otherwise just like u_strCompare(). + * + * Both iterators are reset to their start positions. + * When the function returns, it is undefined where the iterators + * have stopped. + * + * @param iter1 First source string iterator. + * @param iter2 Second source string iterator. + * @param codePointOrder Choose between code unit order (FALSE) + * and code point order (TRUE). + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @see u_strCompare + * + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); + +#ifndef U_COMPARE_CODE_POINT_ORDER +/* see also unistr.h and unorm.h */ +/** + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: + * Compare strings in code point order instead of code unit order. + * @stable ICU 2.2 + */ +#define U_COMPARE_CODE_POINT_ORDER 0x8000 +#endif + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to + * u_strCompare(u_strFoldCase(s1, options), + * u_strFoldCase(s2, options), + * (options&U_COMPARE_CODE_POINT_ORDER)!=0). + * + * The comparison can be done in UTF-16 code unit order or in code point order. + * They differ only when comparing supplementary code points (U+10000..U+10ffff) + * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). + * In code unit order, high BMP code points sort after supplementary code points + * because they are stored as pairs of surrogates which are at U+d800..U+dfff. + * + * This functions works with strings of different explicitly specified lengths + * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. + * NUL-terminated strings are possible with length arguments of -1. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_strCaseCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Compare two ustrings for bitwise equality. + * Compares at most <code>n</code> characters. + * + * @param ucs1 A string to compare. + * @param ucs2 A string to compare. + * @param n The maximum number of characters to compare. + * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative + * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive + * value if <code>s1</code> is bitwise greater than <code>s2</code>. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncmp(const UChar *ucs1, + const UChar *ucs2, + int32_t n); + +/** + * Compare two Unicode strings in code point order. + * This is different in UTF-16 from u_strncmp() if supplementary characters are present. + * For details, see u_strCompare(). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param n The maximum number of characters to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), + * u_strFoldCase(s2, at most n, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param n The maximum number of characters each string to case-fold and then compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), + * u_strFoldCase(s2, n, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param length The number of characters in each string to case-fold and then compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options); + +/** + * Copy a ustring. Adds a null terminator. + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strcpy(UChar *dst, + const UChar *src); + +/** + * Copy a ustring. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strncpy(UChar *dst, + const UChar *src, + int32_t n); + +#if !UCONFIG_NO_CONVERSION + +/** + * Copy a byte string encoded in the default codepage to a ustring. + * Adds a null terminator. + * Performs a host byte to UChar conversion + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst, + const char *src ); + +/** + * Copy a byte string encoded in the default codepage to a ustring. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * Performs a host byte to UChar conversion + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst, + const char *src, + int32_t n); + +/** + * Copy ustring to a byte string encoded in the default codepage. + * Adds a null terminator. + * Performs a UChar to host byte conversion + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE char* U_EXPORT2 u_austrcpy(char *dst, + const UChar *src ); + +/** + * Copy ustring to a byte string encoded in the default codepage. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * Performs a UChar to host byte conversion + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE char* U_EXPORT2 u_austrncpy(char *dst, + const UChar *src, + int32_t n ); + +#endif + +/** + * Synonym for memcpy(), but with UChars only. + * @param dest The destination string + * @param src The source string + * @param count The number of characters to copy + * @return A pointer to <code>dest</code> + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memcpy(UChar *dest, const UChar *src, int32_t count); + +/** + * Synonym for memmove(), but with UChars only. + * @param dest The destination string + * @param src The source string + * @param count The number of characters to move + * @return A pointer to <code>dest</code> + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memmove(UChar *dest, const UChar *src, int32_t count); + +/** + * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>. + * + * @param dest The destination string. + * @param c The character to initialize the string. + * @param count The maximum number of characters to set. + * @return A pointer to <code>dest</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memset(UChar *dest, UChar c, int32_t count); + +/** + * Compare the first <code>count</code> UChars of each buffer. + * + * @param buf1 The first string to compare. + * @param buf2 The second string to compare. + * @param count The maximum number of UChars to compare. + * @return When buf1 < buf2, a negative number is returned. + * When buf1 == buf2, 0 is returned. + * When buf1 > buf2, a positive number is returned. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count); + +/** + * Compare two Unicode strings in code point order. + * This is different in UTF-16 from u_memcmp() if supplementary characters are present. + * For details, see u_strCompare(). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param count The maximum number of characters to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count); + +/** + * Find the first occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The BMP code point to find. + * @param count The length of the string. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr + * @see u_memchr32 + * @see u_strFindFirst + */ +U_STABLE UChar* U_EXPORT2 +u_memchr(const UChar *s, UChar c, int32_t count); + +/** + * Find the first occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The code point to find. + * @param count The length of the string. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr32 + * @see u_memchr + * @see u_strFindFirst + */ +U_STABLE UChar* U_EXPORT2 +u_memchr32(const UChar *s, UChar32 c, int32_t count); + +/** + * Find the last occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The BMP code point to find. + * @param count The length of the string. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr + * @see u_memrchr32 + * @see u_strFindLast + */ +U_STABLE UChar* U_EXPORT2 +u_memrchr(const UChar *s, UChar c, int32_t count); + +/** + * Find the last occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The code point to find. + * @param count The length of the string. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr32 + * @see u_memrchr + * @see u_strFindLast + */ +U_STABLE UChar* U_EXPORT2 +u_memrchr32(const UChar *s, UChar32 c, int32_t count); + +/** + * Unicode String literals in C. + * We need one macro to declare a variable for the string + * and to statically preinitialize it if possible, + * and a second macro to dynamically intialize such a string variable if necessary. + * + * The macros are defined for maximum performance. + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * A pair of macros for a single string must be used with the same + * parameters. + * The string parameter must be a C string literal. + * The length of the string, not including the terminating + * <code>NUL</code>, must be specified as a constant. + * The U_STRING_DECL macro should be invoked exactly once for one + * such string variable before it is used. + * + * Usage: + * <pre> + *   U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11); + *   U_STRING_DECL(ustringVar2, "jumps 5%", 8); + *   static UBool didInit=FALSE; + *   + *   int32_t function() { + *   if(!didInit) { + *   U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11); + *   U_STRING_INIT(ustringVar2, "jumps 5%", 8); + *   didInit=TRUE; + *   } + *   return u_strcmp(ustringVar1, ustringVar2); + *   } + * </pre> + * @stable ICU 2.0 + */ +#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY +# define U_STRING_DECL(var, cs, length) static const wchar_t var[(length)+1]={ L ## cs } + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY +# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)cs } + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#else +# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1] + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1) +#endif + +/** + * Unescape a string of characters and write the resulting + * Unicode characters to the destination buffer. The following escape + * sequences are recognized: + * + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] + * \\Uhhhhhhhh 8 hex digits + * \\xhh 1-2 hex digits + * \\x{h...} 1-8 hex digits + * \\ooo 1-3 octal digits; o in [0-7] + * \\cX control-X; X is masked with 0x1F + * + * as well as the standard ANSI C escapes: + * + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + * + * Anything else following a backslash is generically escaped. For + * example, "[a\\-z]" returns "[a-z]". + * + * If an escape sequence is ill-formed, this method returns an empty + * string. An example of an ill-formed sequence is "\\u" followed by + * fewer than 4 hex digits. + * + * The above characters are recognized in the compiler's codepage, + * that is, they are coded as 'u', '\\', etc. Characters that are + * not parts of escape sequences are converted using u_charsToUChars(). + * + * This function is similar to UnicodeString::unescape() but not + * identical to it. The latter takes a source UnicodeString, so it + * does escape recognition but no conversion. + * + * @param src a zero-terminated string of invariant characters + * @param dest pointer to buffer to receive converted and unescaped + * text and, if there is room, a zero terminator. May be NULL for + * preflighting, in which case no UChars will be written, but the + * return value will still be valid. On error, an empty string is + * stored here (if possible). + * @param destCapacity the number of UChars that may be written at + * dest. Ignored if dest == NULL. + * @return the length of unescaped string. + * @see u_unescapeAt + * @see UnicodeString#unescape() + * @see UnicodeString#unescapeAt() + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_unescape(const char *src, + UChar *dest, int32_t destCapacity); + +U_CDECL_BEGIN +/** + * Callback function for u_unescapeAt() that returns a character of + * the source text given an offset and a context pointer. The context + * pointer will be whatever is passed into u_unescapeAt(). + * + * @param offset pointer to the offset that will be passed to u_unescapeAt(). + * @param context an opaque pointer passed directly into u_unescapeAt() + * @return the character represented by the escape sequence at + * offset + * @see u_unescapeAt + * @stable ICU 2.0 + */ +typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context); +U_CDECL_END + +/** + * Unescape a single sequence. The character at offset-1 is assumed + * (without checking) to be a backslash. This method takes a callback + * pointer to a function that returns the UChar at a given offset. By + * varying this callback, ICU functions are able to unescape char* + * strings, UnicodeString objects, and UFILE pointers. + * + * If offset is out of range, or if the escape sequence is ill-formed, + * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape() + * for a list of recognized sequences. + * + * @param charAt callback function that returns a UChar of the source + * text given an offset and a context pointer. + * @param offset pointer to the offset that will be passed to charAt. + * The offset value will be updated upon return to point after the + * last parsed character of the escape sequence. On error the offset + * is unchanged. + * @param length the number of characters in the source text. The + * last character of the source text is considered to be at offset + * length-1. + * @param context an opaque pointer passed directly into charAt. + * @return the character represented by the escape sequence at + * offset, or (UChar32)0xFFFFFFFF on error. + * @see u_unescape() + * @see UnicodeString#unescape() + * @see UnicodeString#unescapeAt() + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_unescapeAt(UNESCAPE_CHAR_AT charAt, + int32_t *offset, + int32_t length, + void *context); + +/** + * Uppercase the characters in a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strToUpper(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +/** + * Lowercase the characters in a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strToLower(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** + * Titlecase a string. + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the first() and next() methods of the + * provided break iterator. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (NULL), then a standard titlecase + * break iterator is opened. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.1 + */ +U_STABLE int32_t U_EXPORT2 +u_strToTitle(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UBreakIterator *titleIter, + const char *locale, + UErrorCode *pErrorCode); + +#endif + +/** + * Case-fold the characters in a string. + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'I' in CaseFolding.txt. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strFoldCase(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Converts a sequence of UChars to wchar_t units. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE wchar_t* U_EXPORT2 +u_strToWCS(wchar_t *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); +/** + * Converts a sequence of wchar_t units to UChars + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromWCS(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const wchar_t *src, + int32_t srcLength, + UErrorCode *pErrorCode); +/** + * Converts a sequence of UChars (UTF-16) to UTF-8 bytes + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE char* U_EXPORT2 +u_strToUTF8(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Converts a sequence of UTF-8 bytes to UChars (UTF-16). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF8(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Converts a sequence of UChars (UTF-16) to UTF32 units. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE UChar32* U_EXPORT2 +u_strToUTF32(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Converts a sequence of UTF32 units to UChars (UTF-16) + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF32(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +#endif diff --git a/JavaScriptGlue/icu/unicode/utf.h b/JavaScriptGlue/icu/unicode/utf.h new file mode 100644 index 0000000..201691d --- /dev/null +++ b/JavaScriptGlue/icu/unicode/utf.h @@ -0,0 +1,221 @@ +/* +******************************************************************************* +* +* Copyright (C) 1999-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: Code point macros + * + * This file defines macros for checking whether a code point is + * a surrogate or a non-character etc. + * + * The UChar and UChar32 data types for Unicode code units and code points + * are defined in umachines.h because they can be machine-dependent. + * + * utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some + * common definitions. Those files define macros for efficiently getting code points + * in and out of UTF-8/16 strings. + * utf16.h macros have "U16_" prefixes. + * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling. + * + * ICU processes 16-bit Unicode strings. + * Most of the time, such strings are well-formed UTF-16. + * Single, unpaired surrogates must be handled as well, and are treated in ICU + * like regular code points where possible. + * (Pairs of surrogate code points are indistinguishable from supplementary + * code points encoded as pairs of supplementary code units.) + * + * In fact, almost all Unicode code points in normal text (>99%) + * are on the BMP (<=U+ffff) and even <=U+d7ff. + * ICU functions handle supplementary code points (U+10000..U+10ffff) + * but are optimized for the much more frequently occurring BMP code points. + * + * utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then + * UChar is defined to be exactly wchar_t, otherwise uint16_t. + * + * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit + * Unicode code point (Unicode scalar value, 0..0x10ffff). + * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as + * the definition of UChar. For details see the documentation for UChar32 itself. + * + * utf.h also defines a small number of C macros for single Unicode code points. + * These are simple checks for surrogates and non-characters. + * For actual Unicode character properties see uchar.h. + * + * By default, string operations must be done with error checking in case + * a string is not well-formed UTF-16. + * The macros will detect if a surrogate code unit is unpaired + * (lead unit without trail unit or vice versa) and just return the unit itself + * as the code point. + * (It is an accidental property of Unicode and UTF-16 that all + * malformed sequences can be expressed unambiguously with a distinct subrange + * of Unicode code points.) + * + * When it is safe to assume that text is well-formed UTF-16 + * (does not contain single, unpaired surrogates), then one can use + * U16_..._UNSAFE macros. + * These do not check for proper code unit sequences or truncated text and may + * yield wrong results or even cause a crash if they are used with "malformed" + * text. + * In practice, U16_..._UNSAFE macros will produce slightly less code but + * should not be faster because the processing is only different when a + * surrogate code unit is detected, which will be rare. + * + * Similarly for UTF-8, there are "safe" macros without a suffix, + * and U8_..._UNSAFE versions. + * The performance differences are much larger here because UTF-8 provides so + * many opportunities for malformed sequences. + * The unsafe UTF-8 macros are entirely implemented inside the macro definitions + * and are fast, while the safe UTF-8 macros call functions for all but the + * trivial (ASCII) cases. + * + * Unlike with UTF-16, malformed sequences cannot be expressed with distinct + * code point values (0..U+10ffff). They are indicated with negative values instead. + * + * For more information see the ICU User Guide Strings chapter + * (http://oss.software.ibm.com/icu/userguide/). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + * + * @stable ICU 2.4 + */ + +#ifndef __UTF_H__ +#define __UTF_H__ + +#include "unicode/utypes.h" +/* include the utfXX.h after the following definitions */ + +/* single-code point definitions -------------------------------------------- */ + +/** + * This value is intended for sentinel values for APIs that + * (take or) return single code points (UChar32). + * It is outside of the Unicode code point range 0..0x10ffff. + * + * For example, a "done" or "error" value in a new API + * could be indicated with U_SENTINEL. + * + * ICU APIs designed before ICU 2.4 usually define service-specific "done" + * values, mostly 0xffff. + * Those may need to be distinguished from + * actual U+ffff text contents by calling functions like + * CharacterIterator::hasNext() or UnicodeString::length(). + * + * @return -1 + * @see UChar32 + * @stable ICU 2.4 + */ +#define U_SENTINEL (-1) + +/** + * Is this code point a Unicode noncharacter? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ + (uint32_t)(c)<=0x10ffff) + +/** + * Is c a Unicode code point value (0..U+10ffff) + * that can be assigned a character? + * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + ((uint32_t)(c)>0xdfff && \ + (uint32_t)(c)<=0x10ffff && \ + !U_IS_UNICODE_NONCHAR(c))) + +#ifndef U_HIDE_DRAFT_API + +/** + * Is this code point a BMP code point (U+0000..U+ffff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @draft ICU 2.8 + */ +#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff) + +/** + * Is this code point a supplementary code point (U+10000..U+10ffff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @draft ICU 2.8 + */ +#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff) + +#endif /*U_HIDE_DRAFT_API*/ + +/** + * Is this code point a lead surrogate (U+d800..U+dbff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code point a trail surrogate (U+dc00..U+dfff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code point a surrogate (U+d800..U+dfff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/* include the utfXX.h ------------------------------------------------------ */ + +#include "unicode/utf8.h" +#include "unicode/utf16.h" + +/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */ +#include "unicode/utf_old.h" + +#endif diff --git a/JavaScriptGlue/icu/unicode/utf16.h b/JavaScriptGlue/icu/unicode/utf16.h new file mode 100644 index 0000000..7bf3872 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/utf16.h @@ -0,0 +1,605 @@ +/* +******************************************************************************* +* +* Copyright (C) 1999-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf16.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 16-bit Unicode handling macros + * + * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. + * utf16.h is included by utf.h after unicode/umachine.h + * and some common definitions. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (http://oss.software.ibm.com/icu/userguide/). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + */ + +#ifndef __UTF16_H__ +#define __UTF16_H__ + +/* utf.h must be included first. */ +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) + +/** + * Is this code unit a lead surrogate (U+d800..U+dbff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code unit a trail surrogate (U+dc00..U+dfff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code unit a surrogate (U+d800..U+dfff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Helper constant for U16_GET_SUPPLEMENTARY. + * @internal + */ +#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) + +/** + * Get a supplementary code point value (U+10000..U+10ffff) + * from its lead and trail surrogates. + * The result is undefined if the input values are not + * lead and trail surrogates. + * + * @param lead lead surrogate (U+d800..U+dbff) + * @param trail trail surrogate (U+dc00..U+dfff) + * @return supplementary code point (U+10000..U+10ffff) + * @stable ICU 2.4 + */ +#define U16_GET_SUPPLEMENTARY(lead, trail) \ + (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) + + +/** + * Get the lead surrogate (0xd800..0xdbff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return lead surrogate (U+d800..U+dbff) for supplementary + * @stable ICU 2.4 + */ +#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) + +/** + * Get the trail surrogate (0xdc00..0xdfff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return trail surrogate (U+dc00..U+dfff) for supplementary + * @stable ICU 2.4 + */ +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +/** + * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) + * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). + * @param c 32-bit code point + * @return 1 or 2 + * @stable ICU 2.4 + */ +#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** + * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). + * @return 2 + * @stable ICU 2.4 + */ +#define U16_MAX_LENGTH 2 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * The result is undefined if the offset points to a single, unpaired surrogate. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_GET + * @stable ICU 2.4 + */ +#define U16_GET_UNSAFE(s, i, c) { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ + } else { \ + (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ + } \ + } \ +} + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * If the offset points to a single, unpaired surrogate, then that itself + * will be returned as the code point. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_GET_UNSAFE + * @stable ICU 2.4 + */ +#define U16_GET(s, start, i, length, c) { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } else { \ + if((i)-1>=(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ + } \ +} + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset points to a single, unpaired lead surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_NEXT + * @stable ICU 2.4 + */ +#define U16_NEXT_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ + } \ +} + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then that itself + * will be returned as the code point. + * + * @param s const UChar * string + * @param i string offset, i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_NEXT(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + uint16_t __c2; \ + if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const UChar * string buffer + * @param i string offset + * @param c code point to append + * @see U16_APPEND + * @stable ICU 2.4 + */ +#define U16_APPEND_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a surrogate pair is written, checks for sufficient space in the string. + * If the code point is not valid or a trail surrogate does not fit, + * then isError is set to TRUE. + * + * @param s const UChar * string buffer + * @param i string offset, i<length + * @param capacity size of the string buffer + * @param c code point to append + * @param isError output UBool set to TRUE if an error occurs, otherwise not modified + * @see U16_APPEND_UNSAFE + * @stable ICU 2.4 + */ +#define U16_APPEND(s, i, capacity, c, isError) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } else /* c>0x10ffff or not enough space */ { \ + (isError)=TRUE; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_FWD_1 + * @stable ICU 2.4 + */ +#define U16_FWD_1_UNSAFE(s, i) { \ + if(U16_IS_LEAD((s)[(i)++])) { \ + ++(i); \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param i string offset, i<length + * @param length string length + * @see U16_FWD_1_UNSAFE + * @stable ICU 2.4 + */ +#define U16_FWD_1(s, i, length) { \ + if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_FWD_N + * @stable ICU 2.4 + */ +#define U16_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param i string offset, i<length + * @param length string length + * @param n number of code points to skip + * @see U16_FWD_N_UNSAFE + * @stable ICU 2.4 + */ +#define U16_FWD_N(s, i, length, n) { \ + int32_t __N=(n); \ + while(__N>0 && (i)<(length)) { \ + U16_FWD_1(s, i, length); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_START + * @stable ICU 2.4 + */ +#define U16_SET_CP_START_UNSAFE(s, i) { \ + if(U16_IS_TRAIL((s)[i])) { \ + --(i); \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i + * @see U16_SET_CP_START_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_START(s, start, i) { \ + if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind a single, unpaired trail surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_PREV + * @stable ICU 2.4 + */ +#define U16_PREV_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then that itself + * will be returned as the code point. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i + * @param c output UChar32 variable + * @see U16_PREV_UNSAFE + * @stable ICU 2.4 + */ +#define U16_PREV(s, start, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + uint16_t __c2; \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_BACK_1 + * @stable ICU 2.4 + */ +#define U16_BACK_1_UNSAFE(s, i) { \ + if(U16_IS_TRAIL((s)[--(i)])) { \ + --(i); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i + * @see U16_BACK_1_UNSAFE + * @stable ICU 2.4 + */ +#define U16_BACK_1(s, start, i) { \ + if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_BACK_N + * @stable ICU 2.4 + */ +#define U16_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start start of string + * @param i string offset, i<length + * @param n number of code points to skip + * @see U16_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U16_BACK_N(s, start, i, n) { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U16_BACK_1(s, start, i); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \ + if(U16_IS_LEAD((s)[(i)-1])) { \ + ++(i); \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i<=length + * @param length string length + * @see U16_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT(s, start, i, length) { \ + if((start)<(i) && (i)<(length) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} + +#endif diff --git a/JavaScriptGlue/icu/unicode/utf8.h b/JavaScriptGlue/icu/unicode/utf8.h new file mode 100644 index 0000000..f83662b --- /dev/null +++ b/JavaScriptGlue/icu/unicode/utf8.h @@ -0,0 +1,627 @@ +/* +******************************************************************************* +* +* Copyright (C) 1999-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf8.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 8-bit Unicode handling macros + * + * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings. + * utf8.h is included by utf.h after unicode/umachine.h + * and some common definitions. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (http://oss.software.ibm.com/icu/userguide/). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + */ + +#ifndef __UTF8_H__ +#define __UTF8_H__ + +/* utf.h must be included first. */ +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* internal definitions ----------------------------------------------------- */ + +/** + * \var utf8_countTrailBytes + * Internal array with numbers of trail bytes for any given byte used in + * lead byte position. + * @internal + */ +#ifdef U_UTF8_IMPL +U_INTERNAL const uint8_t +#elif defined(U_STATIC_IMPLEMENTATION) +U_CFUNC const uint8_t +#else +U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/ +#endif +utf8_countTrailBytes[256]; + +/** + * Count the trail bytes for a UTF-8 lead byte. + * @internal + */ +#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) + +/** + * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. + * @internal + */ +#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) + +/** + * Function for handling "next code point" with error-checking. + * @internal + */ +U_INTERNAL UChar32 U_EXPORT2 +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); + +/** + * Function for handling "append code point" with error-checking. + * @internal + */ +U_INTERNAL int32_t U_EXPORT2 +utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError); + +/** + * Function for handling "previous code point" with error-checking. + * @internal + */ +U_INTERNAL UChar32 U_EXPORT2 +utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); + +/** + * Function for handling "skip backward one code point" with error-checking. + * @internal + */ +U_INTERNAL int32_t U_EXPORT2 +utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_SINGLE(c) (((c)&0x80)==0) + +/** + * Is this code unit (byte) a UTF-8 lead byte? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e) + +/** + * Is this code unit (byte) a UTF-8 trail byte? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80) + +/** + * How many code units (bytes) are used for the UTF-8 encoding + * of this Unicode code point? + * @param c 32-bit code point + * @return 1..4, or 0 if c is a surrogate or not a Unicode code point + * @stable ICU 2.4 + */ +#define U8_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xd7ff ? 3 : \ + ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ + ((uint32_t)(c)<=0xffff ? 3 : 4)\ + ) \ + ) \ + ) \ + ) + +/** + * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). + * @return 4 + * @stable ICU 2.4 + */ +#define U8_MAX_LENGTH 4 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * The result is undefined if the offset points to an illegal UTF-8 + * byte sequence. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_GET + * @stable ICU 2.4 + */ +#define U8_GET_UNSAFE(s, i, c) { \ + int32_t _u8_get_unsafe_index=(int32_t)(i); \ + U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ + U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ +} + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * If the offset points to an illegal UTF-8 byte sequence, then + * c is set to a negative value. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param start starting string offset + * @param i string offset, start<=i<length + * @param length string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_GET_UNSAFE + * @stable ICU 2.4 + */ +#define U8_GET(s, start, i, length, c) { \ + int32_t _u8_get_index=(int32_t)(i); \ + U8_SET_CP_START(s, start, _u8_get_index); \ + U8_NEXT(s, _u8_get_index, length, c); \ +} + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * The result is undefined if the offset points to a trail byte + * or an illegal UTF-8 sequence. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_NEXT + * @stable ICU 2.4 + */ +#define U8_NEXT_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ + if((uint8_t)((c)-0xc0)<0x35) { \ + uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \ + U8_MASK_LEAD_BYTE(c, __count); \ + switch(__count) { \ + /* each following branch falls through to the next one */ \ + case 3: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + case 2: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + case 1: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + /* no other branches to optimize switch() */ \ + break; \ + } \ + } \ +} + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * If the offset points to a trail byte or an illegal UTF-8 sequence, then + * c is set to a negative value. + * + * @param s const uint8_t * string + * @param i string offset, i<length + * @param length string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_NEXT(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(((uint8_t)(c))>=0x80) { \ + if(U8_IS_LEAD(c)) { \ + (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -1); \ + } else { \ + (c)=U_SENTINEL; \ + } \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const uint8_t * string buffer + * @param i string offset + * @param c code point to append + * @see U8_APPEND + * @stable ICU 2.4 + */ +#define U8_APPEND_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + if((uint32_t)(c)<=0x7ff) { \ + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + } else { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a non-ASCII code point is written, checks for sufficient space in the string. + * If the code point is not valid or trail bytes do not fit, + * then isError is set to TRUE. + * + * @param s const uint8_t * string buffer + * @param i string offset, i<length + * @param length size of the string buffer + * @param c code point to append + * @param isError output UBool set to TRUE if an error occurs, otherwise not modified + * @see U8_APPEND_UNSAFE + * @stable ICU 2.4 + */ +#define U8_APPEND(s, i, length, c, isError) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, &(isError)); \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_FWD_1 + * @stable ICU 2.4 + */ +#define U8_FWD_1_UNSAFE(s, i) { \ + (i)+=1+U8_COUNT_TRAIL_BYTES((s)[i]); \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param i string offset, i<length + * @param length string length + * @see U8_FWD_1_UNSAFE + * @stable ICU 2.4 + */ +#define U8_FWD_1(s, i, length) { \ + uint8_t __b=(s)[(i)++]; \ + if(U8_IS_LEAD(__b)) { \ + uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \ + if((i)+__count>(length)) { \ + __count=(uint8_t)((length)-(i)); \ + } \ + while(__count>0 && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + --__count; \ + } \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_FWD_N + * @stable ICU 2.4 + */ +#define U8_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param i string offset, i<length + * @param length string length + * @param n number of code points to skip + * @see U8_FWD_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_FWD_N(s, i, length, n) { \ + int32_t __N=(n); \ + while(__N>0 && (i)<(length)) { \ + U8_FWD_1(s, i, length); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_START + * @stable ICU 2.4 + */ +#define U8_SET_CP_START_UNSAFE(s, i) { \ + while(U8_IS_TRAIL((s)[i])) { --(i); } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i + * @see U8_SET_CP_START_UNSAFE + * @stable ICU 2.4 + */ +#define U8_SET_CP_START(s, start, i) { \ + if(U8_IS_TRAIL((s)[(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \ + } \ +} + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind an illegal UTF-8 sequence. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_PREV + * @stable ICU 2.4 + */ +#define U8_PREV_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ + if(U8_IS_TRAIL(c)) { \ + uint8_t __b, __count=1, __shift=6; \ +\ + /* c is a trail byte */ \ + (c)&=0x3f; \ + for(;;) { \ + __b=(s)[--(i)]; \ + if(__b>=0xc0) { \ + U8_MASK_LEAD_BYTE(__b, __count); \ + (c)|=(UChar32)__b<<__shift; \ + break; \ + } else { \ + (c)|=(UChar32)(__b&0x3f)<<__shift; \ + ++__count; \ + __shift+=6; \ + } \ + } \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value. + * + * @param s const uint8_t * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_PREV_UNSAFE + * @stable ICU 2.4 + */ +#define U8_PREV(s, start, i, c) { \ + (c)=(s)[--(i)]; \ + if((c)>=0x80) { \ + if((c)<=0xbf) { \ + (c)=utf8_prevCharSafeBody(s, start, &(i), c, -1); \ + } else { \ + (c)=U_SENTINEL; \ + } \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_BACK_1 + * @stable ICU 2.4 + */ +#define U8_BACK_1_UNSAFE(s, i) { \ + while(U8_IS_TRAIL((s)[--(i)])) {} \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i + * @see U8_BACK_1_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_1(s, start, i) { \ + if(U8_IS_TRAIL((s)[--(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_BACK_N + * @stable ICU 2.4 + */ +#define U8_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start index of the start of the string + * @param i string offset, i<length + * @param n number of code points to skip + * @see U8_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_N(s, start, i, n) { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U8_BACK_1(s, start, i); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \ + U8_BACK_1_UNSAFE(s, i); \ + U8_FWD_1_UNSAFE(s, i); \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start starting string offset (usually 0) + * @param i string offset, start<=i<=length + * @param length string length + * @see U8_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT(s, start, i, length) { \ + if((start)<(i) && (i)<(length)) { \ + U8_BACK_1(s, start, i); \ + U8_FWD_1(s, i, length); \ + } \ +} + +#endif diff --git a/JavaScriptGlue/icu/unicode/utf_old.h b/JavaScriptGlue/icu/unicode/utf_old.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/utf_old.h diff --git a/JavaScriptGlue/icu/unicode/utypes.h b/JavaScriptGlue/icu/unicode/utypes.h new file mode 100644 index 0000000..e20cd79 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/utypes.h @@ -0,0 +1,745 @@ +/* +********************************************************************** +* Copyright (C) 1996-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* FILE NAME : UTYPES.H (formerly ptypes.h) +* +* Date Name Description +* 12/11/96 helena Creation. +* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32, +* uint8, uint16, and uint32. +* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as +* well as C++. +* Modified to use memcpy() for uprv_arrayCopy() fns. +* 04/14/97 aliu Added TPlatformUtilities. +* 05/07/97 aliu Added import/export specifiers (replacing the old +* broken EXT_CLASS). Added version number for our +* code. Cleaned up header. +* 6/20/97 helena Java class name change. +* 08/11/98 stephen UErrorCode changed from typedef to enum +* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3 +* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t +* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066) +* 04/20/99 stephen Cleaned up & reworked for autoconf. +* Renamed to utypes.h. +* 05/05/99 stephen Changed to use <inttypes.h> +* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here. +******************************************************************************* +*/ + +#ifndef UTYPES_H +#define UTYPES_H + + +#include "unicode/umachine.h" +#include "unicode/utf.h" +#include "unicode/uversion.h" +#include "unicode/uconfig.h" + +#ifdef U_HIDE_DRAFT_API +#include "unicode/udraft.h" +#endif + +#ifdef U_HIDE_DEPRECATED_API +#include "unicode/udeprctd.h" +#endif + +#ifdef U_HIDE_DEPRECATED_API +#include "unicode/uobslete.h" +#endif + + +/*! + * \file + * \brief Basic definitions for ICU, for both C and C++ APIs + * + * This file defines basic types, constants, and enumerations directly or + * indirectly by including other header files, especially utf.h for the + * basic character and string definitions and umachine.h for consistent + * integer and other types. + */ + +/*===========================================================================*/ +/* char Character set family */ +/*===========================================================================*/ + +/** + * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform. + * @stable ICU 2.0 + */ +#define U_ASCII_FAMILY 0 + +/** + * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform. + * @stable ICU 2.0 + */ +#define U_EBCDIC_FAMILY 1 + +/** + * \def U_CHARSET_FAMILY + * + * <p>These definitions allow to specify the encoding of text + * in the char data type as defined by the platform and the compiler. + * It is enough to determine the code point values of "invariant characters", + * which are the ones shared by all encodings that are in use + * on a given platform.</p> + * + * <p>Those "invariant characters" should be all the uppercase and lowercase + * latin letters, the digits, the space, and "basic punctuation". + * Also, '\\n', '\\r', '\\t' should be available.</p> + * + * <p>The list of "invariant characters" is:<br> + * \code + * A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _ + * \endcode + * <br> + * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p> + * + * <p>This matches the IBM Syntactic Character Set (CS 640).</p> + * + * <p>In other words, all the graphic characters in 7-bit ASCII should + * be safely accessible except the following:</p> + * + * \code + * '\' <backslash> + * '[' <left bracket> + * ']' <right bracket> + * '{' <left brace> + * '}' <right brace> + * '^' <circumflex> + * '~' <tilde> + * '!' <exclamation mark> + * '#' <number sign> + * '|' <vertical line> + * '$' <dollar sign> + * '@' <commercial at> + * '`' <grave accent> + * \endcode + * @stable ICU 2.0 + */ + +#ifndef U_CHARSET_FAMILY +# define U_CHARSET_FAMILY 0 +#endif + +/*===========================================================================*/ +/* ICUDATA naming scheme */ +/*===========================================================================*/ + +/** + * \def U_ICUDATA_TYPE_LETTER + * + * This is a platform-dependent string containing one letter: + * - b for big-endian, ASCII-family platforms + * - l for little-endian, ASCII-family platforms + * - e for big-endian, EBCDIC-family platforms + * This letter is part of the common data file name. + * @stable ICU 2.0 + */ + +/** + * \def U_ICUDATA_TYPE_LITLETTER + * The non-string form of U_ICUDATA_TYPE_LETTER + * @stable ICU 2.0 + */ +#if U_CHARSET_FAMILY +# if U_IS_BIG_ENDIAN + /* EBCDIC - should always be BE */ +# define U_ICUDATA_TYPE_LETTER "e" +# define U_ICUDATA_TYPE_LITLETTER e +# else +# error "Don't know what to do with little endian EBCDIC!" +# define U_ICUDATA_TYPE_LETTER "x" +# define U_ICUDATA_TYPE_LITLETTER x +# endif +#else +# if U_IS_BIG_ENDIAN + /* Big-endian ASCII */ +# define U_ICUDATA_TYPE_LETTER "b" +# define U_ICUDATA_TYPE_LITLETTER b +# else + /* Little-endian ASCII */ +# define U_ICUDATA_TYPE_LETTER "l" +# define U_ICUDATA_TYPE_LITLETTER l +# endif +#endif + +/** + * A single string literal containing the icudata stub name. i.e. 'icudt18e' for + * ICU 1.8.x on EBCDIC, etc.. + * @stable ICU 2.0 + */ +#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER + + +/** + * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library. + * Defined as a literal, not a string. + * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string + * from the corresponding macro invocation, _before_ other macro substitutions. + * Need a nested #defines to get the actual version numbers rather than + * the literal text U_ICU_VERSION_MAJOR_NUM into the name. + * The net result will be something of the form + * #define U_ICU_ENTRY_POINT icudt19_dat + * @stable ICU 2.4 + */ +#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM) +/** + * @internal + */ +#define U_DEF2_ICUDATA_ENTRY_POINT(major, minor) U_DEF_ICUDATA_ENTRY_POINT(major, minor) +/** + * @internal + */ +#define U_DEF_ICUDATA_ENTRY_POINT(major, minor) icudt##major##minor##_dat + +/** + * \def U_CALLCONV + * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary + * in callback function typedefs to make sure that the calling convention + * is compatible. + * + * This is only used for non-ICU-API functions. + * When a function is a public ICU API, + * you must use the U_CAPI and U_EXPORT2 qualifiers. + * @stable ICU 2.0 + */ +#if defined(OS390) && (__COMPILER_VER__ < 0x41020000) && defined(XP_CPLUSPLUS) +# define U_CALLCONV __cdecl +#else +# define U_CALLCONV U_EXPORT2 +#endif + +/** + * \def NULL + * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C. + * @stable ICU 2.0 + */ +#ifndef NULL +#ifdef XP_CPLUSPLUS +#define NULL 0 +#else +#define NULL ((void *)0) +#endif +#endif + +/*===========================================================================*/ +/* Calendar/TimeZone data types */ +/*===========================================================================*/ + +/** + * Date and Time data type. + * This is a primitive data type that holds the date and time + * as the number of milliseconds since 1970-jan-01, 00:00 UTC. + * UTC leap seconds are ignored. + * @stable ICU 2.0 + */ +typedef double UDate; + +/** The number of milliseconds per second @stable ICU 2.0 */ +#define U_MILLIS_PER_SECOND (1000) +/** The number of milliseconds per minute @stable ICU 2.0 */ +#define U_MILLIS_PER_MINUTE (60000) +/** The number of milliseconds per hour @stable ICU 2.0 */ +#define U_MILLIS_PER_HOUR (3600000) +/** The number of milliseconds per day @stable ICU 2.0 */ +#define U_MILLIS_PER_DAY (86400000) + + +/*===========================================================================*/ +/* UClassID-based RTTI */ +/*===========================================================================*/ + +/** + * UClassID is used to identify classes without using RTTI, since RTTI + * is not yet supported by all C++ compilers. Each class hierarchy which needs + * to implement polymorphic clone() or operator==() defines two methods, + * described in detail below. UClassID values can be compared using + * operator==(). Nothing else should be done with them. + * + * \par + * getDynamicClassID() is declared in the base class of the hierarchy as + * a pure virtual. Each concrete subclass implements it in the same way: + * + * \code + * class Base { + * public: + * virtual UClassID getDynamicClassID() const = 0; + * } + * + * class Derived { + * public: + * virtual UClassID getDynamicClassID() const + * { return Derived::getStaticClassID(); } + * } + * \endcode + * + * Each concrete class implements getStaticClassID() as well, which allows + * clients to test for a specific type. + * + * \code + * class Derived { + * public: + * static UClassID U_EXPORT2 getStaticClassID(); + * private: + * static char fgClassID; + * } + * + * // In Derived.cpp: + * UClassID Derived::getStaticClassID() + * { return (UClassID)&Derived::fgClassID; } + * char Derived::fgClassID = 0; // Value is irrelevant + * \endcode + * @stable ICU 2.0 + */ +typedef void* UClassID; + +/*===========================================================================*/ +/* Shared library/DLL import-export API control */ +/*===========================================================================*/ + +/* + * Control of symbol import/export. + * ICU is separated into three libraries. + */ + +/* + * \def U_COMBINED_IMPLEMENTATION + * Set to export library symbols from inside the ICU library + * when all of ICU is in a single library. + * This can be set as a compiler option while building ICU, and it + * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc. + * @stable ICU 2.0 + */ + +/** + * \def U_DATA_API + * Set to export library symbols from inside the stubdata library, + * and to import them from outside. + * @draft ICU 3.0 + */ + +/** + * \def U_COMMON_API + * Set to export library symbols from inside the common library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_I18N_API + * Set to export library symbols from inside the i18n library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_LAYOUT_API + * Set to export library symbols from inside the layout engine library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_LAYOUTEX_API + * Set to export library symbols from inside the layout extensions library, + * and to import them from outside. + * @stable ICU 2.6 + */ + +/** + * \def U_IO_API + * Set to export library symbols from inside the ustdio library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +#if defined(U_COMBINED_IMPLEMENTATION) +#define U_DATA_API U_EXPORT +#define U_COMMON_API U_EXPORT +#define U_I18N_API U_EXPORT +#define U_LAYOUT_API U_EXPORT +#define U_LAYOUTEX_API U_EXPORT +#define U_IO_API U_EXPORT +#elif defined(U_STATIC_IMPLEMENTATION) +#define U_DATA_API +#define U_COMMON_API +#define U_I18N_API +#define U_LAYOUT_API +#define U_LAYOUTEX_API +#define U_IO_API +#elif defined(U_COMMON_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_EXPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#elif defined(U_I18N_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_EXPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#elif defined(U_LAYOUT_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_EXPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#elif defined(U_LAYOUTEX_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_EXPORT +#define U_IO_API U_IMPORT +#elif defined(U_IO_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_EXPORT +#else +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#endif + +/** + * \def U_STANDARD_CPP_NAMESPACE + * Control of C++ Namespace + * @stable ICU 2.0 + */ +#ifdef __cplusplus +#define U_STANDARD_CPP_NAMESPACE :: +#else +#define U_STANDARD_CPP_NAMESPACE +#endif + + +/*===========================================================================*/ +/* Global delete operator */ +/*===========================================================================*/ + +/* + * The ICU4C library must not use the global new and delete operators. + * These operators here are defined to enable testing for this. + * See Jitterbug 2581 for details of why this is necessary. + * + * Verification that ICU4C's memory usage is correct, i.e., + * that global new/delete are not used: + * + * a) Check for imports of global new/delete (see uobject.cpp for details) + * b) Verify that new is never imported. + * c) Verify that delete is only imported from object code for interface/mixin classes. + * d) Add global delete and delete[] only for the ICU4C library itself + * and define them in a way that crashes or otherwise easily shows a problem. + * + * The following implements d). + * The operator implementations crash; this is intentional and used for library debugging. + * + * Note: This is currently only done on Windows because + * some Linux/Unix compilers have problems with defining global new/delete. + * On Windows, WIN32 is defined, and it is _MSC_Ver>=1200 for MSVC 6.0 and higher. + */ +#if defined(XP_CPLUSPLUS) && defined(WIN32) && (_MSC_Ver>=1200) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_USTDIO_IMPLEMENTATION)) + +/** + * Global operator new, defined only inside ICU4C, must not be used. + * Crashes intentionally. + * @internal + */ +inline void * +operator new(size_t /*size*/) { + char *q=NULL; + *q=5; /* break it */ + return q; +} + +/** + * Global operator new[], defined only inside ICU4C, must not be used. + * Crashes intentionally. + * @internal + */ +inline void * +operator new[](size_t /*size*/) { + char *q=NULL; + *q=5; /* break it */ + return q; +} + +/** + * Global operator delete, defined only inside ICU4C, must not be used. + * Crashes intentionally. + * @internal + */ +inline void +operator delete(void * /*p*/) { + char *q=NULL; + *q=5; /* break it */ +} + +/** + * Global operator delete[], defined only inside ICU4C, must not be used. + * Crashes intentionally. + * @internal + */ +inline void +operator delete[](void * /*p*/) { + char *q=NULL; + *q=5; /* break it */ +} + +#endif + +/*===========================================================================*/ +/* UErrorCode */ +/*===========================================================================*/ + +/** + * Error code to replace exception handling, so that the code is compatible with all C++ compilers, + * and to use the same mechanism for C and C++. + * + * \par + * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode + * first test if(U_FAILURE(errorCode)) { return immediately; } + * so that in a chain of such functions the first one that sets an error code + * causes the following ones to not perform any operations. + * + * \par + * Error codes should be tested using U_FAILURE() and U_SUCCESS(). + * @stable ICU 2.0 + */ +typedef enum UErrorCode { + /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird + * and is that way because VC++ debugger displays first encountered constant, + * which is not the what the code is used for + */ + + U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */ + + U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */ + + U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */ + + U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */ + + U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */ + + U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */ + + U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */ + + U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */ + + U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */ + + U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */ + + + U_ZERO_ERROR = 0, /**< No error, no warning. */ + + U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */ + U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */ + U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */ + U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */ + U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */ + U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */ + U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */ + U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */ + U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */ + U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */ + U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */ + U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units.. */ + U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */ + U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */ + U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ + U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ + U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ + U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */ + U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ + U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ + U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ + U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ + U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ + U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. + It is very possible that a circular alias definition has occured */ + U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ + U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ + U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ + U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */ + U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */ + + U_STANDARD_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for standard errors */ + /* + * the error code range 0x10000 0x10100 are reserved for Transliterator + */ + U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */ + U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */ + U_MALFORMED_RULE, /**< Elements of a rule are misplaced */ + U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/ + U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */ + U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/ + U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */ + U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */ + U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */ + U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */ + U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */ + U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */ + U_MISSING_OPERATOR, /**< A rule contains no operator */ + U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */ + U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */ + U_MULTIPLE_CURSORS, /**< More than one cursor */ + U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */ + U_TRAILING_BACKSLASH, /**< A dangling backslash */ + U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */ + U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */ + U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */ + U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */ + U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */ + U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */ + U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ + U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */ + U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ + U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */ + U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ + U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ + U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ + U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */ + U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */ + U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */ + U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */ + U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */ + U_PARSE_ERROR_LIMIT, /**< The limit for Transliterator errors */ + + /* + * the error code range 0x10100 0x10200 are reserved for formatting API parsing error + */ + U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */ + U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */ + U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */ + U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */ + U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */ + U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */ + U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */ + U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */ + U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */ + U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */ + U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */ + U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */ + U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */ + U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */ + U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */ + + /* + * the error code range 0x10200 0x102ff are reserved for Break Iterator related error + */ + U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */ + U_BRK_INTERNAL_ERROR, /**< An internal error (bug) was detected. */ + U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */ + U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ + U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ + U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */ + U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */ + U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ + U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ + U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */ + U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */ + U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ + U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ + U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ + U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */ + U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */ + + /* + * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs + */ + U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */ + U_REGEX_INTERNAL_ERROR, /**< An internal error (bug) was detected. */ + U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */ + U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */ + U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */ + U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */ + U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */ + U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */ + U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */ + U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */ + U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */ + U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */ + U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */ + U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */ + U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/ + U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */ + + /* + * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes + */ + U_IDNA_ERROR_START=0x10400, + U_IDNA_PROHIBITED_ERROR, + U_IDNA_UNASSIGNED_ERROR, + U_IDNA_CHECK_BIDI_ERROR, + U_IDNA_STD3_ASCII_RULES_ERROR, + U_IDNA_ACE_PREFIX_ERROR, + U_IDNA_VERIFICATION_ERROR, + U_IDNA_LABEL_TOO_LONG_ERROR, + U_IDNA_ERROR_LIMIT, + /* + * Aliases for StringPrep + */ + U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR, + U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR, + U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR, + + + U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */ +} UErrorCode; + +/* Use the following to determine if an UErrorCode represents */ +/* operational success or failure. */ + +#ifdef XP_CPLUSPLUS + /** + * Does the error code indicate success? + * @stable ICU 2.0 + */ + static + inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); } + /** + * Does the error code indicate a failure? + * @stable ICU 2.0 + */ + static + inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); } +#else + /** + * Does the error code indicate success? + * @stable ICU 2.0 + */ +# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR) + /** + * Does the error code indicate a failure? + * @stable ICU 2.0 + */ +# define U_FAILURE(x) ((x)>U_ZERO_ERROR) +#endif + +/** + * Return a string for a UErrorCode value. + * The string will be the same as the name of the error code constant + * in the UErrorCode enum above. + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +u_errorName(UErrorCode code); + + +#endif /* _UTYPES */ diff --git a/JavaScriptGlue/icu/unicode/uversion.h b/JavaScriptGlue/icu/unicode/uversion.h new file mode 100644 index 0000000..f9a7755 --- /dev/null +++ b/JavaScriptGlue/icu/unicode/uversion.h @@ -0,0 +1,216 @@ +/* +******************************************************************************* +* Copyright (C) 2000-2004, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uversion.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* +* Contains all the important version numbers for ICU. +* Gets included by utypes.h and Windows .rc files +*/ + +/*===========================================================================*/ +/* Main ICU version information */ +/*===========================================================================*/ + +#ifndef UVERSION_H +#define UVERSION_H + +/** IMPORTANT: When updating version, the following things need to be done: */ +/** source/common/unicode/uversion.h - this file: update major, minor, */ +/** patchlevel, suffix, version, short version constants, namespace, */ +/** and copyright */ +/** source/common/common.dsp - update 'Output file name' on the link tab so */ +/** that it contains the new major/minor combination */ +/** source/i18n/i18n.dsp - same as for the common.dsp */ +/** source/layout/layout.dsp - same as for the common.dsp */ +/** source/stubdata/stubdata.dsp - same as for the common.dsp */ +/** source/extra/ustdio/ustdio.dsp - same as for the common.dsp */ +/** source/data/makedata.mak - change U_ICUDATA_NAME so that it contains */ +/** the new major/minor combination */ +/** source/tools/genren/genren.pl - use this script according to the README */ +/** in that folder */ + +#include "unicode/umachine.h" + +/** The standard copyright notice that gets compiled into each library. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING \ + " Copyright (C) 2004, International Business Machines Corporation and others. All Rights Reserved. " + +/** Maximum length of the copyright string. + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING_LENGTH 128 + +/** The current ICU major version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_MAJOR_NUM 3 + +/** The current ICU minor version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_MINOR_NUM 2 + +/** The current ICU patchlevel version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_PATCHLEVEL_NUM 0 + +/** Glued version suffix for renamers + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SUFFIX _3_2 + +/** The current ICU library version as a dotted-decimal string. The patchlevel + * only appears in this string if it non-zero. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION "3.2" + +/** The current ICU library major/minor version as a string without dots, for library name suffixes. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SHORT "32" + +/** An ICU version consists of up to 4 numbers from 0..255. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_LENGTH 4 + +/** In a string, ICU version fields are delimited by dots. + * @stable ICU 2.4 + */ +#define U_VERSION_DELIMITER '.' + +/** The maximum length of an ICU version string. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_STRING_LENGTH 20 + +/** The binary form of a version on ICU APIs is an array of 4 uint8_t. + * @stable ICU 2.4 + */ +typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; + +#if U_HAVE_NAMESPACE && defined(XP_CPLUSPLUS) +#if U_DISABLE_RENAMING +#define U_ICU_NAMESPACE icu +namespace U_ICU_NAMESPACE { } +#else +#define U_ICU_NAMESPACE icu_3_2 +namespace U_ICU_NAMESPACE { } +namespace icu = U_ICU_NAMESPACE; +#endif +U_NAMESPACE_USE +#endif + + +/*===========================================================================*/ +/* General version helper functions. Definitions in putil.c */ +/*===========================================================================*/ + +/** + * Parse a string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. + * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A string with dotted-decimal version information, + * with up to four non-negative number fields with + * values of up to 255 each. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +u_versionFromString(UVersionInfo versionArray, const char *versionString); + +/** + * Write a string with dotted-decimal version information according + * to the input UVersionInfo. + * Definition of this function lives in putil.c + * + * @param versionArray The version information to be written as a string. + * @param versionString A string buffer that will be filled in with + * a string corresponding to the numeric version + * information in versionArray. + * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +u_versionToString(UVersionInfo versionArray, char *versionString); + +/** + * Gets the ICU release version. The version array stores the version information + * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. + * Definition of this function lives in putil.c + * + * @param versionArray the version # information, the result will be filled in + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_getVersion(UVersionInfo versionArray); + + +/*=========================================================================== + * ICU collation framework version information + * Version info that can be obtained from a collator is affected by these + * numbers in a secret and magic way. Please use collator version as whole + *=========================================================================== + */ + +/** Collation runtime version (sort key generator, strcoll). + * If the version is different, sortkeys for the same string could be different + * version 2 was in ICU 1.8.1. changed is: compression intervals, French secondary + * compression, generating quad level always when strength is quad or more + * version 4 - ICU 2.2 - tracking UCA changes, ignore completely ignorables + * in contractions, ignore primary ignorables after shifted + * version 5 - ICU 2.8 - changed implicit generation code + * This value may change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define UCOL_RUNTIME_VERSION 5 + +/** Builder code version. When this is different, same tailoring might result + * in assigning different collation elements to code points + * version 2 was in ICU 1.8.1. added support for prefixes, tweaked canonical + * closure. However, the tailorings should probably get same CEs assigned + * version 5 - ICU 2.2 - fixed some bugs, renamed some indirect values. + * version 6 - ICU 2.8 - fixed bug in builder that allowed 0xFF in primary values + * Backward compatible with the old rules. + * This value may change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define UCOL_BUILDER_VERSION 6 + +/** *** Removed *** Instead we use the data we read from FractionalUCA.txt + * This is the version of FractionalUCA.txt tailoring rules + * Version 1 was in ICU 1.8.1. Version two contains canonical closure for + * supplementary code points + * Version 4 in ICU 2.2, following UCA=3.1.1d6, UCD=3.2.0 + * This value may change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +/*#define UCOL_FRACTIONAL_UCA_VERSION 4*/ + +/** This is the version of the tailorings + * This value may change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define UCOL_TAILORINGS_VERSION 1 + +#endif |