diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2008-10-21 07:00:00 -0700 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2008-10-21 07:00:00 -0700 |
commit | 7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407 (patch) | |
tree | df5a6539447324de36e95b057d6b9f0361b7a250 /include/utils/AndroidUnicode.h | |
download | frameworks_native-7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407.zip frameworks_native-7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407.tar.gz frameworks_native-7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407.tar.bz2 |
Initial Contribution
Diffstat (limited to 'include/utils/AndroidUnicode.h')
-rw-r--r-- | include/utils/AndroidUnicode.h | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/include/utils/AndroidUnicode.h b/include/utils/AndroidUnicode.h new file mode 100644 index 0000000..563fcd0 --- /dev/null +++ b/include/utils/AndroidUnicode.h @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2006 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// + +#ifndef ANDROID_UNICODE_H +#define ANDROID_UNICODE_H + +#include <stdint.h> +#include <sys/types.h> + +#define REPLACEMENT_CHAR (0xFFFD) + +// this part of code is copied from umachine.h under ICU +/** + * Define UChar32 as a type for single Unicode code points. + * UChar32 is a signed 32-bit integer (same as int32_t). + * + * The Unicode code point range is 0..0x10ffff. + * All other values (negative or >=0x110000) are illegal as Unicode code points. + * They may be used as sentinel values to indicate "done", "error" + * or similar non-code point conditions. + * + * @stable ICU 2.4 + */ +typedef int32_t UChar32; + +namespace android { + + class Encoding; + /** + * \class Unicode + * + * Helper class for getting properties of Unicode characters. Characters + * can have one of the types listed in CharType and each character can have the + * directionality of Direction. + */ + class Unicode + { + public: + /** + * Directions specified in the Unicode standard. These directions map directly + * to java.lang.Character. 
+ */ + enum Direction { + DIRECTIONALITY_UNDEFINED = -1, + DIRECTIONALITY_LEFT_TO_RIGHT, + DIRECTIONALITY_RIGHT_TO_LEFT, + DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, + DIRECTIONALITY_EUROPEAN_NUMBER, + DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, + DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, + DIRECTIONALITY_ARABIC_NUMBER, + DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, + DIRECTIONALITY_NONSPACING_MARK, + DIRECTIONALITY_BOUNDARY_NEUTRAL, + DIRECTIONALITY_PARAGRAPH_SEPARATOR, + DIRECTIONALITY_SEGMENT_SEPARATOR, + DIRECTIONALITY_WHITESPACE, + DIRECTIONALITY_OTHER_NEUTRALS, + DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, + DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, + DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, + DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, + DIRECTIONALITY_POP_DIRECTIONAL_FORMAT + }; + + /** + * Character types as specified in the Unicode standard. These map directly to + * java.lang.Character. + */ + enum CharType { + CHARTYPE_UNASSIGNED = 0, + CHARTYPE_UPPERCASE_LETTER, + CHARTYPE_LOWERCASE_LETTER, + CHARTYPE_TITLECASE_LETTER, + CHARTYPE_MODIFIER_LETTER, + CHARTYPE_OTHER_LETTER, + CHARTYPE_NON_SPACING_MARK, + CHARTYPE_ENCLOSING_MARK, + CHARTYPE_COMBINING_SPACING_MARK, + CHARTYPE_DECIMAL_DIGIT_NUMBER, + CHARTYPE_LETTER_NUMBER, + CHARTYPE_OTHER_NUMBER, + CHARTYPE_SPACE_SEPARATOR, + CHARTYPE_LINE_SEPARATOR, + CHARTYPE_PARAGRAPH_SEPARATOR, + CHARTYPE_CONTROL, + CHARTYPE_FORMAT, + CHARTYPE_MISSING_VALUE_FOR_JAVA, /* This is the mysterious missing 17 value from the java constants */ + CHARTYPE_PRIVATE_USE, + CHARTYPE_SURROGATE, + CHARTYPE_DASH_PUNCTUATION, + CHARTYPE_START_PUNCTUATION, + CHARTYPE_END_PUNCTUATION, + CHARTYPE_CONNECTOR_PUNCTUATION, + CHARTYPE_OTHER_PUNCTUATION, + CHARTYPE_MATH_SYMBOL, + CHARTYPE_CURRENCY_SYMBOL, + CHARTYPE_MODIFIER_SYMBOL, + CHARTYPE_OTHER_SYMBOL, + CHARTYPE_INITIAL_QUOTE_PUNCTUATION, + CHARTYPE_FINAL_QUOTE_PUNCTUATION + }; + + /** + * Decomposition types as described by the Unicode standard. These values map to + * the same values in uchar.h in ICU. 
+ */ + enum DecompositionType { + DECOMPOSITION_NONE = 0, + DECOMPOSITION_CANONICAL, + DECOMPOSITION_COMPAT, + DECOMPOSITION_CIRCLE, + DECOMPOSITION_FINAL, + DECOMPOSITION_FONT, + DECOMPOSITION_FRACTION, + DECOMPOSITION_INITIAL, + DECOMPOSITION_ISOLATED, + DECOMPOSITION_MEDIAL, + DECOMPOSITION_NARROW, + DECOMPOSITION_NOBREAK, + DECOMPOSITION_SMALL, + DECOMPOSITION_SQUARE, + DECOMPOSITION_SUB, + DECOMPOSITION_SUPER, + DECOMPOSITION_VERTICAL, + DECOMPOSITION_WIDE + }; + + /** + * Returns the packed data for Java calls + * @param c The unicode character. + * @return The packed data for the character. + * + * Copied from java.lang.Character implementation: + * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + * F E D C B A 9 8 7 6 5 4 3 2 1 0 F E D C B A 9 8 7 6 5 4 3 2 1 0 + * + * 31 types --------- + * 18 directionalities --------- + * 2 mirroreds - + * ----------- 56 toupper diffs + * ----------- 48 tolower diffs + * --- 4 totitlecase diffs + * ------------- 84 numeric values + * --------- 24 mirror char diffs + */ + static uint32_t getPackedData(UChar32 c); + + /** + * Get the Character type. + * @param c The unicode character. + * @return The character's type or CHARTYPE_UNASSIGNED if the character is invalid + * or has an unassigned class. + */ + static CharType getType(UChar32 c); + + /** + * Get the Character's decomposition type. + * @param c The unicode character. + * @return The character's decomposition type or DECOMPOSITION_NONE if there + * is no decomposition. + */ + static DecompositionType getDecompositionType(UChar32 c); + + /** + * Returns the digit value of a character or -1 if the character + * is not within the specified radix. + * + * The digit value is computed for integer characters and letters + * within the given radix. This function does not handle Roman Numerals, + * fractions, or any other characters that may represent numbers. + * + * @param c The unicode character + * @param radix The intended radix. 
+ * @return The digit value or -1 if there is no digit value or if the value is outside the radix. + */ + static int getDigitValue(UChar32 c, int radix = 10); + + /** + * Return the numeric value of a character + * + * @param c The unicode character. + * @return The numeric value of the character. -1 if the character has no numeric value, + * -2 if the character has a numeric value that is not representable by an integer. + */ + static int getNumericValue(UChar32 c); + + /** + * Convert the character to lowercase + * @param c The unicode character. + * @return The lowercase character equivalent of c. If c does not have a lowercase equivalent, + * the original character is returned. + */ + static UChar32 toLower(UChar32 c); + + /** + * Convert the character to uppercase + * @param c The unicode character. + * @return The uppercase character equivalent of c. If c does not have an uppercase equivalent, + * the original character is returned. + */ + static UChar32 toUpper(UChar32 c); + + /** + * Get the directionality of the character. + * @param c The unicode character. + * @return The direction of the character or DIRECTIONALITY_UNDEFINED. + */ + static Direction getDirectionality(UChar32 c); + + /** + * Check if the character is a mirrored character. This means that the character + * has an equivalent character that is the mirror image of itself. + * @param c The unicode character. + * @return True iff c has a mirror equivalent. + */ + static bool isMirrored(UChar32 c); + + /** + * Return the mirror of the given character. + * @param c The unicode character. + * @return The mirror equivalent of c. If c does not have a mirror equivalent, + * the original character is returned. + * @see isMirrored + */ + static UChar32 toMirror(UChar32 c); + + /** + * Convert the character to title case. + * @param c The unicode character. + * @return The titlecase equivalent of c. If c does not have a titlecase equivalent, + * the original character is returned. 
+ */ + static UChar32 toTitle(UChar32 c); + + }; + +} + +#endif |