summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNarayan Kamath <narayan@google.com>2014-01-13 12:08:59 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2014-01-13 12:09:00 +0000
commitfdbcd4fbefa4ff049029f87660409f29c057433b (patch)
tree40f6a0a6647ba3694dbee5d7ca388801137271f6
parentb95ccb65af509c50be21e6e9750f88f781a5e4be (diff)
parent48819fe0b9130618a430ec52b3f8526c4c0a5f8a (diff)
downloadlibcore-fdbcd4fbefa4ff049029f87660409f29c057433b.zip
libcore-fdbcd4fbefa4ff049029f87660409f29c057433b.tar.gz
libcore-fdbcd4fbefa4ff049029f87660409f29c057433b.tar.bz2
Merge "java7: Implement Character.UnicodeScript"
-rw-r--r--luni/src/main/java/java/lang/Character.java431
-rw-r--r--luni/src/main/native/java_lang_Character.cpp30
-rw-r--r--luni/src/test/java/libcore/java/lang/CharacterTest.java62
3 files changed, 515 insertions, 8 deletions
diff --git a/luni/src/main/java/java/lang/Character.java b/luni/src/main/java/java/lang/Character.java
index 5762bd4..32c38d3 100644
--- a/luni/src/main/java/java/lang/Character.java
+++ b/luni/src/main/java/java/lang/Character.java
@@ -1489,7 +1489,7 @@ public final class Character implements Serializable, Comparable<Character> {
if (blockName == null) {
throw new NullPointerException("blockName == null");
}
- int block = forNameImpl(blockName);
+ int block = unicodeBlockForName(blockName);
if (block == -1) {
throw new IllegalArgumentException("Unknown block: " + blockName);
}
@@ -1510,7 +1510,7 @@ public final class Character implements Serializable, Comparable<Character> {
*/
public static UnicodeBlock of(int codePoint) {
checkValidCodePoint(codePoint);
- int block = ofImpl(codePoint);
+ int block = unicodeBlockForCodePoint(codePoint);
if (block == -1 || block >= BLOCKS.length) {
return null;
}
@@ -1522,9 +1522,432 @@ public final class Character implements Serializable, Comparable<Character> {
}
}
- private static native int forNameImpl(String blockName);
+ private static native int unicodeBlockForName(String blockName); // UnicodeBlock.forName treats -1 as "unknown block"
+
+ private static native int unicodeBlockForCodePoint(int codePoint); // UnicodeBlock.of maps -1 or an index past BLOCKS to null
+
+ /**
+ * Represents a <a href="http://www.unicode.org/reports/tr24/">Unicode script</a>.
+ * Every Unicode code point is contained by a single {@code UnicodeScript}. Code points
+ * shared between scripts will be in {@code COMMON}. Code points for combining
+ * characters that can be applied to multiple scripts will be in {@code INHERITED}
+ * because they inherit the script of their base character. Code points whose scripts
+ * don't have a corresponding {@code UnicodeScript} will be in {@code UNKNOWN}.
+ *
+ * @since 1.7
+ * @hide
+ */
+ public static enum UnicodeScript {
+ /** ISO 15924 English name "Arabic" */
+ ARABIC,
+ /** ISO 15924 English name "Armenian" */
+ ARMENIAN,
+ /** ISO 15924 English name "Avestan" */
+ AVESTAN,
+ /** ISO 15924 English name "Balinese" */
+ BALINESE,
+ /** ISO 15924 English name "Bamum" */
+ BAMUM,
+ /** ISO 15924 English name "Batak" */
+ BATAK,
+ /** ISO 15924 English name "Bengali" */
+ BENGALI,
+ /** ISO 15924 English name "Bopomofo" */
+ BOPOMOFO,
+ /** ISO 15924 English name "Brahmi" */
+ BRAHMI,
+ /** ISO 15924 English name "Braille" */
+ BRAILLE,
+ /** ISO 15924 English name "Buginese" */
+ BUGINESE,
+ /** ISO 15924 English name "Buhid" */
+ BUHID,
+ /** ISO 15924 English name "Unified Canadian Aboriginal Syllabics" */
+ CANADIAN_ABORIGINAL,
+ /** ISO 15924 English name "Carian" */
+ CARIAN,
+ /** ISO 15924 English name "Cham" */
+ CHAM,
+ /** ISO 15924 English name "Cherokee" */
+ CHEROKEE,
+ /** ISO 15924 English name "Common" */
+ COMMON,
+ /** ISO 15924 English name "Coptic" */
+ COPTIC,
+ /** ISO 15924 English name "Cuneiform" */
+ CUNEIFORM,
+ /** ISO 15924 English name "Cypriot" */
+ CYPRIOT,
+ /** ISO 15924 English name "Cyrillic" */
+ CYRILLIC,
+ /** ISO 15924 English name "Deseret" */
+ DESERET,
+ /** ISO 15924 English name "Devanagari" */
+ DEVANAGARI,
+ /** ISO 15924 English name "Egyptian hieroglyphs" */
+ EGYPTIAN_HIEROGLYPHS,
+ /** ISO 15924 English name "Ethiopic" */
+ ETHIOPIC,
+ /** ISO 15924 English name "Georgian" */
+ GEORGIAN,
+ /** ISO 15924 English name "Glagolitic" */
+ GLAGOLITIC,
+ /** ISO 15924 English name "Gothic" */
+ GOTHIC,
+ /** ISO 15924 English name "Greek" */
+ GREEK,
+ /** ISO 15924 English name "Gujarati" */
+ GUJARATI,
+ /** ISO 15924 English name "Gurmukhi" */
+ GURMUKHI,
+ /** ISO 15924 English name "Han" */
+ HAN,
+ /** ISO 15924 English name "Hangul" */
+ HANGUL,
+ /** ISO 15924 English name "Hanunoo" */
+ HANUNOO,
+ /** ISO 15924 English name "Hebrew" */
+ HEBREW,
+ /** ISO 15924 English name "Hiragana" */
+ HIRAGANA,
+ /** ISO 15924 English name "Imperial aramaic" */
+ IMPERIAL_ARAMAIC,
+ /** ISO 15924 English name "Inherited" */
+ INHERITED,
+ /** ISO 15924 English name "Inscriptional pahlavi" */
+ INSCRIPTIONAL_PAHLAVI,
+ /** ISO 15924 English name "Inscriptional parthian" */
+ INSCRIPTIONAL_PARTHIAN,
+ /** ISO 15924 English name "Javanese" */
+ JAVANESE,
+ /** ISO 15924 English name "Kaithi" */
+ KAITHI,
+ /** ISO 15924 English name "Kannada" */
+ KANNADA,
+ /** ISO 15924 English name "Katakana" */
+ KATAKANA,
+ /** ISO 15924 English name "Kayah li" */
+ KAYAH_LI,
+ /** ISO 15924 English name "Kharoshthi" */
+ KHAROSHTHI,
+ /** ISO 15924 English name "Khmer" */
+ KHMER,
+ /** ISO 15924 English name "Lao" */
+ LAO,
+ /** ISO 15924 English name "Latin" */
+ LATIN,
+ /** ISO 15924 English name "Lepcha" */
+ LEPCHA,
+ /** ISO 15924 English name "Limbu" */
+ LIMBU,
+ /** ISO 15924 English name "Linear B" */
+ LINEAR_B,
+ /** ISO 15924 English name "Lisu" */
+ LISU,
+ /** ISO 15924 English name "Lycian" */
+ LYCIAN,
+ /** ISO 15924 English name "Lydian" */
+ LYDIAN,
+ /** ISO 15924 English name "Malayalam" */
+ MALAYALAM,
+ /** ISO 15924 English name "Mandaic" */
+ MANDAIC,
+ /** ISO 15924 English name "Meetei Mayek (Meithei, Meetei)" */
+ MEETEI_MAYEK,
+ /** ISO 15924 English name "Mongolian" */
+ MONGOLIAN,
+ /** ISO 15924 English name "Myanmar" */
+ MYANMAR,
+ /** ISO 15924 English name "New Tai Lue" */
+ NEW_TAI_LUE,
+ /** ISO 15924 English name "Nko" */
+ NKO,
+ /** ISO 15924 English name "Ogham" */
+ OGHAM,
+ /** ISO 15924 English name "Ol Chiki" */
+ OL_CHIKI,
+ /** ISO 15924 English name "Old Italic" */
+ OLD_ITALIC,
+ /** ISO 15924 English name "Old Persian" */
+ OLD_PERSIAN,
+ /** ISO 15924 English name "Old South Arabian" */
+ OLD_SOUTH_ARABIAN,
+ /** ISO 15924 English name "Old Turkic, Orkhon Runic" */
+ OLD_TURKIC,
+ /** ISO 15924 English name "Oriya" */
+ ORIYA,
+ /** ISO 15924 English name "Osmanya" */
+ OSMANYA,
+ /** ISO 15924 English name "Phags-pa" */
+ PHAGS_PA,
+ /** ISO 15924 English name "Phoenician" */
+ PHOENICIAN,
+ /** ISO 15924 English name "Rejang" */
+ REJANG,
+ /** ISO 15924 English name "Runic" */
+ RUNIC,
+ /** ISO 15924 English name "Samaritan" */
+ SAMARITAN,
+ /** ISO 15924 English name "Saurashtra" */
+ SAURASHTRA,
+ /** ISO 15924 English name "Shavian" */
+ SHAVIAN,
+ /** ISO 15924 English name "Sinhala" */
+ SINHALA,
+ /** ISO 15924 English name "Sundanese" */
+ SUNDANESE,
+ /** ISO 15924 English name "Syloti Nagri" */
+ SYLOTI_NAGRI,
+ /** ISO 15924 English name "Syriac" */
+ SYRIAC,
+ /** ISO 15924 English name "Tagalog" */
+ TAGALOG,
+ /** ISO 15924 English name "Tagbanwa" */
+ TAGBANWA,
+ /** ISO 15924 English name "Tai Le" */
+ TAI_LE,
+ /** ISO 15924 English name "Tai Tham (Lanna)" */
+ TAI_THAM,
+ /** ISO 15924 English name "Tai Viet" */
+ TAI_VIET,
+ /** ISO 15924 English name "Tamil" */
+ TAMIL,
+ /** ISO 15924 English name "Telugu" */
+ TELUGU,
+ /** ISO 15924 English name "Thaana" */
+ THAANA,
+ /** ISO 15924 English name "Thai" */
+ THAI,
+ /** ISO 15924 English name "Tibetan" */
+ TIBETAN,
+ /** ISO 15924 English name "Tifinagh" */
+ TIFINAGH,
+ /** ISO 15924 English name "Ugaritic" */
+ UGARITIC,
+ /** ISO 15924 English name "Unknown" */
+ UNKNOWN,
+ /** ISO 15924 English name "Vai" */
+ VAI,
+ /** ISO 15924 English name "Yi" */
+ YI;
+
+ private static final UnicodeScript[] SCRIPTS = {
+ COMMON,
+ INHERITED,
+ ARABIC,
+ ARMENIAN,
+ BENGALI,
+ BOPOMOFO,
+ CHEROKEE,
+ COPTIC,
+ CYRILLIC,
+ DESERET,
+ DEVANAGARI,
+ ETHIOPIC,
+ GEORGIAN,
+ GOTHIC,
+ GREEK,
+ GUJARATI,
+ GURMUKHI,
+ HAN,
+ HANGUL,
+ HEBREW,
+ HIRAGANA,
+ KANNADA,
+ KATAKANA,
+ KHMER,
+ LAO,
+ LATIN,
+ MALAYALAM,
+ MONGOLIAN,
+ MYANMAR,
+ OGHAM,
+ OLD_ITALIC,
+ ORIYA,
+ RUNIC,
+ SINHALA,
+ SYRIAC,
+ TAMIL,
+ TELUGU,
+ THAANA,
+ THAI,
+ TIBETAN,
+ CANADIAN_ABORIGINAL,
+ YI,
+ TAGALOG,
+ HANUNOO,
+ BUHID,
+ TAGBANWA,
+ BRAILLE,
+ CYPRIOT,
+ LIMBU,
+ LINEAR_B,
+ OSMANYA,
+ SHAVIAN,
+ TAI_LE,
+ UGARITIC,
+ null, // USCRIPT_KATAKANA_OR_HIRAGANA
+ BUGINESE,
+ GLAGOLITIC,
+ KHAROSHTHI,
+ SYLOTI_NAGRI,
+ NEW_TAI_LUE,
+ TIFINAGH,
+ OLD_PERSIAN,
+ BALINESE,
+ BATAK,
+ null, // USCRIPT_BLISSYMBOLS,
+ BRAHMI,
+ CHAM,
+ null, // USCRIPT_CIRTH,
+ null, // USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC,
+ null, // USCRIPT_DEMOTIC_EGYPTIAN,
+ null, // USCRIPT_HIERATIC_EGYPTIAN,
+ EGYPTIAN_HIEROGLYPHS,
+ null, // USCRIPT_KHUTSURI,
+ null, // USCRIPT_SIMPLIFIED_HAN,
+ null, // USCRIPT_TRADITIONAL_HAN,
+ null, // USCRIPT_PAHAWH_HMONG,
+ null, // USCRIPT_OLD_HUNGARIAN,
+ null, // USCRIPT_HARAPPAN_INDUS,
+ JAVANESE,
+ KAYAH_LI,
+ null, // USCRIPT_LATIN_FRAKTUR,
+ null, // USCRIPT_LATIN_GAELIC,
+ LEPCHA,
+ null, // USCRIPT_LINEAR_A,
+ MANDAIC, // == MANDAEAN
+ null, // USCRIPT_MAYAN_HIEROGLYPHS,
+ null, // USCRIPT_MEROITIC_HIEROGLYPHS == USCRIPT_MEROITIC
+ NKO,
+ OLD_TURKIC, // USCRIPT_ORKHON == OLD_TURKIC,
+ null, // USCRIPT_OLD_PERMIC,
+ PHAGS_PA,
+ PHOENICIAN,
+ null, // USCRIPT_PHONETIC_POLLARD == MIAO,
+ null, // USCRIPT_RONGORONGO,
+ null, // USCRIPT_SARATI,
+ null, // USCRIPT_ESTRANGELO_SYRIAC,
+ null, // USCRIPT_WESTERN_SYRIAC,
+ null, // USCRIPT_EASTERN_SYRIAC,
+ null, // USCRIPT_TENGWAR,
+ VAI,
+ null, // USCRIPT_VISIBLE_SPEECH,
+ CUNEIFORM,
+ null, // USCRIPT_UNWRITTEN_LANGUAGES,
+ UNKNOWN,
+ CARIAN,
+ null, // USCRIPT_JAPANESE,
+ TAI_THAM, // USCRIPT_LANNA (aka TAI_THAM),
+ LYCIAN,
+ LYDIAN,
+ OL_CHIKI,
+ REJANG,
+ SAURASHTRA,
+ null, // USCRIPT_SIGN_WRITING,
+ SUNDANESE,
+ null, // USCRIPT_MOON,
+ MEETEI_MAYEK, // USCRIPT_MEITEI_MAYEK (aka MEETEI, MEITHEI),
+ IMPERIAL_ARAMAIC,
+ AVESTAN,
+ null, // USCRIPT_CHAKMA,
+ null, // USCRIPT_KOREAN,
+ KAITHI,
+ null, // USCRIPT_MANICHAEAN,
+ INSCRIPTIONAL_PAHLAVI,
+ null, // USCRIPT_PSALTER_PAHLAVI,
+ null, // USCRIPT_BOOK_PAHLAVI,
+ INSCRIPTIONAL_PARTHIAN,
+ SAMARITAN,
+ TAI_VIET,
+ null, // USCRIPT_MATHEMATICAL_NOTATION,
+ null, // USCRIPT_SYMBOLS,
+ BAMUM,
+ LISU,
+ null, // USCRIPT_NAKHI_GEBA,
+ OLD_SOUTH_ARABIAN,
+ null, // USCRIPT_BASSA_VAH,
+ null, // USCRIPT_DUPLOYAN_SHORTAND,
+ null, // USCRIPT_ELBASAN,
+ null, // USCRIPT_GRANTHA,
+ null, // USCRIPT_KPELLE,
+ null, // USCRIPT_LOMA,
+ null, // USCRIPT_MENDE,
+ null, // USCRIPT_MEROITIC_CURSIVE,
+ null, // USCRIPT_OLD_NORTH_ARABIAN,
+ null, // USCRIPT_NABATAEAN,
+ null, // USCRIPT_PALMYRENE,
+ null, // USCRIPT_SINDHI,
+ null, // USCRIPT_WARANG_CITI,
+ null, // USCRIPT_AFAKA,
+ null, // USCRIPT_JURCHEN,
+ null, // USCRIPT_MRO,
+ null, // USCRIPT_NUSHU,
+ null, // USCRIPT_SHARADA,
+ null, // USCRIPT_SORA_SOMPENG,
+ null, // USCRIPT_TAKRI,
+ null, // USCRIPT_TANGUT,
+ null, // USCRIPT_WOLEAI,
+ null, // USCRIPT_ANATOLIAN_HIEROGLYPHS,
+ null, // USCRIPT_KHOJKI,
+ null, // USCRIPT_TIRHUTA,
+ };
+
+ /**
+ * Returns the {@link UnicodeScript} value identified by {@code scriptName}.
+ * {@code scriptName} can be an ISO-15924 English script name or an alias
+ * (ISO-15924 script code) for that name; lookups are case insensitive. See
+ * <a href="http://www.unicode.org/iso15924/iso15924-codes.html">the ISO 15924 code list</a>.
+ *
+ * @param scriptName the script name or script code to look up.
+ * @return the matching script; never {@code null}.
+ * @throws NullPointerException if {@code scriptName} is null.
+ * @throws IllegalArgumentException if {@code scriptName} is invalid.
+ * @since 1.7
+ */
+ public static UnicodeScript forName(String scriptName) {
+ if (scriptName == null) {
+ throw new NullPointerException("scriptName == null");
+ }
+
+ final int script = unicodeScriptForName(scriptName);
+ if (script == -1 || script >= SCRIPTS.length ||
+ SCRIPTS[script] == null) {
+ throw new IllegalArgumentException("Unknown script: " + scriptName);
+ }
+
+ return SCRIPTS[script];
+ }
+
+ /**
+ * Returns the {@link UnicodeScript} that {@code codePoint} is assigned to, or
+ * {@code UNKNOWN} if its script has no corresponding {@code UnicodeScript}.
+ *
+ * @throws IllegalArgumentException if {@code codePoint} is not a valid Unicode code point.
+ */
+ public static UnicodeScript of(int codePoint) {
+ checkValidCodePoint(codePoint);
+ int script = unicodeScriptForCodePoint(codePoint);
+ if (script == -1 || script >= SCRIPTS.length) {
+ // This signifies an ICU error. Complain loudly instead of swallowing
+ // the error up.
+ throw new IllegalArgumentException("Invalid codePoint: " + codePoint);
+ }
+
+ // This happens when ICU maps the code point to a script known to ICU but
+ // not the Java API.
+ if (SCRIPTS[script] == null) {
+ return UNKNOWN;
+ }
+
+ return SCRIPTS[script];
+ }
+ }
+
+ private static native int unicodeScriptForName(String scriptName); // UnicodeScript.forName treats -1 as "unknown script"
+
+ private static native int unicodeScriptForCodePoint(int codePoint); // UnicodeScript.of treats -1 as an ICU error
- private static native int ofImpl(int codePoint);
/**
* Constructs a new {@code Character} with the specified primitive char
diff --git a/luni/src/main/native/java_lang_Character.cpp b/luni/src/main/native/java_lang_Character.cpp
index 14eef64..2d1fcfc 100644
--- a/luni/src/main/native/java_lang_Character.cpp
+++ b/luni/src/main/native/java_lang_Character.cpp
@@ -20,6 +20,7 @@
#include "JniConstants.h"
#include "ScopedUtfChars.h"
#include "unicode/uchar.h"
+#include "unicode/uscript.h"
#include <math.h>
#include <stdio.h> // For BUFSIZ
#include <stdlib.h>
@@ -124,7 +125,7 @@ static jboolean Character_isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_islower(codePoint);
}
-static int Character_forNameImpl(JNIEnv* env, jclass, jstring javaBlockName) {
+static int Character_unicodeBlockForName(JNIEnv* env, jclass, jstring javaBlockName) {
ScopedUtfChars blockName(env, javaBlockName);
if (blockName.c_str() == NULL) {
return 0;
@@ -132,10 +133,29 @@ static int Character_forNameImpl(JNIEnv* env, jclass, jstring javaBlockName) {
return u_getPropertyValueEnum(UCHAR_BLOCK, blockName.c_str());
}
-static int Character_ofImpl(JNIEnv*, jclass, jint codePoint) {
+static int Character_unicodeBlockForCodePoint(JNIEnv*, jclass, jint codePoint) {
return ublock_getCode(codePoint);
}
+static int Character_unicodeScriptForName(JNIEnv* env, jclass, jstring javaScriptName) {
+ // Looks up the UCHAR_SCRIPT property value for the name; -1 when c_str() yields NULL.
+ ScopedUtfChars utf8Name(env, javaScriptName);
+ if (utf8Name.c_str() != NULL) {
+ return u_getPropertyValueEnum(UCHAR_SCRIPT, utf8Name.c_str());
+ }
+ return -1;
+}
+
+static int Character_unicodeScriptForCodePoint(JNIEnv*, jclass, jint codePoint) {
+ // Returns ICU's script code for codePoint, or -1 on failure; U_FAILURE() ignores warning-only statuses.
+ UErrorCode status = U_ZERO_ERROR;
+ const UScriptCode script = uscript_getScript(codePoint, &status);
+ if (U_FAILURE(status)) {
+ return -1;
+ }
+ return script;
+}
+
static jboolean Character_isAlphabetic(JNIEnv*, jclass, jint codePoint) {
return u_hasBinaryProperty(codePoint, UCHAR_ALPHABETIC);
}
@@ -146,7 +166,6 @@ static jboolean Character_isIdeographic(JNIEnv*, jclass, jint codePoint) {
static JNINativeMethod gMethods[] = {
NATIVE_METHOD(Character, digitImpl, "!(II)I"),
- NATIVE_METHOD(Character, forNameImpl, "(Ljava/lang/String;)I"),
NATIVE_METHOD(Character, getDirectionalityImpl, "!(I)B"),
NATIVE_METHOD(Character, getNameImpl, "(I)Ljava/lang/String;"),
NATIVE_METHOD(Character, getNumericValueImpl, "!(I)I"),
@@ -166,10 +185,13 @@ static JNINativeMethod gMethods[] = {
NATIVE_METHOD(Character, isUnicodeIdentifierStartImpl, "!(I)Z"),
NATIVE_METHOD(Character, isUpperCaseImpl, "!(I)Z"),
NATIVE_METHOD(Character, isWhitespaceImpl, "!(I)Z"),
- NATIVE_METHOD(Character, ofImpl, "!(I)I"),
NATIVE_METHOD(Character, toLowerCaseImpl, "!(I)I"),
NATIVE_METHOD(Character, toTitleCaseImpl, "!(I)I"),
NATIVE_METHOD(Character, toUpperCaseImpl, "!(I)I"),
+ NATIVE_METHOD(Character, unicodeBlockForName, "(Ljava/lang/String;)I"),
+ NATIVE_METHOD(Character, unicodeBlockForCodePoint, "!(I)I"),
+ NATIVE_METHOD(Character, unicodeScriptForName, "(Ljava/lang/String;)I"),
+ NATIVE_METHOD(Character, unicodeScriptForCodePoint, "!(I)I"),
};
void register_java_lang_Character(JNIEnv* env) {
jniRegisterNativeMethods(env, "java/lang/Character", gMethods, NELEM(gMethods));
diff --git a/luni/src/test/java/libcore/java/lang/CharacterTest.java b/luni/src/test/java/libcore/java/lang/CharacterTest.java
index 48284d6..f0c5a23 100644
--- a/luni/src/test/java/libcore/java/lang/CharacterTest.java
+++ b/luni/src/test/java/libcore/java/lang/CharacterTest.java
@@ -263,4 +263,66 @@ public class CharacterTest extends junit.framework.TestCase {
assertEquals(m.invoke(null, i), Character.isWhitespace(i));
}
}
+
+ public void test_UnicodeScript_forName() throws Exception {
+ // A null name must be rejected with a NullPointerException.
+ try {
+ Character.UnicodeScript.forName(null);
+ fail();
+ } catch (NullPointerException expected) {
+ }
+
+ // A name that matches no script must be rejected as well.
+ try {
+ Character.UnicodeScript.forName("existential_dilemmas");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ }
+
+ // Note that ICU is pretty lenient about script names and their
+ // abbreviations: both are accepted in any mix of upper and lower case.
+ for (String name : new String[] { "Malayalam", "MalayaLAM", "Mlym", "MlYM" }) {
+ assertSame(Character.UnicodeScript.MALAYALAM, Character.UnicodeScript.forName(name));
+ }
+
+ // NOTE: The RI matches much more strictly and fails on "Old south arabian",
+ // despite it being the official name AND the alias for this script.
+ for (String name : new String[] { "Old_south_arabian", "Old south arabian", "SARB" }) {
+ assertSame(Character.UnicodeScript.OLD_SOUTH_ARABIAN, Character.UnicodeScript.forName(name));
+ }
+
+ // A script that's recognized by ICU but not a part of the standard
+ // java script values.
+ try {
+ Character.UnicodeScript.forName("Old north arabian");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ }
+ }
+
+ public void test_UnicodeScript_of() throws Exception {
+ try {
+ Character.UnicodeScript.of(-1);
+ fail();
+ } catch (IllegalArgumentException expected) {
+ }
+
+ try {
+ Character.UnicodeScript.of(0xffffff);
+ fail();
+ } catch (IllegalArgumentException expected) {
+ }
+
+ // The example from the ICU4C unit tests, followed by a plain ASCII letter.
+ assertSame(Character.UnicodeScript.MALAYALAM, Character.UnicodeScript.of(0x0D02));
+ assertSame(Character.UnicodeScript.LATIN, Character.UnicodeScript.of('A'));
+
+ // Special cases:
+ //
+ // 0640 is the ARABIC_TATWEEL, used by both Mandaic & Syriac
+ assertSame(Character.UnicodeScript.COMMON, Character.UnicodeScript.of(0x0640));
+ // 0300 is the COMBINING GRAVE ACCENT, which should be INHERITED because it's
+ // a nonspacing mark.
+ assertSame(Character.UnicodeScript.INHERITED, Character.UnicodeScript.of(0x0300));
+ }
}