diff options
author | Elliott Hughes <enh@google.com> | 2010-04-12 18:32:50 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2010-04-13 10:56:37 -0700 |
commit | 9de899cc3ffd3aa3f8f827201cbe14120609018b (patch) | |
tree | 0c81bf39b8c8e1d393c31a9e7e68a4de7e4c9fe7 | |
parent | 4a6cd08d55ec407dea29586cc917f8a423f5645f (diff) | |
download | libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.zip libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.tar.gz libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.tar.bz2 |
Fix String.toLowerCase and toUpperCase.
Rather than try to cope with Lithuanian, let's just hand that one to ICU4C.
I've removed my hand-crafted Azeri/Turkish lowercasing too, in favor of ICU.
Presence of a high surrogate (which implies a supplementary character) is a
good reason to hand over to ICU too.
On the uppercasing side, I've kept our existing hard-coded tables and just
added code to defer to ICU for Azeri, Lithuanian, and Turkish (plus
supplementary characters). I don't like the tables, but I don't have proof
that they're incorrect.
Bug: 2340628
Change-Id: I36b556b0444623a5aacc1afc58ebb4d84211d3dc
-rw-r--r-- | icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java | 2 | ||||
-rw-r--r-- | icu/src/main/native/NativeIDN.cpp | 2 | ||||
-rw-r--r-- | icu/src/main/native/Resources.cpp | 8 | ||||
-rw-r--r-- | icu/src/main/native/ScopedJavaUnicodeString.h | 10 | ||||
-rw-r--r-- | icu/src/main/native/UCharacter.cpp | 75 | ||||
-rw-r--r-- | luni/src/main/java/java/lang/CaseMapper.java | 121 | ||||
-rw-r--r-- | luni/src/main/java/java/lang/String.java | 180 |
7 files changed, 196 insertions, 202 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java b/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java index dc351f4..08fe26a 100644 --- a/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java +++ b/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java @@ -42,6 +42,8 @@ public final class UCharacter { public static native int toLowerCase(int codePoint); public static native int toTitleCase(int codePoint); public static native int toUpperCase(int codePoint); + public static native String toLowerCase(String s, String localeName); + public static native String toUpperCase(String s, String localeName); public static UnicodeBlock[] getBlockTable() { /** diff --git a/icu/src/main/native/NativeIDN.cpp b/icu/src/main/native/NativeIDN.cpp index 5ce3e94..72afc74 100644 --- a/icu/src/main/native/NativeIDN.cpp +++ b/icu/src/main/native/NativeIDN.cpp @@ -36,7 +36,7 @@ static jstring convertImpl(JNIEnv* env, jclass, jstring s, jint flags, jboolean const size_t srcLength = sus.unicodeString().length(); UChar dst[256]; UErrorCode status = U_ZERO_ERROR; - int32_t resultLength = toAscii + size_t resultLength = toAscii ? 
uidna_IDNToASCII(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status) : uidna_IDNToUnicode(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status); if (U_FAILURE(status)) { diff --git a/icu/src/main/native/Resources.cpp b/icu/src/main/native/Resources.cpp index e4138b1..ba363fe 100644 --- a/icu/src/main/native/Resources.cpp +++ b/icu/src/main/native/Resources.cpp @@ -17,6 +17,7 @@ #define LOG_TAG "Resources" #include "JNIHelp.h" #include "AndroidSystemNatives.h" +#include "ScopedUtfChars.h" #include "cutils/log.h" #include "unicode/numfmt.h" #include "unicode/locid.h" @@ -67,11 +68,8 @@ private: void operator=(const ScopedResourceBundle&); }; -static Locale getLocale(JNIEnv* env, jstring locale) { - const char* name = env->GetStringUTFChars(locale, NULL); - Locale result = Locale::createFromName(name); - env->ReleaseStringUTFChars(locale, name); - return result; +static Locale getLocale(JNIEnv* env, jstring localeName) { + return Locale::createFromName(ScopedUtfChars(env, localeName).data()); } static jint getCurrencyFractionDigitsNative(JNIEnv* env, jclass clazz, jstring currencyCode) { diff --git a/icu/src/main/native/ScopedJavaUnicodeString.h b/icu/src/main/native/ScopedJavaUnicodeString.h index 69726fb..b108a6b 100644 --- a/icu/src/main/native/ScopedJavaUnicodeString.h +++ b/icu/src/main/native/ScopedJavaUnicodeString.h @@ -22,9 +22,7 @@ // A smart pointer that provides access to an ICU UnicodeString given a JNI // jstring. We give ICU a direct pointer to the characters on the Java heap. -// It's clever enough to copy-on-write if necessary, but we only provide -// const UnicodeString access anyway because attempted write access seems -// likely to be an error. +// It's clever enough to copy-on-write if necessary. 
class ScopedJavaUnicodeString { public: ScopedJavaUnicodeString(JNIEnv* env, jstring s) : mEnv(env), mString(s) { @@ -37,7 +35,11 @@ public: mEnv->ReleaseStringChars(mString, mChars); } - const UnicodeString& unicodeString() { + const UnicodeString& unicodeString() const { + return mUnicodeString; + } + + UnicodeString& unicodeString() { return mUnicodeString; } diff --git a/icu/src/main/native/UCharacter.cpp b/icu/src/main/native/UCharacter.cpp index 3fd8151..9856a1a 100644 --- a/icu/src/main/native/UCharacter.cpp +++ b/icu/src/main/native/UCharacter.cpp @@ -1,12 +1,12 @@ /* * Copyright (C) 2006 The Android Open Source Project - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,6 +16,9 @@ #include "JNIHelp.h" #include "AndroidSystemNatives.h" +#include "ScopedJavaUnicodeString.h" +#include "ScopedUtfChars.h" +#include "unicode/locid.h" #include "unicode/uchar.h" #include <math.h> #include <stdlib.h> @@ -37,12 +40,12 @@ static jboolean isMirroredImpl(JNIEnv*, jclass, jint codePoint) { } static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){ - // The letters A-Z in their uppercase ('\u0041' through '\u005A'), - // lowercase ('\u0061' through '\u007A'), - // and full width variant ('\uFF21' through '\uFF3A' - // and '\uFF41' through '\uFF5A') forms - // have numeric values from 10 through 35. 
This is independent of the - // Unicode specification, which does not assign numeric values to these + // The letters A-Z in their uppercase ('\u0041' through '\u005A'), + // lowercase ('\u0061' through '\u007A'), + // and full width variant ('\uFF21' through '\uFF3A' + // and '\uFF41' through '\uFF5A') forms + // have numeric values from 10 through 35. This is independent of the + // Unicode specification, which does not assign numeric values to these // char values. if (codePoint >= 0x41 && codePoint <= 0x5A) { return codePoint - 0x37; @@ -66,15 +69,15 @@ static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){ } return result; -} - +} + static jboolean isDefinedImpl(JNIEnv*, jclass, jint codePoint) { return u_isdefined(codePoint); -} +} static jboolean isDigitImpl(JNIEnv*, jclass, jint codePoint) { return u_isdigit(codePoint); -} +} static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) { // Java also returns TRUE for U+0085 Next Line (it omits U+0085 from whitespace ISO controls) @@ -82,31 +85,31 @@ static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) { return JNI_TRUE; } return u_isIDIgnorable(codePoint); -} +} static jboolean isLetterImpl(JNIEnv*, jclass, jint codePoint) { return u_isalpha(codePoint); -} +} static jboolean isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) { return u_isalnum(codePoint); -} +} static jboolean isSpaceCharImpl(JNIEnv*, jclass, jint codePoint) { return u_isJavaSpaceChar(codePoint); -} +} static jboolean isTitleCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_istitle(codePoint); -} +} static jboolean isUnicodeIdentifierPartImpl(JNIEnv*, jclass, jint codePoint) { return u_isIDPart(codePoint); -} +} static jboolean isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) { return u_isIDStart(codePoint); -} +} static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint codePoint) { // Java omits U+0085 @@ -114,27 +117,43 @@ static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint 
codePoint) { return JNI_FALSE; } return u_isWhitespace(codePoint); -} +} static jint toLowerCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_tolower(codePoint); -} +} static jint toTitleCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_totitle(codePoint); -} +} static jint toUpperCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_toupper(codePoint); -} +} + +static jstring toLowerCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) { + ScopedJavaUnicodeString scopedString(env, javaString); + UnicodeString& s(scopedString.unicodeString()); + UnicodeString original(s); + s.toLower(Locale::createFromName(ScopedUtfChars(env, localeName).data())); + return s == original ? javaString : env->NewString(s.getBuffer(), s.length()); +} + +static jstring toUpperCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) { + ScopedJavaUnicodeString scopedString(env, javaString); + UnicodeString& s(scopedString.unicodeString()); + UnicodeString original(s); + s.toUpper(Locale::createFromName(ScopedUtfChars(env, localeName).data())); + return s == original ? 
javaString : env->NewString(s.getBuffer(), s.length()); +} static jboolean isUpperCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_isupper(codePoint); -} +} static jboolean isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_islower(codePoint); -} +} static int forNameImpl(JNIEnv* env, jclass, jstring blockName) { if (blockName == NULL) { @@ -178,7 +197,9 @@ static JNINativeMethod gMethods[] = { { "toLowerCase", "(I)I", (void*) toLowerCaseImpl }, { "toTitleCase", "(I)I", (void*) toTitleCaseImpl }, { "toUpperCase", "(I)I", (void*) toUpperCaseImpl }, -}; + { "toLowerCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toLowerCaseStringImpl }, + { "toUpperCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toUpperCaseStringImpl }, +}; int register_com_ibm_icu4jni_lang_UCharacter(JNIEnv* env) { return jniRegisterNativeMethods(env, "com/ibm/icu4jni/lang/UCharacter", diff --git a/luni/src/main/java/java/lang/CaseMapper.java b/luni/src/main/java/java/lang/CaseMapper.java index c74bda0..f2f5ac8 100644 --- a/luni/src/main/java/java/lang/CaseMapper.java +++ b/luni/src/main/java/java/lang/CaseMapper.java @@ -16,18 +16,17 @@ package java.lang; +import com.ibm.icu4jni.lang.UCharacter; import java.util.Locale; /** * Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html. */ class CaseMapper { - // Intention-revealing constants for various important characters. 
- private static final char LATIN_CAPITAL_I = 'I'; - private static final char LATIN_SMALL_I = 'i'; + private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".toCharArray(); + private static final char[] upperValues2 = 
"\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".toCharArray(); + private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130'; - private static final char LATIN_SMALL_DOTLESS_I = '\u0131'; - private static final char COMBINING_DOT_ABOVE = '\u0307'; private static final char GREEK_CAPITAL_SIGMA = '\u03a3'; private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2'; @@ -45,20 +44,20 @@ class CaseMapper { * accessible. */ public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) { + // Punt hard cases to ICU4C. 
String languageCode = locale.getLanguage(); - boolean turkishOrAzeri = languageCode.equals("tr") || languageCode.equals("az"); + if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) { + return UCharacter.toLowerCase(s, locale.toString()); + } char[] newValue = null; int newCount = 0; for (int i = offset, end = offset + count; i < end; ++i) { char ch = value[i]; char newCh = ch; - if (turkishOrAzeri && ch == LATIN_CAPITAL_I_WITH_DOT) { - newCh = LATIN_SMALL_I; - } else if (turkishOrAzeri && ch == LATIN_CAPITAL_I && !followedBy(value, offset, count, i, COMBINING_DOT_ABOVE)) { - newCh = LATIN_SMALL_DOTLESS_I; - } else if (turkishOrAzeri && ch == COMBINING_DOT_ABOVE && precededBy(value, offset, count, i, LATIN_CAPITAL_I)) { - continue; // (We've already converted the preceding I, so we don't need to create newValue.) + if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) { + // Punt these hard cases. + return UCharacter.toLowerCase(s, locale.toString()); } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) { newCh = GREEK_SMALL_FINAL_SIGMA; } else { @@ -108,4 +107,102 @@ class CaseMapper { } return true; } + + /** + * Return the index of the specified character into the upperValues table. + * The upperValues table contains three entries at each position. These + * three characters are the upper case conversion. If only two characters + * are used, the third character in the table is \u0000. 
+ * @return the index into the upperValues table, or -1 + */ + private static int upperIndex(int ch) { + int index = -1; + if (ch >= 0xdf) { + if (ch <= 0x587) { + switch (ch) { + case 0xdf: return 0; + case 0x149: return 1; + case 0x1f0: return 2; + case 0x390: return 3; + case 0x3b0: return 4; + case 0x587: return 5; + } + } else if (ch >= 0x1e96) { + if (ch <= 0x1e9a) { + index = 6 + ch - 0x1e96; + } else if (ch >= 0x1f50 && ch <= 0x1ffc) { + index = upperValues2[ch - 0x1f50]; + if (index == 0) { + index = -1; + } + } else if (ch >= 0xfb00) { + if (ch <= 0xfb06) { + index = 90 + ch - 0xfb00; + } else if (ch >= 0xfb13 && ch <= 0xfb17) { + index = 97 + ch - 0xfb13; + } + } + } + } + return index; + } + + public static String toUpperCase(Locale locale, String s, char[] value, int offset, int count) { + String languageCode = locale.getLanguage(); + if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) { + return UCharacter.toUpperCase(s, locale.toString()); + } + + char[] output = null; + int i = 0; + for (int o = offset, end = offset + count; o < end; o++) { + char ch = value[o]; + if (Character.isHighSurrogate(ch)) { + return UCharacter.toUpperCase(s, locale.toString()); + } + int index = upperIndex(ch); + if (index == -1) { + if (output != null && i >= output.length) { + char[] newoutput = new char[output.length + (count / 6) + 2]; + System.arraycopy(output, 0, newoutput, 0, output.length); + output = newoutput; + } + char upch = Character.toUpperCase(ch); + if (ch != upch) { + if (output == null) { + output = new char[count]; + i = o - offset; + System.arraycopy(value, offset, output, 0, i); + } + output[i++] = upch; + } else if (output != null) { + output[i++] = ch; + } + } else { + int target = index * 3; + char val3 = upperValues[target + 2]; + if (output == null) { + output = new char[count + (count / 6) + 2]; + i = o - offset; + System.arraycopy(value, offset, output, 0, i); + } else if (i + (val3 == 0 ? 
1 : 2) >= output.length) { + char[] newoutput = new char[output.length + (count / 6) + 3]; + System.arraycopy(output, 0, newoutput, 0, output.length); + output = newoutput; + } + + char val = upperValues[target]; + output[i++] = val; + val = upperValues[target + 1]; + output[i++] = val; + if (val3 != 0) { + output[i++] = val3; + } + } + } + if (output == null) { + return s; + } + return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i); + } } diff --git a/luni/src/main/java/java/lang/String.java b/luni/src/main/java/java/lang/String.java index 7de8094..495cc63 100644 --- a/luni/src/main/java/java/lang/String.java +++ b/luni/src/main/java/java/lang/String.java @@ -44,18 +44,11 @@ import org.apache.harmony.luni.util.PriviAction; * @see Charset * @since 1.0 */ -public final class String implements Serializable, Comparable<String>, - CharSequence { +public final class String implements Serializable, Comparable<String>, CharSequence { private static final long serialVersionUID = -6849794470754667710L; - // BEGIN android-added private static final char REPLACEMENT_CHAR = (char) 0xfffd; - // END android-added - - // BEGIN android-removed - // static class ConsolePrintStream extends java.io.PrintStream ... - // END android-removed /** * CaseInsensitiveComparator compares Strings ignoring the case of the @@ -394,7 +387,7 @@ public final class String implements Serializable, Comparable<String>, } catch (Exception e) { // do nothing. 
according to spec: // behavior is unspecified for invalid array - cb = CharBuffer.wrap("\u003f".toCharArray()); //$NON-NLS-1$ + cb = CharBuffer.wrap("\u003f".toCharArray()); } if ((result = cb.length()) > 0) { value = cb.array(); @@ -688,7 +681,7 @@ public final class String implements Serializable, Comparable<String>, @SuppressWarnings("unused") private String(String s1, int v1) { if (s1 == null) { - s1 = "null"; //$NON-NLS-1$ + s1 = "null"; } String s2 = String.valueOf(v1); int len = s1.count + s2.count; @@ -856,8 +849,7 @@ public final class String implements Serializable, Comparable<String>, private Charset defaultCharset() { if (DefaultCharset == null) { String encoding = AccessController - .doPrivileged(new PriviAction<String>( - "file.encoding", "ISO8859_1")); //$NON-NLS-1$ //$NON-NLS-2$ + .doPrivileged(new PriviAction<String>("file.encoding", "ISO8859_1")); // calling System.getProperty() may cause DefaultCharset to be // initialized try { @@ -869,7 +861,7 @@ public final class String implements Serializable, Comparable<String>, } if (DefaultCharset == null) { - DefaultCharset = Charset.forName("ISO-8859-1"); //$NON-NLS-1$ + DefaultCharset = Charset.forName("ISO-8859-1"); } } return DefaultCharset; @@ -1713,18 +1705,16 @@ public final class String implements Serializable, Comparable<String>, } /** - * Converts this string to lowercase, using the rules of the specified locale. - * <p> - * Most case mappings are unaffected by the language of a {@code Locale}. Exceptions include + * Converts this string to lowercase, using the rules of {@code locale}. + * + * <p>Most case mappings are unaffected by the language of a {@code Locale}. Exceptions include * dotted and dotless I in Azeri and Turkish locales, and dotted and dotless I and J in - * Lithuanian locales. On the other hand, it isn't necessary to provide, a Greek locale to get + * Lithuanian locales. 
On the other hand, it isn't necessary to provide a Greek locale to get * correct case mapping of Greek characters: any locale will do. - * <p> - * See <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt</a> + * + * <p>See <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt</a> * for full details of context- and language-specific special cases. - * - * @param locale - * the Locale to use. + * * @return a new lowercase string, or {@code this} if it's already all-lowercase. */ public String toLowerCase(Locale locale) { @@ -1742,145 +1732,29 @@ public final class String implements Serializable, Comparable<String>, } /** - * Converts the characters in this string to uppercase, using the default - * Locale. + * Converts this string to uppercase, using the rules of the default locale. * - * @return a new string containing the uppercase characters equivalent to - * the characters in this string. + * @return a new uppercase string, or {@code this} if it's already all-uppercase. */ public String toUpperCase() { - return toUpperCase(Locale.getDefault()); + return CaseMapper.toUpperCase(Locale.getDefault(), this, value, offset, count); } - // BEGIN android-note - // put this in a helper class so that it's only initialized on demand? 
- // END android-note - private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".value; //$NON-NLS-1$ - /** - * Return the index of the specified character into the upperValues table. - * The upperValues table contains three entries at each position. 
These - * three characters are the upper case conversion. If only two characters - * are used, the third character in the table is \u0000. + * Converts this this string to uppercase, using the rules of {@code locale}. * - * @param ch - * the char being converted to upper case + * <p>Most case mappings are unaffected by the language of a {@code Locale}. Exceptions include + * dotted and dotless I in Azeri and Turkish locales, and dotted and dotless I and J in + * Lithuanian locales. On the other hand, it isn't necessary to provide a Greek locale to get + * correct case mapping of Greek characters: any locale will do. * - * @return the index into the upperValues table, or -1 - */ - private int upperIndex(int ch) { - int index = -1; - if (ch >= 0xdf) { - if (ch <= 0x587) { - if (ch == 0xdf) { - index = 0; - } else if (ch <= 0x149) { - if (ch == 0x149) { - index = 1; - } - } else if (ch <= 0x1f0) { - if (ch == 0x1f0) { - index = 2; - } - } else if (ch <= 0x390) { - if (ch == 0x390) { - index = 3; - } - } else if (ch <= 0x3b0) { - if (ch == 0x3b0) { - index = 4; - } - } else if (ch <= 0x587) { - if (ch == 0x587) { - index = 5; - } - } - } else if (ch >= 0x1e96) { - if (ch <= 0x1e9a) { - index = 6 + ch - 0x1e96; - } else if (ch >= 0x1f50 && ch <= 0x1ffc) { - index = "\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f 
!\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".value[ch - 0x1f50]; //$NON-NLS-1$ - if (index == 0) { - index = -1; - } - } else if (ch >= 0xfb00) { - if (ch <= 0xfb06) { - index = 90 + ch - 0xfb00; - } else if (ch >= 0xfb13 && ch <= 0xfb17) { - index = 97 + ch - 0xfb13; - } - } - } - } - return index; - } - - /** - * Converts the characters in this string to uppercase, using the specified - * Locale. - * - * @param locale - * the Locale to use. - * @return a new string containing the uppercase characters equivalent to - * the characters in this string. + * <p>See <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt</a> + * for full details of context- and language-specific special cases. + * + * @return a new uppercase string, or {@code this} if it's already all-uppercase. */ public String toUpperCase(Locale locale) { - // BEGIN android-changed: support Azeri. - String languageCode = locale.getLanguage(); - boolean turkishOrAzeri = languageCode.equals("tr") || languageCode.equals("az"); - - char[] output = null; - int i = 0; - for (int o = offset, end = offset + count; o < end; o++) { - char ch = value[o]; - int index = upperIndex(ch); - if (index == -1) { - if (output != null && i >= output.length) { - char[] newoutput = new char[output.length + (count / 6) + 2]; - System.arraycopy(output, 0, newoutput, 0, output.length); - output = newoutput; - } - char upch = !turkishOrAzeri ? Character.toUpperCase(ch) - : (ch != 0x69 ? 
Character.toUpperCase(ch) - : (char) 0x130); - if (ch != upch) { - if (output == null) { - output = new char[count]; - i = o - offset; - System.arraycopy(value, offset, output, 0, i); - } - output[i++] = upch; - } else if (output != null) { - output[i++] = ch; - } - } else { - int target = index * 3; - char val3 = upperValues[target + 2]; - if (output == null) { - output = new char[count + (count / 6) + 2]; - i = o - offset; - System.arraycopy(value, offset, output, 0, i); - } else if (i + (val3 == 0 ? 1 : 2) >= output.length) { - char[] newoutput = new char[output.length + (count / 6) + 3]; - System.arraycopy(output, 0, newoutput, 0, output.length); - output = newoutput; - } - - char val = upperValues[target]; - output[i++] = val; - val = upperValues[target + 1]; - output[i++] = val; - if (val3 != 0) { - output[i++] = val3; - } - } - } - if (output == null) { - return this; - } - return output.length == i || output.length - i < 8 ? new String(0, i, - output) : new String(output, 0, i); - // END android-changed + return CaseMapper.toUpperCase(locale, this, value, offset, count); } /** @@ -2014,7 +1888,7 @@ public final class String implements Serializable, Comparable<String>, * @return the object converted to a string, or the string {@code "null"}. */ public static String valueOf(Object value) { - return value != null ? value.toString() : "null"; //$NON-NLS-1$ + return value != null ? value.toString() : "null"; } /** @@ -2027,7 +1901,7 @@ public final class String implements Serializable, Comparable<String>, * @return the boolean converted to a string. */ public static String valueOf(boolean value) { - return value ? "true" : "false"; //$NON-NLS-1$ //$NON-NLS-2$ + return value ? "true" : "false"; } /** |