Fix String.toLowerCase and toUpperCase.

Rather than try to cope with Lithuanian ourselves, let's just hand that one to ICU4C. I've also removed my hand-crafted Azeri/Turkish lowercasing, in favor of ICU. The presence of a high surrogate (which implies a supplementary character) is a good reason to hand over to ICU too.

On the uppercasing side, I've kept our existing hard-coded table and just added code to defer to ICU for Azeri, Lithuanian, and Turkish (plus supplementary characters). I don't like the tables, but I don't have proof that they're incorrect.

Bug: 2340628
Change-Id: I36b556b0444623a5aacc1afc58ebb4d84211d3dc
author: Elliott Hughes <enh@google.com> 2010-04-12 18:32:50 -0700
committer: Elliott Hughes <enh@google.com> 2010-04-13 10:56:37 -0700
commit: 9de899cc3ffd3aa3f8f827201cbe14120609018b (patch)
tree: 0c81bf39b8c8e1d393c31a9e7e68a4de7e4c9fe7 /icu/src/main
parent: 4a6cd08d55ec407dea29586cc917f8a423f5645f (diff)
download: libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.zip libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.tar.gz libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.tar.bz2
5 files changed, 60 insertions, 37 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java b/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
index dc351f4..08fe26a 100644
--- a/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
+++ b/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
@@ -42,6 +42,8 @@ public final class UCharacter {
     public static native int toLowerCase(int codePoint);
     public static native int toTitleCase(int codePoint);
     public static native int toUpperCase(int codePoint);
+    public static native String toLowerCase(String s, String localeName);
+    public static native String toUpperCase(String s, String localeName);
 
     public static UnicodeBlock[] getBlockTable() {
         /**
diff --git a/icu/src/main/native/NativeIDN.cpp b/icu/src/main/native/NativeIDN.cpp
index 5ce3e94..72afc74 100644
--- a/icu/src/main/native/NativeIDN.cpp
+++ b/icu/src/main/native/NativeIDN.cpp
@@ -36,7 +36,7 @@ static jstring convertImpl(JNIEnv* env, jclass, jstring s, jint flags, jboolean
     const size_t srcLength = sus.unicodeString().length();
     UChar dst[256];
     UErrorCode status = U_ZERO_ERROR;
-    int32_t resultLength = toAscii
+    size_t resultLength = toAscii
             ? uidna_IDNToASCII(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status)
             : uidna_IDNToUnicode(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status);
     if (U_FAILURE(status)) {
diff --git a/icu/src/main/native/Resources.cpp b/icu/src/main/native/Resources.cpp
index e4138b1..ba363fe 100644
--- a/icu/src/main/native/Resources.cpp
+++ b/icu/src/main/native/Resources.cpp
@@ -17,6 +17,7 @@
 #define LOG_TAG "Resources"
 #include "JNIHelp.h"
 #include "AndroidSystemNatives.h"
+#include "ScopedUtfChars.h"
 #include "cutils/log.h"
 #include "unicode/numfmt.h"
 #include "unicode/locid.h"
@@ -67,11 +68,8 @@ private:
     void operator=(const ScopedResourceBundle&);
 };
 
-static Locale getLocale(JNIEnv* env, jstring locale) {
-    const char* name = env->GetStringUTFChars(locale, NULL);
-    Locale result = Locale::createFromName(name);
-    env->ReleaseStringUTFChars(locale, name);
-    return result;
+static Locale getLocale(JNIEnv* env, jstring localeName) {
+    return Locale::createFromName(ScopedUtfChars(env, localeName).data());
 }
 
 static jint getCurrencyFractionDigitsNative(JNIEnv* env, jclass clazz, jstring currencyCode) {
diff --git a/icu/src/main/native/ScopedJavaUnicodeString.h b/icu/src/main/native/ScopedJavaUnicodeString.h
index 69726fb..b108a6b 100644
--- a/icu/src/main/native/ScopedJavaUnicodeString.h
+++ b/icu/src/main/native/ScopedJavaUnicodeString.h
@@ -22,9 +22,7 @@
 
 // A smart pointer that provides access to an ICU UnicodeString given a JNI
 // jstring. We give ICU a direct pointer to the characters on the Java heap.
-// It's clever enough to copy-on-write if necessary, but we only provide
-// const UnicodeString access anyway because attempted write access seems
-// likely to be an error.
+// It's clever enough to copy-on-write if necessary.
 class ScopedJavaUnicodeString {
 public:
     ScopedJavaUnicodeString(JNIEnv* env, jstring s) : mEnv(env), mString(s) {
@@ -37,7 +35,11 @@ public:
         mEnv->ReleaseStringChars(mString, mChars);
     }
 
-    const UnicodeString& unicodeString() {
+    const UnicodeString& unicodeString() const {
+        return mUnicodeString;
+    }
+
+    UnicodeString& unicodeString() {
         return mUnicodeString;
     }
 
diff --git a/icu/src/main/native/UCharacter.cpp b/icu/src/main/native/UCharacter.cpp
index 3fd8151..9856a1a 100644
--- a/icu/src/main/native/UCharacter.cpp
+++ b/icu/src/main/native/UCharacter.cpp
@@ -1,12 +1,12 @@
 /*
  * Copyright (C) 2006 The Android Open Source Project
- * 
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * 
+ *
  *      http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,6 +16,9 @@
 
 #include "JNIHelp.h"
 #include "AndroidSystemNatives.h"
+#include "ScopedJavaUnicodeString.h"
+#include "ScopedUtfChars.h"
+#include "unicode/locid.h"
 #include "unicode/uchar.h"
 #include <math.h>
 #include <stdlib.h>
@@ -37,12 +40,12 @@ static jboolean isMirroredImpl(JNIEnv*, jclass, jint codePoint) {
 }
 
 static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){
-    // The letters A-Z in their uppercase ('\u0041' through '\u005A'), 
-    //                          lowercase ('\u0061' through '\u007A'), 
-    //             and full width variant ('\uFF21' through '\uFF3A' 
-    //                                 and '\uFF41' through '\uFF5A') forms 
-    // have numeric values from 10 through 35. This is independent of the 
-    // Unicode specification, which does not assign numeric values to these 
+    // The letters A-Z in their uppercase ('\u0041' through '\u005A'),
+    //                          lowercase ('\u0061' through '\u007A'),
+    //             and full width variant ('\uFF21' through '\uFF3A'
+    //                                 and '\uFF41' through '\uFF5A') forms
+    // have numeric values from 10 through 35. This is independent of the
+    // Unicode specification, which does not assign numeric values to these
     // char values.
     if (codePoint >= 0x41 && codePoint <= 0x5A) {
         return codePoint - 0x37;
@@ -66,15 +69,15 @@ static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){
     }
 
     return result;
-} 
-    
+}
+
 static jboolean isDefinedImpl(JNIEnv*, jclass, jint codePoint) {
     return u_isdefined(codePoint);
-} 
+}
 
 static jboolean isDigitImpl(JNIEnv*, jclass, jint codePoint) {
     return u_isdigit(codePoint);
-} 
+}
 
 static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) {
     // Java also returns TRUE for U+0085 Next Line (it omits U+0085 from whitespace ISO controls)
@@ -82,31 +85,31 @@ static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) {
         return JNI_TRUE;
     }
     return u_isIDIgnorable(codePoint);
-} 
+}
 
 static jboolean isLetterImpl(JNIEnv*, jclass, jint codePoint) {
     return u_isalpha(codePoint);
-} 
+}
 
 static jboolean isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) {
     return u_isalnum(codePoint);
-} 
+}
 
 static jboolean isSpaceCharImpl(JNIEnv*, jclass, jint codePoint) {
     return u_isJavaSpaceChar(codePoint);
-} 
+}
 
 static jboolean isTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
     return u_istitle(codePoint);
-} 
+}
 
 static jboolean isUnicodeIdentifierPartImpl(JNIEnv*, jclass, jint codePoint) {
     return u_isIDPart(codePoint);
-} 
+}
 
 static jboolean isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) {
     return u_isIDStart(codePoint);
-} 
+}
 
 static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint codePoint) {
     // Java omits U+0085
@@ -114,27 +117,43 @@ static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint codePoint) {
         return JNI_FALSE;
     }
     return u_isWhitespace(codePoint);
-} 
+}
 
 static jint toLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
     return u_tolower(codePoint);
-} 
+}
 
 static jint toTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
     return u_totitle(codePoint);
-} 
+}
 
 static jint toUpperCaseImpl(JNIEnv*, jclass, jint codePoint) {
     return u_toupper(codePoint);
-} 
+}
+
+static jstring toLowerCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) {
+    ScopedJavaUnicodeString scopedString(env, javaString);
+    UnicodeString& s(scopedString.unicodeString());
+    UnicodeString original(s);
+    s.toLower(Locale::createFromName(ScopedUtfChars(env, localeName).data()));
+    return s == original ? javaString : env->NewString(s.getBuffer(), s.length());
+}
+
+static jstring toUpperCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) {
+    ScopedJavaUnicodeString scopedString(env, javaString);
+    UnicodeString& s(scopedString.unicodeString());
+    UnicodeString original(s);
+    s.toUpper(Locale::createFromName(ScopedUtfChars(env, localeName).data()));
+    return s == original ? javaString : env->NewString(s.getBuffer(), s.length());
+}
 
 static jboolean isUpperCaseImpl(JNIEnv*, jclass, jint codePoint) {
     return u_isupper(codePoint);
-} 
+}
 
 static jboolean isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
     return u_islower(codePoint);
-} 
+}
 
 static int forNameImpl(JNIEnv* env, jclass, jstring blockName) {
     if (blockName == NULL) {
@@ -178,7 +197,9 @@ static JNINativeMethod gMethods[] = {
     { "toLowerCase", "(I)I", (void*) toLowerCaseImpl },
     { "toTitleCase", "(I)I", (void*) toTitleCaseImpl },
     { "toUpperCase", "(I)I", (void*) toUpperCaseImpl },
-}; 
+    { "toLowerCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toLowerCaseStringImpl },
+    { "toUpperCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toUpperCaseStringImpl },
+};
 
 int register_com_ibm_icu4jni_lang_UCharacter(JNIEnv* env) {
     return jniRegisterNativeMethods(env, "com/ibm/icu4jni/lang/UCharacter",
author	Elliott Hughes <enh@google.com>	2010-04-12 18:32:50 -0700
committer	Elliott Hughes <enh@google.com>	2010-04-13 10:56:37 -0700
commit	9de899cc3ffd3aa3f8f827201cbe14120609018b (patch)
tree	0c81bf39b8c8e1d393c31a9e7e68a4de7e4c9fe7 /icu/src/main
parent	4a6cd08d55ec407dea29586cc917f8a423f5645f (diff)
download	libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.zip libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.tar.gz libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.tar.bz2