From 9de899cc3ffd3aa3f8f827201cbe14120609018b Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Mon, 12 Apr 2010 18:32:50 -0700 Subject: Fix String.toLowerCase and toUpperCase. Rather than try to cope with Lithuanian, let's just hand that one to ICU4C. I've removed my hand-crafted Azeri/Turkish lowercasing too, in favor of ICU. Presence of a high surrogate (which implies a supplemental character) is a good reason to hand over to ICU too. On the uppercasing side, I've kept our existing hard-coded table and just added code to defer to ICU for Azeri, Lithuanian, and Turkish (plus supplemental characters). I don't like the tables, but I don't have proof that they're incorrect. Bug: 2340628 Change-Id: I36b556b0444623a5aacc1afc58ebb4d84211d3dc --- .../main/java/com/ibm/icu4jni/lang/UCharacter.java | 2 + icu/src/main/native/NativeIDN.cpp | 2 +- icu/src/main/native/Resources.cpp | 8 +-- icu/src/main/native/ScopedJavaUnicodeString.h | 10 +-- icu/src/main/native/UCharacter.cpp | 75 ++++++++++++++-------- 5 files changed, 60 insertions(+), 37 deletions(-) (limited to 'icu/src/main') diff --git a/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java b/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java index dc351f4..08fe26a 100644 --- a/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java +++ b/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java @@ -42,6 +42,8 @@ public final class UCharacter { public static native int toLowerCase(int codePoint); public static native int toTitleCase(int codePoint); public static native int toUpperCase(int codePoint); + public static native String toLowerCase(String s, String localeName); + public static native String toUpperCase(String s, String localeName); public static UnicodeBlock[] getBlockTable() { /** diff --git a/icu/src/main/native/NativeIDN.cpp b/icu/src/main/native/NativeIDN.cpp index 5ce3e94..72afc74 100644 --- a/icu/src/main/native/NativeIDN.cpp +++ b/icu/src/main/native/NativeIDN.cpp @@ -36,7 +36,7 @@ static jstring convertImpl(JNIEnv* env, jclass, jstring s, jint flags, jboolean const size_t srcLength = sus.unicodeString().length(); UChar dst[256]; UErrorCode status = U_ZERO_ERROR; - int32_t resultLength = toAscii + size_t resultLength = toAscii ? uidna_IDNToASCII(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status) : uidna_IDNToUnicode(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status); if (U_FAILURE(status)) { diff --git a/icu/src/main/native/Resources.cpp b/icu/src/main/native/Resources.cpp index e4138b1..ba363fe 100644 --- a/icu/src/main/native/Resources.cpp +++ b/icu/src/main/native/Resources.cpp @@ -17,6 +17,7 @@ #define LOG_TAG "Resources" #include "JNIHelp.h" #include "AndroidSystemNatives.h" +#include "ScopedUtfChars.h" #include "cutils/log.h" #include "unicode/numfmt.h" #include "unicode/locid.h" @@ -67,11 +68,8 @@ private: void operator=(const ScopedResourceBundle&); }; -static Locale getLocale(JNIEnv* env, jstring locale) { - const char* name = env->GetStringUTFChars(locale, NULL); - Locale result = Locale::createFromName(name); - env->ReleaseStringUTFChars(locale, name); - return result; +static Locale getLocale(JNIEnv* env, jstring localeName) { + return Locale::createFromName(ScopedUtfChars(env, localeName).data()); } static jint getCurrencyFractionDigitsNative(JNIEnv* env, jclass clazz, jstring currencyCode) { diff --git a/icu/src/main/native/ScopedJavaUnicodeString.h b/icu/src/main/native/ScopedJavaUnicodeString.h index 69726fb..b108a6b 100644 --- a/icu/src/main/native/ScopedJavaUnicodeString.h +++ b/icu/src/main/native/ScopedJavaUnicodeString.h @@ -22,9 +22,7 @@ // A smart pointer that provides access to an ICU UnicodeString given a JNI // jstring. We give ICU a direct pointer to the characters on the Java heap. -// It's clever enough to copy-on-write if necessary, but we only provide -// const UnicodeString access anyway because attempted write access seems -// likely to be an error. +// It's clever enough to copy-on-write if necessary. class ScopedJavaUnicodeString { public: ScopedJavaUnicodeString(JNIEnv* env, jstring s) : mEnv(env), mString(s) { @@ -37,7 +35,11 @@ public: mEnv->ReleaseStringChars(mString, mChars); } - const UnicodeString& unicodeString() { + const UnicodeString& unicodeString() const { + return mUnicodeString; + } + + UnicodeString& unicodeString() { return mUnicodeString; } diff --git a/icu/src/main/native/UCharacter.cpp b/icu/src/main/native/UCharacter.cpp index 3fd8151..9856a1a 100644 --- a/icu/src/main/native/UCharacter.cpp +++ b/icu/src/main/native/UCharacter.cpp @@ -1,12 +1,12 @@ /* * Copyright (C) 2006 The Android Open Source Project - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,6 +16,9 @@ #include "JNIHelp.h" #include "AndroidSystemNatives.h" +#include "ScopedJavaUnicodeString.h" +#include "ScopedUtfChars.h" +#include "unicode/locid.h" #include "unicode/uchar.h" #include #include @@ -37,12 +40,12 @@ static jboolean isMirroredImpl(JNIEnv*, jclass, jint codePoint) { } static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){ - // The letters A-Z in their uppercase ('\u0041' through '\u005A'), - // lowercase ('\u0061' through '\u007A'), - // and full width variant ('\uFF21' through '\uFF3A' - // and '\uFF41' through '\uFF5A') forms - // have numeric values from 10 through 35. This is independent of the - // Unicode specification, which does not assign numeric values to these + // The letters A-Z in their uppercase ('\u0041' through '\u005A'), + // lowercase ('\u0061' through '\u007A'), + // and full width variant ('\uFF21' through '\uFF3A' + // and '\uFF41' through '\uFF5A') forms + // have numeric values from 10 through 35. This is independent of the + // Unicode specification, which does not assign numeric values to these // char values. if (codePoint >= 0x41 && codePoint <= 0x5A) { return codePoint - 0x37; @@ -66,15 +69,15 @@ static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){ } return result; -} - +} + static jboolean isDefinedImpl(JNIEnv*, jclass, jint codePoint) { return u_isdefined(codePoint); -} +} static jboolean isDigitImpl(JNIEnv*, jclass, jint codePoint) { return u_isdigit(codePoint); -} +} static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) { // Java also returns TRUE for U+0085 Next Line (it omits U+0085 from whitespace ISO controls) @@ -82,31 +85,31 @@ static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) { return JNI_TRUE; } return u_isIDIgnorable(codePoint); -} +} static jboolean isLetterImpl(JNIEnv*, jclass, jint codePoint) { return u_isalpha(codePoint); -} +} static jboolean isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) { return u_isalnum(codePoint); -} +} static jboolean isSpaceCharImpl(JNIEnv*, jclass, jint codePoint) { return u_isJavaSpaceChar(codePoint); -} +} static jboolean isTitleCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_istitle(codePoint); -} +} static jboolean isUnicodeIdentifierPartImpl(JNIEnv*, jclass, jint codePoint) { return u_isIDPart(codePoint); -} +} static jboolean isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) { return u_isIDStart(codePoint); -} +} static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint codePoint) { // Java omits U+0085 @@ -114,27 +117,43 @@ static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint codePoint) { return JNI_FALSE; } return u_isWhitespace(codePoint); -} +} static jint toLowerCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_tolower(codePoint); -} +} static jint toTitleCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_totitle(codePoint); -} +} static jint toUpperCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_toupper(codePoint); -} +} + +static jstring toLowerCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) { + ScopedJavaUnicodeString scopedString(env, javaString); + UnicodeString& s(scopedString.unicodeString()); + UnicodeString original(s); + s.toLower(Locale::createFromName(ScopedUtfChars(env, localeName).data())); + return s == original ? javaString : env->NewString(s.getBuffer(), s.length()); +} + +static jstring toUpperCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) { + ScopedJavaUnicodeString scopedString(env, javaString); + UnicodeString& s(scopedString.unicodeString()); + UnicodeString original(s); + s.toUpper(Locale::createFromName(ScopedUtfChars(env, localeName).data())); + return s == original ? javaString : env->NewString(s.getBuffer(), s.length()); +} static jboolean isUpperCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_isupper(codePoint); -} +} static jboolean isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) { return u_islower(codePoint); -} +} static int forNameImpl(JNIEnv* env, jclass, jstring blockName) { if (blockName == NULL) { @@ -178,7 +197,9 @@ static JNINativeMethod gMethods[] = { { "toLowerCase", "(I)I", (void*) toLowerCaseImpl }, { "toTitleCase", "(I)I", (void*) toTitleCaseImpl }, { "toUpperCase", "(I)I", (void*) toUpperCaseImpl }, -}; + { "toLowerCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toLowerCaseStringImpl }, + { "toUpperCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toUpperCaseStringImpl }, +}; int register_com_ibm_icu4jni_lang_UCharacter(JNIEnv* env) { return jniRegisterNativeMethods(env, "com/ibm/icu4jni/lang/UCharacter", -- cgit v1.1