summary | refs | log | tree | commit | diff | stats
path: root/icu/src/main
diff options
context:
space:
mode:
author Elliott Hughes <enh@google.com> 2010-04-12 18:32:50 -0700
committer Elliott Hughes <enh@google.com> 2010-04-13 10:56:37 -0700
commit 9de899cc3ffd3aa3f8f827201cbe14120609018b (patch)
tree 0c81bf39b8c8e1d393c31a9e7e68a4de7e4c9fe7 /icu/src/main
parent 4a6cd08d55ec407dea29586cc917f8a423f5645f (diff)
download libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.zip
libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.tar.gz
libcore-9de899cc3ffd3aa3f8f827201cbe14120609018b.tar.bz2
Fix String.toLowerCase and toUpperCase.
Rather than try to cope with Lithuanian, let's just hand that one to ICU4C. I've removed my hand-crafted Azeri/Turkish lowercasing too, in favor of ICU. Presence of a high surrogate (which implies a supplemental character) is a good reason to hand over to ICU too.

On the uppercasing side, I've kept our existing hard-coded table and just added code to defer to ICU for Azeri, Lithuanian, and Turkish (plus supplemental characters). I don't like the tables, but I don't have proof that they're incorrect.

Bug: 2340628
Change-Id: I36b556b0444623a5aacc1afc58ebb4d84211d3dc
Diffstat (limited to 'icu/src/main')
-rw-r--r-- icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java | 2
-rw-r--r-- icu/src/main/native/NativeIDN.cpp | 2
-rw-r--r-- icu/src/main/native/Resources.cpp | 8
-rw-r--r-- icu/src/main/native/ScopedJavaUnicodeString.h | 10
-rw-r--r-- icu/src/main/native/UCharacter.cpp | 75
5 files changed, 60 insertions, 37 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java b/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
index dc351f4..08fe26a 100644
--- a/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
+++ b/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
@@ -42,6 +42,8 @@ public final class UCharacter {
public static native int toLowerCase(int codePoint);
public static native int toTitleCase(int codePoint);
public static native int toUpperCase(int codePoint);
+ public static native String toLowerCase(String s, String localeName);
+ public static native String toUpperCase(String s, String localeName);
public static UnicodeBlock[] getBlockTable() {
/**
diff --git a/icu/src/main/native/NativeIDN.cpp b/icu/src/main/native/NativeIDN.cpp
index 5ce3e94..72afc74 100644
--- a/icu/src/main/native/NativeIDN.cpp
+++ b/icu/src/main/native/NativeIDN.cpp
@@ -36,7 +36,7 @@ static jstring convertImpl(JNIEnv* env, jclass, jstring s, jint flags, jboolean
const size_t srcLength = sus.unicodeString().length();
UChar dst[256];
UErrorCode status = U_ZERO_ERROR;
- int32_t resultLength = toAscii
+ size_t resultLength = toAscii
? uidna_IDNToASCII(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status)
: uidna_IDNToUnicode(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status);
if (U_FAILURE(status)) {
diff --git a/icu/src/main/native/Resources.cpp b/icu/src/main/native/Resources.cpp
index e4138b1..ba363fe 100644
--- a/icu/src/main/native/Resources.cpp
+++ b/icu/src/main/native/Resources.cpp
@@ -17,6 +17,7 @@
#define LOG_TAG "Resources"
#include "JNIHelp.h"
#include "AndroidSystemNatives.h"
+#include "ScopedUtfChars.h"
#include "cutils/log.h"
#include "unicode/numfmt.h"
#include "unicode/locid.h"
@@ -67,11 +68,8 @@ private:
void operator=(const ScopedResourceBundle&);
};
-static Locale getLocale(JNIEnv* env, jstring locale) {
- const char* name = env->GetStringUTFChars(locale, NULL);
- Locale result = Locale::createFromName(name);
- env->ReleaseStringUTFChars(locale, name);
- return result;
+static Locale getLocale(JNIEnv* env, jstring localeName) {
+ return Locale::createFromName(ScopedUtfChars(env, localeName).data());
}
static jint getCurrencyFractionDigitsNative(JNIEnv* env, jclass clazz, jstring currencyCode) {
diff --git a/icu/src/main/native/ScopedJavaUnicodeString.h b/icu/src/main/native/ScopedJavaUnicodeString.h
index 69726fb..b108a6b 100644
--- a/icu/src/main/native/ScopedJavaUnicodeString.h
+++ b/icu/src/main/native/ScopedJavaUnicodeString.h
@@ -22,9 +22,7 @@
// A smart pointer that provides access to an ICU UnicodeString given a JNI
// jstring. We give ICU a direct pointer to the characters on the Java heap.
-// It's clever enough to copy-on-write if necessary, but we only provide
-// const UnicodeString access anyway because attempted write access seems
-// likely to be an error.
+// It's clever enough to copy-on-write if necessary.
class ScopedJavaUnicodeString {
public:
ScopedJavaUnicodeString(JNIEnv* env, jstring s) : mEnv(env), mString(s) {
@@ -37,7 +35,11 @@ public:
mEnv->ReleaseStringChars(mString, mChars);
}
- const UnicodeString& unicodeString() {
+ const UnicodeString& unicodeString() const {
+ return mUnicodeString;
+ }
+
+ UnicodeString& unicodeString() {
return mUnicodeString;
}
diff --git a/icu/src/main/native/UCharacter.cpp b/icu/src/main/native/UCharacter.cpp
index 3fd8151..9856a1a 100644
--- a/icu/src/main/native/UCharacter.cpp
+++ b/icu/src/main/native/UCharacter.cpp
@@ -1,12 +1,12 @@
/*
* Copyright (C) 2006 The Android Open Source Project
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,6 +16,9 @@
#include "JNIHelp.h"
#include "AndroidSystemNatives.h"
+#include "ScopedJavaUnicodeString.h"
+#include "ScopedUtfChars.h"
+#include "unicode/locid.h"
#include "unicode/uchar.h"
#include <math.h>
#include <stdlib.h>
@@ -37,12 +40,12 @@ static jboolean isMirroredImpl(JNIEnv*, jclass, jint codePoint) {
}
static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){
- // The letters A-Z in their uppercase ('\u0041' through '\u005A'),
- // lowercase ('\u0061' through '\u007A'),
- // and full width variant ('\uFF21' through '\uFF3A'
- // and '\uFF41' through '\uFF5A') forms
- // have numeric values from 10 through 35. This is independent of the
- // Unicode specification, which does not assign numeric values to these
+ // The letters A-Z in their uppercase ('\u0041' through '\u005A'),
+ // lowercase ('\u0061' through '\u007A'),
+ // and full width variant ('\uFF21' through '\uFF3A'
+ // and '\uFF41' through '\uFF5A') forms
+ // have numeric values from 10 through 35. This is independent of the
+ // Unicode specification, which does not assign numeric values to these
// char values.
if (codePoint >= 0x41 && codePoint <= 0x5A) {
return codePoint - 0x37;
@@ -66,15 +69,15 @@ static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){
}
return result;
-}
-
+}
+
static jboolean isDefinedImpl(JNIEnv*, jclass, jint codePoint) {
return u_isdefined(codePoint);
-}
+}
static jboolean isDigitImpl(JNIEnv*, jclass, jint codePoint) {
return u_isdigit(codePoint);
-}
+}
static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) {
// Java also returns TRUE for U+0085 Next Line (it omits U+0085 from whitespace ISO controls)
@@ -82,31 +85,31 @@ static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) {
return JNI_TRUE;
}
return u_isIDIgnorable(codePoint);
-}
+}
static jboolean isLetterImpl(JNIEnv*, jclass, jint codePoint) {
return u_isalpha(codePoint);
-}
+}
static jboolean isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) {
return u_isalnum(codePoint);
-}
+}
static jboolean isSpaceCharImpl(JNIEnv*, jclass, jint codePoint) {
return u_isJavaSpaceChar(codePoint);
-}
+}
static jboolean isTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_istitle(codePoint);
-}
+}
static jboolean isUnicodeIdentifierPartImpl(JNIEnv*, jclass, jint codePoint) {
return u_isIDPart(codePoint);
-}
+}
static jboolean isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) {
return u_isIDStart(codePoint);
-}
+}
static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint codePoint) {
// Java omits U+0085
@@ -114,27 +117,43 @@ static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint codePoint) {
return JNI_FALSE;
}
return u_isWhitespace(codePoint);
-}
+}
static jint toLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_tolower(codePoint);
-}
+}
static jint toTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_totitle(codePoint);
-}
+}
static jint toUpperCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_toupper(codePoint);
-}
+}
+
+static jstring toLowerCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) {
+ ScopedJavaUnicodeString scopedString(env, javaString);
+ UnicodeString& s(scopedString.unicodeString());
+ UnicodeString original(s);
+ s.toLower(Locale::createFromName(ScopedUtfChars(env, localeName).data()));
+ return s == original ? javaString : env->NewString(s.getBuffer(), s.length());
+}
+
+static jstring toUpperCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) {
+ ScopedJavaUnicodeString scopedString(env, javaString);
+ UnicodeString& s(scopedString.unicodeString());
+ UnicodeString original(s);
+ s.toUpper(Locale::createFromName(ScopedUtfChars(env, localeName).data()));
+ return s == original ? javaString : env->NewString(s.getBuffer(), s.length());
+}
static jboolean isUpperCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_isupper(codePoint);
-}
+}
static jboolean isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_islower(codePoint);
-}
+}
static int forNameImpl(JNIEnv* env, jclass, jstring blockName) {
if (blockName == NULL) {
@@ -178,7 +197,9 @@ static JNINativeMethod gMethods[] = {
{ "toLowerCase", "(I)I", (void*) toLowerCaseImpl },
{ "toTitleCase", "(I)I", (void*) toTitleCaseImpl },
{ "toUpperCase", "(I)I", (void*) toUpperCaseImpl },
-};
+ { "toLowerCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toLowerCaseStringImpl },
+ { "toUpperCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toUpperCaseStringImpl },
+};
int register_com_ibm_icu4jni_lang_UCharacter(JNIEnv* env) {
return jniRegisterNativeMethods(env, "com/ibm/icu4jni/lang/UCharacter",