From ccb8b92211a3e87acaf6486c8d4423c2053b8b5e Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Fri, 2 Apr 2010 17:19:21 -0700 Subject: More Charset/ICU cleanup. I've been feeling guilty about leaving broken double-checked locking (missing the "volatile") in harmony's Charset code. A quick investigation showed that the method that it's intended to optimize is basically never called, and the RI's documentation explicitly says "don't call this; it's slow". So this patch fixes that. I've also improved our documentation. I've also deleted a bunch of dead code. I've also tidied up some dodgy native string handling. Change-Id: Iad69ebb3459d9cc4c4ff37b255d458b83fe40132 --- .../java/com/ibm/icu4jni/charset/CharsetICU.java | 38 +-- .../ibm/icu4jni/charset/CharsetProviderICU.java | 50 --- .../com/ibm/icu4jni/charset/NativeConverter.java | 377 +++------------------ icu/src/main/native/NativeConverter.cpp | 373 +++++--------------- .../src/main/java/java/nio/charset/Charset.java | 53 ++- 5 files changed, 168 insertions(+), 723 deletions(-) delete mode 100644 icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java index 155f966..fe0f920 100644 --- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java +++ b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java @@ -5,7 +5,7 @@ ******************************************************************************* * ******************************************************************************* -*/ +*/ package com.ibm.icu4jni.charset; @@ -36,52 +36,52 @@ public final class CharsetICU extends Charset { long converterHandle = NativeConverter.openConverter(icuCanonicalName); return new CharsetDecoderICU(this, converterHandle); } - + // hardCoded list of replacement bytes - private static final Map subByteMap = new HashMap(); - static{ - subByteMap.put("UTF-32",new byte[]{0x00, 0x00, 
(byte)0xfe, (byte)0xff}); - subByteMap.put("ibm-16684_P110-2003",new byte[]{0x40, 0x40}); // make \u3000 the sub char - subByteMap.put("ibm-971_P100-1995",new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char + private static final Map subByteMap = new HashMap(); + static { + subByteMap.put("UTF-32", new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff}); + subByteMap.put("ibm-16684_P110-2003", new byte[]{0x40, 0x40}); // make \u3000 the sub char + subByteMap.put("ibm-971_P100-1995", new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char } /** * Returns a new encoder object of the charset * @return a new encoder * @stable ICU 2.4 */ - public CharsetEncoder newEncoder(){ + public CharsetEncoder newEncoder() { // the arrays are locals and not // instance variables since the - // methods on this class need to + // methods on this class need to // be thread safe long converterHandle = NativeConverter.openConverter(icuCanonicalName); - + //According to the contract all converters should have non-empty replacement byte[] replacement = NativeConverter.getSubstitutionBytes(converterHandle); - try{ + try { return new CharsetEncoderICU(this,converterHandle, replacement); - }catch(IllegalArgumentException ex){ + } catch (IllegalArgumentException ex) { // work around for the non-sensical check in the nio API that // a substitution character must be mappable while decoding!! - replacement = (byte[])subByteMap.get(icuCanonicalName); - if(replacement==null){ + replacement = subByteMap.get(icuCanonicalName); + if (replacement == null) { replacement = new byte[NativeConverter.getMinBytesPerChar(converterHandle)]; - for(int i=0; i charsets() { - ArrayList result = new ArrayList(); - for (String charsetName : NativeConverter.getAvailable()) { - result.add(charsetForName(charsetName)); - } - return result.iterator(); - } - - /** - * Implements Charset.availableCharsets. 
- */ - public SortedMap initAvailableCharsets() { - SortedMap result = - new TreeMap(String.CASE_INSENSITIVE_ORDER); - for (String charset : NativeConverter.getAvailable()) { - if (!result.containsKey(charset)) { - result.put(charset, charsetForName(charset)); - } - } - return result; - } -} diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java index eefe3d5..6a97c27 100644 --- a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java +++ b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java @@ -5,28 +5,21 @@ ******************************************************************************* * ******************************************************************************* -*/ +*/ package com.ibm.icu4jni.charset; import java.nio.charset.Charset; -/** - * Class for accessing the underlying JNI methods - * @internal ICU 2.4 - */ -final class NativeConverter { - - //Native methods - +public final class NativeConverter { /** * Converts an array of bytes containing characters in an external * encoding into an array of Unicode characters. This method allows - * a buffer by buffer conversion of a data stream. The state of the - * conversion is saved between calls to convert. Among other things, + * buffer-by-buffer conversion of a data stream. The state of the + * conversion is saved between calls. Among other things, * this means multibyte input sequences can be split between calls. - * If a call to convert results in an Error, the conversion may be - * continued by calling convert again with suitably modified parameters. + * If a call results in an error, the conversion may be + * continued by calling this method again with suitably modified parameters. * All conversions should be finished with a call to the flush method. 
* * @param converterHandle Address of converter object created by C code @@ -34,78 +27,22 @@ final class NativeConverter { * @param inEnd stop conversion at this offset in input array (exclusive). * @param output character array to receive conversion result. * @param outEnd stop writing to output array at this offset (exclusive). - * @param data integer array containing the following data + * @param data integer array containing the following data * data[0] = inputOffset * data[1] = outputOffset * @return int error code returned by ICU * @internal ICU 2.4 */ - - public static final native int convertByteToChar( long converterHandle, - byte[] input, int inEnd, - char[] output, int outEnd, - int[] data, - boolean flush); - /** - * Converts an array of bytes containing characters in an external - * encoding into an array of Unicode characters. This method allows - * a buffer by buffer conversion of a data stream. The state of the - * conversion is saved between calls to convert. Among other things, - * this means multibyte input sequences can be split between calls. - * If a call to convert results in an Error, the conversion may be - * continued by calling convert again with suitably modified parameters. - * All conversions should be finished with a call to the flush method. - * - * @param converterHandle Address of converter object created by C code - * @param input byte array containing text to be converted. - * @param inEnd stop conversion at this offset in input array (exclusive). - * @param output character array to receive conversion result. - * @param outEnd stop writing to output array at this offset (exclusive). 
- * @param data integer array containing the following data - * data[0] = inputOffset - * data[1] = outputOffset - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int decode( long converterHandle, - byte[] input, int inEnd, - char[] output, int outEnd, - int[] data, - boolean flush); - /** - * Converts an array of Unicode chars containing characters in an - * external encoding into an array of bytes. This method allows - * a buffer by buffer conversion of a data stream. The state of the - * conversion is saved between calls to convert. Among other things, - * this means multibyte input sequences can be split between calls. - * If a call to convert results in an Error, the conversion may be - * continued by calling convert again with suitably modified parameters. - * All conversions should be finished with a call to the flush method. - * - * @param converterHandle Address of converter object created by C code - * @param input char array containing text to be converted. - * @param inEnd stop conversion at this offset in input array (exclusive). - * @param output byte array to receive conversion result. - * @param outEnd stop writing to output array at this offset (exclusive). - * @param data integer array containing the following data - * data[0] = inputOffset - * data[1] = outputOffset - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int convertCharToByte(long converterHandle, - char[] input, int inEnd, - byte[] output, int outEnd, - int[] data, - boolean flush); + public static native int decode(long converterHandle, byte[] input, int inEnd, + char[] output, int outEnd, int[] data, boolean flush); + /** - * Converts an array of Unicode chars containing characters in an - * external encoding into an array of bytes. This method allows - * a buffer by buffer conversion of a data stream. 
The state of the + * Converts an array of Unicode chars to an array of bytes in an external encoding. + * This method allows a buffer by buffer conversion of a data stream. The state of the * conversion is saved between calls to convert. Among other things, * this means multibyte input sequences can be split between calls. - * If a call to convert results in an Error, the conversion may be - * continued by calling convert again with suitably modified parameters. + * If a call results in an error, the conversion may be + * continued by calling this method again with suitably modified parameters. * All conversions should be finished with a call to the flush method. * * @param converterHandle Address of converter object created by C code @@ -113,283 +50,71 @@ final class NativeConverter { * @param inEnd stop conversion at this offset in input array (exclusive). * @param output byte array to receive conversion result. * @param outEnd stop writing to output array at this offset (exclusive). - * @param data integer array containing the following data + * @param data integer array containing the following data * data[0] = inputOffset * data[1] = outputOffset * @return int error code returned by ICU * @internal ICU 2.4 - */ - public static final native int encode(long converterHandle, - char[] input, int inEnd, - byte[] output, int outEnd, - int[] data, - boolean flush); + */ + public static native int encode(long converterHandle, char[] input, int inEnd, + byte[] output, int outEnd, int[] data, boolean flush); + /** * Writes any remaining output to the output buffer and resets the - * converter to its initial state. + * converter to its initial state. * * @param converterHandle Address of converter object created by C code * @param output byte array to receive flushed output. * @param outEnd stop writing to output array at this offset (exclusive). 
* @return int error code returned by ICU - * @param data integer array containing the following data + * @param data integer array containing the following data * data[0] = inputOffset * data[1] = outputOffset * @internal ICU 2.4 - */ - public static final native int flushCharToByte(long converterHandle, - byte[] output, - int outEnd, - int[] data); + */ + public static native int flushCharToByte(long converterHandle, byte[] output, int outEnd, int[] data); + /** * Writes any remaining output to the output buffer and resets the - * converter to its initial state. + * converter to its initial state. * * @param converterHandle Address of converter object created by the native code * @param output char array to receive flushed output. * @param outEnd stop writing to output array at this offset (exclusive). * @return int error code returned by ICU - * @param data integer array containing the following data + * @param data integer array containing the following data * data[0] = inputOffset * data[1] = outputOffset * @internal ICU 2.4 - */ - public static final native int flushByteToChar(long converterHandle, - char[] output, - int outEnd, - int[] data); - - /** - * Open the converter with the specified encoding - * - * @param converterHandle long array for recieving the adress of converter object - * created by the native code - * @param encoding string representing encoding - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native long openConverter(String encoding); - /** - * Resets the ByteToChar (toUnicode) state of specified converter - * - * @param converterHandle Address of converter object created by the native code - * @internal ICU 2.4 - */ - public static final native void resetByteToChar(long converterHandle); - - /** - * Resets the CharToByte (fromUnicode) state of specified converter - * - * @param converterHandle Address of converter object created by the native code - * @internal ICU 2.4 - */ - public static final 
native void resetCharToByte(long converterHandle); - - /** - * Closes the specified converter and releases the resources - * - * @param converterHandle Address of converter object created by the native code - * @internal ICU 2.4 - */ - public static final native void closeConverter(long converterHandle); - - /** - * Sets the substitution Unicode chars of the specified converter used - * by encoder - * @param converterHandle Address of converter object created by the native code - * @param subChars array of chars to used for substitution - * @param length length of the array - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setSubstitutionChars( long converterHandle, - char[] subChars,int length); - /** - * Sets the substitution bytes of the specified converter used by decoder - * - * @param converterHandle Address of converter object created by the native code - * @param subChars array of bytes to used for substitution - * @param length length of the array - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setSubstitutionBytes( long converterHandle, - byte[] subChars,int length); - /** - * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter - * - * @param converterHandle Address of converter object created by the native code - * @param mode to set the true/false - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setSubstitutionModeCharToByte(long converterHandle, - boolean mode); - /** - * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter - * - * @param converterHandle Address of converter object created by the native code - * @param mode to set the true/false - * @return int error code returned by ICU - * @internal ICU 3.6 - */ - public static final native int setSubstitutionModeByteToChar(long converterHandle, - boolean mode); - /** - * Gets the 
numnber of invalid bytes in the specified converter object - * for the last error that has occured - * - * @param converterHandle Address of converter object created by the native code - * @param length array of int to recieve length of the array - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int countInvalidBytes(long converterHandle, int[] length); - - /** - * Gets the numnber of invalid chars in the specified converter object - * for the last error that has occured - * - * @param converterHandle Address of converter object created by the native code - * @param length array of int to recieve length of the array - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int countInvalidChars(long converterHandle, int[] length); - - /** - * Gets the number of bytes needed for converting a char - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 2.4 - */ - public static final native int getMaxBytesPerChar(long converterHandle); - - /** - * Gets the number of bytes needed for converting a char - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 3.2 - */ - public static final native int getMinBytesPerChar(long converterHandle); - - /** - * Gets the average numnber of bytes needed for converting a char - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 2.4 - */ - public static final native float getAveBytesPerChar(long converterHandle); - - /** - * Gets the number of chars needed for converting a byte - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 2.4 - */ - public static final native int getMaxCharsPerByte(long converterHandle); - 
- /** - * Gets the average numnber of chars needed for converting a byte - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 2.4 - */ - public static final native float getAveCharsPerByte(long converterHandle); - - //CSDL: added by Jack - /** - * Determines whether charset1 contains charset2. - */ - public static final native boolean contains(long converterHandle1, long converterHandle2); - - public static final native byte[] getSubstitutionBytes(long converterHandle); - - /** - * Ascertains if a given Unicode code unit can - * be converted to the target encoding - * @param converterHandle Address of converter object created by the native code - * @param codeUnit the character to be converted - * @return true if a character can be converted - * @internal ICU 2.4 - * - */ - public static final native boolean canEncode(long converterHandle,int codeUnit); - - /** - * Ascertains if a given a byte sequence can be converted to Unicode - * @param converterHandle Address of converter object created by the native code - * @param bytes the bytes to be converted - * @return true if a character can be converted - * @internal ICU 2.4 - * */ - public static final native boolean canDecode(long converterHandle,byte[] bytes); - - /** - * Gets the canonical names of available converters - * @return Object[] names as an object array - * @internal ICU 2.4 - */ - public static final native String[] getAvailable(); + public static native int flushByteToChar(long converterHandle, char[] output, int outEnd, int[] data); - public static final native Charset charsetForName(String charsetName); + public static native long openConverter(String encoding); + public static native void closeConverter(long converterHandle); + + public static native void resetByteToChar(long converterHandle); + public static native void resetCharToByte(long converterHandle); + + public static native int setSubstitutionChars(long 
converterHandle, char[] subChars,int length); + public static native int setSubstitutionBytes(long converterHandle, byte[] subChars,int length); + public static native byte[] getSubstitutionBytes(long converterHandle); + + public static native int getMaxBytesPerChar(long converterHandle); + public static native int getMinBytesPerChar(long converterHandle); + public static native float getAveBytesPerChar(long converterHandle); + public static native int getMaxCharsPerByte(long converterHandle); + public static native float getAveCharsPerByte(long converterHandle); + + public static native boolean contains(long converterHandle1, long converterHandle2); + + public static native boolean canEncode(long converterHandle, int codeUnit); + + public static native String[] getAvailableCharsetNames(); + public static native Charset charsetForName(String charsetName); - /** - * Gets the number of aliases for a converter name - * @param enc encoding name - * @return number of aliases for the converter - * @internal ICU 2.4 - */ - public static final native int countAliases(String enc); - - /** - * Gets the aliases associated with the converter name - * @param enc converter name - * @return converter names as elements in an object array - * @internal ICU 2.4 - */ - public static final native String[] getAliases(String enc); - - /** - * Sets the callback to Unicode for ICU conveter. The default behaviour of ICU callback - * is to call the specified callback function for both illegal and unmapped sequences. - * @param converterHandle Adress of the converter object created by native code - * @param mode call back mode to set. 
This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK - * The converter performs the specified callback when an error occurs - * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length); - - /** - * Sets the callback from Unicode for ICU conveter. The default behaviour of ICU callback - * is to call the specified callback function for both illegal and unmapped sequences. - * @param converterHandle Adress of the converter object created by native code - * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK - * The converter performs the specified callback when an error occurs - * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length); - - /** - * Returns a thread safe clone of the converter - * @internal ICU 2.4 - */ - public static final native long safeClone(long converterHandle); - - /** @internal ICU 2.4 */ public static final int STOP_CALLBACK = 0;//CodingErrorAction.REPORT - /** @internal ICU 2.4 */ public static final int SKIP_CALLBACK = 1;//CodingErrorAction.IGNORE - /** @internal ICU 2.4 */ public static final int SUBSTITUTE_CALLBACK = 2;//CodingErrorAction.REPLACE + public static native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length); + public static native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length); } diff --git 
a/icu/src/main/native/NativeConverter.cpp b/icu/src/main/native/NativeConverter.cpp index ebb035f..1bb9ac8 100644 --- a/icu/src/main/native/NativeConverter.cpp +++ b/icu/src/main/native/NativeConverter.cpp @@ -15,15 +15,16 @@ * @author: Ram Viswanadha */ -#include "JNIHelp.h" #include "AndroidSystemNatives.h" -#include "ScopedUtfChars.h" -#include "unicode/utypes.h" /* Basic ICU data types */ -#include "unicode/ucnv.h" /* C Converter API */ -#include "unicode/ustring.h" /* some more string functions*/ -#include "unicode/ucnv_cb.h" /* for callback functions */ -#include "unicode/uset.h" /* for contains function */ #include "ErrorCode.h" +#include "JNIHelp.h" +#include "ScopedUtfChars.h" +#include "UniquePtr.h" +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/uset.h" +#include "unicode/ustring.h" +#include "unicode/utypes.h" #include #include @@ -74,92 +75,6 @@ static void closeConverter(JNIEnv* env, jclass, jlong handle) { } /** - * Sets the substution mode for from Unicode conversion. 
Currently only - * two modes are supported: substitute or report - * @param env environment handle for JNI - * @param jClass handle for the class - * @param handle address of ICU converter - * @param mode the mode to set - */ -static jint setSubstitutionModeCharToByte (JNIEnv *env, jclass, jlong handle, jboolean mode) { - - UConverter* conv = (UConverter*)(long)handle; - UErrorCode errorCode =U_ZERO_ERROR; - - if(conv) { - - UConverterFromUCallback fromUOldAction ; - void* fromUOldContext; - void* fromUNewContext=NULL; - if(mode) { - - ucnv_setFromUCallBack(conv, - UCNV_FROM_U_CALLBACK_SUBSTITUTE, - fromUNewContext, - &fromUOldAction, - (const void**)&fromUOldContext, - &errorCode); - - } - else{ - - ucnv_setFromUCallBack(conv, - UCNV_FROM_U_CALLBACK_STOP, - fromUNewContext, - &fromUOldAction, - (const void**)&fromUOldContext, - &errorCode); - - } - return errorCode; - } - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return errorCode; -} -/** - * Sets the substution mode for to Unicode conversion. 
Currently only - * two modes are supported: substitute or report - * @param env environment handle for JNI - * @param jClass handle for the class - * @param handle address of ICU converter - * @param mode the mode to set - */ -static jint setSubstitutionModeByteToChar (JNIEnv *env, jclass, jlong handle, jboolean mode) { - - UConverter* conv = (UConverter*)handle; - UErrorCode errorCode =U_ZERO_ERROR; - - if(conv) { - - UConverterToUCallback toUOldAction ; - void* toUOldContext; - void* toUNewContext=NULL; - if(mode) { - - ucnv_setToUCallBack(conv, - UCNV_TO_U_CALLBACK_SUBSTITUTE, - toUNewContext, - &toUOldAction, - (const void**)&toUOldContext, - &errorCode); - - } - else{ - - ucnv_setToUCallBack(conv, - UCNV_TO_U_CALLBACK_STOP, - toUNewContext, - &toUOldAction, - (const void**)&toUOldContext, - &errorCode); - - } - return errorCode; - } - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return errorCode; -} -/** * Converts a buffer of Unicode code units to target encoding * @param env environment handle for JNI * @param jClass handle for the class @@ -342,38 +257,6 @@ static void resetCharToByte(JNIEnv* env, jclass, jlong handle) { } } -static jint countInvalidBytes (JNIEnv *env, jclass, jlong handle, jintArray length) { - UConverter* cnv = (UConverter*)handle; - if (!cnv) { - return U_ILLEGAL_ARGUMENT_ERROR; - } - - UErrorCode errorCode = U_ZERO_ERROR; - jint* len = (jint*) env->GetPrimitiveArrayCritical(length, NULL); - if (len) { - char invalidChars[32]; - ucnv_getInvalidChars(cnv,invalidChars,(int8_t*)len,&errorCode); - } - env->ReleasePrimitiveArrayCritical(length,(jint*)len,0); - return errorCode; -} - -static jint countInvalidChars(JNIEnv *env, jclass, jlong handle, jintArray length) { - UConverter* cnv = (UConverter*)handle; - if (!cnv) { - return U_ILLEGAL_ARGUMENT_ERROR; - } - - UErrorCode errorCode =U_ZERO_ERROR; - jint* len = (jint*) env->GetPrimitiveArrayCritical(length, NULL); - if (len) { - UChar invalidUChars[32]; - 
ucnv_getInvalidUChars(cnv,invalidUChars,(int8_t*)len,&errorCode); - } - env->ReleasePrimitiveArrayCritical(length,(jint*)len,0); - return errorCode; -} - static jint getMaxBytesPerChar(JNIEnv *env, jclass, jlong handle) { UConverter* cnv = (UConverter*)handle; return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1; @@ -477,30 +360,26 @@ static jint flushCharToByte (JNIEnv *env, jclass, jlong handle, jbyteArray targe } static void toChars(const UChar* us, char* cs, int32_t length) { - UChar u; - while(length>0) { - u=*us++; + while (length > 0) { + UChar u = *us++; *cs++=(char)u; --length; } } static jint setSubstitutionBytes(JNIEnv *env, jclass, jlong handle, jbyteArray subChars, jint length) { - UConverter* cnv = (UConverter*) handle; UErrorCode errorCode = U_ZERO_ERROR; - if(cnv) { + if (cnv) { jbyte* u_subChars = reinterpret_cast(env->GetPrimitiveArrayCritical(subChars, NULL)); - if(u_subChars) { - char* mySubChars = new char[length]; - toChars((UChar*)u_subChars,&mySubChars[0],length); - ucnv_setSubstChars(cnv,mySubChars, (char)length,&errorCode); - if(U_FAILURE(errorCode)) { + if (u_subChars) { + char mySubChars[length]; + toChars((UChar*)u_subChars,&mySubChars[0],length); + ucnv_setSubstChars(cnv,mySubChars, (char)length,&errorCode); + if(U_FAILURE(errorCode)) { env->ReleasePrimitiveArrayCritical(subChars,mySubChars,0); return errorCode; - } - delete[] mySubChars; - } - else{ + } + } else{ errorCode = U_ILLEGAL_ARGUMENT_ERROR; } env->ReleasePrimitiveArrayCritical(subChars,u_subChars,0); @@ -630,128 +509,53 @@ static jboolean canEncode(JNIEnv *env, jclass, jlong handle, jint codeUnit) { return (jboolean)FALSE; } +/* + * If a charset listed in the IANA Charset Registry is supported by an implementation + * of the Java platform then its canonical name must be the name listed in the registry. + * Many charsets are given more than one name in the registry, in which case the registry + * identifies one of the names as MIME-preferred. 
If a charset has more than one registry + * name then its canonical name must be the MIME-preferred name and the other names in + * the registry must be valid aliases. If a supported charset is not listed in the IANA + * registry then its canonical name must begin with one of the strings "X-" or "x-". + */ +static jstring getJavaCanonicalName(JNIEnv *env, const char* icuCanonicalName) { + UErrorCode status = U_ZERO_ERROR; -static jboolean canDecode(JNIEnv *env, jclass, jlong handle, jbyteArray source) { - - UErrorCode errorCode =U_ZERO_ERROR; - UConverter* cnv = (UConverter*)handle; - if(cnv) { - jint len = env->GetArrayLength(source); - jbyte* cSource =(jbyte*) env->GetPrimitiveArrayCritical(source, NULL); - if(cSource) { - const char* cSourceLimit = reinterpret_cast(cSource+len); - - /* Assume that we need at most twice the length of source */ - UChar* target = (UChar*) malloc(sizeof(UChar)* (len<<1)); - UChar* targetLimit = target + (len<<1); - if(target) { - ucnv_toUnicode(cnv,&target,targetLimit, (const char**)&cSource, - cSourceLimit,NULL, TRUE,&errorCode); - - if(U_SUCCESS(errorCode)) { - free(target); - env->ReleasePrimitiveArrayCritical(source,cSource,0); - return (jboolean)TRUE; - } - } - free(target); - } - env->ReleasePrimitiveArrayCritical(source,cSource,0); - } - return (jboolean)FALSE; -} - -static int32_t copyString(char* dest, int32_t destCapacity, int32_t startIndex, - const char* src, UErrorCode* status) { - int32_t srcLen = 0, i=0; - if(U_FAILURE(*status)) { - return 0; - } - if(dest == NULL || src == NULL || destCapacity < startIndex) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - srcLen = strlen(src); - if(srcLen >= destCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - return 0; - } - for(i=0; i < srcLen; i++) { - dest[startIndex++] = src[i]; - } - /* null terminate the buffer */ - dest[startIndex] = 0; /* no bounds check already made sure that we have enough room */ - return startIndex; -} - -static int32_t 
getJavaCanonicalName1(const char* icuCanonicalName, - char* canonicalName, int32_t capacity, - UErrorCode* status) { - int32_t retLen = 0; + // Check to see if this is a well-known MIME or IANA name. const char* cName = NULL; - /* find out the alias with MIME tag */ - if((cName =ucnv_getStandardName(icuCanonicalName, "MIME", status)) != NULL) { - retLen = copyString(canonicalName, capacity, 0, cName, status); - /* find out the alias with IANA tag */ - }else if((cName =ucnv_getStandardName(icuCanonicalName, "IANA", status)) != NULL) { - retLen = copyString(canonicalName, capacity, 0, cName, status); - }else { - /* - check to see if an alias already exists with x- prefix, if yes then - make that the canonical name - */ - int32_t aliasCount = ucnv_countAliases(icuCanonicalName,status); - int32_t i=0; - const char* name; - for(i=0;i= 2) { - strcpy(canonicalName,"x-"); - } - retLen = copyString(canonicalName, capacity, 2, name, status); + if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) { + return env->NewStringUTF(cName); + } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) { + return env->NewStringUTF(cName); + } + + // Check to see if an alias already exists with "x-" prefix, if yes then + // make that the canonical name. + int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status); + for (int i = 0; i < aliasCount; ++i) { + const char* name = ucnv_getAlias(icuCanonicalName, i, &status); + if (name != NULL && name[0] == 'x' && name[1] == '-') { + return env->NewStringUTF(name); } } - return retLen; -} -static jstring getJavaCanonicalName(JNIEnv *env, const char* icuCanonicalName) { - /* - * If a charset listed in the IANA Charset Registry is supported by an implementation - * of the Java platform then its canonical name must be the name listed in the registry. 
- * Many charsets are given more than one name in the registry, in which case the registry - * identifies one of the names as MIME-preferred. If a charset has more than one registry - * name then its canonical name must be the MIME-preferred name and the other names in - * the registry must be valid aliases. If a supported charset is not listed in the IANA - * registry then its canonical name must begin with one of the strings "X-" or "x-". - */ - UErrorCode error = U_ZERO_ERROR; - char cName[UCNV_MAX_CONVERTER_NAME_LENGTH] = {0}; - if (icuCanonicalName[0] != 0) { - getJavaCanonicalName1(icuCanonicalName, cName, UCNV_MAX_CONVERTER_NAME_LENGTH, &error); + // As a last resort, prepend "x-" to any alias and make that the canonical name. + status = U_ZERO_ERROR; + const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status); + if (name == NULL && strchr(icuCanonicalName, ',') != NULL) { + name = ucnv_getAlias(icuCanonicalName, 1, &status); } - return env->NewStringUTF(cName); + // If there is no UTR22 canonical name then just return the original name. + if (name == NULL) { + name = icuCanonicalName; + } + UniquePtr result(new char[2 + strlen(name) + 1]); + strcpy(&result[0], "x-"); + strcat(&result[0], name); + return env->NewStringUTF(&result[0]); } -static jobjectArray getAvailable(JNIEnv *env, jclass) { +static jobjectArray getAvailableCharsetNames(JNIEnv *env, jclass) { int32_t num = ucnv_countAvailable(); jobjectArray result = env->NewObjectArray(num, env->FindClass("java/lang/String"), NULL); for (int i = 0; i < num; ++i) { @@ -763,12 +567,6 @@ static jobjectArray getAvailable(JNIEnv *env, jclass) { return result; } -static jint countAliases(JNIEnv *env, jclass, jstring enc) { - ScopedUtfChars encChars(env, enc); - UErrorCode error = U_ZERO_ERROR; - return encChars.data() ? 
ucnv_countAliases(encChars.data(), &error) : 0; -} - static jobjectArray getAliases(JNIEnv* env, const char* icuCanonicalName) { // Get an upper bound on the number of aliases... const char* myEncName = icuCanonicalName; @@ -872,10 +670,11 @@ static void CHARSET_ENCODER_CALLBACK(const void *context, *status = U_ILLEGAL_ARGUMENT_ERROR; return; } - if(realCB==NULL) { + if (realCB == NULL) { *status = U_INTERNAL_PROGRAM_ERROR; + } else { + realCB(context, fromArgs, codeUnits, length, codePoint, reason, status); } - realCB(context, fromArgs, codeUnits, length, codePoint, reason, status); } } } @@ -1027,10 +826,11 @@ static void CHARSET_DECODER_CALLBACK(const void *context, *status = U_ILLEGAL_ARGUMENT_ERROR; return; } - if(realCB==NULL) { + if (realCB == NULL) { *status = U_INTERNAL_PROGRAM_ERROR; + } else { + realCB(context, args, codeUnits, length, reason, status); } - realCB(context, args, codeUnits, length, reason, status); } } } @@ -1085,18 +885,6 @@ static jint setCallbackDecode(JNIEnv *env, jclass, jlong handle, jint onMalforme return U_ILLEGAL_ARGUMENT_ERROR; } -static jlong safeClone(JNIEnv *env, jclass, jlong address) { - UConverter* source = reinterpret_cast(static_cast(address)); - if (!source) { - return NULL; - } - UErrorCode status = U_ZERO_ERROR; - jint bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; - UConverter* conv = ucnv_safeClone(source, NULL, &bufferSize, &status); - icu4jni_error(env, status); - return reinterpret_cast(conv); -} - static jint getMaxCharsPerByte(JNIEnv *env, jclass, jlong handle) { /* * currently we know that max number of chars per byte is 2 @@ -1204,37 +992,28 @@ static jobject charsetForName(JNIEnv* env, jclass, jstring charsetName) { */ static JNINativeMethod gMethods[] = { /* name, signature, funcPtr */ + { "canEncode", "(JI)Z", (void*) canEncode }, { "charsetForName", "(Ljava/lang/String;)Ljava/nio/charset/Charset;", (void*) charsetForName }, - { "convertByteToChar", "(J[BI[CI[IZ)I", (void*) convertByteToChar }, + { 
"closeConverter", "(J)V", (void*) closeConverter }, + { "contains", "(JJ)Z", (void*) contains }, { "decode", "(J[BI[CI[IZ)I", (void*) decode }, - { "convertCharToByte", "(J[CI[BI[IZ)I", (void*) convertCharToByte }, { "encode", "(J[CI[BI[IZ)I", (void*) encode }, - { "flushCharToByte", "(J[BI[I)I", (void*) flushCharToByte }, { "flushByteToChar", "(J[CI[I)I", (void*) flushByteToChar }, - { "openConverter", "(Ljava/lang/String;)J", (void*) openConverter }, - { "resetByteToChar", "(J)V", (void*) resetByteToChar }, - { "resetCharToByte", "(J)V", (void*) resetCharToByte }, - { "closeConverter", "(J)V", (void*) closeConverter }, - { "setSubstitutionChars", "(J[CI)I", (void*) setSubstitutionChars }, - { "setSubstitutionBytes", "(J[BI)I", (void*) setSubstitutionBytes }, - { "setSubstitutionModeCharToByte", "(JZ)I", (void*) setSubstitutionModeCharToByte }, - { "setSubstitutionModeByteToChar", "(JZ)I", (void*) setSubstitutionModeByteToChar }, - { "countInvalidBytes", "(J[I)I", (void*) countInvalidBytes }, - { "countInvalidChars", "(J[I)I", (void*) countInvalidChars }, - { "getMaxBytesPerChar", "(J)I", (void*) getMaxBytesPerChar }, - { "getMinBytesPerChar", "(J)I", (void*) getMinBytesPerChar }, + { "flushCharToByte", "(J[BI[I)I", (void*) flushCharToByte }, + { "getAvailableCharsetNames", "()[Ljava/lang/String;", (void*) getAvailableCharsetNames }, { "getAveBytesPerChar", "(J)F", (void*) getAveBytesPerChar }, - { "getMaxCharsPerByte", "(J)I", (void*) getMaxCharsPerByte }, { "getAveCharsPerByte", "(J)F", (void*) getAveCharsPerByte }, - { "contains", "(JJ)Z", (void*) contains }, + { "getMaxBytesPerChar", "(J)I", (void*) getMaxBytesPerChar }, + { "getMaxCharsPerByte", "(J)I", (void*) getMaxCharsPerByte }, + { "getMinBytesPerChar", "(J)I", (void*) getMinBytesPerChar }, { "getSubstitutionBytes", "(J)[B", (void*) getSubstitutionBytes }, - { "canEncode", "(JI)Z", (void*) canEncode }, - { "canDecode", "(J[B)Z", (void*) canDecode }, - { "getAvailable", "()[Ljava/lang/String;", (void*) 
getAvailable }, - { "countAliases", "(Ljava/lang/String;)I", (void*) countAliases }, + { "openConverter", "(Ljava/lang/String;)J", (void*) openConverter }, + { "resetByteToChar", "(J)V", (void*) resetByteToChar }, + { "resetCharToByte", "(J)V", (void*) resetCharToByte }, { "setCallbackDecode", "(JII[CI)I", (void*) setCallbackDecode }, { "setCallbackEncode", "(JII[BI)I", (void*) setCallbackEncode }, - { "safeClone", "(J)J", (void*) safeClone } + { "setSubstitutionBytes", "(J[BI)I", (void*) setSubstitutionBytes }, + { "setSubstitutionChars", "(J[CI)I", (void*) setSubstitutionChars }, }; int register_com_ibm_icu4jni_converters_NativeConverter(JNIEnv *_env) { diff --git a/nio_char/src/main/java/java/nio/charset/Charset.java b/nio_char/src/main/java/java/nio/charset/Charset.java index 4b8849f..7135d00 100644 --- a/nio_char/src/main/java/java/nio/charset/Charset.java +++ b/nio_char/src/main/java/java/nio/charset/Charset.java @@ -17,6 +17,7 @@ package java.nio.charset; +import com.ibm.icu4jni.charset.NativeConverter; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; @@ -39,10 +40,6 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; -// BEGIN android-changed -import com.ibm.icu4jni.charset.CharsetProviderICU; -// END android-changed - /** * A charset defines a mapping between a Unicode character sequence and a byte * sequence. 
It facilitates the encoding from a Unicode character sequence into @@ -94,13 +91,7 @@ public abstract class Charset implements Comparable { private static ClassLoader systemClassLoader; - // built in provider instance, assuming thread-safe - // BEGIN android-changed - private static final CharsetProviderICU _builtInProvider = new CharsetProviderICU(); - // END android-changed - - // cached built in charsets - private static SortedMap _builtInCharsets = null; + private static SortedMap cachedBuiltInCharsets; private final String canonicalName; @@ -299,29 +290,29 @@ public abstract class Charset implements Comparable { } } + private static synchronized SortedMap getCachedBuiltInCharsets() { + if (cachedBuiltInCharsets == null) { + cachedBuiltInCharsets = new TreeMap(String.CASE_INSENSITIVE_ORDER); + for (String charsetName : NativeConverter.getAvailableCharsetNames()) { + Charset charset = NativeConverter.charsetForName(charsetName); + cachedBuiltInCharsets.put(charset.name(), charset); + } + } + return cachedBuiltInCharsets; + } + /** - * Gets a map of all available charsets supported by the runtime. - *

- * The returned map contains mappings from canonical names to corresponding - * instances of Charset. The canonical names can be considered - * as case-insensitive. - * - * @return an unmodifiable map of all available charsets supported by the - * runtime + * Returns an immutable case-insensitive map from canonical names to {@code Charset} instances. + * If multiple charsets have the same canonical name, it is unspecified which is returned in + * the map. This method may be slow. If you know which charset you're looking for, use + * {@link #forName}. + * @return an immutable case-insensitive map from canonical names to {@code Charset} instances */ @SuppressWarnings("unchecked") public static SortedMap availableCharsets() { - // Initialize the built-in charsets map cache if necessary - if (_builtInCharsets == null) { - synchronized (Charset.class) { - if (_builtInCharsets == null) { - _builtInCharsets = _builtInProvider.initAvailableCharsets(); - } - } - } - - // Start with the built-in charsets... - SortedMap charsets = new TreeMap(_builtInCharsets); + // Start with a copy of the built-in charsets... + TreeMap charsets = new TreeMap(String.CASE_INSENSITIVE_ORDER); + charsets.putAll(getCachedBuiltInCharsets()); // Add all charsets provided by charset providers... ClassLoader contextClassLoader = getContextClassLoader(); @@ -467,7 +458,7 @@ public abstract class Charset implements Comparable { throw new IllegalArgumentException(); } checkCharsetName(charsetName); - cs = _builtInProvider.charsetForName(charsetName); + cs = NativeConverter.charsetForName(charsetName); if (cs != null) { cacheCharset(cs); } -- cgit v1.1