diff options
author | Elliott Hughes <enh@google.com> | 2010-04-02 17:19:21 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2010-04-02 17:58:45 -0700 |
commit | ccb8b92211a3e87acaf6486c8d4423c2053b8b5e (patch) | |
tree | 5898c2d9793dcf05f83192c17183f09e13b8920a /icu/src/main/java | |
parent | 3604384c5f53c83383ce85f838901e46b0105e5e (diff) | |
download | libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.zip libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.gz libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.bz2 |
More Charset/ICU cleanup.
I've been feeling guilty about leaving broken double-checked locking (missing
the "volatile") in Harmony's Charset code. A quick investigation showed that
the method the locking is intended to optimize is basically never called, and
the reference implementation's (RI's) documentation explicitly says "don't call
this; it's slow". So this patch fixes the broken locking.
I've also improved our documentation.
I've also deleted a bunch of dead code.
I've also tidied up some dodgy native string handling.
Change-Id: Iad69ebb3459d9cc4c4ff37b255d458b83fe40132
Diffstat (limited to 'icu/src/main/java')
3 files changed, 70 insertions, 395 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java index 155f966..fe0f920 100644 --- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java +++ b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java @@ -5,7 +5,7 @@ ******************************************************************************* * ******************************************************************************* -*/ +*/ package com.ibm.icu4jni.charset; @@ -36,52 +36,52 @@ public final class CharsetICU extends Charset { long converterHandle = NativeConverter.openConverter(icuCanonicalName); return new CharsetDecoderICU(this, converterHandle); } - + // hardCoded list of replacement bytes - private static final Map subByteMap = new HashMap(); - static{ - subByteMap.put("UTF-32",new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff}); - subByteMap.put("ibm-16684_P110-2003",new byte[]{0x40, 0x40}); // make \u3000 the sub char - subByteMap.put("ibm-971_P100-1995",new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char + private static final Map<String, byte[]> subByteMap = new HashMap<String, byte[]>(); + static { + subByteMap.put("UTF-32", new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff}); + subByteMap.put("ibm-16684_P110-2003", new byte[]{0x40, 0x40}); // make \u3000 the sub char + subByteMap.put("ibm-971_P100-1995", new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char } /** * Returns a new encoder object of the charset * @return a new encoder * @stable ICU 2.4 */ - public CharsetEncoder newEncoder(){ + public CharsetEncoder newEncoder() { // the arrays are locals and not // instance variables since the - // methods on this class need to + // methods on this class need to // be thread safe long converterHandle = NativeConverter.openConverter(icuCanonicalName); - + //According to the contract all converters should have non-empty replacement byte[] replacement = 
NativeConverter.getSubstitutionBytes(converterHandle); - try{ + try { return new CharsetEncoderICU(this,converterHandle, replacement); - }catch(IllegalArgumentException ex){ + } catch (IllegalArgumentException ex) { // work around for the non-sensical check in the nio API that // a substitution character must be mappable while decoding!! - replacement = (byte[])subByteMap.get(icuCanonicalName); - if(replacement==null){ + replacement = subByteMap.get(icuCanonicalName); + if (replacement == null) { replacement = new byte[NativeConverter.getMinBytesPerChar(converterHandle)]; - for(int i=0; i<replacement.length; i++){ + for(int i = 0; i < replacement.length; ++i) { replacement[i]= 0x3f; } } NativeConverter.setSubstitutionBytes(converterHandle, replacement, replacement.length); return new CharsetEncoderICU(this,converterHandle, replacement); } - } - + } + /** * Ascertains if a charset is a sub set of this charset * @param cs charset to test * @return true if the given charset is a subset of this charset * @stable ICU 2.4 - * + * * //CSDL: major changes by Jack */ public boolean contains(Charset cs){ @@ -90,7 +90,7 @@ public final class CharsetICU extends Charset { } else if (this.equals(cs)) { return true; } - + long converterHandle1 = 0; long converterHandle2 = 0; diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java deleted file mode 100644 index 0479223..0000000 --- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java +++ /dev/null @@ -1,50 +0,0 @@ -/** -******************************************************************************* -* Copyright (C) 1996-2005, International Business Machines Corporation and * -* others. All Rights Reserved. 
* -******************************************************************************* -* -******************************************************************************* -*/ - -package com.ibm.icu4jni.charset; - -import java.nio.charset.Charset; -import java.nio.charset.spi.CharsetProvider; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.SortedMap; -import java.util.TreeMap; - -public final class CharsetProviderICU extends CharsetProvider { - public CharsetProviderICU() { - } - - @Override - public Charset charsetForName(String charsetName) { - return NativeConverter.charsetForName(charsetName); - } - - @Override - public Iterator<Charset> charsets() { - ArrayList<Charset> result = new ArrayList<Charset>(); - for (String charsetName : NativeConverter.getAvailable()) { - result.add(charsetForName(charsetName)); - } - return result.iterator(); - } - - /** - * Implements Charset.availableCharsets. - */ - public SortedMap<String, Charset> initAvailableCharsets() { - SortedMap<String, Charset> result = - new TreeMap<String, Charset>(String.CASE_INSENSITIVE_ORDER); - for (String charset : NativeConverter.getAvailable()) { - if (!result.containsKey(charset)) { - result.put(charset, charsetForName(charset)); - } - } - return result; - } -} diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java index eefe3d5..6a97c27 100644 --- a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java +++ b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java @@ -5,28 +5,21 @@ ******************************************************************************* * ******************************************************************************* -*/ +*/ package com.ibm.icu4jni.charset; import java.nio.charset.Charset; -/** - * Class for accessing the underlying JNI methods - * @internal ICU 2.4 - */ -final class NativeConverter { - - //Native methods - +public final class 
NativeConverter { /** * Converts an array of bytes containing characters in an external * encoding into an array of Unicode characters. This method allows - * a buffer by buffer conversion of a data stream. The state of the - * conversion is saved between calls to convert. Among other things, + * buffer-by-buffer conversion of a data stream. The state of the + * conversion is saved between calls. Among other things, * this means multibyte input sequences can be split between calls. - * If a call to convert results in an Error, the conversion may be - * continued by calling convert again with suitably modified parameters. + * If a call to results in an error, the conversion may be + * continued by calling this method again with suitably modified parameters. * All conversions should be finished with a call to the flush method. * * @param converterHandle Address of converter object created by C code @@ -34,78 +27,22 @@ final class NativeConverter { * @param inEnd stop conversion at this offset in input array (exclusive). * @param output character array to receive conversion result. * @param outEnd stop writing to output array at this offset (exclusive). - * @param data integer array containing the following data + * @param data integer array containing the following data * data[0] = inputOffset * data[1] = outputOffset * @return int error code returned by ICU * @internal ICU 2.4 */ - - public static final native int convertByteToChar( long converterHandle, - byte[] input, int inEnd, - char[] output, int outEnd, - int[] data, - boolean flush); - /** - * Converts an array of bytes containing characters in an external - * encoding into an array of Unicode characters. This method allows - * a buffer by buffer conversion of a data stream. The state of the - * conversion is saved between calls to convert. Among other things, - * this means multibyte input sequences can be split between calls. 
- * If a call to convert results in an Error, the conversion may be - * continued by calling convert again with suitably modified parameters. - * All conversions should be finished with a call to the flush method. - * - * @param converterHandle Address of converter object created by C code - * @param input byte array containing text to be converted. - * @param inEnd stop conversion at this offset in input array (exclusive). - * @param output character array to receive conversion result. - * @param outEnd stop writing to output array at this offset (exclusive). - * @param data integer array containing the following data - * data[0] = inputOffset - * data[1] = outputOffset - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int decode( long converterHandle, - byte[] input, int inEnd, - char[] output, int outEnd, - int[] data, - boolean flush); - /** - * Converts an array of Unicode chars containing characters in an - * external encoding into an array of bytes. This method allows - * a buffer by buffer conversion of a data stream. The state of the - * conversion is saved between calls to convert. Among other things, - * this means multibyte input sequences can be split between calls. - * If a call to convert results in an Error, the conversion may be - * continued by calling convert again with suitably modified parameters. - * All conversions should be finished with a call to the flush method. - * - * @param converterHandle Address of converter object created by C code - * @param input char array containing text to be converted. - * @param inEnd stop conversion at this offset in input array (exclusive). - * @param output byte array to receive conversion result. - * @param outEnd stop writing to output array at this offset (exclusive). 
- * @param data integer array containing the following data - * data[0] = inputOffset - * data[1] = outputOffset - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int convertCharToByte(long converterHandle, - char[] input, int inEnd, - byte[] output, int outEnd, - int[] data, - boolean flush); + public static native int decode(long converterHandle, byte[] input, int inEnd, + char[] output, int outEnd, int[] data, boolean flush); + /** - * Converts an array of Unicode chars containing characters in an - * external encoding into an array of bytes. This method allows - * a buffer by buffer conversion of a data stream. The state of the + * Converts an array of Unicode chars to an array of bytes in an external encoding. + * This method allows a buffer by buffer conversion of a data stream. The state of the * conversion is saved between calls to convert. Among other things, * this means multibyte input sequences can be split between calls. - * If a call to convert results in an Error, the conversion may be - * continued by calling convert again with suitably modified parameters. + * If a call results in an error, the conversion may be + * continued by calling this method again with suitably modified parameters. * All conversions should be finished with a call to the flush method. * * @param converterHandle Address of converter object created by C code @@ -113,283 +50,71 @@ final class NativeConverter { * @param inEnd stop conversion at this offset in input array (exclusive). * @param output byte array to receive conversion result. * @param outEnd stop writing to output array at this offset (exclusive). 
- * @param data integer array containing the following data + * @param data integer array containing the following data * data[0] = inputOffset * data[1] = outputOffset * @return int error code returned by ICU * @internal ICU 2.4 - */ - public static final native int encode(long converterHandle, - char[] input, int inEnd, - byte[] output, int outEnd, - int[] data, - boolean flush); + */ + public static native int encode(long converterHandle, char[] input, int inEnd, + byte[] output, int outEnd, int[] data, boolean flush); + /** * Writes any remaining output to the output buffer and resets the - * converter to its initial state. + * converter to its initial state. * * @param converterHandle Address of converter object created by C code * @param output byte array to receive flushed output. * @param outEnd stop writing to output array at this offset (exclusive). * @return int error code returned by ICU - * @param data integer array containing the following data + * @param data integer array containing the following data * data[0] = inputOffset * data[1] = outputOffset * @internal ICU 2.4 - */ - public static final native int flushCharToByte(long converterHandle, - byte[] output, - int outEnd, - int[] data); + */ + public static native int flushCharToByte(long converterHandle, byte[] output, int outEnd, int[] data); + /** * Writes any remaining output to the output buffer and resets the - * converter to its initial state. + * converter to its initial state. * * @param converterHandle Address of converter object created by the native code * @param output char array to receive flushed output. * @param outEnd stop writing to output array at this offset (exclusive). 
* @return int error code returned by ICU - * @param data integer array containing the following data + * @param data integer array containing the following data * data[0] = inputOffset * data[1] = outputOffset * @internal ICU 2.4 - */ - public static final native int flushByteToChar(long converterHandle, - char[] output, - int outEnd, - int[] data); - - /** - * Open the converter with the specified encoding - * - * @param converterHandle long array for recieving the adress of converter object - * created by the native code - * @param encoding string representing encoding - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native long openConverter(String encoding); - /** - * Resets the ByteToChar (toUnicode) state of specified converter - * - * @param converterHandle Address of converter object created by the native code - * @internal ICU 2.4 - */ - public static final native void resetByteToChar(long converterHandle); - - /** - * Resets the CharToByte (fromUnicode) state of specified converter - * - * @param converterHandle Address of converter object created by the native code - * @internal ICU 2.4 - */ - public static final native void resetCharToByte(long converterHandle); - - /** - * Closes the specified converter and releases the resources - * - * @param converterHandle Address of converter object created by the native code - * @internal ICU 2.4 - */ - public static final native void closeConverter(long converterHandle); - - /** - * Sets the substitution Unicode chars of the specified converter used - * by encoder - * @param converterHandle Address of converter object created by the native code - * @param subChars array of chars to used for substitution - * @param length length of the array - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setSubstitutionChars( long converterHandle, - char[] subChars,int length); - /** - * Sets the substitution bytes of the specified 
converter used by decoder - * - * @param converterHandle Address of converter object created by the native code - * @param subChars array of bytes to used for substitution - * @param length length of the array - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setSubstitutionBytes( long converterHandle, - byte[] subChars,int length); - /** - * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter - * - * @param converterHandle Address of converter object created by the native code - * @param mode to set the true/false - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setSubstitutionModeCharToByte(long converterHandle, - boolean mode); - /** - * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter - * - * @param converterHandle Address of converter object created by the native code - * @param mode to set the true/false - * @return int error code returned by ICU - * @internal ICU 3.6 - */ - public static final native int setSubstitutionModeByteToChar(long converterHandle, - boolean mode); - /** - * Gets the numnber of invalid bytes in the specified converter object - * for the last error that has occured - * - * @param converterHandle Address of converter object created by the native code - * @param length array of int to recieve length of the array - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int countInvalidBytes(long converterHandle, int[] length); - - /** - * Gets the numnber of invalid chars in the specified converter object - * for the last error that has occured - * - * @param converterHandle Address of converter object created by the native code - * @param length array of int to recieve length of the array - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int countInvalidChars(long converterHandle, 
int[] length); - - /** - * Gets the number of bytes needed for converting a char - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 2.4 - */ - public static final native int getMaxBytesPerChar(long converterHandle); - - /** - * Gets the number of bytes needed for converting a char - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 3.2 - */ - public static final native int getMinBytesPerChar(long converterHandle); - - /** - * Gets the average numnber of bytes needed for converting a char - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 2.4 - */ - public static final native float getAveBytesPerChar(long converterHandle); - - /** - * Gets the number of chars needed for converting a byte - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 2.4 - */ - public static final native int getMaxCharsPerByte(long converterHandle); - - /** - * Gets the average numnber of chars needed for converting a byte - * - * @param converterHandle Address of converter object created by the native code - * @return number of bytes needed - * @internal ICU 2.4 - */ - public static final native float getAveCharsPerByte(long converterHandle); - - //CSDL: added by Jack - /** - * Determines whether charset1 contains charset2. 
- */ - public static final native boolean contains(long converterHandle1, long converterHandle2); - - public static final native byte[] getSubstitutionBytes(long converterHandle); - - /** - * Ascertains if a given Unicode code unit can - * be converted to the target encoding - * @param converterHandle Address of converter object created by the native code - * @param codeUnit the character to be converted - * @return true if a character can be converted - * @internal ICU 2.4 - * - */ - public static final native boolean canEncode(long converterHandle,int codeUnit); - - /** - * Ascertains if a given a byte sequence can be converted to Unicode - * @param converterHandle Address of converter object created by the native code - * @param bytes the bytes to be converted - * @return true if a character can be converted - * @internal ICU 2.4 - * */ - public static final native boolean canDecode(long converterHandle,byte[] bytes); - - /** - * Gets the canonical names of available converters - * @return Object[] names as an object array - * @internal ICU 2.4 - */ - public static final native String[] getAvailable(); + public static native int flushByteToChar(long converterHandle, char[] output, int outEnd, int[] data); - public static final native Charset charsetForName(String charsetName); + public static native long openConverter(String encoding); + public static native void closeConverter(long converterHandle); + + public static native void resetByteToChar(long converterHandle); + public static native void resetCharToByte(long converterHandle); + + public static native int setSubstitutionChars(long converterHandle, char[] subChars,int length); + public static native int setSubstitutionBytes(long converterHandle, byte[] subChars,int length); + public static native byte[] getSubstitutionBytes(long converterHandle); + + public static native int getMaxBytesPerChar(long converterHandle); + public static native int getMinBytesPerChar(long converterHandle); + public static native 
float getAveBytesPerChar(long converterHandle); + public static native int getMaxCharsPerByte(long converterHandle); + public static native float getAveCharsPerByte(long converterHandle); + + public static native boolean contains(long converterHandle1, long converterHandle2); + + public static native boolean canEncode(long converterHandle, int codeUnit); + + public static native String[] getAvailableCharsetNames(); + public static native Charset charsetForName(String charsetName); - /** - * Gets the number of aliases for a converter name - * @param enc encoding name - * @return number of aliases for the converter - * @internal ICU 2.4 - */ - public static final native int countAliases(String enc); - - /** - * Gets the aliases associated with the converter name - * @param enc converter name - * @return converter names as elements in an object array - * @internal ICU 2.4 - */ - public static final native String[] getAliases(String enc); - - /** - * Sets the callback to Unicode for ICU conveter. The default behaviour of ICU callback - * is to call the specified callback function for both illegal and unmapped sequences. - * @param converterHandle Adress of the converter object created by native code - * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK - * The converter performs the specified callback when an error occurs - * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length); - - /** - * Sets the callback from Unicode for ICU conveter. The default behaviour of ICU callback - * is to call the specified callback function for both illegal and unmapped sequences. 
- * @param converterHandle Adress of the converter object created by native code - * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK - * The converter performs the specified callback when an error occurs - * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence - * @return int error code returned by ICU - * @internal ICU 2.4 - */ - public static final native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length); - - /** - * Returns a thread safe clone of the converter - * @internal ICU 2.4 - */ - public static final native long safeClone(long converterHandle); - - /** @internal ICU 2.4 */ public static final int STOP_CALLBACK = 0;//CodingErrorAction.REPORT - /** @internal ICU 2.4 */ public static final int SKIP_CALLBACK = 1;//CodingErrorAction.IGNORE - /** @internal ICU 2.4 */ public static final int SUBSTITUTE_CALLBACK = 2;//CodingErrorAction.REPLACE + public static native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length); + public static native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length); } |