More Charset/ICU cleanup.

I've been feeling guilty about leaving broken double-checked locking (missing the "volatile") in Harmony's Charset code. A quick investigation showed that the method it's intended to optimize is basically never called, and the RI's documentation explicitly says "don't call this; it's slow". So this patch fixes the broken locking. I've also improved our documentation, deleted a bunch of dead code, and tidied up some dodgy native string handling.

Change-Id: Iad69ebb3459d9cc4c4ff37b255d458b83fe40132
author: Elliott Hughes <enh@google.com> 2010-04-02 17:19:21 -0700
committer: Elliott Hughes <enh@google.com> 2010-04-02 17:58:45 -0700
commit: ccb8b92211a3e87acaf6486c8d4423c2053b8b5e (patch)
tree: 5898c2d9793dcf05f83192c17183f09e13b8920a /icu/src/main/java
parent: 3604384c5f53c83383ce85f838901e46b0105e5e (diff)
download: libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.zip
libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.gz
libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.bz2
3 files changed, 70 insertions, 395 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
index 155f966..fe0f920 100644
--- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
+++ b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
@@ -5,7 +5,7 @@
 *******************************************************************************
 *
 *******************************************************************************
-*/ 
+*/
 
 package com.ibm.icu4jni.charset;
 
@@ -36,52 +36,52 @@ public final class CharsetICU extends Charset {
         long converterHandle = NativeConverter.openConverter(icuCanonicalName);
         return new CharsetDecoderICU(this, converterHandle);
     }
-    
+
     // hardCoded list of replacement bytes
-    private static final Map subByteMap = new HashMap();
-    static{
-        subByteMap.put("UTF-32",new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff});
-        subByteMap.put("ibm-16684_P110-2003",new byte[]{0x40, 0x40}); // make \u3000 the sub char
-        subByteMap.put("ibm-971_P100-1995",new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char
+    private static final Map<String, byte[]> subByteMap = new HashMap<String, byte[]>();
+    static {
+        subByteMap.put("UTF-32", new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff});
+        subByteMap.put("ibm-16684_P110-2003", new byte[]{0x40, 0x40}); // make \u3000 the sub char
+        subByteMap.put("ibm-971_P100-1995", new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char
     }
     /**
      * Returns a new encoder object of the charset
      * @return a new encoder
      * @stable ICU 2.4
      */
-    public CharsetEncoder newEncoder(){
+    public CharsetEncoder newEncoder() {
         // the arrays are locals and not
         // instance variables since the
-        // methods on this class need to 
+        // methods on this class need to
         // be thread safe
         long converterHandle = NativeConverter.openConverter(icuCanonicalName);
-        
+
         //According to the contract all converters should have non-empty replacement
         byte[] replacement = NativeConverter.getSubstitutionBytes(converterHandle);
 
-       try{
+       try {
             return new CharsetEncoderICU(this,converterHandle, replacement);
-        }catch(IllegalArgumentException ex){
+        } catch (IllegalArgumentException ex) {
             // work around for the non-sensical check in the nio API that
             // a substitution character must be mappable while decoding!!
-            replacement = (byte[])subByteMap.get(icuCanonicalName);
-            if(replacement==null){
+            replacement = subByteMap.get(icuCanonicalName);
+            if (replacement == null) {
                 replacement = new byte[NativeConverter.getMinBytesPerChar(converterHandle)];
-                for(int i=0; i<replacement.length; i++){
+                for(int i = 0; i < replacement.length; ++i) {
                     replacement[i]= 0x3f;
                 }
             }
             NativeConverter.setSubstitutionBytes(converterHandle, replacement, replacement.length);
             return new CharsetEncoderICU(this,converterHandle, replacement);
         }
-    } 
-    
+    }
+
     /**
      * Ascertains if a charset is a sub set of this charset
      * @param cs charset to test
      * @return true if the given charset is a subset of this charset
      * @stable ICU 2.4
-     * 
+     *
      * //CSDL: major changes by Jack
      */
     public boolean contains(Charset cs){
@@ -90,7 +90,7 @@ public final class CharsetICU extends Charset {
         } else if (this.equals(cs)) {
             return true;
         }
-        
+
         long converterHandle1 = 0;
         long converterHandle2 = 0;
 
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java
deleted file mode 100644
index 0479223..0000000
--- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
-*******************************************************************************
-* Copyright (C) 1996-2005, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*
-*******************************************************************************
-*/ 
-
-package com.ibm.icu4jni.charset;
-
-import java.nio.charset.Charset;
-import java.nio.charset.spi.CharsetProvider;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-public final class CharsetProviderICU extends CharsetProvider {
-    public CharsetProviderICU() {
-    }
-
-    @Override
-    public Charset charsetForName(String charsetName) {
-        return NativeConverter.charsetForName(charsetName);
-    }
-
-    @Override
-    public Iterator<Charset> charsets() {
-        ArrayList<Charset> result = new ArrayList<Charset>();
-        for (String charsetName : NativeConverter.getAvailable()) {
-            result.add(charsetForName(charsetName));
-        }
-        return result.iterator();
-    }
-
-    /**
-     * Implements Charset.availableCharsets.
-     */
-    public SortedMap<String, Charset> initAvailableCharsets() {
-        SortedMap<String, Charset> result =
-                new TreeMap<String, Charset>(String.CASE_INSENSITIVE_ORDER);
-        for (String charset : NativeConverter.getAvailable()) {
-            if (!result.containsKey(charset)) {
-                result.put(charset, charsetForName(charset));
-            }
-        }
-        return result;
-    }
-}
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
index eefe3d5..6a97c27 100644
--- a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
+++ b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
@@ -5,28 +5,21 @@
 *******************************************************************************
 *
 *******************************************************************************
-*/ 
+*/
 
 package com.ibm.icu4jni.charset;
 
 import java.nio.charset.Charset;
 
-/**
- * Class for accessing the underlying JNI methods
- * @internal ICU 2.4
- */
-final class NativeConverter {
-  
-    //Native methods
-    
+public final class NativeConverter {
     /**
      * Converts an array of bytes containing characters in an external
      * encoding into an array of Unicode characters.  This  method allows
-     * a buffer by buffer conversion of a data stream.  The state of the
-     * conversion is saved between calls to convert.  Among other things,
+     * buffer-by-buffer conversion of a data stream.  The state of the
+     * conversion is saved between calls.  Among other things,
      * this means multibyte input sequences can be split between calls.
-     * If a call to convert results in an Error, the conversion may be
-     * continued by calling convert again with suitably modified parameters.
+     * If a call results in an error, the conversion may be
+     * continued by calling this method again with suitably modified parameters.
      * All conversions should be finished with a call to the flush method.
      *
      * @param converterHandle Address of converter object created by C code
@@ -34,78 +27,22 @@ final class NativeConverter {
      * @param inEnd stop conversion at this offset in input array (exclusive).
      * @param output character array to receive conversion result.
      * @param outEnd stop writing to output array at this offset (exclusive).
-     * @param data integer array containing the following data    
+     * @param data integer array containing the following data
      *        data[0] = inputOffset
      *        data[1] = outputOffset
      * @return int error code returned by ICU
      * @internal ICU 2.4
      */
-     
-    public static final native int convertByteToChar( long converterHandle,
-                                   byte[] input, int inEnd,
-                                   char[] output, int outEnd,
-                                   int[] data,
-                                   boolean flush);
-    /**
-     * Converts an array of bytes containing characters in an external
-     * encoding into an array of Unicode characters.  This  method allows
-     * a buffer by buffer conversion of a data stream.  The state of the
-     * conversion is saved between calls to convert.  Among other things,
-     * this means multibyte input sequences can be split between calls.
-     * If a call to convert results in an Error, the conversion may be
-     * continued by calling convert again with suitably modified parameters.
-     * All conversions should be finished with a call to the flush method.
-     *
-     * @param converterHandle Address of converter object created by C code
-     * @param input byte array containing text to be converted.
-     * @param inEnd stop conversion at this offset in input array (exclusive).
-     * @param output character array to receive conversion result.
-     * @param outEnd stop writing to output array at this offset (exclusive).
-     * @param data integer array containing the following data    
-     *        data[0] = inputOffset
-     *        data[1] = outputOffset
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native int decode( long converterHandle,
-                                   byte[] input, int inEnd,
-                                   char[] output, int outEnd,
-                                   int[] data,
-                                   boolean flush);
-    /**
-     * Converts an array of Unicode chars containing characters in an 
-     * external encoding into an array of bytes.  This  method allows
-     * a buffer by buffer conversion of a data stream.  The state of the
-     * conversion is saved between calls to convert.  Among other things,
-     * this means multibyte input sequences can be split between calls.
-     * If a call to convert results in an Error, the conversion may be
-     * continued by calling convert again with suitably modified parameters.
-     * All conversions should be finished with a call to the flush method.
-     *
-     * @param converterHandle Address of converter object created by C code
-     * @param input char array containing text to be converted.
-     * @param inEnd stop conversion at this offset in input array (exclusive).
-     * @param output byte array to receive conversion result.
-     * @param outEnd stop writing to output array at this offset (exclusive).
-     * @param data integer array containing the following data    
-     *        data[0] = inputOffset
-     *        data[1] = outputOffset
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */                             
-    public static final native int convertCharToByte(long converterHandle,
-                                   char[] input, int inEnd,
-                                   byte[] output, int outEnd,
-                                   int[] data,
-                                   boolean flush); 
+    public static native int decode(long converterHandle, byte[] input, int inEnd,
+            char[] output, int outEnd, int[] data, boolean flush);
+
     /**
-     * Converts an array of Unicode chars containing characters in an 
-     * external encoding into an array of bytes.  This  method allows
-     * a buffer by buffer conversion of a data stream.  The state of the
+     * Converts an array of Unicode chars to an array of bytes in an external encoding.
+     * This method allows buffer-by-buffer conversion of a data stream.  The state of the
+     * conversion is saved between calls.  Among other things,
      * this means multibyte input sequences can be split between calls.
-     * If a call to convert results in an Error, the conversion may be
-     * continued by calling convert again with suitably modified parameters.
+     * If a call results in an error, the conversion may be
+     * continued by calling this method again with suitably modified parameters.
      * All conversions should be finished with a call to the flush method.
      *
      * @param converterHandle Address of converter object created by C code
@@ -113,283 +50,71 @@ final class NativeConverter {
      * @param inEnd stop conversion at this offset in input array (exclusive).
      * @param output byte array to receive conversion result.
      * @param outEnd stop writing to output array at this offset (exclusive).
-     * @param data integer array containing the following data    
+     * @param data integer array containing the following data
      *        data[0] = inputOffset
      *        data[1] = outputOffset
      * @return int error code returned by ICU
      * @internal ICU 2.4
-     */                                     
-    public static final native int encode(long converterHandle,
-                                   char[] input, int inEnd,
-                                   byte[] output, int outEnd,
-                                   int[] data,
-                                   boolean flush);
+     */
+    public static native int encode(long converterHandle, char[] input, int inEnd,
+            byte[] output, int outEnd, int[] data, boolean flush);
+
     /**
      * Writes any remaining output to the output buffer and resets the
-     * converter to its initial state. 
+     * converter to its initial state.
      *
      * @param converterHandle Address of converter object created by C code
      * @param output byte array to receive flushed output.
      * @param outEnd stop writing to output array at this offset (exclusive).
      * @return int error code returned by ICU
-     * @param data integer array containing the following data    
+     * @param data integer array containing the following data
      *        data[0] = inputOffset
      *        data[1] = outputOffset
      * @internal ICU 2.4
-     */ 
-    public static final native int flushCharToByte(long converterHandle,
-                                   byte[] output, 
-                                   int outEnd, 
-                                   int[] data);
+     */
+    public static native int flushCharToByte(long converterHandle, byte[] output, int outEnd, int[] data);
+
     /**
      * Writes any remaining output to the output buffer and resets the
-     * converter to its initial state. 
+     * converter to its initial state.
      *
      * @param converterHandle Address of converter object created by the native code
      * @param output char array to receive flushed output.
      * @param outEnd stop writing to output array at this offset (exclusive).
      * @return int error code returned by ICU
-     * @param data integer array containing the following data    
+     * @param data integer array containing the following data
      *        data[0] = inputOffset
      *        data[1] = outputOffset
      * @internal ICU 2.4
-     */     
-    public static final native int flushByteToChar(long converterHandle,
-                                   char[] output,  
-                                   int outEnd, 
-                                   int[] data);
-    
-    /**
-     * Open the converter with the specified encoding
-     *
-     * @param converterHandle long array for recieving the adress of converter object
-     *        created by the native code
-     * @param encoding string representing encoding
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native long openConverter(String encoding);
-    /**
-     * Resets the ByteToChar (toUnicode) state of specified converter 
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @internal ICU 2.4
-     */
-    public static final native void resetByteToChar(long  converterHandle);
-    
-    /**
-     * Resets the CharToByte (fromUnicode) state of specified converter 
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @internal ICU 2.4
-     */
-    public static final native void resetCharToByte(long  converterHandle);
-    
-    /**
-     * Closes the specified converter and releases the resources
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @internal ICU 2.4
-     */
-    public static final native void closeConverter(long converterHandle);
-    
-    /**
-     * Sets the substitution Unicode chars of the specified converter used
-     * by encoder
-     * @param converterHandle Address of converter object created by the native code
-     * @param subChars array of chars to used for substitution
-     * @param length length of the array 
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */    
-    public static final native int setSubstitutionChars( long converterHandle,
-                                   char[] subChars,int length); 
-    /**
-     * Sets the substitution bytes of the specified converter used by decoder
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param subChars array of bytes to used for substitution
-     * @param length length of the array 
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */    
-    public static final native int setSubstitutionBytes( long converterHandle,
-                                   byte[] subChars,int length);
-    /**
-     * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter 
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param mode to set the true/false
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */  
-    public static final native int setSubstitutionModeCharToByte(long converterHandle, 
-                                   boolean mode);
-    /**
-     * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter 
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param mode to set the true/false
-     * @return int error code returned by ICU
-     * @internal ICU 3.6
-     */  
-    public static final native int setSubstitutionModeByteToChar(long converterHandle, 
-                                   boolean mode);
-    /**
-     * Gets the numnber of invalid bytes in the specified converter object 
-     * for the last error that has occured
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param length array of int to recieve length of the array 
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native int countInvalidBytes(long converterHandle, int[] length);
-    
-    /**
-     * Gets the numnber of invalid chars in the specified converter object 
-     * for the last error that has occured
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param length array of int to recieve length of the array 
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */   
-    public static final native int countInvalidChars(long converterHandle, int[] length);
-    
-    /**
-     * Gets the number of bytes needed for converting a char
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 2.4
-     */ 
-    public static final native int getMaxBytesPerChar(long converterHandle);
-    
-    /**
-     * Gets the number of bytes needed for converting a char
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 3.2
-     */ 
-    public static final native int getMinBytesPerChar(long converterHandle);
-    
-    /**
-     * Gets the average numnber of bytes needed for converting a char
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 2.4
-     */ 
-    public static final native float getAveBytesPerChar(long converterHandle);
-   
-    /**
-     * Gets the number of chars needed for converting a byte
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 2.4
-     */ 
-    public static final native int getMaxCharsPerByte(long converterHandle);
-   
-    /**
-     * Gets the average numnber of chars needed for converting a byte
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 2.4
-     */ 
-    public static final native float getAveCharsPerByte(long converterHandle);
-    
-    //CSDL: added by Jack
-    /**
-     * Determines whether charset1 contains charset2.
-     */
-    public static final native boolean contains(long converterHandle1, long converterHandle2);
-    
-    public static final native byte[] getSubstitutionBytes(long converterHandle);
-    
-    /**
-     * Ascertains if a given Unicode code unit can 
-     * be converted to the target encoding
-     * @param converterHandle Address of converter object created by the native code
-     * @param  codeUnit the character to be converted
-     * @return true if a character can be converted
-     * @internal ICU 2.4
-     * 
-     */
-    public static final native boolean canEncode(long converterHandle,int codeUnit);
-    
-    /**
-     * Ascertains if a given a byte sequence can be converted to Unicode
-     * @param converterHandle Address of converter object created by the native code
-     * @param  bytes the bytes to be converted
-     * @return true if a character can be converted
-     * @internal ICU 2.4
-     * 
      */
-    public static final native boolean canDecode(long converterHandle,byte[] bytes);
-    
-    /**
-     * Gets the canonical names of available converters 
-     * @return Object[] names as an object array
-     * @internal ICU 2.4
-     */
-    public static final native String[] getAvailable();
+    public static native int flushByteToChar(long converterHandle, char[] output,  int outEnd, int[] data);
 
-    public static final native Charset charsetForName(String charsetName);
+    public static native long openConverter(String encoding);
+    public static native void closeConverter(long converterHandle);
+
+    public static native void resetByteToChar(long  converterHandle);
+    public static native void resetCharToByte(long  converterHandle);
+
+    public static native int setSubstitutionChars(long converterHandle, char[] subChars,int length);
+    public static native int setSubstitutionBytes(long converterHandle, byte[] subChars,int length);
+    public static native byte[] getSubstitutionBytes(long converterHandle);
+
+    public static native int getMaxBytesPerChar(long converterHandle);
+    public static native int getMinBytesPerChar(long converterHandle);
+    public static native float getAveBytesPerChar(long converterHandle);
+    public static native int getMaxCharsPerByte(long converterHandle);
+    public static native float getAveCharsPerByte(long converterHandle);
+
+    public static native boolean contains(long converterHandle1, long converterHandle2);
+
+    public static native boolean canEncode(long converterHandle, int codeUnit);
+
+    public static native String[] getAvailableCharsetNames();
+    public static native Charset charsetForName(String charsetName);
 
-    /**
-     * Gets the number of aliases for a converter name
-     * @param enc encoding name
-     * @return number of aliases for the converter
-     * @internal ICU 2.4
-     */
-    public static final native int countAliases(String enc);
-    
-    /** 
-     * Gets the aliases associated with the converter name
-     * @param enc converter name
-     * @return converter names as elements in an object array
-     * @internal ICU 2.4
-     */
-    public static final native String[] getAliases(String enc);
-    
-    /**
-     * Sets the callback to Unicode for ICU conveter. The default behaviour of ICU callback
-     * is to call the specified callback function for both illegal and unmapped sequences.
-     * @param converterHandle Adress of the converter object created by native code
-     * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK
-     *        The converter performs the specified callback when an error occurs
-     * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length);
-   
-    /**
-     * Sets the callback from Unicode for ICU conveter. The default behaviour of ICU callback
-     * is to call the specified callback function for both illegal and unmapped sequences.
-     * @param converterHandle Adress of the converter object created by native code
-     * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK
-     *        The converter performs the specified callback when an error occurs
-     * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length);
-    
-    /**
-     * Returns a thread safe clone of the converter
-     * @internal ICU 2.4
-     */
-    public static final native long safeClone(long converterHandle);
-    
-    /** @internal ICU 2.4 */
     public static final int STOP_CALLBACK = 0;//CodingErrorAction.REPORT
-    /** @internal ICU 2.4 */
     public static final int SKIP_CALLBACK = 1;//CodingErrorAction.IGNORE
-    /** @internal ICU 2.4 */
     public static final int SUBSTITUTE_CALLBACK = 2;//CodingErrorAction.REPLACE
+    public static native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length);
+    public static native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length);
 }
author	Elliott Hughes <enh@google.com>	2010-04-02 17:19:21 -0700
committer	Elliott Hughes <enh@google.com>	2010-04-02 17:58:45 -0700
commit	ccb8b92211a3e87acaf6486c8d4423c2053b8b5e (patch)
tree	5898c2d9793dcf05f83192c17183f09e13b8920a /icu/src/main/java
parent	3604384c5f53c83383ce85f838901e46b0105e5e (diff)
download	libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.zip libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.gz libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.bz2