More Charset/ICU cleanup.

I've been feeling guilty about leaving broken double-checked locking (missing the "volatile") in Harmony's Charset code. A quick investigation showed that the method it's intended to optimize is basically never called, and the RI's documentation explicitly says "don't call this; it's slow". So this patch fixes the broken locking. I've also improved our documentation, deleted a bunch of dead code, and tidied up some dodgy native string handling.

Change-Id: Iad69ebb3459d9cc4c4ff37b255d458b83fe40132
author: Elliott Hughes <enh@google.com> 2010-04-02 17:19:21 -0700
committer: Elliott Hughes <enh@google.com> 2010-04-02 17:58:45 -0700
commit: ccb8b92211a3e87acaf6486c8d4423c2053b8b5e (patch)
tree: 5898c2d9793dcf05f83192c17183f09e13b8920a /icu/src/main
parent: 3604384c5f53c83383ce85f838901e46b0105e5e (diff)
download: libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.zip
libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.gz
libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.bz2
4 files changed, 146 insertions, 692 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
index 155f966..fe0f920 100644
--- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
+++ b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
@@ -5,7 +5,7 @@
 *******************************************************************************
 *
 *******************************************************************************
-*/ 
+*/
 
 package com.ibm.icu4jni.charset;
 
@@ -36,52 +36,52 @@ public final class CharsetICU extends Charset {
         long converterHandle = NativeConverter.openConverter(icuCanonicalName);
         return new CharsetDecoderICU(this, converterHandle);
     }
-    
+
     // hardCoded list of replacement bytes
-    private static final Map subByteMap = new HashMap();
-    static{
-        subByteMap.put("UTF-32",new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff});
-        subByteMap.put("ibm-16684_P110-2003",new byte[]{0x40, 0x40}); // make \u3000 the sub char
-        subByteMap.put("ibm-971_P100-1995",new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char
+    private static final Map<String, byte[]> subByteMap = new HashMap<String, byte[]>();
+    static {
+        subByteMap.put("UTF-32", new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff});
+        subByteMap.put("ibm-16684_P110-2003", new byte[]{0x40, 0x40}); // make \u3000 the sub char
+        subByteMap.put("ibm-971_P100-1995", new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char
     }
     /**
      * Returns a new encoder object of the charset
      * @return a new encoder
      * @stable ICU 2.4
      */
-    public CharsetEncoder newEncoder(){
+    public CharsetEncoder newEncoder() {
         // the arrays are locals and not
         // instance variables since the
-        // methods on this class need to 
+        // methods on this class need to
         // be thread safe
         long converterHandle = NativeConverter.openConverter(icuCanonicalName);
-        
+
         //According to the contract all converters should have non-empty replacement
         byte[] replacement = NativeConverter.getSubstitutionBytes(converterHandle);
 
-       try{
+       try {
             return new CharsetEncoderICU(this,converterHandle, replacement);
-        }catch(IllegalArgumentException ex){
+        } catch (IllegalArgumentException ex) {
             // work around for the non-sensical check in the nio API that
             // a substitution character must be mappable while decoding!!
-            replacement = (byte[])subByteMap.get(icuCanonicalName);
-            if(replacement==null){
+            replacement = subByteMap.get(icuCanonicalName);
+            if (replacement == null) {
                 replacement = new byte[NativeConverter.getMinBytesPerChar(converterHandle)];
-                for(int i=0; i<replacement.length; i++){
+                for(int i = 0; i < replacement.length; ++i) {
                     replacement[i]= 0x3f;
                 }
             }
             NativeConverter.setSubstitutionBytes(converterHandle, replacement, replacement.length);
             return new CharsetEncoderICU(this,converterHandle, replacement);
         }
-    } 
-    
+    }
+
     /**
      * Ascertains if a charset is a sub set of this charset
      * @param cs charset to test
      * @return true if the given charset is a subset of this charset
      * @stable ICU 2.4
-     * 
+     *
      * //CSDL: major changes by Jack
      */
     public boolean contains(Charset cs){
@@ -90,7 +90,7 @@ public final class CharsetICU extends Charset {
         } else if (this.equals(cs)) {
             return true;
         }
-        
+
         long converterHandle1 = 0;
         long converterHandle2 = 0;
 
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java
deleted file mode 100644
index 0479223..0000000
--- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
-*******************************************************************************
-* Copyright (C) 1996-2005, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*
-*******************************************************************************
-*/ 
-
-package com.ibm.icu4jni.charset;
-
-import java.nio.charset.Charset;
-import java.nio.charset.spi.CharsetProvider;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-public final class CharsetProviderICU extends CharsetProvider {
-    public CharsetProviderICU() {
-    }
-
-    @Override
-    public Charset charsetForName(String charsetName) {
-        return NativeConverter.charsetForName(charsetName);
-    }
-
-    @Override
-    public Iterator<Charset> charsets() {
-        ArrayList<Charset> result = new ArrayList<Charset>();
-        for (String charsetName : NativeConverter.getAvailable()) {
-            result.add(charsetForName(charsetName));
-        }
-        return result.iterator();
-    }
-
-    /**
-     * Implements Charset.availableCharsets.
-     */
-    public SortedMap<String, Charset> initAvailableCharsets() {
-        SortedMap<String, Charset> result =
-                new TreeMap<String, Charset>(String.CASE_INSENSITIVE_ORDER);
-        for (String charset : NativeConverter.getAvailable()) {
-            if (!result.containsKey(charset)) {
-                result.put(charset, charsetForName(charset));
-            }
-        }
-        return result;
-    }
-}
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
index eefe3d5..6a97c27 100644
--- a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
+++ b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
@@ -5,28 +5,21 @@
 *******************************************************************************
 *
 *******************************************************************************
-*/ 
+*/
 
 package com.ibm.icu4jni.charset;
 
 import java.nio.charset.Charset;
 
-/**
- * Class for accessing the underlying JNI methods
- * @internal ICU 2.4
- */
-final class NativeConverter {
-  
-    //Native methods
-    
+public final class NativeConverter {
     /**
      * Converts an array of bytes containing characters in an external
      * encoding into an array of Unicode characters.  This  method allows
-     * a buffer by buffer conversion of a data stream.  The state of the
-     * conversion is saved between calls to convert.  Among other things,
+     * buffer-by-buffer conversion of a data stream.  The state of the
+     * conversion is saved between calls.  Among other things,
      * this means multibyte input sequences can be split between calls.
-     * If a call to convert results in an Error, the conversion may be
-     * continued by calling convert again with suitably modified parameters.
+     * If a call results in an error, the conversion may be
+     * continued by calling this method again with suitably modified parameters.
      * All conversions should be finished with a call to the flush method.
      *
      * @param converterHandle Address of converter object created by C code
@@ -34,78 +27,22 @@ final class NativeConverter {
      * @param inEnd stop conversion at this offset in input array (exclusive).
      * @param output character array to receive conversion result.
      * @param outEnd stop writing to output array at this offset (exclusive).
-     * @param data integer array containing the following data    
+     * @param data integer array containing the following data
      *        data[0] = inputOffset
      *        data[1] = outputOffset
      * @return int error code returned by ICU
      * @internal ICU 2.4
      */
-     
-    public static final native int convertByteToChar( long converterHandle,
-                                   byte[] input, int inEnd,
-                                   char[] output, int outEnd,
-                                   int[] data,
-                                   boolean flush);
-    /**
-     * Converts an array of bytes containing characters in an external
-     * encoding into an array of Unicode characters.  This  method allows
-     * a buffer by buffer conversion of a data stream.  The state of the
-     * conversion is saved between calls to convert.  Among other things,
-     * this means multibyte input sequences can be split between calls.
-     * If a call to convert results in an Error, the conversion may be
-     * continued by calling convert again with suitably modified parameters.
-     * All conversions should be finished with a call to the flush method.
-     *
-     * @param converterHandle Address of converter object created by C code
-     * @param input byte array containing text to be converted.
-     * @param inEnd stop conversion at this offset in input array (exclusive).
-     * @param output character array to receive conversion result.
-     * @param outEnd stop writing to output array at this offset (exclusive).
-     * @param data integer array containing the following data    
-     *        data[0] = inputOffset
-     *        data[1] = outputOffset
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native int decode( long converterHandle,
-                                   byte[] input, int inEnd,
-                                   char[] output, int outEnd,
-                                   int[] data,
-                                   boolean flush);
-    /**
-     * Converts an array of Unicode chars containing characters in an 
-     * external encoding into an array of bytes.  This  method allows
-     * a buffer by buffer conversion of a data stream.  The state of the
-     * conversion is saved between calls to convert.  Among other things,
-     * this means multibyte input sequences can be split between calls.
-     * If a call to convert results in an Error, the conversion may be
-     * continued by calling convert again with suitably modified parameters.
-     * All conversions should be finished with a call to the flush method.
-     *
-     * @param converterHandle Address of converter object created by C code
-     * @param input char array containing text to be converted.
-     * @param inEnd stop conversion at this offset in input array (exclusive).
-     * @param output byte array to receive conversion result.
-     * @param outEnd stop writing to output array at this offset (exclusive).
-     * @param data integer array containing the following data    
-     *        data[0] = inputOffset
-     *        data[1] = outputOffset
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */                             
-    public static final native int convertCharToByte(long converterHandle,
-                                   char[] input, int inEnd,
-                                   byte[] output, int outEnd,
-                                   int[] data,
-                                   boolean flush); 
+    public static native int decode(long converterHandle, byte[] input, int inEnd,
+            char[] output, int outEnd, int[] data, boolean flush);
+
     /**
-     * Converts an array of Unicode chars containing characters in an 
-     * external encoding into an array of bytes.  This  method allows
-     * a buffer by buffer conversion of a data stream.  The state of the
+     * Converts an array of Unicode chars to an array of bytes in an external encoding.
+     * This method allows buffer-by-buffer conversion of a data stream.  The state of the
      * conversion is saved between calls to convert.  Among other things,
      * this means multibyte input sequences can be split between calls.
-     * If a call to convert results in an Error, the conversion may be
-     * continued by calling convert again with suitably modified parameters.
+     * If a call results in an error, the conversion may be
+     * continued by calling this method again with suitably modified parameters.
      * All conversions should be finished with a call to the flush method.
      *
      * @param converterHandle Address of converter object created by C code
@@ -113,283 +50,71 @@ final class NativeConverter {
      * @param inEnd stop conversion at this offset in input array (exclusive).
      * @param output byte array to receive conversion result.
      * @param outEnd stop writing to output array at this offset (exclusive).
-     * @param data integer array containing the following data    
+     * @param data integer array containing the following data
      *        data[0] = inputOffset
      *        data[1] = outputOffset
      * @return int error code returned by ICU
      * @internal ICU 2.4
-     */                                     
-    public static final native int encode(long converterHandle,
-                                   char[] input, int inEnd,
-                                   byte[] output, int outEnd,
-                                   int[] data,
-                                   boolean flush);
+     */
+    public static native int encode(long converterHandle, char[] input, int inEnd,
+            byte[] output, int outEnd, int[] data, boolean flush);
+
     /**
      * Writes any remaining output to the output buffer and resets the
-     * converter to its initial state. 
+     * converter to its initial state.
      *
      * @param converterHandle Address of converter object created by C code
      * @param output byte array to receive flushed output.
      * @param outEnd stop writing to output array at this offset (exclusive).
      * @return int error code returned by ICU
-     * @param data integer array containing the following data    
+     * @param data integer array containing the following data
      *        data[0] = inputOffset
      *        data[1] = outputOffset
      * @internal ICU 2.4
-     */ 
-    public static final native int flushCharToByte(long converterHandle,
-                                   byte[] output, 
-                                   int outEnd, 
-                                   int[] data);
+     */
+    public static native int flushCharToByte(long converterHandle, byte[] output, int outEnd, int[] data);
+
     /**
      * Writes any remaining output to the output buffer and resets the
-     * converter to its initial state. 
+     * converter to its initial state.
      *
      * @param converterHandle Address of converter object created by the native code
      * @param output char array to receive flushed output.
      * @param outEnd stop writing to output array at this offset (exclusive).
      * @return int error code returned by ICU
-     * @param data integer array containing the following data    
+     * @param data integer array containing the following data
      *        data[0] = inputOffset
      *        data[1] = outputOffset
      * @internal ICU 2.4
-     */     
-    public static final native int flushByteToChar(long converterHandle,
-                                   char[] output,  
-                                   int outEnd, 
-                                   int[] data);
-    
-    /**
-     * Open the converter with the specified encoding
-     *
-     * @param converterHandle long array for recieving the adress of converter object
-     *        created by the native code
-     * @param encoding string representing encoding
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native long openConverter(String encoding);
-    /**
-     * Resets the ByteToChar (toUnicode) state of specified converter 
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @internal ICU 2.4
-     */
-    public static final native void resetByteToChar(long  converterHandle);
-    
-    /**
-     * Resets the CharToByte (fromUnicode) state of specified converter 
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @internal ICU 2.4
-     */
-    public static final native void resetCharToByte(long  converterHandle);
-    
-    /**
-     * Closes the specified converter and releases the resources
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @internal ICU 2.4
-     */
-    public static final native void closeConverter(long converterHandle);
-    
-    /**
-     * Sets the substitution Unicode chars of the specified converter used
-     * by encoder
-     * @param converterHandle Address of converter object created by the native code
-     * @param subChars array of chars to used for substitution
-     * @param length length of the array 
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */    
-    public static final native int setSubstitutionChars( long converterHandle,
-                                   char[] subChars,int length); 
-    /**
-     * Sets the substitution bytes of the specified converter used by decoder
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param subChars array of bytes to used for substitution
-     * @param length length of the array 
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */    
-    public static final native int setSubstitutionBytes( long converterHandle,
-                                   byte[] subChars,int length);
-    /**
-     * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter 
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param mode to set the true/false
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */  
-    public static final native int setSubstitutionModeCharToByte(long converterHandle, 
-                                   boolean mode);
-    /**
-     * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter 
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param mode to set the true/false
-     * @return int error code returned by ICU
-     * @internal ICU 3.6
-     */  
-    public static final native int setSubstitutionModeByteToChar(long converterHandle, 
-                                   boolean mode);
-    /**
-     * Gets the numnber of invalid bytes in the specified converter object 
-     * for the last error that has occured
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param length array of int to recieve length of the array 
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native int countInvalidBytes(long converterHandle, int[] length);
-    
-    /**
-     * Gets the numnber of invalid chars in the specified converter object 
-     * for the last error that has occured
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @param length array of int to recieve length of the array 
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */   
-    public static final native int countInvalidChars(long converterHandle, int[] length);
-    
-    /**
-     * Gets the number of bytes needed for converting a char
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 2.4
-     */ 
-    public static final native int getMaxBytesPerChar(long converterHandle);
-    
-    /**
-     * Gets the number of bytes needed for converting a char
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 3.2
-     */ 
-    public static final native int getMinBytesPerChar(long converterHandle);
-    
-    /**
-     * Gets the average numnber of bytes needed for converting a char
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 2.4
-     */ 
-    public static final native float getAveBytesPerChar(long converterHandle);
-   
-    /**
-     * Gets the number of chars needed for converting a byte
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 2.4
-     */ 
-    public static final native int getMaxCharsPerByte(long converterHandle);
-   
-    /**
-     * Gets the average numnber of chars needed for converting a byte
-     *
-     * @param converterHandle Address of converter object created by the native code
-     * @return number of bytes needed
-     * @internal ICU 2.4
-     */ 
-    public static final native float getAveCharsPerByte(long converterHandle);
-    
-    //CSDL: added by Jack
-    /**
-     * Determines whether charset1 contains charset2.
-     */
-    public static final native boolean contains(long converterHandle1, long converterHandle2);
-    
-    public static final native byte[] getSubstitutionBytes(long converterHandle);
-    
-    /**
-     * Ascertains if a given Unicode code unit can 
-     * be converted to the target encoding
-     * @param converterHandle Address of converter object created by the native code
-     * @param  codeUnit the character to be converted
-     * @return true if a character can be converted
-     * @internal ICU 2.4
-     * 
-     */
-    public static final native boolean canEncode(long converterHandle,int codeUnit);
-    
-    /**
-     * Ascertains if a given a byte sequence can be converted to Unicode
-     * @param converterHandle Address of converter object created by the native code
-     * @param  bytes the bytes to be converted
-     * @return true if a character can be converted
-     * @internal ICU 2.4
-     * 
      */
-    public static final native boolean canDecode(long converterHandle,byte[] bytes);
-    
-    /**
-     * Gets the canonical names of available converters 
-     * @return Object[] names as an object array
-     * @internal ICU 2.4
-     */
-    public static final native String[] getAvailable();
+    public static native int flushByteToChar(long converterHandle, char[] output,  int outEnd, int[] data);
 
-    public static final native Charset charsetForName(String charsetName);
+    public static native long openConverter(String encoding);
+    public static native void closeConverter(long converterHandle);
+
+    public static native void resetByteToChar(long  converterHandle);
+    public static native void resetCharToByte(long  converterHandle);
+
+    public static native int setSubstitutionChars(long converterHandle, char[] subChars,int length);
+    public static native int setSubstitutionBytes(long converterHandle, byte[] subChars,int length);
+    public static native byte[] getSubstitutionBytes(long converterHandle);
+
+    public static native int getMaxBytesPerChar(long converterHandle);
+    public static native int getMinBytesPerChar(long converterHandle);
+    public static native float getAveBytesPerChar(long converterHandle);
+    public static native int getMaxCharsPerByte(long converterHandle);
+    public static native float getAveCharsPerByte(long converterHandle);
+
+    public static native boolean contains(long converterHandle1, long converterHandle2);
+
+    public static native boolean canEncode(long converterHandle, int codeUnit);
+
+    public static native String[] getAvailableCharsetNames();
+    public static native Charset charsetForName(String charsetName);
 
-    /**
-     * Gets the number of aliases for a converter name
-     * @param enc encoding name
-     * @return number of aliases for the converter
-     * @internal ICU 2.4
-     */
-    public static final native int countAliases(String enc);
-    
-    /** 
-     * Gets the aliases associated with the converter name
-     * @param enc converter name
-     * @return converter names as elements in an object array
-     * @internal ICU 2.4
-     */
-    public static final native String[] getAliases(String enc);
-    
-    /**
-     * Sets the callback to Unicode for ICU conveter. The default behaviour of ICU callback
-     * is to call the specified callback function for both illegal and unmapped sequences.
-     * @param converterHandle Adress of the converter object created by native code
-     * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK
-     *        The converter performs the specified callback when an error occurs
-     * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length);
-   
-    /**
-     * Sets the callback from Unicode for ICU conveter. The default behaviour of ICU callback
-     * is to call the specified callback function for both illegal and unmapped sequences.
-     * @param converterHandle Adress of the converter object created by native code
-     * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK
-     *        The converter performs the specified callback when an error occurs
-     * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence
-     * @return int error code returned by ICU
-     * @internal ICU 2.4
-     */
-    public static final native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length);
-    
-    /**
-     * Returns a thread safe clone of the converter
-     * @internal ICU 2.4
-     */
-    public static final native long safeClone(long converterHandle);
-    
-    /** @internal ICU 2.4 */
     public static final int STOP_CALLBACK = 0;//CodingErrorAction.REPORT
-    /** @internal ICU 2.4 */
     public static final int SKIP_CALLBACK = 1;//CodingErrorAction.IGNORE
-    /** @internal ICU 2.4 */
     public static final int SUBSTITUTE_CALLBACK = 2;//CodingErrorAction.REPLACE
+    public static native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length);
+    public static native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length);
 }
diff --git a/icu/src/main/native/NativeConverter.cpp b/icu/src/main/native/NativeConverter.cpp
index ebb035f..1bb9ac8 100644
--- a/icu/src/main/native/NativeConverter.cpp
+++ b/icu/src/main/native/NativeConverter.cpp
@@ -15,15 +15,16 @@
  * @author: Ram Viswanadha
  */
 
-#include "JNIHelp.h"
 #include "AndroidSystemNatives.h"
-#include "ScopedUtfChars.h"
-#include "unicode/utypes.h"   /* Basic ICU data types */
-#include "unicode/ucnv.h"     /* C   Converter API    */
-#include "unicode/ustring.h"  /* some more string functions*/
-#include "unicode/ucnv_cb.h"  /* for callback functions */
-#include "unicode/uset.h"     /* for contains function */
 #include "ErrorCode.h"
+#include "JNIHelp.h"
+#include "ScopedUtfChars.h"
+#include "UniquePtr.h"
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/uset.h"
+#include "unicode/ustring.h"
+#include "unicode/utypes.h"
 #include <stdlib.h>
 #include <string.h>
 
@@ -74,92 +75,6 @@ static void closeConverter(JNIEnv* env, jclass, jlong handle) {
 }
 
 /**
- * Sets the substution mode for from Unicode conversion. Currently only 
- * two modes are supported: substitute or report
- * @param env environment handle for JNI 
- * @param jClass handle for the class
- * @param handle address of ICU converter
- * @param mode the mode to set 
- */
-static jint setSubstitutionModeCharToByte (JNIEnv *env, jclass, jlong handle, jboolean mode) {
-    
-    UConverter* conv = (UConverter*)(long)handle;
-    UErrorCode errorCode =U_ZERO_ERROR;
-
-    if(conv) {
-        
-        UConverterFromUCallback fromUOldAction ;
-        void* fromUOldContext;
-        void* fromUNewContext=NULL;
-        if(mode) {
-
-            ucnv_setFromUCallBack(conv,
-               UCNV_FROM_U_CALLBACK_SUBSTITUTE,
-               fromUNewContext,
-               &fromUOldAction,
-               (const void**)&fromUOldContext,
-               &errorCode);
-
-        }
-        else{
-
-            ucnv_setFromUCallBack(conv,
-               UCNV_FROM_U_CALLBACK_STOP,
-               fromUNewContext,
-               &fromUOldAction,
-               (const void**)&fromUOldContext,
-               &errorCode);
-         
-        }
-        return errorCode;
-    }
-    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
-    return errorCode;
-}
-/**
- * Sets the substution mode for to Unicode conversion. Currently only 
- * two modes are supported: substitute or report
- * @param env environment handle for JNI 
- * @param jClass handle for the class
- * @param handle address of ICU converter
- * @param mode the mode to set 
- */
-static jint setSubstitutionModeByteToChar (JNIEnv *env, jclass, jlong handle, jboolean mode) {
-    
-    UConverter* conv = (UConverter*)handle;
-    UErrorCode errorCode =U_ZERO_ERROR;
-
-    if(conv) {
-        
-        UConverterToUCallback toUOldAction ;
-        void* toUOldContext;
-        void* toUNewContext=NULL;
-        if(mode) {
-
-            ucnv_setToUCallBack(conv,
-               UCNV_TO_U_CALLBACK_SUBSTITUTE,
-               toUNewContext,
-               &toUOldAction,
-               (const void**)&toUOldContext,
-               &errorCode);
-
-        }
-        else{
-
-            ucnv_setToUCallBack(conv,
-               UCNV_TO_U_CALLBACK_STOP,
-               toUNewContext,
-               &toUOldAction,
-               (const void**)&toUOldContext,
-               &errorCode);
-         
-        }
-        return errorCode;
-    }
-    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
-    return errorCode;
-}
-/**
  * Converts a buffer of Unicode code units to target encoding 
  * @param env environment handle for JNI 
  * @param jClass handle for the class
@@ -342,38 +257,6 @@ static void resetCharToByte(JNIEnv* env, jclass, jlong handle) {
     }
 }
 
-static jint countInvalidBytes (JNIEnv *env, jclass, jlong handle, jintArray length) {
-    UConverter* cnv = (UConverter*)handle;
-    if (!cnv) {
-        return U_ILLEGAL_ARGUMENT_ERROR;
-    }
-
-    UErrorCode errorCode = U_ZERO_ERROR;
-    jint* len = (jint*) env->GetPrimitiveArrayCritical(length, NULL);
-    if (len) {
-        char invalidChars[32];
-        ucnv_getInvalidChars(cnv,invalidChars,(int8_t*)len,&errorCode);
-    }
-    env->ReleasePrimitiveArrayCritical(length,(jint*)len,0);
-    return errorCode;
-}
-
-static jint countInvalidChars(JNIEnv *env, jclass, jlong handle, jintArray length) {
-    UConverter* cnv = (UConverter*)handle;
-    if (!cnv) {
-        return U_ILLEGAL_ARGUMENT_ERROR;
-    }
-
-    UErrorCode errorCode =U_ZERO_ERROR;
-    jint* len = (jint*) env->GetPrimitiveArrayCritical(length, NULL);
-    if (len) {
-        UChar invalidUChars[32];
-        ucnv_getInvalidUChars(cnv,invalidUChars,(int8_t*)len,&errorCode);
-    }
-    env->ReleasePrimitiveArrayCritical(length,(jint*)len,0);
-    return errorCode;
-}
-
 static jint getMaxBytesPerChar(JNIEnv *env, jclass, jlong handle) {
     UConverter* cnv = (UConverter*)handle;
     return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
@@ -477,30 +360,26 @@ static jint flushCharToByte (JNIEnv *env, jclass, jlong handle, jbyteArray targe
 }
 
 static void toChars(const UChar* us, char* cs, int32_t length) {
-    UChar u;
-    while(length>0) {
-        u=*us++;
+    while (length > 0) {
+        UChar u = *us++;
         *cs++=(char)u;
         --length;
     }
 }
 static jint setSubstitutionBytes(JNIEnv *env, jclass, jlong handle, jbyteArray subChars, jint length) {
-
     UConverter* cnv = (UConverter*) handle;
     UErrorCode errorCode = U_ZERO_ERROR;
-    if(cnv) {
+    if (cnv) {
         jbyte* u_subChars = reinterpret_cast<jbyte*>(env->GetPrimitiveArrayCritical(subChars, NULL));
-        if(u_subChars) {
-            char* mySubChars = new char[length];
-             toChars((UChar*)u_subChars,&mySubChars[0],length);
-             ucnv_setSubstChars(cnv,mySubChars, (char)length,&errorCode);
-             if(U_FAILURE(errorCode)) {
+        if (u_subChars) {
+            char mySubChars[length];
+            toChars((UChar*)u_subChars,&mySubChars[0],length);
+            ucnv_setSubstChars(cnv,mySubChars, (char)length,&errorCode);
+            if(U_FAILURE(errorCode)) {
                 env->ReleasePrimitiveArrayCritical(subChars,mySubChars,0);
                 return errorCode;
-             }
-             delete[] mySubChars;
-        }
-        else{   
+            }
+        } else{
            errorCode =  U_ILLEGAL_ARGUMENT_ERROR;
         }
         env->ReleasePrimitiveArrayCritical(subChars,u_subChars,0);
@@ -630,128 +509,53 @@ static jboolean canEncode(JNIEnv *env, jclass, jlong handle, jint codeUnit) {
     return (jboolean)FALSE;
 }
 
+/*
+ * If a charset listed in the IANA Charset Registry is supported by an implementation
+ * of the Java platform then its canonical name must be the name listed in the registry.
+ * Many charsets are given more than one name in the registry, in which case the registry
+ * identifies one of the names as MIME-preferred. If a charset has more than one registry
+ * name then its canonical name must be the MIME-preferred name and the other names in
+ * the registry must be valid aliases. If a supported charset is not listed in the IANA
+ * registry then its canonical name must begin with one of the strings "X-" or "x-".
+ */
+static jstring getJavaCanonicalName(JNIEnv *env, const char* icuCanonicalName) {
+    UErrorCode status = U_ZERO_ERROR;
 
-static jboolean canDecode(JNIEnv *env, jclass, jlong handle, jbyteArray source) {
-    
-    UErrorCode errorCode =U_ZERO_ERROR;
-    UConverter* cnv = (UConverter*)handle;
-    if(cnv) {
-        jint len = env->GetArrayLength(source);
-        jbyte* cSource =(jbyte*) env->GetPrimitiveArrayCritical(source, NULL);
-        if(cSource) {
-            const char* cSourceLimit = reinterpret_cast<const char*>(cSource+len);
-
-            /* Assume that we need at most twice the length of source */
-            UChar* target = (UChar*) malloc(sizeof(UChar)* (len<<1));
-            UChar* targetLimit = target + (len<<1);
-            if(target) {
-                ucnv_toUnicode(cnv,&target,targetLimit, (const char**)&cSource,
-                        cSourceLimit,NULL, TRUE,&errorCode);
-
-                if(U_SUCCESS(errorCode)) {
-                    free(target);
-                    env->ReleasePrimitiveArrayCritical(source,cSource,0);
-                    return (jboolean)TRUE;
-                }
-            }
-            free(target);
-        }
-        env->ReleasePrimitiveArrayCritical(source,cSource,0);
-    }
-    return (jboolean)FALSE;
-}
-
-static int32_t copyString(char* dest, int32_t destCapacity, int32_t startIndex,
-           const char* src, UErrorCode* status) {
-    int32_t srcLen = 0, i=0;
-    if(U_FAILURE(*status)) {
-        return 0;
-    }
-    if(dest == NULL || src == NULL || destCapacity < startIndex) { 
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    srcLen = strlen(src);
-    if(srcLen >= destCapacity) {
-        *status = U_BUFFER_OVERFLOW_ERROR;
-        return 0;
-    }
-    for(i=0; i < srcLen; i++) {
-        dest[startIndex++] = src[i];
-    }
-    /* null terminate the buffer */
-    dest[startIndex] = 0; /* no bounds check already made sure that we have enough room */
-    return startIndex;
-}
-
-static int32_t getJavaCanonicalName1(const char* icuCanonicalName,
-                     char* canonicalName, int32_t capacity, 
-                     UErrorCode* status) {
-    int32_t retLen = 0;
+    // Check to see if this is a well-known MIME or IANA name.
     const char* cName = NULL;
-    /* find out the alias with MIME tag */
-    if((cName =ucnv_getStandardName(icuCanonicalName, "MIME", status)) !=  NULL) {
-        retLen = copyString(canonicalName, capacity, 0, cName, status);
-        /* find out the alias with IANA tag */
-    }else if((cName =ucnv_getStandardName(icuCanonicalName, "IANA", status)) !=  NULL) {
-        retLen = copyString(canonicalName, capacity, 0, cName, status);
-    }else {
-        /*  
-            check to see if an alias already exists with x- prefix, if yes then 
-            make that the canonical name
-        */
-        int32_t aliasCount = ucnv_countAliases(icuCanonicalName,status);
-        int32_t i=0;
-        const char* name;
-        for(i=0;i<aliasCount;i++) {
-            name = ucnv_getAlias(icuCanonicalName,(uint16_t)i, status);
-            if(name != NULL && name[0]=='x' && name[1]=='-') {
-                retLen = copyString(canonicalName, capacity, 0, name, status);
-                break;
-            }
-        }
-        /* last resort just append x- to any of the alias and 
-            make it the canonical name */
-        if(retLen == 0 && U_SUCCESS(*status)) {
-            name = ucnv_getStandardName(icuCanonicalName, "UTR22", status);
-            if(name == NULL && strchr(icuCanonicalName, ',')!= NULL) {
-                name = ucnv_getAlias(icuCanonicalName, 1, status);
-                if(*status == U_INDEX_OUTOFBOUNDS_ERROR) {
-                    *status = U_ZERO_ERROR;
-                }
-            }
-            /* if there is no UTR22 canonical name .. then just return itself*/
-            if(name == NULL) {                
-                name = icuCanonicalName;
-            }
-            if(capacity >= 2) {
-                strcpy(canonicalName,"x-");
-            }
-            retLen = copyString(canonicalName, capacity, 2, name, status);
+    if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
+        return env->NewStringUTF(cName);
+    } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
+        return env->NewStringUTF(cName);
+    }
+
+    // Check to see if an alias already exists with an "x-" prefix; if so,
+    // make that the canonical name.
+    int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
+    for (int i = 0; i < aliasCount; ++i) {
+        const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
+        if (name != NULL && name[0] == 'x' && name[1] == '-') {
+            return env->NewStringUTF(name);
         }
     }
-    return retLen;
-}
 
-static jstring getJavaCanonicalName(JNIEnv *env, const char* icuCanonicalName) {
-    /*
-     * If a charset listed in the IANA Charset Registry is supported by an implementation
-     * of the Java platform then its canonical name must be the name listed in the registry.
-     * Many charsets are given more than one name in the registry, in which case the registry
-     * identifies one of the names as MIME-preferred. If a charset has more than one registry
-     * name then its canonical name must be the MIME-preferred name and the other names in
-     * the registry must be valid aliases. If a supported charset is not listed in the IANA
-     * registry then its canonical name must begin with one of the strings "X-" or "x-".
-     */
-    UErrorCode error = U_ZERO_ERROR;
-    char cName[UCNV_MAX_CONVERTER_NAME_LENGTH] = {0};
-    if (icuCanonicalName[0] != 0) {
-        getJavaCanonicalName1(icuCanonicalName, cName, UCNV_MAX_CONVERTER_NAME_LENGTH, &error);
+    // As a last resort, prepend "x-" to the UTR22 name (or a fallback) and make that the canonical name.
+    status = U_ZERO_ERROR;
+    const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
+    if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
+        name = ucnv_getAlias(icuCanonicalName, 1, &status);
     }
-    return env->NewStringUTF(cName);
+    // If there is no UTR22 canonical name then just return the original name.
+    if (name == NULL) {
+        name = icuCanonicalName;
+    }
+    UniquePtr<char[]> result(new char[2 + strlen(name) + 1]);
+    strcpy(&result[0], "x-");
+    strcat(&result[0], name);
+    return env->NewStringUTF(&result[0]);
 }
 
-static jobjectArray getAvailable(JNIEnv *env, jclass) {
+static jobjectArray getAvailableCharsetNames(JNIEnv *env, jclass) {
     int32_t num = ucnv_countAvailable();
     jobjectArray result = env->NewObjectArray(num, env->FindClass("java/lang/String"), NULL);
     for (int i = 0; i < num; ++i) {
@@ -763,12 +567,6 @@ static jobjectArray getAvailable(JNIEnv *env, jclass) {
     return result;
 }
 
-static jint countAliases(JNIEnv *env, jclass, jstring enc) {
-    ScopedUtfChars encChars(env, enc);
-    UErrorCode error = U_ZERO_ERROR;
-    return encChars.data() ? ucnv_countAliases(encChars.data(), &error) : 0;
-}
-
 static jobjectArray getAliases(JNIEnv* env, const char* icuCanonicalName) {
     // Get an upper bound on the number of aliases...
     const char* myEncName = icuCanonicalName;
@@ -872,10 +670,11 @@ static void CHARSET_ENCODER_CALLBACK(const void *context,
                     *status = U_ILLEGAL_ARGUMENT_ERROR;
                     return;
             }
-            if(realCB==NULL) {
+            if (realCB == NULL) {
                 *status = U_INTERNAL_PROGRAM_ERROR;
+            } else {
+                realCB(context, fromArgs, codeUnits, length, codePoint, reason, status);
             }
-            realCB(context, fromArgs, codeUnits, length, codePoint, reason, status);
         }
     }      
 }
@@ -1027,10 +826,11 @@ static void CHARSET_DECODER_CALLBACK(const void *context,
                     *status = U_ILLEGAL_ARGUMENT_ERROR;
                     return;
             }
-            if(realCB==NULL) {
+            if (realCB == NULL) {
                 *status = U_INTERNAL_PROGRAM_ERROR;
+            } else {
+                realCB(context, args, codeUnits, length, reason, status);
             }
-            realCB(context, args, codeUnits, length, reason, status);
         }
     }      
 }
@@ -1085,18 +885,6 @@ static jint setCallbackDecode(JNIEnv *env, jclass, jlong handle, jint onMalforme
     return U_ILLEGAL_ARGUMENT_ERROR;
 }
 
-static jlong safeClone(JNIEnv *env, jclass, jlong address) {
-    UConverter* source = reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
-    if (!source) {
-        return NULL;
-    }
-    UErrorCode status = U_ZERO_ERROR;
-    jint bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
-    UConverter* conv = ucnv_safeClone(source, NULL, &bufferSize, &status);
-    icu4jni_error(env, status);
-    return reinterpret_cast<uintptr_t>(conv);
-}
-
 static jint getMaxCharsPerByte(JNIEnv *env, jclass, jlong handle) {
     /*
      * currently we know that max number of chars per byte is 2
@@ -1204,37 +992,28 @@ static jobject charsetForName(JNIEnv* env, jclass, jstring charsetName) {
  */
 static JNINativeMethod gMethods[] = {
     /* name, signature, funcPtr */
+    { "canEncode", "(JI)Z", (void*) canEncode },
     { "charsetForName", "(Ljava/lang/String;)Ljava/nio/charset/Charset;", (void*) charsetForName },
-    { "convertByteToChar", "(J[BI[CI[IZ)I", (void*) convertByteToChar },
+    { "closeConverter", "(J)V", (void*) closeConverter },
+    { "contains", "(JJ)Z", (void*) contains },
     { "decode", "(J[BI[CI[IZ)I", (void*) decode },
-    { "convertCharToByte", "(J[CI[BI[IZ)I", (void*) convertCharToByte },
     { "encode", "(J[CI[BI[IZ)I", (void*) encode },
-    { "flushCharToByte", "(J[BI[I)I", (void*) flushCharToByte },
     { "flushByteToChar", "(J[CI[I)I", (void*) flushByteToChar },
-    { "openConverter", "(Ljava/lang/String;)J", (void*) openConverter },
-    { "resetByteToChar", "(J)V", (void*) resetByteToChar },
-    { "resetCharToByte", "(J)V", (void*) resetCharToByte },
-    { "closeConverter", "(J)V", (void*) closeConverter },
-    { "setSubstitutionChars", "(J[CI)I", (void*) setSubstitutionChars },
-    { "setSubstitutionBytes", "(J[BI)I", (void*) setSubstitutionBytes },
-    { "setSubstitutionModeCharToByte", "(JZ)I", (void*) setSubstitutionModeCharToByte },
-    { "setSubstitutionModeByteToChar", "(JZ)I", (void*) setSubstitutionModeByteToChar },
-    { "countInvalidBytes", "(J[I)I", (void*) countInvalidBytes },
-    { "countInvalidChars", "(J[I)I", (void*) countInvalidChars },
-    { "getMaxBytesPerChar", "(J)I", (void*) getMaxBytesPerChar },
-    { "getMinBytesPerChar", "(J)I", (void*) getMinBytesPerChar },
+    { "flushCharToByte", "(J[BI[I)I", (void*) flushCharToByte },
+    { "getAvailableCharsetNames", "()[Ljava/lang/String;", (void*) getAvailableCharsetNames },
     { "getAveBytesPerChar", "(J)F", (void*) getAveBytesPerChar },
-    { "getMaxCharsPerByte", "(J)I", (void*) getMaxCharsPerByte },
     { "getAveCharsPerByte", "(J)F", (void*) getAveCharsPerByte },
-    { "contains", "(JJ)Z", (void*) contains },
+    { "getMaxBytesPerChar", "(J)I", (void*) getMaxBytesPerChar },
+    { "getMaxCharsPerByte", "(J)I", (void*) getMaxCharsPerByte },
+    { "getMinBytesPerChar", "(J)I", (void*) getMinBytesPerChar },
     { "getSubstitutionBytes", "(J)[B", (void*) getSubstitutionBytes },
-    { "canEncode", "(JI)Z", (void*) canEncode },
-    { "canDecode", "(J[B)Z", (void*) canDecode },
-    { "getAvailable", "()[Ljava/lang/String;", (void*) getAvailable },
-    { "countAliases", "(Ljava/lang/String;)I", (void*) countAliases },
+    { "openConverter", "(Ljava/lang/String;)J", (void*) openConverter },
+    { "resetByteToChar", "(J)V", (void*) resetByteToChar },
+    { "resetCharToByte", "(J)V", (void*) resetCharToByte },
     { "setCallbackDecode", "(JII[CI)I", (void*) setCallbackDecode },
     { "setCallbackEncode", "(JII[BI)I", (void*) setCallbackEncode },
-    { "safeClone", "(J)J", (void*) safeClone }
+    { "setSubstitutionBytes", "(J[BI)I", (void*) setSubstitutionBytes },
+    { "setSubstitutionChars", "(J[CI)I", (void*) setSubstitutionChars },
 };
 
 int register_com_ibm_icu4jni_converters_NativeConverter(JNIEnv *_env) {
author	Elliott Hughes <enh@google.com>	2010-04-02 17:19:21 -0700
committer	Elliott Hughes <enh@google.com>	2010-04-02 17:58:45 -0700
commit	ccb8b92211a3e87acaf6486c8d4423c2053b8b5e (patch)
tree	5898c2d9793dcf05f83192c17183f09e13b8920a /icu/src/main
parent	3604384c5f53c83383ce85f838901e46b0105e5e (diff)
download	libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.zip libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.gz libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.bz2