summaryrefslogtreecommitdiffstats
path: root/icu/src/main/java
diff options
context:
space:
mode:
author Elliott Hughes <enh@google.com> 2010-04-02 17:19:21 -0700
committer Elliott Hughes <enh@google.com> 2010-04-02 17:58:45 -0700
commit ccb8b92211a3e87acaf6486c8d4423c2053b8b5e (patch)
tree 5898c2d9793dcf05f83192c17183f09e13b8920a /icu/src/main/java
parent 3604384c5f53c83383ce85f838901e46b0105e5e (diff)
downloadlibcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.zip
libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.gz
libcore-ccb8b92211a3e87acaf6486c8d4423c2053b8b5e.tar.bz2
More Charset/ICU cleanup.
I've been feeling guilty about leaving broken double-checked locking (missing the "volatile") in harmony's Charset code. A quick investigation showed that the method that it's intended to optimize is basically never called, and the RI's documentation explicitly says "don't call this; it's slow". So this patch fixes that. I've also improved our documentation. I've also deleted a bunch of dead code. I've also tidied up some dodgy native string handling. Change-Id: Iad69ebb3459d9cc4c4ff37b255d458b83fe40132
Diffstat (limited to 'icu/src/main/java')
-rw-r--r-- icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java 38
-rw-r--r-- icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java 50
-rw-r--r-- icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java 377
3 files changed, 70 insertions, 395 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
index 155f966..fe0f920 100644
--- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
+++ b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
@@ -5,7 +5,7 @@
*******************************************************************************
*
*******************************************************************************
-*/
+*/
package com.ibm.icu4jni.charset;
@@ -36,52 +36,52 @@ public final class CharsetICU extends Charset {
long converterHandle = NativeConverter.openConverter(icuCanonicalName);
return new CharsetDecoderICU(this, converterHandle);
}
-
+
// hardCoded list of replacement bytes
- private static final Map subByteMap = new HashMap();
- static{
- subByteMap.put("UTF-32",new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff});
- subByteMap.put("ibm-16684_P110-2003",new byte[]{0x40, 0x40}); // make \u3000 the sub char
- subByteMap.put("ibm-971_P100-1995",new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char
+ private static final Map<String, byte[]> subByteMap = new HashMap<String, byte[]>();
+ static {
+ subByteMap.put("UTF-32", new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff});
+ subByteMap.put("ibm-16684_P110-2003", new byte[]{0x40, 0x40}); // make \u3000 the sub char
+ subByteMap.put("ibm-971_P100-1995", new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char
}
/**
* Returns a new encoder object of the charset
* @return a new encoder
* @stable ICU 2.4
*/
- public CharsetEncoder newEncoder(){
+ public CharsetEncoder newEncoder() {
// the arrays are locals and not
// instance variables since the
- // methods on this class need to
+ // methods on this class need to
// be thread safe
long converterHandle = NativeConverter.openConverter(icuCanonicalName);
-
+
//According to the contract all converters should have non-empty replacement
byte[] replacement = NativeConverter.getSubstitutionBytes(converterHandle);
- try{
+ try {
return new CharsetEncoderICU(this,converterHandle, replacement);
- }catch(IllegalArgumentException ex){
+ } catch (IllegalArgumentException ex) {
// work around for the non-sensical check in the nio API that
// a substitution character must be mappable while decoding!!
- replacement = (byte[])subByteMap.get(icuCanonicalName);
- if(replacement==null){
+ replacement = subByteMap.get(icuCanonicalName);
+ if (replacement == null) {
replacement = new byte[NativeConverter.getMinBytesPerChar(converterHandle)];
- for(int i=0; i<replacement.length; i++){
+ for(int i = 0; i < replacement.length; ++i) {
replacement[i]= 0x3f;
}
}
NativeConverter.setSubstitutionBytes(converterHandle, replacement, replacement.length);
return new CharsetEncoderICU(this,converterHandle, replacement);
}
- }
-
+ }
+
/**
* Ascertains if a charset is a sub set of this charset
* @param cs charset to test
* @return true if the given charset is a subset of this charset
* @stable ICU 2.4
- *
+ *
* //CSDL: major changes by Jack
*/
public boolean contains(Charset cs){
@@ -90,7 +90,7 @@ public final class CharsetICU extends Charset {
} else if (this.equals(cs)) {
return true;
}
-
+
long converterHandle1 = 0;
long converterHandle2 = 0;
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java b/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java
deleted file mode 100644
index 0479223..0000000
--- a/icu/src/main/java/com/ibm/icu4jni/charset/CharsetProviderICU.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
-*******************************************************************************
-* Copyright (C) 1996-2005, International Business Machines Corporation and *
-* others. All Rights Reserved. *
-*******************************************************************************
-*
-*******************************************************************************
-*/
-
-package com.ibm.icu4jni.charset;
-
-import java.nio.charset.Charset;
-import java.nio.charset.spi.CharsetProvider;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-public final class CharsetProviderICU extends CharsetProvider {
- public CharsetProviderICU() {
- }
-
- @Override
- public Charset charsetForName(String charsetName) {
- return NativeConverter.charsetForName(charsetName);
- }
-
- @Override
- public Iterator<Charset> charsets() {
- ArrayList<Charset> result = new ArrayList<Charset>();
- for (String charsetName : NativeConverter.getAvailable()) {
- result.add(charsetForName(charsetName));
- }
- return result.iterator();
- }
-
- /**
- * Implements Charset.availableCharsets.
- */
- public SortedMap<String, Charset> initAvailableCharsets() {
- SortedMap<String, Charset> result =
- new TreeMap<String, Charset>(String.CASE_INSENSITIVE_ORDER);
- for (String charset : NativeConverter.getAvailable()) {
- if (!result.containsKey(charset)) {
- result.put(charset, charsetForName(charset));
- }
- }
- return result;
- }
-}
diff --git a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
index eefe3d5..6a97c27 100644
--- a/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
+++ b/icu/src/main/java/com/ibm/icu4jni/charset/NativeConverter.java
@@ -5,28 +5,21 @@
*******************************************************************************
*
*******************************************************************************
-*/
+*/
package com.ibm.icu4jni.charset;
import java.nio.charset.Charset;
-/**
- * Class for accessing the underlying JNI methods
- * @internal ICU 2.4
- */
-final class NativeConverter {
-
- //Native methods
-
+public final class NativeConverter {
/**
* Converts an array of bytes containing characters in an external
* encoding into an array of Unicode characters. This method allows
- * a buffer by buffer conversion of a data stream. The state of the
- * conversion is saved between calls to convert. Among other things,
+ * buffer-by-buffer conversion of a data stream. The state of the
+ * conversion is saved between calls. Among other things,
* this means multibyte input sequences can be split between calls.
- * If a call to convert results in an Error, the conversion may be
- * continued by calling convert again with suitably modified parameters.
+ * If a call results in an error, the conversion may be
+ * continued by calling this method again with suitably modified parameters.
* All conversions should be finished with a call to the flush method.
*
* @param converterHandle Address of converter object created by C code
@@ -34,78 +27,22 @@ final class NativeConverter {
* @param inEnd stop conversion at this offset in input array (exclusive).
* @param output character array to receive conversion result.
* @param outEnd stop writing to output array at this offset (exclusive).
- * @param data integer array containing the following data
+ * @param data integer array containing the following data
* data[0] = inputOffset
* data[1] = outputOffset
* @return int error code returned by ICU
* @internal ICU 2.4
*/
-
- public static final native int convertByteToChar( long converterHandle,
- byte[] input, int inEnd,
- char[] output, int outEnd,
- int[] data,
- boolean flush);
- /**
- * Converts an array of bytes containing characters in an external
- * encoding into an array of Unicode characters. This method allows
- * a buffer by buffer conversion of a data stream. The state of the
- * conversion is saved between calls to convert. Among other things,
- * this means multibyte input sequences can be split between calls.
- * If a call to convert results in an Error, the conversion may be
- * continued by calling convert again with suitably modified parameters.
- * All conversions should be finished with a call to the flush method.
- *
- * @param converterHandle Address of converter object created by C code
- * @param input byte array containing text to be converted.
- * @param inEnd stop conversion at this offset in input array (exclusive).
- * @param output character array to receive conversion result.
- * @param outEnd stop writing to output array at this offset (exclusive).
- * @param data integer array containing the following data
- * data[0] = inputOffset
- * data[1] = outputOffset
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int decode( long converterHandle,
- byte[] input, int inEnd,
- char[] output, int outEnd,
- int[] data,
- boolean flush);
- /**
- * Converts an array of Unicode chars containing characters in an
- * external encoding into an array of bytes. This method allows
- * a buffer by buffer conversion of a data stream. The state of the
- * conversion is saved between calls to convert. Among other things,
- * this means multibyte input sequences can be split between calls.
- * If a call to convert results in an Error, the conversion may be
- * continued by calling convert again with suitably modified parameters.
- * All conversions should be finished with a call to the flush method.
- *
- * @param converterHandle Address of converter object created by C code
- * @param input char array containing text to be converted.
- * @param inEnd stop conversion at this offset in input array (exclusive).
- * @param output byte array to receive conversion result.
- * @param outEnd stop writing to output array at this offset (exclusive).
- * @param data integer array containing the following data
- * data[0] = inputOffset
- * data[1] = outputOffset
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int convertCharToByte(long converterHandle,
- char[] input, int inEnd,
- byte[] output, int outEnd,
- int[] data,
- boolean flush);
+ public static native int decode(long converterHandle, byte[] input, int inEnd,
+ char[] output, int outEnd, int[] data, boolean flush);
+
/**
- * Converts an array of Unicode chars containing characters in an
- * external encoding into an array of bytes. This method allows
- * a buffer by buffer conversion of a data stream. The state of the
+ * Converts an array of Unicode chars to an array of bytes in an external encoding.
+ * This method allows buffer-by-buffer conversion of a data stream. The state of the
* conversion is saved between calls to convert. Among other things,
* this means multibyte input sequences can be split between calls.
- * If a call to convert results in an Error, the conversion may be
- * continued by calling convert again with suitably modified parameters.
+ * If a call results in an error, the conversion may be
+ * continued by calling this method again with suitably modified parameters.
* All conversions should be finished with a call to the flush method.
*
* @param converterHandle Address of converter object created by C code
@@ -113,283 +50,71 @@ final class NativeConverter {
* @param inEnd stop conversion at this offset in input array (exclusive).
* @param output byte array to receive conversion result.
* @param outEnd stop writing to output array at this offset (exclusive).
- * @param data integer array containing the following data
+ * @param data integer array containing the following data
* data[0] = inputOffset
* data[1] = outputOffset
* @return int error code returned by ICU
* @internal ICU 2.4
- */
- public static final native int encode(long converterHandle,
- char[] input, int inEnd,
- byte[] output, int outEnd,
- int[] data,
- boolean flush);
+ */
+ public static native int encode(long converterHandle, char[] input, int inEnd,
+ byte[] output, int outEnd, int[] data, boolean flush);
+
/**
* Writes any remaining output to the output buffer and resets the
- * converter to its initial state.
+ * converter to its initial state.
*
* @param converterHandle Address of converter object created by C code
* @param output byte array to receive flushed output.
* @param outEnd stop writing to output array at this offset (exclusive).
* @return int error code returned by ICU
- * @param data integer array containing the following data
+ * @param data integer array containing the following data
* data[0] = inputOffset
* data[1] = outputOffset
* @internal ICU 2.4
- */
- public static final native int flushCharToByte(long converterHandle,
- byte[] output,
- int outEnd,
- int[] data);
+ */
+ public static native int flushCharToByte(long converterHandle, byte[] output, int outEnd, int[] data);
+
/**
* Writes any remaining output to the output buffer and resets the
- * converter to its initial state.
+ * converter to its initial state.
*
* @param converterHandle Address of converter object created by the native code
* @param output char array to receive flushed output.
* @param outEnd stop writing to output array at this offset (exclusive).
* @return int error code returned by ICU
- * @param data integer array containing the following data
+ * @param data integer array containing the following data
* data[0] = inputOffset
* data[1] = outputOffset
* @internal ICU 2.4
- */
- public static final native int flushByteToChar(long converterHandle,
- char[] output,
- int outEnd,
- int[] data);
-
- /**
- * Open the converter with the specified encoding
- *
- * @param converterHandle long array for recieving the adress of converter object
- * created by the native code
- * @param encoding string representing encoding
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native long openConverter(String encoding);
- /**
- * Resets the ByteToChar (toUnicode) state of specified converter
- *
- * @param converterHandle Address of converter object created by the native code
- * @internal ICU 2.4
- */
- public static final native void resetByteToChar(long converterHandle);
-
- /**
- * Resets the CharToByte (fromUnicode) state of specified converter
- *
- * @param converterHandle Address of converter object created by the native code
- * @internal ICU 2.4
- */
- public static final native void resetCharToByte(long converterHandle);
-
- /**
- * Closes the specified converter and releases the resources
- *
- * @param converterHandle Address of converter object created by the native code
- * @internal ICU 2.4
- */
- public static final native void closeConverter(long converterHandle);
-
- /**
- * Sets the substitution Unicode chars of the specified converter used
- * by encoder
- * @param converterHandle Address of converter object created by the native code
- * @param subChars array of chars to used for substitution
- * @param length length of the array
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int setSubstitutionChars( long converterHandle,
- char[] subChars,int length);
- /**
- * Sets the substitution bytes of the specified converter used by decoder
- *
- * @param converterHandle Address of converter object created by the native code
- * @param subChars array of bytes to used for substitution
- * @param length length of the array
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int setSubstitutionBytes( long converterHandle,
- byte[] subChars,int length);
- /**
- * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter
- *
- * @param converterHandle Address of converter object created by the native code
- * @param mode to set the true/false
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int setSubstitutionModeCharToByte(long converterHandle,
- boolean mode);
- /**
- * Sets the substitution mode of CharToByte(fromUnicode) for the specified converter
- *
- * @param converterHandle Address of converter object created by the native code
- * @param mode to set the true/false
- * @return int error code returned by ICU
- * @internal ICU 3.6
- */
- public static final native int setSubstitutionModeByteToChar(long converterHandle,
- boolean mode);
- /**
- * Gets the numnber of invalid bytes in the specified converter object
- * for the last error that has occured
- *
- * @param converterHandle Address of converter object created by the native code
- * @param length array of int to recieve length of the array
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int countInvalidBytes(long converterHandle, int[] length);
-
- /**
- * Gets the numnber of invalid chars in the specified converter object
- * for the last error that has occured
- *
- * @param converterHandle Address of converter object created by the native code
- * @param length array of int to recieve length of the array
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int countInvalidChars(long converterHandle, int[] length);
-
- /**
- * Gets the number of bytes needed for converting a char
- *
- * @param converterHandle Address of converter object created by the native code
- * @return number of bytes needed
- * @internal ICU 2.4
- */
- public static final native int getMaxBytesPerChar(long converterHandle);
-
- /**
- * Gets the number of bytes needed for converting a char
- *
- * @param converterHandle Address of converter object created by the native code
- * @return number of bytes needed
- * @internal ICU 3.2
- */
- public static final native int getMinBytesPerChar(long converterHandle);
-
- /**
- * Gets the average numnber of bytes needed for converting a char
- *
- * @param converterHandle Address of converter object created by the native code
- * @return number of bytes needed
- * @internal ICU 2.4
- */
- public static final native float getAveBytesPerChar(long converterHandle);
-
- /**
- * Gets the number of chars needed for converting a byte
- *
- * @param converterHandle Address of converter object created by the native code
- * @return number of bytes needed
- * @internal ICU 2.4
- */
- public static final native int getMaxCharsPerByte(long converterHandle);
-
- /**
- * Gets the average numnber of chars needed for converting a byte
- *
- * @param converterHandle Address of converter object created by the native code
- * @return number of bytes needed
- * @internal ICU 2.4
- */
- public static final native float getAveCharsPerByte(long converterHandle);
-
- //CSDL: added by Jack
- /**
- * Determines whether charset1 contains charset2.
- */
- public static final native boolean contains(long converterHandle1, long converterHandle2);
-
- public static final native byte[] getSubstitutionBytes(long converterHandle);
-
- /**
- * Ascertains if a given Unicode code unit can
- * be converted to the target encoding
- * @param converterHandle Address of converter object created by the native code
- * @param codeUnit the character to be converted
- * @return true if a character can be converted
- * @internal ICU 2.4
- *
- */
- public static final native boolean canEncode(long converterHandle,int codeUnit);
-
- /**
- * Ascertains if a given a byte sequence can be converted to Unicode
- * @param converterHandle Address of converter object created by the native code
- * @param bytes the bytes to be converted
- * @return true if a character can be converted
- * @internal ICU 2.4
- *
*/
- public static final native boolean canDecode(long converterHandle,byte[] bytes);
-
- /**
- * Gets the canonical names of available converters
- * @return Object[] names as an object array
- * @internal ICU 2.4
- */
- public static final native String[] getAvailable();
+ public static native int flushByteToChar(long converterHandle, char[] output, int outEnd, int[] data);
- public static final native Charset charsetForName(String charsetName);
+ public static native long openConverter(String encoding);
+ public static native void closeConverter(long converterHandle);
+
+ public static native void resetByteToChar(long converterHandle);
+ public static native void resetCharToByte(long converterHandle);
+
+ public static native int setSubstitutionChars(long converterHandle, char[] subChars,int length);
+ public static native int setSubstitutionBytes(long converterHandle, byte[] subChars,int length);
+ public static native byte[] getSubstitutionBytes(long converterHandle);
+
+ public static native int getMaxBytesPerChar(long converterHandle);
+ public static native int getMinBytesPerChar(long converterHandle);
+ public static native float getAveBytesPerChar(long converterHandle);
+ public static native int getMaxCharsPerByte(long converterHandle);
+ public static native float getAveCharsPerByte(long converterHandle);
+
+ public static native boolean contains(long converterHandle1, long converterHandle2);
+
+ public static native boolean canEncode(long converterHandle, int codeUnit);
+
+ public static native String[] getAvailableCharsetNames();
+ public static native Charset charsetForName(String charsetName);
- /**
- * Gets the number of aliases for a converter name
- * @param enc encoding name
- * @return number of aliases for the converter
- * @internal ICU 2.4
- */
- public static final native int countAliases(String enc);
-
- /**
- * Gets the aliases associated with the converter name
- * @param enc converter name
- * @return converter names as elements in an object array
- * @internal ICU 2.4
- */
- public static final native String[] getAliases(String enc);
-
- /**
- * Sets the callback to Unicode for ICU conveter. The default behaviour of ICU callback
- * is to call the specified callback function for both illegal and unmapped sequences.
- * @param converterHandle Adress of the converter object created by native code
- * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK
- * The converter performs the specified callback when an error occurs
- * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length);
-
- /**
- * Sets the callback from Unicode for ICU conveter. The default behaviour of ICU callback
- * is to call the specified callback function for both illegal and unmapped sequences.
- * @param converterHandle Adress of the converter object created by native code
- * @param mode call back mode to set. This is either STOP_CALLBACK, SKIP_CALLBACK or SUBSTITUE_CALLBACK
- * The converter performs the specified callback when an error occurs
- * @param stopOnIllegal If true sets the alerts the converter callback to stop on an illegal sequence
- * @return int error code returned by ICU
- * @internal ICU 2.4
- */
- public static final native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length);
-
- /**
- * Returns a thread safe clone of the converter
- * @internal ICU 2.4
- */
- public static final native long safeClone(long converterHandle);
-
- /** @internal ICU 2.4 */
public static final int STOP_CALLBACK = 0;//CodingErrorAction.REPORT
- /** @internal ICU 2.4 */
public static final int SKIP_CALLBACK = 1;//CodingErrorAction.IGNORE
- /** @internal ICU 2.4 */
public static final int SUBSTITUTE_CALLBACK = 2;//CodingErrorAction.REPLACE
+ public static native int setCallbackDecode(long converterHandle, int onMalformedInput, int onUnmappableInput, char[] subChars, int length);
+ public static native int setCallbackEncode(long converterHandle, int onMalformedInput, int onUnmappableInput, byte[] subBytes, int length);
}