summary refs log tree commit diff stats
path: root/luni
diff options
context:
space:
mode:
author Elliott Hughes <enh@google.com> 2010-05-20 17:46:44 -0700
committer Android (Google) Code Review <android-gerrit@google.com> 2010-05-20 17:46:44 -0700
commit 40679a19b5de44363aa3f68d77cc13a74b41ffd6 (patch)
tree 3aa93ac63f07f88bd21bb3c4c05d4276d365546e /luni
parent ba34c446de7ad7ab49e0dbc3f2229c7b6d56f0db (diff)
parent c60bc1815dca549f3fb4e572f6aac749d7fa9fc6 (diff)
download libcore-40679a19b5de44363aa3f68d77cc13a74b41ffd6.zip
libcore-40679a19b5de44363aa3f68d77cc13a74b41ffd6.tar.gz
libcore-40679a19b5de44363aa3f68d77cc13a74b41ffd6.tar.bz2
Merge "Fix more Charset/CharsetDecoder/CharsetEncoder bugs." into dalvik-dev
Diffstat (limited to 'luni')
-rw-r--r-- luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java 113
-rw-r--r-- luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java 22
-rw-r--r-- luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java 90
-rw-r--r-- luni/src/main/java/java/nio/charset/Charset.java 22
-rw-r--r-- luni/src/main/java/java/nio/charset/CharsetDecoder.java 13
-rw-r--r-- luni/src/main/java/java/nio/charset/CharsetEncoder.java 20
-rw-r--r-- luni/src/main/java/java/nio/charset/CoderResult.java 2
-rw-r--r-- luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java 10
-rw-r--r-- luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java 10
-rw-r--r-- luni/src/main/native/NativeConverter.cpp 130
-rw-r--r-- luni/src/test/java/java/nio/charset/CharsetEncoderTest.java 16
11 files changed, 194 insertions, 254 deletions
diff --git a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java
index 404dd6e..d1c9546 100644
--- a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java
+++ b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java
@@ -27,28 +27,26 @@ import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.ByteBuffer;
-public final class CharsetDecoderICU extends CharsetDecoder{
+public final class CharsetDecoderICU extends CharsetDecoder {
private static final int MAX_CHARS_PER_BYTE = 2;
- private static final int INPUT_OFFSET = 0,
- OUTPUT_OFFSET = 1,
- INVALID_BYTES = 2,
- INPUT_HELD = 3,
- LIMIT = 4;
- /* data is 3 element array where
- * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed
- * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written
- * data[INVALID_CHARS] = number of invalid chars
- * data[INPUT_HELD] = number of input chars held in the converter's state
+ private static final int INPUT_OFFSET = 0;
+ private static final int OUTPUT_OFFSET = 1;
+ private static final int INVALID_BYTES = 2;
+ private static final int INPUT_HELD = 3;
+ /*
+ * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input bytes consumed
+ * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output chars written
+ * data[INVALID_BYTES] = number of invalid bytes
+ * data[INPUT_HELD] = number of input bytes held in the converter's state
*/
- private int[] data = new int[LIMIT];
+ private int[] data = new int[4];
/* handle to the ICU converter that is opened */
- private long converterHandle=0;
+ private long converterHandle = 0;
-
- private byte[] input = null;
- private char[] output= null;
+ private byte[] input = null;
+ private char[] output= null;
// BEGIN android-added
private byte[] allocatedInput = null;
@@ -81,7 +79,7 @@ public final class CharsetDecoderICU extends CharsetDecoder{
/**
* Sets this decoder's replacement string. Substitutes the string in input if an
- * umappable or illegal sequence is encountered
+ * unmappable or illegal sequence is encountered
* @param newReplacement to replace the error bytes with
* @stable ICU 2.4
*/
@@ -130,42 +128,35 @@ public final class CharsetDecoderICU extends CharsetDecoder{
* @stable ICU 2.4
*/
protected final CoderResult implFlush(CharBuffer out) {
- try{
-
- data[OUTPUT_OFFSET] = getArray(out);
-
- ec=NativeConverter.flushByteToChar(
+ try {
+ data[OUTPUT_OFFSET] = getArray(out);
+ ec = NativeConverter.flushByteToChar(
converterHandle, /* Handle to ICU Converter */
output, /* input array of chars */
outEnd, /* input index+1 to be written */
data /* contains data, inOff,outOff */
);
-
/* If we don't have room for the output, report overflow. */
if (ErrorCode.isFailure(ec)) {
if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) {
return CoderResult.OVERFLOW;
- }else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND ) {//CSDL: add this truncated character error handling
- if(data[INPUT_OFFSET]>0){
+ } else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling
+ if (data[INPUT_OFFSET] > 0) {
return CoderResult.malformedForLength(data[INPUT_OFFSET]);
}
- }else {
+ } else {
ErrorCode.getException(ec);
}
}
return CoderResult.UNDERFLOW;
- }finally{
+ } finally {
/* save the flushed data */
setPosition(out);
implReset();
}
}
- /**
- * Resets the to Unicode mode of converter
- * @stable ICU 2.4
- */
protected void implReset() {
NativeConverter.resetByteToChar(converterHandle);
data[INPUT_OFFSET] = 0;
@@ -175,6 +166,11 @@ public final class CharsetDecoderICU extends CharsetDecoder{
savedInputHeldLen = 0;
output = null;
input = null;
+ allocatedInput = null;
+ allocatedOutput = null;
+ ec = 0;
+ inEnd = 0;
+ outEnd = 0;
}
/**
@@ -194,9 +190,8 @@ public final class CharsetDecoderICU extends CharsetDecoder{
* action succeeds or more input is needed for completing the decoding action.
* @stable ICU 2.4
*/
- protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
-
- if(!in.hasRemaining()){
+ protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out){
+ if (!in.hasRemaining()){
return CoderResult.UNDERFLOW;
}
@@ -205,29 +200,27 @@ public final class CharsetDecoderICU extends CharsetDecoder{
data[INPUT_HELD] = 0;
try{
- /* do the conversion */
- ec=NativeConverter.decode(
+ ec = NativeConverter.decode(
converterHandle, /* Handle to ICU Converter */
input, /* input array of bytes */
inEnd, /* last index+1 to be converted */
output, /* input array of chars */
outEnd, /* input index+1 to be written */
data, /* contains data, inOff,outOff */
- false /* donot flush the data */
+ false /* don't flush the data */
);
-
- /* return an error*/
- if(ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR){
+ // Return an error.
+ if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) {
return CoderResult.OVERFLOW;
- }else if(ec==ErrorCode.U_INVALID_CHAR_FOUND){
- return CoderResult.malformedForLength(data[INVALID_BYTES]);
- }else if(ec==ErrorCode.U_ILLEGAL_CHAR_FOUND){
+ } else if (ec == ErrorCode.U_INVALID_CHAR_FOUND) {
+ return CoderResult.unmappableForLength(data[INVALID_BYTES]);
+ } else if (ec == ErrorCode.U_ILLEGAL_CHAR_FOUND) {
return CoderResult.malformedForLength(data[INVALID_BYTES]);
}
- /* decoding action succeded */
+ // Decoding succeeded: give us more data.
return CoderResult.UNDERFLOW;
- }finally{
+ } finally {
setPosition(in);
setPosition(out);
}
@@ -237,7 +230,7 @@ public final class CharsetDecoderICU extends CharsetDecoder{
* Releases the system resources by cleanly closing ICU converter opened
* @stable ICU 2.4
*/
- protected void finalize()throws Throwable{
+ protected void finalize() throws Throwable{
NativeConverter.closeConverter(converterHandle);
super.finalize();
converterHandle = 0;
@@ -248,13 +241,13 @@ public final class CharsetDecoderICU extends CharsetDecoder{
//------------------------------------------
private final int getArray(CharBuffer out){
- if(out.hasArray()){
+ if (out.hasArray()) {
// BEGIN android-changed: take arrayOffset into account
output = out.array();
outEnd = out.arrayOffset() + out.limit();
return out.arrayOffset() + out.position();
// END android-changed
- }else{
+ } else {
outEnd = out.remaining();
// BEGIN android-added
if (allocatedOutput == null || (outEnd > allocatedOutput.length)) {
@@ -267,16 +260,16 @@ public final class CharsetDecoderICU extends CharsetDecoder{
// is 0
return 0;
}
-
}
+
private final int getArray(ByteBuffer in){
- if(in.hasArray()){
+ if (in.hasArray()) {
// BEGIN android-changed: take arrayOffset into account
input = in.array();
inEnd = in.arrayOffset() + in.limit();
return in.arrayOffset() + in.position() + savedInputHeldLen;/*exclude the number of bytes held in previous conversion*/
// END android-changed
- }else{
+ } else {
inEnd = in.remaining();
// BEGIN android-added
if (allocatedInput == null || (inEnd > allocatedInput.length)) {
@@ -294,30 +287,24 @@ public final class CharsetDecoderICU extends CharsetDecoder{
// is whatever is savedInputLen
return savedInputHeldLen;
}
-
}
- private final void setPosition(CharBuffer out){
- if(out.hasArray()){
- // BEGIN android-changed: take arrayOffset into account
+
+ private final void setPosition(CharBuffer out) {
+ if (out.hasArray()) {
out.position(out.position() + data[OUTPUT_OFFSET] - out.arrayOffset());
- // END android-changed
- }else{
- out.put(output,0,data[OUTPUT_OFFSET]);
+ } else {
+ out.put(output, 0, data[OUTPUT_OFFSET]);
}
- // BEGIN android-added
// release reference to output array, which may not be ours
output = null;
- // END android-added
}
- private final void setPosition(ByteBuffer in){
+ private final void setPosition(ByteBuffer in) {
// ok was there input held in the previous invocation of decodeLoop
// that resulted in output in this invocation?
- // BEGIN android-changed
in.position(in.position() + data[INPUT_OFFSET] + savedInputHeldLen - data[INPUT_HELD]);
savedInputHeldLen = data[INPUT_HELD];
// release reference to input array, which may not be ours
input = null;
- // END android-changed
}
}
diff --git a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java
index bef1862..55f883b 100644
--- a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java
+++ b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java
@@ -28,18 +28,17 @@ import com.ibm.icu4jni.common.ErrorCode;
public final class CharsetEncoderICU extends CharsetEncoder {
- private static final int INPUT_OFFSET = 0,
- OUTPUT_OFFSET = 1,
- INVALID_CHARS = 2,
- INPUT_HELD = 3,
- LIMIT = 4;
- /* data is 3 element array where
+ private static final int INPUT_OFFSET = 0;
+ private static final int OUTPUT_OFFSET = 1;
+ private static final int INVALID_CHARS = 2;
+ private static final int INPUT_HELD = 3;
+ /*
* data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed
* data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written
* data[INVALID_CHARS] = number of invalid chars
* data[INPUT_HELD] = number of input chars held in the converter's state
*/
- private int[] data = new int[LIMIT];
+ private int[] data = new int[4];
/* handle to the ICU converter that is opened */
private long converterHandle=0;
@@ -63,7 +62,7 @@ public final class CharsetEncoderICU extends CharsetEncoder {
private int savedInputHeldLen;
/**
- * Construcs a new encoder for the given charset
+ * Constructs a new encoder for the given charset
* @param cs for which the encoder is created
* @param cHandle the address of ICU converter
* @param replacement the substitution bytes
@@ -139,11 +138,11 @@ public final class CharsetEncoderICU extends CharsetEncoder {
if (ErrorCode.isFailure(ec)) {
if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) {
return CoderResult.OVERFLOW;
- }else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling
- if(data[INPUT_OFFSET]>0){
+ } else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling
+ if (data[INPUT_OFFSET] > 0) {
return CoderResult.malformedForLength(data[INPUT_OFFSET]);
}
- }else {
+ } else {
ErrorCode.getException(ec);
}
}
@@ -178,7 +177,6 @@ public final class CharsetEncoderICU extends CharsetEncoder {
* @stable ICU 2.4
*/
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
-
if (!in.hasRemaining()) {
return CoderResult.UNDERFLOW;
}
diff --git a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
index 71d2747..bf3cfe0 100644
--- a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
+++ b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java
@@ -16,76 +16,49 @@ import java.util.HashMap;
import java.util.Map;
public final class CharsetICU extends Charset {
+ private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>();
+ static {
+ // ICU has different default replacements to the RI in these cases. There are probably
+ // more cases too, but this covers all the charsets that Java guarantees will be available.
+ // These use U+FFFD REPLACEMENT CHARACTER...
+ DEFAULT_REPLACEMENTS.put("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd });
+ DEFAULT_REPLACEMENTS.put("UTF-32", new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0xff, (byte) 0xfd });
+ // These use '?'. It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1
+ // and US-ASCII) it can represent it, but this is what the RI does...
+ byte[] questionMark = new byte[] { (byte) '?' };
+ DEFAULT_REPLACEMENTS.put("UTF-8", questionMark);
+ DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark);
+ DEFAULT_REPLACEMENTS.put("US-ASCII", questionMark);
+ }
+
private final String icuCanonicalName;
- /**
- * Constructor to create a the CharsetICU object
- * @param canonicalName the canonical name as a string
- * @param aliases the alias set as an array of strings
- * @stable ICU 2.4
- */
+
protected CharsetICU(String canonicalName, String icuCanonName, String[] aliases) {
super(canonicalName, aliases);
icuCanonicalName = icuCanonName;
}
- /**
- * Returns a new decoder instance of this charset object
- * @return a new decoder object
- * @stable ICU 2.4
- */
+
public CharsetDecoder newDecoder() {
- long converterHandle = NativeConverter.openConverter(icuCanonicalName);
- return new CharsetDecoderICU(this, converterHandle);
+ return new CharsetDecoderICU(this, NativeConverter.openConverter(icuCanonicalName));
}
- // hardCoded list of replacement bytes
- private static final Map<String, byte[]> subByteMap = new HashMap<String, byte[]>();
- static {
- subByteMap.put("UTF-32", new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff});
- subByteMap.put("ibm-16684_P110-2003", new byte[]{0x40, 0x40}); // make \u3000 the sub char
- subByteMap.put("ibm-971_P100-1995", new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char
- }
- /**
- * Returns a new encoder object of the charset
- * @return a new encoder
- * @stable ICU 2.4
- */
public CharsetEncoder newEncoder() {
- // the arrays are locals and not
- // instance variables since the
- // methods on this class need to
- // be thread safe
long converterHandle = NativeConverter.openConverter(icuCanonicalName);
-
- //According to the contract all converters should have non-empty replacement
- byte[] replacement = NativeConverter.getSubstitutionBytes(converterHandle);
-
- try {
- return new CharsetEncoderICU(this,converterHandle, replacement);
- } catch (IllegalArgumentException ex) {
- // work around for the nonsensical check in the nio API that
- // a substitution character must be mappable while decoding!!
- replacement = subByteMap.get(icuCanonicalName);
- if (replacement == null) {
- replacement = new byte[NativeConverter.getMinBytesPerChar(converterHandle)];
- for(int i = 0; i < replacement.length; ++i) {
- replacement[i]= 0x3f;
- }
- }
- return new CharsetEncoderICU(this, converterHandle, replacement);
+ // We have our own map of RI-compatible default replacements...
+ byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName);
+ if (replacement == null) {
+ // ...but fall back to asking ICU.
+ // TODO: should we just try to use U+FFFD and fall back to '?' if U+FFFD can't be encoded?
+ replacement = NativeConverter.getSubstitutionBytes(converterHandle);
+ } else {
+ replacement = replacement.clone();
}
+ return new CharsetEncoderICU(this, converterHandle, replacement);
}
- /**
- * Ascertains if a charset is a sub set of this charset
- * @param cs charset to test
- * @return true if the given charset is a subset of this charset
- * @stable ICU 2.4
- *
- * //CSDL: major changes by Jack
- */
- public boolean contains(Charset cs){
- if (null == cs) {
- return false;
+ public boolean contains(Charset cs) {
+ if (cs == null) {
+ return false;
} else if (this.equals(cs)) {
return true;
}
@@ -98,8 +71,7 @@ public final class CharsetICU extends Charset {
if (converterHandle1 > 0) {
converterHandle2 = NativeConverter.openConverter(cs.name());
if (converterHandle2 > 0) {
- return NativeConverter.contains(converterHandle1,
- converterHandle2);
+ return NativeConverter.contains(converterHandle1, converterHandle2);
}
}
return false;
diff --git a/luni/src/main/java/java/nio/charset/Charset.java b/luni/src/main/java/java/nio/charset/Charset.java
index 4b265e5..494152e 100644
--- a/luni/src/main/java/java/nio/charset/Charset.java
+++ b/luni/src/main/java/java/nio/charset/Charset.java
@@ -45,16 +45,23 @@ import java.util.TreeMap;
* sequence. It facilitates the encoding from a Unicode character sequence into
* a byte sequence, and the decoding from a byte sequence into a Unicode
* character sequence.
- * <p>
- * A charset has a canonical name, which is usually in uppercase. Typically it
+ *
+ * <p>A charset has a canonical name, which is usually in uppercase. Typically it
* also has one or more aliases. The name string can only consist of the
* following characters: '0' - '9', 'A' - 'Z', 'a' - 'z', '.', ':', '-' and '_'.
* The first character of the name must be a digit or a letter.
- * <p>
- * The following charsets should be supported by any java platform: US-ASCII,
- * ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16.
- * <p>
- * Additional charsets can be made available by configuring one or more charset
+ *
+ * <p>The following charsets must be available on every Java implementation:
+ * <ul>
+ * <li>ISO-8859-1
+ * <li>US-ASCII
+ * <li>UTF-16
+ * <li>UTF-16BE
+ * <li>UTF-16LE
+ * <li>UTF-8
+ * </ul>
+ *
+ * <p>Additional charsets can be made available by configuring one or more charset
* providers through provider configuration files. Such files are always named
* as "java.nio.charset.spi.CharsetProvider" and located in the
* "META-INF/services" sub folder of one or more classpaths. The files should be
@@ -72,7 +79,6 @@ import java.util.TreeMap;
* @see java.nio.charset.spi.CharsetProvider
*/
public abstract class Charset implements Comparable<Charset> {
-
/*
* The name of configuration files where charset provider class names can be
* specified.
diff --git a/luni/src/main/java/java/nio/charset/CharsetDecoder.java b/luni/src/main/java/java/nio/charset/CharsetDecoder.java
index d2c9871..f4c4558 100644
--- a/luni/src/main/java/java/nio/charset/CharsetDecoder.java
+++ b/luni/src/main/java/java/nio/charset/CharsetDecoder.java
@@ -360,10 +360,6 @@ public abstract class CharsetDecoder {
status = endOfInput ? END : ONGOING;
if (endOfInput && remaining > 0) {
result = CoderResult.malformedForLength(remaining);
- // BEGIN android-added
- // needed to adjust for the changed call to position() below
- in.position(in.position() + result.length());
- // END android-added
} else {
return result;
}
@@ -386,14 +382,7 @@ public abstract class CharsetDecoder {
if (action != CodingErrorAction.IGNORE)
return result;
}
- // BEGIN android-changed
- // the condition is removed in Harmony revision 518047. However,
- // making the conditional statement unconditional leads to
- // misbehavior when using REPLACE on malformedInput.
- if (!result.isMalformed()) {
- in.position(in.position() + result.length());
- }
- // END android-changed
+ in.position(in.position() + result.length());
}
}
diff --git a/luni/src/main/java/java/nio/charset/CharsetEncoder.java b/luni/src/main/java/java/nio/charset/CharsetEncoder.java
index 7375e14..abbe04a 100644
--- a/luni/src/main/java/java/nio/charset/CharsetEncoder.java
+++ b/luni/src/main/java/java/nio/charset/CharsetEncoder.java
@@ -121,21 +121,8 @@ public abstract class CharsetEncoder {
private CharsetDecoder decoder;
/**
- * Constructs a new <code>CharsetEncoder</code> using the given
- * <code>Charset</code>, average number and maximum number of bytes
- * created by this encoder for one input character.
- *
- * @param cs
- * the <code>Charset</code> to be used by this encoder.
- * @param averageBytesPerChar
- * average number of bytes created by this encoder for one input
- * character, must be positive.
- * @param maxBytesPerChar
- * maximum number of bytes which can be created by this encoder
- * for one input character, must be positive.
- * @throws IllegalArgumentException
- * if <code>maxBytesPerChar</code> or
- * <code>averageBytesPerChar</code> is negative.
+ * Constructs a new {@code CharsetEncoder} using the given parameters and
+ * the replacement byte array {@code { (byte) '?' }}.
*/
protected CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar) {
this(cs, averageBytesPerChar, maxBytesPerChar, new byte[] { (byte) '?' });
@@ -743,8 +730,9 @@ public abstract class CharsetEncoder {
|| !isLegalReplacement(replacement)) {
throw new IllegalArgumentException("bad replacement: " + Arrays.toString(replacement));
}
+ // It seems like a bug, but the RI doesn't clone, and we have tests that check we don't.
replace = replacement;
- implReplaceWith(replacement);
+ implReplaceWith(replace);
return this;
}
diff --git a/luni/src/main/java/java/nio/charset/CoderResult.java b/luni/src/main/java/java/nio/charset/CoderResult.java
index 8458dfc..2653161 100644
--- a/luni/src/main/java/java/nio/charset/CoderResult.java
+++ b/luni/src/main/java/java/nio/charset/CoderResult.java
@@ -203,7 +203,7 @@ public class CoderResult {
/**
* Gets the length of the erroneous input. The length is only meaningful to
- * a malformed-input error or an unmappble character error.
+ * a malformed-input error or an unmappable character error.
*
* @return the length, as an integer, of this object's erroneous input.
* @throws UnsupportedOperationException
diff --git a/luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java b/luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java
index 72c04e5..85e7ee1 100644
--- a/luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java
+++ b/luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java
@@ -36,18 +36,18 @@ public class IllegalCharsetNameException extends IllegalArgumentException {
* Constructs a new {@code IllegalCharsetNameException} with the supplied
* charset name.
*
- * @param charset
+ * @param charsetName
* the encountered illegal charset name.
*/
- public IllegalCharsetNameException(String charset) {
- super(charset);
- this.charsetName = charset;
+ public IllegalCharsetNameException(String charsetName) {
+ super((charsetName != null) ? charsetName : "null");
+ this.charsetName = charsetName;
}
/**
* Returns the encountered illegal charset name.
*/
public String getCharsetName() {
- return this.charsetName;
+ return charsetName;
}
}
diff --git a/luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java b/luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java
index d8b5f3e..ed3bfaa 100644
--- a/luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java
+++ b/luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java
@@ -36,12 +36,12 @@ public class UnsupportedCharsetException extends IllegalArgumentException {
* Constructs a new {@code UnsupportedCharsetException} with the supplied
* charset name.
*
- * @param charset
+ * @param charsetName
* the encountered unsupported charset name.
*/
- public UnsupportedCharsetException(String charset) {
- super(charset);
- this.charsetName = charset;
+ public UnsupportedCharsetException(String charsetName) {
+ super((charsetName != null) ? charsetName : "null");
+ this.charsetName = charsetName;
}
/**
@@ -50,6 +50,6 @@ public class UnsupportedCharsetException extends IllegalArgumentException {
* @return the encountered unsupported charset name.
*/
public String getCharsetName() {
- return this.charsetName;
+ return charsetName;
}
}
diff --git a/luni/src/main/native/NativeConverter.cpp b/luni/src/main/native/NativeConverter.cpp
index 4afdaf2..2c65e74 100644
--- a/luni/src/main/native/NativeConverter.cpp
+++ b/luni/src/main/native/NativeConverter.cpp
@@ -32,9 +32,9 @@
#include <stdlib.h>
#include <string.h>
-#define com_ibm_icu4jni_converters_NativeConverter_STOP_CALLBACK 0L
-#define com_ibm_icu4jni_converters_NativeConverter_SKIP_CALLBACK 1L
-#define com_ibm_icu4jni_converters_NativeConverter_SUBSTITUTE_CALLBACK 2L
+#define NativeConverter_REPORT 0
+#define NativeConverter_IGNORE 1
+#define NativeConverter_REPLACE 2
struct DecoderCallbackContext {
int length;
@@ -93,32 +93,31 @@ static void closeConverter(JNIEnv*, jclass, jlong handle) {
* @param data buffer to receive state of the current conversion
* @param flush boolean that specifies end of source input
*/
-static jint convertCharToByte(JNIEnv* env, jclass, jlong handle, jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, jintArray data, jboolean flush) {
+static UErrorCode convertCharsToBytes(JNIEnv* env, jclass, jlong handle, jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, jintArray data, jboolean flush) {
UConverter* cnv = (UConverter*)handle;
if (!cnv) {
return U_ILLEGAL_ARGUMENT_ERROR;
}
+ ScopedCharArray uSource(env, source);
UErrorCode errorCode = U_ZERO_ERROR;
- jint* myData = (jint*) env->GetPrimitiveArrayCritical(data,NULL);
- if(myData) {
+ jint* myData = (jint*) env->GetPrimitiveArrayCritical(data, NULL);
+ if (myData) {
jint* sourceOffset = &myData[0];
jint* targetOffset = &myData[1];
- const jchar* uSource =(jchar*) env->GetPrimitiveArrayCritical(source, NULL);
- if(uSource) {
+ if (uSource.get() != NULL) {
jbyte* uTarget=(jbyte*) env->GetPrimitiveArrayCritical(target,NULL);
if(uTarget) {
- const jchar* mySource = uSource+ *sourceOffset;
- const UChar* mySourceLimit= uSource+sourceEnd;
+ const jchar* mySource = uSource.get() + *sourceOffset;
+ const UChar* mySourceLimit= uSource.get() + sourceEnd;
char* cTarget = reinterpret_cast<char*>(uTarget+ *targetOffset);
const char* cTargetLimit = reinterpret_cast<const char*>(uTarget+targetEnd);
ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&mySource, mySourceLimit,NULL,(UBool) flush, &errorCode);
- *sourceOffset = (jint) (mySource - uSource)-*sourceOffset;
+ *sourceOffset = (jint) (mySource - uSource.get())-*sourceOffset;
*targetOffset = (jint) ((jbyte*)cTarget - uTarget)- *targetOffset;
if(U_FAILURE(errorCode)) {
env->ReleasePrimitiveArrayCritical(target,uTarget,0);
- env->ReleasePrimitiveArrayCritical(source,(jchar*)uSource,0);
env->ReleasePrimitiveArrayCritical(data,(jint*)myData,0);
return errorCode;
}
@@ -129,7 +128,6 @@ static jint convertCharToByte(JNIEnv* env, jclass, jlong handle, jcharArray sou
}else{
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
- env->ReleasePrimitiveArrayCritical(source,(jchar*)uSource,0);
}else{
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
@@ -138,27 +136,23 @@ static jint convertCharToByte(JNIEnv* env, jclass, jlong handle, jcharArray sou
}
static jint encode(JNIEnv* env, jclass, jlong handle, jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, jintArray data, jboolean flush) {
-
- UErrorCode ec = UErrorCode(convertCharToByte(env, NULL,handle,source,sourceEnd, target,targetEnd,data,flush));
+ UErrorCode ec = convertCharsToBytes(env, NULL, handle, source, sourceEnd, target, targetEnd, data, flush);
UConverter* cnv = (UConverter*)handle;
- jint* myData = (jint*) env->GetPrimitiveArrayCritical(data,NULL);
-
- if(cnv && myData) {
-
- UErrorCode errorCode = U_ZERO_ERROR;
- myData[3] = ucnv_fromUCountPending(cnv, &errorCode);
+ if (cnv) {
+ UErrorCode errorCode = U_ZERO_ERROR;
+ jint count = ucnv_fromUCountPending(cnv, &errorCode);
+ env->SetIntArrayRegion(data, 3, 1, &count);
- if(ec == U_ILLEGAL_CHAR_FOUND || ec == U_INVALID_CHAR_FOUND) {
- int8_t count =32;
+ if (ec == U_ILLEGAL_CHAR_FOUND || ec == U_INVALID_CHAR_FOUND) {
+ int8_t len = 32;
UChar invalidUChars[32];
- ucnv_getInvalidUChars(cnv,invalidUChars,&count,&errorCode);
-
- if(U_SUCCESS(errorCode)) {
- myData[2] = count;
+ ucnv_getInvalidUChars(cnv, invalidUChars, &len, &errorCode);
+ if (U_SUCCESS(errorCode)) {
+ jint value = len;
+ env->SetIntArrayRegion(data, 2, 1, &value);
}
}
}
- env->ReleasePrimitiveArrayCritical(data,(jint*)myData,0);
return ec;
}
@@ -174,33 +168,31 @@ static jint encode(JNIEnv* env, jclass, jlong handle, jcharArray source, jint so
* @param data buffer to receive state of the current conversion
* @param flush boolean that specifies end of source input
*/
-static jint convertByteToChar(JNIEnv* env, jclass, jlong handle, jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, jintArray data, jboolean flush) {
-
+static UErrorCode convertBytesToChars(JNIEnv* env, jclass, jlong handle, jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, jintArray data, jboolean flush) {
UErrorCode errorCode =U_ZERO_ERROR;
UConverter* cnv = (UConverter*)handle;
- if(cnv) {
+ if (cnv) {
+ ScopedByteArray uSource(env, source);
jint* myData = (jint*) env->GetPrimitiveArrayCritical(data,NULL);
if(myData) {
jint* sourceOffset = &myData[0];
jint* targetOffset = &myData[1];
- const jbyte* uSource =(jbyte*) env->GetPrimitiveArrayCritical(source, NULL);
- if(uSource) {
+ if (uSource.get() != NULL) {
jchar* uTarget=(jchar*) env->GetPrimitiveArrayCritical(target,NULL);
if(uTarget) {
- const jbyte* mySource = uSource+ *sourceOffset;
- const char* mySourceLimit = reinterpret_cast<const char*>(uSource+sourceEnd);
+ const jbyte* mySource = uSource.get() + *sourceOffset;
+ const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
UChar* cTarget=uTarget+ *targetOffset;
const UChar* cTargetLimit=uTarget+targetEnd;
ucnv_toUnicode( cnv , &cTarget, cTargetLimit,(const char**)&mySource,
mySourceLimit,NULL,(UBool) flush, &errorCode);
- *sourceOffset = mySource - uSource - *sourceOffset ;
+ *sourceOffset = mySource - uSource.get() - *sourceOffset;
*targetOffset = cTarget - uTarget - *targetOffset;
if(U_FAILURE(errorCode)) {
env->ReleasePrimitiveArrayCritical(target,uTarget,0);
- env->ReleasePrimitiveArrayCritical(source,(jchar*)uSource,0);
env->ReleasePrimitiveArrayCritical(data,(jint*)myData,0);
return errorCode;
}
@@ -211,7 +203,6 @@ static jint convertByteToChar(JNIEnv* env, jclass, jlong handle, jbyteArray sour
}else{
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
- env->ReleasePrimitiveArrayCritical(source,(jchar*)uSource,0);
}else{
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
@@ -223,27 +214,23 @@ static jint convertByteToChar(JNIEnv* env, jclass, jlong handle, jbyteArray sour
}
static jint decode(JNIEnv* env, jclass, jlong handle, jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, jintArray data, jboolean flush) {
-
- jint ec = convertByteToChar(env, NULL,handle,source,sourceEnd, target,targetEnd,data,flush);
-
- jint* myData = (jint*) env->GetPrimitiveArrayCritical(data,NULL);
+ UErrorCode ec = convertBytesToChars(env, NULL, handle, source, sourceEnd, target, targetEnd, data, flush);
UConverter* cnv = (UConverter*)handle;
-
- if(myData && cnv) {
+ if (cnv) {
UErrorCode errorCode = U_ZERO_ERROR;
- myData[3] = ucnv_toUCountPending(cnv, &errorCode);
+ jint count = ucnv_toUCountPending(cnv, &errorCode);
+ env->SetIntArrayRegion(data, 3, 1, &count);
- if(ec == U_ILLEGAL_CHAR_FOUND || ec == U_INVALID_CHAR_FOUND ) {
- char invalidChars[32] = {'\0'};
+ if (ec == U_ILLEGAL_CHAR_FOUND || ec == U_INVALID_CHAR_FOUND) {
int8_t len = 32;
- ucnv_getInvalidChars(cnv,invalidChars,&len,&errorCode);
-
- if(U_SUCCESS(errorCode)) {
- myData[2] = len;
+ char invalidChars[32] = {'\0'};
+ ucnv_getInvalidChars(cnv, invalidChars, &len, &errorCode);
+ if (U_SUCCESS(errorCode)) {
+ jint value = len;
+ env->SetIntArrayRegion(data, 2, 1, &value);
}
}
}
- env->ReleasePrimitiveArrayCritical(data,(jint*)myData,0);
return ec;
}
@@ -273,16 +260,10 @@ static jint getMinBytesPerChar(JNIEnv*, jclass, jlong handle) {
static jfloat getAveBytesPerChar(JNIEnv*, jclass, jlong handle) {
UConverter* cnv = (UConverter*)handle;
- if (cnv) {
- jfloat max = (jfloat)ucnv_getMaxCharSize(cnv);
- jfloat min = (jfloat)ucnv_getMinCharSize(cnv);
- return (jfloat) ( (max+min)/2 );
- }
- return -1;
+ return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
}
static jint flushByteToChar(JNIEnv* env, jclass,jlong handle, jcharArray target, jint targetEnd, jintArray data) {
-
UErrorCode errorCode =U_ZERO_ERROR;
UConverter* cnv = (UConverter*)handle;
if(cnv) {
@@ -522,7 +503,7 @@ static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnico
}
}
-static void JNI_FROM_U_CALLBACK_SUBSTITUTE_ENCODER(const void* rawContext,
+static void encoderReplaceCallback(const void* rawContext,
UConverterFromUnicodeArgs *fromArgs, const UChar*, int32_t, UChar32,
UConverterCallbackReason, UErrorCode * err) {
if (rawContext == NULL) {
@@ -535,12 +516,12 @@ static void JNI_FROM_U_CALLBACK_SUBSTITUTE_ENCODER(const void* rawContext,
static UConverterFromUCallback getFromUCallback(int32_t mode) {
switch(mode) {
- case com_ibm_icu4jni_converters_NativeConverter_STOP_CALLBACK:
+ case NativeConverter_REPORT:
return UCNV_FROM_U_CALLBACK_STOP;
- case com_ibm_icu4jni_converters_NativeConverter_SKIP_CALLBACK:
+ case NativeConverter_IGNORE:
return UCNV_FROM_U_CALLBACK_SKIP;
- case com_ibm_icu4jni_converters_NativeConverter_SUBSTITUTE_CALLBACK:
- return JNI_FROM_U_CALLBACK_SUBSTITUTE_ENCODER;
+ case NativeConverter_REPLACE:
+ return encoderReplaceCallback;
}
abort();
}
@@ -576,13 +557,19 @@ static jint setCallbackEncode(JNIEnv* env, jclass, jlong handle, jint onMalforme
return U_ILLEGAL_ARGUMENT_ERROR;
}
fromUNewContext->length = sub.size();
- strncpy(fromUNewContext->subBytes, reinterpret_cast<const char*>(sub.get()), sub.size());
+ memcpy(fromUNewContext->subBytes, sub.get(), sub.size());
UErrorCode errorCode = U_ZERO_ERROR;
ucnv_setFromUCallBack(conv, fromUNewAction, fromUNewContext, &fromUOldAction, (const void**)&fromUOldContext, &errorCode);
return errorCode;
}
-static void JNI_TO_U_CALLBACK_SUBSTITUTE_DECODER(const void* rawContext,
+static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
+ // The icu4c UCNV_TO_U_CALLBACK_SKIP callback requires that the context is NULL, which is
+ // never true for us.
+ *err = U_ZERO_ERROR;
+}
+
+static void decoderReplaceCallback(const void* rawContext,
UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
UErrorCode* err) {
if (!rawContext) {
@@ -590,17 +577,14 @@ static void JNI_TO_U_CALLBACK_SUBSTITUTE_DECODER(const void* rawContext,
}
const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
*err = U_ZERO_ERROR;
- ucnv_cbToUWriteUChars(toArgs,context->subUChars ,context->length , 0, err);
+ ucnv_cbToUWriteUChars(toArgs,context->subUChars, context->length, 0, err);
}
static UConverterToUCallback getToUCallback(int32_t mode) {
switch (mode) {
- case com_ibm_icu4jni_converters_NativeConverter_STOP_CALLBACK:
- return UCNV_TO_U_CALLBACK_STOP;
- case com_ibm_icu4jni_converters_NativeConverter_SKIP_CALLBACK:
- return UCNV_TO_U_CALLBACK_SKIP;
- case com_ibm_icu4jni_converters_NativeConverter_SUBSTITUTE_CALLBACK:
- return JNI_TO_U_CALLBACK_SUBSTITUTE_DECODER;
+ case NativeConverter_IGNORE: return decoderIgnoreCallback;
+ case NativeConverter_REPLACE: return decoderReplaceCallback;
+ case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
}
abort();
}
diff --git a/luni/src/test/java/java/nio/charset/CharsetEncoderTest.java b/luni/src/test/java/java/nio/charset/CharsetEncoderTest.java
index 4354cec..6ba9327 100644
--- a/luni/src/test/java/java/nio/charset/CharsetEncoderTest.java
+++ b/luni/src/test/java/java/nio/charset/CharsetEncoderTest.java
@@ -18,6 +18,7 @@ package java.nio.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
+import java.util.Arrays;
public class CharsetEncoderTest extends junit.framework.TestCase {
// None of the harmony or jtreg tests actually check that replaceWith does the right thing!
@@ -31,4 +32,19 @@ public class CharsetEncoderTest extends junit.framework.TestCase {
String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
assertEquals("hello=world", output);
}
+
+ private void assertReplacementBytesForEncoder(String charset, byte[] bytes) {
+ byte[] result = Charset.forName(charset).newEncoder().replacement();
+ assertEquals(Arrays.toString(bytes), Arrays.toString(result));
+ }
+
+ // For all the guaranteed built-in charsets, check that we have the right default replacements.
+ public void test_defaultReplacementBytes() throws Exception {
+ assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' });
+ assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' });
+ assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd });
+ assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd });
+ assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff });
+ assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' });
+ }
}