diff options
Diffstat (limited to 'luni')
11 files changed, 194 insertions, 254 deletions
diff --git a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java index 404dd6e..d1c9546 100644 --- a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java +++ b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetDecoderICU.java @@ -27,28 +27,26 @@ import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; import java.nio.ByteBuffer; -public final class CharsetDecoderICU extends CharsetDecoder{ +public final class CharsetDecoderICU extends CharsetDecoder { private static final int MAX_CHARS_PER_BYTE = 2; - private static final int INPUT_OFFSET = 0, - OUTPUT_OFFSET = 1, - INVALID_BYTES = 2, - INPUT_HELD = 3, - LIMIT = 4; - /* data is 3 element array where - * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed - * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written - * data[INVALID_CHARS] = number of invalid chars - * data[INPUT_HELD] = number of input chars held in the converter's state + private static final int INPUT_OFFSET = 0; + private static final int OUTPUT_OFFSET = 1; + private static final int INVALID_BYTES = 2; + private static final int INPUT_HELD = 3; + /* + * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input bytes consumed + * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output chars written + * data[INVALID_BYTES] = number of invalid bytes + * data[INPUT_HELD] = number of input bytes held in the converter's state */ - private int[] data = new int[LIMIT]; + private int[] data = new int[4]; /* handle to the ICU converter that is opened */ - private long converterHandle=0; + private long converterHandle = 0; - - private byte[] input = null; - private char[] output= null; + private byte[] input = null; + private char[] output= null; // BEGIN android-added private byte[] allocatedInput = null; @@ -81,7 +79,7 @@ public final class CharsetDecoderICU extends CharsetDecoder{ /** * Sets this decoders replacement string. Substitutes the string in input if an - * umappable or illegal sequence is encountered + * unmappable or illegal sequence is encountered * @param newReplacement to replace the error bytes with * @stable ICU 2.4 */ @@ -130,42 +128,35 @@ public final class CharsetDecoderICU extends CharsetDecoder{ * @stable ICU 2.4 */ protected final CoderResult implFlush(CharBuffer out) { - try{ - - data[OUTPUT_OFFSET] = getArray(out); - - ec=NativeConverter.flushByteToChar( + try { + data[OUTPUT_OFFSET] = getArray(out); + ec = NativeConverter.flushByteToChar( converterHandle, /* Handle to ICU Converter */ output, /* input array of chars */ outEnd, /* input index+1 to be written */ data /* contains data, inOff,outOff */ ); - /* If we don't have room for the output, throw an exception*/ if (ErrorCode.isFailure(ec)) { if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) { return CoderResult.OVERFLOW; - }else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND ) {//CSDL: add this truncated character error handling - if(data[INPUT_OFFSET]>0){ + } else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling + if (data[INPUT_OFFSET] > 0) { return CoderResult.malformedForLength(data[INPUT_OFFSET]); } - }else { + } else { ErrorCode.getException(ec); } } return CoderResult.UNDERFLOW; - }finally{ + } finally { /* save the flushed data */ setPosition(out); implReset(); } } - /** - * Resets the to Unicode mode of converter - * @stable ICU 2.4 - */ protected void implReset() { NativeConverter.resetByteToChar(converterHandle); data[INPUT_OFFSET] = 0; @@ -175,6 +166,11 @@ public final class CharsetDecoderICU extends CharsetDecoder{ savedInputHeldLen = 0; output = null; input = null; + allocatedInput = null; + allocatedOutput = null; + ec = 0; + inEnd = 0; + outEnd = 0; } /** @@ -194,9 +190,8 @@ public final class CharsetDecoderICU extends CharsetDecoder{ * action succeeds or more input is needed for completing the decoding action. * @stable ICU 2.4 */ - protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){ - - if(!in.hasRemaining()){ + protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out){ + if (!in.hasRemaining()){ return CoderResult.UNDERFLOW; } @@ -205,29 +200,27 @@ public final class CharsetDecoderICU extends CharsetDecoder{ data[INPUT_HELD] = 0; try{ - /* do the conversion */ - ec=NativeConverter.decode( + ec = NativeConverter.decode( converterHandle, /* Handle to ICU Converter */ input, /* input array of bytes */ inEnd, /* last index+1 to be converted */ output, /* input array of chars */ outEnd, /* input index+1 to be written */ data, /* contains data, inOff,outOff */ - false /* donot flush the data */ + false /* don't flush the data */ ); - - /* return an error*/ - if(ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR){ + // Return an error. + if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) { return CoderResult.OVERFLOW; - }else if(ec==ErrorCode.U_INVALID_CHAR_FOUND){ - return CoderResult.malformedForLength(data[INVALID_BYTES]); - }else if(ec==ErrorCode.U_ILLEGAL_CHAR_FOUND){ + } else if (ec == ErrorCode.U_INVALID_CHAR_FOUND) { + return CoderResult.unmappableForLength(data[INVALID_BYTES]); + } else if (ec == ErrorCode.U_ILLEGAL_CHAR_FOUND) { return CoderResult.malformedForLength(data[INVALID_BYTES]); } - /* decoding action succeded */ + // Decoding succeeded: give us more data. return CoderResult.UNDERFLOW; - }finally{ + } finally { setPosition(in); setPosition(out); } @@ -237,7 +230,7 @@ public final class CharsetDecoderICU extends CharsetDecoder{ * Releases the system resources by cleanly closing ICU converter opened * @stable ICU 2.4 */ - protected void finalize()throws Throwable{ + protected void finalize() throws Throwable{ NativeConverter.closeConverter(converterHandle); super.finalize(); converterHandle = 0; @@ -248,13 +241,13 @@ public final class CharsetDecoderICU extends CharsetDecoder{ //------------------------------------------ private final int getArray(CharBuffer out){ - if(out.hasArray()){ + if (out.hasArray()) { // BEGIN android-changed: take arrayOffset into account output = out.array(); outEnd = out.arrayOffset() + out.limit(); return out.arrayOffset() + out.position(); // END android-changed - }else{ + } else { outEnd = out.remaining(); // BEGIN android-added if (allocatedOutput == null || (outEnd > allocatedOutput.length)) { @@ -267,16 +260,16 @@ public final class CharsetDecoderICU extends CharsetDecoder{ // is 0 return 0; } - } + private final int getArray(ByteBuffer in){ - if(in.hasArray()){ + if (in.hasArray()) { // BEGIN android-changed: take arrayOffset into account input = in.array(); inEnd = in.arrayOffset() + in.limit(); return in.arrayOffset() + in.position() + savedInputHeldLen;/*exclude the number fo bytes held in previous conversion*/ // END android-changed - }else{ + } else { inEnd = in.remaining(); // BEGIN android-added if (allocatedInput == null || (inEnd > allocatedInput.length)) { @@ -294,30 +287,24 @@ public final class CharsetDecoderICU extends CharsetDecoder{ // is whatever is savedInputLen return savedInputHeldLen; } - } - private final void setPosition(CharBuffer out){ - if(out.hasArray()){ - // BEGIN android-changed: take arrayOffset into account + + private final void setPosition(CharBuffer out) { + if (out.hasArray()) { out.position(out.position() + data[OUTPUT_OFFSET] - out.arrayOffset()); - // END android-changed - }else{ - out.put(output,0,data[OUTPUT_OFFSET]); + } else { + out.put(output, 0, data[OUTPUT_OFFSET]); } - // BEGIN android-added // release reference to output array, which may not be ours output = null; - // END android-added } - private final void setPosition(ByteBuffer in){ + private final void setPosition(ByteBuffer in) { // ok was there input held in the previous invocation of decodeLoop // that resulted in output in this invocation? - // BEGIN android-changed in.position(in.position() + data[INPUT_OFFSET] + savedInputHeldLen - data[INPUT_HELD]); savedInputHeldLen = data[INPUT_HELD]; // release reference to input array, which may not be ours input = null; - // END android-changed } } diff --git a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java index bef1862..55f883b 100644 --- a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java +++ b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetEncoderICU.java @@ -28,18 +28,17 @@ import com.ibm.icu4jni.common.ErrorCode; public final class CharsetEncoderICU extends CharsetEncoder { - private static final int INPUT_OFFSET = 0, - OUTPUT_OFFSET = 1, - INVALID_CHARS = 2, - INPUT_HELD = 3, - LIMIT = 4; - /* data is 3 element array where + private static final int INPUT_OFFSET = 0; + private static final int OUTPUT_OFFSET = 1; + private static final int INVALID_CHARS = 2; + private static final int INPUT_HELD = 3; + /* * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written * data[INVALID_CHARS] = number of invalid chars * data[INPUT_HELD] = number of input chars held in the converter's state */ - private int[] data = new int[LIMIT]; + private int[] data = new int[4]; /* handle to the ICU converter that is opened */ private long converterHandle=0; @@ -63,7 +62,7 @@ public final class CharsetEncoderICU extends CharsetEncoder { private int savedInputHeldLen; /** - * Construcs a new encoder for the given charset + * Constructs a new encoder for the given charset * @param cs for which the decoder is created * @param cHandle the address of ICU converter * @param replacement the substitution bytes @@ -139,11 +138,11 @@ public final class CharsetEncoderICU extends CharsetEncoder { if (ErrorCode.isFailure(ec)) { if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) { return CoderResult.OVERFLOW; - }else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling - if(data[INPUT_OFFSET]>0){ + } else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling + if (data[INPUT_OFFSET] > 0) { return CoderResult.malformedForLength(data[INPUT_OFFSET]); } - }else { + } else { ErrorCode.getException(ec); } } @@ -178,7 +177,6 @@ public final class CharsetEncoderICU extends CharsetEncoder { * @stable ICU 2.4 */ protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { - if (!in.hasRemaining()) { return CoderResult.UNDERFLOW; } diff --git a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java index 71d2747..bf3cfe0 100644 --- a/luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java +++ b/luni/src/main/java/com/ibm/icu4jni/charset/CharsetICU.java @@ -16,76 +16,49 @@ import java.util.HashMap; import java.util.Map; public final class CharsetICU extends Charset { + private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>(); + static { + // ICU has different default replacements to the RI in these cases. There are probably + // more cases too, but this covers all the charsets that Java guarantees will be available. + // These use U+FFFD REPLACEMENT CHARACTER... + DEFAULT_REPLACEMENTS.put("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd }); + DEFAULT_REPLACEMENTS.put("UTF-32", new byte[] { (byte) 0x00, (byte) 0x00, (byte) 0xff, (byte) 0xfd }); + // These use '?'. It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 + // and US-ASCII) it can represent it, but this is what the RI does... + byte[] questionMark = new byte[] { (byte) '?' }; + DEFAULT_REPLACEMENTS.put("UTF-8", questionMark); + DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark); + DEFAULT_REPLACEMENTS.put("US-ASCII", questionMark); + } + private final String icuCanonicalName; - /** - * Constructor to create a the CharsetICU object - * @param canonicalName the canonical name as a string - * @param aliases the alias set as an array of strings - * @stable ICU 2.4 - */ + protected CharsetICU(String canonicalName, String icuCanonName, String[] aliases) { super(canonicalName, aliases); icuCanonicalName = icuCanonName; } - /** - * Returns a new decoder instance of this charset object - * @return a new decoder object - * @stable ICU 2.4 - */ + public CharsetDecoder newDecoder() { - long converterHandle = NativeConverter.openConverter(icuCanonicalName); - return new CharsetDecoderICU(this, converterHandle); + return new CharsetDecoderICU(this, NativeConverter.openConverter(icuCanonicalName)); } - // hardCoded list of replacement bytes - private static final Map<String, byte[]> subByteMap = new HashMap<String, byte[]>(); - static { - subByteMap.put("UTF-32", new byte[]{0x00, 0x00, (byte)0xfe, (byte)0xff}); - subByteMap.put("ibm-16684_P110-2003", new byte[]{0x40, 0x40}); // make \u3000 the sub char - subByteMap.put("ibm-971_P100-1995", new byte[]{(byte)0xa1, (byte)0xa1}); // make \u3000 the sub char - } - /** - * Returns a new encoder object of the charset - * @return a new encoder - * @stable ICU 2.4 - */ public CharsetEncoder newEncoder() { - // the arrays are locals and not - // instance variables since the - // methods on this class need to - // be thread safe long converterHandle = NativeConverter.openConverter(icuCanonicalName); - - //According to the contract all converters should have non-empty replacement - byte[] replacement = NativeConverter.getSubstitutionBytes(converterHandle); - - try { - return new CharsetEncoderICU(this,converterHandle, replacement); - } catch (IllegalArgumentException ex) { - // work around for the nonsensical check in the nio API that - // a substitution character must be mappable while decoding!! - replacement = subByteMap.get(icuCanonicalName); - if (replacement == null) { - replacement = new byte[NativeConverter.getMinBytesPerChar(converterHandle)]; - for(int i = 0; i < replacement.length; ++i) { - replacement[i]= 0x3f; - } - } - return new CharsetEncoderICU(this, converterHandle, replacement); + // We have our own map of RI-compatible default replacements... + byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName); + if (replacement == null) { + // ...but fall back to asking ICU. + // TODO: should we just try to use U+FFFD and fall back to '?' if U+FFFD can't be encoded? + replacement = NativeConverter.getSubstitutionBytes(converterHandle); + } else { + replacement = replacement.clone(); } + return new CharsetEncoderICU(this, converterHandle, replacement); } - /** - * Ascertains if a charset is a sub set of this charset - * @param cs charset to test - * @return true if the given charset is a subset of this charset - * @stable ICU 2.4 - * - * //CSDL: major changes by Jack - */ - public boolean contains(Charset cs){ - if (null == cs) { - return false; + public boolean contains(Charset cs) { + if (cs == null) { + return false; } else if (this.equals(cs)) { return true; } @@ -98,8 +71,7 @@ public final class CharsetICU extends Charset { if (converterHandle1 > 0) { converterHandle2 = NativeConverter.openConverter(cs.name()); if (converterHandle2 > 0) { - return NativeConverter.contains(converterHandle1, - converterHandle2); + return NativeConverter.contains(converterHandle1, converterHandle2); } } return false; diff --git a/luni/src/main/java/java/nio/charset/Charset.java b/luni/src/main/java/java/nio/charset/Charset.java index 4b265e5..494152e 100644 --- a/luni/src/main/java/java/nio/charset/Charset.java +++ b/luni/src/main/java/java/nio/charset/Charset.java @@ -45,16 +45,23 @@ import java.util.TreeMap; * sequence. It facilitates the encoding from a Unicode character sequence into * a byte sequence, and the decoding from a byte sequence into a Unicode * character sequence. - * <p> - * A charset has a canonical name, which is usually in uppercase. Typically it + * + * <p>A charset has a canonical name, which is usually in uppercase. Typically it * also has one or more aliases. The name string can only consist of the * following characters: '0' - '9', 'A' - 'Z', 'a' - 'z', '.', ':'. '-' and '_'. * The first character of the name must be a digit or a letter. - * <p> - * The following charsets should be supported by any java platform: US-ASCII, - * ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16. - * <p> - * Additional charsets can be made available by configuring one or more charset + * + * <p>The following charsets must be available on every Java implementation: + * <ul> + * <li>ISO-8859-1 + * <li>US-ASCII + * <li>UTF-16 + * <li>UTF-16BE + * <li>UTF-16LE + * <li>UTF-8 + * </ul> + * + * <p>Additional charsets can be made available by configuring one or more charset * providers through provider configuration files. Such files are always named * as "java.nio.charset.spi.CharsetProvider" and located in the * "META-INF/services" sub folder of one or more classpaths. The files should be @@ -72,7 +79,6 @@ import java.util.TreeMap; * @see java.nio.charset.spi.CharsetProvider */ public abstract class Charset implements Comparable<Charset> { - /* * The name of configuration files where charset provider class names can be * specified. diff --git a/luni/src/main/java/java/nio/charset/CharsetDecoder.java b/luni/src/main/java/java/nio/charset/CharsetDecoder.java index d2c9871..f4c4558 100644 --- a/luni/src/main/java/java/nio/charset/CharsetDecoder.java +++ b/luni/src/main/java/java/nio/charset/CharsetDecoder.java @@ -360,10 +360,6 @@ public abstract class CharsetDecoder { status = endOfInput ? END : ONGOING; if (endOfInput && remaining > 0) { result = CoderResult.malformedForLength(remaining); - // BEGIN android-added - // needed to adjust for the changed call to position() below - in.position(in.position() + result.length()); - // END android-added } else { return result; } @@ -386,14 +382,7 @@ public abstract class CharsetDecoder { if (action != CodingErrorAction.IGNORE) return result; } - // BEGIN android-changed - // the condition is removed in Harmony revision 518047. However, - // making the conditional statement unconditional leads to - // misbehavior when using REPLACE on malformedInput. - if (!result.isMalformed()) { - in.position(in.position() + result.length()); - } - // END android-changed + in.position(in.position() + result.length()); } } diff --git a/luni/src/main/java/java/nio/charset/CharsetEncoder.java b/luni/src/main/java/java/nio/charset/CharsetEncoder.java index 7375e14..abbe04a 100644 --- a/luni/src/main/java/java/nio/charset/CharsetEncoder.java +++ b/luni/src/main/java/java/nio/charset/CharsetEncoder.java @@ -121,21 +121,8 @@ public abstract class CharsetEncoder { private CharsetDecoder decoder; /** - * Constructs a new <code>CharsetEncoder</code> using the given - * <code>Charset</code>, average number and maximum number of bytes - * created by this encoder for one input character. - * - * @param cs - * the <code>Charset</code> to be used by this encoder. - * @param averageBytesPerChar - * average number of bytes created by this encoder for one input - * character, must be positive. - * @param maxBytesPerChar - * maximum number of bytes which can be created by this encoder - * for one input character, must be positive. - * @throws IllegalArgumentException - * if <code>maxBytesPerChar</code> or - * <code>averageBytesPerChar</code> is negative. + * Constructs a new {@code CharsetEncoder} using the given parameters and + * the replacement byte array {@code { (byte) '?' }}. */ protected CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar) { this(cs, averageBytesPerChar, maxBytesPerChar, new byte[] { (byte) '?' }); @@ -743,8 +730,9 @@ public abstract class CharsetEncoder { || !isLegalReplacement(replacement)) { throw new IllegalArgumentException("bad replacement: " + Arrays.toString(replacement)); } + // It seems like a bug, but the RI doesn't clone, and we have tests that check we don't. replace = replacement; - implReplaceWith(replacement); + implReplaceWith(replace); return this; } diff --git a/luni/src/main/java/java/nio/charset/CoderResult.java b/luni/src/main/java/java/nio/charset/CoderResult.java index 8458dfc..2653161 100644 --- a/luni/src/main/java/java/nio/charset/CoderResult.java +++ b/luni/src/main/java/java/nio/charset/CoderResult.java @@ -203,7 +203,7 @@ public class CoderResult { /** * Gets the length of the erroneous input. The length is only meaningful to - * a malformed-input error or an unmappble character error. + * a malformed-input error or an unmappable character error. * * @return the length, as an integer, of this object's erroneous input. * @throws UnsupportedOperationException diff --git a/luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java b/luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java index 72c04e5..85e7ee1 100644 --- a/luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java +++ b/luni/src/main/java/java/nio/charset/IllegalCharsetNameException.java @@ -36,18 +36,18 @@ public class IllegalCharsetNameException extends IllegalArgumentException { * Constructs a new {@code IllegalCharsetNameException} with the supplied * charset name. * - * @param charset + * @param charsetName * the encountered illegal charset name. */ - public IllegalCharsetNameException(String charset) { - super(charset); - this.charsetName = charset; + public IllegalCharsetNameException(String charsetName) { + super((charsetName != null) ? charsetName : "null"); + this.charsetName = charsetName; } /** * Returns the encountered illegal charset name. */ public String getCharsetName() { - return this.charsetName; + return charsetName; } } diff --git a/luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java b/luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java index d8b5f3e..ed3bfaa 100644 --- a/luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java +++ b/luni/src/main/java/java/nio/charset/UnsupportedCharsetException.java @@ -36,12 +36,12 @@ public class UnsupportedCharsetException extends IllegalArgumentException { * Constructs a new {@code UnsupportedCharsetException} with the supplied * charset name. * - * @param charset + * @param charsetName * the encountered unsupported charset name. */ - public UnsupportedCharsetException(String charset) { - super(charset); - this.charsetName = charset; + public UnsupportedCharsetException(String charsetName) { + super((charsetName != null) ? charsetName : "null"); + this.charsetName = charsetName; } /** @@ -50,6 +50,6 @@ public class UnsupportedCharsetException extends IllegalArgumentException { * @return the encountered unsupported charset name. */ public String getCharsetName() { - return this.charsetName; + return charsetName; } } diff --git a/luni/src/main/native/NativeConverter.cpp b/luni/src/main/native/NativeConverter.cpp index 4afdaf2..2c65e74 100644 --- a/luni/src/main/native/NativeConverter.cpp +++ b/luni/src/main/native/NativeConverter.cpp @@ -32,9 +32,9 @@ #include <stdlib.h> #include <string.h> -#define com_ibm_icu4jni_converters_NativeConverter_STOP_CALLBACK 0L -#define com_ibm_icu4jni_converters_NativeConverter_SKIP_CALLBACK 1L -#define com_ibm_icu4jni_converters_NativeConverter_SUBSTITUTE_CALLBACK 2L +#define NativeConverter_REPORT 0 +#define NativeConverter_IGNORE 1 +#define NativeConverter_REPLACE 2 struct DecoderCallbackContext { int length; @@ -93,32 +93,31 @@ static void closeConverter(JNIEnv*, jclass, jlong handle) { * @param data buffer to recieve state of the current conversion * @param flush boolean that specifies end of source input */ -static jint convertCharToByte(JNIEnv* env, jclass, jlong handle, jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, jintArray data, jboolean flush) { +static UErrorCode convertCharsToBytes(JNIEnv* env, jclass, jlong handle, jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, jintArray data, jboolean flush) { UConverter* cnv = (UConverter*)handle; if (!cnv) { return U_ILLEGAL_ARGUMENT_ERROR; } + ScopedCharArray uSource(env, source); UErrorCode errorCode = U_ZERO_ERROR; - jint* myData = (jint*) env->GetPrimitiveArrayCritical(data,NULL); - if(myData) { + jint* myData = (jint*) env->GetPrimitiveArrayCritical(data, NULL); + if (myData) { jint* sourceOffset = &myData[0]; jint* targetOffset = &myData[1]; - const jchar* uSource =(jchar*) env->GetPrimitiveArrayCritical(source, NULL); - if(uSource) { + if (uSource.get() != NULL) { jbyte* uTarget=(jbyte*) env->GetPrimitiveArrayCritical(target,NULL); if(uTarget) { - const jchar* mySource = uSource+ *sourceOffset; - const UChar* mySourceLimit= uSource+sourceEnd; + const jchar* mySource = uSource.get() + *sourceOffset; + const UChar* mySourceLimit= uSource.get() + sourceEnd; char* cTarget = reinterpret_cast<char*>(uTarget+ *targetOffset); const char* cTargetLimit = reinterpret_cast<const char*>(uTarget+targetEnd); ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&mySource, mySourceLimit,NULL,(UBool) flush, &errorCode); - *sourceOffset = (jint) (mySource - uSource)-*sourceOffset; + *sourceOffset = (jint) (mySource - uSource.get())-*sourceOffset; *targetOffset = (jint) ((jbyte*)cTarget - uTarget)- *targetOffset; if(U_FAILURE(errorCode)) { env->ReleasePrimitiveArrayCritical(target,uTarget,0); - env->ReleasePrimitiveArrayCritical(source,(jchar*)uSource,0); env->ReleasePrimitiveArrayCritical(data,(jint*)myData,0); return errorCode; } @@ -129,7 +128,6 @@ static jint convertCharToByte(JNIEnv* env, jclass, jlong handle, jcharArray sou }else{ errorCode = U_ILLEGAL_ARGUMENT_ERROR; } - env->ReleasePrimitiveArrayCritical(source,(jchar*)uSource,0); }else{ errorCode = U_ILLEGAL_ARGUMENT_ERROR; } @@ -138,27 +136,23 @@ static jint convertCharToByte(JNIEnv* env, jclass, jlong handle, jcharArray sou } static jint encode(JNIEnv* env, jclass, jlong handle, jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, jintArray data, jboolean flush) { - - UErrorCode ec = UErrorCode(convertCharToByte(env, NULL,handle,source,sourceEnd, target,targetEnd,data,flush)); + UErrorCode ec = convertCharsToBytes(env, NULL, handle, source, sourceEnd, target, targetEnd, data, flush); UConverter* cnv = (UConverter*)handle; - jint* myData = (jint*) env->GetPrimitiveArrayCritical(data,NULL); - - if(cnv && myData) { - - UErrorCode errorCode = U_ZERO_ERROR; - myData[3] = ucnv_fromUCountPending(cnv, &errorCode); + if (cnv) { + UErrorCode errorCode = U_ZERO_ERROR; + jint count = ucnv_fromUCountPending(cnv, &errorCode); + env->SetIntArrayRegion(data, 3, 1, &count); - if(ec == U_ILLEGAL_CHAR_FOUND || ec == U_INVALID_CHAR_FOUND) { - int8_t count =32; + if (ec == U_ILLEGAL_CHAR_FOUND || ec == U_INVALID_CHAR_FOUND) { + int8_t len = 32; UChar invalidUChars[32]; - ucnv_getInvalidUChars(cnv,invalidUChars,&count,&errorCode); - - if(U_SUCCESS(errorCode)) { - myData[2] = count; + ucnv_getInvalidUChars(cnv, invalidUChars, &len, &errorCode); + if (U_SUCCESS(errorCode)) { + jint value = len; + env->SetIntArrayRegion(data, 2, 1, &value); } } } - env->ReleasePrimitiveArrayCritical(data,(jint*)myData,0); return ec; } @@ -174,33 +168,31 @@ static jint encode(JNIEnv* env, jclass, jlong handle, jcharArray source, jint so * @param data buffer to recieve state of the current conversion * @param flush boolean that specifies end of source input */ -static jint convertByteToChar(JNIEnv* env, jclass, jlong handle, jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, jintArray data, jboolean flush) { - +static UErrorCode convertBytesToChars(JNIEnv* env, jclass, jlong handle, jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, jintArray data, jboolean flush) { UErrorCode errorCode =U_ZERO_ERROR; UConverter* cnv = (UConverter*)handle; - if(cnv) { + if (cnv) { + ScopedByteArray uSource(env, source); jint* myData = (jint*) env->GetPrimitiveArrayCritical(data,NULL); if(myData) { jint* sourceOffset = &myData[0]; jint* targetOffset = &myData[1]; - const jbyte* uSource =(jbyte*) env->GetPrimitiveArrayCritical(source, NULL); - if(uSource) { + if (uSource.get() != NULL) { jchar* uTarget=(jchar*) env->GetPrimitiveArrayCritical(target,NULL); if(uTarget) { - const jbyte* mySource = uSource+ *sourceOffset; - const char* mySourceLimit = reinterpret_cast<const char*>(uSource+sourceEnd); + const jbyte* mySource = uSource.get() + *sourceOffset; + const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd); UChar* cTarget=uTarget+ *targetOffset; const UChar* cTargetLimit=uTarget+targetEnd; ucnv_toUnicode( cnv , &cTarget, cTargetLimit,(const char**)&mySource, mySourceLimit,NULL,(UBool) flush, &errorCode); - *sourceOffset = mySource - uSource - *sourceOffset ; + *sourceOffset = mySource - uSource.get() - *sourceOffset; *targetOffset = cTarget - uTarget - *targetOffset; if(U_FAILURE(errorCode)) { env->ReleasePrimitiveArrayCritical(target,uTarget,0); - env->ReleasePrimitiveArrayCritical(source,(jchar*)uSource,0); env->ReleasePrimitiveArrayCritical(data,(jint*)myData,0); return errorCode; } @@ -211,7 +203,6 @@ static jint convertByteToChar(JNIEnv* env, jclass, jlong handle, jbyteArray sour }else{ errorCode = U_ILLEGAL_ARGUMENT_ERROR; } - env->ReleasePrimitiveArrayCritical(source,(jchar*)uSource,0); }else{ errorCode = U_ILLEGAL_ARGUMENT_ERROR; } @@ -223,27 +214,23 @@ static jint convertByteToChar(JNIEnv* env, jclass, jlong handle, jbyteArray sour } static jint decode(JNIEnv* env, jclass, jlong handle, jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, jintArray data, jboolean flush) { - - jint ec = convertByteToChar(env, NULL,handle,source,sourceEnd, target,targetEnd,data,flush); - - jint* myData = (jint*) env->GetPrimitiveArrayCritical(data,NULL); + UErrorCode ec = convertBytesToChars(env, NULL, handle, source, sourceEnd, target, targetEnd, data, flush); UConverter* cnv = (UConverter*)handle; - - if(myData && cnv) { + if (cnv) { UErrorCode errorCode = U_ZERO_ERROR; - myData[3] = ucnv_toUCountPending(cnv, &errorCode); + jint count = ucnv_toUCountPending(cnv, &errorCode); + env->SetIntArrayRegion(data, 3, 1, &count); - if(ec == U_ILLEGAL_CHAR_FOUND || ec == U_INVALID_CHAR_FOUND ) { - char invalidChars[32] = {'\0'}; + if (ec == U_ILLEGAL_CHAR_FOUND || ec == U_INVALID_CHAR_FOUND) { int8_t len = 32; - ucnv_getInvalidChars(cnv,invalidChars,&len,&errorCode); - - if(U_SUCCESS(errorCode)) { - myData[2] = len; + char invalidChars[32] = {'\0'}; + ucnv_getInvalidChars(cnv, invalidChars, &len, &errorCode); + if (U_SUCCESS(errorCode)) { + jint value = len; + env->SetIntArrayRegion(data, 2, 1, &value); } } } - env->ReleasePrimitiveArrayCritical(data,(jint*)myData,0); return ec; } @@ -273,16 +260,10 @@ static jint getMinBytesPerChar(JNIEnv*, jclass, jlong handle) { static jfloat getAveBytesPerChar(JNIEnv*, jclass, jlong handle) { UConverter* cnv = (UConverter*)handle; - if (cnv) { - jfloat max = (jfloat)ucnv_getMaxCharSize(cnv); - jfloat min = (jfloat)ucnv_getMinCharSize(cnv); - return (jfloat) ( (max+min)/2 ); - } - return -1; + return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1; } static jint flushByteToChar(JNIEnv* env, jclass,jlong handle, jcharArray target, jint targetEnd, jintArray data) { - UErrorCode errorCode =U_ZERO_ERROR; UConverter* cnv = (UConverter*)handle; if(cnv) { @@ -522,7 +503,7 @@ static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnico } } -static void JNI_FROM_U_CALLBACK_SUBSTITUTE_ENCODER(const void* rawContext, +static void encoderReplaceCallback(const void* rawContext, UConverterFromUnicodeArgs *fromArgs, const UChar*, int32_t, UChar32, UConverterCallbackReason, UErrorCode * err) { if (rawContext == NULL) { @@ -535,12 +516,12 @@ static void JNI_FROM_U_CALLBACK_SUBSTITUTE_ENCODER(const void* rawContext, static UConverterFromUCallback getFromUCallback(int32_t mode) { switch(mode) { - case com_ibm_icu4jni_converters_NativeConverter_STOP_CALLBACK: + case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP; - case com_ibm_icu4jni_converters_NativeConverter_SKIP_CALLBACK: + case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP; - case com_ibm_icu4jni_converters_NativeConverter_SUBSTITUTE_CALLBACK: - return JNI_FROM_U_CALLBACK_SUBSTITUTE_ENCODER; + case NativeConverter_REPLACE: + return encoderReplaceCallback; } abort(); } @@ -576,13 +557,19 @@ static jint setCallbackEncode(JNIEnv* env, jclass, jlong handle, jint onMalforme return U_ILLEGAL_ARGUMENT_ERROR; } fromUNewContext->length = sub.size(); - strncpy(fromUNewContext->subBytes, reinterpret_cast<const char*>(sub.get()), sub.size()); + memcpy(fromUNewContext->subBytes, sub.get(), sub.size()); UErrorCode errorCode = U_ZERO_ERROR; ucnv_setFromUCallBack(conv, fromUNewAction, fromUNewContext, &fromUOldAction, (const void**)&fromUOldContext, &errorCode); return errorCode; } -static void JNI_TO_U_CALLBACK_SUBSTITUTE_DECODER(const void* rawContext, +static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) { + // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is + // never true for us. + *err = U_ZERO_ERROR; +} + +static void decoderReplaceCallback(const void* rawContext, UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) { if (!rawContext) { @@ -590,17 +577,14 @@ static void JNI_TO_U_CALLBACK_SUBSTITUTE_DECODER(const void* rawContext, } const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext); *err = U_ZERO_ERROR; - ucnv_cbToUWriteUChars(toArgs,context->subUChars ,context->length , 0, err); + ucnv_cbToUWriteUChars(toArgs,context->subUChars, context->length, 0, err); } static UConverterToUCallback getToUCallback(int32_t mode) { switch (mode) { - case com_ibm_icu4jni_converters_NativeConverter_STOP_CALLBACK: - return UCNV_TO_U_CALLBACK_STOP; - case com_ibm_icu4jni_converters_NativeConverter_SKIP_CALLBACK: - return UCNV_TO_U_CALLBACK_SKIP; - case com_ibm_icu4jni_converters_NativeConverter_SUBSTITUTE_CALLBACK: - return JNI_TO_U_CALLBACK_SUBSTITUTE_DECODER; + case NativeConverter_IGNORE: return decoderIgnoreCallback; + case NativeConverter_REPLACE: return decoderReplaceCallback; + case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP; } abort(); } diff --git a/luni/src/test/java/java/nio/charset/CharsetEncoderTest.java b/luni/src/test/java/java/nio/charset/CharsetEncoderTest.java index 4354cec..6ba9327 100644 --- a/luni/src/test/java/java/nio/charset/CharsetEncoderTest.java +++ b/luni/src/test/java/java/nio/charset/CharsetEncoderTest.java @@ -18,6 +18,7 @@ package java.nio.charset; import java.nio.ByteBuffer; import java.nio.CharBuffer; +import java.util.Arrays; public class CharsetEncoderTest extends junit.framework.TestCase { // None of the harmony or jtreg tests actually check that replaceWith does the right thing! @@ -31,4 +32,19 @@ public class CharsetEncoderTest extends junit.framework.TestCase { String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString(); assertEquals("hello=world", output); } + + private void assertReplacementBytesForEncoder(String charset, byte[] bytes) { + byte[] result = Charset.forName(charset).newEncoder().replacement(); + assertEquals(Arrays.toString(bytes), Arrays.toString(result)); + } + + // For all the guaranteed built-in charsets, check that we have the right default replacements. + public void test_defaultReplacementBytes() throws Exception { + assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' }); + assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' }); + assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd }); + assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd }); + assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff }); + assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' }); + } } |