diff options
23 files changed, 929 insertions, 896 deletions
diff --git a/JavaLibrary.mk b/JavaLibrary.mk index 0602a5b..a51f233 100644 --- a/JavaLibrary.mk +++ b/JavaLibrary.mk @@ -183,6 +183,17 @@ LOCAL_MODULE_TAGS := tests LOCAL_MODULE := core-tests-xml include $(BUILD_JAVA_LIBRARY) +# also build support as a static library for use by frameworks/base HTTPS tests +include $(CLEAR_VARS) +LOCAL_SRC_FILES := $(call all-test-java-files-under,support) +LOCAL_JAVA_RESOURCE_DIRS := $(test_resource_dirs) +LOCAL_NO_STANDARD_LIBRARIES := true +LOCAL_JAVA_LIBRARIES := core core-junit core-junitrunner +LOCAL_DX_FLAGS := --core-library +LOCAL_MODULE_TAGS := tests +LOCAL_MODULE := core-tests-supportlib +include $(BUILD_STATIC_JAVA_LIBRARY) + # This one's tricky. One of our tests needs to have a # resource with a "#" in its name, but Perforce doesn't # allow us to submit such a file. So we create it here diff --git a/dalvik/src/main/java/dalvik/system/BlockGuard.java b/dalvik/src/main/java/dalvik/system/BlockGuard.java index fb9082e..6279e64 100644 --- a/dalvik/src/main/java/dalvik/system/BlockGuard.java +++ b/dalvik/src/main/java/dalvik/system/BlockGuard.java @@ -70,7 +70,7 @@ public final class BlockGuard { int getPolicyMask(); } - public final class BlockGuardPolicyException extends RuntimeException { + public static final class BlockGuardPolicyException extends RuntimeException { // bitmask of DISALLOW_*, PENALTY_*, etc flags public final int mPolicyState; public final int mPolicyViolated; @@ -80,6 +80,14 @@ public final class BlockGuard { mPolicyViolated = policyViolated; } + public int getPolicy() { + return mPolicyState; + } + + public int getPolicyViolation() { + return mPolicyViolated; + } + // TODO: toString() and stringify the bitmasks above } diff --git a/luni/src/main/java/com/ibm/icu4jni/util/LocaleData.java b/luni/src/main/java/com/ibm/icu4jni/util/LocaleData.java index e27bd54..67112b6 100644 --- a/luni/src/main/java/com/ibm/icu4jni/util/LocaleData.java +++ b/luni/src/main/java/com/ibm/icu4jni/util/LocaleData.java @@ 
-32,18 +32,23 @@ public final class LocaleData { // A cache for the locale-specific data. private static final HashMap<String, LocaleData> localeDataCache = new HashMap<String, LocaleData>(); + // Used by Calendar. public Integer firstDayOfWeek; public Integer minimalDaysInFirstWeek; + // Used by DateFormatSymbols. public String[] amPm; - public String[] eras; public String[] longMonthNames; public String[] shortMonthNames; + public String[] longStandAloneMonthNames; + public String[] shortStandAloneMonthNames; public String[] longWeekdayNames; public String[] shortWeekdayNames; + public String[] longStandAloneWeekdayNames; + public String[] shortStandAloneWeekdayNames; public String fullTimeFormat; public String longTimeFormat; @@ -55,7 +60,7 @@ public final class LocaleData { public String mediumDateFormat; public String shortDateFormat; - // DecimalFormatSymbols. + // Used by DecimalFormatSymbols. public char zeroDigit; public char digit; public char decimalSeparator; @@ -68,10 +73,11 @@ public final class LocaleData { public String exponentSeparator; public String infinity; public String NaN; - + // Also used by Currency. public String currencySymbol; public String internationalCurrencySymbol; + // Used by DecimalFormat and NumberFormat. 
public String numberPattern; public String integerPattern; public String currencyPattern; @@ -131,8 +137,12 @@ public final class LocaleData { "eras=" + Arrays.toString(eras) + "," + "longMonthNames=" + Arrays.toString(longMonthNames) + "," + "shortMonthNames=" + Arrays.toString(shortMonthNames) + "," + + "longStandAloneMonthNames=" + Arrays.toString(longStandAloneMonthNames) + "," + + "shortStandAloneMonthNames=" + Arrays.toString(shortStandAloneMonthNames) + "," + "longWeekdayNames=" + Arrays.toString(longWeekdayNames) + "," + "shortWeekdayNames=" + Arrays.toString(shortWeekdayNames) + "," + + "longStandAloneWeekdayNames=" + Arrays.toString(longStandAloneWeekdayNames) + "," + + "shortStandAloneWeekdayNames=" + Arrays.toString(shortStandAloneWeekdayNames) + "," + "fullTimeFormat=" + fullTimeFormat + "," + "longTimeFormat=" + longTimeFormat + "," + "mediumTimeFormat=" + mediumTimeFormat + "," + @@ -180,12 +190,24 @@ public final class LocaleData { if (overrides.shortMonthNames != null) { shortMonthNames = overrides.shortMonthNames; } + if (overrides.longStandAloneMonthNames != null) { + longStandAloneMonthNames = overrides.longStandAloneMonthNames; + } + if (overrides.shortStandAloneMonthNames != null) { + shortStandAloneMonthNames = overrides.shortStandAloneMonthNames; + } if (overrides.longWeekdayNames != null) { longWeekdayNames = overrides.longWeekdayNames; } if (overrides.shortWeekdayNames != null) { shortWeekdayNames = overrides.shortWeekdayNames; } + if (overrides.longStandAloneWeekdayNames != null) { + longStandAloneWeekdayNames = overrides.longStandAloneWeekdayNames; + } + if (overrides.shortStandAloneWeekdayNames != null) { + shortStandAloneWeekdayNames = overrides.shortStandAloneWeekdayNames; + } if (overrides.fullTimeFormat != null) { fullTimeFormat = overrides.fullTimeFormat; } diff --git a/luni/src/main/java/java/io/File.java b/luni/src/main/java/java/io/File.java index 4d1e637..57f6609 100644 --- a/luni/src/main/java/java/io/File.java +++ 
b/luni/src/main/java/java/io/File.java @@ -67,10 +67,7 @@ public class File implements Serializable, Comparable<File> { private static final long serialVersionUID = 301077366599181567L; - private static final String EMPTY_STRING = ""; - - // Caches the UTF-8 Charset for newCString. - private static final Charset UTF8 = Charset.forName("UTF-8"); + private static final Charset UTF_8 = Charset.forName("UTF-8"); /** * The system dependent file separator character. @@ -210,7 +207,7 @@ public class File implements Serializable, Comparable<File> { } private byte[] newCString(String s) { - ByteBuffer buffer = UTF8.encode(s); + ByteBuffer buffer = UTF_8.encode(s); // Add a trailing NUL, because this byte[] is going to be used as a char*. int byteCount = buffer.limit() + 1; byte[] bytes = new byte[byteCount]; @@ -471,7 +468,7 @@ public class File implements Serializable, Comparable<File> { * @return the absolute file path. */ public String getAbsolutePath() { - return Util.toUTF8String(pathBytes, 0, pathBytes.length - 1); + return new String(pathBytes, 0, pathBytes.length - 1, UTF_8); } /** @@ -583,13 +580,7 @@ public class File implements Serializable, Comparable<File> { newResult[newLength] = 0; newResult = getCanonImpl(newResult); newLength = newResult.length; - - // BEGIN android-changed - // caching the canonical path is completely bogus - return Util.toUTF8String(newResult, 0, newLength); - // FileCanonPathCache.put(absPath, canonPath); - // return canonPath; - // END android-changed + return new String(newResult, 0, newLength, UTF_8); } /* @@ -1477,13 +1468,12 @@ public class File implements Serializable, Comparable<File> { String name = getAbsoluteName(); if (!name.startsWith("/")) { // start with sep. 
- return new URL( - "file", EMPTY_STRING, -1, new StringBuilder(name.length() + 1) - .append('/').append(name).toString(), null); + return new URL("file", "", -1, + new StringBuilder(name.length() + 1).append('/').append(name).toString(), null); } else if (name.startsWith("//")) { return new URL("file:" + name); // UNC path } - return new URL("file", EMPTY_STRING, -1, name, null); + return new URL("file", "", -1, name, null); } private String getAbsoluteName() { diff --git a/luni/src/main/java/java/io/RandomAccessFile.java b/luni/src/main/java/java/io/RandomAccessFile.java index b429537..54eb6a5 100644 --- a/luni/src/main/java/java/io/RandomAccessFile.java +++ b/luni/src/main/java/java/io/RandomAccessFile.java @@ -676,7 +676,7 @@ public class RandomAccessFile implements DataInput, DataOutput, Closeable { if (read(buf, 0, buf.length) != buf.length) { throw new EOFException(); } - return Util.convertFromUTF8(buf, 0, utfSize); + return Util.convertUTF8WithBuf(buf, new char[utfSize], 0, utfSize); } /** diff --git a/luni/src/main/java/java/lang/Character.java b/luni/src/main/java/java/lang/Character.java index 7d41c45..90082de 100644 --- a/luni/src/main/java/java/lang/Character.java +++ b/luni/src/main/java/java/lang/Character.java @@ -1698,7 +1698,7 @@ public final class Character implements Serializable, Comparable<Character> { */ public static int toCodePoint(char high, char low) { // See RFC 2781, Section 2.2 - // http://www.faqs.org/rfcs/rfc2781.html + // http://www.ietf.org/rfc/rfc2781.txt int h = (high & 0x3FF) << 10; int l = low & 0x3FF; return (h | l) + 0x10000; @@ -1999,7 +1999,7 @@ public final class Character implements Serializable, Comparable<Character> { throw new IndexOutOfBoundsException(); } // See RFC 2781, Section 2.1 - // http://www.faqs.org/rfcs/rfc2781.html + // http://www.ietf.org/rfc/rfc2781.txt int cpPrime = codePoint - 0x10000; int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); int low = 0xDC00 | (cpPrime & 0x3FF); diff --git 
a/luni/src/main/java/java/lang/String.java b/luni/src/main/java/java/lang/String.java index 5407c7a..9ba9939 100644 --- a/luni/src/main/java/java/lang/String.java +++ b/luni/src/main/java/java/lang/String.java @@ -17,21 +17,16 @@ package java.lang; -import com.ibm.icu4jni.regex.NativeRegEx; +import java.io.ByteArrayOutputStream; import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; -import java.nio.charset.IllegalCharsetNameException; -import java.nio.charset.UnsupportedCharsetException; -import java.security.AccessController; import java.util.Comparator; import java.util.Formatter; import java.util.Locale; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; -import org.apache.harmony.luni.util.PriviAction; /** * An immutable sequence of characters/code units ({@code char}s). A @@ -81,7 +76,15 @@ public final class String implements Serializable, Comparable<String>, CharSeque */ public static final Comparator<String> CASE_INSENSITIVE_ORDER = new CaseInsensitiveComparator(); - private static final char[] ascii; + private static final char[] EMPTY_CHAR_ARRAY = new char[0]; + + private static final char[] ASCII; + static { + ASCII = new char[128]; + for (int i = 0; i < ASCII.length; ++i) { + ASCII[i] = (char) i; + } + } private final char[] value; @@ -91,22 +94,11 @@ public final class String implements Serializable, Comparable<String>, CharSeque private int hashCode; - private static Charset DefaultCharset; - - private static Charset lastCharset; - - static { - ascii = new char[128]; - for (int i = 0; i < ascii.length; i++) { - ascii[i] = (char) i; - } - } - /** * Creates an empty string. 
*/ public String() { - value = new char[0]; + value = EMPTY_CHAR_ARRAY; offset = 0; count = 0; } @@ -124,13 +116,8 @@ public final class String implements Serializable, Comparable<String>, CharSeque } /** - * Converts the byte array to a string using the default encoding as - * specified by the file.encoding system property. If the system property is - * not defined, the default encoding is ISO8859_1 (ISO-Latin-1). If 8859-1 - * is not available, an ASCII encoding is used. - * - * @param data - * the byte array to convert to a string. + * Converts the byte array to a string using the system's + * {@link java.nio.charset.Charset#defaultCharset default charset}. */ public String(byte[] data) { this(data, 0, data.length); @@ -155,10 +142,8 @@ public final class String implements Serializable, Comparable<String>, CharSeque } /** - * Converts the byte array to a string using the default encoding as - * specified by the file.encoding system property. If the system property is - * not defined, the default encoding is ISO8859_1 (ISO-Latin-1). If 8859-1 - * is not available, an ASCII encoding is used. + * Converts a subsequence of the byte array to a string using the system's + * {@link java.nio.charset.Charset#defaultCharset default charset}. * * @param data * the byte array to convert to a string. 
@@ -175,17 +160,13 @@ public final class String implements Serializable, Comparable<String>, CharSeque public String(byte[] data, int start, int length) { // start + length could overflow, start/length maybe MaxInt if (start >= 0 && 0 <= length && length <= data.length - start) { + CharBuffer cb = Charset.defaultCharset().decode(ByteBuffer.wrap(data, start, length)); + count = cb.length(); offset = 0; - Charset charset = defaultCharset(); - int result; - CharBuffer cb = charset - .decode(ByteBuffer.wrap(data, start, length)); - if ((result = cb.length()) > 0) { + if (count > 0) { value = cb.array(); - count = result; } else { - count = 0; - value = new char[0]; + value = EMPTY_CHAR_ARRAY; } } else { throw new StringIndexOutOfBoundsException(); @@ -233,7 +214,10 @@ public final class String implements Serializable, Comparable<String>, CharSeque } /** - * Converts the byte array to a string using the specified encoding. + * Converts the byte array to a string using the named charset. + * + * <p>The behavior when the bytes cannot be decoded by the named charset + * is unspecified. Use {@link java.nio.charset.CharsetDecoder} for more control. * * @param data * the byte array to convert to a string. @@ -241,184 +225,45 @@ public final class String implements Serializable, Comparable<String>, CharSeque * the starting offset in the byte array. * @param length * the number of bytes to convert. - * @param encoding - * the encoding. + * @param charsetName + * the charset name. * @throws NullPointerException * when {@code data} is {@code null}. * @throws IndexOutOfBoundsException * if {@code length < 0, start < 0} or {@code start + length > * data.length}. * @throws UnsupportedEncodingException - * if {@code encoding} is not supported. + * if the named charset is not supported. 
*/ - public String(byte[] data, int start, int length, final String encoding) - throws UnsupportedEncodingException { - if (encoding == null) { - throw new NullPointerException(); - } - // start + length could overflow, start/length maybe MaxInt - if (start >= 0 && 0 <= length && length <= data.length - start) { - offset = 0; - // BEGIN android-added - // Special-case ISO-88589-1 and UTF 8 decoding - if (encoding.equalsIgnoreCase("ISO-8859-1") || - encoding.equalsIgnoreCase("ISO8859_1")) { - value = new char[length]; - count = length; - for (int i = 0; i < count; i++) { - value[i] = (char) (data[start++] & 0xff); - } - return; - } else if ("utf8".equals(encoding) || - "utf-8".equals(encoding) || - "UTF8".equals(encoding) || - "UTF-8".equals(encoding)) { - // We inline UTF8 decoding for speed and because a - // non-constructor can't write directly to the final - // members 'value' or 'count'. - byte[] d = data; - char[] v = new char[length]; - - int idx = start, last = start + length, s = 0; - outer: - while (idx < last) { - byte b0 = d[idx++]; - if ((b0 & 0x80) == 0) { - // 0xxxxxxx - // Range: U-00000000 - U-0000007F - int val = b0 & 0xff; - v[s++] = (char) val; - } else if (((b0 & 0xe0) == 0xc0) || - ((b0 & 0xf0) == 0xe0) || - ((b0 & 0xf8) == 0xf0) || - ((b0 & 0xfc) == 0xf8) || - ((b0 & 0xfe) == 0xfc)) { - int utfCount = 1; - if ((b0 & 0xf0) == 0xe0) utfCount = 2; - else if ((b0 & 0xf8) == 0xf0) utfCount = 3; - else if ((b0 & 0xfc) == 0xf8) utfCount = 4; - else if ((b0 & 0xfe) == 0xfc) utfCount = 5; - - // 110xxxxx (10xxxxxx)+ - // Range: U-00000080 - U-000007FF (count == 1) - // Range: U-00000800 - U-0000FFFF (count == 2) - // Range: U-00010000 - U-001FFFFF (count == 3) - // Range: U-00200000 - U-03FFFFFF (count == 4) - // Range: U-04000000 - U-7FFFFFFF (count == 5) - - if (idx + utfCount > last) { - v[s++] = REPLACEMENT_CHAR; - break; - } - - // Extract usable bits from b0 - int val = b0 & (0x1f >> (utfCount - 1)); - for (int i = 0; i < utfCount; i++) { - byte 
b = d[idx++]; - if ((b & 0xC0) != 0x80) { - v[s++] = REPLACEMENT_CHAR; - idx--; // Put the input char back - continue outer; - } - // Push new bits in from the right side - val <<= 6; - val |= b & 0x3f; - } - - // Note: Java allows overlong char - // specifications To disallow, check that val - // is greater than or equal to the minimum - // value for each count: - // - // count min value - // ----- ---------- - // 1 0x80 - // 2 0x800 - // 3 0x10000 - // 4 0x200000 - // 5 0x4000000 - - // Allow surrogate values (0xD800 - 0xDFFF) to - // be specified using 3-byte UTF values only - if ((utfCount != 2) && - (val >= 0xD800) && (val <= 0xDFFF)) { - v[s++] = REPLACEMENT_CHAR; - continue; - } - - // Reject chars greater than the Unicode - // maximum of U+10FFFF - if (val > 0x10FFFF) { - v[s++] = REPLACEMENT_CHAR; - continue; - } - - // Encode chars from U+10000 up as surrogate pairs - if (val < 0x10000) { - v[s++] = (char) val; - } else { - int x = val & 0xffff; - int u = (val >> 16) & 0x1f; - int w = (u - 1) & 0xffff; - int hi = 0xd800 | (w << 6) | (x >> 10); - int lo = 0xdc00 | (x & 0x3ff); - v[s++] = (char) hi; - v[s++] = (char) lo; - } - } else { - // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff - v[s++] = REPLACEMENT_CHAR; - } - } - - // Reallocate the array to fit the contents - count = s; - value = new char[s]; - System.arraycopy(v, 0, value, 0, s); - return; - } - // END android-added - Charset charset = getCharset(encoding); - - int result; - CharBuffer cb; - try { - cb = charset.decode(ByteBuffer.wrap(data, start, length)); - } catch (Exception e) { - // do nothing. 
according to spec: - // behavior is unspecified for invalid array - cb = CharBuffer.wrap("\u003f".toCharArray()); - } - if ((result = cb.length()) > 0) { - value = cb.array(); - count = result; - } else { - count = 0; - value = new char[0]; - } - } else { - throw new StringIndexOutOfBoundsException(); - } + public String(byte[] data, int start, int length, String charsetName) throws UnsupportedEncodingException { + this(data, start, length, Charset.forName(charsetName)); } /** - * Converts the byte array to a string using the specified encoding. + * Converts the byte array to a string using the named charset. + * + * <p>The behavior when the bytes cannot be decoded by the named charset + * is unspecified. Use {@link java.nio.charset.CharsetDecoder} for more control. * * @param data * the byte array to convert to a string. - * @param encoding - * the encoding. + * @param charsetName + * the charset name. * @throws NullPointerException * when {@code data} is {@code null}. * @throws UnsupportedEncodingException - * if {@code encoding} is not supported. + * if {@code charsetName} is not supported. */ - public String(byte[] data, String encoding) throws UnsupportedEncodingException { - this(data, 0, data.length, encoding); + public String(byte[] data, String charsetName) throws UnsupportedEncodingException { + this(data, 0, data.length, Charset.forName(charsetName)); } /** - * Converts the byte array to a String using the specified encoding. + * Converts the byte array to a string using the given charset. + * + * <p>The behavior when the bytes cannot be decoded by the given charset + * is to replace malformed input and unmappable characters with the charset's default + * replacement string. Use {@link java.nio.charset.CharsetDecoder} for more control. 
* * @param data * the byte array to convert to a String @@ -426,8 +271,8 @@ public final class String implements Serializable, Comparable<String>, CharSeque * the starting offset in the byte array * @param length * the number of bytes to convert - * @param encoding - * the encoding + * @param charset + * the charset * * @throws IndexOutOfBoundsException * when <code>length < 0, start < 0</code> or @@ -449,31 +294,147 @@ public final class String implements Serializable, Comparable<String>, CharSeque * @see #valueOf(Object) * @since 1.6 */ - public String(byte[] data, int start, int length, final Charset encoding) { - if (encoding == null) { - throw new NullPointerException(); - } + public String(byte[] data, int start, int length, Charset charset) { if (start < 0 || length < 0 || length > data.length - start) { throw new StringIndexOutOfBoundsException(); } - CharBuffer cb = encoding.decode(ByteBuffer.wrap(data, start, length)); - this.lastCharset = encoding; - this.offset = 0; - this.count = cb.length(); - this.value = new char[count]; - System.arraycopy(cb.array(), 0, value, 0, count); + + // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed and because 'count' and + // 'value' are final. 
+ String canonicalCharsetName = charset.name(); + if (canonicalCharsetName.equals("UTF-8")) { + byte[] d = data; + char[] v = new char[length]; + + int idx = start, last = start + length, s = 0; +outer: + while (idx < last) { + byte b0 = d[idx++]; + if ((b0 & 0x80) == 0) { + // 0xxxxxxx + // Range: U-00000000 - U-0000007F + int val = b0 & 0xff; + v[s++] = (char) val; + } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) || + ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) { + int utfCount = 1; + if ((b0 & 0xf0) == 0xe0) utfCount = 2; + else if ((b0 & 0xf8) == 0xf0) utfCount = 3; + else if ((b0 & 0xfc) == 0xf8) utfCount = 4; + else if ((b0 & 0xfe) == 0xfc) utfCount = 5; + + // 110xxxxx (10xxxxxx)+ + // Range: U-00000080 - U-000007FF (count == 1) + // Range: U-00000800 - U-0000FFFF (count == 2) + // Range: U-00010000 - U-001FFFFF (count == 3) + // Range: U-00200000 - U-03FFFFFF (count == 4) + // Range: U-04000000 - U-7FFFFFFF (count == 5) + + if (idx + utfCount > last) { + v[s++] = REPLACEMENT_CHAR; + break; + } + + // Extract usable bits from b0 + int val = b0 & (0x1f >> (utfCount - 1)); + for (int i = 0; i < utfCount; i++) { + byte b = d[idx++]; + if ((b & 0xC0) != 0x80) { + v[s++] = REPLACEMENT_CHAR; + idx--; // Put the input char back + continue outer; + } + // Push new bits in from the right side + val <<= 6; + val |= b & 0x3f; + } + + // Note: Java allows overlong char + // specifications To disallow, check that val + // is greater than or equal to the minimum + // value for each count: + // + // count min value + // ----- ---------- + // 1 0x80 + // 2 0x800 + // 3 0x10000 + // 4 0x200000 + // 5 0x4000000 + + // Allow surrogate values (0xD800 - 0xDFFF) to + // be specified using 3-byte UTF values only + if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) { + v[s++] = REPLACEMENT_CHAR; + continue; + } + + // Reject chars greater than the Unicode maximum of U+10FFFF. 
+ if (val > 0x10FFFF) { + v[s++] = REPLACEMENT_CHAR; + continue; + } + + // Encode chars from U+10000 up as surrogate pairs + if (val < 0x10000) { + v[s++] = (char) val; + } else { + int x = val & 0xffff; + int u = (val >> 16) & 0x1f; + int w = (u - 1) & 0xffff; + int hi = 0xd800 | (w << 6) | (x >> 10); + int lo = 0xdc00 | (x & 0x3ff); + v[s++] = (char) hi; + v[s++] = (char) lo; + } + } else { + // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff + v[s++] = REPLACEMENT_CHAR; + } + } + + // Reallocate the array to fit the contents + this.offset = 0; + this.value = new char[s]; + this.count = s; + System.arraycopy(v, 0, value, 0, s); + } else if (canonicalCharsetName.equals("ISO-8859-1")) { + this.offset = 0; + this.value = new char[length]; + this.count = length; + for (int i = 0; i < count; ++i) { + value[i] = (char) (data[start++] & 0xff); + } + } else if (canonicalCharsetName.equals("US-ASCII")) { + this.offset = 0; + this.value = new char[length]; + this.count = length; + for (int i = 0; i < count; ++i) { + char ch = (char) (data[start++] & 0xff); + value[i] = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR; + } + } else { + CharBuffer cb = charset.decode(ByteBuffer.wrap(data, start, length)); + this.offset = 0; + this.count = cb.length(); + if (count > 0) { + // We could use cb.array() directly, but that would mean we'd have to trust + // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later, + // which would break String's immutability guarantee. It would also tend to + // mean that we'd be wasting memory because CharsetDecoder doesn't trim the + // array. So we copy. + this.value = new char[count]; + System.arraycopy(cb.array(), 0, value, 0, count); + } else { + value = EMPTY_CHAR_ARRAY; + } + } } /** - * Converts the byte array to a String using the specified encoding. - * - * @param data - * the byte array to convert to a String - * @param encoding - * the encoding + * Converts the byte array to a String using the given charset. 
* - * @throws NullPointerException - * when data is null + * @throws NullPointerException if {@code data == null} * * @see #getBytes() * @see #getBytes(int, int, byte[], int) @@ -489,8 +450,8 @@ public final class String implements Serializable, Comparable<String>, CharSeque * @see #valueOf(Object) * @since 1.6 */ - public String(byte[] data, Charset encoding) { - this(data, 0, data.length, encoding); + public String(byte[] data, Charset charset) { + this(data, 0, data.length, charset); } /** @@ -800,8 +761,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque if (string.count > 0 && count > 0) { char[] buffer = new char[count + string.count]; System.arraycopy(value, offset, buffer, 0, count); - System.arraycopy(string.value, string.offset, buffer, count, - string.count); + System.arraycopy(string.value, string.offset, buffer, count, string.count); return new String(0, buffer.length, buffer); } return count == 0 ? string : this; @@ -844,27 +804,6 @@ public final class String implements Serializable, Comparable<String>, CharSeque return new String(data, start, length); } - private Charset defaultCharset() { - if (DefaultCharset == null) { - String encoding = AccessController - .doPrivileged(new PriviAction<String>("file.encoding", "ISO8859_1")); - // calling System.getProperty() may cause DefaultCharset to be - // initialized - try { - DefaultCharset = Charset.forName(encoding); - } catch (IllegalCharsetNameException e) { - // Ignored - } catch (UnsupportedCharsetException e) { - // Ignored - } - - if (DefaultCharset == null) { - DefaultCharset = Charset.forName("ISO-8859-1"); - } - } - return DefaultCharset; - } - /** * Compares the specified string to this string to determine if the * specified string is a suffix. 
@@ -954,18 +893,6 @@ public final class String implements Serializable, Comparable<String>, CharSeque } /** - * Converts this string to a byte array using the default encoding as - * specified by the file.encoding system property. If the system property is - * not defined, the default encoding is ISO8859_1 (ISO-Latin-1). If 8859-1 - * is not available, an ASCII encoding is used. - * - * @return the byte array encoding of this string. - */ - public byte[] getBytes() { - return getBytes(defaultCharset()); - } - - /** * Converts this string to a byte array, ignoring the high order bits of * each character. * @@ -1001,50 +928,108 @@ public final class String implements Serializable, Comparable<String>, CharSeque } /** - * Converts this string to a byte array using the specified encoding. + * Returns a new byte array containing the characters of this string encoded using the + * system's {@link java.nio.charset.Charset#defaultCharset default charset}. * - * @param encoding - * the encoding to use. - * @return the encoded byte array of this string. - * @throws UnsupportedEncodingException - * if the encoding is not supported. + * <p>The behavior when this string cannot be represented in the system's default charset + * is unspecified. In practice, when the default charset is UTF-8 (as it is on Android), + * all strings can be encoded. 
*/ - public byte[] getBytes(String encoding) throws UnsupportedEncodingException { - return getBytes(getCharset(encoding)); + public byte[] getBytes() { + return getBytes(Charset.defaultCharset()); } - private Charset getCharset(final String encoding) - throws UnsupportedEncodingException { - Charset charset = lastCharset; - if (charset == null || !encoding.equalsIgnoreCase(charset.name())) { - try { - charset = Charset.forName(encoding); - } catch (IllegalCharsetNameException e) { - throw (UnsupportedEncodingException) (new UnsupportedEncodingException( - encoding).initCause(e)); - } catch (UnsupportedCharsetException e) { - throw (UnsupportedEncodingException) (new UnsupportedEncodingException( - encoding).initCause(e)); - } - lastCharset = charset; - } - return charset; + /** + * Returns a new byte array containing the characters of this string encoded using the + * named charset. + * + * <p>The behavior when this string cannot be represented in the named charset + * is unspecified. Use {@link java.nio.charset.CharsetEncoder} for more control. + * + * @throws UnsupportedEncodingException if the charset is not supported + */ + public byte[] getBytes(String charsetName) throws UnsupportedEncodingException { + return getBytes(Charset.forName(charsetName)); } /** - * Returns a new byte array containing the characters of this string encoded in the + * Returns a new byte array containing the characters of this string encoded using the * given charset. * - * @param encoding the encoding + * <p>The behavior when this string cannot be represented in the given charset + * is to replace malformed input and unmappable characters with the charset's default + * replacement byte array. Use {@link java.nio.charset.CharsetEncoder} for more control. 
* * @since 1.6 */ - public byte[] getBytes(Charset encoding) { - CharBuffer chars = CharBuffer.wrap(this.value, this.offset, this.count); - ByteBuffer buffer = encoding.encode(chars.asReadOnlyBuffer()); - byte[] bytes = new byte[buffer.limit()]; - buffer.get(bytes); - return bytes; + public byte[] getBytes(Charset charset) { + String canonicalCharsetName = charset.name(); + if (canonicalCharsetName.equals("UTF-8")) { + return getUtf8Bytes(); + } else if (canonicalCharsetName.equals("ISO-8859-1")) { + return getDirectMappedBytes(0xff); + } else if (canonicalCharsetName.equals("US-ASCII")) { + return getDirectMappedBytes(0x7f); + } else { + CharBuffer chars = CharBuffer.wrap(this.value, this.offset, this.count); + ByteBuffer buffer = charset.encode(chars.asReadOnlyBuffer()); + byte[] bytes = new byte[buffer.limit()]; + buffer.get(bytes); + return bytes; + } + } + + /** + * Translates this string's characters to US-ASCII or ISO-8859-1 bytes, using the fact that + * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while + * U+0000 to U+00ff inclusive are identical to ISO-8859-1. + */ + private byte[] getDirectMappedBytes(int maxValidChar) { + byte[] result = new byte[count]; + int o = offset; + for (int i = 0; i < count; ++i) { + int ch = value[o++]; + result[i] = (byte) ((ch <= maxValidChar) ? ch : '?'); + } + return result; + } + + private byte[] getUtf8Bytes() { + UnsafeByteSequence result = new UnsafeByteSequence(count); + final int end = offset + count; + for (int i = offset; i < end; ++i) { + int ch = value[i]; + if (ch < 0x80) { + // One byte. + result.write(ch); + } else if (ch < 0x800) { + // Two bytes. + result.write((ch >> 6) | 0xc0); + result.write((ch & 0x3f) | 0x80); + } else if (ch >= Character.MIN_SURROGATE && ch <= Character.MAX_SURROGATE) { + // A supplementary character. + char high = (char) ch; + char low = (i + 1 != end) ? 
value[i + 1] : '\u0000'; + if (!Character.isSurrogatePair(high, low)) { + result.write('?'); + continue; + } + // Now we know we have a *valid* surrogate pair, we can consume the low surrogate. + ++i; + ch = Character.toCodePoint(high, low); + // Four bytes. + result.write((ch >> 18) | 0xf0); + result.write(((ch >> 12) & 0x3f) | 0x80); + result.write(((ch >> 6) & 0x3f) | 0x80); + result.write((ch & 0x3f) | 0x80); + } else { + // Three bytes. + result.write((ch >> 12) | 0xe0); + result.write(((ch >> 6) & 0x3f) | 0x80); + result.write((ch & 0x3f) | 0x80); + } + } + return result.toByteArray(); } /** @@ -1824,7 +1809,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque public static String valueOf(char value) { String s; if (value < 128) { - s = new String(value, 1, ascii); + s = new String(value, 1, ASCII); } else { s = new String(0, 1, new char[] { value }); } diff --git a/luni/src/main/java/java/lang/UnsafeByteSequence.java b/luni/src/main/java/java/lang/UnsafeByteSequence.java new file mode 100644 index 0000000..c3cac57 --- /dev/null +++ b/luni/src/main/java/java/lang/UnsafeByteSequence.java @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package java.lang; + +/** + * A cheaper ByteArrayOutputStream for internal use. This class is unsynchronized, + * and returns its internal array if it's the right size. 
This makes String.getBytes("UTF-8") + * 10x faster than the baseline non-fast-path implementation instead of 8x faster when using + * ByteArrayOutputStream. When GC and uncontended synchronization become cheap, we should be + * able to get rid of this class. In the meantime, if you need to add further API, please try + * to keep it plug-compatible with ByteArrayOutputStream with an eye to future obsolescence. + * + * @hide + */ +class UnsafeByteSequence { + private byte[] bytes; + private int count; + + public UnsafeByteSequence(int initialCapacity) { + this.bytes = new byte[initialCapacity]; + } + + public void write(int b) { + if (count == bytes.length) { + byte[] newBytes = new byte[count * 2]; + System.arraycopy(bytes, 0, newBytes, 0, count); + bytes = newBytes; + } + bytes[count++] = (byte) b; + } + + public byte[] toByteArray() { + if (count == bytes.length) { + return bytes; + } + byte[] result = new byte[count]; + System.arraycopy(bytes, 0, result, 0, count); + return result; + } +} diff --git a/luni/src/main/java/java/net/URLDecoder.java b/luni/src/main/java/java/net/URLDecoder.java index d30248d..d3c61c6 100644 --- a/luni/src/main/java/java/net/URLDecoder.java +++ b/luni/src/main/java/java/net/URLDecoder.java @@ -29,9 +29,6 @@ import java.nio.charset.UnsupportedCharsetException; * application/x-www-form-urlencoded} MIME content type. */ public class URLDecoder { - - static Charset defaultCharset; - /** * Decodes the argument which is assumed to be encoded in the {@code * x-www-form-urlencoded} MIME content type. 
@@ -48,22 +45,7 @@ public class URLDecoder { */ @Deprecated public static String decode(String s) { - - if (defaultCharset == null) { - try { - defaultCharset = Charset.forName( - System.getProperty("file.encoding")); - } catch (IllegalCharsetNameException e) { - // Ignored - } catch (UnsupportedCharsetException e) { - // Ignored - } - - if (defaultCharset == null) { - defaultCharset = Charset.forName("ISO-8859-1"); - } - } - return decode(s, defaultCharset); + return decode(s, Charset.defaultCharset()); } /** diff --git a/luni/src/main/java/java/nio/charset/Charset.java b/luni/src/main/java/java/nio/charset/Charset.java index e414df3..81803b8 100644 --- a/luni/src/main/java/java/nio/charset/Charset.java +++ b/luni/src/main/java/java/nio/charset/Charset.java @@ -177,6 +177,13 @@ public abstract class Charset implements Comparable<Charset> { */ private static final String PROVIDER_CONFIGURATION_FILE_COMMENT = "#"; + /** + * The cache of charsets. + */ + private static final HashMap<String, Charset> cachedCharsetTable = new HashMap<String, Charset>(); + + private static final Charset DEFAULT_CHARSET = getDefaultCharset(); + private static ClassLoader systemClassLoader; private static SortedMap<String, Charset> cachedBuiltInCharsets; @@ -186,9 +193,6 @@ public abstract class Charset implements Comparable<Charset> { // the aliases set private final HashSet<String> aliasesSet; - // cached Charset table - private final static HashMap<String, Charset> cachedCharsetTable = new HashMap<String, Charset>(); - private static boolean inForNameInternal = false; /** @@ -205,7 +209,7 @@ public abstract class Charset implements Comparable<Charset> { * <code>aliases</code>. 
*/ protected Charset(String canonicalName, String[] aliases) { - if (null == canonicalName) { + if (canonicalName == null) { throw new NullPointerException(); } // check whether the given canonical name is legal @@ -250,8 +254,7 @@ public abstract class Charset implements Comparable<Charset> { * should not be null. */ private static void checkCharsetName(String name) { - // An empty string is illegal charset name - if (name.length() == 0) { + if (name.isEmpty()) { throw new IllegalCharsetNameException(name); } // The first character must be a letter or a digit @@ -497,7 +500,6 @@ public abstract class Charset implements Comparable<Charset> { */ private synchronized static Charset forNameInternal(String charsetName) throws IllegalCharsetNameException { - Charset cs = lookupCachedOrBuiltInCharset(charsetName); if (cs != null || inForNameInternal) { return cs; @@ -525,7 +527,7 @@ public abstract class Charset implements Comparable<Charset> { cs = searchConfiguredCharsets(charsetName, contextClassLoader, e.nextElement()); inForNameInternal = false; if (cs != null) { - cacheCharset(cs); + cacheCharset(charsetName, cs); return cs; } } @@ -548,25 +550,31 @@ public abstract class Charset implements Comparable<Charset> { checkCharsetName(charsetName); cs = NativeConverter.charsetForName(charsetName); if (cs != null) { - cacheCharset(cs); + cacheCharset(charsetName, cs); } return cs; } - /* - * save charset into cachedCharsetTable - */ - private synchronized static void cacheCharset(Charset cs) { - // Cache the Charset by its canonical name... + private synchronized static void cacheCharset(String charsetName, Charset cs) { + // Get the canonical name for this charset, and the canonical instance from the table. 
String canonicalName = cs.name(); - if (!cachedCharsetTable.containsKey(canonicalName)) { - cachedCharsetTable.put(canonicalName, cs); + Charset canonicalCharset = cachedCharsetTable.get(canonicalName); + if (canonicalCharset == null) { + canonicalCharset = cs; } + + // Cache the charset by its canonical name... + cachedCharsetTable.put(canonicalName, canonicalCharset); + + // And the name the user used... (Section 1.4 of http://unicode.org/reports/tr22/ means + // that many non-alias, non-canonical names are valid. For example, "utf8" isn't an alias + // of the canonical name "UTF-8", but we shouldn't penalize consistent users of such + // names unduly.) + cachedCharsetTable.put(charsetName, canonicalCharset); + // And all its aliases... for (String alias : cs.aliasesSet) { - if (!cachedCharsetTable.containsKey(alias)) { - cachedCharsetTable.put(alias, cs); - } + cachedCharsetTable.put(alias, canonicalCharset); } } @@ -582,11 +590,11 @@ public abstract class Charset implements Comparable<Charset> { * if the desired charset is not supported by this runtime. */ public static Charset forName(String charsetName) { - Charset c = forNameInternal(charsetName); - if (c == null) { - throw new UnsupportedCharsetException(charsetName); + Charset cs = forNameInternal(charsetName); + if (cs != null) { + return cs; } - return c; + throw new UnsupportedCharsetException(charsetName); } /** @@ -598,8 +606,13 @@ public abstract class Charset implements Comparable<Charset> { * @throws IllegalCharsetNameException * if the specified charset name is illegal. 
*/ - public static synchronized boolean isSupported(String charsetName) { - return forNameInternal(charsetName) != null; + public static boolean isSupported(String charsetName) { + try { + Charset cs = forName(charsetName); + return true; + } catch (UnsupportedCharsetException ex) { + return false; + } } /** @@ -822,23 +835,23 @@ public abstract class Charset implements Comparable<Charset> { } /** - * Gets the system default charset from the virtual machine. - * - * @return the default charset. + * Returns the system's default charset. This is determined during VM startup, and will not + * change thereafter. On Android, the default charset is UTF-8. */ public static Charset defaultCharset() { - Charset defaultCharset = null; - String encoding = AccessController - .doPrivileged(new PrivilegedAction<String>() { - public String run() { - return System.getProperty("file.encoding"); - } - }); + return DEFAULT_CHARSET; + } + + private static Charset getDefaultCharset() { + String encoding = AccessController.doPrivileged(new PrivilegedAction<String>() { + public String run() { + return System.getProperty("file.encoding", "UTF-8"); + } + }); try { - defaultCharset = Charset.forName(encoding); + return Charset.forName(encoding); } catch (UnsupportedCharsetException e) { - defaultCharset = Charset.forName("UTF-8"); + return Charset.forName("UTF-8"); } - return defaultCharset; } } diff --git a/luni/src/main/java/java/sql/Date.java b/luni/src/main/java/java/sql/Date.java index 407da32..adc35aa 100644 --- a/luni/src/main/java/java/sql/Date.java +++ b/luni/src/main/java/java/sql/Date.java @@ -20,7 +20,7 @@ package java.sql; /** * A class which can consume and produce dates in SQL {@code Date} format. * <p> - * Dates are represented in SQL as {@code yyyy-mm-dd}. Note that this date + * Dates are represented in SQL as {@code yyyy-MM-dd}. Note that this date * format only deals with year, month and day values. There are no values for * hours, minutes, seconds. 
* <p> @@ -170,8 +170,7 @@ public class Date extends java.util.Date { /** * Produces a string representation of the date in SQL format * - * @return a string representation of the date in SQL format - {@code - * "yyyy-mm-dd"}. + * @return a string representation of the date in SQL format - {@code "yyyy-MM-dd"}. */ @Override public String toString() { @@ -204,8 +203,7 @@ public class Date extends java.util.Date { * format. * * @param dateString - * the string representation of a date in SQL format - " {@code - * yyyy-mm-dd}". + * the string representation of a date in SQL format - " {@code yyyy-MM-dd}". * @return the {@code Date} object. * @throws IllegalArgumentException * if the format of the supplied string does not match the SQL diff --git a/luni/src/main/java/java/sql/Timestamp.java b/luni/src/main/java/java/sql/Timestamp.java index c2500c9..23cbea5 100644 --- a/luni/src/main/java/java/sql/Timestamp.java +++ b/luni/src/main/java/java/sql/Timestamp.java @@ -48,7 +48,7 @@ public class Timestamp extends Date { // The nanoseconds time value of the Timestamp private int nanos; - // The regex pattern of yyyy-mm-dd hh:mm:ss + // The regex pattern of yyyy-MM-dd HH:mm:ss private static final String TIME_FORMAT_REGEX = "[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}.*"; /** @@ -342,7 +342,7 @@ public class Timestamp extends Date { /** * Returns the timestamp formatted as a String in the JDBC Timestamp Escape - * format, which is {@code "yyyy-mm-dd hh:mm:ss.nnnnnnnnn"}. + * format, which is {@code "yyyy-MM-dd HH:mm:ss.nnnnnnnnn"}. * * @return A string representing the instant defined by the {@code * Timestamp}, in JDBC Timestamp escape format. 
@@ -392,7 +392,7 @@ public class Timestamp extends Date { /** * Creates a {@code Timestamp} object with a time value equal to the time * specified by a supplied String holding the time in JDBC timestamp escape - * format, which is {@code "yyyy-mm-dd hh:mm:ss.nnnnnnnnn}" + * format, which is {@code "yyyy-MM-dd HH:mm:ss.nnnnnnnnn}" * * @param s * the {@code String} containing a time in JDBC timestamp escape @@ -504,6 +504,6 @@ public class Timestamp extends Date { private static IllegalArgumentException badTimestampString(String s) { throw new IllegalArgumentException("Timestamp format must be " + - "yyyy-mm-dd hh:mm:ss.fffffffff; was '" + s + "'"); + "yyyy-MM-dd HH:mm:ss.fffffffff; was '" + s + "'"); } } diff --git a/luni/src/main/java/java/text/DateFormat.java b/luni/src/main/java/java/text/DateFormat.java index 927df7a..983691e 100644 --- a/luni/src/main/java/java/text/DateFormat.java +++ b/luni/src/main/java/java/text/DateFormat.java @@ -261,16 +261,13 @@ public abstract class DateFormat extends Format { /** * FieldPosition selector for 'h' field alignment, corresponding to the - * {@link Calendar#HOUR} field. {@code HOUR1_FIELD} is used for the - * one-based 12-hour clock. For example, 11:30 PM + 1 hour results in 12:30 - * AM. + * {@link Calendar#HOUR} field. */ public final static int HOUR1_FIELD = 15; /** - * The {@code FieldPosition} selector for 'z' field alignment, corresponds - * to the {@link Calendar#ZONE_OFFSET} and {@link Calendar#DST_OFFSET} - * fields. + * The {@code FieldPosition} selector for 'K' field alignment, corresponding to the + * {@link Calendar#HOUR} field. 
*/ public final static int HOUR0_FIELD = 16; diff --git a/luni/src/main/java/java/text/DateFormatSymbols.java b/luni/src/main/java/java/text/DateFormatSymbols.java index 2406e57..398021a 100644 --- a/luni/src/main/java/java/text/DateFormatSymbols.java +++ b/luni/src/main/java/java/text/DateFormatSymbols.java @@ -55,6 +55,12 @@ public class DateFormatSymbols implements Serializable, Cloneable { String[] ampms, eras, months, shortMonths, shortWeekdays, weekdays; + // These are used to implement ICU/Android extensions. + String[] longStandAloneMonths; + String[] shortStandAloneMonths; + String[] longStandAloneWeekdays; + String[] shortStandAloneWeekdays; + // Localized display names. String[][] zoneStrings; // Has the user called setZoneStrings? @@ -97,7 +103,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { */ public DateFormatSymbols(Locale locale) { this.locale = locale; - this.localPatternChars = SimpleDateFormat.patternChars; + this.localPatternChars = SimpleDateFormat.PATTERN_CHARS; LocaleData localeData = LocaleData.get(locale); this.ampms = localeData.amPm; this.eras = localeData.eras; @@ -105,6 +111,12 @@ public class DateFormatSymbols implements Serializable, Cloneable { this.shortMonths = localeData.shortMonthNames; this.weekdays = localeData.longWeekdayNames; this.shortWeekdays = localeData.shortWeekdayNames; + + // ICU/Android extensions. 
+ this.longStandAloneMonths = localeData.longStandAloneMonthNames; + this.shortStandAloneMonths = localeData.shortStandAloneMonthNames; + this.longStandAloneWeekdays = localeData.longStandAloneWeekdayNames; + this.shortStandAloneWeekdays = localeData.shortStandAloneWeekdayNames; } /** diff --git a/luni/src/main/java/java/text/SimpleDateFormat.java b/luni/src/main/java/java/text/SimpleDateFormat.java index d369fb8..30f3afa 100644 --- a/luni/src/main/java/java/text/SimpleDateFormat.java +++ b/luni/src/main/java/java/text/SimpleDateFormat.java @@ -33,249 +33,81 @@ import java.util.Vector; /** * A concrete class for formatting and parsing dates in a locale-sensitive - * manner. It allows for formatting (date to text), parsing (text to date) and - * normalization. - * <p> - * {@code SimpleDateFormat} allows you to start by choosing any user-defined - * patterns for date-time formatting. However, you are encouraged to create a - * date-time formatter with either {@code getTimeInstance}, {@code - * getDateInstance}, or {@code getDateTimeInstance} in {@code DateFormat}. Each - * of these class methods can return a date/time formatter initialized with a - * default format pattern. You may modify the format pattern using the {@code - * applyPattern} methods as desired. For more information on using these - * methods, see {@link DateFormat}. - * <h4>Time Format Syntax</h4> - * <p> - * To specify the time format, use a <em>time pattern</em> string. 
In this - * pattern, all ASCII letters are reserved as pattern letters, which are defined - * as follows: - * <table border=0 cellspacing=3 cellpadding=0> - * <tr bgcolor="#ccccff"> - * <th>Symbol</th> - * <th>Meaning</th> - * <th>Presentation</th> - * <th>Example</th> - * </tr> - * <tr valign=top> - * <td>G</td> - * <td>era designator</td> - * <td>(Text)</td> - * <td>AD</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>y</td> - * <td>year</td> - * <td>(Number)</td> - * <td>1996</td> - * </tr> - * <tr valign=top> - * <td>M</td> - * <td>month in year</td> - * <td>(Text & Number)</td> - * <td>July & 07</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>d</td> - * <td>day in month</td> - * <td>(Number)</td> - * <td>10</td> - * </tr> - * <tr valign=top> - * <td>h</td> - * <td>hour in am/pm (1˜12)</td> - * <td>(Number)</td> - * <td>12</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>H</td> - * <td>hour in day (0˜23)</td> - * <td>(Number)</td> - * <td>0</td> - * </tr> - * <tr valign=top> - * <td>m</td> - * <td>minute in hour</td> - * <td>(Number)</td> - * <td>30</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>s</td> - * <td>second in minute</td> - * <td>(Number)</td> - * <td>55</td> - * </tr> - * <tr valign=top> - * <td>S</td> - * <td>fractional second</td> - * <td>(Number)</td> - * <td>978</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>E</td> - * <td>day of week</td> - * <td>(Text)</td> - * <td>Tuesday</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>D</td> - * <td>day in year</td> - * <td>(Number)</td> - * <td>189</td> - * </tr> - * <tr valign=top> - * <td>F</td> - * <td>day of week in month</td> - * <td>(Number)</td> - * <td>2 (2nd Wed in July)</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>w</td> - * <td>week in year</td> - * <td>(Number)</td> - * <td>27</td> - * </tr> - * <tr valign=top> - * <td>W</td> - * <td>week in month</td> - * <td>(Number)</td> - * <td>2</td> - * </tr> - * 
<tr valign=top bgcolor="#eeeeff"> - * <td>a</td> - * <td>am/pm marker</td> - * <td>(Text)</td> - * <td>PM</td> - * </tr> - * <tr valign=top> - * <td>k</td> - * <td>hour in day (1˜24)</td> - * <td>(Number)</td> - * <td>24</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>K</td> - * <td>hour in am/pm (0˜11)</td> - * <td>(Number)</td> - * <td>0</td> - * </tr> - * <tr valign=top> - * <td>z</td> - * <td>time zone</td> - * <td>(Text)</td> - * <td>Pacific Standard Time</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>Z</td> - * <td>time zone (RFC 822)</td> - * <td>(Number)</td> - * <td>-0800</td> - * </tr> - * <tr valign=top> - * <td>v</td> - * <td>time zone (generic)</td> - * <td>(Text)</td> - * <td>Pacific Time</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>V</td> - * <td>time zone (location)</td> - * <td>(Text)</td> - * <td>United States (Los Angeles)</td> - * </tr> - * <tr valign=top> - * <td>'</td> - * <td>escape for text</td> - * <td>(Delimiter)</td> - * <td>'Date='</td> - * </tr> - * <tr valign=top bgcolor="#eeeeff"> - * <td>''</td> - * <td>single quote</td> - * <td>(Literal)</td> - * <td>'o''clock'</td> - * </tr> - * </table> - * <p> - * The count of pattern letters determines the format: - * <p> - * <strong>(Text)</strong>: 4 or more pattern letters → use the full form, - * less than 4 pattern letters → use a short or abbreviated form if one - * exists. - * <p> - * <strong>(Number)</strong>: the minimum number of digits. Shorter numbers are - * zero-padded to this amount. Year is handled specially; that is, if the count - * of 'y' is 2, the year will be truncated to 2 digits. (if "yyyy" produces - * "1997", "yy" produces "97".) Unlike other fields, fractional seconds are - * padded on the right with zero. - * <p> - * <strong>(Text & Number)</strong>: 3 or over, use text, otherwise use number. - * <p> - * Any characters in the pattern that are not in the ranges of ['a'..'z'] and - * ['A'..'Z'] will be treated as quoted text. 
For instance, characters like ':', - * '.', ' ', '#' and '@' will appear in the resulting time text even they are - * not embraced within single quotes. - * <p> - * A pattern containing any invalid pattern letter will result in an exception - * thrown during formatting or parsing. - * <h4>Examples Using the US Locale</h4> <blockquote> + * manner. Formatting turns a {@link Date} into a {@link String}, and parsing turns a + * {@code String} into a {@code Date}. * - * <pre> - * Format Pattern Result - * -------------- ------- - * "yyyy.MM.dd G 'at' HH:mm:ss vvvv" → 1996.07.10 AD at 15:08:56 Pacific Time - * "EEE, MMM d, ''yy" → Wed, July 10, '96 - * "h:mm a" → 12:08 PM - * "hh 'o''clock' a, zzzz" → 12 o'clock PM, Pacific Daylight Time - * "K:mm a, vvv" → 0:00 PM, PT - * "yyyyy.MMMMM.dd GGG hh:mm aaa" → 01996.July.10 AD 12:08 PM - * </pre> + * <h4>Time Pattern Syntax</h4> + * <p>You can supply a pattern describing what strings are produced/accepted, but almost all + * callers should use {@link DateFormat#getDateInstance}, {@link DateFormat#getDateTimeInstance}, + * or {@link DateFormat#getTimeInstance} to get a ready-made instance suitable for the user's + * locale. * - * </blockquote> <h4>Code Sample:</h4> <blockquote> + * <p>The main reason you'd create an instance of this class directly is because you need to + * format/parse a specific machine-readable format, in which case you almost certainly want + * to explicitly ask for {@link Locale#US} to ensure that you get ASCII digits (rather than, + * say, Arabic digits). + * (See "<a href="../util/Locale.html#default_locale">Be wary of the default locale</a>".) + * The most useful non-localized pattern is {@code "yyyy-MM-dd HH:mm:ss.SSSZ"}, which corresponds + * to the ISO 8601 international standard date format. 
* - * <pre> - * SimpleTimeZone pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, "PST"); - * pdt.setStartRule(Calendar.APRIL, 1, Calendar.SUNDAY, 2 * 60 * 60 * 1000); - * pdt.setEndRule(Calendar.OCTOBER, -1, Calendar.SUNDAY, 2 * 60 * 60 * 1000); + * <p>To specify the time format, use a <i>time pattern</i> string. In this + * string, any character from {@code 'A'} to {@code 'Z'} or {@code 'a'} to {@code 'z'} is + * treated specially. All other characters are passed through verbatim. The interpretation of each + * of the ASCII letters is given in the table below. ASCII letters not appearing in the table are + * reserved for future use, and it is an error to attempt to use them. * - * // Format the current time. - * SimpleDateFormat formatter = new SimpleDateFormat( - * "yyyy.MM.dd G 'at' hh:mm:ss a zzz"); - * Date currentTime_1 = new Date(); - * String dateString = formatter.format(currentTime_1); + * <p><table BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""> + * <tr BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"> + * <td><B>Symbol</B></td> <td><B>Meaning</B></td> <td><B>Presentation</B></td> <td><B>Example</B></td> </tr> + * <tr> <td>{@code D}</td> <td>day in year</td> <td>(Number)</td> <td>189</td> </tr> + * <tr> <td>{@code E}</td> <td>day of week</td> <td>(Text)</td> <td>Tuesday</td> </tr> + * <tr> <td>{@code F}</td> <td>day of week in month</td> <td>(Number)</td> <td>2 <i>(2nd Wed in July)</i></td> </tr> + * <tr> <td>{@code G}</td> <td>era designator</td> <td>(Text)</td> <td>AD</td> </tr> + * <tr> <td>{@code H}</td> <td>hour in day (0-23)</td> <td>(Number)</td> <td>0</td> </tr> + * <tr> <td>{@code K}</td> <td>hour in am/pm (0-11)</td> <td>(Number)</td> <td>0</td> </tr> + * <tr> <td>{@code L}</td> <td>stand-alone month</td> <td>(Text/Number)</td> <td>July / 07</td> </tr> + * <tr> <td>{@code M}</td> <td>month in year</td> <td>(Text/Number)</td> <td>July / 07</td> </tr> + * <tr> <td>{@code S}</td> <td>fractional seconds</td> <td>(Number)</td> 
<td>978</td> </tr> + * <tr> <td>{@code W}</td> <td>week in month</td> <td>(Number)</td> <td>2</td> </tr> + * <tr> <td>{@code Z}</td> <td>time zone (RFC 822)</td> <td>(Timezone)</td> <td>-0800</td> </tr> + * <tr> <td>{@code a}</td> <td>am/pm marker</td> <td>(Text)</td> <td>PM</td> </tr> + * <tr> <td>{@code c}</td> <td>stand-alone day of week</td> <td>(Text/Number)</td> <td>Tuesday / 2</td> </tr> + * <tr> <td>{@code d}</td> <td>day in month</td> <td>(Number)</td> <td>10</td> </tr> + * <tr> <td>{@code h}</td> <td>hour in am/pm (1-12)</td> <td>(Number)</td> <td>12</td> </tr> + * <tr> <td>{@code k}</td> <td>hour in day (1-24)</td> <td>(Number)</td> <td>24</td> </tr> + * <tr> <td>{@code m}</td> <td>minute in hour</td> <td>(Number)</td> <td>30</td> </tr> + * <tr> <td>{@code s}</td> <td>second in minute</td> <td>(Number)</td> <td>55</td> </tr> + * <tr> <td>{@code w}</td> <td>week in year</td> <td>(Number)</td> <td>27</td> </tr> + * <tr> <td>{@code y}</td> <td>year</td> <td>(Number)</td> <td>2010</td> </tr> + * <tr> <td>{@code z}</td> <td>time zone</td> <td>(Timezone)</td> <td>Pacific Standard Time</td> </tr> + * <tr> <td>{@code '}</td> <td>escape for text</td> <td>(Delimiter)</td> <td>'Date='</td> </tr> + * <tr> <td>{@code ''}</td> <td>single quote</td> <td>(Literal)</td> <td>'o''clock'</td> </tr> + * </table> * - * // Parse the previous string back into a Date. - * ParsePosition pos = new ParsePosition(0); - * Date currentTime_2 = formatter.parse(dateString, pos); - * </pre> + * <p>The number of consecutive copies (the "count") of a pattern character further influences + * the format. + * <ul> + * <li><b>Text</b> if the count is 4 or more, use the full form; otherwise use a short or + * abbreviated form if one exists. So {@code zzzz} might give {@code Pacific Standard Time} + * whereas {@code z} might give {@code PST}. Note that the count does <i>not</i> specify the + * exact width of the field. + * + * <li><b>Number</b> the count is the minimum number of digits. 
Shorter values are + * zero-padded to this width, longer values overflow this width. + * Years are handled specially: {@code yy} truncates to 2 digits. So {@code yyyy} might + * give {@code 2010}, but {@code yy} would give {@code 10}. + * Fractional seconds are also handled specially: they're + * zero-padded on the <i>right</i>. + * + * <li><b>Text/Number</b>: if the count is 3 or more, use text; otherwise use a number. + * So {@code MM} might give {@code 07} while {@code MMM} gives {@code July}. + * </ul> * - * </blockquote> - * <p> - * In the example, the time value {@code currentTime_2} obtained from parsing - * will be equal to {@code currentTime_1}. However, they may not be equal if the - * am/pm marker 'a' is left out from the format pattern while the - * "hour in am/pm" pattern symbol is used. This information loss can happen when - * formatting the time in PM. - * <p> - * When parsing a date string using the abbreviated year pattern ("yy"), {@code - * SimpleDateFormat} must interpret the abbreviated year relative to some - * century. It does this by adjusting dates to be within 80 years before and 20 - * years after the time the {@code SimpleDateFormat} instance is created. For - * example, using a pattern of "MM/dd/yy" and a {@code SimpleDateFormat} - * instance created on Jan 1, 1997, the string "01/11/12" would be interpreted - * as Jan 11, 2012 while the string "05/04/64" would be interpreted as May 4, - * 1964. During parsing, only strings consisting of exactly two digits, as - * defined by {@link java.lang.Character#isDigit(char)}, will be parsed into the - * default century. Any other numeric string, such as a one digit string, a - * three or more digit string, or a two digit string that isn't all digits (for - * example, "-1"), is interpreted literally. So "01/02/3" or "01/02/003" are - * parsed, using the same pattern, as Jan 2, 3 AD. Likewise, "01/02/-3" is - * parsed as Jan 2, 4 BC. 
- * <p> - * If the year pattern does not have exactly two 'y' characters, the year is - * interpreted literally, regardless of the number of digits. So using the - * pattern "MM/dd/yyyy", "01/11/12" parses to Jan 11, 12 A.D. - * <p> - * When numeric fields are adjacent directly, with no intervening delimiter + * <p>The two pattern characters {@code L} and {@code c} are ICU-compatible extensions, not + * available in the RI. These are necessary for correct localization in languages such as Russian + * that distinguish between, say, "June" and "June 2010". + * + * <p>When numeric fields are adjacent directly, with no intervening delimiter * characters, they constitute a run of adjacent numeric fields. Such runs are * parsed specially. For example, the format "HHmmss" parses the input text * "123456" to 12:34:56, parses the input text "12345" to 1:23:45, and fails to @@ -285,30 +117,100 @@ import java.util.Vector; * parsed again. This is repeated until either the parse succeeds or the * leftmost field is one character in length. If the parse still fails at that * point, the parse of the run fails. - * <p> - * For time zones that have no names, use the strings "GMT+hours:minutes" or - * "GMT-hours:minutes". - * <p> - * The calendar defines the first day of the week, the first week of the year, - * whether hours are zero based or not (0 vs. 12 or 24) and the time zone. There - * is one common decimal format to handle all the numbers; the digit count is - * handled programmatically according to the pattern. - * <h4>Synchronization</h4> Date formats are not synchronized. It is recommended - * to create separate format instances for each thread. If multiple threads - * access a format concurrently, it must be synchronized externally. * - * @see Calendar - * @see GregorianCalendar + * <p>See {@link #set2DigitYearStart} for more about handling two-digit years. 
+ * + * <h4>Sample Code</h4> + * <p>If you're formatting for human use, you should use an instance returned from + * {@link DateFormat} as described above. This code: + * <pre> + * DateFormat[] formats = new DateFormat[] { + * DateFormat.getDateInstance(), + * DateFormat.getDateTimeInstance(), + * DateFormat.getTimeInstance(), + * }; + * for (DateFormat df : formats) { + * System.err.println(df.format(new Date(0))); + * } + * </pre> + * + * <p>Produces this output when run on an {@code en_US} device in the PDT time zone: + * <pre> + * Dec 31, 1969 + * Dec 31, 1969 4:00:00 PM + * 4:00:00 PM + * </pre> + * And will produce similarly appropriate localized human-readable output on any user's system. + * + * <p>If you're formatting for machine use, consider this code: + * <pre> + * String[] formats = new String[] { + * "yyyy-MM-dd", + * "yyyy-MM-dd HH:mm", + * "yyyy-MM-dd HH:mmZ", + * "yyyy-MM-dd HH:mm:ss.SSSZ", + * "yyyy-MM-dd'T'HH:mm:ss.SSSZ", + * }; + * for (String format : formats) { + * SimpleDateFormat sdf = new SimpleDateFormat(format, Locale.US); + * System.err.format("%30s %s\n", format, sdf.format(new Date(0))); + * sdf.setTimeZone(TimeZone.getTimeZone("UTC")); + * System.err.format("%30s %s\n", format, sdf.format(new Date(0))); + * } + * </pre> + * + * <p>Which produces this output when run in the PDT time zone: + * <pre> + * yyyy-MM-dd 1969-12-31 + * yyyy-MM-dd 1970-01-01 + * yyyy-MM-dd HH:mm 1969-12-31 16:00 + * yyyy-MM-dd HH:mm 1970-01-01 00:00 + * yyyy-MM-dd HH:mmZ 1969-12-31 16:00-0800 + * yyyy-MM-dd HH:mmZ 1970-01-01 00:00+0000 + * yyyy-MM-dd HH:mm:ss.SSSZ 1969-12-31 16:00:00.000-0800 + * yyyy-MM-dd HH:mm:ss.SSSZ 1970-01-01 00:00:00.000+0000 + * yyyy-MM-dd'T'HH:mm:ss.SSSZ 1969-12-31T16:00:00.000-0800 + * yyyy-MM-dd'T'HH:mm:ss.SSSZ 1970-01-01T00:00:00.000+0000 + * </pre> + * + * <p>As this example shows, each {@code SimpleDateFormat} instance has a {@link TimeZone}. 
+ * This is because it's called upon to format instances of {@code Date}, which represents an + * absolute time in UTC. That is, {@code Date} does not carry time zone information. + * By default, {@code SimpleDateFormat} will use the system's default time zone. This is + * appropriate for human-readable output (for which, see the previous sample instead), but + * generally inappropriate for machine-readable output, where ambiguity is a problem. Note that + * in this example, the output that included a time but no time zone cannot be parsed back into + * the original {@code Date}. For this + * reason it is almost always necessary and desirable to include the timezone in the output. + * It may also be desirable to set the formatter's time zone to UTC (to ease comparison, or to + * make logs more readable, for example). + * + * <h4>Synchronization</h4> + * {@code SimpleDateFormat} is not thread-safe. Users should create a separate instance for + * each thread. + * + * @see java.util.Calendar + * @see java.util.Date * @see java.util.TimeZone - * @see DateFormat - * @see DateFormatSymbols - * @see DecimalFormat + * @see java.text.DateFormat */ public class SimpleDateFormat extends DateFormat { private static final long serialVersionUID = 4774881970558875024L; - static final String patternChars = "GyMdkHmsSEDFwWahKzZ"; + // 'L' and 'c' are ICU-compatible extensions for stand-alone month and stand-alone weekday. + static final String PATTERN_CHARS = "GyMdkHmsSEDFwWahKzZLc"; + + // The index of 'Z' in the PATTERN_CHARS string. This pattern character is supported by the RI, + // but has no corresponding public constant. + private static final int RFC_822_TIMEZONE_FIELD = 18; + + // The index of 'L' (cf. 'M') in the PATTERN_CHARS string. This is an ICU-compatible extension + // necessary for correct localization in various languages (http://b/2633414). + private static final int STAND_ALONE_MONTH_FIELD = 19; + // The index of 'c' (cf. 'E') in the PATTERN_CHARS string. 
This is an ICU-compatible extension + // necessary for correct localization in various languages (http://b/2633414). + private static final int STAND_ALONE_DAY_OF_WEEK_FIELD = 20; private String pattern; @@ -357,7 +259,7 @@ public class SimpleDateFormat extends DateFormat { * when the format character is invalid */ private void validateFormat(char format) { - int index = patternChars.indexOf(format); + int index = PATTERN_CHARS.indexOf(format); if (index == -1) { throw new IllegalArgumentException("Unknown pattern character '" + format + "'"); } @@ -483,7 +385,7 @@ public class SimpleDateFormat extends DateFormat { * the localized pattern. */ public void applyLocalizedPattern(String template) { - pattern = convertPattern(template, formatData.getLocalPatternChars(), patternChars, true); + pattern = convertPattern(template, formatData.getLocalPatternChars(), PATTERN_CHARS, true); } /** @@ -503,36 +405,6 @@ public class SimpleDateFormat extends DateFormat { } /** - * Converts the Java-spec pattern into an equivalent pattern used by ICU. - * - * @param p - * the Java-spec style pattern. - * @return the ICU-style pattern. - */ - @SuppressWarnings("nls") - private String patternForICU(String p) { - String[] subPatterns = p.split("'"); - boolean quote = false; - boolean first = true; - StringBuilder result = new StringBuilder(); - for (String subPattern : subPatterns) { - if (!quote) { - // replace 'y' with 'yy' for ICU to follow Java spec - result.append((first ? "" : "'") - + subPattern.replaceAll("(?<!y)y(?!y)", "yy")); - first = false; - } else { - result.append("'" + subPattern); - } - quote = !quote; - } - if (p.endsWith("'")) { - result.append("'"); - } - return result.toString(); - } - - /** * Returns a new {@code SimpleDateFormat} with the same pattern and * properties as this simple date format. 
* @@ -621,8 +493,7 @@ public class SimpleDateFormat extends DateFormat { for (int i = 0; i < fields.size(); i++) { FieldPosition pos = fields.elementAt(i); Format.Field attribute = pos.getFieldAttribute(); - as.addAttribute(attribute, attribute, pos.getBeginIndex(), pos - .getEndIndex()); + as.addAttribute(attribute, attribute, pos.getBeginIndex(), pos.getEndIndex()); } // return the CharacterIterator from AttributedString @@ -710,7 +581,7 @@ public class SimpleDateFormat extends DateFormat { private void append(StringBuffer buffer, FieldPosition position, Vector<FieldPosition> fields, char format, int count) { int field = -1; - int index = patternChars.indexOf(format); + int index = PATTERN_CHARS.indexOf(format); if (index == -1) { throw new IllegalArgumentException("Unknown pattern character '" + format + "'"); } @@ -735,16 +606,13 @@ public class SimpleDateFormat extends DateFormat { appendNumber(buffer, count, year); } break; - case MONTH_FIELD: + case STAND_ALONE_MONTH_FIELD: // L dateFormatField = Field.MONTH; - int month = calendar.get(Calendar.MONTH); - if (count <= 2) { - appendNumber(buffer, count, month + 1); - } else if (count == 3) { - buffer.append(formatData.shortMonths[month]); - } else { - buffer.append(formatData.months[month]); - } + appendMonth(buffer, count, formatData.longStandAloneMonths, formatData.shortStandAloneMonths); + break; + case MONTH_FIELD: // M + dateFormatField = Field.MONTH; + appendMonth(buffer, count, formatData.months, formatData.shortMonths); break; case DATE_FIELD: dateFormatField = Field.DAY_OF_MONTH; @@ -772,14 +640,13 @@ public class SimpleDateFormat extends DateFormat { int value = calendar.get(Calendar.MILLISECOND); appendNumber(buffer, count, value); break; + case STAND_ALONE_DAY_OF_WEEK_FIELD: + dateFormatField = Field.DAY_OF_WEEK; + appendDayOfWeek(buffer, count, formatData.longStandAloneWeekdays, formatData.shortStandAloneWeekdays); + break; case DAY_OF_WEEK_FIELD: dateFormatField = Field.DAY_OF_WEEK; - int day = 
calendar.get(Calendar.DAY_OF_WEEK); - if (count < 4) { - buffer.append(formatData.shortWeekdays[day]); - } else { - buffer.append(formatData.weekdays[day]); - } + appendDayOfWeek(buffer, count, formatData.weekdays, formatData.shortWeekdays); break; case DAY_OF_YEAR_FIELD: dateFormatField = Field.DAY_OF_YEAR; @@ -814,7 +681,7 @@ public class SimpleDateFormat extends DateFormat { dateFormatField = Field.TIME_ZONE; appendTimeZone(buffer, count, true); break; - case (TIMEZONE_FIELD + 1): // Z + case RFC_822_TIMEZONE_FIELD: // Z dateFormatField = Field.TIME_ZONE; appendNumericTimeZone(buffer, false); break; @@ -839,6 +706,24 @@ public class SimpleDateFormat extends DateFormat { } } + private void appendDayOfWeek(StringBuffer buffer, int count, String[] longs, String[] shorts) { + boolean isLong = (count > 3); + String[] days = isLong ? longs : shorts; + buffer.append(days[calendar.get(Calendar.DAY_OF_WEEK)]); + } + + private void appendMonth(StringBuffer buffer, int count, String[] longs, String[] shorts) { + int month = calendar.get(Calendar.MONTH); + if (count <= 2) { + appendNumber(buffer, count, month + 1); + return; + } + + boolean isLong = (count > 3); + String[] months = isLong ? longs : shorts; + buffer.append(months[month]); + } + /** * Append a representation of the time zone of 'calendar' to 'buffer'. * @@ -931,6 +816,7 @@ public class SimpleDateFormat extends DateFormat { /** * Returns the date which is the start of the one hundred year period for two-digit year values. + * See {@link #set2DigitYearStart} for details. 
*/ public Date get2DigitYearStart() { return (Date) defaultCenturyStart.clone(); @@ -947,16 +833,16 @@ public class SimpleDateFormat extends DateFormat { @Override public int hashCode() { - return super.hashCode() + pattern.hashCode() + formatData.hashCode() - + creationYear; + return super.hashCode() + pattern.hashCode() + formatData.hashCode() + creationYear; } private int parse(String string, int offset, char format, int count) { - int index = patternChars.indexOf(format); + int index = PATTERN_CHARS.indexOf(format); if (index == -1) { throw new IllegalArgumentException("Unknown pattern character '" + format + "'"); } int field = -1; + // TODO: what's 'absolute' for? when is 'count' negative, and why? int absolute = 0; if (count < 0) { count = -count; @@ -986,18 +872,12 @@ public class SimpleDateFormat extends DateFormat { return position.getIndex(); } break; + case STAND_ALONE_MONTH_FIELD: + return parseMonth(string, offset, count, absolute, + formatData.longStandAloneMonths, formatData.shortStandAloneMonths); case MONTH_FIELD: - if (count <= 2) { - return parseNumber(absolute, string, offset, - Calendar.MONTH, -1); - } - index = parseText(string, offset, formatData.months, - Calendar.MONTH); - if (index < 0) { - return parseText(string, offset, formatData.shortMonths, - Calendar.MONTH); - } - return index; + return parseMonth(string, offset, count, absolute, + formatData.months, formatData.shortMonths); case DATE_FIELD: field = Calendar.DATE; break; @@ -1025,14 +905,10 @@ public class SimpleDateFormat extends DateFormat { case MILLISECOND_FIELD: field = Calendar.MILLISECOND; break; + case STAND_ALONE_DAY_OF_WEEK_FIELD: + return parseDayOfWeek(string, offset, formatData.longStandAloneWeekdays, formatData.shortStandAloneWeekdays); case DAY_OF_WEEK_FIELD: - index = parseText(string, offset, formatData.weekdays, - Calendar.DAY_OF_WEEK); - if (index < 0) { - return parseText(string, offset, formatData.shortWeekdays, - Calendar.DAY_OF_WEEK); - } - return index; + 
return parseDayOfWeek(string, offset, formatData.weekdays, formatData.shortWeekdays); case DAY_OF_YEAR_FIELD: field = Calendar.DAY_OF_YEAR; break; @@ -1046,8 +922,7 @@ public class SimpleDateFormat extends DateFormat { field = Calendar.WEEK_OF_MONTH; break; case AM_PM_FIELD: - return parseText(string, offset, formatData.ampms, - Calendar.AM_PM); + return parseText(string, offset, formatData.ampms, Calendar.AM_PM); case HOUR1_FIELD: position = new ParsePosition(offset); result = parseNumber(absolute, string, position); @@ -1065,7 +940,7 @@ public class SimpleDateFormat extends DateFormat { break; case TIMEZONE_FIELD: return parseTimeZone(string, offset); - case (TIMEZONE_FIELD + 1): + case RFC_822_TIMEZONE_FIELD: return parseTimeZone(string, offset); } if (field != -1) { @@ -1074,6 +949,25 @@ public class SimpleDateFormat extends DateFormat { return offset; } + private int parseDayOfWeek(String string, int offset, String[] longs, String[] shorts) { + int index = parseText(string, offset, longs, Calendar.DAY_OF_WEEK); + if (index < 0) { + index = parseText(string, offset, shorts, Calendar.DAY_OF_WEEK); + } + return index; + } + + private int parseMonth(String string, int offset, int count, int absolute, String[] longs, String[] shorts) { + if (count <= 2) { + return parseNumber(absolute, string, offset, Calendar.MONTH, -1); + } + int index = parseText(string, offset, longs, Calendar.MONTH); + if (index < 0) { + index = parseText(string, offset, shorts, Calendar.MONTH); + } + return index; + } + /** * Parses a date from the specified string starting at the index specified * by {@code position}. 
If the string is successfully parsed then the index @@ -1196,8 +1090,7 @@ public class SimpleDateFormat extends DateFormat { return Integer.valueOf(result); } - private int parseNumber(int max, String string, int offset, int field, - int skew) { + private int parseNumber(int max, String string, int offset, int field, int skew) { ParsePosition position = new ParsePosition(offset); Number result = parseNumber(max, string, position); if (result == null) { @@ -1210,11 +1103,10 @@ public class SimpleDateFormat extends DateFormat { private int parseText(String string, int offset, String[] text, int field) { int found = -1; for (int i = 0; i < text.length; i++) { - if (text[i].length() == 0) { + if (text[i].isEmpty()) { continue; } - if (string - .regionMatches(true, offset, text[i], 0, text[i].length())) { + if (string.regionMatches(true, offset, text[i], 0, text[i].length())) { // Search for the longest match, in case some fields are subsets if (found == -1 || text[i].length() > text[found].length()) { found = i; @@ -1288,6 +1180,25 @@ public class SimpleDateFormat extends DateFormat { /** * Sets the date which is the start of the one hundred year period for two-digit year values. + * + * <p>When parsing a date string using the abbreviated year pattern {@code yy}, {@code + * SimpleDateFormat} must interpret the abbreviated year relative to some + * century. It does this by adjusting dates to be within 80 years before and 20 + * years after the time the {@code SimpleDateFormat} instance was created. For + * example, using a pattern of {@code MM/dd/yy}, an + * instance created on Jan 1, 1997 would interpret the string {@code "01/11/12"} + * as Jan 11, 2012 but interpret the string {@code "05/04/64"} as May 4, 1964. + * During parsing, only strings consisting of exactly two digits, as + * defined by {@link java.lang.Character#isDigit(char)}, will be parsed into the + * default century. 
Any other numeric string, such as a one digit string, a + * three or more digit string, or a two digit string that isn't all digits (for + * example, {@code "-1"}), is interpreted literally. So using the same pattern, both + * {@code "01/02/3"} and {@code "01/02/003"} are parsed as Jan 2, 3 AD. + * Similarly, {@code "01/02/-3"} is parsed as Jan 2, 4 BC. + * + * <p>If the year pattern does not have exactly two 'y' characters, the year is + * interpreted literally, regardless of the number of digits. So using the + * pattern {@code MM/dd/yyyy}, {@code "01/11/12"} is parsed as Jan 11, 12 A.D. */ public void set2DigitYearStart(Date date) { defaultCenturyStart = (Date) date.clone(); @@ -1313,7 +1224,7 @@ public class SimpleDateFormat extends DateFormat { * @return the localized pattern. */ public String toLocalizedPattern() { - return convertPattern(pattern, patternChars, formatData.getLocalPatternChars(), false); + return convertPattern(pattern, PATTERN_CHARS, formatData.getLocalPatternChars(), false); } private static String convertPattern(String template, String fromChars, String toChars, boolean check) { diff --git a/luni/src/main/java/org/apache/harmony/luni/util/HistoricalNamesUtil.java b/luni/src/main/java/org/apache/harmony/luni/util/HistoricalNamesUtil.java index f32f976..4da1f8e 100644 --- a/luni/src/main/java/org/apache/harmony/luni/util/HistoricalNamesUtil.java +++ b/luni/src/main/java/org/apache/harmony/luni/util/HistoricalNamesUtil.java @@ -164,7 +164,7 @@ public class HistoricalNamesUtil { } public static String getHistoricalName(String name) { - return (!historicalNames.containsKey(name) ? name : historicalNames - .get(name)); + String result = historicalNames.get(name); + return (result != null) ? 
result : name; } } diff --git a/luni/src/main/java/org/apache/harmony/luni/util/Util.java b/luni/src/main/java/org/apache/harmony/luni/util/Util.java index c42cbbd..5411e3f 100644 --- a/luni/src/main/java/org/apache/harmony/luni/util/Util.java +++ b/luni/src/main/java/org/apache/harmony/luni/util/Util.java @@ -20,63 +20,11 @@ package org.apache.harmony.luni.util; import java.io.ByteArrayOutputStream; import java.io.UTFDataFormatException; import java.io.UnsupportedEncodingException; -import java.util.Calendar; -import java.util.TimeZone; public final class Util { - - private static final String defaultEncoding; - - static { - // BEGIN android-changed - String encoding = System.getProperty("file.encoding"); - // END android-changed - if (encoding != null) { - try { - "".getBytes(encoding); - } catch (Throwable t) { - encoding = null; - } - } - defaultEncoding = encoding; - } - - public static String toString(byte[] bytes) { - if (defaultEncoding != null) { - try { - return new String(bytes, 0, bytes.length, defaultEncoding); - } catch (java.io.UnsupportedEncodingException e) { - } - } - return new String(bytes, 0, bytes.length); - } - - public static String toUTF8String(byte[] bytes) { - return toUTF8String(bytes, 0, bytes.length); - } - - public static String toString(byte[] bytes, int offset, int length) { - if (defaultEncoding != null) { - try { - return new String(bytes, offset, length, defaultEncoding); - } catch (java.io.UnsupportedEncodingException e) { - } - } - return new String(bytes, offset, length); - } - - public static String toUTF8String(byte[] bytes, int offset, int length) { - try { - return new String(bytes, offset, length, "UTF-8"); - } catch (java.io.UnsupportedEncodingException e) { - return toString(bytes, offset, length); - } - } - - public static String convertFromUTF8(byte[] buf, int offset, int utfSize) throws UTFDataFormatException { - return convertUTF8WithBuf(buf, new char[utfSize], offset, utfSize); - } - + /** + * Converts bytes 
encoded with Java's <i>modified</i> UTF-8 to a string. + */ public static String convertUTF8WithBuf(byte[] buf, char[] out, int offset, int utfSize) throws UTFDataFormatException { int count = 0, s = 0, a; while (count < utfSize) { diff --git a/luni/src/main/native/ICU.cpp b/luni/src/main/native/ICU.cpp index b9e6c7d..6d60053 100644 --- a/luni/src/main/native/ICU.cpp +++ b/luni/src/main/native/ICU.cpp @@ -377,8 +377,6 @@ static jobjectArray getAmPmMarkers(JNIEnv* env, UResourceBundle* gregorian) { return NULL; } - ures_resetIterator(gregorianElems.get()); - int lengthAm, lengthPm; const jchar* am = ures_getStringByIndex(gregorianElems.get(), 0, &lengthAm, &status); const jchar* pm = ures_getStringByIndex(gregorianElems.get(), 1, &lengthPm, &status); @@ -410,8 +408,6 @@ static jobjectArray getEras(JNIEnv* env, UResourceBundle* gregorian) { int eraCount = ures_getSize(eraElems.get()); jobjectArray eras = env->NewObjectArray(eraCount, string_class, NULL); - - ures_resetIterator(eraElems.get()); for (int i = 0; i < eraCount; ++i) { int eraLength; const jchar* era = ures_getStringByIndex(eraElems.get(), i, &eraLength, &status); @@ -424,91 +420,33 @@ static jobjectArray getEras(JNIEnv* env, UResourceBundle* gregorian) { return eras; } -static jobjectArray getMonthNames(JNIEnv* env, UResourceBundle* gregorian, bool longNames) { +enum NameType { REGULAR, STAND_ALONE }; +enum NameWidth { LONG, SHORT }; +static jobjectArray getNames(JNIEnv* env, UResourceBundle* namesBundle, bool months, NameType type, NameWidth width) { + const char* typeKey = (type == REGULAR) ? "format" : "stand-alone"; + const char* widthKey = (width == LONG) ? 
"wide" : "abbreviated"; UErrorCode status = U_ZERO_ERROR; - ScopedResourceBundle gregorianElems(ures_getByKey(gregorian, "monthNames", NULL, &status)); - if (U_FAILURE(status)) { - return NULL; - } - - ScopedResourceBundle monthNameElems(ures_getByKey(gregorianElems.get(), "format", NULL, &status)); - if (U_FAILURE(status)) { - return NULL; - } - - ScopedResourceBundle monthNameElemsFormat(ures_getByKey(monthNameElems.get(), longNames ? "wide" : "abbreviated", NULL, &status)); + ScopedResourceBundle formatBundle(ures_getByKey(namesBundle, typeKey, NULL, &status)); + ScopedResourceBundle valuesBundle(ures_getByKey(formatBundle.get(), widthKey, NULL, &status)); if (U_FAILURE(status)) { return NULL; } - ures_resetIterator(monthNameElemsFormat.get()); - int monthCount = ures_getSize(monthNameElemsFormat.get()); - // the array length is +1 because the harmony locales had an empty string at the end of their month name array - jobjectArray months = env->NewObjectArray(monthCount + 1, string_class, NULL); - for (int i = 0; i < monthCount; ++i) { - int monthNameLength; - const jchar* month = ures_getStringByIndex(monthNameElemsFormat.get(), i, &monthNameLength, &status); + // The months array has a trailing empty string. The days array has a leading empty string. + int count = ures_getSize(valuesBundle.get()); + jobjectArray result = env->NewObjectArray(count + 1, string_class, NULL); + env->SetObjectArrayElement(result, months ? count : 0, env->NewStringUTF("")); + int arrayOffset = months ? 
0 : 1; + for (int i = 0; i < count; ++i) { + int nameLength; + const jchar* name = ures_getStringByIndex(valuesBundle.get(), i, &nameLength, &status); if (U_FAILURE(status)) { return NULL; } - ScopedLocalRef<jstring> monthU(env, env->NewString(month, monthNameLength)); - env->SetObjectArrayElement(months, i, monthU.get()); - } - - ScopedLocalRef<jstring> monthU(env, env->NewStringUTF("")); - env->SetObjectArrayElement(months, monthCount, monthU.get()); - - return months; -} - -static jobjectArray getLongMonthNames(JNIEnv* env, UResourceBundle* gregorian) { - return getMonthNames(env, gregorian, true); -} - -static jobjectArray getShortMonthNames(JNIEnv* env, UResourceBundle* gregorian) { - return getMonthNames(env, gregorian, false); -} - -static jobjectArray getWeekdayNames(JNIEnv* env, UResourceBundle* gregorian, bool longNames) { - UErrorCode status = U_ZERO_ERROR; - ScopedResourceBundle gregorianElems(ures_getByKey(gregorian, "dayNames", NULL, &status)); - if (U_FAILURE(status)) { - return NULL; - } - - ScopedResourceBundle dayNameElems(ures_getByKey(gregorianElems.get(), "format", NULL, &status)); - if (U_FAILURE(status)) { - return NULL; - } - - ScopedResourceBundle dayNameElemsFormat(ures_getByKey(dayNameElems.get(), longNames ? 
"wide" : "abbreviated", NULL, &status)); - if (U_FAILURE(status)) { - return NULL; - } - - ures_resetIterator(dayNameElemsFormat.get()); - int dayCount = ures_getSize(dayNameElemsFormat.get()); - jobjectArray weekdays = env->NewObjectArray(dayCount + 1, string_class, NULL); - // first entry in the weekdays array is an empty string - env->SetObjectArrayElement(weekdays, 0, env->NewStringUTF("")); - for(int i = 0; i < dayCount; i++) { - int dayNameLength; - const jchar* day = ures_getStringByIndex(dayNameElemsFormat.get(), i, &dayNameLength, &status); - if(U_FAILURE(status)) { - return NULL; - } - ScopedLocalRef<jstring> dayU(env, env->NewString(day, dayNameLength)); - env->SetObjectArrayElement(weekdays, i + 1, dayU.get()); + ScopedLocalRef<jstring> nameString(env, env->NewString(name, nameLength)); + env->SetObjectArrayElement(result, arrayOffset++, nameString.get()); } - return weekdays; -} - -static jobjectArray getLongWeekdayNames(JNIEnv* env, UResourceBundle* gregorian) { - return getWeekdayNames(env, gregorian, true); -} - -static jobjectArray getShortWeekdayNames(JNIEnv* env, UResourceBundle* gregorian) { - return getWeekdayNames(env, gregorian, false); + return result; } static jstring getIntCurrencyCode(JNIEnv* env, jstring locale) { @@ -605,21 +543,54 @@ static jboolean initLocaleDataImpl(JNIEnv* env, jclass, jstring locale, jobject setStringArrayField(env, localeData, "amPm", getAmPmMarkers(env, gregorian.get())); setStringArrayField(env, localeData, "eras", getEras(env, gregorian.get())); - setStringArrayField(env, localeData, "longMonthNames", getLongMonthNames(env, gregorian.get())); - setStringArrayField(env, localeData, "shortMonthNames", getShortMonthNames(env, gregorian.get())); - setStringArrayField(env, localeData, "longWeekdayNames", getLongWeekdayNames(env, gregorian.get())); - setStringArrayField(env, localeData, "shortWeekdayNames", getShortWeekdayNames(env, gregorian.get())); - - ScopedResourceBundle 
gregorianElems(ures_getByKey(gregorian.get(), "DateTimePatterns", NULL, &status)); + ScopedResourceBundle dayNames(ures_getByKey(gregorian.get(), "dayNames", NULL, &status)); + ScopedResourceBundle monthNames(ures_getByKey(gregorian.get(), "monthNames", NULL, &status)); + + // Get the regular month and weekday names. + jobjectArray longMonthNames = getNames(env, monthNames.get(), true, REGULAR, LONG); + jobjectArray shortMonthNames = getNames(env, monthNames.get(), true, REGULAR, SHORT); + jobjectArray longWeekdayNames = getNames(env, dayNames.get(), false, REGULAR, LONG); + jobjectArray shortWeekdayNames = getNames(env, dayNames.get(), false, REGULAR, SHORT); + setStringArrayField(env, localeData, "longMonthNames", longMonthNames); + setStringArrayField(env, localeData, "shortMonthNames", shortMonthNames); + setStringArrayField(env, localeData, "longWeekdayNames", longWeekdayNames); + setStringArrayField(env, localeData, "shortWeekdayNames", shortWeekdayNames); + + // Get the stand-alone month and weekday names. If they're not available (as they aren't for + // English), we reuse the regular names. If we returned null to Java, the usual fallback + // mechanisms would come into play and we'd end up with the bogus stand-alone names from the + // root locale ("1" for January, and so on). 
+ jobjectArray longStandAloneMonthNames = getNames(env, monthNames.get(), true, STAND_ALONE, LONG); + if (longStandAloneMonthNames == NULL) { + longStandAloneMonthNames = longMonthNames; + } + jobjectArray shortStandAloneMonthNames = getNames(env, monthNames.get(), true, STAND_ALONE, SHORT); + if (shortStandAloneMonthNames == NULL) { + shortStandAloneMonthNames = shortMonthNames; + } + jobjectArray longStandAloneWeekdayNames = getNames(env, dayNames.get(), false, STAND_ALONE, LONG); + if (longStandAloneWeekdayNames == NULL) { + longStandAloneWeekdayNames = longWeekdayNames; + } + jobjectArray shortStandAloneWeekdayNames = getNames(env, dayNames.get(), false, STAND_ALONE, SHORT); + if (shortStandAloneWeekdayNames == NULL) { + shortStandAloneWeekdayNames = shortWeekdayNames; + } + setStringArrayField(env, localeData, "longStandAloneMonthNames", longStandAloneMonthNames); + setStringArrayField(env, localeData, "shortStandAloneMonthNames", shortStandAloneMonthNames); + setStringArrayField(env, localeData, "longStandAloneWeekdayNames", longStandAloneWeekdayNames); + setStringArrayField(env, localeData, "shortStandAloneWeekdayNames", shortStandAloneWeekdayNames); + + ScopedResourceBundle dateTimePatterns(ures_getByKey(gregorian.get(), "DateTimePatterns", NULL, &status)); if (U_SUCCESS(status)) { - setStringField(env, localeData, "fullTimeFormat", gregorianElems.get(), 0); - setStringField(env, localeData, "longTimeFormat", gregorianElems.get(), 1); - setStringField(env, localeData, "mediumTimeFormat", gregorianElems.get(), 2); - setStringField(env, localeData, "shortTimeFormat", gregorianElems.get(), 3); - setStringField(env, localeData, "fullDateFormat", gregorianElems.get(), 4); - setStringField(env, localeData, "longDateFormat", gregorianElems.get(), 5); - setStringField(env, localeData, "mediumDateFormat", gregorianElems.get(), 6); - setStringField(env, localeData, "shortDateFormat", gregorianElems.get(), 7); + setStringField(env, localeData, "fullTimeFormat", 
dateTimePatterns.get(), 0); + setStringField(env, localeData, "longTimeFormat", dateTimePatterns.get(), 1); + setStringField(env, localeData, "mediumTimeFormat", dateTimePatterns.get(), 2); + setStringField(env, localeData, "shortTimeFormat", dateTimePatterns.get(), 3); + setStringField(env, localeData, "fullDateFormat", dateTimePatterns.get(), 4); + setStringField(env, localeData, "longDateFormat", dateTimePatterns.get(), 5); + setStringField(env, localeData, "mediumDateFormat", dateTimePatterns.get(), 6); + setStringField(env, localeData, "shortDateFormat", dateTimePatterns.get(), 7); } status = U_ZERO_ERROR; diff --git a/luni/src/main/native/org_apache_harmony_luni_platform_OSNetworkSystem.cpp b/luni/src/main/native/org_apache_harmony_luni_platform_OSNetworkSystem.cpp index eac669d..c9fa21a 100644 --- a/luni/src/main/native/org_apache_harmony_luni_platform_OSNetworkSystem.cpp +++ b/luni/src/main/native/org_apache_harmony_luni_platform_OSNetworkSystem.cpp @@ -848,7 +848,7 @@ static void osNetworkSystem_createStreamSocket(JNIEnv* env, jobject, jobject fil static void osNetworkSystem_createDatagramSocket(JNIEnv* env, jobject, jobject fileDescriptor, jboolean) { int fd = createSocketFileDescriptor(env, fileDescriptor, SOCK_DGRAM); #ifdef __linux__ - // The RFC (http://tools.ietf.org/rfc/rfc3493.txt) says that IPV6_MULTICAST_HOPS defaults to 1. + // The RFC (http://www.ietf.org/rfc/rfc3493.txt) says that IPV6_MULTICAST_HOPS defaults to 1. // The Linux kernel (at least up to 2.6.32) accidentally defaults to 64 (which would be correct // for the *unicast* hop limit). See http://www.spinics.net/lists/netdev/msg129022.html. // When that's fixed, we can remove this code. 
Until then, we manually set the hop limit on diff --git a/luni/src/test/java/java/lang/StringTest.java b/luni/src/test/java/java/lang/StringTest.java index 300449b..f166f33 100644 --- a/luni/src/test/java/java/lang/StringTest.java +++ b/luni/src/test/java/java/lang/StringTest.java @@ -76,12 +76,6 @@ public class StringTest extends TestCase { public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); } }; - public void testGetBytes() { - Charset cs = Charset.forName("UTF-8"); - byte[] expected = new byte[] {(byte) 'h', (byte) 'i'}; - assertTrue(Arrays.equals(expected, "hi".getBytes(cs))); - } - public void testGetBytes_MaliciousCharset() { try { String s = "hi"; @@ -110,4 +104,50 @@ public class StringTest extends TestCase { EvilCharsetDecoder.corrupt(); assertEquals("YY", result); } + + public void test_getBytes_bad() throws Exception { + // Check that we use '?' as the replacement byte for invalid characters. + assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII"))); + assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII")))); + } + + public void test_getBytes_UTF_8() { + // We have a fast path implementation of String.getBytes for UTF-8. + Charset cs = Charset.forName("UTF-8"); + + // Test one-byte characters. + assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs))); + assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs))); + assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs))); + + // Test two-byte characters. + assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs))); + assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs))); + assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs))); + assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs))); + + // Test three-byte characters. 
+ assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs))); + assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs))); + assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs))); + assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs))); + + // Test supplementary characters. + // Minimum supplementary character: U+10000 + assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs))); + // Random supplementary character: U+10381 Ugaritic letter beta + assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs))); + // Maximum supplementary character: U+10FFFF + assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs))); + // A high surrogate at end of string is an error replaced with '?'. + assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs))); + // A high surrogate not followed by a low surrogate is an error replaced with '?'. + assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs))); + } + + public void test_new_String_bad() throws Exception { + // Check that we use U+FFFD as the replacement string for invalid bytes. + assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII")); + assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII"))); + } } diff --git a/luni/src/test/java/java/text/SimpleDateFormatTest.java b/luni/src/test/java/java/text/SimpleDateFormatTest.java index 9d34024..e3a0cbe 100644 --- a/luni/src/test/java/java/text/SimpleDateFormatTest.java +++ b/luni/src/test/java/java/text/SimpleDateFormatTest.java @@ -16,7 +16,10 @@ package java.text; +import java.util.Calendar; import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; public class SimpleDateFormatTest extends junit.framework.TestCase { // The RI fails this test. 
@@ -36,4 +39,48 @@ public class SimpleDateFormatTest extends junit.framework.TestCase { newDate.setTime(0); assertFalse(sdf.get2DigitYearStart().equals(newDate)); } + + // The RI fails this test because this is an ICU-compatible Android extension. + // Necessary for correct localization in various languages (http://b/2633414). + public void testStandAloneNames() throws Exception { + Locale en = Locale.ENGLISH; + Locale pl = new Locale("pl"); + Locale ru = new Locale("ru"); + + assertEquals("January", formatDate(en, "MMMM")); + assertEquals("January", formatDate(en, "LLLL")); + assertEquals("stycznia", formatDate(pl, "MMMM")); + assertEquals("stycze\u0144", formatDate(pl, "LLLL")); + + assertEquals("Thursday", formatDate(en, "EEEE")); + assertEquals("Thursday", formatDate(en, "cccc")); + assertEquals("\u0447\u0435\u0442\u0432\u0435\u0440\u0433", formatDate(ru, "EEEE")); + assertEquals("\u0427\u0435\u0442\u0432\u0435\u0440\u0433", formatDate(ru, "cccc")); + + assertEquals(Calendar.JUNE, parseDate(en, "yyyy-MMMM-dd", "1980-June-12").get(Calendar.MONTH)); + assertEquals(Calendar.JUNE, parseDate(en, "yyyy-LLLL-dd", "1980-June-12").get(Calendar.MONTH)); + assertEquals(Calendar.JUNE, parseDate(pl, "yyyy-MMMM-dd", "1980-czerwca-12").get(Calendar.MONTH)); + assertEquals(Calendar.JUNE, parseDate(pl, "yyyy-LLLL-dd", "1980-czerwiec-12").get(Calendar.MONTH)); + + assertEquals(Calendar.TUESDAY, parseDate(en, "EEEE", "Tuesday").get(Calendar.DAY_OF_WEEK)); + assertEquals(Calendar.TUESDAY, parseDate(en, "cccc", "Tuesday").get(Calendar.DAY_OF_WEEK)); + assertEquals(Calendar.TUESDAY, parseDate(ru, "EEEE", "\u0432\u0442\u043e\u0440\u043d\u0438\u043a").get(Calendar.DAY_OF_WEEK)); + assertEquals(Calendar.TUESDAY, parseDate(ru, "cccc", "\u0412\u0442\u043e\u0440\u043d\u0438\u043a").get(Calendar.DAY_OF_WEEK)); + } + + private String formatDate(Locale l, String fmt) { + return new SimpleDateFormat(fmt, l).format(new Date(0)); + } + + private Calendar parseDate(Locale l, String fmt, String 
value) { + SimpleDateFormat sdf = new SimpleDateFormat(fmt, l); + ParsePosition pp = new ParsePosition(0); + Date d = sdf.parse(value, pp); + if (d == null) { + fail(pp.toString()); + } + Calendar c = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + c.setTime(d); + return c; + } } diff --git a/support/src/test/java/tests/http/MockResponse.java b/support/src/test/java/tests/http/MockResponse.java index 9893e2f..c9dc6ec 100644 --- a/support/src/test/java/tests/http/MockResponse.java +++ b/support/src/test/java/tests/http/MockResponse.java @@ -52,6 +52,11 @@ public class MockResponse { return this; } + public MockResponse setStatus(String status) { + this.status = status; + return this; + } + /** * Returns the HTTP headers, such as "Content-Length: 0". */ diff --git a/support/src/test/java/tests/http/MockWebServer.java b/support/src/test/java/tests/http/MockWebServer.java index e3df2e8..b981a51 100644 --- a/support/src/test/java/tests/http/MockWebServer.java +++ b/support/src/test/java/tests/http/MockWebServer.java @@ -34,6 +34,8 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.LinkedBlockingQueue; +import javax.net.ssl.SSLSocket; +import javax.net.ssl.SSLSocketFactory; /** * A scriptable web server. Callers supply canned responses and the server @@ -48,6 +50,8 @@ public final class MockWebServer { private final BlockingQueue<MockResponse> responseQueue = new LinkedBlockingDeque<MockResponse>(); private int bodyLimit = Integer.MAX_VALUE; + private SSLSocketFactory sslSocketFactory; + private boolean tunnelProxy; private final ExecutorService executor = Executors.newCachedThreadPool(); private int port = -1; @@ -76,8 +80,15 @@ public final class MockWebServer { this.bodyLimit = maxBodyLength; } - public void enqueue(MockResponse response) { - responseQueue.add(response); + /** + * Serve requests with HTTPS rather than otherwise. 
+ * + * @param tunnelProxy whether to expect the HTTP CONNECT method before + * negotiating TLS. + */ + public void useHttps(SSLSocketFactory sslSocketFactory, boolean tunnelProxy) { + this.sslSocketFactory = sslSocketFactory; + this.tunnelProxy = tunnelProxy; } /** @@ -88,13 +99,19 @@ public final class MockWebServer { return requestQueue.take(); } + public void enqueue(MockResponse response) { + responseQueue.add(response); + } + /** * Starts the server, serves all enqueued requests, and shuts the server * down. */ public void play() throws IOException { - final ServerSocket ss = new ServerSocket(0); + final ServerSocket ss; + ss = new ServerSocket(0); ss.setReuseAddress(true); + port = ss.getLocalPort(); executor.submit(new Callable<Void>() { public Void call() throws Exception { @@ -113,31 +130,53 @@ public final class MockWebServer { }); } - private void serveConnection(final Socket s) { + private void serveConnection(final Socket raw) { executor.submit(new Callable<Void>() { - public Void call() throws Exception { - InputStream in = new BufferedInputStream(s.getInputStream()); - OutputStream out = new BufferedOutputStream(s.getOutputStream()); + int sequenceNumber = 0; - int sequenceNumber = 0; - while (true) { - RecordedRequest request = readRequest(in, sequenceNumber); - if (request == null) { - if (sequenceNumber == 0) { - throw new IllegalStateException("Connection without any request!"); - } else { - break; + public Void call() throws Exception { + Socket socket; + if (sslSocketFactory != null) { + if (tunnelProxy) { + if (!processOneRequest(raw.getInputStream(), raw.getOutputStream())) { + throw new IllegalStateException("Tunnel without any CONNECT!"); } } - requestQueue.add(request); - writeResponse(out, computeResponse(request)); - sequenceNumber++; + socket = sslSocketFactory.createSocket( + raw, raw.getInetAddress().getHostAddress(), raw.getPort(), true); + ((SSLSocket) socket).setUseClientMode(false); + } else { + socket = raw; } + InputStream in = 
new BufferedInputStream(socket.getInputStream()); + OutputStream out = new BufferedOutputStream(socket.getOutputStream()); + + if (!processOneRequest(in, out)) { + throw new IllegalStateException("Connection without any request!"); + } + while (processOneRequest(in, out)) {} + in.close(); out.close(); return null; } + + /** + * Reads a request and writes its response. Returns true if a request + * was processed. + */ + private boolean processOneRequest(InputStream in, OutputStream out) + throws IOException, InterruptedException { + RecordedRequest request = readRequest(in, sequenceNumber); + if (request == null) { + return false; + } + requestQueue.add(request); + writeResponse(out, computeResponse(request)); + sequenceNumber++; + return true; + } }); } @@ -186,7 +225,7 @@ public final class MockWebServer { } } - if (request.startsWith("GET ")) { + if (request.startsWith("GET ") || request.startsWith("CONNECT ")) { if (hasBody) { throw new IllegalArgumentException("GET requests should not have a body!"); } |