diff options
-rw-r--r-- | libart/src/main/java/java/lang/AbstractStringBuilder.java (renamed from luni/src/main/java/java/lang/AbstractStringBuilder.java) | 19 | ||||
-rw-r--r-- | libart/src/main/java/java/lang/CaseMapper.java (renamed from luni/src/main/java/java/lang/CaseMapper.java) | 76 | ||||
-rw-r--r-- | libart/src/main/java/java/lang/String.java | 506 | ||||
-rw-r--r-- | libart/src/main/java/java/lang/StringFactory.java | 251 | ||||
-rw-r--r-- | luni/src/main/java/libcore/util/CharsetUtils.java | 26 | ||||
-rw-r--r-- | luni/src/main/native/Register.cpp | 1 | ||||
-rw-r--r-- | luni/src/main/native/libcore_util_CharsetUtils.cpp | 250 | ||||
-rw-r--r-- | luni/src/main/native/sub.mk | 1 | ||||
-rw-r--r-- | luni/src/test/java/libcore/java/lang/StringTest.java | 41 |
9 files changed, 435 insertions, 736 deletions
diff --git a/luni/src/main/java/java/lang/AbstractStringBuilder.java b/libart/src/main/java/java/lang/AbstractStringBuilder.java index 4d84078..c8c8c5a 100644 --- a/luni/src/main/java/java/lang/AbstractStringBuilder.java +++ b/libart/src/main/java/java/lang/AbstractStringBuilder.java @@ -87,7 +87,7 @@ abstract class AbstractStringBuilder { count = string.length(); shared = false; value = new char[count + INITIAL_CAPACITY]; - string._getChars(0, count, value, 0); + string.getCharsNoCheck(0, count, value, 0); } private void enlargeBuffer(int min) { @@ -145,7 +145,7 @@ abstract class AbstractStringBuilder { if (newCount > value.length) { enlargeBuffer(newCount); } - string._getChars(0, length, value, count); + string.getCharsNoCheck(0, length, value, count); count = newCount; } @@ -167,7 +167,7 @@ abstract class AbstractStringBuilder { } if (s instanceof String) { - ((String) s)._getChars(start, end, value, count); + ((String) s).getCharsNoCheck(start, end, value, count); } else if (s instanceof AbstractStringBuilder) { AbstractStringBuilder other = (AbstractStringBuilder) s; System.arraycopy(other.value, start, value, count, length); @@ -345,7 +345,7 @@ abstract class AbstractStringBuilder { int min = string.length(); if (min != 0) { move(min, index); - string._getChars(0, min, value, index); + string.getCharsNoCheck(0, min, value, index); count += min; } } else { @@ -422,7 +422,7 @@ abstract class AbstractStringBuilder { value = value.clone(); shared = false; } - string._getChars(0, stringLength, value, start); + string.getCharsNoCheck(0, stringLength, value, start); count -= diff; return; } @@ -626,14 +626,7 @@ abstract class AbstractStringBuilder { if (count == 0) { return ""; } - // Optimize String sharing for more performance - int wasted = value.length - count; - if (wasted >= 256 - || (wasted >= INITIAL_CAPACITY && wasted >= (count >> 1))) { - return new String(value, 0, count); - } - shared = true; - return new String(0, count, value); + return StringFactory.newStringFromChars(0, count, value); } /** diff --git a/luni/src/main/java/java/lang/CaseMapper.java b/libart/src/main/java/java/lang/CaseMapper.java index 1da621c..f23a4ef 100644 --- a/luni/src/main/java/java/lang/CaseMapper.java +++ b/libart/src/main/java/java/lang/CaseMapper.java @@ -34,17 +34,15 @@ class CaseMapper { /** * Our current GC makes short-lived objects more expensive than we'd like. When that's fixed, * this class should be changed so that you instantiate it with the String and its value, - * offset, and count fields. + * and count fields. */ private CaseMapper() { } /** - * Implements String.toLowerCase. We need 's' so that we can return the original String instance - * if nothing changes. We need 'value', 'offset', and 'count' because they're not otherwise - * accessible. + * Implements String.toLowerCase. The original String instance is returned if nothing changes. */ - public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) { + public static String toLowerCase(Locale locale, String s) { // Punt hard cases to ICU4C. // Note that Greek isn't a particularly hard case for toLowerCase, only toUpperCase. String languageCode = locale.getLanguage(); @@ -52,29 +50,26 @@ class CaseMapper { return ICU.toLowerCase(s, locale); } - char[] newValue = null; - int newCount = 0; - for (int i = offset, end = offset + count; i < end; ++i) { - char ch = value[i]; + String newString = null; + for (int i = 0, end = s.length(); i < end; ++i) { + char ch = s.charAt(i); char newCh; if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) { // Punt these hard cases. return ICU.toLowerCase(s, locale); - } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) { + } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(s, i)) { newCh = GREEK_SMALL_FINAL_SIGMA; } else { newCh = Character.toLowerCase(ch); } - if (newValue == null && ch != newCh) { - newValue = new char[count]; // The result can't be longer than the input. - newCount = i - offset; - System.arraycopy(value, offset, newValue, 0, newCount); - } - if (newValue != null) { - newValue[newCount++] = newCh; + if (ch != newCh) { + if (newString == null) { + newString = StringFactory.newStringFromString(s); + } + newString.setCharAt(i, newCh); } } - return newValue != null ? new String(0, newCount, newValue) : s; + return newString != null ? newString : s; } /** @@ -82,20 +77,20 @@ class CaseMapper { * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and * then a cased letter. */ - private static boolean isFinalSigma(char[] value, int offset, int count, int index) { + private static boolean isFinalSigma(String s, int index) { // TODO: we don't skip case-ignorable sequences like we should. // TODO: we should add a more direct way to test for a cased letter. - if (index <= offset) { + if (index <= 0) { return false; } - char previous = value[index - 1]; + char previous = s.charAt(index - 1); if (!(Character.isLowerCase(previous) || Character.isUpperCase(previous) || Character.isTitleCase(previous))) { return false; } - if (index + 1 >= offset + count) { + if (index + 1 >= s.length()) { return true; } - char next = value[index + 1]; + char next = s.charAt(index + 1); if (Character.isLowerCase(next) || Character.isUpperCase(next) || Character.isTitleCase(next)) { return false; } @@ -147,7 +142,7 @@ class CaseMapper { } }; - public static String toUpperCase(Locale locale, String s, char[] value, int offset, int count) { + public static String toUpperCase(Locale locale, String s, int count) { String languageCode = locale.getLanguage(); if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) { return ICU.toUpperCase(s, locale); @@ -157,9 +152,10 @@ class CaseMapper { } char[] output = null; + String newString = null; int i = 0; - for (int o = offset, end = offset + count; o < end; o++) { - char ch = value[o]; + for (int o = 0, end = count; o < end; o++) { + char ch = s.charAt(o); if (Character.isHighSurrogate(ch)) { return ICU.toUpperCase(s, locale); } @@ -171,23 +167,25 @@ class CaseMapper { output = newoutput; } char upch = Character.toUpperCase(ch); - if (ch != upch) { - if (output == null) { - output = new char[count]; - i = o - offset; - System.arraycopy(value, offset, output, 0, i); - } + if (output != null) { output[i++] = upch; - } else if (output != null) { - output[i++] = ch; + } else if (ch != upch) { + if (newString == null) { + newString = StringFactory.newStringFromString(s); + } + newString.setCharAt(o, upch); } } else { int target = index * 3; char val3 = upperValues[target + 2]; if (output == null) { output = new char[count + (count / 6) + 2]; - i = o - offset; - System.arraycopy(value, offset, output, 0, i); + i = o; + if (newString != null) { + System.arraycopy(newString.toCharArray(), 0, output, 0, i); + } else { + System.arraycopy(s.toCharArray(), 0, output, 0, i); + } } else if (i + (val3 == 0 ? 1 : 2) >= output.length) { char[] newoutput = new char[output.length + (count / 6) + 3]; System.arraycopy(output, 0, newoutput, 0, output.length); @@ -204,7 +202,11 @@ class CaseMapper { } } if (output == null) { - return s; + if (newString != null) { + return newString; + } else { + return s; + } } return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i); } diff --git a/libart/src/main/java/java/lang/String.java b/libart/src/main/java/java/lang/String.java index a5bf34c..0875d1a 100644 --- a/libart/src/main/java/java/lang/String.java +++ b/libart/src/main/java/java/lang/String.java @@ -35,23 +35,6 @@ import libcore.util.EmptyArray; * See {@link Character} for details about the relationship between {@code char} and * Unicode code points. * - * <a name="backing_array"><h3>Backing Arrays</h3></a> - * This class is implemented using a {@code char[]}. The length of the array may exceed - * the length of the string. For example, the string "Hello" may be backed by - * the array {@code ['H', 'e', 'l', 'l', 'o', 'W'. 'o', 'r', 'l', 'd']} with - * offset 0 and length 5. - * - * <p>Multiple strings can share the same {@code char[]} because strings are immutable. - * The {@link #substring} method <strong>always</strong> returns a string that - * shares the backing array of its source string. Generally this is an - * optimization: fewer {@code char[]}s need to be allocated, and less copying - * is necessary. But this can also lead to unwanted heap retention. Taking a - * short substring of long string means that the long shared {@code char[]} won't be - * garbage until both strings are garbage. This typically happens when parsing - * small substrings out of a large input. To avoid this where necessary, call - * {@code new String(longString.subString(...))}. The string copy constructor - * always ensures that the backing array is no larger than necessary. - * * @see StringBuffer * @see StringBuilder * @see Charset @@ -93,10 +76,6 @@ public final class String implements Serializable, Comparable<String>, CharSeque } } - private final char[] value; - - private final int offset; - private final int count; private int hashCode; @@ -105,9 +84,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * Creates an empty string. */ public String() { - value = EmptyArray.CHAR; - offset = 0; - count = 0; + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -116,7 +93,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque */ @FindBugsSuppressWarnings("DM_DEFAULT_ENCODING") public String(byte[] data) { - this(data, 0, data.length); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -133,7 +110,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque */ @Deprecated public String(byte[] data, int high) { - this(data, high, 0, data.length); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -146,7 +123,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * if {@code byteCount < 0 || offset < 0 || offset + byteCount > data.length}. */ public String(byte[] data, int offset, int byteCount) { - this(data, offset, byteCount, Charset.defaultCharset()); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -162,16 +139,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque */ @Deprecated public String(byte[] data, int high, int offset, int byteCount) { - if ((offset | byteCount) < 0 || byteCount > data.length - offset) { - throw failedBoundsCheck(data.length, offset, byteCount); - } - this.offset = 0; - this.value = new char[byteCount]; - this.count = byteCount; - high <<= 8; - for (int i = 0; i < count; i++) { - value[i] = (char) (high + (data[offset++] & 0xff)); - } + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -188,7 +156,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * if the named charset is not supported. */ public String(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException { - this(data, offset, byteCount, Charset.forNameUEE(charsetName)); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -203,7 +171,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * if {@code charsetName} is not supported. */ public String(byte[] data, String charsetName) throws UnsupportedEncodingException { - this(data, 0, data.length, Charset.forNameUEE(charsetName)); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -221,144 +189,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * @since 1.6 */ public String(byte[] data, int offset, int byteCount, Charset charset) { - if ((offset | byteCount) < 0 || byteCount > data.length - offset) { - throw failedBoundsCheck(data.length, offset, byteCount); - } - - // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed and because 'count' and - // 'value' are final. - String canonicalCharsetName = charset.name(); - if (canonicalCharsetName.equals("UTF-8")) { - byte[] d = data; - char[] v = new char[byteCount]; - - int idx = offset; - int last = offset + byteCount; - int s = 0; -outer: - while (idx < last) { - byte b0 = d[idx++]; - if ((b0 & 0x80) == 0) { - // 0xxxxxxx - // Range: U-00000000 - U-0000007F - int val = b0 & 0xff; - v[s++] = (char) val; - } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) || - ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) { - int utfCount = 1; - if ((b0 & 0xf0) == 0xe0) utfCount = 2; - else if ((b0 & 0xf8) == 0xf0) utfCount = 3; - else if ((b0 & 0xfc) == 0xf8) utfCount = 4; - else if ((b0 & 0xfe) == 0xfc) utfCount = 5; - - // 110xxxxx (10xxxxxx)+ - // Range: U-00000080 - U-000007FF (count == 1) - // Range: U-00000800 - U-0000FFFF (count == 2) - // Range: U-00010000 - U-001FFFFF (count == 3) - // Range: U-00200000 - U-03FFFFFF (count == 4) - // Range: U-04000000 - U-7FFFFFFF (count == 5) - - if (idx + utfCount > last) { - v[s++] = REPLACEMENT_CHAR; - continue; - } - - // Extract usable bits from b0 - int val = b0 & (0x1f >> (utfCount - 1)); - for (int i = 0; i < utfCount; ++i) { - byte b = d[idx++]; - if ((b & 0xc0) != 0x80) { - v[s++] = REPLACEMENT_CHAR; - idx--; // Put the input char back - continue outer; - } - // Push new bits in from the right side - val <<= 6; - val |= b & 0x3f; - } - - // Note: Java allows overlong char - // specifications To disallow, check that val - // is greater than or equal to the minimum - // value for each count: - // - // count min value - // ----- ---------- - // 1 0x80 - // 2 0x800 - // 3 0x10000 - // 4 0x200000 - // 5 0x4000000 - - // Allow surrogate values (0xD800 - 0xDFFF) to - // be specified using 3-byte UTF values only - if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) { - v[s++] = REPLACEMENT_CHAR; - continue; - } - - // Reject chars greater than the Unicode maximum of U+10FFFF. - if (val > 0x10FFFF) { - v[s++] = REPLACEMENT_CHAR; - continue; - } - - // Encode chars from U+10000 up as surrogate pairs - if (val < 0x10000) { - v[s++] = (char) val; - } else { - int x = val & 0xffff; - int u = (val >> 16) & 0x1f; - int w = (u - 1) & 0xffff; - int hi = 0xd800 | (w << 6) | (x >> 10); - int lo = 0xdc00 | (x & 0x3ff); - v[s++] = (char) hi; - v[s++] = (char) lo; - } - } else { - // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff - v[s++] = REPLACEMENT_CHAR; - } - } - - if (s == byteCount) { - // We guessed right, so we can use our temporary array as-is. - this.offset = 0; - this.value = v; - this.count = s; - } else { - // Our temporary array was too big, so reallocate and copy. - this.offset = 0; - this.value = new char[s]; - this.count = s; - System.arraycopy(v, 0, value, 0, s); - } - } else if (canonicalCharsetName.equals("ISO-8859-1")) { - this.offset = 0; - this.value = new char[byteCount]; - this.count = byteCount; - CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value); - } else if (canonicalCharsetName.equals("US-ASCII")) { - this.offset = 0; - this.value = new char[byteCount]; - this.count = byteCount; - CharsetUtils.asciiBytesToChars(data, offset, byteCount, value); - } else { - CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount)); - this.offset = 0; - this.count = cb.length(); - if (count > 0) { - // We could use cb.array() directly, but that would mean we'd have to trust - // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later, - // which would break String's immutability guarantee. It would also tend to - // mean that we'd be wasting memory because CharsetDecoder doesn't trim the - // array. So we copy. - this.value = new char[count]; - System.arraycopy(cb.array(), 0, value, 0, count); - } else { - this.value = EmptyArray.CHAR; - } - } + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -368,7 +199,7 @@ outer: * @since 1.6 */ public String(byte[] data, Charset charset) { - this(data, 0, data.length, charset); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -379,7 +210,7 @@ outer: * @throws NullPointerException if {@code data == null} */ public String(char[] data) { - this(data, 0, data.length); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -393,36 +224,25 @@ outer: * if {@code charCount < 0 || offset < 0 || offset + charCount > data.length} */ public String(char[] data, int offset, int charCount) { - if ((offset | charCount) < 0 || charCount > data.length - offset) { - throw failedBoundsCheck(data.length, offset, charCount); - } - this.offset = 0; - this.value = new char[charCount]; - this.count = charCount; - System.arraycopy(data, offset, value, 0, count); + throw new UnsupportedOperationException("Use StringFactory instead."); } /* * Internal version of the String(char[], int, int) constructor. - * Does not range check, null check, or copy the array. + * Does not range check or null check. */ + // TODO: Replace calls to this with calls to StringFactory, will require + // splitting other files in java.lang. String(int offset, int charCount, char[] chars) { - this.value = chars; - this.offset = offset; - this.count = charCount; + throw new UnsupportedOperationException("Use StringFactory instead."); } /** - * Constructs a copy of the given string. - * The returned string's <a href="#backing_array">backing array</a> - * is no larger than necessary. + * Constructs a new string with the same sequence of characters as {@code + * toCopy}. */ public String(String toCopy) { - value = (toCopy.value.length == toCopy.count) - ? toCopy.value - : Arrays.copyOfRange(toCopy.value, toCopy.offset, toCopy.offset + toCopy.length()); - offset = 0; - count = value.length; + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -430,11 +250,7 @@ outer: * {@code StringBuffer}. */ public String(StringBuffer stringBuffer) { - offset = 0; - synchronized (stringBuffer) { - value = stringBuffer.shareValue(); - count = stringBuffer.length(); - } + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -451,20 +267,7 @@ outer: * @since 1.5 */ public String(int[] codePoints, int offset, int count) { - if (codePoints == null) { - throw new NullPointerException("codePoints == null"); - } - if ((offset | count) < 0 || count > codePoints.length - offset) { - throw failedBoundsCheck(codePoints.length, offset, count); - } - this.offset = 0; - this.value = new char[count * 2]; - int end = offset + count; - int c = 0; - for (int i = offset; i < end; i++) { - c += Character.toChars(codePoints[i], this.value, c); - } - this.count = c; + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -476,25 +279,16 @@ outer: * @since 1.5 */ public String(StringBuilder stringBuilder) { - if (stringBuilder == null) { - throw new NullPointerException("stringBuilder == null"); - } - this.offset = 0; - this.count = stringBuilder.length(); - this.value = new char[this.count]; - stringBuilder.getChars(0, this.count, this.value, 0); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** * Returns the {@code char} at {@code index}. * @throws IndexOutOfBoundsException if {@code index < 0} or {@code index >= length()}. */ - public char charAt(int index) { - if (index < 0 || index >= count) { - throw indexAndLength(index); - } - return value[offset + index]; - } + public native char charAt(int index); + + native void setCharAt(int index, char c); private StringIndexOutOfBoundsException indexAndLength(int index) { throw new StringIndexOutOfBoundsException(this, index); @@ -557,12 +351,11 @@ outer: * if {@code string} is {@code null}. */ public int compareToIgnoreCase(String string) { - int o1 = offset, o2 = string.offset, result; - int end = offset + (count < string.count ? count : string.count); + int result; + int end = count < string.count ? count : string.count; char c1, c2; - char[] target = string.value; - while (o1 < end) { - if ((c1 = value[o1++]) == (c2 = target[o2++])) { + for (int i = 0; i < end; ++i) { + if ((c1 = charAt(i)) == (c2 = string.charAt(i))) { continue; } c1 = foldCase(c1); @@ -582,15 +375,7 @@ outer: * @return a new string which is the concatenation of this string and the * specified string. */ - public String concat(String string) { - if (string.count > 0 && count > 0) { - char[] buffer = new char[count + string.count]; - System.arraycopy(value, offset, buffer, 0, count); - System.arraycopy(string.value, string.offset, buffer, count, string.count); - return new String(0, buffer.length, buffer); - } - return count == 0 ? string : this; - } + public native String concat(String string); /** * Creates a new string by copying the given {@code char[]}. @@ -601,7 +386,7 @@ outer: * if {@code data} is {@code null}. */ public static String copyValueOf(char[] data) { - return new String(data, 0, data.length); + return StringFactory.newStringFromChars(data, 0, data.length); } /** @@ -616,7 +401,7 @@ outer: * data.length}. */ public static String copyValueOf(char[] data, int start, int length) { - return new String(data, start, length); + return StringFactory.newStringFromChars(data, start, length); } /** @@ -654,16 +439,10 @@ outer: if (hashCode() != s.hashCode()) { return false; } - char[] value1 = value; - int offset1 = offset; - char[] value2 = s.value; - int offset2 = s.offset; - for (int end = offset1 + count; offset1 < end; ) { - if (value1[offset1] != value2[offset2]) { + for (int i = 0; i < count; ++i) { + if (charAt(i) != s.charAt(i)) { return false; } - offset1++; - offset2++; } return true; } else { @@ -686,12 +465,9 @@ outer: if (string == null || count != string.count) { return false; } - int o1 = offset, o2 = string.offset; - int end = offset + count; - char[] target = string.value; - while (o1 < end) { - char c1 = value[o1++]; - char c2 = target[o2++]; + for (int i = 0; i < count; ++i) { + char c1 = charAt(i); + char c2 = string.charAt(i); if (c1 != c2 && foldCase(c1) != foldCase(c2)) { return false; } @@ -721,10 +497,9 @@ outer: @Deprecated public void getBytes(int start, int end, byte[] data, int index) { if (start >= 0 && start <= end && end <= count) { - end += offset; try { - for (int i = offset + start; i < end; i++) { - data[index++] = (byte) value[i]; + for (int i = start; i < end; ++i) { + data[index++] = (byte) charAt(i); } } catch (ArrayIndexOutOfBoundsException ignored) { throw failedBoundsCheck(data.length, index, end - start); @@ -772,16 +547,15 @@ outer: public byte[] getBytes(Charset charset) { String canonicalCharsetName = charset.name(); if (canonicalCharsetName.equals("UTF-8")) { - return CharsetUtils.toUtf8Bytes(value, offset, count); + return CharsetUtils.toUtf8Bytes(this, 0, count); } else if (canonicalCharsetName.equals("ISO-8859-1")) { - return CharsetUtils.toIsoLatin1Bytes(value, offset, count); + return CharsetUtils.toIsoLatin1Bytes(this, 0, count); } else if (canonicalCharsetName.equals("US-ASCII")) { - return CharsetUtils.toAsciiBytes(value, offset, count); + return CharsetUtils.toAsciiBytes(this, 0, count); } else if (canonicalCharsetName.equals("UTF-16BE")) { - return CharsetUtils.toBigEndianUtf16Bytes(value, offset, count); + return CharsetUtils.toBigEndianUtf16Bytes(this, 0, count); } else { - CharBuffer chars = CharBuffer.wrap(this.value, this.offset, this.count); - ByteBuffer buffer = charset.encode(chars.asReadOnlyBuffer()); + ByteBuffer buffer = charset.encode(this); byte[] bytes = new byte[buffer.limit()]; buffer.get(bytes); return bytes; @@ -809,7 +583,16 @@ outer: */ public void getChars(int start, int end, char[] buffer, int index) { if (start >= 0 && start <= end && end <= count) { - System.arraycopy(value, start + offset, buffer, index, end - start); + if (buffer == null) { + throw new NullPointerException("buffer == null"); + } + if (index < 0) { + throw new IndexOutOfBoundsException("index < 0"); + } + if (end - start > buffer.length - index) { + throw new ArrayIndexOutOfBoundsException("end - start > buffer.length - index"); + } + getCharsNoCheck(start, end, buffer, index); } else { // We throw StringIndexOutOfBoundsException rather than System.arraycopy's AIOOBE. throw startEndAndLength(start, end); @@ -821,9 +604,7 @@ outer: * within the java.lang package only. The caller is responsible for * ensuring that start >= 0 && start <= end && end <= count. */ - void _getChars(int start, int end, char[] buffer, int index) { - System.arraycopy(value, start + offset, buffer, index, end - start); - } + native void getCharsNoCheck(int start, int end, char[] buffer, int index); @Override public int hashCode() { int hash = hashCode; @@ -831,10 +612,8 @@ outer: if (count == 0) { return 0; } - final int end = count + offset; - final char[] chars = value; - for (int i = offset; i < end; ++i) { - hash = 31*hash + chars[i]; + for (int i = 0; i < count; ++i) { + hash = 31 * hash + charAt(i); } hashCode = hash; } @@ -893,21 +672,17 @@ outer: if (subCount > _count) { return -1; } - char[] target = string.value; - int subOffset = string.offset; - char firstChar = target[subOffset]; - int end = subOffset + subCount; + char firstChar = string.charAt(0); while (true) { int i = indexOf(firstChar, start); if (i == -1 || subCount + i > _count) { return -1; // handles subCount > count || start >= count } - int o1 = offset + i, o2 = subOffset; - char[] _value = value; - while (++o2 < end && _value[++o1] == target[o2]) { + int o1 = i, o2 = 0; + while (++o2 < subCount && charAt(++o1) == string.charAt(o2)) { // Intentionally empty } - if (o2 == end) { + if (o2 == subCount) { return i; } start = i + 1; @@ -934,21 +709,17 @@ outer: if (subCount + start > _count) { return -1; } - char[] target = subString.value; - int subOffset = subString.offset; - char firstChar = target[subOffset]; - int end = subOffset + subCount; + char firstChar = subString.charAt(0); while (true) { int i = indexOf(firstChar, start); if (i == -1 || subCount + i > _count) { return -1; // handles subCount > count || start >= count } - int o1 = offset + i, o2 = subOffset; - char[] _value = value; - while (++o2 < end && _value[++o1] == target[o2]) { + int o1 = i, o2 = 0; + while (++o2 < subCount && charAt(++o1) == subString.charAt(o2)) { // Intentionally empty } - if (o2 == end) { + if (o2 == subCount) { return i; } start = i + 1; @@ -991,11 +762,9 @@ outer: return lastIndexOfSupplementary(c, Integer.MAX_VALUE); } int _count = count; - int _offset = offset; - char[] _value = value; - for (int i = _offset + _count - 1; i >= _offset; --i) { - if (_value[i] == c) { - return i - _offset; + for (int i = _count - 1; i >= 0; --i) { + if (charAt(i) == c) { + return i; } } return -1; @@ -1011,15 +780,13 @@ outer: return lastIndexOfSupplementary(c, start); } int _count = count; - int _offset = offset; - char[] _value = value; if (start >= 0) { if (start >= _count) { start = _count - 1; } - for (int i = _offset + start; i >= _offset; --i) { - if (_value[i] == c) { - return i - _offset; + for (int i = start; i >= 0; --i) { + if (charAt(i) == c) { + return i; } } } @@ -1031,7 +798,7 @@ outer: return -1; } char[] chars = Character.toChars(c); - String needle = new String(0, chars.length, chars); + String needle = StringFactory.newStringFromChars(0, chars.length, chars); return lastIndexOf(needle, start); } @@ -1065,20 +832,17 @@ outer: start = count - subCount; } // count and subCount are both >= 1 - char[] target = subString.value; - int subOffset = subString.offset; - char firstChar = target[subOffset]; - int end = subOffset + subCount; + char firstChar = subString.charAt(0); while (true) { int i = lastIndexOf(firstChar, start); if (i == -1) { return -1; } - int o1 = offset + i, o2 = subOffset; - while (++o2 < end && value[++o1] == target[o2]) { + int o1 = i, o2 = 0; + while (++o2 < subCount && charAt(++o1) == subString.charAt(o2)) { // Intentionally empty } - if (o2 == end) { + if (o2 == subCount) { return i; } start = i - 1; @@ -1121,11 +885,8 @@ outer: if (length <= 0) { return true; } - int o1 = offset + thisStart, o2 = string.offset + start; - char[] value1 = value; - char[] value2 = string.value; for (int i = 0; i < length; ++i) { - if (value1[o1 + i] != value2[o2 + i]) { + if (charAt(thisStart + i) != string.charAt(start + i)) { return false; } } @@ -1164,13 +925,10 @@ outer: if (start < 0 || length > string.count - start) { return false; } - thisStart += offset; - start += string.offset; int end = thisStart + length; - char[] target = string.value; while (thisStart < end) { - char c1 = value[thisStart++]; - char c2 = target[start++]; + char c1 = charAt(thisStart++); + char c2 = string.charAt(start++); if (c1 != c2 && foldCase(c1) != foldCase(c2)) { return false; } @@ -1182,29 +940,20 @@ outer: * Returns a copy of this string after replacing occurrences of the given {@code char} with another. */ public String replace(char oldChar, char newChar) { - char[] buffer = value; - int _offset = offset; + String s = null; int _count = count; - - int idx = _offset; - int last = _offset + _count; boolean copied = false; - while (idx < last) { - if (buffer[idx] == oldChar) { + for (int i = 0; i < _count; ++i) { + if (charAt(i) == oldChar) { if (!copied) { - char[] newBuffer = new char[_count]; - System.arraycopy(buffer, _offset, newBuffer, 0, _count); - buffer = newBuffer; - idx -= _offset; - last -= _offset; + s = StringFactory.newStringFromString(this); copied = true; } - buffer[idx] = newChar; + s.setCharAt(i, newChar); } - idx++; } - return copied ? new String(0, count, buffer) : this; + return copied ? s : this; } /** @@ -1241,9 +990,8 @@ outer: int resultLength = count + (count + 1) * replacementString.length(); StringBuilder result = new StringBuilder(resultLength); result.append(replacementString); - int end = offset + count; - for (int i = offset; i != end; ++i) { - result.append(value[i]); + for (int i = 0; i != count; ++i) { + result.append(charAt(i)); result.append(replacementString); } return result.toString(); @@ -1252,15 +1000,21 @@ outer: StringBuilder result = new StringBuilder(count); int searchStart = 0; do { - // Copy chars before the match... - result.append(value, offset + searchStart, matchStart - searchStart); + // Copy characters before the match... + // TODO: Perform this faster than one char at a time? + for (int i = searchStart; i < matchStart; ++i) { + result.append(charAt(i)); + } // Insert the replacement... result.append(replacementString); // And skip over the match... searchStart = matchStart + targetLength; } while ((matchStart = indexOf(targetString, searchStart)) != -1); // Copy any trailing chars... - result.append(value, offset + searchStart, count - searchStart); + // TODO: Perform this faster than one char at a time? + for (int i = searchStart; i < count; ++i) { + result.append(charAt(i)); + } return result.toString(); } @@ -1308,7 +1062,7 @@ outer: return this; } if (start >= 0 && start <= count) { - return new String(offset + start, count - start, value); + return fastSubstring(start, count - start); } throw indexAndLength(start); } @@ -1328,21 +1082,19 @@ outer: } // Fast range check. if (start >= 0 && start <= end && end <= count) { - return new String(offset + start, end - start, value); + return fastSubstring(start, end - start); } throw startEndAndLength(start, end); } + private native String fastSubstring(int start, int length); + /** * Returns a new {@code char} array containing a copy of the {@code char}s in this string. * This is expensive and rarely useful. If you just want to iterate over the {@code char}s in * the string, use {@link #charAt} instead. */ - public char[] toCharArray() { - char[] buffer = new char[count]; - System.arraycopy(value, offset, buffer, 0, count); - return buffer; - } + public native char[] toCharArray(); /** * Converts this string to lower case, using the rules of the user's default locale. @@ -1351,7 +1103,7 @@ outer: * @return a new lower case string, or {@code this} if it's already all lower case. */ public String toLowerCase() { - return CaseMapper.toLowerCase(Locale.getDefault(), this, value, offset, count); + return CaseMapper.toLowerCase(Locale.getDefault(), this); } /** @@ -1368,7 +1120,7 @@ outer: * @return a new lower case string, or {@code this} if it's already all lower case. */ public String toLowerCase(Locale locale) { - return CaseMapper.toLowerCase(locale, this, value, offset, count); + return CaseMapper.toLowerCase(locale, this); } /** @@ -1386,7 +1138,7 @@ outer: * @return a new upper case string, or {@code this} if it's already all upper case. */ public String toUpperCase() { - return CaseMapper.toUpperCase(Locale.getDefault(), this, value, offset, count); + return CaseMapper.toUpperCase(Locale.getDefault(), this, count); } /** @@ -1403,7 +1155,7 @@ outer: * @return a new upper case string, or {@code this} if it's already all upper case. */ public String toUpperCase(Locale locale) { - return CaseMapper.toUpperCase(locale, this, value, offset, count); + return CaseMapper.toUpperCase(locale, this, count); } /** @@ -1411,18 +1163,18 @@ outer: * the beginning or end. */ public String trim() { - int start = offset, last = offset + count - 1; + int start = 0, last = count - 1; int end = last; - while ((start <= end) && (value[start] <= ' ')) { + while ((start <= end) && (charAt(start) <= ' ')) { start++; } - while ((end >= start) && (value[end] <= ' ')) { + while ((end >= start) && (charAt(end) <= ' ')) { end--; } - if (start == offset && end == last) { + if (start == 0 && end == last) { return this; } - return new String(start, end - start + 1, value); + return fastSubstring(start, end - start + 1); } /** @@ -1434,7 +1186,7 @@ outer: * if {@code data} is {@code null}. */ public static String valueOf(char[] data) { - return new String(data, 0, data.length); + return StringFactory.newStringFromChars(data, 0, data.length); } /** @@ -1448,7 +1200,7 @@ outer: * if {@code data} is {@code null}. */ public static String valueOf(char[] data, int start, int length) { - return new String(data, start, length); + return StringFactory.newStringFromChars(data, start, length); } /** @@ -1457,9 +1209,9 @@ outer: public static String valueOf(char value) { String s; if (value < 128) { - s = new String(value, 1, ASCII); + s = StringFactory.newStringFromChars(value, 1, ASCII); } else { - s = new String(0, 1, new char[] { value }); + s = StringFactory.newStringFromChars(0, 1, new char[] { value }); } s.hashCode = value; return s; @@ -1533,7 +1285,8 @@ outer: if (count != size) { return false; } - return regionMatches(0, new String(0, size, sb.getValue()), 0, size); + String s = StringFactory.newStringFromChars(0, size, sb.getValue()); + return regionMatches(0, s, 0, size); } } @@ -1682,7 +1435,7 @@ outer: if (index < 0 || index >= count) { throw indexAndLength(index); } - return Character.codePointAt(value, offset + index, offset + count); + return Character.codePointAt(this, index); } /** @@ -1696,7 +1449,7 @@ outer: if (index < 1 || index > count) { throw indexAndLength(index); } - return Character.codePointBefore(value, offset + index, offset); + return Character.codePointBefore(this, index); } /** @@ -1717,7 +1470,7 @@ outer: if (start < 0 || end > count || start > end) { throw startEndAndLength(start, end); } - return Character.codePointCount(value, offset + start, end - start); + return Character.codePointCount(this, start, end); } /** @@ -1748,9 +1501,7 @@ outer: * @since 1.5 */ public int offsetByCodePoints(int index, int codePointOffset) { - int s = index + offset; - int r = Character.offsetByCodePoints(value, offset, count, s, codePointOffset); - return r - offset; + return Character.offsetByCodePoints(this, index, codePointOffset); } /** @@ -1816,31 +1567,26 @@ outer: @SuppressWarnings("unused") private static int indexOf(String haystackString, String needleString, int cache, int md2, char lastChar) { - char[] haystack = haystackString.value; - int haystackOffset = haystackString.offset; int haystackLength = haystackString.count; - char[] needle = needleString.value; - int needleOffset = needleString.offset; int needleLength = needleString.count; int needleLengthMinus1 = needleLength - 1; - int haystackEnd = haystackOffset + haystackLength; - outer_loop: for (int i = haystackOffset + needleLengthMinus1; i < haystackEnd;) { - if (lastChar == haystack[i]) { + outer_loop: for (int i = needleLengthMinus1; i < haystackLength;) { + if (lastChar == haystackString.charAt(i)) { for (int j = 0; j < needleLengthMinus1; ++j) { - if (needle[j + needleOffset] != haystack[i + j - - needleLengthMinus1]) { + if (needleString.charAt(j) != + haystackString.charAt(i + j - needleLengthMinus1)) { int skip = 1; - if ((cache & (1 << haystack[i])) == 0) { + if ((cache & (1 << haystackString.charAt(i))) == 0) { skip += j; } i += Math.max(md2, skip); continue outer_loop; } } - return i - needleLengthMinus1 - haystackOffset; + return i - needleLengthMinus1; } - if ((cache & (1 << haystack[i])) == 0) { + if ((cache & (1 << haystackString.charAt(i))) == 0) { i += needleLengthMinus1; } i++; diff --git a/libart/src/main/java/java/lang/StringFactory.java b/libart/src/main/java/java/lang/StringFactory.java new file mode 100644 index 0000000..4fc3eba --- /dev/null +++ b/libart/src/main/java/java/lang/StringFactory.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package java.lang; + +import java.io.Serializable; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.Comparator; +import libcore.util.CharsetUtils; +import libcore.util.EmptyArray; + +/** + * Class used to generate strings instead of calling String.<init>. + * + * @hide + */ +public final class StringFactory { + + // TODO: Remove once native methods are in place. + private static final char REPLACEMENT_CHAR = (char) 0xfffd; + + public static String newEmptyString() { + return newStringFromChars(EmptyArray.CHAR, 0, 0); + } + + public static String newStringFromBytes(byte[] data) { + return newStringFromBytes(data, 0, data.length); + } + + public static String newStringFromBytes(byte[] data, int high) { + return newStringFromBytes(data, high, 0, data.length); + } + + public static String newStringFromBytes(byte[] data, int offset, int byteCount) { + return newStringFromBytes(data, offset, byteCount, Charset.defaultCharset()); + } + + public static native String newStringFromBytes(byte[] data, int high, int offset, int byteCount); + + public static String newStringFromBytes(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException { + return newStringFromBytes(data, offset, byteCount, Charset.forNameUEE(charsetName)); + } + + public static String newStringFromBytes(byte[] data, String charsetName) throws UnsupportedEncodingException { + return newStringFromBytes(data, 0, data.length, Charset.forNameUEE(charsetName)); + } + + // TODO: Implement this method natively. + public static String newStringFromBytes(byte[] data, int offset, int byteCount, Charset charset) { + if ((offset | byteCount) < 0 || byteCount > data.length - offset) { + throw new StringIndexOutOfBoundsException(data.length, offset, byteCount); + } + + char[] value; + int length; + + // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed. + String canonicalCharsetName = charset.name(); + if (canonicalCharsetName.equals("UTF-8")) { + byte[] d = data; + char[] v = new char[byteCount]; + + int idx = offset; + int last = offset + byteCount; + int s = 0; +outer: + while (idx < last) { + byte b0 = d[idx++]; + if ((b0 & 0x80) == 0) { + // 0xxxxxxx + // Range: U-00000000 - U-0000007F + int val = b0 & 0xff; + v[s++] = (char) val; + } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) || + ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) { + int utfCount = 1; + if ((b0 & 0xf0) == 0xe0) utfCount = 2; + else if ((b0 & 0xf8) == 0xf0) utfCount = 3; + else if ((b0 & 0xfc) == 0xf8) utfCount = 4; + else if ((b0 & 0xfe) == 0xfc) utfCount = 5; + + // 110xxxxx (10xxxxxx)+ + // Range: U-00000080 - U-000007FF (count == 1) + // Range: U-00000800 - U-0000FFFF (count == 2) + // Range: U-00010000 - U-001FFFFF (count == 3) + // Range: U-00200000 - U-03FFFFFF (count == 4) + // Range: U-04000000 - U-7FFFFFFF (count == 5) + + if (idx + utfCount > last) { + v[s++] = REPLACEMENT_CHAR; + continue; + } + + // Extract usable bits from b0 + int val = b0 & (0x1f >> (utfCount - 1)); + for (int i = 0; i < utfCount; ++i) { + byte b = d[idx++]; + if ((b & 0xc0) != 0x80) { + v[s++] = REPLACEMENT_CHAR; + idx--; // Put the input char back + continue outer; + } + // Push new bits in from the right side + val <<= 6; + val |= b & 0x3f; + } + + // Note: Java allows overlong char + // specifications To disallow, check that val + // is greater than or equal to the minimum + // value for each count: + // + // count min value + // ----- ---------- + // 1 0x80 + // 2 0x800 + // 3 0x10000 + // 4 0x200000 + // 5 0x4000000 + + // Allow surrogate values (0xD800 - 0xDFFF) to + // be specified using 3-byte UTF values only + if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) { + v[s++] = REPLACEMENT_CHAR; + continue; + } + + // Reject chars greater than the Unicode maximum of U+10FFFF. + if (val > 0x10FFFF) { + v[s++] = REPLACEMENT_CHAR; + continue; + } + + // Encode chars from U+10000 up as surrogate pairs + if (val < 0x10000) { + v[s++] = (char) val; + } else { + int x = val & 0xffff; + int u = (val >> 16) & 0x1f; + int w = (u - 1) & 0xffff; + int hi = 0xd800 | (w << 6) | (x >> 10); + int lo = 0xdc00 | (x & 0x3ff); + v[s++] = (char) hi; + v[s++] = (char) lo; + } + } else { + // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff + v[s++] = REPLACEMENT_CHAR; + } + } + + if (s == byteCount) { + // We guessed right, so we can use our temporary array as-is. + value = v; + length = s; + } else { + // Our temporary array was too big, so reallocate and copy. + value = new char[s]; + length = s; + System.arraycopy(v, 0, value, 0, s); + } + } else if (canonicalCharsetName.equals("ISO-8859-1")) { + value = new char[byteCount]; + length = byteCount; + CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value); + } else if (canonicalCharsetName.equals("US-ASCII")) { + value = new char[byteCount]; + length = byteCount; + CharsetUtils.asciiBytesToChars(data, offset, byteCount, value); + } else { + CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount)); + length = cb.length(); + if (length > 0) { + // We could use cb.array() directly, but that would mean we'd have to trust + // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later, + // which would break String's immutability guarantee. It would also tend to + // mean that we'd be wasting memory because CharsetDecoder doesn't trim the + // array. So we copy. + value = new char[length]; + System.arraycopy(cb.array(), 0, value, 0, length); + } else { + value = EmptyArray.CHAR; + } + } + return newStringFromChars(value, 0, length); + } + + public static String newStringFromBytes(byte[] data, Charset charset) { + return newStringFromBytes(data, 0, data.length, charset); + } + + public static String newStringFromChars(char[] data) { + return newStringFromChars(data, 0, data.length); + } + + public static String newStringFromChars(char[] data, int offset, int charCount) { + if ((offset | charCount) < 0 || charCount > data.length - offset) { + throw new StringIndexOutOfBoundsException(data.length, offset, charCount); + } + return newStringFromChars(offset, charCount, data); + } + + static native String newStringFromChars(int offset, int charCount, char[] data); + + public static native String newStringFromString(String toCopy); + + public static String newStringFromStringBuffer(StringBuffer stringBuffer) { + synchronized (stringBuffer) { + return newStringFromChars(stringBuffer.getValue(), 0, stringBuffer.length()); + } + } + + // TODO: Implement this method natively. + public static String newStringFromCodePoints(int[] codePoints, int offset, int count) { + if (codePoints == null) { + throw new NullPointerException("codePoints == null"); + } + if ((offset | count) < 0 || count > codePoints.length - offset) { + throw new StringIndexOutOfBoundsException(codePoints.length, offset, count); + } + char[] value = new char[count * 2]; + int end = offset + count; + int length = 0; + for (int i = offset; i < end; i++) { + length += Character.toChars(codePoints[i], value, length); + } + return newStringFromChars(value, 0, length); + } + + public static String newStringFromStringBuilder(StringBuilder stringBuilder) { + return newStringFromChars(stringBuilder.getValue(), 0, stringBuilder.length()); + } +} diff --git a/luni/src/main/java/libcore/util/CharsetUtils.java b/luni/src/main/java/libcore/util/CharsetUtils.java index 2e426c4..5163dba 100644 --- a/luni/src/main/java/libcore/util/CharsetUtils.java +++ b/luni/src/main/java/libcore/util/CharsetUtils.java @@ -23,33 +23,33 @@ package libcore.util; */ public final class CharsetUtils { /** - * Returns a new byte array containing the bytes corresponding to the given characters, - * encoded in US-ASCII. Unrepresentable characters are replaced by (byte) '?'. + * Returns a new byte array containing the bytes corresponding to the characters in the given + * string, encoded in US-ASCII. Unrepresentable characters are replaced by (byte) '?'. */ - public static native byte[] toAsciiBytes(char[] chars, int offset, int length); + public static native byte[] toAsciiBytes(String s, int offset, int length); /** - * Returns a new byte array containing the bytes corresponding to the given characters, - * encoded in ISO-8859-1. Unrepresentable characters are replaced by (byte) '?'. + * Returns a new byte array containing the bytes corresponding to the characters in the given + * string, encoded in ISO-8859-1. Unrepresentable characters are replaced by (byte) '?'. */ - public static native byte[] toIsoLatin1Bytes(char[] chars, int offset, int length); + public static native byte[] toIsoLatin1Bytes(String s, int offset, int length); /** - * Returns a new byte array containing the bytes corresponding to the given characters, - * encoded in UTF-8. All characters are representable in UTF-8. + * Returns a new byte array containing the bytes corresponding to the characters in the given + * string, encoded in UTF-8. All characters are representable in UTF-8. */ - public static native byte[] toUtf8Bytes(char[] chars, int offset, int length); + public static native byte[] toUtf8Bytes(String s, int offset, int length); /** - * Returns a new byte array containing the bytes corresponding to the given characters, - * encoded in UTF-16BE. All characters are representable in UTF-16BE. + * Returns a new byte array containing the bytes corresponding to the characters in the given + * string, encoded in UTF-16BE. All characters are representable in UTF-16BE. */ - public static byte[] toBigEndianUtf16Bytes(char[] chars, int offset, int length) { + public static byte[] toBigEndianUtf16Bytes(String s, int offset, int length) { byte[] result = new byte[length * 2]; int end = offset + length; int resultIndex = 0; for (int i = offset; i < end; ++i) { - char ch = chars[i]; + char ch = s.charAt(i); result[resultIndex++] = (byte) (ch >> 8); result[resultIndex++] = (byte) ch; } diff --git a/luni/src/main/native/Register.cpp b/luni/src/main/native/Register.cpp index 0f2d0ad..acc1e4f 100644 --- a/luni/src/main/native/Register.cpp +++ b/luni/src/main/native/Register.cpp @@ -69,7 +69,6 @@ jint JNI_OnLoad(JavaVM* vm, void*) { REGISTER(register_libcore_io_AsynchronousCloseMonitor); REGISTER(register_libcore_io_Memory); REGISTER(register_libcore_io_Posix); - REGISTER(register_libcore_util_CharsetUtils); REGISTER(register_org_apache_harmony_dalvik_NativeTestTarget); REGISTER(register_org_apache_harmony_xml_ExpatParser); REGISTER(register_sun_misc_Unsafe); diff --git a/luni/src/main/native/libcore_util_CharsetUtils.cpp b/luni/src/main/native/libcore_util_CharsetUtils.cpp index 57c8172..e69de29 100644 --- a/luni/src/main/native/libcore_util_CharsetUtils.cpp +++ b/luni/src/main/native/libcore_util_CharsetUtils.cpp @@ -1,250 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define LOG_TAG "String" - -#include "JNIHelp.h" -#include "JniConstants.h" -#include "ScopedPrimitiveArray.h" -#include "jni.h" -#include "unicode/utf16.h" - -#include <string.h> - -/** - * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into - * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly - * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s. - * - * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only - * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie - * to the garbage collector (nor hide potentially large allocations from it). - * - * Because a call to append might require an allocation, it might fail. Callers should always - * check the return value of append. - */ -class NativeUnsafeByteSequence { -public: - NativeUnsafeByteSequence(JNIEnv* env) - : mEnv(env), mJavaArray(NULL), mRawArray(NULL), mSize(-1), mOffset(0) - { - } - - ~NativeUnsafeByteSequence() { - // Release our pointer to the raw array, copying changes back to the Java heap. - if (mRawArray != NULL) { - mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0); - } - } - - bool append(jbyte b) { - if (mOffset == mSize && !resize(mSize * 2)) { - return false; - } - mRawArray[mOffset++] = b; - return true; - } - - bool resize(int newSize) { - if (newSize == mSize) { - return true; - } - - // Allocate a new array. - jbyteArray newJavaArray = mEnv->NewByteArray(newSize); - if (newJavaArray == NULL) { - return false; - } - jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, NULL); - if (newRawArray == NULL) { - return false; - } - - // Copy data out of the old array and then let go of it. - // Note that we may be trimming the array. - if (mRawArray != NULL) { - memcpy(newRawArray, mRawArray, mOffset); - mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT); - mEnv->DeleteLocalRef(mJavaArray); - } - - // Point ourselves at the new array. - mJavaArray = newJavaArray; - mRawArray = newRawArray; - mSize = newSize; - return true; - } - - jbyteArray toByteArray() { - // Trim any unused space, if necessary. - bool okay = resize(mOffset); - return okay ? mJavaArray : NULL; - } - -private: - JNIEnv* mEnv; - jbyteArray mJavaArray; - jbyte* mRawArray; - jint mSize; - jint mOffset; - - // Disallow copy and assignment. - NativeUnsafeByteSequence(const NativeUnsafeByteSequence&); - void operator=(const NativeUnsafeByteSequence&); -}; - -static void Charsets_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) { - ScopedByteArrayRO bytes(env, javaBytes); - if (bytes.get() == NULL) { - return; - } - ScopedCharArrayRW chars(env, javaChars); - if (chars.get() == NULL) { - return; - } - - const jbyte* src = &bytes[offset]; - jchar* dst = &chars[0]; - static const jchar REPLACEMENT_CHAR = 0xfffd; - for (int i = length - 1; i >= 0; --i) { - jchar ch = static_cast<jchar>(*src++ & 0xff); - *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR; - } -} - -static void Charsets_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) { - ScopedByteArrayRO bytes(env, javaBytes); - if (bytes.get() == NULL) { - return; - } - ScopedCharArrayRW chars(env, javaChars); - if (chars.get() == NULL) { - return; - } - - const jbyte* src = &bytes[offset]; - jchar* dst = &chars[0]; - for (int i = length - 1; i >= 0; --i) { - *dst++ = static_cast<jchar>(*src++ & 0xff); - } -} - -/** - * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that - * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while - * U+0000 to U+00ff inclusive are identical to ISO-8859-1. - */ -static jbyteArray charsToBytes(JNIEnv* env, jcharArray javaChars, jint offset, jint length, jchar maxValidChar) { - ScopedCharArrayRO chars(env, javaChars); - if (chars.get() == NULL) { - return NULL; - } - - jbyteArray javaBytes = env->NewByteArray(length); - ScopedByteArrayRW bytes(env, javaBytes); - if (bytes.get() == NULL) { - return NULL; - } - - const jchar* src = &chars[offset]; - jbyte* dst = &bytes[0]; - for (int i = length - 1; i >= 0; --i) { - jchar ch = *src++; - if (ch > maxValidChar) { - ch = '?'; - } - *dst++ = static_cast<jbyte>(ch); - } - - return javaBytes; -} - -static jbyteArray Charsets_toAsciiBytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) { - return charsToBytes(env, javaChars, offset, length, 0x7f); -} - -static jbyteArray Charsets_toIsoLatin1Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) { - return charsToBytes(env, javaChars, offset, length, 0xff); -} - -static jbyteArray Charsets_toUtf8Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) { - ScopedCharArrayRO chars(env, javaChars); - if (chars.get() == NULL) { - return NULL; - } - - NativeUnsafeByteSequence out(env); - if (!out.resize(length)) { - return NULL; - } - - const int end = offset + length; - for (int i = offset; i < end; ++i) { - jint ch = chars[i]; - if (ch < 0x80) { - // One byte. - if (!out.append(ch)) { - return NULL; - } - } else if (ch < 0x800) { - // Two bytes. - if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) { - return NULL; - } - } else if (U16_IS_SURROGATE(ch)) { - // A supplementary character. - jchar high = (jchar) ch; - jchar low = (i + 1 != end) ? chars[i + 1] : 0; - if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) { - if (!out.append('?')) { - return NULL; - } - continue; - } - // Now we know we have a *valid* surrogate pair, we can consume the low surrogate. - ++i; - ch = U16_GET_SUPPLEMENTARY(high, low); - // Four bytes. - jbyte b1 = (ch >> 18) | 0xf0; - jbyte b2 = ((ch >> 12) & 0x3f) | 0x80; - jbyte b3 = ((ch >> 6) & 0x3f) | 0x80; - jbyte b4 = (ch & 0x3f) | 0x80; - if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) { - return NULL; - } - } else { - // Three bytes. - jbyte b1 = (ch >> 12) | 0xe0; - jbyte b2 = ((ch >> 6) & 0x3f) | 0x80; - jbyte b3 = (ch & 0x3f) | 0x80; - if (!out.append(b1) || !out.append(b2) || !out.append(b3)) { - return NULL; - } - } - } - return out.toByteArray(); -} - -static JNINativeMethod gMethods[] = { - NATIVE_METHOD(Charsets, asciiBytesToChars, "([BII[C)V"), - NATIVE_METHOD(Charsets, isoLatin1BytesToChars, "([BII[C)V"), - NATIVE_METHOD(Charsets, toAsciiBytes, "([CII)[B"), - NATIVE_METHOD(Charsets, toIsoLatin1Bytes, "([CII)[B"), - NATIVE_METHOD(Charsets, toUtf8Bytes, "([CII)[B"), -}; -void register_libcore_util_CharsetUtils(JNIEnv* env) { - jniRegisterNativeMethods(env, "libcore/util/CharsetUtils", gMethods, NELEM(gMethods)); -} diff --git a/luni/src/main/native/sub.mk b/luni/src/main/native/sub.mk index a90c683..73ed7cb 100644 --- a/luni/src/main/native/sub.mk +++ b/luni/src/main/native/sub.mk @@ -49,7 +49,6 @@ LOCAL_SRC_FILES := \ libcore_io_AsynchronousCloseMonitor.cpp \ libcore_io_Memory.cpp \ libcore_io_Posix.cpp \ - libcore_util_CharsetUtils.cpp \ org_apache_harmony_xml_ExpatParser.cpp \ readlink.cpp \ sun_misc_Unsafe.cpp \ diff --git a/luni/src/test/java/libcore/java/lang/StringTest.java b/luni/src/test/java/libcore/java/lang/StringTest.java index bf162e5..bd52e06 100644 --- a/luni/src/test/java/libcore/java/lang/StringTest.java +++ b/luni/src/test/java/libcore/java/lang/StringTest.java @@ -173,47 +173,6 @@ public class StringTest extends TestCase { } /** - * Tests a widely assumed performance characteristic of String.substring(): - * that it reuses the original's backing array. Although behavior should be - * correct even if this test fails, many applications may suffer - * significant performance degradation. - */ - public void testSubstringSharesBackingArray() throws IllegalAccessException { - String abcdefghij = "ABCDEFGHIJ"; - String cdefg = abcdefghij.substring(2, 7); - assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg)); - } - - /** - * Tests a widely assumed performance characteristic of string's copy - * constructor: that it ensures the backing array is the same length as the - * string. Although behavior should be correct even if this test fails, - * many applications may suffer significant performance degradation. - */ - public void testStringCopiesAvoidHeapRetention() throws IllegalAccessException { - String abcdefghij = "ABCDEFGHIJ"; - assertSame(getBackingArray(abcdefghij), getBackingArray(new String(abcdefghij))); - - String cdefg = abcdefghij.substring(2, 7); - assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg)); - assertEquals(5, getBackingArray(new String(cdefg)).length); - } - - /** - * Uses reflection to return the char[] backing the given string. This - * returns the actual backing array; which must not be modified. - */ - private char[] getBackingArray(String string) throws IllegalAccessException { - for (Field f : String.class.getDeclaredFields()) { - if (!Modifier.isStatic(f.getModifiers()) && f.getType() == char[].class) { - f.setAccessible(true); - return (char[]) f.get(string); - } - } - throw new UnsupportedOperationException("No chars[] field on String!"); - } - - /** * Test that strings interned manually and then later loaded as literals * maintain reference equality. http://b/3098960 */ |