summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- libart/src/main/java/java/lang/AbstractStringBuilder.java (renamed from luni/src/main/java/java/lang/AbstractStringBuilder.java) | 19
-rw-r--r-- libart/src/main/java/java/lang/CaseMapper.java (renamed from luni/src/main/java/java/lang/CaseMapper.java) | 76
-rw-r--r-- libart/src/main/java/java/lang/String.java | 506
-rw-r--r-- libart/src/main/java/java/lang/StringFactory.java | 251
-rw-r--r-- luni/src/main/java/libcore/util/CharsetUtils.java | 26
-rw-r--r-- luni/src/main/native/Register.cpp | 1
-rw-r--r-- luni/src/main/native/libcore_util_CharsetUtils.cpp | 250
-rw-r--r-- luni/src/main/native/sub.mk | 1
-rw-r--r-- luni/src/test/java/libcore/java/lang/StringTest.java | 41
9 files changed, 435 insertions, 736 deletions
diff --git a/luni/src/main/java/java/lang/AbstractStringBuilder.java b/libart/src/main/java/java/lang/AbstractStringBuilder.java
index 4d84078..c8c8c5a 100644
--- a/luni/src/main/java/java/lang/AbstractStringBuilder.java
+++ b/libart/src/main/java/java/lang/AbstractStringBuilder.java
@@ -87,7 +87,7 @@ abstract class AbstractStringBuilder {
count = string.length();
shared = false;
value = new char[count + INITIAL_CAPACITY];
- string._getChars(0, count, value, 0);
+ string.getCharsNoCheck(0, count, value, 0);
}
private void enlargeBuffer(int min) {
@@ -145,7 +145,7 @@ abstract class AbstractStringBuilder {
if (newCount > value.length) {
enlargeBuffer(newCount);
}
- string._getChars(0, length, value, count);
+ string.getCharsNoCheck(0, length, value, count);
count = newCount;
}
@@ -167,7 +167,7 @@ abstract class AbstractStringBuilder {
}
if (s instanceof String) {
- ((String) s)._getChars(start, end, value, count);
+ ((String) s).getCharsNoCheck(start, end, value, count);
} else if (s instanceof AbstractStringBuilder) {
AbstractStringBuilder other = (AbstractStringBuilder) s;
System.arraycopy(other.value, start, value, count, length);
@@ -345,7 +345,7 @@ abstract class AbstractStringBuilder {
int min = string.length();
if (min != 0) {
move(min, index);
- string._getChars(0, min, value, index);
+ string.getCharsNoCheck(0, min, value, index);
count += min;
}
} else {
@@ -422,7 +422,7 @@ abstract class AbstractStringBuilder {
value = value.clone();
shared = false;
}
- string._getChars(0, stringLength, value, start);
+ string.getCharsNoCheck(0, stringLength, value, start);
count -= diff;
return;
}
@@ -626,14 +626,7 @@ abstract class AbstractStringBuilder {
if (count == 0) {
return "";
}
- // Optimize String sharing for more performance
- int wasted = value.length - count;
- if (wasted >= 256
- || (wasted >= INITIAL_CAPACITY && wasted >= (count >> 1))) {
- return new String(value, 0, count);
- }
- shared = true;
- return new String(0, count, value);
+ return StringFactory.newStringFromChars(0, count, value);
}
/**
diff --git a/luni/src/main/java/java/lang/CaseMapper.java b/libart/src/main/java/java/lang/CaseMapper.java
index 1da621c..f23a4ef 100644
--- a/luni/src/main/java/java/lang/CaseMapper.java
+++ b/libart/src/main/java/java/lang/CaseMapper.java
@@ -34,17 +34,15 @@ class CaseMapper {
/**
* Our current GC makes short-lived objects more expensive than we'd like. When that's fixed,
* this class should be changed so that you instantiate it with the String and its value,
- * offset, and count fields.
+ * and count fields.
*/
private CaseMapper() {
}
/**
- * Implements String.toLowerCase. We need 's' so that we can return the original String instance
- * if nothing changes. We need 'value', 'offset', and 'count' because they're not otherwise
- * accessible.
+ * Implements String.toLowerCase. The original String instance is returned if nothing changes.
*/
- public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) {
+ public static String toLowerCase(Locale locale, String s) {
// Punt hard cases to ICU4C.
// Note that Greek isn't a particularly hard case for toLowerCase, only toUpperCase.
String languageCode = locale.getLanguage();
@@ -52,29 +50,26 @@ class CaseMapper {
return ICU.toLowerCase(s, locale);
}
- char[] newValue = null;
- int newCount = 0;
- for (int i = offset, end = offset + count; i < end; ++i) {
- char ch = value[i];
+ String newString = null;
+ for (int i = 0, end = s.length(); i < end; ++i) {
+ char ch = s.charAt(i);
char newCh;
if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) {
// Punt these hard cases.
return ICU.toLowerCase(s, locale);
- } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) {
+ } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(s, i)) {
newCh = GREEK_SMALL_FINAL_SIGMA;
} else {
newCh = Character.toLowerCase(ch);
}
- if (newValue == null && ch != newCh) {
- newValue = new char[count]; // The result can't be longer than the input.
- newCount = i - offset;
- System.arraycopy(value, offset, newValue, 0, newCount);
- }
- if (newValue != null) {
- newValue[newCount++] = newCh;
+ if (ch != newCh) {
+ if (newString == null) {
+ newString = StringFactory.newStringFromString(s);
+ }
+ newString.setCharAt(i, newCh);
}
}
- return newValue != null ? new String(0, newCount, newValue) : s;
+ return newString != null ? newString : s;
}
/**
@@ -82,20 +77,20 @@ class CaseMapper {
* sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and
* then a cased letter.
*/
- private static boolean isFinalSigma(char[] value, int offset, int count, int index) {
+ private static boolean isFinalSigma(String s, int index) {
// TODO: we don't skip case-ignorable sequences like we should.
// TODO: we should add a more direct way to test for a cased letter.
- if (index <= offset) {
+ if (index <= 0) {
return false;
}
- char previous = value[index - 1];
+ char previous = s.charAt(index - 1);
if (!(Character.isLowerCase(previous) || Character.isUpperCase(previous) || Character.isTitleCase(previous))) {
return false;
}
- if (index + 1 >= offset + count) {
+ if (index + 1 >= s.length()) {
return true;
}
- char next = value[index + 1];
+ char next = s.charAt(index + 1);
if (Character.isLowerCase(next) || Character.isUpperCase(next) || Character.isTitleCase(next)) {
return false;
}
@@ -147,7 +142,7 @@ class CaseMapper {
}
};
- public static String toUpperCase(Locale locale, String s, char[] value, int offset, int count) {
+ public static String toUpperCase(Locale locale, String s, int count) {
String languageCode = locale.getLanguage();
if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) {
return ICU.toUpperCase(s, locale);
@@ -157,9 +152,10 @@ class CaseMapper {
}
char[] output = null;
+ String newString = null;
int i = 0;
- for (int o = offset, end = offset + count; o < end; o++) {
- char ch = value[o];
+ for (int o = 0, end = count; o < end; o++) {
+ char ch = s.charAt(o);
if (Character.isHighSurrogate(ch)) {
return ICU.toUpperCase(s, locale);
}
@@ -171,23 +167,25 @@ class CaseMapper {
output = newoutput;
}
char upch = Character.toUpperCase(ch);
- if (ch != upch) {
- if (output == null) {
- output = new char[count];
- i = o - offset;
- System.arraycopy(value, offset, output, 0, i);
- }
+ if (output != null) {
output[i++] = upch;
- } else if (output != null) {
- output[i++] = ch;
+ } else if (ch != upch) {
+ if (newString == null) {
+ newString = StringFactory.newStringFromString(s);
+ }
+ newString.setCharAt(o, upch);
}
} else {
int target = index * 3;
char val3 = upperValues[target + 2];
if (output == null) {
output = new char[count + (count / 6) + 2];
- i = o - offset;
- System.arraycopy(value, offset, output, 0, i);
+ i = o;
+ if (newString != null) {
+ System.arraycopy(newString.toCharArray(), 0, output, 0, i);
+ } else {
+ System.arraycopy(s.toCharArray(), 0, output, 0, i);
+ }
} else if (i + (val3 == 0 ? 1 : 2) >= output.length) {
char[] newoutput = new char[output.length + (count / 6) + 3];
System.arraycopy(output, 0, newoutput, 0, output.length);
@@ -204,7 +202,11 @@ class CaseMapper {
}
}
if (output == null) {
- return s;
+ if (newString != null) {
+ return newString;
+ } else {
+ return s;
+ }
}
return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i);
}
diff --git a/libart/src/main/java/java/lang/String.java b/libart/src/main/java/java/lang/String.java
index a5bf34c..0875d1a 100644
--- a/libart/src/main/java/java/lang/String.java
+++ b/libart/src/main/java/java/lang/String.java
@@ -35,23 +35,6 @@ import libcore.util.EmptyArray;
* See {@link Character} for details about the relationship between {@code char} and
* Unicode code points.
*
- * <a name="backing_array"><h3>Backing Arrays</h3></a>
- * This class is implemented using a {@code char[]}. The length of the array may exceed
- * the length of the string. For example, the string "Hello" may be backed by
- * the array {@code ['H', 'e', 'l', 'l', 'o', 'W'. 'o', 'r', 'l', 'd']} with
- * offset 0 and length 5.
- *
- * <p>Multiple strings can share the same {@code char[]} because strings are immutable.
- * The {@link #substring} method <strong>always</strong> returns a string that
- * shares the backing array of its source string. Generally this is an
- * optimization: fewer {@code char[]}s need to be allocated, and less copying
- * is necessary. But this can also lead to unwanted heap retention. Taking a
- * short substring of long string means that the long shared {@code char[]} won't be
- * garbage until both strings are garbage. This typically happens when parsing
- * small substrings out of a large input. To avoid this where necessary, call
- * {@code new String(longString.subString(...))}. The string copy constructor
- * always ensures that the backing array is no larger than necessary.
- *
* @see StringBuffer
* @see StringBuilder
* @see Charset
@@ -93,10 +76,6 @@ public final class String implements Serializable, Comparable<String>, CharSeque
}
}
- private final char[] value;
-
- private final int offset;
-
private final int count;
private int hashCode;
@@ -105,9 +84,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
* Creates an empty string.
*/
public String() {
- value = EmptyArray.CHAR;
- offset = 0;
- count = 0;
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -116,7 +93,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
*/
@FindBugsSuppressWarnings("DM_DEFAULT_ENCODING")
public String(byte[] data) {
- this(data, 0, data.length);
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -133,7 +110,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
*/
@Deprecated
public String(byte[] data, int high) {
- this(data, high, 0, data.length);
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -146,7 +123,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
* if {@code byteCount < 0 || offset < 0 || offset + byteCount > data.length}.
*/
public String(byte[] data, int offset, int byteCount) {
- this(data, offset, byteCount, Charset.defaultCharset());
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -162,16 +139,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
*/
@Deprecated
public String(byte[] data, int high, int offset, int byteCount) {
- if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
- throw failedBoundsCheck(data.length, offset, byteCount);
- }
- this.offset = 0;
- this.value = new char[byteCount];
- this.count = byteCount;
- high <<= 8;
- for (int i = 0; i < count; i++) {
- value[i] = (char) (high + (data[offset++] & 0xff));
- }
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -188,7 +156,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
* if the named charset is not supported.
*/
public String(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException {
- this(data, offset, byteCount, Charset.forNameUEE(charsetName));
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -203,7 +171,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
* if {@code charsetName} is not supported.
*/
public String(byte[] data, String charsetName) throws UnsupportedEncodingException {
- this(data, 0, data.length, Charset.forNameUEE(charsetName));
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -221,144 +189,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
* @since 1.6
*/
public String(byte[] data, int offset, int byteCount, Charset charset) {
- if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
- throw failedBoundsCheck(data.length, offset, byteCount);
- }
-
- // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed and because 'count' and
- // 'value' are final.
- String canonicalCharsetName = charset.name();
- if (canonicalCharsetName.equals("UTF-8")) {
- byte[] d = data;
- char[] v = new char[byteCount];
-
- int idx = offset;
- int last = offset + byteCount;
- int s = 0;
-outer:
- while (idx < last) {
- byte b0 = d[idx++];
- if ((b0 & 0x80) == 0) {
- // 0xxxxxxx
- // Range: U-00000000 - U-0000007F
- int val = b0 & 0xff;
- v[s++] = (char) val;
- } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) ||
- ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) {
- int utfCount = 1;
- if ((b0 & 0xf0) == 0xe0) utfCount = 2;
- else if ((b0 & 0xf8) == 0xf0) utfCount = 3;
- else if ((b0 & 0xfc) == 0xf8) utfCount = 4;
- else if ((b0 & 0xfe) == 0xfc) utfCount = 5;
-
- // 110xxxxx (10xxxxxx)+
- // Range: U-00000080 - U-000007FF (count == 1)
- // Range: U-00000800 - U-0000FFFF (count == 2)
- // Range: U-00010000 - U-001FFFFF (count == 3)
- // Range: U-00200000 - U-03FFFFFF (count == 4)
- // Range: U-04000000 - U-7FFFFFFF (count == 5)
-
- if (idx + utfCount > last) {
- v[s++] = REPLACEMENT_CHAR;
- continue;
- }
-
- // Extract usable bits from b0
- int val = b0 & (0x1f >> (utfCount - 1));
- for (int i = 0; i < utfCount; ++i) {
- byte b = d[idx++];
- if ((b & 0xc0) != 0x80) {
- v[s++] = REPLACEMENT_CHAR;
- idx--; // Put the input char back
- continue outer;
- }
- // Push new bits in from the right side
- val <<= 6;
- val |= b & 0x3f;
- }
-
- // Note: Java allows overlong char
- // specifications To disallow, check that val
- // is greater than or equal to the minimum
- // value for each count:
- //
- // count min value
- // ----- ----------
- // 1 0x80
- // 2 0x800
- // 3 0x10000
- // 4 0x200000
- // 5 0x4000000
-
- // Allow surrogate values (0xD800 - 0xDFFF) to
- // be specified using 3-byte UTF values only
- if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) {
- v[s++] = REPLACEMENT_CHAR;
- continue;
- }
-
- // Reject chars greater than the Unicode maximum of U+10FFFF.
- if (val > 0x10FFFF) {
- v[s++] = REPLACEMENT_CHAR;
- continue;
- }
-
- // Encode chars from U+10000 up as surrogate pairs
- if (val < 0x10000) {
- v[s++] = (char) val;
- } else {
- int x = val & 0xffff;
- int u = (val >> 16) & 0x1f;
- int w = (u - 1) & 0xffff;
- int hi = 0xd800 | (w << 6) | (x >> 10);
- int lo = 0xdc00 | (x & 0x3ff);
- v[s++] = (char) hi;
- v[s++] = (char) lo;
- }
- } else {
- // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff
- v[s++] = REPLACEMENT_CHAR;
- }
- }
-
- if (s == byteCount) {
- // We guessed right, so we can use our temporary array as-is.
- this.offset = 0;
- this.value = v;
- this.count = s;
- } else {
- // Our temporary array was too big, so reallocate and copy.
- this.offset = 0;
- this.value = new char[s];
- this.count = s;
- System.arraycopy(v, 0, value, 0, s);
- }
- } else if (canonicalCharsetName.equals("ISO-8859-1")) {
- this.offset = 0;
- this.value = new char[byteCount];
- this.count = byteCount;
- CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value);
- } else if (canonicalCharsetName.equals("US-ASCII")) {
- this.offset = 0;
- this.value = new char[byteCount];
- this.count = byteCount;
- CharsetUtils.asciiBytesToChars(data, offset, byteCount, value);
- } else {
- CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount));
- this.offset = 0;
- this.count = cb.length();
- if (count > 0) {
- // We could use cb.array() directly, but that would mean we'd have to trust
- // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later,
- // which would break String's immutability guarantee. It would also tend to
- // mean that we'd be wasting memory because CharsetDecoder doesn't trim the
- // array. So we copy.
- this.value = new char[count];
- System.arraycopy(cb.array(), 0, value, 0, count);
- } else {
- this.value = EmptyArray.CHAR;
- }
- }
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -368,7 +199,7 @@ outer:
* @since 1.6
*/
public String(byte[] data, Charset charset) {
- this(data, 0, data.length, charset);
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -379,7 +210,7 @@ outer:
* @throws NullPointerException if {@code data == null}
*/
public String(char[] data) {
- this(data, 0, data.length);
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -393,36 +224,25 @@ outer:
* if {@code charCount < 0 || offset < 0 || offset + charCount > data.length}
*/
public String(char[] data, int offset, int charCount) {
- if ((offset | charCount) < 0 || charCount > data.length - offset) {
- throw failedBoundsCheck(data.length, offset, charCount);
- }
- this.offset = 0;
- this.value = new char[charCount];
- this.count = charCount;
- System.arraycopy(data, offset, value, 0, count);
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/*
* Internal version of the String(char[], int, int) constructor.
- * Does not range check, null check, or copy the array.
+ * Does not range check or null check.
*/
+ // TODO: Replace calls to this with calls to StringFactory, will require
+ // splitting other files in java.lang.
String(int offset, int charCount, char[] chars) {
- this.value = chars;
- this.offset = offset;
- this.count = charCount;
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
- * Constructs a copy of the given string.
- * The returned string's <a href="#backing_array">backing array</a>
- * is no larger than necessary.
+ * Constructs a new string with the same sequence of characters as {@code
+ * toCopy}.
*/
public String(String toCopy) {
- value = (toCopy.value.length == toCopy.count)
- ? toCopy.value
- : Arrays.copyOfRange(toCopy.value, toCopy.offset, toCopy.offset + toCopy.length());
- offset = 0;
- count = value.length;
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -430,11 +250,7 @@ outer:
* {@code StringBuffer}.
*/
public String(StringBuffer stringBuffer) {
- offset = 0;
- synchronized (stringBuffer) {
- value = stringBuffer.shareValue();
- count = stringBuffer.length();
- }
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -451,20 +267,7 @@ outer:
* @since 1.5
*/
public String(int[] codePoints, int offset, int count) {
- if (codePoints == null) {
- throw new NullPointerException("codePoints == null");
- }
- if ((offset | count) < 0 || count > codePoints.length - offset) {
- throw failedBoundsCheck(codePoints.length, offset, count);
- }
- this.offset = 0;
- this.value = new char[count * 2];
- int end = offset + count;
- int c = 0;
- for (int i = offset; i < end; i++) {
- c += Character.toChars(codePoints[i], this.value, c);
- }
- this.count = c;
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
@@ -476,25 +279,16 @@ outer:
* @since 1.5
*/
public String(StringBuilder stringBuilder) {
- if (stringBuilder == null) {
- throw new NullPointerException("stringBuilder == null");
- }
- this.offset = 0;
- this.count = stringBuilder.length();
- this.value = new char[this.count];
- stringBuilder.getChars(0, this.count, this.value, 0);
+ throw new UnsupportedOperationException("Use StringFactory instead.");
}
/**
* Returns the {@code char} at {@code index}.
* @throws IndexOutOfBoundsException if {@code index < 0} or {@code index >= length()}.
*/
- public char charAt(int index) {
- if (index < 0 || index >= count) {
- throw indexAndLength(index);
- }
- return value[offset + index];
- }
+ public native char charAt(int index);
+
+ native void setCharAt(int index, char c);
private StringIndexOutOfBoundsException indexAndLength(int index) {
throw new StringIndexOutOfBoundsException(this, index);
@@ -557,12 +351,11 @@ outer:
* if {@code string} is {@code null}.
*/
public int compareToIgnoreCase(String string) {
- int o1 = offset, o2 = string.offset, result;
- int end = offset + (count < string.count ? count : string.count);
+ int result;
+ int end = count < string.count ? count : string.count;
char c1, c2;
- char[] target = string.value;
- while (o1 < end) {
- if ((c1 = value[o1++]) == (c2 = target[o2++])) {
+ for (int i = 0; i < end; ++i) {
+ if ((c1 = charAt(i)) == (c2 = string.charAt(i))) {
continue;
}
c1 = foldCase(c1);
@@ -582,15 +375,7 @@ outer:
* @return a new string which is the concatenation of this string and the
* specified string.
*/
- public String concat(String string) {
- if (string.count > 0 && count > 0) {
- char[] buffer = new char[count + string.count];
- System.arraycopy(value, offset, buffer, 0, count);
- System.arraycopy(string.value, string.offset, buffer, count, string.count);
- return new String(0, buffer.length, buffer);
- }
- return count == 0 ? string : this;
- }
+ public native String concat(String string);
/**
* Creates a new string by copying the given {@code char[]}.
@@ -601,7 +386,7 @@ outer:
* if {@code data} is {@code null}.
*/
public static String copyValueOf(char[] data) {
- return new String(data, 0, data.length);
+ return StringFactory.newStringFromChars(data, 0, data.length);
}
/**
@@ -616,7 +401,7 @@ outer:
* data.length}.
*/
public static String copyValueOf(char[] data, int start, int length) {
- return new String(data, start, length);
+ return StringFactory.newStringFromChars(data, start, length);
}
/**
@@ -654,16 +439,10 @@ outer:
if (hashCode() != s.hashCode()) {
return false;
}
- char[] value1 = value;
- int offset1 = offset;
- char[] value2 = s.value;
- int offset2 = s.offset;
- for (int end = offset1 + count; offset1 < end; ) {
- if (value1[offset1] != value2[offset2]) {
+ for (int i = 0; i < count; ++i) {
+ if (charAt(i) != s.charAt(i)) {
return false;
}
- offset1++;
- offset2++;
}
return true;
} else {
@@ -686,12 +465,9 @@ outer:
if (string == null || count != string.count) {
return false;
}
- int o1 = offset, o2 = string.offset;
- int end = offset + count;
- char[] target = string.value;
- while (o1 < end) {
- char c1 = value[o1++];
- char c2 = target[o2++];
+ for (int i = 0; i < count; ++i) {
+ char c1 = charAt(i);
+ char c2 = string.charAt(i);
if (c1 != c2 && foldCase(c1) != foldCase(c2)) {
return false;
}
@@ -721,10 +497,9 @@ outer:
@Deprecated
public void getBytes(int start, int end, byte[] data, int index) {
if (start >= 0 && start <= end && end <= count) {
- end += offset;
try {
- for (int i = offset + start; i < end; i++) {
- data[index++] = (byte) value[i];
+ for (int i = start; i < end; ++i) {
+ data[index++] = (byte) charAt(i);
}
} catch (ArrayIndexOutOfBoundsException ignored) {
throw failedBoundsCheck(data.length, index, end - start);
@@ -772,16 +547,15 @@ outer:
public byte[] getBytes(Charset charset) {
String canonicalCharsetName = charset.name();
if (canonicalCharsetName.equals("UTF-8")) {
- return CharsetUtils.toUtf8Bytes(value, offset, count);
+ return CharsetUtils.toUtf8Bytes(this, 0, count);
} else if (canonicalCharsetName.equals("ISO-8859-1")) {
- return CharsetUtils.toIsoLatin1Bytes(value, offset, count);
+ return CharsetUtils.toIsoLatin1Bytes(this, 0, count);
} else if (canonicalCharsetName.equals("US-ASCII")) {
- return CharsetUtils.toAsciiBytes(value, offset, count);
+ return CharsetUtils.toAsciiBytes(this, 0, count);
} else if (canonicalCharsetName.equals("UTF-16BE")) {
- return CharsetUtils.toBigEndianUtf16Bytes(value, offset, count);
+ return CharsetUtils.toBigEndianUtf16Bytes(this, 0, count);
} else {
- CharBuffer chars = CharBuffer.wrap(this.value, this.offset, this.count);
- ByteBuffer buffer = charset.encode(chars.asReadOnlyBuffer());
+ ByteBuffer buffer = charset.encode(this);
byte[] bytes = new byte[buffer.limit()];
buffer.get(bytes);
return bytes;
@@ -809,7 +583,16 @@ outer:
*/
public void getChars(int start, int end, char[] buffer, int index) {
if (start >= 0 && start <= end && end <= count) {
- System.arraycopy(value, start + offset, buffer, index, end - start);
+ if (buffer == null) {
+ throw new NullPointerException("buffer == null");
+ }
+ if (index < 0) {
+ throw new IndexOutOfBoundsException("index < 0");
+ }
+ if (end - start > buffer.length - index) {
+ throw new ArrayIndexOutOfBoundsException("end - start > buffer.length - index");
+ }
+ getCharsNoCheck(start, end, buffer, index);
} else {
// We throw StringIndexOutOfBoundsException rather than System.arraycopy's AIOOBE.
throw startEndAndLength(start, end);
@@ -821,9 +604,7 @@ outer:
* within the java.lang package only. The caller is responsible for
* ensuring that start >= 0 && start <= end && end <= count.
*/
- void _getChars(int start, int end, char[] buffer, int index) {
- System.arraycopy(value, start + offset, buffer, index, end - start);
- }
+ native void getCharsNoCheck(int start, int end, char[] buffer, int index);
@Override public int hashCode() {
int hash = hashCode;
@@ -831,10 +612,8 @@ outer:
if (count == 0) {
return 0;
}
- final int end = count + offset;
- final char[] chars = value;
- for (int i = offset; i < end; ++i) {
- hash = 31*hash + chars[i];
+ for (int i = 0; i < count; ++i) {
+ hash = 31 * hash + charAt(i);
}
hashCode = hash;
}
@@ -893,21 +672,17 @@ outer:
if (subCount > _count) {
return -1;
}
- char[] target = string.value;
- int subOffset = string.offset;
- char firstChar = target[subOffset];
- int end = subOffset + subCount;
+ char firstChar = string.charAt(0);
while (true) {
int i = indexOf(firstChar, start);
if (i == -1 || subCount + i > _count) {
return -1; // handles subCount > count || start >= count
}
- int o1 = offset + i, o2 = subOffset;
- char[] _value = value;
- while (++o2 < end && _value[++o1] == target[o2]) {
+ int o1 = i, o2 = 0;
+ while (++o2 < subCount && charAt(++o1) == string.charAt(o2)) {
// Intentionally empty
}
- if (o2 == end) {
+ if (o2 == subCount) {
return i;
}
start = i + 1;
@@ -934,21 +709,17 @@ outer:
if (subCount + start > _count) {
return -1;
}
- char[] target = subString.value;
- int subOffset = subString.offset;
- char firstChar = target[subOffset];
- int end = subOffset + subCount;
+ char firstChar = subString.charAt(0);
while (true) {
int i = indexOf(firstChar, start);
if (i == -1 || subCount + i > _count) {
return -1; // handles subCount > count || start >= count
}
- int o1 = offset + i, o2 = subOffset;
- char[] _value = value;
- while (++o2 < end && _value[++o1] == target[o2]) {
+ int o1 = i, o2 = 0;
+ while (++o2 < subCount && charAt(++o1) == subString.charAt(o2)) {
// Intentionally empty
}
- if (o2 == end) {
+ if (o2 == subCount) {
return i;
}
start = i + 1;
@@ -991,11 +762,9 @@ outer:
return lastIndexOfSupplementary(c, Integer.MAX_VALUE);
}
int _count = count;
- int _offset = offset;
- char[] _value = value;
- for (int i = _offset + _count - 1; i >= _offset; --i) {
- if (_value[i] == c) {
- return i - _offset;
+ for (int i = _count - 1; i >= 0; --i) {
+ if (charAt(i) == c) {
+ return i;
}
}
return -1;
@@ -1011,15 +780,13 @@ outer:
return lastIndexOfSupplementary(c, start);
}
int _count = count;
- int _offset = offset;
- char[] _value = value;
if (start >= 0) {
if (start >= _count) {
start = _count - 1;
}
- for (int i = _offset + start; i >= _offset; --i) {
- if (_value[i] == c) {
- return i - _offset;
+ for (int i = start; i >= 0; --i) {
+ if (charAt(i) == c) {
+ return i;
}
}
}
@@ -1031,7 +798,7 @@ outer:
return -1;
}
char[] chars = Character.toChars(c);
- String needle = new String(0, chars.length, chars);
+ String needle = StringFactory.newStringFromChars(0, chars.length, chars);
return lastIndexOf(needle, start);
}
@@ -1065,20 +832,17 @@ outer:
start = count - subCount;
}
// count and subCount are both >= 1
- char[] target = subString.value;
- int subOffset = subString.offset;
- char firstChar = target[subOffset];
- int end = subOffset + subCount;
+ char firstChar = subString.charAt(0);
while (true) {
int i = lastIndexOf(firstChar, start);
if (i == -1) {
return -1;
}
- int o1 = offset + i, o2 = subOffset;
- while (++o2 < end && value[++o1] == target[o2]) {
+ int o1 = i, o2 = 0;
+ while (++o2 < subCount && charAt(++o1) == subString.charAt(o2)) {
// Intentionally empty
}
- if (o2 == end) {
+ if (o2 == subCount) {
return i;
}
start = i - 1;
@@ -1121,11 +885,8 @@ outer:
if (length <= 0) {
return true;
}
- int o1 = offset + thisStart, o2 = string.offset + start;
- char[] value1 = value;
- char[] value2 = string.value;
for (int i = 0; i < length; ++i) {
- if (value1[o1 + i] != value2[o2 + i]) {
+ if (charAt(thisStart + i) != string.charAt(start + i)) {
return false;
}
}
@@ -1164,13 +925,10 @@ outer:
if (start < 0 || length > string.count - start) {
return false;
}
- thisStart += offset;
- start += string.offset;
int end = thisStart + length;
- char[] target = string.value;
while (thisStart < end) {
- char c1 = value[thisStart++];
- char c2 = target[start++];
+ char c1 = charAt(thisStart++);
+ char c2 = string.charAt(start++);
if (c1 != c2 && foldCase(c1) != foldCase(c2)) {
return false;
}
@@ -1182,29 +940,20 @@ outer:
* Returns a copy of this string after replacing occurrences of the given {@code char} with another.
*/
public String replace(char oldChar, char newChar) {
- char[] buffer = value;
- int _offset = offset;
+ String s = null;
int _count = count;
-
- int idx = _offset;
- int last = _offset + _count;
boolean copied = false;
- while (idx < last) {
- if (buffer[idx] == oldChar) {
+ for (int i = 0; i < _count; ++i) {
+ if (charAt(i) == oldChar) {
if (!copied) {
- char[] newBuffer = new char[_count];
- System.arraycopy(buffer, _offset, newBuffer, 0, _count);
- buffer = newBuffer;
- idx -= _offset;
- last -= _offset;
+ s = StringFactory.newStringFromString(this);
copied = true;
}
- buffer[idx] = newChar;
+ s.setCharAt(i, newChar);
}
- idx++;
}
- return copied ? new String(0, count, buffer) : this;
+ return copied ? s : this;
}
/**
@@ -1241,9 +990,8 @@ outer:
int resultLength = count + (count + 1) * replacementString.length();
StringBuilder result = new StringBuilder(resultLength);
result.append(replacementString);
- int end = offset + count;
- for (int i = offset; i != end; ++i) {
- result.append(value[i]);
+ for (int i = 0; i != count; ++i) {
+ result.append(charAt(i));
result.append(replacementString);
}
return result.toString();
@@ -1252,15 +1000,21 @@ outer:
StringBuilder result = new StringBuilder(count);
int searchStart = 0;
do {
- // Copy chars before the match...
- result.append(value, offset + searchStart, matchStart - searchStart);
+ // Copy characters before the match...
+ // TODO: Perform this faster than one char at a time?
+ for (int i = searchStart; i < matchStart; ++i) {
+ result.append(charAt(i));
+ }
// Insert the replacement...
result.append(replacementString);
// And skip over the match...
searchStart = matchStart + targetLength;
} while ((matchStart = indexOf(targetString, searchStart)) != -1);
// Copy any trailing chars...
- result.append(value, offset + searchStart, count - searchStart);
+ // TODO: Perform this faster than one char at a time?
+ for (int i = searchStart; i < count; ++i) {
+ result.append(charAt(i));
+ }
return result.toString();
}
@@ -1308,7 +1062,7 @@ outer:
return this;
}
if (start >= 0 && start <= count) {
- return new String(offset + start, count - start, value);
+ return fastSubstring(start, count - start);
}
throw indexAndLength(start);
}
@@ -1328,21 +1082,19 @@ outer:
}
// Fast range check.
if (start >= 0 && start <= end && end <= count) {
- return new String(offset + start, end - start, value);
+ return fastSubstring(start, end - start);
}
throw startEndAndLength(start, end);
}
+ private native String fastSubstring(int start, int length);
+
/**
* Returns a new {@code char} array containing a copy of the {@code char}s in this string.
* This is expensive and rarely useful. If you just want to iterate over the {@code char}s in
* the string, use {@link #charAt} instead.
*/
- public char[] toCharArray() {
- char[] buffer = new char[count];
- System.arraycopy(value, offset, buffer, 0, count);
- return buffer;
- }
+ public native char[] toCharArray();
/**
* Converts this string to lower case, using the rules of the user's default locale.
@@ -1351,7 +1103,7 @@ outer:
* @return a new lower case string, or {@code this} if it's already all lower case.
*/
public String toLowerCase() {
- return CaseMapper.toLowerCase(Locale.getDefault(), this, value, offset, count);
+ return CaseMapper.toLowerCase(Locale.getDefault(), this);
}
/**
@@ -1368,7 +1120,7 @@ outer:
* @return a new lower case string, or {@code this} if it's already all lower case.
*/
public String toLowerCase(Locale locale) {
- return CaseMapper.toLowerCase(locale, this, value, offset, count);
+ return CaseMapper.toLowerCase(locale, this);
}
/**
@@ -1386,7 +1138,7 @@ outer:
* @return a new upper case string, or {@code this} if it's already all upper case.
*/
public String toUpperCase() {
- return CaseMapper.toUpperCase(Locale.getDefault(), this, value, offset, count);
+ return CaseMapper.toUpperCase(Locale.getDefault(), this, count);
}
/**
@@ -1403,7 +1155,7 @@ outer:
* @return a new upper case string, or {@code this} if it's already all upper case.
*/
public String toUpperCase(Locale locale) {
- return CaseMapper.toUpperCase(locale, this, value, offset, count);
+ return CaseMapper.toUpperCase(locale, this, count);
}
/**
@@ -1411,18 +1163,18 @@ outer:
* the beginning or end.
*/
public String trim() {
- int start = offset, last = offset + count - 1;
+ int start = 0, last = count - 1;
int end = last;
- while ((start <= end) && (value[start] <= ' ')) {
+ while ((start <= end) && (charAt(start) <= ' ')) {
start++;
}
- while ((end >= start) && (value[end] <= ' ')) {
+ while ((end >= start) && (charAt(end) <= ' ')) {
end--;
}
- if (start == offset && end == last) {
+ if (start == 0 && end == last) {
return this;
}
- return new String(start, end - start + 1, value);
+ return fastSubstring(start, end - start + 1);
}
/**
@@ -1434,7 +1186,7 @@ outer:
* if {@code data} is {@code null}.
*/
public static String valueOf(char[] data) {
- return new String(data, 0, data.length);
+ return StringFactory.newStringFromChars(data, 0, data.length);
}
/**
@@ -1448,7 +1200,7 @@ outer:
* if {@code data} is {@code null}.
*/
public static String valueOf(char[] data, int start, int length) {
- return new String(data, start, length);
+ return StringFactory.newStringFromChars(data, start, length);
}
/**
@@ -1457,9 +1209,9 @@ outer:
public static String valueOf(char value) {
String s;
if (value < 128) {
- s = new String(value, 1, ASCII);
+ s = StringFactory.newStringFromChars(value, 1, ASCII);
} else {
- s = new String(0, 1, new char[] { value });
+ s = StringFactory.newStringFromChars(0, 1, new char[] { value });
}
s.hashCode = value;
return s;
@@ -1533,7 +1285,8 @@ outer:
if (count != size) {
return false;
}
- return regionMatches(0, new String(0, size, sb.getValue()), 0, size);
+ String s = StringFactory.newStringFromChars(0, size, sb.getValue());
+ return regionMatches(0, s, 0, size);
}
}
@@ -1682,7 +1435,7 @@ outer:
if (index < 0 || index >= count) {
throw indexAndLength(index);
}
- return Character.codePointAt(value, offset + index, offset + count);
+ return Character.codePointAt(this, index);
}
/**
@@ -1696,7 +1449,7 @@ outer:
if (index < 1 || index > count) {
throw indexAndLength(index);
}
- return Character.codePointBefore(value, offset + index, offset);
+ return Character.codePointBefore(this, index);
}
/**
@@ -1717,7 +1470,7 @@ outer:
if (start < 0 || end > count || start > end) {
throw startEndAndLength(start, end);
}
- return Character.codePointCount(value, offset + start, end - start);
+ return Character.codePointCount(this, start, end);
}
/**
@@ -1748,9 +1501,7 @@ outer:
* @since 1.5
*/
public int offsetByCodePoints(int index, int codePointOffset) {
- int s = index + offset;
- int r = Character.offsetByCodePoints(value, offset, count, s, codePointOffset);
- return r - offset;
+ return Character.offsetByCodePoints(this, index, codePointOffset);
}
/**
@@ -1816,31 +1567,26 @@ outer:
@SuppressWarnings("unused")
private static int indexOf(String haystackString, String needleString,
int cache, int md2, char lastChar) {
- char[] haystack = haystackString.value;
- int haystackOffset = haystackString.offset;
int haystackLength = haystackString.count;
- char[] needle = needleString.value;
- int needleOffset = needleString.offset;
int needleLength = needleString.count;
int needleLengthMinus1 = needleLength - 1;
- int haystackEnd = haystackOffset + haystackLength;
- outer_loop: for (int i = haystackOffset + needleLengthMinus1; i < haystackEnd;) {
- if (lastChar == haystack[i]) {
+ outer_loop: for (int i = needleLengthMinus1; i < haystackLength;) {
+ if (lastChar == haystackString.charAt(i)) {
for (int j = 0; j < needleLengthMinus1; ++j) {
- if (needle[j + needleOffset] != haystack[i + j
- - needleLengthMinus1]) {
+ if (needleString.charAt(j) !=
+ haystackString.charAt(i + j - needleLengthMinus1)) {
int skip = 1;
- if ((cache & (1 << haystack[i])) == 0) {
+ if ((cache & (1 << haystackString.charAt(i))) == 0) {
skip += j;
}
i += Math.max(md2, skip);
continue outer_loop;
}
}
- return i - needleLengthMinus1 - haystackOffset;
+ return i - needleLengthMinus1;
}
- if ((cache & (1 << haystack[i])) == 0) {
+ if ((cache & (1 << haystackString.charAt(i))) == 0) {
i += needleLengthMinus1;
}
i++;
diff --git a/libart/src/main/java/java/lang/StringFactory.java b/libart/src/main/java/java/lang/StringFactory.java
new file mode 100644
index 0000000..4fc3eba
--- /dev/null
+++ b/libart/src/main/java/java/lang/StringFactory.java
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package java.lang;
+
+import java.io.Serializable;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Comparator;
+import libcore.util.CharsetUtils;
+import libcore.util.EmptyArray;
+
+/**
+ * Class used to generate strings instead of calling String.&lt;init>.
+ *
+ * @hide
+ */
+public final class StringFactory {
+
+ // TODO: Remove once native methods are in place.
+ private static final char REPLACEMENT_CHAR = (char) 0xfffd;
+
+ public static String newEmptyString() {
+ return newStringFromChars(EmptyArray.CHAR, 0, 0);
+ }
+
+ public static String newStringFromBytes(byte[] data) {
+ return newStringFromBytes(data, 0, data.length);
+ }
+
+ public static String newStringFromBytes(byte[] data, int high) {
+ return newStringFromBytes(data, high, 0, data.length);
+ }
+
+ public static String newStringFromBytes(byte[] data, int offset, int byteCount) {
+ return newStringFromBytes(data, offset, byteCount, Charset.defaultCharset());
+ }
+
+ public static native String newStringFromBytes(byte[] data, int high, int offset, int byteCount);
+
+ public static String newStringFromBytes(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException {
+ return newStringFromBytes(data, offset, byteCount, Charset.forNameUEE(charsetName));
+ }
+
+ public static String newStringFromBytes(byte[] data, String charsetName) throws UnsupportedEncodingException {
+ return newStringFromBytes(data, 0, data.length, Charset.forNameUEE(charsetName));
+ }
+
+ // TODO: Implement this method natively.
+ public static String newStringFromBytes(byte[] data, int offset, int byteCount, Charset charset) {
+ if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
+ throw new StringIndexOutOfBoundsException(data.length, offset, byteCount);
+ }
+
+ char[] value;
+ int length;
+
+ // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed.
+ String canonicalCharsetName = charset.name();
+ if (canonicalCharsetName.equals("UTF-8")) {
+ byte[] d = data;
+ char[] v = new char[byteCount];
+
+ int idx = offset;
+ int last = offset + byteCount;
+ int s = 0;
+outer:
+ while (idx < last) {
+ byte b0 = d[idx++];
+ if ((b0 & 0x80) == 0) {
+ // 0xxxxxxx
+ // Range: U-00000000 - U-0000007F
+ int val = b0 & 0xff;
+ v[s++] = (char) val;
+ } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) ||
+ ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) {
+ int utfCount = 1;
+ if ((b0 & 0xf0) == 0xe0) utfCount = 2;
+ else if ((b0 & 0xf8) == 0xf0) utfCount = 3;
+ else if ((b0 & 0xfc) == 0xf8) utfCount = 4;
+ else if ((b0 & 0xfe) == 0xfc) utfCount = 5;
+
+ // 110xxxxx (10xxxxxx)+
+ // Range: U-00000080 - U-000007FF (count == 1)
+ // Range: U-00000800 - U-0000FFFF (count == 2)
+ // Range: U-00010000 - U-001FFFFF (count == 3)
+ // Range: U-00200000 - U-03FFFFFF (count == 4)
+ // Range: U-04000000 - U-7FFFFFFF (count == 5)
+
+ if (idx + utfCount > last) {
+ v[s++] = REPLACEMENT_CHAR;
+ continue;
+ }
+
+ // Extract usable bits from b0
+ int val = b0 & (0x1f >> (utfCount - 1));
+ for (int i = 0; i < utfCount; ++i) {
+ byte b = d[idx++];
+ if ((b & 0xc0) != 0x80) {
+ v[s++] = REPLACEMENT_CHAR;
+ idx--; // Put the input char back
+ continue outer;
+ }
+ // Push new bits in from the right side
+ val <<= 6;
+ val |= b & 0x3f;
+ }
+
+ // Note: Java allows overlong char
+ // specifications. To disallow, check that val
+ // is greater than or equal to the minimum
+ // value for each count:
+ //
+ // count min value
+ // ----- ----------
+ // 1 0x80
+ // 2 0x800
+ // 3 0x10000
+ // 4 0x200000
+ // 5 0x4000000
+
+ // Allow surrogate values (0xD800 - 0xDFFF) to
+ // be specified using 3-byte UTF values only
+ if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) {
+ v[s++] = REPLACEMENT_CHAR;
+ continue;
+ }
+
+ // Reject chars greater than the Unicode maximum of U+10FFFF.
+ if (val > 0x10FFFF) {
+ v[s++] = REPLACEMENT_CHAR;
+ continue;
+ }
+
+ // Encode chars from U+10000 up as surrogate pairs
+ if (val < 0x10000) {
+ v[s++] = (char) val;
+ } else {
+ int x = val & 0xffff;
+ int u = (val >> 16) & 0x1f;
+ int w = (u - 1) & 0xffff;
+ int hi = 0xd800 | (w << 6) | (x >> 10);
+ int lo = 0xdc00 | (x & 0x3ff);
+ v[s++] = (char) hi;
+ v[s++] = (char) lo;
+ }
+ } else {
+ // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfe-0xff
+ v[s++] = REPLACEMENT_CHAR;
+ }
+ }
+
+ if (s == byteCount) {
+ // We guessed right, so we can use our temporary array as-is.
+ value = v;
+ length = s;
+ } else {
+ // Our temporary array was too big, so reallocate and copy.
+ value = new char[s];
+ length = s;
+ System.arraycopy(v, 0, value, 0, s);
+ }
+ } else if (canonicalCharsetName.equals("ISO-8859-1")) {
+ value = new char[byteCount];
+ length = byteCount;
+ CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value);
+ } else if (canonicalCharsetName.equals("US-ASCII")) {
+ value = new char[byteCount];
+ length = byteCount;
+ CharsetUtils.asciiBytesToChars(data, offset, byteCount, value);
+ } else {
+ CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount));
+ length = cb.length();
+ if (length > 0) {
+ // We could use cb.array() directly, but that would mean we'd have to trust
+ // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later,
+ // which would break String's immutability guarantee. It would also tend to
+ // mean that we'd be wasting memory because CharsetDecoder doesn't trim the
+ // array. So we copy.
+ value = new char[length];
+ System.arraycopy(cb.array(), 0, value, 0, length);
+ } else {
+ value = EmptyArray.CHAR;
+ }
+ }
+ return newStringFromChars(value, 0, length);
+ }
+
+ public static String newStringFromBytes(byte[] data, Charset charset) {
+ return newStringFromBytes(data, 0, data.length, charset);
+ }
+
+ public static String newStringFromChars(char[] data) {
+ return newStringFromChars(data, 0, data.length);
+ }
+
+ public static String newStringFromChars(char[] data, int offset, int charCount) {
+ if ((offset | charCount) < 0 || charCount > data.length - offset) {
+ throw new StringIndexOutOfBoundsException(data.length, offset, charCount);
+ }
+ return newStringFromChars(offset, charCount, data);
+ }
+
+ static native String newStringFromChars(int offset, int charCount, char[] data);
+
+ public static native String newStringFromString(String toCopy);
+
+ public static String newStringFromStringBuffer(StringBuffer stringBuffer) {
+ synchronized (stringBuffer) {
+ return newStringFromChars(stringBuffer.getValue(), 0, stringBuffer.length());
+ }
+ }
+
+ // TODO: Implement this method natively.
+ public static String newStringFromCodePoints(int[] codePoints, int offset, int count) {
+ if (codePoints == null) {
+ throw new NullPointerException("codePoints == null");
+ }
+ if ((offset | count) < 0 || count > codePoints.length - offset) {
+ throw new StringIndexOutOfBoundsException(codePoints.length, offset, count);
+ }
+ char[] value = new char[count * 2];
+ int end = offset + count;
+ int length = 0;
+ for (int i = offset; i < end; i++) {
+ length += Character.toChars(codePoints[i], value, length);
+ }
+ return newStringFromChars(value, 0, length);
+ }
+
+ public static String newStringFromStringBuilder(StringBuilder stringBuilder) {
+ return newStringFromChars(stringBuilder.getValue(), 0, stringBuilder.length());
+ }
+}
diff --git a/luni/src/main/java/libcore/util/CharsetUtils.java b/luni/src/main/java/libcore/util/CharsetUtils.java
index 2e426c4..5163dba 100644
--- a/luni/src/main/java/libcore/util/CharsetUtils.java
+++ b/luni/src/main/java/libcore/util/CharsetUtils.java
@@ -23,33 +23,33 @@ package libcore.util;
*/
public final class CharsetUtils {
/**
- * Returns a new byte array containing the bytes corresponding to the given characters,
- * encoded in US-ASCII. Unrepresentable characters are replaced by (byte) '?'.
+ * Returns a new byte array containing the bytes corresponding to the characters in the given
+ * string, encoded in US-ASCII. Unrepresentable characters are replaced by (byte) '?'.
*/
- public static native byte[] toAsciiBytes(char[] chars, int offset, int length);
+ public static native byte[] toAsciiBytes(String s, int offset, int length);
/**
- * Returns a new byte array containing the bytes corresponding to the given characters,
- * encoded in ISO-8859-1. Unrepresentable characters are replaced by (byte) '?'.
+ * Returns a new byte array containing the bytes corresponding to the characters in the given
+ * string, encoded in ISO-8859-1. Unrepresentable characters are replaced by (byte) '?'.
*/
- public static native byte[] toIsoLatin1Bytes(char[] chars, int offset, int length);
+ public static native byte[] toIsoLatin1Bytes(String s, int offset, int length);
/**
- * Returns a new byte array containing the bytes corresponding to the given characters,
- * encoded in UTF-8. All characters are representable in UTF-8.
+ * Returns a new byte array containing the bytes corresponding to the characters in the given
+ * string, encoded in UTF-8. All characters are representable in UTF-8.
*/
- public static native byte[] toUtf8Bytes(char[] chars, int offset, int length);
+ public static native byte[] toUtf8Bytes(String s, int offset, int length);
/**
- * Returns a new byte array containing the bytes corresponding to the given characters,
- * encoded in UTF-16BE. All characters are representable in UTF-16BE.
+ * Returns a new byte array containing the bytes corresponding to the characters in the given
+ * string, encoded in UTF-16BE. All characters are representable in UTF-16BE.
*/
- public static byte[] toBigEndianUtf16Bytes(char[] chars, int offset, int length) {
+ public static byte[] toBigEndianUtf16Bytes(String s, int offset, int length) {
byte[] result = new byte[length * 2];
int end = offset + length;
int resultIndex = 0;
for (int i = offset; i < end; ++i) {
- char ch = chars[i];
+ char ch = s.charAt(i);
result[resultIndex++] = (byte) (ch >> 8);
result[resultIndex++] = (byte) ch;
}
diff --git a/luni/src/main/native/Register.cpp b/luni/src/main/native/Register.cpp
index 0f2d0ad..acc1e4f 100644
--- a/luni/src/main/native/Register.cpp
+++ b/luni/src/main/native/Register.cpp
@@ -69,7 +69,6 @@ jint JNI_OnLoad(JavaVM* vm, void*) {
REGISTER(register_libcore_io_AsynchronousCloseMonitor);
REGISTER(register_libcore_io_Memory);
REGISTER(register_libcore_io_Posix);
- REGISTER(register_libcore_util_CharsetUtils);
REGISTER(register_org_apache_harmony_dalvik_NativeTestTarget);
REGISTER(register_org_apache_harmony_xml_ExpatParser);
REGISTER(register_sun_misc_Unsafe);
diff --git a/luni/src/main/native/libcore_util_CharsetUtils.cpp b/luni/src/main/native/libcore_util_CharsetUtils.cpp
index 57c8172..e69de29 100644
--- a/luni/src/main/native/libcore_util_CharsetUtils.cpp
+++ b/luni/src/main/native/libcore_util_CharsetUtils.cpp
@@ -1,250 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define LOG_TAG "String"
-
-#include "JNIHelp.h"
-#include "JniConstants.h"
-#include "ScopedPrimitiveArray.h"
-#include "jni.h"
-#include "unicode/utf16.h"
-
-#include <string.h>
-
-/**
- * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
- * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
- * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
- *
- * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
- * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
- * to the garbage collector (nor hide potentially large allocations from it).
- *
- * Because a call to append might require an allocation, it might fail. Callers should always
- * check the return value of append.
- */
-class NativeUnsafeByteSequence {
-public:
- NativeUnsafeByteSequence(JNIEnv* env)
- : mEnv(env), mJavaArray(NULL), mRawArray(NULL), mSize(-1), mOffset(0)
- {
- }
-
- ~NativeUnsafeByteSequence() {
- // Release our pointer to the raw array, copying changes back to the Java heap.
- if (mRawArray != NULL) {
- mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
- }
- }
-
- bool append(jbyte b) {
- if (mOffset == mSize && !resize(mSize * 2)) {
- return false;
- }
- mRawArray[mOffset++] = b;
- return true;
- }
-
- bool resize(int newSize) {
- if (newSize == mSize) {
- return true;
- }
-
- // Allocate a new array.
- jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
- if (newJavaArray == NULL) {
- return false;
- }
- jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, NULL);
- if (newRawArray == NULL) {
- return false;
- }
-
- // Copy data out of the old array and then let go of it.
- // Note that we may be trimming the array.
- if (mRawArray != NULL) {
- memcpy(newRawArray, mRawArray, mOffset);
- mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
- mEnv->DeleteLocalRef(mJavaArray);
- }
-
- // Point ourselves at the new array.
- mJavaArray = newJavaArray;
- mRawArray = newRawArray;
- mSize = newSize;
- return true;
- }
-
- jbyteArray toByteArray() {
- // Trim any unused space, if necessary.
- bool okay = resize(mOffset);
- return okay ? mJavaArray : NULL;
- }
-
-private:
- JNIEnv* mEnv;
- jbyteArray mJavaArray;
- jbyte* mRawArray;
- jint mSize;
- jint mOffset;
-
- // Disallow copy and assignment.
- NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
- void operator=(const NativeUnsafeByteSequence&);
-};
-
-static void Charsets_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
- ScopedByteArrayRO bytes(env, javaBytes);
- if (bytes.get() == NULL) {
- return;
- }
- ScopedCharArrayRW chars(env, javaChars);
- if (chars.get() == NULL) {
- return;
- }
-
- const jbyte* src = &bytes[offset];
- jchar* dst = &chars[0];
- static const jchar REPLACEMENT_CHAR = 0xfffd;
- for (int i = length - 1; i >= 0; --i) {
- jchar ch = static_cast<jchar>(*src++ & 0xff);
- *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
- }
-}
-
-static void Charsets_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
- ScopedByteArrayRO bytes(env, javaBytes);
- if (bytes.get() == NULL) {
- return;
- }
- ScopedCharArrayRW chars(env, javaChars);
- if (chars.get() == NULL) {
- return;
- }
-
- const jbyte* src = &bytes[offset];
- jchar* dst = &chars[0];
- for (int i = length - 1; i >= 0; --i) {
- *dst++ = static_cast<jchar>(*src++ & 0xff);
- }
-}
-
-/**
- * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
- * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
- * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
- */
-static jbyteArray charsToBytes(JNIEnv* env, jcharArray javaChars, jint offset, jint length, jchar maxValidChar) {
- ScopedCharArrayRO chars(env, javaChars);
- if (chars.get() == NULL) {
- return NULL;
- }
-
- jbyteArray javaBytes = env->NewByteArray(length);
- ScopedByteArrayRW bytes(env, javaBytes);
- if (bytes.get() == NULL) {
- return NULL;
- }
-
- const jchar* src = &chars[offset];
- jbyte* dst = &bytes[0];
- for (int i = length - 1; i >= 0; --i) {
- jchar ch = *src++;
- if (ch > maxValidChar) {
- ch = '?';
- }
- *dst++ = static_cast<jbyte>(ch);
- }
-
- return javaBytes;
-}
-
-static jbyteArray Charsets_toAsciiBytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
- return charsToBytes(env, javaChars, offset, length, 0x7f);
-}
-
-static jbyteArray Charsets_toIsoLatin1Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
- return charsToBytes(env, javaChars, offset, length, 0xff);
-}
-
-static jbyteArray Charsets_toUtf8Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
- ScopedCharArrayRO chars(env, javaChars);
- if (chars.get() == NULL) {
- return NULL;
- }
-
- NativeUnsafeByteSequence out(env);
- if (!out.resize(length)) {
- return NULL;
- }
-
- const int end = offset + length;
- for (int i = offset; i < end; ++i) {
- jint ch = chars[i];
- if (ch < 0x80) {
- // One byte.
- if (!out.append(ch)) {
- return NULL;
- }
- } else if (ch < 0x800) {
- // Two bytes.
- if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
- return NULL;
- }
- } else if (U16_IS_SURROGATE(ch)) {
- // A supplementary character.
- jchar high = (jchar) ch;
- jchar low = (i + 1 != end) ? chars[i + 1] : 0;
- if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
- if (!out.append('?')) {
- return NULL;
- }
- continue;
- }
- // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
- ++i;
- ch = U16_GET_SUPPLEMENTARY(high, low);
- // Four bytes.
- jbyte b1 = (ch >> 18) | 0xf0;
- jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
- jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
- jbyte b4 = (ch & 0x3f) | 0x80;
- if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
- return NULL;
- }
- } else {
- // Three bytes.
- jbyte b1 = (ch >> 12) | 0xe0;
- jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
- jbyte b3 = (ch & 0x3f) | 0x80;
- if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
- return NULL;
- }
- }
- }
- return out.toByteArray();
-}
-
-static JNINativeMethod gMethods[] = {
- NATIVE_METHOD(Charsets, asciiBytesToChars, "([BII[C)V"),
- NATIVE_METHOD(Charsets, isoLatin1BytesToChars, "([BII[C)V"),
- NATIVE_METHOD(Charsets, toAsciiBytes, "([CII)[B"),
- NATIVE_METHOD(Charsets, toIsoLatin1Bytes, "([CII)[B"),
- NATIVE_METHOD(Charsets, toUtf8Bytes, "([CII)[B"),
-};
-void register_libcore_util_CharsetUtils(JNIEnv* env) {
- jniRegisterNativeMethods(env, "libcore/util/CharsetUtils", gMethods, NELEM(gMethods));
-}
diff --git a/luni/src/main/native/sub.mk b/luni/src/main/native/sub.mk
index a90c683..73ed7cb 100644
--- a/luni/src/main/native/sub.mk
+++ b/luni/src/main/native/sub.mk
@@ -49,7 +49,6 @@ LOCAL_SRC_FILES := \
libcore_io_AsynchronousCloseMonitor.cpp \
libcore_io_Memory.cpp \
libcore_io_Posix.cpp \
- libcore_util_CharsetUtils.cpp \
org_apache_harmony_xml_ExpatParser.cpp \
readlink.cpp \
sun_misc_Unsafe.cpp \
diff --git a/luni/src/test/java/libcore/java/lang/StringTest.java b/luni/src/test/java/libcore/java/lang/StringTest.java
index bf162e5..bd52e06 100644
--- a/luni/src/test/java/libcore/java/lang/StringTest.java
+++ b/luni/src/test/java/libcore/java/lang/StringTest.java
@@ -173,47 +173,6 @@ public class StringTest extends TestCase {
}
/**
- * Tests a widely assumed performance characteristic of String.substring():
- * that it reuses the original's backing array. Although behavior should be
- * correct even if this test fails, many applications may suffer
- * significant performance degradation.
- */
- public void testSubstringSharesBackingArray() throws IllegalAccessException {
- String abcdefghij = "ABCDEFGHIJ";
- String cdefg = abcdefghij.substring(2, 7);
- assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg));
- }
-
- /**
- * Tests a widely assumed performance characteristic of string's copy
- * constructor: that it ensures the backing array is the same length as the
- * string. Although behavior should be correct even if this test fails,
- * many applications may suffer significant performance degradation.
- */
- public void testStringCopiesAvoidHeapRetention() throws IllegalAccessException {
- String abcdefghij = "ABCDEFGHIJ";
- assertSame(getBackingArray(abcdefghij), getBackingArray(new String(abcdefghij)));
-
- String cdefg = abcdefghij.substring(2, 7);
- assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg));
- assertEquals(5, getBackingArray(new String(cdefg)).length);
- }
-
- /**
- * Uses reflection to return the char[] backing the given string. This
- * returns the actual backing array; which must not be modified.
- */
- private char[] getBackingArray(String string) throws IllegalAccessException {
- for (Field f : String.class.getDeclaredFields()) {
- if (!Modifier.isStatic(f.getModifiers()) && f.getType() == char[].class) {
- f.setAccessible(true);
- return (char[]) f.get(string);
- }
- }
- throw new UnsupportedOperationException("No chars[] field on String!");
- }
-
- /**
* Test that strings interned manually and then later loaded as literals
* maintain reference equality. http://b/3098960
*/