Removed offset and value from String and added StringFactory.

Change-Id: I55314ceb906d0bf7e78545dcd9bc3489a5baf03f
author: Jeff Hao <jeffhao@google.com> 2014-01-15 13:51:48 -0800
committer: Jeff Hao <jeffhao@google.com> 2015-04-27 17:15:26 -0700
commit: 83c7414449bc406b581f0cb81ae06e7bce91403c (patch)
tree: 98e6be2303f80a6ac36554e4ae8f4f8ae0d935f1
parent: 8b7dbadede97a5166fcddfe6783e89c8957c1830 (diff)
download: libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.zip
libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.tar.gz
libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.tar.bz2
9 files changed, 435 insertions, 736 deletions
diff --git a/luni/src/main/java/java/lang/AbstractStringBuilder.java b/libart/src/main/java/java/lang/AbstractStringBuilder.java
index 4d84078..c8c8c5a 100644
--- a/luni/src/main/java/java/lang/AbstractStringBuilder.java
+++ b/libart/src/main/java/java/lang/AbstractStringBuilder.java
@@ -87,7 +87,7 @@ abstract class AbstractStringBuilder {
         count = string.length();
         shared = false;
         value = new char[count + INITIAL_CAPACITY];
-        string._getChars(0, count, value, 0);
+        string.getCharsNoCheck(0, count, value, 0);
     }
 
     private void enlargeBuffer(int min) {
@@ -145,7 +145,7 @@ abstract class AbstractStringBuilder {
         if (newCount > value.length) {
             enlargeBuffer(newCount);
         }
-        string._getChars(0, length, value, count);
+        string.getCharsNoCheck(0, length, value, count);
         count = newCount;
     }
 
@@ -167,7 +167,7 @@ abstract class AbstractStringBuilder {
         }
 
         if (s instanceof String) {
-            ((String) s)._getChars(start, end, value, count);
+            ((String) s).getCharsNoCheck(start, end, value, count);
         } else if (s instanceof AbstractStringBuilder) {
             AbstractStringBuilder other = (AbstractStringBuilder) s;
             System.arraycopy(other.value, start, value, count, length);
@@ -345,7 +345,7 @@ abstract class AbstractStringBuilder {
             int min = string.length();
             if (min != 0) {
                 move(min, index);
-                string._getChars(0, min, value, index);
+                string.getCharsNoCheck(0, min, value, index);
                 count += min;
             }
         } else {
@@ -422,7 +422,7 @@ abstract class AbstractStringBuilder {
                     value = value.clone();
                     shared = false;
                 }
-                string._getChars(0, stringLength, value, start);
+                string.getCharsNoCheck(0, stringLength, value, start);
                 count -= diff;
                 return;
             }
@@ -626,14 +626,7 @@ abstract class AbstractStringBuilder {
         if (count == 0) {
             return "";
         }
-        // Optimize String sharing for more performance
-        int wasted = value.length - count;
-        if (wasted >= 256
-                || (wasted >= INITIAL_CAPACITY && wasted >= (count >> 1))) {
-            return new String(value, 0, count);
-        }
-        shared = true;
-        return new String(0, count, value);
+        return StringFactory.newStringFromChars(0, count, value);
     }
 
     /**
diff --git a/luni/src/main/java/java/lang/CaseMapper.java b/libart/src/main/java/java/lang/CaseMapper.java
index 1da621c..f23a4ef 100644
--- a/luni/src/main/java/java/lang/CaseMapper.java
+++ b/libart/src/main/java/java/lang/CaseMapper.java
@@ -34,17 +34,15 @@ class CaseMapper {
     /**
      * Our current GC makes short-lived objects more expensive than we'd like. When that's fixed,
      * this class should be changed so that you instantiate it with the String and its value,
-     * offset, and count fields.
+     * and count fields.
      */
     private CaseMapper() {
     }
 
     /**
-     * Implements String.toLowerCase. We need 's' so that we can return the original String instance
-     * if nothing changes. We need 'value', 'offset', and 'count' because they're not otherwise
-     * accessible.
+     * Implements String.toLowerCase. The original String instance is returned if nothing changes.
      */
-    public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) {
+    public static String toLowerCase(Locale locale, String s) {
         // Punt hard cases to ICU4C.
         // Note that Greek isn't a particularly hard case for toLowerCase, only toUpperCase.
         String languageCode = locale.getLanguage();
@@ -52,29 +50,26 @@ class CaseMapper {
             return ICU.toLowerCase(s, locale);
         }
 
-        char[] newValue = null;
-        int newCount = 0;
-        for (int i = offset, end = offset + count; i < end; ++i) {
-            char ch = value[i];
+        String newString = null;
+        for (int i = 0, end = s.length(); i < end; ++i) {
+            char ch = s.charAt(i);
             char newCh;
             if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) {
                 // Punt these hard cases.
                 return ICU.toLowerCase(s, locale);
-            } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) {
+            } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(s, i)) {
                 newCh = GREEK_SMALL_FINAL_SIGMA;
             } else {
                 newCh = Character.toLowerCase(ch);
             }
-            if (newValue == null && ch != newCh) {
-                newValue = new char[count]; // The result can't be longer than the input.
-                newCount = i - offset;
-                System.arraycopy(value, offset, newValue, 0, newCount);
-            }
-            if (newValue != null) {
-                newValue[newCount++] = newCh;
+            if (ch != newCh) {
+                if (newString == null) {
+                    newString = StringFactory.newStringFromString(s);
+                }
+                newString.setCharAt(i, newCh);
             }
         }
-        return newValue != null ? new String(0, newCount, newValue) : s;
+        return newString != null ? newString : s;
     }
 
     /**
@@ -82,20 +77,20 @@ class CaseMapper {
      * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and
      * then a cased letter.
      */
-    private static boolean isFinalSigma(char[] value, int offset, int count, int index) {
+    private static boolean isFinalSigma(String s, int index) {
         // TODO: we don't skip case-ignorable sequences like we should.
         // TODO: we should add a more direct way to test for a cased letter.
-        if (index <= offset) {
+        if (index <= 0) {
             return false;
         }
-        char previous = value[index - 1];
+        char previous = s.charAt(index - 1);
         if (!(Character.isLowerCase(previous) || Character.isUpperCase(previous) || Character.isTitleCase(previous))) {
             return false;
         }
-        if (index + 1 >= offset + count) {
+        if (index + 1 >= s.length()) {
             return true;
         }
-        char next = value[index + 1];
+        char next = s.charAt(index + 1);
         if (Character.isLowerCase(next) || Character.isUpperCase(next) || Character.isTitleCase(next)) {
             return false;
         }
@@ -147,7 +142,7 @@ class CaseMapper {
         }
     };
 
-    public static String toUpperCase(Locale locale, String s, char[] value, int offset, int count) {
+    public static String toUpperCase(Locale locale, String s, int count) {
         String languageCode = locale.getLanguage();
         if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) {
             return ICU.toUpperCase(s, locale);
@@ -157,9 +152,10 @@ class CaseMapper {
         }
 
         char[] output = null;
+        String newString = null;
         int i = 0;
-        for (int o = offset, end = offset + count; o < end; o++) {
-            char ch = value[o];
+        for (int o = 0, end = count; o < end; o++) {
+            char ch = s.charAt(o);
             if (Character.isHighSurrogate(ch)) {
                 return ICU.toUpperCase(s, locale);
             }
@@ -171,23 +167,25 @@ class CaseMapper {
                     output = newoutput;
                 }
                 char upch = Character.toUpperCase(ch);
-                if (ch != upch) {
-                    if (output == null) {
-                        output = new char[count];
-                        i = o - offset;
-                        System.arraycopy(value, offset, output, 0, i);
-                    }
+                if (output != null) {
                     output[i++] = upch;
-                } else if (output != null) {
-                    output[i++] = ch;
+                } else if (ch != upch) {
+                    if (newString == null) {
+                        newString = StringFactory.newStringFromString(s);
+                    }
+                    newString.setCharAt(o, upch);
                 }
             } else {
                 int target = index * 3;
                 char val3 = upperValues[target + 2];
                 if (output == null) {
                     output = new char[count + (count / 6) + 2];
-                    i = o - offset;
-                    System.arraycopy(value, offset, output, 0, i);
+                    i = o;
+                    if (newString != null) {
+                        System.arraycopy(newString.toCharArray(), 0, output, 0, i);
+                    } else {
+                        System.arraycopy(s.toCharArray(), 0, output, 0, i);
+                    }
                 } else if (i + (val3 == 0 ? 1 : 2) >= output.length) {
                     char[] newoutput = new char[output.length + (count / 6) + 3];
                     System.arraycopy(output, 0, newoutput, 0, output.length);
@@ -204,7 +202,11 @@ class CaseMapper {
             }
         }
         if (output == null) {
-            return s;
+            if (newString != null) {
+                return newString;
+            } else {
+                return s;
+            }
         }
         return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i);
     }
diff --git a/libart/src/main/java/java/lang/String.java b/libart/src/main/java/java/lang/String.java
index a5bf34c..0875d1a 100644
--- a/libart/src/main/java/java/lang/String.java
+++ b/libart/src/main/java/java/lang/String.java
@@ -35,23 +35,6 @@ import libcore.util.EmptyArray;
  * See {@link Character} for details about the relationship between {@code char} and
  * Unicode code points.
  *
- * <a name="backing_array"><h3>Backing Arrays</h3></a>
- * This class is implemented using a {@code char[]}. The length of the array may exceed
- * the length of the string. For example, the string "Hello" may be backed by
- * the array {@code ['H', 'e', 'l', 'l', 'o', 'W'. 'o', 'r', 'l', 'd']} with
- * offset 0 and length 5.
- *
- * <p>Multiple strings can share the same {@code char[]} because strings are immutable.
- * The {@link #substring} method <strong>always</strong> returns a string that
- * shares the backing array of its source string. Generally this is an
- * optimization: fewer {@code char[]}s need to be allocated, and less copying
- * is necessary. But this can also lead to unwanted heap retention. Taking a
- * short substring of long string means that the long shared {@code char[]} won't be
- * garbage until both strings are garbage. This typically happens when parsing
- * small substrings out of a large input. To avoid this where necessary, call
- * {@code new String(longString.subString(...))}. The string copy constructor
- * always ensures that the backing array is no larger than necessary.
- *
  * @see StringBuffer
  * @see StringBuilder
  * @see Charset
@@ -93,10 +76,6 @@ public final class String implements Serializable, Comparable<String>, CharSeque
         }
     }
 
-    private final char[] value;
-
-    private final int offset;
-
     private final int count;
 
     private int hashCode;
@@ -105,9 +84,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
      * Creates an empty string.
      */
     public String() {
-        value = EmptyArray.CHAR;
-        offset = 0;
-        count = 0;
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -116,7 +93,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
      */
     @FindBugsSuppressWarnings("DM_DEFAULT_ENCODING")
     public String(byte[] data) {
-        this(data, 0, data.length);
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -133,7 +110,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
      */
     @Deprecated
     public String(byte[] data, int high) {
-        this(data, high, 0, data.length);
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -146,7 +123,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
      *             if {@code byteCount < 0 || offset < 0 || offset + byteCount > data.length}.
      */
     public String(byte[] data, int offset, int byteCount) {
-        this(data, offset, byteCount, Charset.defaultCharset());
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -162,16 +139,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
      */
     @Deprecated
     public String(byte[] data, int high, int offset, int byteCount) {
-        if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
-            throw failedBoundsCheck(data.length, offset, byteCount);
-        }
-        this.offset = 0;
-        this.value = new char[byteCount];
-        this.count = byteCount;
-        high <<= 8;
-        for (int i = 0; i < count; i++) {
-            value[i] = (char) (high + (data[offset++] & 0xff));
-        }
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -188,7 +156,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
      *             if the named charset is not supported.
      */
     public String(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException {
-        this(data, offset, byteCount, Charset.forNameUEE(charsetName));
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -203,7 +171,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
      *             if {@code charsetName} is not supported.
      */
     public String(byte[] data, String charsetName) throws UnsupportedEncodingException {
-        this(data, 0, data.length, Charset.forNameUEE(charsetName));
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -221,144 +189,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque
      * @since 1.6
      */
     public String(byte[] data, int offset, int byteCount, Charset charset) {
-        if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
-            throw failedBoundsCheck(data.length, offset, byteCount);
-        }
-
-        // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed and because 'count' and
-        // 'value' are final.
-        String canonicalCharsetName = charset.name();
-        if (canonicalCharsetName.equals("UTF-8")) {
-            byte[] d = data;
-            char[] v = new char[byteCount];
-
-            int idx = offset;
-            int last = offset + byteCount;
-            int s = 0;
-outer:
-            while (idx < last) {
-                byte b0 = d[idx++];
-                if ((b0 & 0x80) == 0) {
-                    // 0xxxxxxx
-                    // Range:  U-00000000 - U-0000007F
-                    int val = b0 & 0xff;
-                    v[s++] = (char) val;
-                } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) ||
-                        ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) {
-                    int utfCount = 1;
-                    if ((b0 & 0xf0) == 0xe0) utfCount = 2;
-                    else if ((b0 & 0xf8) == 0xf0) utfCount = 3;
-                    else if ((b0 & 0xfc) == 0xf8) utfCount = 4;
-                    else if ((b0 & 0xfe) == 0xfc) utfCount = 5;
-
-                    // 110xxxxx (10xxxxxx)+
-                    // Range:  U-00000080 - U-000007FF (count == 1)
-                    // Range:  U-00000800 - U-0000FFFF (count == 2)
-                    // Range:  U-00010000 - U-001FFFFF (count == 3)
-                    // Range:  U-00200000 - U-03FFFFFF (count == 4)
-                    // Range:  U-04000000 - U-7FFFFFFF (count == 5)
-
-                    if (idx + utfCount > last) {
-                        v[s++] = REPLACEMENT_CHAR;
-                        continue;
-                    }
-
-                    // Extract usable bits from b0
-                    int val = b0 & (0x1f >> (utfCount - 1));
-                    for (int i = 0; i < utfCount; ++i) {
-                        byte b = d[idx++];
-                        if ((b & 0xc0) != 0x80) {
-                            v[s++] = REPLACEMENT_CHAR;
-                            idx--; // Put the input char back
-                            continue outer;
-                        }
-                        // Push new bits in from the right side
-                        val <<= 6;
-                        val |= b & 0x3f;
-                    }
-
-                    // Note: Java allows overlong char
-                    // specifications To disallow, check that val
-                    // is greater than or equal to the minimum
-                    // value for each count:
-                    //
-                    // count    min value
-                    // -----   ----------
-                    //   1           0x80
-                    //   2          0x800
-                    //   3        0x10000
-                    //   4       0x200000
-                    //   5      0x4000000
-
-                    // Allow surrogate values (0xD800 - 0xDFFF) to
-                    // be specified using 3-byte UTF values only
-                    if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) {
-                        v[s++] = REPLACEMENT_CHAR;
-                        continue;
-                    }
-
-                    // Reject chars greater than the Unicode maximum of U+10FFFF.
-                    if (val > 0x10FFFF) {
-                        v[s++] = REPLACEMENT_CHAR;
-                        continue;
-                    }
-
-                    // Encode chars from U+10000 up as surrogate pairs
-                    if (val < 0x10000) {
-                        v[s++] = (char) val;
-                    } else {
-                        int x = val & 0xffff;
-                        int u = (val >> 16) & 0x1f;
-                        int w = (u - 1) & 0xffff;
-                        int hi = 0xd800 | (w << 6) | (x >> 10);
-                        int lo = 0xdc00 | (x & 0x3ff);
-                        v[s++] = (char) hi;
-                        v[s++] = (char) lo;
-                    }
-                } else {
-                    // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff
-                    v[s++] = REPLACEMENT_CHAR;
-                }
-            }
-
-            if (s == byteCount) {
-                // We guessed right, so we can use our temporary array as-is.
-                this.offset = 0;
-                this.value = v;
-                this.count = s;
-            } else {
-                // Our temporary array was too big, so reallocate and copy.
-                this.offset = 0;
-                this.value = new char[s];
-                this.count = s;
-                System.arraycopy(v, 0, value, 0, s);
-            }
-        } else if (canonicalCharsetName.equals("ISO-8859-1")) {
-            this.offset = 0;
-            this.value = new char[byteCount];
-            this.count = byteCount;
-            CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value);
-        } else if (canonicalCharsetName.equals("US-ASCII")) {
-            this.offset = 0;
-            this.value = new char[byteCount];
-            this.count = byteCount;
-            CharsetUtils.asciiBytesToChars(data, offset, byteCount, value);
-        } else {
-            CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount));
-            this.offset = 0;
-            this.count = cb.length();
-            if (count > 0) {
-                // We could use cb.array() directly, but that would mean we'd have to trust
-                // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later,
-                // which would break String's immutability guarantee. It would also tend to
-                // mean that we'd be wasting memory because CharsetDecoder doesn't trim the
-                // array. So we copy.
-                this.value = new char[count];
-                System.arraycopy(cb.array(), 0, value, 0, count);
-            } else {
-                this.value = EmptyArray.CHAR;
-            }
-        }
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -368,7 +199,7 @@ outer:
      * @since 1.6
      */
     public String(byte[] data, Charset charset) {
-        this(data, 0, data.length, charset);
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -379,7 +210,7 @@ outer:
      * @throws NullPointerException if {@code data == null}
      */
     public String(char[] data) {
-        this(data, 0, data.length);
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -393,36 +224,25 @@ outer:
      *             if {@code charCount < 0 || offset < 0 || offset + charCount > data.length}
      */
     public String(char[] data, int offset, int charCount) {
-        if ((offset | charCount) < 0 || charCount > data.length - offset) {
-            throw failedBoundsCheck(data.length, offset, charCount);
-        }
-        this.offset = 0;
-        this.value = new char[charCount];
-        this.count = charCount;
-        System.arraycopy(data, offset, value, 0, count);
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /*
      * Internal version of the String(char[], int, int) constructor.
-     * Does not range check, null check, or copy the array.
+     * Does not range check or null check.
      */
+    // TODO: Replace calls to this with calls to StringFactory; doing so will require
+    // splitting other files in java.lang.
     String(int offset, int charCount, char[] chars) {
-        this.value = chars;
-        this.offset = offset;
-        this.count = charCount;
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
-     * Constructs a copy of the given string.
-     * The returned string's <a href="#backing_array">backing array</a>
-     * is no larger than necessary.
+     * Constructs a new string with the same sequence of characters as {@code
+     * toCopy}.
      */
     public String(String toCopy) {
-        value = (toCopy.value.length == toCopy.count)
-                ? toCopy.value
-                : Arrays.copyOfRange(toCopy.value, toCopy.offset, toCopy.offset + toCopy.length());
-        offset = 0;
-        count = value.length;
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -430,11 +250,7 @@ outer:
      * {@code StringBuffer}.
      */
     public String(StringBuffer stringBuffer) {
-        offset = 0;
-        synchronized (stringBuffer) {
-            value = stringBuffer.shareValue();
-            count = stringBuffer.length();
-        }
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -451,20 +267,7 @@ outer:
      * @since 1.5
      */
     public String(int[] codePoints, int offset, int count) {
-        if (codePoints == null) {
-            throw new NullPointerException("codePoints == null");
-        }
-        if ((offset | count) < 0 || count > codePoints.length - offset) {
-            throw failedBoundsCheck(codePoints.length, offset, count);
-        }
-        this.offset = 0;
-        this.value = new char[count * 2];
-        int end = offset + count;
-        int c = 0;
-        for (int i = offset; i < end; i++) {
-            c += Character.toChars(codePoints[i], this.value, c);
-        }
-        this.count = c;
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
@@ -476,25 +279,16 @@ outer:
      * @since 1.5
      */
     public String(StringBuilder stringBuilder) {
-        if (stringBuilder == null) {
-            throw new NullPointerException("stringBuilder == null");
-        }
-        this.offset = 0;
-        this.count = stringBuilder.length();
-        this.value = new char[this.count];
-        stringBuilder.getChars(0, this.count, this.value, 0);
+        throw new UnsupportedOperationException("Use StringFactory instead.");
     }
 
     /**
      * Returns the {@code char} at {@code index}.
      * @throws IndexOutOfBoundsException if {@code index < 0} or {@code index >= length()}.
      */
-    public char charAt(int index) {
-        if (index < 0 || index >= count) {
-            throw indexAndLength(index);
-        }
-        return value[offset + index];
-    }
+    public native char charAt(int index);
+
+    native void setCharAt(int index, char c);
 
     private StringIndexOutOfBoundsException indexAndLength(int index) {
         throw new StringIndexOutOfBoundsException(this, index);
@@ -557,12 +351,11 @@ outer:
      *             if {@code string} is {@code null}.
      */
     public int compareToIgnoreCase(String string) {
-        int o1 = offset, o2 = string.offset, result;
-        int end = offset + (count < string.count ? count : string.count);
+        int result;
+        int end = count < string.count ? count : string.count;
         char c1, c2;
-        char[] target = string.value;
-        while (o1 < end) {
-            if ((c1 = value[o1++]) == (c2 = target[o2++])) {
+        for (int i = 0; i < end; ++i) {
+            if ((c1 = charAt(i)) == (c2 = string.charAt(i))) {
                 continue;
             }
             c1 = foldCase(c1);
@@ -582,15 +375,7 @@ outer:
      * @return a new string which is the concatenation of this string and the
      *         specified string.
      */
-    public String concat(String string) {
-        if (string.count > 0 && count > 0) {
-            char[] buffer = new char[count + string.count];
-            System.arraycopy(value, offset, buffer, 0, count);
-            System.arraycopy(string.value, string.offset, buffer, count, string.count);
-            return new String(0, buffer.length, buffer);
-        }
-        return count == 0 ? string : this;
-    }
+    public native String concat(String string);
 
     /**
      * Creates a new string by copying the given {@code char[]}.
@@ -601,7 +386,7 @@ outer:
      *             if {@code data} is {@code null}.
      */
     public static String copyValueOf(char[] data) {
-        return new String(data, 0, data.length);
+        return StringFactory.newStringFromChars(data, 0, data.length);
     }
 
     /**
@@ -616,7 +401,7 @@ outer:
      *             data.length}.
      */
     public static String copyValueOf(char[] data, int start, int length) {
-        return new String(data, start, length);
+        return StringFactory.newStringFromChars(data, start, length);
     }
 
     /**
@@ -654,16 +439,10 @@ outer:
             if (hashCode() != s.hashCode()) {
                 return false;
             }
-            char[] value1 = value;
-            int offset1 = offset;
-            char[] value2 = s.value;
-            int offset2 = s.offset;
-            for (int end = offset1 + count; offset1 < end; ) {
-                if (value1[offset1] != value2[offset2]) {
+            for (int i = 0; i < count; ++i) {
+                if (charAt(i) != s.charAt(i)) {
                     return false;
                 }
-                offset1++;
-                offset2++;
             }
             return true;
         } else {
@@ -686,12 +465,9 @@ outer:
         if (string == null || count != string.count) {
             return false;
         }
-        int o1 = offset, o2 = string.offset;
-        int end = offset + count;
-        char[] target = string.value;
-        while (o1 < end) {
-            char c1 = value[o1++];
-            char c2 = target[o2++];
+        for (int i = 0; i < count; ++i) {
+            char c1 = charAt(i);
+            char c2 = string.charAt(i);
             if (c1 != c2 && foldCase(c1) != foldCase(c2)) {
                 return false;
             }
@@ -721,10 +497,9 @@ outer:
     @Deprecated
     public void getBytes(int start, int end, byte[] data, int index) {
         if (start >= 0 && start <= end && end <= count) {
-            end += offset;
             try {
-                for (int i = offset + start; i < end; i++) {
-                    data[index++] = (byte) value[i];
+                for (int i = start; i < end; ++i) {
+                    data[index++] = (byte) charAt(i);
                 }
             } catch (ArrayIndexOutOfBoundsException ignored) {
                 throw failedBoundsCheck(data.length, index, end - start);
@@ -772,16 +547,15 @@ outer:
     public byte[] getBytes(Charset charset) {
         String canonicalCharsetName = charset.name();
         if (canonicalCharsetName.equals("UTF-8")) {
-            return CharsetUtils.toUtf8Bytes(value, offset, count);
+            return CharsetUtils.toUtf8Bytes(this, 0, count);
         } else if (canonicalCharsetName.equals("ISO-8859-1")) {
-            return CharsetUtils.toIsoLatin1Bytes(value, offset, count);
+            return CharsetUtils.toIsoLatin1Bytes(this, 0, count);
         } else if (canonicalCharsetName.equals("US-ASCII")) {
-            return CharsetUtils.toAsciiBytes(value, offset, count);
+            return CharsetUtils.toAsciiBytes(this, 0, count);
         } else if (canonicalCharsetName.equals("UTF-16BE")) {
-            return CharsetUtils.toBigEndianUtf16Bytes(value, offset, count);
+            return CharsetUtils.toBigEndianUtf16Bytes(this, 0, count);
         } else {
-            CharBuffer chars = CharBuffer.wrap(this.value, this.offset, this.count);
-            ByteBuffer buffer = charset.encode(chars.asReadOnlyBuffer());
+            ByteBuffer buffer = charset.encode(this);
             byte[] bytes = new byte[buffer.limit()];
             buffer.get(bytes);
             return bytes;
@@ -809,7 +583,16 @@ outer:
      */
     public void getChars(int start, int end, char[] buffer, int index) {
         if (start >= 0 && start <= end && end <= count) {
-            System.arraycopy(value, start + offset, buffer, index, end - start);
+            if (buffer == null) {
+                throw new NullPointerException("buffer == null");
+            }
+            if (index < 0) {
+                throw new IndexOutOfBoundsException("index < 0");
+            }
+            if (end - start > buffer.length - index) {
+                throw new ArrayIndexOutOfBoundsException("end - start > buffer.length - index");
+            }
+            getCharsNoCheck(start, end, buffer, index);
         } else {
             // We throw StringIndexOutOfBoundsException rather than System.arraycopy's AIOOBE.
             throw startEndAndLength(start, end);
@@ -821,9 +604,7 @@ outer:
      * within the java.lang package only.  The caller is responsible for
      * ensuring that start >= 0 && start <= end && end <= count.
      */
-    void _getChars(int start, int end, char[] buffer, int index) {
-        System.arraycopy(value, start + offset, buffer, index, end - start);
-    }
+    native void getCharsNoCheck(int start, int end, char[] buffer, int index);
 
     @Override public int hashCode() {
         int hash = hashCode;
@@ -831,10 +612,8 @@ outer:
             if (count == 0) {
                 return 0;
             }
-            final int end = count + offset;
-            final char[] chars = value;
-            for (int i = offset; i < end; ++i) {
-                hash = 31*hash + chars[i];
+            for (int i = 0; i < count; ++i) {
+                hash = 31 * hash + charAt(i);
             }
             hashCode = hash;
         }
@@ -893,21 +672,17 @@ outer:
             if (subCount > _count) {
                 return -1;
             }
-            char[] target = string.value;
-            int subOffset = string.offset;
-            char firstChar = target[subOffset];
-            int end = subOffset + subCount;
+            char firstChar = string.charAt(0);
             while (true) {
                 int i = indexOf(firstChar, start);
                 if (i == -1 || subCount + i > _count) {
                     return -1; // handles subCount > count || start >= count
                 }
-                int o1 = offset + i, o2 = subOffset;
-                char[] _value = value;
-                while (++o2 < end && _value[++o1] == target[o2]) {
+                int o1 = i, o2 = 0;
+                while (++o2 < subCount && charAt(++o1) == string.charAt(o2)) {
                     // Intentionally empty
                 }
-                if (o2 == end) {
+                if (o2 == subCount) {
                     return i;
                 }
                 start = i + 1;
@@ -934,21 +709,17 @@ outer:
             if (subCount + start > _count) {
                 return -1;
             }
-            char[] target = subString.value;
-            int subOffset = subString.offset;
-            char firstChar = target[subOffset];
-            int end = subOffset + subCount;
+            char firstChar = subString.charAt(0);
             while (true) {
                 int i = indexOf(firstChar, start);
                 if (i == -1 || subCount + i > _count) {
                     return -1; // handles subCount > count || start >= count
                 }
-                int o1 = offset + i, o2 = subOffset;
-                char[] _value = value;
-                while (++o2 < end && _value[++o1] == target[o2]) {
+                int o1 = i, o2 = 0;
+                while (++o2 < subCount && charAt(++o1) == subString.charAt(o2)) {
                     // Intentionally empty
                 }
-                if (o2 == end) {
+                if (o2 == subCount) {
                     return i;
                 }
                 start = i + 1;
@@ -991,11 +762,9 @@ outer:
             return lastIndexOfSupplementary(c, Integer.MAX_VALUE);
         }
         int _count = count;
-        int _offset = offset;
-        char[] _value = value;
-        for (int i = _offset + _count - 1; i >= _offset; --i) {
-            if (_value[i] == c) {
-                return i - _offset;
+        for (int i = _count - 1; i >= 0; --i) {
+            if (charAt(i) == c) {
+                return i;
             }
         }
         return -1;
@@ -1011,15 +780,13 @@ outer:
             return lastIndexOfSupplementary(c, start);
         }
         int _count = count;
-        int _offset = offset;
-        char[] _value = value;
         if (start >= 0) {
             if (start >= _count) {
                 start = _count - 1;
             }
-            for (int i = _offset + start; i >= _offset; --i) {
-                if (_value[i] == c) {
-                    return i - _offset;
+            for (int i = start; i >= 0; --i) {
+                if (charAt(i) == c) {
+                    return i;
                 }
             }
         }
@@ -1031,7 +798,7 @@ outer:
             return -1;
         }
         char[] chars = Character.toChars(c);
-        String needle = new String(0, chars.length, chars);
+        String needle = StringFactory.newStringFromChars(0, chars.length, chars);
         return lastIndexOf(needle, start);
     }
 
@@ -1065,20 +832,17 @@ outer:
                     start = count - subCount;
                 }
                 // count and subCount are both >= 1
-                char[] target = subString.value;
-                int subOffset = subString.offset;
-                char firstChar = target[subOffset];
-                int end = subOffset + subCount;
+                char firstChar = subString.charAt(0);
                 while (true) {
                     int i = lastIndexOf(firstChar, start);
                     if (i == -1) {
                         return -1;
                     }
-                    int o1 = offset + i, o2 = subOffset;
-                    while (++o2 < end && value[++o1] == target[o2]) {
+                    int o1 = i, o2 = 0;
+                    while (++o2 < subCount && charAt(++o1) == subString.charAt(o2)) {
                         // Intentionally empty
                     }
-                    if (o2 == end) {
+                    if (o2 == subCount) {
                         return i;
                     }
                     start = i - 1;
@@ -1121,11 +885,8 @@ outer:
         if (length <= 0) {
             return true;
         }
-        int o1 = offset + thisStart, o2 = string.offset + start;
-        char[] value1 = value;
-        char[] value2 = string.value;
         for (int i = 0; i < length; ++i) {
-            if (value1[o1 + i] != value2[o2 + i]) {
+            if (charAt(thisStart + i) != string.charAt(start + i)) {
                 return false;
             }
         }
@@ -1164,13 +925,10 @@ outer:
         if (start < 0 || length > string.count - start) {
             return false;
         }
-        thisStart += offset;
-        start += string.offset;
         int end = thisStart + length;
-        char[] target = string.value;
         while (thisStart < end) {
-            char c1 = value[thisStart++];
-            char c2 = target[start++];
+            char c1 = charAt(thisStart++);
+            char c2 = string.charAt(start++);
             if (c1 != c2 && foldCase(c1) != foldCase(c2)) {
                 return false;
             }
@@ -1182,29 +940,20 @@ outer:
      * Returns a copy of this string after replacing occurrences of the given {@code char} with another.
      */
     public String replace(char oldChar, char newChar) {
-        char[] buffer = value;
-        int _offset = offset;
+        String s = null;
         int _count = count;
-
-        int idx = _offset;
-        int last = _offset + _count;
         boolean copied = false;
-        while (idx < last) {
-            if (buffer[idx] == oldChar) {
+        for (int i = 0; i < _count; ++i) {
+            if (charAt(i) == oldChar) {
                 if (!copied) {
-                    char[] newBuffer = new char[_count];
-                    System.arraycopy(buffer, _offset, newBuffer, 0, _count);
-                    buffer = newBuffer;
-                    idx -= _offset;
-                    last -= _offset;
+                    s = StringFactory.newStringFromString(this);
                     copied = true;
                 }
-                buffer[idx] = newChar;
+                s.setCharAt(i, newChar);
             }
-            idx++;
         }
 
-        return copied ? new String(0, count, buffer) : this;
+        return copied ? s : this;
     }
 
     /**
@@ -1241,9 +990,8 @@ outer:
             int resultLength = count + (count + 1) * replacementString.length();
             StringBuilder result = new StringBuilder(resultLength);
             result.append(replacementString);
-            int end = offset + count;
-            for (int i = offset; i != end; ++i) {
-                result.append(value[i]);
+            for (int i = 0; i != count; ++i) {
+                result.append(charAt(i));
                 result.append(replacementString);
             }
             return result.toString();
@@ -1252,15 +1000,21 @@ outer:
         StringBuilder result = new StringBuilder(count);
         int searchStart = 0;
         do {
-            // Copy chars before the match...
-            result.append(value, offset + searchStart, matchStart - searchStart);
+            // Copy characters before the match...
+            // TODO: Perform this faster than one char at a time?
+            for (int i = searchStart; i < matchStart; ++i) {
+                result.append(charAt(i));
+            }
             // Insert the replacement...
             result.append(replacementString);
             // And skip over the match...
             searchStart = matchStart + targetLength;
         } while ((matchStart = indexOf(targetString, searchStart)) != -1);
         // Copy any trailing chars...
-        result.append(value, offset + searchStart, count - searchStart);
+        // TODO: Perform this faster than one char at a time?
+        for (int i = searchStart; i < count; ++i) {
+            result.append(charAt(i));
+        }
         return result.toString();
     }
 
@@ -1308,7 +1062,7 @@ outer:
             return this;
         }
         if (start >= 0 && start <= count) {
-            return new String(offset + start, count - start, value);
+            return fastSubstring(start, count - start);
         }
         throw indexAndLength(start);
     }
@@ -1328,21 +1082,19 @@ outer:
         }
         // Fast range check.
         if (start >= 0 && start <= end && end <= count) {
-            return new String(offset + start, end - start, value);
+            return fastSubstring(start, end - start);
         }
         throw startEndAndLength(start, end);
     }
 
+    private native String fastSubstring(int start, int length);
+
     /**
      * Returns a new {@code char} array containing a copy of the {@code char}s in this string.
      * This is expensive and rarely useful. If you just want to iterate over the {@code char}s in
      * the string, use {@link #charAt} instead.
      */
-    public char[] toCharArray() {
-        char[] buffer = new char[count];
-        System.arraycopy(value, offset, buffer, 0, count);
-        return buffer;
-    }
+    public native char[] toCharArray();
 
     /**
      * Converts this string to lower case, using the rules of the user's default locale.
@@ -1351,7 +1103,7 @@ outer:
      * @return a new lower case string, or {@code this} if it's already all lower case.
      */
     public String toLowerCase() {
-        return CaseMapper.toLowerCase(Locale.getDefault(), this, value, offset, count);
+        return CaseMapper.toLowerCase(Locale.getDefault(), this);
     }
 
     /**
@@ -1368,7 +1120,7 @@ outer:
      * @return a new lower case string, or {@code this} if it's already all lower case.
      */
     public String toLowerCase(Locale locale) {
-        return CaseMapper.toLowerCase(locale, this, value, offset, count);
+        return CaseMapper.toLowerCase(locale, this);
     }
 
     /**
@@ -1386,7 +1138,7 @@ outer:
      * @return a new upper case string, or {@code this} if it's already all upper case.
      */
     public String toUpperCase() {
-        return CaseMapper.toUpperCase(Locale.getDefault(), this, value, offset, count);
+        return CaseMapper.toUpperCase(Locale.getDefault(), this, count);
     }
 
     /**
@@ -1403,7 +1155,7 @@ outer:
      * @return a new upper case string, or {@code this} if it's already all upper case.
      */
     public String toUpperCase(Locale locale) {
-        return CaseMapper.toUpperCase(locale, this, value, offset, count);
+        return CaseMapper.toUpperCase(locale, this, count);
     }
 
     /**
@@ -1411,18 +1163,18 @@ outer:
      * the beginning or end.
      */
     public String trim() {
-        int start = offset, last = offset + count - 1;
+        int start = 0, last = count - 1;
         int end = last;
-        while ((start <= end) && (value[start] <= ' ')) {
+        while ((start <= end) && (charAt(start) <= ' ')) {
             start++;
         }
-        while ((end >= start) && (value[end] <= ' ')) {
+        while ((end >= start) && (charAt(end) <= ' ')) {
             end--;
         }
-        if (start == offset && end == last) {
+        if (start == 0 && end == last) {
             return this;
         }
-        return new String(start, end - start + 1, value);
+        return fastSubstring(start, end - start + 1);
     }
 
     /**
@@ -1434,7 +1186,7 @@ outer:
      *             if {@code data} is {@code null}.
      */
     public static String valueOf(char[] data) {
-        return new String(data, 0, data.length);
+        return StringFactory.newStringFromChars(data, 0, data.length);
     }
 
     /**
@@ -1448,7 +1200,7 @@ outer:
      *             if {@code data} is {@code null}.
      */
     public static String valueOf(char[] data, int start, int length) {
-        return new String(data, start, length);
+        return StringFactory.newStringFromChars(data, start, length);
     }
 
     /**
@@ -1457,9 +1209,9 @@ outer:
     public static String valueOf(char value) {
         String s;
         if (value < 128) {
-            s = new String(value, 1, ASCII);
+            s = StringFactory.newStringFromChars(value, 1, ASCII);
         } else {
-            s = new String(0, 1, new char[] { value });
+            s = StringFactory.newStringFromChars(0, 1, new char[] { value });
         }
         s.hashCode = value;
         return s;
@@ -1533,7 +1285,8 @@ outer:
             if (count != size) {
                 return false;
             }
-            return regionMatches(0, new String(0, size, sb.getValue()), 0, size);
+            String s = StringFactory.newStringFromChars(0, size, sb.getValue());
+            return regionMatches(0, s, 0, size);
         }
     }
 
@@ -1682,7 +1435,7 @@ outer:
         if (index < 0 || index >= count) {
             throw indexAndLength(index);
         }
-        return Character.codePointAt(value, offset + index, offset + count);
+        return Character.codePointAt(this, index);
     }
 
     /**
@@ -1696,7 +1449,7 @@ outer:
         if (index < 1 || index > count) {
             throw indexAndLength(index);
         }
-        return Character.codePointBefore(value, offset + index, offset);
+        return Character.codePointBefore(this, index);
     }
 
     /**
@@ -1717,7 +1470,7 @@ outer:
         if (start < 0 || end > count || start > end) {
             throw startEndAndLength(start, end);
         }
-        return Character.codePointCount(value, offset + start, end - start);
+        return Character.codePointCount(this, start, end);
     }
 
     /**
@@ -1748,9 +1501,7 @@ outer:
      * @since 1.5
      */
     public int offsetByCodePoints(int index, int codePointOffset) {
-        int s = index + offset;
-        int r = Character.offsetByCodePoints(value, offset, count, s, codePointOffset);
-        return r - offset;
+        return Character.offsetByCodePoints(this, index, codePointOffset);
     }
 
     /**
@@ -1816,31 +1567,26 @@ outer:
     @SuppressWarnings("unused")
     private static int indexOf(String haystackString, String needleString,
             int cache, int md2, char lastChar) {
-        char[] haystack = haystackString.value;
-        int haystackOffset = haystackString.offset;
         int haystackLength = haystackString.count;
-        char[] needle = needleString.value;
-        int needleOffset = needleString.offset;
         int needleLength = needleString.count;
         int needleLengthMinus1 = needleLength - 1;
-        int haystackEnd = haystackOffset + haystackLength;
-        outer_loop: for (int i = haystackOffset + needleLengthMinus1; i < haystackEnd;) {
-            if (lastChar == haystack[i]) {
+        outer_loop: for (int i = needleLengthMinus1; i < haystackLength;) {
+            if (lastChar == haystackString.charAt(i)) {
                 for (int j = 0; j < needleLengthMinus1; ++j) {
-                    if (needle[j + needleOffset] != haystack[i + j
-                            - needleLengthMinus1]) {
+                    if (needleString.charAt(j) !=
+                            haystackString.charAt(i + j - needleLengthMinus1)) {
                         int skip = 1;
-                        if ((cache & (1 << haystack[i])) == 0) {
+                        if ((cache & (1 << haystackString.charAt(i))) == 0) {
                             skip += j;
                         }
                         i += Math.max(md2, skip);
                         continue outer_loop;
                     }
                 }
-                return i - needleLengthMinus1 - haystackOffset;
+                return i - needleLengthMinus1;
             }
 
-            if ((cache & (1 << haystack[i])) == 0) {
+            if ((cache & (1 << haystackString.charAt(i))) == 0) {
                 i += needleLengthMinus1;
             }
             i++;
diff --git a/libart/src/main/java/java/lang/StringFactory.java b/libart/src/main/java/java/lang/StringFactory.java
new file mode 100644
index 0000000..4fc3eba
--- /dev/null
+++ b/libart/src/main/java/java/lang/StringFactory.java
@@ -0,0 +1,251 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package java.lang;
+
+import java.io.Serializable;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Comparator;
+import libcore.util.CharsetUtils;
+import libcore.util.EmptyArray;
+
+/**
+ * Class used to generate strings instead of calling String.&lt;init>.
+ *
+ * @hide
+ */
+public final class StringFactory {
+
+    // TODO: Remove once native methods are in place.
+    private static final char REPLACEMENT_CHAR = (char) 0xfffd;
+
+    public static String newEmptyString() {
+        return newStringFromChars(EmptyArray.CHAR, 0, 0);
+    }
+
+    public static String newStringFromBytes(byte[] data) {
+        return newStringFromBytes(data, 0, data.length);
+    }
+
+    public static String newStringFromBytes(byte[] data, int high) {
+        return newStringFromBytes(data, high, 0, data.length);
+    }
+
+    public static String newStringFromBytes(byte[] data, int offset, int byteCount) {
+        return newStringFromBytes(data, offset, byteCount, Charset.defaultCharset());
+    }
+
+    public static native String newStringFromBytes(byte[] data, int high, int offset, int byteCount);
+
+    public static String newStringFromBytes(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException {
+        return newStringFromBytes(data, offset, byteCount, Charset.forNameUEE(charsetName));
+    }
+
+    public static String newStringFromBytes(byte[] data, String charsetName) throws UnsupportedEncodingException {
+        return newStringFromBytes(data, 0, data.length, Charset.forNameUEE(charsetName));
+    }
+
+    // TODO: Implement this method natively.
+    public static String newStringFromBytes(byte[] data, int offset, int byteCount, Charset charset) {
+        if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
+            throw new StringIndexOutOfBoundsException(data.length, offset, byteCount);
+        }
+
+        char[] value;
+        int length;
+
+        // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed.
+        String canonicalCharsetName = charset.name();
+        if (canonicalCharsetName.equals("UTF-8")) {
+            byte[] d = data;
+            char[] v = new char[byteCount];
+
+            int idx = offset;
+            int last = offset + byteCount;
+            int s = 0;
+outer:
+            while (idx < last) {
+                byte b0 = d[idx++];
+                if ((b0 & 0x80) == 0) {
+                    // 0xxxxxxx
+                    // Range:  U-00000000 - U-0000007F
+                    int val = b0 & 0xff;
+                    v[s++] = (char) val;
+                } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) ||
+                        ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) {
+                    int utfCount = 1;
+                    if ((b0 & 0xf0) == 0xe0) utfCount = 2;
+                    else if ((b0 & 0xf8) == 0xf0) utfCount = 3;
+                    else if ((b0 & 0xfc) == 0xf8) utfCount = 4;
+                    else if ((b0 & 0xfe) == 0xfc) utfCount = 5;
+
+                    // 110xxxxx (10xxxxxx)+
+                    // Range:  U-00000080 - U-000007FF (count == 1)
+                    // Range:  U-00000800 - U-0000FFFF (count == 2)
+                    // Range:  U-00010000 - U-001FFFFF (count == 3)
+                    // Range:  U-00200000 - U-03FFFFFF (count == 4)
+                    // Range:  U-04000000 - U-7FFFFFFF (count == 5)
+
+                    if (idx + utfCount > last) {
+                        v[s++] = REPLACEMENT_CHAR;
+                        continue;
+                    }
+
+                    // Extract usable bits from b0
+                    int val = b0 & (0x1f >> (utfCount - 1));
+                    for (int i = 0; i < utfCount; ++i) {
+                        byte b = d[idx++];
+                        if ((b & 0xc0) != 0x80) {
+                            v[s++] = REPLACEMENT_CHAR;
+                            idx--; // Put the input char back
+                            continue outer;
+                        }
+                        // Push new bits in from the right side
+                        val <<= 6;
+                        val |= b & 0x3f;
+                    }
+
+                    // Note: Java allows overlong char
+                    // specifications. To disallow, check that val
+                    // is greater than or equal to the minimum
+                    // value for each count:
+                    //
+                    // count    min value
+                    // -----   ----------
+                    //   1           0x80
+                    //   2          0x800
+                    //   3        0x10000
+                    //   4       0x200000
+                    //   5      0x4000000
+
+                    // Allow surrogate values (0xD800 - 0xDFFF) to
+                    // be specified using 3-byte UTF values only
+                    if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) {
+                        v[s++] = REPLACEMENT_CHAR;
+                        continue;
+                    }
+
+                    // Reject chars greater than the Unicode maximum of U+10FFFF.
+                    if (val > 0x10FFFF) {
+                        v[s++] = REPLACEMENT_CHAR;
+                        continue;
+                    }
+
+                    // Encode chars from U+10000 up as surrogate pairs
+                    if (val < 0x10000) {
+                        v[s++] = (char) val;
+                    } else {
+                        int x = val & 0xffff;
+                        int u = (val >> 16) & 0x1f;
+                        int w = (u - 1) & 0xffff;
+                        int hi = 0xd800 | (w << 6) | (x >> 10);
+                        int lo = 0xdc00 | (x & 0x3ff);
+                        v[s++] = (char) hi;
+                        v[s++] = (char) lo;
+                    }
+                } else {
+                    // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfe-0xff
+                    v[s++] = REPLACEMENT_CHAR;
+                }
+            }
+
+            if (s == byteCount) {
+                // We guessed right, so we can use our temporary array as-is.
+                value = v;
+                length = s;
+            } else {
+                // Our temporary array was too big, so reallocate and copy.
+                value = new char[s];
+                length = s;
+                System.arraycopy(v, 0, value, 0, s);
+            }
+        } else if (canonicalCharsetName.equals("ISO-8859-1")) {
+            value = new char[byteCount];
+            length = byteCount;
+            CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value);
+        } else if (canonicalCharsetName.equals("US-ASCII")) {
+            value = new char[byteCount];
+            length = byteCount;
+            CharsetUtils.asciiBytesToChars(data, offset, byteCount, value);
+        } else {
+            CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount));
+            length = cb.length();
+            if (length > 0) {
+                // We could use cb.array() directly, but that would mean we'd have to trust
+                // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later,
+                // which would break String's immutability guarantee. It would also tend to
+                // mean that we'd be wasting memory because CharsetDecoder doesn't trim the
+                // array. So we copy.
+                value = new char[length];
+                System.arraycopy(cb.array(), 0, value, 0, length);
+            } else {
+                value = EmptyArray.CHAR;
+            }
+        }
+        return newStringFromChars(value, 0, length);
+    }
+
+    public static String newStringFromBytes(byte[] data, Charset charset) {
+        return newStringFromBytes(data, 0, data.length, charset);
+    }
+
+    public static String newStringFromChars(char[] data) {
+        return newStringFromChars(data, 0, data.length);
+    }
+
+    public static String newStringFromChars(char[] data, int offset, int charCount) {
+        if ((offset | charCount) < 0 || charCount > data.length - offset) {
+            throw new StringIndexOutOfBoundsException(data.length, offset, charCount);
+        }
+        return newStringFromChars(offset, charCount, data);
+    }
+
+    static native String newStringFromChars(int offset, int charCount, char[] data);
+
+    public static native String newStringFromString(String toCopy);
+
+    public static String newStringFromStringBuffer(StringBuffer stringBuffer) {
+        synchronized (stringBuffer) {
+            return newStringFromChars(stringBuffer.getValue(), 0, stringBuffer.length());
+        }
+    }
+
+    // TODO: Implement this method natively.
+    public static String newStringFromCodePoints(int[] codePoints, int offset, int count) {
+        if (codePoints == null) {
+            throw new NullPointerException("codePoints == null");
+        }
+        if ((offset | count) < 0 || count > codePoints.length - offset) {
+            throw new StringIndexOutOfBoundsException(codePoints.length, offset, count);
+        }
+        char[] value = new char[count * 2];
+        int end = offset + count;
+        int length = 0;
+        for (int i = offset; i < end; i++) {
+            length += Character.toChars(codePoints[i], value, length);
+        }
+        return newStringFromChars(value, 0, length);
+    }
+
+    public static String newStringFromStringBuilder(StringBuilder stringBuilder) {
+        return newStringFromChars(stringBuilder.getValue(), 0, stringBuilder.length());
+    }
+}
diff --git a/luni/src/main/java/libcore/util/CharsetUtils.java b/luni/src/main/java/libcore/util/CharsetUtils.java
index 2e426c4..5163dba 100644
--- a/luni/src/main/java/libcore/util/CharsetUtils.java
+++ b/luni/src/main/java/libcore/util/CharsetUtils.java
@@ -23,33 +23,33 @@ package libcore.util;
  */
 public final class CharsetUtils {
     /**
-     * Returns a new byte array containing the bytes corresponding to the given characters,
-     * encoded in US-ASCII. Unrepresentable characters are replaced by (byte) '?'.
+     * Returns a new byte array containing the bytes corresponding to the characters in the given
+     * string, encoded in US-ASCII. Unrepresentable characters are replaced by (byte) '?'.
      */
-    public static native byte[] toAsciiBytes(char[] chars, int offset, int length);
+    public static native byte[] toAsciiBytes(String s, int offset, int length);
 
     /**
-     * Returns a new byte array containing the bytes corresponding to the given characters,
-     * encoded in ISO-8859-1. Unrepresentable characters are replaced by (byte) '?'.
+     * Returns a new byte array containing the bytes corresponding to the characters in the given
+     * string, encoded in ISO-8859-1. Unrepresentable characters are replaced by (byte) '?'.
      */
-    public static native byte[] toIsoLatin1Bytes(char[] chars, int offset, int length);
+    public static native byte[] toIsoLatin1Bytes(String s, int offset, int length);
 
     /**
-     * Returns a new byte array containing the bytes corresponding to the given characters,
-     * encoded in UTF-8. All characters are representable in UTF-8.
+     * Returns a new byte array containing the bytes corresponding to the characters in the given
+     * string, encoded in UTF-8. All characters are representable in UTF-8.
      */
-    public static native byte[] toUtf8Bytes(char[] chars, int offset, int length);
+    public static native byte[] toUtf8Bytes(String s, int offset, int length);
 
     /**
-     * Returns a new byte array containing the bytes corresponding to the given characters,
-     * encoded in UTF-16BE. All characters are representable in UTF-16BE.
+     * Returns a new byte array containing the bytes corresponding to the characters in the given
+     * string, encoded in UTF-16BE. All characters are representable in UTF-16BE.
      */
-    public static byte[] toBigEndianUtf16Bytes(char[] chars, int offset, int length) {
+    public static byte[] toBigEndianUtf16Bytes(String s, int offset, int length) {
         byte[] result = new byte[length * 2];
         int end = offset + length;
         int resultIndex = 0;
         for (int i = offset; i < end; ++i) {
-            char ch = chars[i];
+            char ch = s.charAt(i);
             result[resultIndex++] = (byte) (ch >> 8);
             result[resultIndex++] = (byte) ch;
         }
diff --git a/luni/src/main/native/Register.cpp b/luni/src/main/native/Register.cpp
index 0f2d0ad..acc1e4f 100644
--- a/luni/src/main/native/Register.cpp
+++ b/luni/src/main/native/Register.cpp
@@ -69,7 +69,6 @@ jint JNI_OnLoad(JavaVM* vm, void*) {
     REGISTER(register_libcore_io_AsynchronousCloseMonitor);
     REGISTER(register_libcore_io_Memory);
     REGISTER(register_libcore_io_Posix);
-    REGISTER(register_libcore_util_CharsetUtils);
     REGISTER(register_org_apache_harmony_dalvik_NativeTestTarget);
     REGISTER(register_org_apache_harmony_xml_ExpatParser);
     REGISTER(register_sun_misc_Unsafe);
diff --git a/luni/src/main/native/libcore_util_CharsetUtils.cpp b/luni/src/main/native/libcore_util_CharsetUtils.cpp
index 57c8172..e69de29 100644
--- a/luni/src/main/native/libcore_util_CharsetUtils.cpp
+++ b/luni/src/main/native/libcore_util_CharsetUtils.cpp
@@ -1,250 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define LOG_TAG "String"
-
-#include "JNIHelp.h"
-#include "JniConstants.h"
-#include "ScopedPrimitiveArray.h"
-#include "jni.h"
-#include "unicode/utf16.h"
-
-#include <string.h>
-
-/**
- * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
- * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
- * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
- *
- * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
- * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
- * to the garbage collector (nor hide potentially large allocations from it).
- *
- * Because a call to append might require an allocation, it might fail. Callers should always
- * check the return value of append.
- */
-class NativeUnsafeByteSequence {
-public:
-    NativeUnsafeByteSequence(JNIEnv* env)
-        : mEnv(env), mJavaArray(NULL), mRawArray(NULL), mSize(-1), mOffset(0)
-    {
-    }
-
-    ~NativeUnsafeByteSequence() {
-        // Release our pointer to the raw array, copying changes back to the Java heap.
-        if (mRawArray != NULL) {
-            mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
-        }
-    }
-
-    bool append(jbyte b) {
-        if (mOffset == mSize && !resize(mSize * 2)) {
-            return false;
-        }
-        mRawArray[mOffset++] = b;
-        return true;
-    }
-
-    bool resize(int newSize) {
-        if (newSize == mSize) {
-            return true;
-        }
-
-        // Allocate a new array.
-        jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
-        if (newJavaArray == NULL) {
-            return false;
-        }
-        jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, NULL);
-        if (newRawArray == NULL) {
-            return false;
-        }
-
-        // Copy data out of the old array and then let go of it.
-        // Note that we may be trimming the array.
-        if (mRawArray != NULL) {
-            memcpy(newRawArray, mRawArray, mOffset);
-            mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
-            mEnv->DeleteLocalRef(mJavaArray);
-        }
-
-        // Point ourselves at the new array.
-        mJavaArray = newJavaArray;
-        mRawArray = newRawArray;
-        mSize = newSize;
-        return true;
-    }
-
-    jbyteArray toByteArray() {
-        // Trim any unused space, if necessary.
-        bool okay = resize(mOffset);
-        return okay ? mJavaArray : NULL;
-    }
-
-private:
-    JNIEnv* mEnv;
-    jbyteArray mJavaArray;
-    jbyte* mRawArray;
-    jint mSize;
-    jint mOffset;
-
-    // Disallow copy and assignment.
-    NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
-    void operator=(const NativeUnsafeByteSequence&);
-};
-
-static void Charsets_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
-    ScopedByteArrayRO bytes(env, javaBytes);
-    if (bytes.get() == NULL) {
-        return;
-    }
-    ScopedCharArrayRW chars(env, javaChars);
-    if (chars.get() == NULL) {
-        return;
-    }
-
-    const jbyte* src = &bytes[offset];
-    jchar* dst = &chars[0];
-    static const jchar REPLACEMENT_CHAR = 0xfffd;
-    for (int i = length - 1; i >= 0; --i) {
-        jchar ch = static_cast<jchar>(*src++ & 0xff);
-        *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
-    }
-}
-
-static void Charsets_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
-    ScopedByteArrayRO bytes(env, javaBytes);
-    if (bytes.get() == NULL) {
-        return;
-    }
-    ScopedCharArrayRW chars(env, javaChars);
-    if (chars.get() == NULL) {
-        return;
-    }
-
-    const jbyte* src = &bytes[offset];
-    jchar* dst = &chars[0];
-    for (int i = length - 1; i >= 0; --i) {
-        *dst++ = static_cast<jchar>(*src++ & 0xff);
-    }
-}
-
-/**
- * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
- * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
- * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
- */
-static jbyteArray charsToBytes(JNIEnv* env, jcharArray javaChars, jint offset, jint length, jchar maxValidChar) {
-    ScopedCharArrayRO chars(env, javaChars);
-    if (chars.get() == NULL) {
-        return NULL;
-    }
-
-    jbyteArray javaBytes = env->NewByteArray(length);
-    ScopedByteArrayRW bytes(env, javaBytes);
-    if (bytes.get() == NULL) {
-        return NULL;
-    }
-
-    const jchar* src = &chars[offset];
-    jbyte* dst = &bytes[0];
-    for (int i = length - 1; i >= 0; --i) {
-        jchar ch = *src++;
-        if (ch > maxValidChar) {
-            ch = '?';
-        }
-        *dst++ = static_cast<jbyte>(ch);
-    }
-
-    return javaBytes;
-}
-
-static jbyteArray Charsets_toAsciiBytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
-    return charsToBytes(env, javaChars, offset, length, 0x7f);
-}
-
-static jbyteArray Charsets_toIsoLatin1Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
-    return charsToBytes(env, javaChars, offset, length, 0xff);
-}
-
-static jbyteArray Charsets_toUtf8Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
-    ScopedCharArrayRO chars(env, javaChars);
-    if (chars.get() == NULL) {
-        return NULL;
-    }
-
-    NativeUnsafeByteSequence out(env);
-    if (!out.resize(length)) {
-        return NULL;
-    }
-
-    const int end = offset + length;
-    for (int i = offset; i < end; ++i) {
-        jint ch = chars[i];
-        if (ch < 0x80) {
-            // One byte.
-            if (!out.append(ch)) {
-                return NULL;
-            }
-        } else if (ch < 0x800) {
-            // Two bytes.
-            if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
-                return NULL;
-            }
-        } else if (U16_IS_SURROGATE(ch)) {
-            // A supplementary character.
-            jchar high = (jchar) ch;
-            jchar low = (i + 1 != end) ? chars[i + 1] : 0;
-            if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
-                if (!out.append('?')) {
-                    return NULL;
-                }
-                continue;
-            }
-            // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
-            ++i;
-            ch = U16_GET_SUPPLEMENTARY(high, low);
-            // Four bytes.
-            jbyte b1 = (ch >> 18) | 0xf0;
-            jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
-            jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
-            jbyte b4 = (ch & 0x3f) | 0x80;
-            if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
-                return NULL;
-            }
-        } else {
-            // Three bytes.
-            jbyte b1 = (ch >> 12) | 0xe0;
-            jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
-            jbyte b3 = (ch & 0x3f) | 0x80;
-            if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
-                return NULL;
-            }
-        }
-    }
-    return out.toByteArray();
-}
-
-static JNINativeMethod gMethods[] = {
-    NATIVE_METHOD(Charsets, asciiBytesToChars, "([BII[C)V"),
-    NATIVE_METHOD(Charsets, isoLatin1BytesToChars, "([BII[C)V"),
-    NATIVE_METHOD(Charsets, toAsciiBytes, "([CII)[B"),
-    NATIVE_METHOD(Charsets, toIsoLatin1Bytes, "([CII)[B"),
-    NATIVE_METHOD(Charsets, toUtf8Bytes, "([CII)[B"),
-};
-void register_libcore_util_CharsetUtils(JNIEnv* env) {
-    jniRegisterNativeMethods(env, "libcore/util/CharsetUtils", gMethods, NELEM(gMethods));
-}
diff --git a/luni/src/main/native/sub.mk b/luni/src/main/native/sub.mk
index a90c683..73ed7cb 100644
--- a/luni/src/main/native/sub.mk
+++ b/luni/src/main/native/sub.mk
@@ -49,7 +49,6 @@ LOCAL_SRC_FILES := \
     libcore_io_AsynchronousCloseMonitor.cpp \
     libcore_io_Memory.cpp \
     libcore_io_Posix.cpp \
-    libcore_util_CharsetUtils.cpp \
     org_apache_harmony_xml_ExpatParser.cpp \
     readlink.cpp \
     sun_misc_Unsafe.cpp \
diff --git a/luni/src/test/java/libcore/java/lang/StringTest.java b/luni/src/test/java/libcore/java/lang/StringTest.java
index bf162e5..bd52e06 100644
--- a/luni/src/test/java/libcore/java/lang/StringTest.java
+++ b/luni/src/test/java/libcore/java/lang/StringTest.java
@@ -173,47 +173,6 @@ public class StringTest extends TestCase {
     }
 
     /**
-     * Tests a widely assumed performance characteristic of String.substring():
-     * that it reuses the original's backing array. Although behavior should be
-     * correct even if this test fails, many applications may suffer
-     * significant performance degradation.
-     */
-    public void testSubstringSharesBackingArray() throws IllegalAccessException {
-        String abcdefghij = "ABCDEFGHIJ";
-        String cdefg = abcdefghij.substring(2, 7);
-        assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg));
-    }
-
-    /**
-     * Tests a widely assumed performance characteristic of string's copy
-     * constructor: that it ensures the backing array is the same length as the
-     * string. Although behavior should be correct even if this test fails,
-     * many applications may suffer significant performance degradation.
-     */
-    public void testStringCopiesAvoidHeapRetention() throws IllegalAccessException {
-        String abcdefghij = "ABCDEFGHIJ";
-        assertSame(getBackingArray(abcdefghij), getBackingArray(new String(abcdefghij)));
-
-        String cdefg = abcdefghij.substring(2, 7);
-        assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg));
-        assertEquals(5, getBackingArray(new String(cdefg)).length);
-    }
-
-    /**
-     * Uses reflection to return the char[] backing the given string. This
-     * returns the actual backing array; which must not be modified.
-     */
-    private char[] getBackingArray(String string) throws IllegalAccessException {
-        for (Field f : String.class.getDeclaredFields()) {
-            if (!Modifier.isStatic(f.getModifiers()) && f.getType() == char[].class) {
-                f.setAccessible(true);
-                return (char[]) f.get(string);
-            }
-        }
-        throw new UnsupportedOperationException("No chars[] field on String!");
-    }
-
-    /**
      * Test that strings interned manually and then later loaded as literals
      * maintain reference equality. http://b/3098960
      */
author	Jeff Hao <jeffhao@google.com>	2014-01-15 13:51:48 -0800
committer	Jeff Hao <jeffhao@google.com>	2015-04-27 17:15:26 -0700
commit	83c7414449bc406b581f0cb81ae06e7bce91403c (patch)
tree	98e6be2303f80a6ac36554e4ae8f4f8ae0d935f1
parent	8b7dbadede97a5166fcddfe6783e89c8957c1830 (diff)
download	libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.zip libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.tar.gz libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.tar.bz2