diff options
author | Jeff Hao <jeffhao@google.com> | 2014-01-15 13:51:48 -0800 |
---|---|---|
committer | Jeff Hao <jeffhao@google.com> | 2015-04-27 17:15:26 -0700 |
commit | 83c7414449bc406b581f0cb81ae06e7bce91403c (patch) | |
tree | 98e6be2303f80a6ac36554e4ae8f4f8ae0d935f1 /libart/src/main/java | |
parent | 8b7dbadede97a5166fcddfe6783e89c8957c1830 (diff) | |
download | libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.zip libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.tar.gz libcore-83c7414449bc406b581f0cb81ae06e7bce91403c.tar.bz2 |
Removed offset and value from String and added StringFactory.
Change-Id: I55314ceb906d0bf7e78545dcd9bc3489a5baf03f
Diffstat (limited to 'libart/src/main/java')
-rw-r--r-- | libart/src/main/java/java/lang/AbstractStringBuilder.java | 886 | ||||
-rw-r--r-- | libart/src/main/java/java/lang/CaseMapper.java | 213 | ||||
-rw-r--r-- | libart/src/main/java/java/lang/String.java | 506 | ||||
-rw-r--r-- | libart/src/main/java/java/lang/StringFactory.java | 251 |
4 files changed, 1476 insertions, 380 deletions
diff --git a/libart/src/main/java/java/lang/AbstractStringBuilder.java b/libart/src/main/java/java/lang/AbstractStringBuilder.java new file mode 100644 index 0000000..c8c8c5a --- /dev/null +++ b/libart/src/main/java/java/lang/AbstractStringBuilder.java @@ -0,0 +1,886 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package java.lang; + +import libcore.util.EmptyArray; + +import java.io.InvalidObjectException; +import java.util.Arrays; + +/** + * A modifiable {@link CharSequence sequence of characters} for use in creating + * and modifying Strings. This class is intended as a base class for + * {@link StringBuffer} and {@link StringBuilder}. + * + * @see StringBuffer + * @see StringBuilder + * @since 1.5 + */ +abstract class AbstractStringBuilder { + + static final int INITIAL_CAPACITY = 16; + + private char[] value; + + private int count; + + private boolean shared; + + /* + * Returns the character array. + */ + final char[] getValue() { + return value; + } + + /* + * Returns the underlying buffer and sets the shared flag. + */ + final char[] shareValue() { + shared = true; + return value; + } + + /* + * Restores internal state after deserialization. 
+ */ + final void set(char[] val, int len) throws InvalidObjectException { + if (val == null) { + val = EmptyArray.CHAR; + } + if (val.length < len) { + throw new InvalidObjectException("count out of range"); + } + + shared = false; + value = val; + count = len; + } + + AbstractStringBuilder() { + value = new char[INITIAL_CAPACITY]; + } + + AbstractStringBuilder(int capacity) { + if (capacity < 0) { + throw new NegativeArraySizeException(Integer.toString(capacity)); + } + value = new char[capacity]; + } + + AbstractStringBuilder(String string) { + count = string.length(); + shared = false; + value = new char[count + INITIAL_CAPACITY]; + string.getCharsNoCheck(0, count, value, 0); + } + + private void enlargeBuffer(int min) { + int newCount = ((value.length >> 1) + value.length) + 2; + char[] newData = new char[min > newCount ? min : newCount]; + System.arraycopy(value, 0, newData, 0, count); + value = newData; + shared = false; + } + + final void appendNull() { + int newCount = count + 4; + if (newCount > value.length) { + enlargeBuffer(newCount); + } + value[count++] = 'n'; + value[count++] = 'u'; + value[count++] = 'l'; + value[count++] = 'l'; + } + + final void append0(char[] chars) { + int newCount = count + chars.length; + if (newCount > value.length) { + enlargeBuffer(newCount); + } + System.arraycopy(chars, 0, value, count, chars.length); + count = newCount; + } + + final void append0(char[] chars, int offset, int length) { + Arrays.checkOffsetAndCount(chars.length, offset, length); + int newCount = count + length; + if (newCount > value.length) { + enlargeBuffer(newCount); + } + System.arraycopy(chars, offset, value, count, length); + count = newCount; + } + + final void append0(char ch) { + if (count == value.length) { + enlargeBuffer(count + 1); + } + value[count++] = ch; + } + + final void append0(String string) { + if (string == null) { + appendNull(); + return; + } + int length = string.length(); + int newCount = count + length; + if (newCount > 
value.length) { + enlargeBuffer(newCount); + } + string.getCharsNoCheck(0, length, value, count); + count = newCount; + } + + final void append0(CharSequence s, int start, int end) { + if (s == null) { + s = "null"; + } + if ((start | end) < 0 || start > end || end > s.length()) { + throw new IndexOutOfBoundsException(); + } + + int length = end - start; + int newCount = count + length; + if (newCount > value.length) { + enlargeBuffer(newCount); + } else if (shared) { + value = value.clone(); + shared = false; + } + + if (s instanceof String) { + ((String) s).getCharsNoCheck(start, end, value, count); + } else if (s instanceof AbstractStringBuilder) { + AbstractStringBuilder other = (AbstractStringBuilder) s; + System.arraycopy(other.value, start, value, count, length); + } else { + int j = count; // Destination index. + for (int i = start; i < end; i++) { + value[j++] = s.charAt(i); + } + } + + this.count = newCount; + } + + /** + * Returns the number of characters that can be held without growing. + * + * @return the capacity + * @see #ensureCapacity + * @see #length + */ + public int capacity() { + return value.length; + } + + /** + * Returns the character at {@code index}. + * @throws IndexOutOfBoundsException if {@code index < 0} or {@code index >= length()}. + */ + public char charAt(int index) { + if (index < 0 || index >= count) { + throw indexAndLength(index); + } + return value[index]; + } + + private StringIndexOutOfBoundsException indexAndLength(int index) { + throw new StringIndexOutOfBoundsException(count, index); + } + + private StringIndexOutOfBoundsException startEndAndLength(int start, int end) { + throw new StringIndexOutOfBoundsException(count, start, end - start); + } + + final void delete0(int start, int end) { + // NOTE: StringBuilder#delete(int, int) is specified not to throw if + // the end index is >= count, as long as it's >= start. This means + // we have to clamp it to count here. 
+ if (end > count) { + end = count; + } + + if (start < 0 || start > count || start > end) { + throw startEndAndLength(start, end); + } + + // NOTE: StringBuilder#delete(int, int) throws only if start > count + // (start == count is considered valid, oddly enough). Since 'end' is + // already a clamped value, that case is handled here. + if (end == start) { + return; + } + + // At this point we know for sure that end > start. + int length = count - end; + if (length >= 0) { + if (!shared) { + System.arraycopy(value, end, value, start, length); + } else { + char[] newData = new char[value.length]; + System.arraycopy(value, 0, newData, 0, start); + System.arraycopy(value, end, newData, start, length); + value = newData; + shared = false; + } + } + count -= end - start; + } + + final void deleteCharAt0(int index) { + if (index < 0 || index >= count) { + throw indexAndLength(index); + } + + delete0(index, index + 1); + } + + /** + * Ensures that this object has a minimum capacity available before + * requiring the internal buffer to be enlarged. The general policy of this + * method is that if the {@code minimumCapacity} is larger than the current + * {@link #capacity()}, then the capacity will be increased to the largest + * value of either the {@code minimumCapacity} or the current capacity + * multiplied by two plus two. Although this is the general policy, there is + * no guarantee that the capacity will change. + * + * @param min + * the new minimum capacity to set. + */ + public void ensureCapacity(int min) { + if (min > value.length) { + int ourMin = value.length*2 + 2; + enlargeBuffer(Math.max(ourMin, min)); + } + } + + /** + * Copies the requested sequence of characters into {@code dst} passed + * starting at {@code dst}. + * + * @param start + * the inclusive start index of the characters to copy. + * @param end + * the exclusive end index of the characters to copy. + * @param dst + * the {@code char[]} to copy the characters to. 
+ * @param dstStart + * the inclusive start index of {@code dst} to begin copying to. + * @throws IndexOutOfBoundsException + * if the {@code start} is negative, the {@code dstStart} is + * negative, the {@code start} is greater than {@code end}, the + * {@code end} is greater than the current {@link #length()} or + * {@code dstStart + end - begin} is greater than + * {@code dst.length}. + */ + public void getChars(int start, int end, char[] dst, int dstStart) { + if (start > count || end > count || start > end) { + throw startEndAndLength(start, end); + } + System.arraycopy(value, start, dst, dstStart, end - start); + } + + final void insert0(int index, char[] chars) { + if (index < 0 || index > count) { + throw indexAndLength(index); + } + if (chars.length != 0) { + move(chars.length, index); + System.arraycopy(chars, 0, value, index, chars.length); + count += chars.length; + } + } + + final void insert0(int index, char[] chars, int start, int length) { + if (index >= 0 && index <= count) { + // start + length could overflow, start/length maybe MaxInt + if (start >= 0 && length >= 0 && length <= chars.length - start) { + if (length != 0) { + move(length, index); + System.arraycopy(chars, start, value, index, length); + count += length; + } + return; + } + } + throw new StringIndexOutOfBoundsException("this.length=" + count + + "; index=" + index + "; chars.length=" + chars.length + + "; start=" + start + "; length=" + length); + } + + final void insert0(int index, char ch) { + if (index < 0 || index > count) { + // RI compatible exception type + throw new ArrayIndexOutOfBoundsException(count, index); + } + move(1, index); + value[index] = ch; + count++; + } + + final void insert0(int index, String string) { + if (index >= 0 && index <= count) { + if (string == null) { + string = "null"; + } + int min = string.length(); + if (min != 0) { + move(min, index); + string.getCharsNoCheck(0, min, value, index); + count += min; + } + } else { + throw 
indexAndLength(index); + } + } + + final void insert0(int index, CharSequence s, int start, int end) { + if (s == null) { + s = "null"; + } + if ((index | start | end) < 0 || index > count || start > end || end > s.length()) { + throw new IndexOutOfBoundsException(); + } + insert0(index, s.subSequence(start, end).toString()); + } + + /** + * The current length. + * + * @return the number of characters contained in this instance. + */ + public int length() { + return count; + } + + private void move(int size, int index) { + int newCount; + if (value.length - count >= size) { + if (!shared) { + // index == count case is no-op + System.arraycopy(value, index, value, index + size, count - index); + return; + } + newCount = value.length; + } else { + newCount = Math.max(count + size, value.length*2 + 2); + } + + char[] newData = new char[newCount]; + System.arraycopy(value, 0, newData, 0, index); + // index == count case is no-op + System.arraycopy(value, index, newData, index + size, count - index); + value = newData; + shared = false; + } + + final void replace0(int start, int end, String string) { + if (start >= 0) { + if (end > count) { + end = count; + } + if (end > start) { + int stringLength = string.length(); + int diff = end - start - stringLength; + if (diff > 0) { // replacing with fewer characters + if (!shared) { + // index == count case is no-op + System.arraycopy(value, end, value, start + + stringLength, count - end); + } else { + char[] newData = new char[value.length]; + System.arraycopy(value, 0, newData, 0, start); + // index == count case is no-op + System.arraycopy(value, end, newData, start + + stringLength, count - end); + value = newData; + shared = false; + } + } else if (diff < 0) { + // replacing with more characters...need some room + move(-diff, end); + } else if (shared) { + value = value.clone(); + shared = false; + } + string.getCharsNoCheck(0, stringLength, value, start); + count -= diff; + return; + } + if (start == end) { + if (string 
== null) { + throw new NullPointerException("string == null"); + } + insert0(start, string); + return; + } + } + throw startEndAndLength(start, end); + } + + final void reverse0() { + if (count < 2) { + return; + } + if (!shared) { + int end = count - 1; + char frontHigh = value[0]; + char endLow = value[end]; + boolean allowFrontSur = true, allowEndSur = true; + for (int i = 0, mid = count / 2; i < mid; i++, --end) { + char frontLow = value[i + 1]; + char endHigh = value[end - 1]; + boolean surAtFront = allowFrontSur && frontLow >= 0xdc00 + && frontLow <= 0xdfff && frontHigh >= 0xd800 + && frontHigh <= 0xdbff; + if (surAtFront && (count < 3)) { + return; + } + boolean surAtEnd = allowEndSur && endHigh >= 0xd800 + && endHigh <= 0xdbff && endLow >= 0xdc00 + && endLow <= 0xdfff; + allowFrontSur = allowEndSur = true; + if (surAtFront == surAtEnd) { + if (surAtFront) { + // both surrogates + value[end] = frontLow; + value[end - 1] = frontHigh; + value[i] = endHigh; + value[i + 1] = endLow; + frontHigh = value[i + 2]; + endLow = value[end - 2]; + i++; + end--; + } else { + // neither surrogates + value[end] = frontHigh; + value[i] = endLow; + frontHigh = frontLow; + endLow = endHigh; + } + } else { + if (surAtFront) { + // surrogate only at the front + value[end] = frontLow; + value[i] = endLow; + endLow = endHigh; + allowFrontSur = false; + } else { + // surrogate only at the end + value[end] = frontHigh; + value[i] = endHigh; + frontHigh = frontLow; + allowEndSur = false; + } + } + } + if ((count & 1) == 1 && (!allowFrontSur || !allowEndSur)) { + value[end] = allowFrontSur ? 
endLow : frontHigh; + } + } else { + char[] newData = new char[value.length]; + for (int i = 0, end = count; i < count; i++) { + char high = value[i]; + if ((i + 1) < count && high >= 0xd800 && high <= 0xdbff) { + char low = value[i + 1]; + if (low >= 0xdc00 && low <= 0xdfff) { + newData[--end] = low; + i++; + } + } + newData[--end] = high; + } + value = newData; + shared = false; + } + } + + /** + * Sets the character at the {@code index}. + * + * @param index + * the zero-based index of the character to replace. + * @param ch + * the character to set. + * @throws IndexOutOfBoundsException + * if {@code index} is negative or greater than or equal to the + * current {@link #length()}. + */ + public void setCharAt(int index, char ch) { + if (index < 0 || index >= count) { + throw indexAndLength(index); + } + if (shared) { + value = value.clone(); + shared = false; + } + value[index] = ch; + } + + /** + * Sets the current length to a new value. If the new length is larger than + * the current length, then the new characters at the end of this object + * will contain the {@code char} value of {@code \u0000}. + * + * @param length + * the new length of this StringBuffer. + * @throws IndexOutOfBoundsException + * if {@code length < 0}. + * @see #length + */ + public void setLength(int length) { + if (length < 0) { + throw new StringIndexOutOfBoundsException("length < 0: " + length); + } + if (length > value.length) { + enlargeBuffer(length); + } else { + if (shared) { + char[] newData = new char[value.length]; + System.arraycopy(value, 0, newData, 0, count); + value = newData; + shared = false; + } else { + if (count < length) { + Arrays.fill(value, count, length, (char) 0); + } + } + } + count = length; + } + + /** + * Returns the String value of the subsequence from the {@code start} index + * to the current end. + * + * @param start + * the inclusive start index to begin the subsequence. + * @return a String containing the subsequence. 
+ * @throws StringIndexOutOfBoundsException + * if {@code start} is negative or greater than the current + * {@link #length()}. + */ + public String substring(int start) { + if (start >= 0 && start <= count) { + if (start == count) { + return ""; + } + + // Remove String sharing for more performance + return new String(value, start, count - start); + } + throw indexAndLength(start); + } + + /** + * Returns the String value of the subsequence from the {@code start} index + * to the {@code end} index. + * + * @param start + * the inclusive start index to begin the subsequence. + * @param end + * the exclusive end index to end the subsequence. + * @return a String containing the subsequence. + * @throws StringIndexOutOfBoundsException + * if {@code start} is negative, greater than {@code end} or if + * {@code end} is greater than the current {@link #length()}. + */ + public String substring(int start, int end) { + if (start >= 0 && start <= end && end <= count) { + if (start == end) { + return ""; + } + + // Remove String sharing for more performance + return new String(value, start, end - start); + } + throw startEndAndLength(start, end); + } + + /** + * Returns the current String representation. + * + * @return a String containing the characters in this instance. + */ + @Override + public String toString() { + if (count == 0) { + return ""; + } + return StringFactory.newStringFromChars(0, count, value); + } + + /** + * Returns a {@code CharSequence} of the subsequence from the {@code start} + * index to the {@code end} index. + * + * @param start + * the inclusive start index to begin the subsequence. + * @param end + * the exclusive end index to end the subsequence. + * @return a CharSequence containing the subsequence. + * @throws IndexOutOfBoundsException + * if {@code start} is negative, greater than {@code end} or if + * {@code end} is greater than the current {@link #length()}. 
+ * @since 1.4 + */ + public CharSequence subSequence(int start, int end) { + return substring(start, end); + } + + /** + * Searches for the first index of the specified character. The search for + * the character starts at the beginning and moves towards the end. + * + * @param string + * the string to find. + * @return the index of the specified character, -1 if the character isn't + * found. + * @see #lastIndexOf(String) + * @since 1.4 + */ + public int indexOf(String string) { + return indexOf(string, 0); + } + + /** + * Searches for the index of the specified character. The search for the + * character starts at the specified offset and moves towards the end. + * + * @param subString + * the string to find. + * @param start + * the starting offset. + * @return the index of the specified character, -1 if the character isn't + * found + * @see #lastIndexOf(String,int) + * @since 1.4 + */ + public int indexOf(String subString, int start) { + if (start < 0) { + start = 0; + } + int subCount = subString.length(); + if (subCount > 0) { + if (subCount + start > count) { + return -1; + } + // TODO optimize charAt to direct array access + char firstChar = subString.charAt(0); + while (true) { + int i = start; + boolean found = false; + for (; i < count; i++) { + if (value[i] == firstChar) { + found = true; + break; + } + } + if (!found || subCount + i > count) { + return -1; // handles subCount > count || start >= count + } + int o1 = i, o2 = 0; + while (++o2 < subCount && value[++o1] == subString.charAt(o2)) { + // Intentionally empty + } + if (o2 == subCount) { + return i; + } + start = i + 1; + } + } + return (start < count || start == 0) ? start : count; + } + + /** + * Searches for the last index of the specified character. The search for + * the character starts at the end and moves towards the beginning. + * + * @param string + * the string to find. + * @return the index of the specified character, -1 if the character isn't + * found. 
+ * @throws NullPointerException + * if {@code string} is {@code null}. + * @see String#lastIndexOf(java.lang.String) + * @since 1.4 + */ + public int lastIndexOf(String string) { + return lastIndexOf(string, count); + } + + /** + * Searches for the index of the specified character. The search for the + * character starts at the specified offset and moves towards the beginning. + * + * @param subString + * the string to find. + * @param start + * the starting offset. + * @return the index of the specified character, -1 if the character isn't + * found. + * @throws NullPointerException + * if {@code subString} is {@code null}. + * @see String#lastIndexOf(String,int) + * @since 1.4 + */ + public int lastIndexOf(String subString, int start) { + int subCount = subString.length(); + if (subCount <= count && start >= 0) { + if (subCount > 0) { + if (start > count - subCount) { + start = count - subCount; // count and subCount are both + } + // >= 1 + // TODO optimize charAt to direct array access + char firstChar = subString.charAt(0); + while (true) { + int i = start; + boolean found = false; + for (; i >= 0; --i) { + if (value[i] == firstChar) { + found = true; + break; + } + } + if (!found) { + return -1; + } + int o1 = i, o2 = 0; + while (++o2 < subCount + && value[++o1] == subString.charAt(o2)) { + // Intentionally empty + } + if (o2 == subCount) { + return i; + } + start = i - 1; + } + } + return start < count ? start : count; + } + return -1; + } + + /** + * Trims off any extra capacity beyond the current length. Note, this method + * is NOT guaranteed to change the capacity of this object. + * + * @since 1.5 + */ + public void trimToSize() { + if (count < value.length) { + char[] newValue = new char[count]; + System.arraycopy(value, 0, newValue, 0, count); + value = newValue; + shared = false; + } + } + + /** + * Retrieves the Unicode code point value at the {@code index}. + * + * @param index + * the index to the {@code char} code unit. 
+ * @return the Unicode code point value. + * @throws IndexOutOfBoundsException + * if {@code index} is negative or greater than or equal to + * {@link #length()}. + * @see Character + * @see Character#codePointAt(char[], int, int) + * @since 1.5 + */ + public int codePointAt(int index) { + if (index < 0 || index >= count) { + throw indexAndLength(index); + } + return Character.codePointAt(value, index, count); + } + + /** + * Retrieves the Unicode code point value that precedes the {@code index}. + * + * @param index + * the index to the {@code char} code unit within this object. + * @return the Unicode code point value. + * @throws IndexOutOfBoundsException + * if {@code index} is less than 1 or greater than + * {@link #length()}. + * @see Character + * @see Character#codePointBefore(char[], int, int) + * @since 1.5 + */ + public int codePointBefore(int index) { + if (index < 1 || index > count) { + throw indexAndLength(index); + } + return Character.codePointBefore(value, index); + } + + /** + * Calculates the number of Unicode code points between {@code start} + * and {@code end}. + * + * @param start + * the inclusive beginning index of the subsequence. + * @param end + * the exclusive end index of the subsequence. + * @return the number of Unicode code points in the subsequence. + * @throws IndexOutOfBoundsException + * if {@code start} is negative or greater than + * {@code end} or {@code end} is greater than + * {@link #length()}. + * @see Character + * @see Character#codePointCount(char[], int, int) + * @since 1.5 + */ + public int codePointCount(int start, int end) { + if (start < 0 || end > count || start > end) { + throw startEndAndLength(start, end); + } + return Character.codePointCount(value, start, end - start); + } + + /** + * Returns the index that is offset {@code codePointOffset} code points from + * {@code index}. + * + * @param index + * the index to calculate the offset from. + * @param codePointOffset + * the number of code points to count. 
+ * @return the index that is {@code codePointOffset} code points away from + * index. + * @throws IndexOutOfBoundsException + * if {@code index} is negative or greater than + * {@link #length()} or if there aren't enough code points + * before or after {@code index} to match + * {@code codePointOffset}. + * @see Character + * @see Character#offsetByCodePoints(char[], int, int, int, int) + * @since 1.5 + */ + public int offsetByCodePoints(int index, int codePointOffset) { + return Character.offsetByCodePoints(value, 0, count, index, + codePointOffset); + } +} diff --git a/libart/src/main/java/java/lang/CaseMapper.java b/libart/src/main/java/java/lang/CaseMapper.java new file mode 100644 index 0000000..f23a4ef --- /dev/null +++ b/libart/src/main/java/java/lang/CaseMapper.java @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package java.lang; + +import java.util.Locale; +import libcore.icu.ICU; +import libcore.icu.Transliterator; + +/** + * Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html. 
+ */ +class CaseMapper { + private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".toCharArray(); + private static final char[] upperValues2 = 
"\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".toCharArray(); + + private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130'; + private static final char GREEK_CAPITAL_SIGMA = '\u03a3'; + private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2'; + + /** + * Our current GC makes short-lived objects more expensive than we'd like. When that's fixed, + * this class should be changed so that you instantiate it with the String and its value, + * and count fields. + */ + private CaseMapper() { + } + + /** + * Implements String.toLowerCase. The original String instance is returned if nothing changes. + */ + public static String toLowerCase(Locale locale, String s) { + // Punt hard cases to ICU4C. + // Note that Greek isn't a particularly hard case for toLowerCase, only toUpperCase. + String languageCode = locale.getLanguage(); + if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) { + return ICU.toLowerCase(s, locale); + } + + String newString = null; + for (int i = 0, end = s.length(); i < end; ++i) { + char ch = s.charAt(i); + char newCh; + if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) { + // Punt these hard cases. 
+ return ICU.toLowerCase(s, locale); + } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(s, i)) { + newCh = GREEK_SMALL_FINAL_SIGMA; + } else { + newCh = Character.toLowerCase(ch); + } + if (ch != newCh) { + if (newString == null) { + newString = StringFactory.newStringFromString(s); + } + newString.setCharAt(i, newCh); + } + } + return newString != null ? newString : s; + } + + /** + * True if 'index' is preceded by a sequence consisting of a cased letter and a case-ignorable + * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and + * then a cased letter. + */ + private static boolean isFinalSigma(String s, int index) { + // TODO: we don't skip case-ignorable sequences like we should. + // TODO: we should add a more direct way to test for a cased letter. + if (index <= 0) { + return false; + } + char previous = s.charAt(index - 1); + if (!(Character.isLowerCase(previous) || Character.isUpperCase(previous) || Character.isTitleCase(previous))) { + return false; + } + if (index + 1 >= s.length()) { + return true; + } + char next = s.charAt(index + 1); + if (Character.isLowerCase(next) || Character.isUpperCase(next) || Character.isTitleCase(next)) { + return false; + } + return true; + } + + /** + * Return the index of the specified character into the upperValues table. + * The upperValues table contains three entries at each position. These + * three characters are the upper case conversion. If only two characters + * are used, the third character in the table is \u0000. 
+ * @return the index into the upperValues table, or -1 + */ + private static int upperIndex(int ch) { + int index = -1; + if (ch >= 0xdf) { + if (ch <= 0x587) { + switch (ch) { + case 0xdf: return 0; + case 0x149: return 1; + case 0x1f0: return 2; + case 0x390: return 3; + case 0x3b0: return 4; + case 0x587: return 5; + } + } else if (ch >= 0x1e96) { + if (ch <= 0x1e9a) { + index = 6 + ch - 0x1e96; + } else if (ch >= 0x1f50 && ch <= 0x1ffc) { + index = upperValues2[ch - 0x1f50]; + if (index == 0) { + index = -1; + } + } else if (ch >= 0xfb00) { + if (ch <= 0xfb06) { + index = 90 + ch - 0xfb00; + } else if (ch >= 0xfb13 && ch <= 0xfb17) { + index = 97 + ch - 0xfb13; + } + } + } + } + return index; + } + + private static final ThreadLocal<Transliterator> EL_UPPER = new ThreadLocal<Transliterator>() { + @Override protected Transliterator initialValue() { + return new Transliterator("el-Upper"); + } + }; + + public static String toUpperCase(Locale locale, String s, int count) { + String languageCode = locale.getLanguage(); + if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) { + return ICU.toUpperCase(s, locale); + } + if (languageCode.equals("el")) { + return EL_UPPER.get().transliterate(s); + } + + char[] output = null; + String newString = null; + int i = 0; + for (int o = 0, end = count; o < end; o++) { + char ch = s.charAt(o); + if (Character.isHighSurrogate(ch)) { + return ICU.toUpperCase(s, locale); + } + int index = upperIndex(ch); + if (index == -1) { + if (output != null && i >= output.length) { + char[] newoutput = new char[output.length + (count / 6) + 2]; + System.arraycopy(output, 0, newoutput, 0, output.length); + output = newoutput; + } + char upch = Character.toUpperCase(ch); + if (output != null) { + output[i++] = upch; + } else if (ch != upch) { + if (newString == null) { + newString = StringFactory.newStringFromString(s); + } + newString.setCharAt(o, upch); + } + } else { + int target = index * 3; + char val3 = 
upperValues[target + 2]; + if (output == null) { + output = new char[count + (count / 6) + 2]; + i = o; + if (newString != null) { + System.arraycopy(newString.toCharArray(), 0, output, 0, i); + } else { + System.arraycopy(s.toCharArray(), 0, output, 0, i); + } + } else if (i + (val3 == 0 ? 1 : 2) >= output.length) { + char[] newoutput = new char[output.length + (count / 6) + 3]; + System.arraycopy(output, 0, newoutput, 0, output.length); + output = newoutput; + } + + char val = upperValues[target]; + output[i++] = val; + val = upperValues[target + 1]; + output[i++] = val; + if (val3 != 0) { + output[i++] = val3; + } + } + } + if (output == null) { + if (newString != null) { + return newString; + } else { + return s; + } + } + return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i); + } +} diff --git a/libart/src/main/java/java/lang/String.java b/libart/src/main/java/java/lang/String.java index a5bf34c..0875d1a 100644 --- a/libart/src/main/java/java/lang/String.java +++ b/libart/src/main/java/java/lang/String.java @@ -35,23 +35,6 @@ import libcore.util.EmptyArray; * See {@link Character} for details about the relationship between {@code char} and * Unicode code points. * - * <a name="backing_array"><h3>Backing Arrays</h3></a> - * This class is implemented using a {@code char[]}. The length of the array may exceed - * the length of the string. For example, the string "Hello" may be backed by - * the array {@code ['H', 'e', 'l', 'l', 'o', 'W'. 'o', 'r', 'l', 'd']} with - * offset 0 and length 5. - * - * <p>Multiple strings can share the same {@code char[]} because strings are immutable. - * The {@link #substring} method <strong>always</strong> returns a string that - * shares the backing array of its source string. Generally this is an - * optimization: fewer {@code char[]}s need to be allocated, and less copying - * is necessary. But this can also lead to unwanted heap retention. 
Taking a - * short substring of long string means that the long shared {@code char[]} won't be - * garbage until both strings are garbage. This typically happens when parsing - * small substrings out of a large input. To avoid this where necessary, call - * {@code new String(longString.subString(...))}. The string copy constructor - * always ensures that the backing array is no larger than necessary. - * * @see StringBuffer * @see StringBuilder * @see Charset @@ -93,10 +76,6 @@ public final class String implements Serializable, Comparable<String>, CharSeque } } - private final char[] value; - - private final int offset; - private final int count; private int hashCode; @@ -105,9 +84,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * Creates an empty string. */ public String() { - value = EmptyArray.CHAR; - offset = 0; - count = 0; + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -116,7 +93,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque */ @FindBugsSuppressWarnings("DM_DEFAULT_ENCODING") public String(byte[] data) { - this(data, 0, data.length); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -133,7 +110,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque */ @Deprecated public String(byte[] data, int high) { - this(data, high, 0, data.length); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -146,7 +123,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * if {@code byteCount < 0 || offset < 0 || offset + byteCount > data.length}. 
*/ public String(byte[] data, int offset, int byteCount) { - this(data, offset, byteCount, Charset.defaultCharset()); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -162,16 +139,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque */ @Deprecated public String(byte[] data, int high, int offset, int byteCount) { - if ((offset | byteCount) < 0 || byteCount > data.length - offset) { - throw failedBoundsCheck(data.length, offset, byteCount); - } - this.offset = 0; - this.value = new char[byteCount]; - this.count = byteCount; - high <<= 8; - for (int i = 0; i < count; i++) { - value[i] = (char) (high + (data[offset++] & 0xff)); - } + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -188,7 +156,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * if the named charset is not supported. */ public String(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException { - this(data, offset, byteCount, Charset.forNameUEE(charsetName)); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -203,7 +171,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * if {@code charsetName} is not supported. */ public String(byte[] data, String charsetName) throws UnsupportedEncodingException { - this(data, 0, data.length, Charset.forNameUEE(charsetName)); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -221,144 +189,7 @@ public final class String implements Serializable, Comparable<String>, CharSeque * @since 1.6 */ public String(byte[] data, int offset, int byteCount, Charset charset) { - if ((offset | byteCount) < 0 || byteCount > data.length - offset) { - throw failedBoundsCheck(data.length, offset, byteCount); - } - - // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed and because 'count' and - // 'value' are final. 
- String canonicalCharsetName = charset.name(); - if (canonicalCharsetName.equals("UTF-8")) { - byte[] d = data; - char[] v = new char[byteCount]; - - int idx = offset; - int last = offset + byteCount; - int s = 0; -outer: - while (idx < last) { - byte b0 = d[idx++]; - if ((b0 & 0x80) == 0) { - // 0xxxxxxx - // Range: U-00000000 - U-0000007F - int val = b0 & 0xff; - v[s++] = (char) val; - } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) || - ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) { - int utfCount = 1; - if ((b0 & 0xf0) == 0xe0) utfCount = 2; - else if ((b0 & 0xf8) == 0xf0) utfCount = 3; - else if ((b0 & 0xfc) == 0xf8) utfCount = 4; - else if ((b0 & 0xfe) == 0xfc) utfCount = 5; - - // 110xxxxx (10xxxxxx)+ - // Range: U-00000080 - U-000007FF (count == 1) - // Range: U-00000800 - U-0000FFFF (count == 2) - // Range: U-00010000 - U-001FFFFF (count == 3) - // Range: U-00200000 - U-03FFFFFF (count == 4) - // Range: U-04000000 - U-7FFFFFFF (count == 5) - - if (idx + utfCount > last) { - v[s++] = REPLACEMENT_CHAR; - continue; - } - - // Extract usable bits from b0 - int val = b0 & (0x1f >> (utfCount - 1)); - for (int i = 0; i < utfCount; ++i) { - byte b = d[idx++]; - if ((b & 0xc0) != 0x80) { - v[s++] = REPLACEMENT_CHAR; - idx--; // Put the input char back - continue outer; - } - // Push new bits in from the right side - val <<= 6; - val |= b & 0x3f; - } - - // Note: Java allows overlong char - // specifications To disallow, check that val - // is greater than or equal to the minimum - // value for each count: - // - // count min value - // ----- ---------- - // 1 0x80 - // 2 0x800 - // 3 0x10000 - // 4 0x200000 - // 5 0x4000000 - - // Allow surrogate values (0xD800 - 0xDFFF) to - // be specified using 3-byte UTF values only - if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) { - v[s++] = REPLACEMENT_CHAR; - continue; - } - - // Reject chars greater than the Unicode maximum of U+10FFFF. 
- if (val > 0x10FFFF) { - v[s++] = REPLACEMENT_CHAR; - continue; - } - - // Encode chars from U+10000 up as surrogate pairs - if (val < 0x10000) { - v[s++] = (char) val; - } else { - int x = val & 0xffff; - int u = (val >> 16) & 0x1f; - int w = (u - 1) & 0xffff; - int hi = 0xd800 | (w << 6) | (x >> 10); - int lo = 0xdc00 | (x & 0x3ff); - v[s++] = (char) hi; - v[s++] = (char) lo; - } - } else { - // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff - v[s++] = REPLACEMENT_CHAR; - } - } - - if (s == byteCount) { - // We guessed right, so we can use our temporary array as-is. - this.offset = 0; - this.value = v; - this.count = s; - } else { - // Our temporary array was too big, so reallocate and copy. - this.offset = 0; - this.value = new char[s]; - this.count = s; - System.arraycopy(v, 0, value, 0, s); - } - } else if (canonicalCharsetName.equals("ISO-8859-1")) { - this.offset = 0; - this.value = new char[byteCount]; - this.count = byteCount; - CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value); - } else if (canonicalCharsetName.equals("US-ASCII")) { - this.offset = 0; - this.value = new char[byteCount]; - this.count = byteCount; - CharsetUtils.asciiBytesToChars(data, offset, byteCount, value); - } else { - CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount)); - this.offset = 0; - this.count = cb.length(); - if (count > 0) { - // We could use cb.array() directly, but that would mean we'd have to trust - // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later, - // which would break String's immutability guarantee. It would also tend to - // mean that we'd be wasting memory because CharsetDecoder doesn't trim the - // array. So we copy. 
- this.value = new char[count]; - System.arraycopy(cb.array(), 0, value, 0, count); - } else { - this.value = EmptyArray.CHAR; - } - } + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -368,7 +199,7 @@ outer: * @since 1.6 */ public String(byte[] data, Charset charset) { - this(data, 0, data.length, charset); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -379,7 +210,7 @@ outer: * @throws NullPointerException if {@code data == null} */ public String(char[] data) { - this(data, 0, data.length); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -393,36 +224,25 @@ outer: * if {@code charCount < 0 || offset < 0 || offset + charCount > data.length} */ public String(char[] data, int offset, int charCount) { - if ((offset | charCount) < 0 || charCount > data.length - offset) { - throw failedBoundsCheck(data.length, offset, charCount); - } - this.offset = 0; - this.value = new char[charCount]; - this.count = charCount; - System.arraycopy(data, offset, value, 0, count); + throw new UnsupportedOperationException("Use StringFactory instead."); } /* * Internal version of the String(char[], int, int) constructor. - * Does not range check, null check, or copy the array. + * Does not range check or null check. */ + // TODO: Replace calls to this with calls to StringFactory, will require + // splitting other files in java.lang. String(int offset, int charCount, char[] chars) { - this.value = chars; - this.offset = offset; - this.count = charCount; + throw new UnsupportedOperationException("Use StringFactory instead."); } /** - * Constructs a copy of the given string. - * The returned string's <a href="#backing_array">backing array</a> - * is no larger than necessary. + * Constructs a new string with the same sequence of characters as {@code + * toCopy}. */ public String(String toCopy) { - value = (toCopy.value.length == toCopy.count) - ? 
toCopy.value - : Arrays.copyOfRange(toCopy.value, toCopy.offset, toCopy.offset + toCopy.length()); - offset = 0; - count = value.length; + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -430,11 +250,7 @@ outer: * {@code StringBuffer}. */ public String(StringBuffer stringBuffer) { - offset = 0; - synchronized (stringBuffer) { - value = stringBuffer.shareValue(); - count = stringBuffer.length(); - } + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -451,20 +267,7 @@ outer: * @since 1.5 */ public String(int[] codePoints, int offset, int count) { - if (codePoints == null) { - throw new NullPointerException("codePoints == null"); - } - if ((offset | count) < 0 || count > codePoints.length - offset) { - throw failedBoundsCheck(codePoints.length, offset, count); - } - this.offset = 0; - this.value = new char[count * 2]; - int end = offset + count; - int c = 0; - for (int i = offset; i < end; i++) { - c += Character.toChars(codePoints[i], this.value, c); - } - this.count = c; + throw new UnsupportedOperationException("Use StringFactory instead."); } /** @@ -476,25 +279,16 @@ outer: * @since 1.5 */ public String(StringBuilder stringBuilder) { - if (stringBuilder == null) { - throw new NullPointerException("stringBuilder == null"); - } - this.offset = 0; - this.count = stringBuilder.length(); - this.value = new char[this.count]; - stringBuilder.getChars(0, this.count, this.value, 0); + throw new UnsupportedOperationException("Use StringFactory instead."); } /** * Returns the {@code char} at {@code index}. * @throws IndexOutOfBoundsException if {@code index < 0} or {@code index >= length()}. 
*/ - public char charAt(int index) { - if (index < 0 || index >= count) { - throw indexAndLength(index); - } - return value[offset + index]; - } + public native char charAt(int index); + + native void setCharAt(int index, char c); private StringIndexOutOfBoundsException indexAndLength(int index) { throw new StringIndexOutOfBoundsException(this, index); @@ -557,12 +351,11 @@ outer: * if {@code string} is {@code null}. */ public int compareToIgnoreCase(String string) { - int o1 = offset, o2 = string.offset, result; - int end = offset + (count < string.count ? count : string.count); + int result; + int end = count < string.count ? count : string.count; char c1, c2; - char[] target = string.value; - while (o1 < end) { - if ((c1 = value[o1++]) == (c2 = target[o2++])) { + for (int i = 0; i < end; ++i) { + if ((c1 = charAt(i)) == (c2 = string.charAt(i))) { continue; } c1 = foldCase(c1); @@ -582,15 +375,7 @@ outer: * @return a new string which is the concatenation of this string and the * specified string. */ - public String concat(String string) { - if (string.count > 0 && count > 0) { - char[] buffer = new char[count + string.count]; - System.arraycopy(value, offset, buffer, 0, count); - System.arraycopy(string.value, string.offset, buffer, count, string.count); - return new String(0, buffer.length, buffer); - } - return count == 0 ? string : this; - } + public native String concat(String string); /** * Creates a new string by copying the given {@code char[]}. @@ -601,7 +386,7 @@ outer: * if {@code data} is {@code null}. */ public static String copyValueOf(char[] data) { - return new String(data, 0, data.length); + return StringFactory.newStringFromChars(data, 0, data.length); } /** @@ -616,7 +401,7 @@ outer: * data.length}. 
*/ public static String copyValueOf(char[] data, int start, int length) { - return new String(data, start, length); + return StringFactory.newStringFromChars(data, start, length); } /** @@ -654,16 +439,10 @@ outer: if (hashCode() != s.hashCode()) { return false; } - char[] value1 = value; - int offset1 = offset; - char[] value2 = s.value; - int offset2 = s.offset; - for (int end = offset1 + count; offset1 < end; ) { - if (value1[offset1] != value2[offset2]) { + for (int i = 0; i < count; ++i) { + if (charAt(i) != s.charAt(i)) { return false; } - offset1++; - offset2++; } return true; } else { @@ -686,12 +465,9 @@ outer: if (string == null || count != string.count) { return false; } - int o1 = offset, o2 = string.offset; - int end = offset + count; - char[] target = string.value; - while (o1 < end) { - char c1 = value[o1++]; - char c2 = target[o2++]; + for (int i = 0; i < count; ++i) { + char c1 = charAt(i); + char c2 = string.charAt(i); if (c1 != c2 && foldCase(c1) != foldCase(c2)) { return false; } @@ -721,10 +497,9 @@ outer: @Deprecated public void getBytes(int start, int end, byte[] data, int index) { if (start >= 0 && start <= end && end <= count) { - end += offset; try { - for (int i = offset + start; i < end; i++) { - data[index++] = (byte) value[i]; + for (int i = start; i < end; ++i) { + data[index++] = (byte) charAt(i); } } catch (ArrayIndexOutOfBoundsException ignored) { throw failedBoundsCheck(data.length, index, end - start); @@ -772,16 +547,15 @@ outer: public byte[] getBytes(Charset charset) { String canonicalCharsetName = charset.name(); if (canonicalCharsetName.equals("UTF-8")) { - return CharsetUtils.toUtf8Bytes(value, offset, count); + return CharsetUtils.toUtf8Bytes(this, 0, count); } else if (canonicalCharsetName.equals("ISO-8859-1")) { - return CharsetUtils.toIsoLatin1Bytes(value, offset, count); + return CharsetUtils.toIsoLatin1Bytes(this, 0, count); } else if (canonicalCharsetName.equals("US-ASCII")) { - return 
CharsetUtils.toAsciiBytes(value, offset, count); + return CharsetUtils.toAsciiBytes(this, 0, count); } else if (canonicalCharsetName.equals("UTF-16BE")) { - return CharsetUtils.toBigEndianUtf16Bytes(value, offset, count); + return CharsetUtils.toBigEndianUtf16Bytes(this, 0, count); } else { - CharBuffer chars = CharBuffer.wrap(this.value, this.offset, this.count); - ByteBuffer buffer = charset.encode(chars.asReadOnlyBuffer()); + ByteBuffer buffer = charset.encode(this); byte[] bytes = new byte[buffer.limit()]; buffer.get(bytes); return bytes; @@ -809,7 +583,16 @@ outer: */ public void getChars(int start, int end, char[] buffer, int index) { if (start >= 0 && start <= end && end <= count) { - System.arraycopy(value, start + offset, buffer, index, end - start); + if (buffer == null) { + throw new NullPointerException("buffer == null"); + } + if (index < 0) { + throw new IndexOutOfBoundsException("index < 0"); + } + if (end - start > buffer.length - index) { + throw new ArrayIndexOutOfBoundsException("end - start > buffer.length - index"); + } + getCharsNoCheck(start, end, buffer, index); } else { // We throw StringIndexOutOfBoundsException rather than System.arraycopy's AIOOBE. throw startEndAndLength(start, end); @@ -821,9 +604,7 @@ outer: * within the java.lang package only. The caller is responsible for * ensuring that start >= 0 && start <= end && end <= count. 
*/ - void _getChars(int start, int end, char[] buffer, int index) { - System.arraycopy(value, start + offset, buffer, index, end - start); - } + native void getCharsNoCheck(int start, int end, char[] buffer, int index); @Override public int hashCode() { int hash = hashCode; @@ -831,10 +612,8 @@ outer: if (count == 0) { return 0; } - final int end = count + offset; - final char[] chars = value; - for (int i = offset; i < end; ++i) { - hash = 31*hash + chars[i]; + for (int i = 0; i < count; ++i) { + hash = 31 * hash + charAt(i); } hashCode = hash; } @@ -893,21 +672,17 @@ outer: if (subCount > _count) { return -1; } - char[] target = string.value; - int subOffset = string.offset; - char firstChar = target[subOffset]; - int end = subOffset + subCount; + char firstChar = string.charAt(0); while (true) { int i = indexOf(firstChar, start); if (i == -1 || subCount + i > _count) { return -1; // handles subCount > count || start >= count } - int o1 = offset + i, o2 = subOffset; - char[] _value = value; - while (++o2 < end && _value[++o1] == target[o2]) { + int o1 = i, o2 = 0; + while (++o2 < subCount && charAt(++o1) == string.charAt(o2)) { // Intentionally empty } - if (o2 == end) { + if (o2 == subCount) { return i; } start = i + 1; @@ -934,21 +709,17 @@ outer: if (subCount + start > _count) { return -1; } - char[] target = subString.value; - int subOffset = subString.offset; - char firstChar = target[subOffset]; - int end = subOffset + subCount; + char firstChar = subString.charAt(0); while (true) { int i = indexOf(firstChar, start); if (i == -1 || subCount + i > _count) { return -1; // handles subCount > count || start >= count } - int o1 = offset + i, o2 = subOffset; - char[] _value = value; - while (++o2 < end && _value[++o1] == target[o2]) { + int o1 = i, o2 = 0; + while (++o2 < subCount && charAt(++o1) == subString.charAt(o2)) { // Intentionally empty } - if (o2 == end) { + if (o2 == subCount) { return i; } start = i + 1; @@ -991,11 +762,9 @@ outer: return 
lastIndexOfSupplementary(c, Integer.MAX_VALUE); } int _count = count; - int _offset = offset; - char[] _value = value; - for (int i = _offset + _count - 1; i >= _offset; --i) { - if (_value[i] == c) { - return i - _offset; + for (int i = _count - 1; i >= 0; --i) { + if (charAt(i) == c) { + return i; } } return -1; @@ -1011,15 +780,13 @@ outer: return lastIndexOfSupplementary(c, start); } int _count = count; - int _offset = offset; - char[] _value = value; if (start >= 0) { if (start >= _count) { start = _count - 1; } - for (int i = _offset + start; i >= _offset; --i) { - if (_value[i] == c) { - return i - _offset; + for (int i = start; i >= 0; --i) { + if (charAt(i) == c) { + return i; } } } @@ -1031,7 +798,7 @@ outer: return -1; } char[] chars = Character.toChars(c); - String needle = new String(0, chars.length, chars); + String needle = StringFactory.newStringFromChars(0, chars.length, chars); return lastIndexOf(needle, start); } @@ -1065,20 +832,17 @@ outer: start = count - subCount; } // count and subCount are both >= 1 - char[] target = subString.value; - int subOffset = subString.offset; - char firstChar = target[subOffset]; - int end = subOffset + subCount; + char firstChar = subString.charAt(0); while (true) { int i = lastIndexOf(firstChar, start); if (i == -1) { return -1; } - int o1 = offset + i, o2 = subOffset; - while (++o2 < end && value[++o1] == target[o2]) { + int o1 = i, o2 = 0; + while (++o2 < subCount && charAt(++o1) == subString.charAt(o2)) { // Intentionally empty } - if (o2 == end) { + if (o2 == subCount) { return i; } start = i - 1; @@ -1121,11 +885,8 @@ outer: if (length <= 0) { return true; } - int o1 = offset + thisStart, o2 = string.offset + start; - char[] value1 = value; - char[] value2 = string.value; for (int i = 0; i < length; ++i) { - if (value1[o1 + i] != value2[o2 + i]) { + if (charAt(thisStart + i) != string.charAt(start + i)) { return false; } } @@ -1164,13 +925,10 @@ outer: if (start < 0 || length > string.count - start) { 
return false; } - thisStart += offset; - start += string.offset; int end = thisStart + length; - char[] target = string.value; while (thisStart < end) { - char c1 = value[thisStart++]; - char c2 = target[start++]; + char c1 = charAt(thisStart++); + char c2 = string.charAt(start++); if (c1 != c2 && foldCase(c1) != foldCase(c2)) { return false; } @@ -1182,29 +940,20 @@ outer: * Returns a copy of this string after replacing occurrences of the given {@code char} with another. */ public String replace(char oldChar, char newChar) { - char[] buffer = value; - int _offset = offset; + String s = null; int _count = count; - - int idx = _offset; - int last = _offset + _count; boolean copied = false; - while (idx < last) { - if (buffer[idx] == oldChar) { + for (int i = 0; i < _count; ++i) { + if (charAt(i) == oldChar) { if (!copied) { - char[] newBuffer = new char[_count]; - System.arraycopy(buffer, _offset, newBuffer, 0, _count); - buffer = newBuffer; - idx -= _offset; - last -= _offset; + s = StringFactory.newStringFromString(this); copied = true; } - buffer[idx] = newChar; + s.setCharAt(i, newChar); } - idx++; } - return copied ? new String(0, count, buffer) : this; + return copied ? s : this; } /** @@ -1241,9 +990,8 @@ outer: int resultLength = count + (count + 1) * replacementString.length(); StringBuilder result = new StringBuilder(resultLength); result.append(replacementString); - int end = offset + count; - for (int i = offset; i != end; ++i) { - result.append(value[i]); + for (int i = 0; i != count; ++i) { + result.append(charAt(i)); result.append(replacementString); } return result.toString(); @@ -1252,15 +1000,21 @@ outer: StringBuilder result = new StringBuilder(count); int searchStart = 0; do { - // Copy chars before the match... - result.append(value, offset + searchStart, matchStart - searchStart); + // Copy characters before the match... + // TODO: Perform this faster than one char at a time? 
+ for (int i = searchStart; i < matchStart; ++i) { + result.append(charAt(i)); + } // Insert the replacement... result.append(replacementString); // And skip over the match... searchStart = matchStart + targetLength; } while ((matchStart = indexOf(targetString, searchStart)) != -1); // Copy any trailing chars... - result.append(value, offset + searchStart, count - searchStart); + // TODO: Perform this faster than one char at a time? + for (int i = searchStart; i < count; ++i) { + result.append(charAt(i)); + } return result.toString(); } @@ -1308,7 +1062,7 @@ outer: return this; } if (start >= 0 && start <= count) { - return new String(offset + start, count - start, value); + return fastSubstring(start, count - start); } throw indexAndLength(start); } @@ -1328,21 +1082,19 @@ outer: } // Fast range check. if (start >= 0 && start <= end && end <= count) { - return new String(offset + start, end - start, value); + return fastSubstring(start, end - start); } throw startEndAndLength(start, end); } + private native String fastSubstring(int start, int length); + /** * Returns a new {@code char} array containing a copy of the {@code char}s in this string. * This is expensive and rarely useful. If you just want to iterate over the {@code char}s in * the string, use {@link #charAt} instead. */ - public char[] toCharArray() { - char[] buffer = new char[count]; - System.arraycopy(value, offset, buffer, 0, count); - return buffer; - } + public native char[] toCharArray(); /** * Converts this string to lower case, using the rules of the user's default locale. @@ -1351,7 +1103,7 @@ outer: * @return a new lower case string, or {@code this} if it's already all lower case. */ public String toLowerCase() { - return CaseMapper.toLowerCase(Locale.getDefault(), this, value, offset, count); + return CaseMapper.toLowerCase(Locale.getDefault(), this); } /** @@ -1368,7 +1120,7 @@ outer: * @return a new lower case string, or {@code this} if it's already all lower case. 
*/ public String toLowerCase(Locale locale) { - return CaseMapper.toLowerCase(locale, this, value, offset, count); + return CaseMapper.toLowerCase(locale, this); } /** @@ -1386,7 +1138,7 @@ outer: * @return a new upper case string, or {@code this} if it's already all upper case. */ public String toUpperCase() { - return CaseMapper.toUpperCase(Locale.getDefault(), this, value, offset, count); + return CaseMapper.toUpperCase(Locale.getDefault(), this, count); } /** @@ -1403,7 +1155,7 @@ outer: * @return a new upper case string, or {@code this} if it's already all upper case. */ public String toUpperCase(Locale locale) { - return CaseMapper.toUpperCase(locale, this, value, offset, count); + return CaseMapper.toUpperCase(locale, this, count); } /** @@ -1411,18 +1163,18 @@ outer: * the beginning or end. */ public String trim() { - int start = offset, last = offset + count - 1; + int start = 0, last = count - 1; int end = last; - while ((start <= end) && (value[start] <= ' ')) { + while ((start <= end) && (charAt(start) <= ' ')) { start++; } - while ((end >= start) && (value[end] <= ' ')) { + while ((end >= start) && (charAt(end) <= ' ')) { end--; } - if (start == offset && end == last) { + if (start == 0 && end == last) { return this; } - return new String(start, end - start + 1, value); + return fastSubstring(start, end - start + 1); } /** @@ -1434,7 +1186,7 @@ outer: * if {@code data} is {@code null}. */ public static String valueOf(char[] data) { - return new String(data, 0, data.length); + return StringFactory.newStringFromChars(data, 0, data.length); } /** @@ -1448,7 +1200,7 @@ outer: * if {@code data} is {@code null}. 
*/ public static String valueOf(char[] data, int start, int length) { - return new String(data, start, length); + return StringFactory.newStringFromChars(data, start, length); } /** @@ -1457,9 +1209,9 @@ outer: public static String valueOf(char value) { String s; if (value < 128) { - s = new String(value, 1, ASCII); + s = StringFactory.newStringFromChars(value, 1, ASCII); } else { - s = new String(0, 1, new char[] { value }); + s = StringFactory.newStringFromChars(0, 1, new char[] { value }); } s.hashCode = value; return s; @@ -1533,7 +1285,8 @@ outer: if (count != size) { return false; } - return regionMatches(0, new String(0, size, sb.getValue()), 0, size); + String s = StringFactory.newStringFromChars(0, size, sb.getValue()); + return regionMatches(0, s, 0, size); } } @@ -1682,7 +1435,7 @@ outer: if (index < 0 || index >= count) { throw indexAndLength(index); } - return Character.codePointAt(value, offset + index, offset + count); + return Character.codePointAt(this, index); } /** @@ -1696,7 +1449,7 @@ outer: if (index < 1 || index > count) { throw indexAndLength(index); } - return Character.codePointBefore(value, offset + index, offset); + return Character.codePointBefore(this, index); } /** @@ -1717,7 +1470,7 @@ outer: if (start < 0 || end > count || start > end) { throw startEndAndLength(start, end); } - return Character.codePointCount(value, offset + start, end - start); + return Character.codePointCount(this, start, end); } /** @@ -1748,9 +1501,7 @@ outer: * @since 1.5 */ public int offsetByCodePoints(int index, int codePointOffset) { - int s = index + offset; - int r = Character.offsetByCodePoints(value, offset, count, s, codePointOffset); - return r - offset; + return Character.offsetByCodePoints(this, index, codePointOffset); } /** @@ -1816,31 +1567,26 @@ outer: @SuppressWarnings("unused") private static int indexOf(String haystackString, String needleString, int cache, int md2, char lastChar) { - char[] haystack = haystackString.value; - int 
haystackOffset = haystackString.offset; int haystackLength = haystackString.count; - char[] needle = needleString.value; - int needleOffset = needleString.offset; int needleLength = needleString.count; int needleLengthMinus1 = needleLength - 1; - int haystackEnd = haystackOffset + haystackLength; - outer_loop: for (int i = haystackOffset + needleLengthMinus1; i < haystackEnd;) { - if (lastChar == haystack[i]) { + outer_loop: for (int i = needleLengthMinus1; i < haystackLength;) { + if (lastChar == haystackString.charAt(i)) { for (int j = 0; j < needleLengthMinus1; ++j) { - if (needle[j + needleOffset] != haystack[i + j - - needleLengthMinus1]) { + if (needleString.charAt(j) != + haystackString.charAt(i + j - needleLengthMinus1)) { int skip = 1; - if ((cache & (1 << haystack[i])) == 0) { + if ((cache & (1 << haystackString.charAt(i))) == 0) { skip += j; } i += Math.max(md2, skip); continue outer_loop; } } - return i - needleLengthMinus1 - haystackOffset; + return i - needleLengthMinus1; } - if ((cache & (1 << haystack[i])) == 0) { + if ((cache & (1 << haystackString.charAt(i))) == 0) { i += needleLengthMinus1; } i++; diff --git a/libart/src/main/java/java/lang/StringFactory.java b/libart/src/main/java/java/lang/StringFactory.java new file mode 100644 index 0000000..4fc3eba --- /dev/null +++ b/libart/src/main/java/java/lang/StringFactory.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package java.lang;

import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Comparator;
import libcore.util.CharsetUtils;
import libcore.util.EmptyArray;

/**
 * Class used to generate strings instead of calling {@code String.&lt;init&gt;}.
 *
 * <p>Every factory method in this class ultimately delegates to one of the
 * native methods declared here ({@code newStringFromBytes(byte[], int, int, int)},
 * {@code newStringFromChars(int, int, char[])} or
 * {@code newStringFromString(String)}).
 *
 * @hide
 */
public final class StringFactory {

    // TODO: Remove once native methods are in place.
    private static final char REPLACEMENT_CHAR = (char) 0xfffd;

    /**
     * Returns a new string built from an empty char array.
     */
    public static String newEmptyString() {
        return newStringFromChars(EmptyArray.CHAR, 0, 0);
    }

    /**
     * Decodes all of {@code data} using {@link Charset#defaultCharset()}.
     *
     * @throws NullPointerException if {@code data == null}
     */
    public static String newStringFromBytes(byte[] data) {
        return newStringFromBytes(data, 0, data.length);
    }

    /**
     * Builds a string from all of {@code data} via the native
     * {@code (data, high, offset, byteCount)} overload.
     *
     * <p>NOTE(review): presumably {@code high} supplies the upper byte of each
     * resulting char, as in the deprecated {@code String(byte[], int)}
     * constructor; confirm against the native implementation.
     *
     * @throws NullPointerException if {@code data == null}
     */
    public static String newStringFromBytes(byte[] data, int high) {
        return newStringFromBytes(data, high, 0, data.length);
    }

    /**
     * Decodes {@code byteCount} bytes of {@code data}, starting at
     * {@code offset}, using {@link Charset#defaultCharset()}.
     */
    public static String newStringFromBytes(byte[] data, int offset, int byteCount) {
        return newStringFromBytes(data, offset, byteCount, Charset.defaultCharset());
    }

    /**
     * Native factory taking a {@code high} value plus a byte range.
     *
     * <p>NOTE(review): the implementation is not visible from this file; see
     * the native side for the exact semantics and bounds checking.
     */
    public static native String newStringFromBytes(byte[] data, int high, int offset,
            int byteCount);

    /**
     * Decodes {@code byteCount} bytes of {@code data}, starting at
     * {@code offset}, using the charset named {@code charsetName}.
     *
     * @throws UnsupportedEncodingException declared because the name is resolved
     *         with {@code Charset.forNameUEE}; presumably thrown when the
     *         charset is unknown (confirm against libcore's {@code Charset})
     */
    public static String newStringFromBytes(byte[] data, int offset, int byteCount,
            String charsetName) throws UnsupportedEncodingException {
        return newStringFromBytes(data, offset, byteCount, Charset.forNameUEE(charsetName));
    }

    /**
     * Decodes all of {@code data} using the charset named {@code charsetName}.
     *
     * @throws UnsupportedEncodingException declared because the name is resolved
     *         with {@code Charset.forNameUEE}; presumably thrown when the
     *         charset is unknown (confirm against libcore's {@code Charset})
     */
    public static String newStringFromBytes(byte[] data, String charsetName)
            throws UnsupportedEncodingException {
        return newStringFromBytes(data, 0, data.length, Charset.forNameUEE(charsetName));
    }

    // TODO: Implement this method natively.
    /**
     * Decodes {@code byteCount} bytes of {@code data}, starting at
     * {@code offset}, using {@code charset}.
     *
     * <p>UTF-8, ISO-8859-1 and US-ASCII (matched on {@link Charset#name()}) are
     * handled by dedicated decoders for speed; every other charset goes through
     * {@link Charset#decode(ByteBuffer)}.
     *
     * @throws StringIndexOutOfBoundsException if {@code offset} or
     *         {@code byteCount} is negative, or the range exceeds {@code data}
     * @throws NullPointerException if {@code data} or {@code charset} is null
     */
    public static String newStringFromBytes(byte[] data, int offset, int byteCount,
            Charset charset) {
        if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
            throw new StringIndexOutOfBoundsException(data.length, offset, byteCount);
        }

        char[] value;

        // We special-case UTF-8, ISO-8859-1, and US-ASCII decoders for speed.
        String canonicalCharsetName = charset.name();
        if (canonicalCharsetName.equals("UTF-8")) {
            value = decodeUtf8(data, offset, byteCount);
        } else if (canonicalCharsetName.equals("ISO-8859-1")) {
            value = new char[byteCount];
            CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value);
        } else if (canonicalCharsetName.equals("US-ASCII")) {
            value = new char[byteCount];
            CharsetUtils.asciiBytesToChars(data, offset, byteCount, value);
        } else {
            CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount));
            int length = cb.length();
            if (length > 0) {
                // We could use cb.array() directly, but that would mean we'd have to trust
                // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later,
                // which would break String's immutability guarantee. It would also tend to
                // mean that we'd be wasting memory because CharsetDecoder doesn't trim the
                // array. So we copy.
                //
                // The relative bulk get copies exactly the length() remaining chars
                // starting at the buffer's position, so it stays correct for a buffer
                // with a non-zero arrayOffset()/position() or without an accessible
                // backing array (where cb.array() would be wrong or throw).
                value = new char[length];
                cb.get(value, 0, length);
            } else {
                value = EmptyArray.CHAR;
            }
        }
        return newStringFromChars(value, 0, value.length);
    }

    /**
     * Decodes {@code byteCount} UTF-8 bytes of {@code data}, starting at
     * {@code offset}, into UTF-16 chars. Callers must have validated the range.
     *
     * <p>Malformed input never throws: each offending lead or stray byte is
     * replaced by {@link #REPLACEMENT_CHAR}. Overlong encodings are accepted.
     *
     * @return an array whose length is exactly the number of decoded chars
     */
    private static char[] decodeUtf8(byte[] data, int offset, int byteCount) {
        // Each input byte yields at most one char (a 4-byte sequence yields a
        // surrogate pair), so byteCount is a safe upper bound.
        char[] v = new char[byteCount];

        int idx = offset;
        int last = offset + byteCount;
        int s = 0;
        outer:
        while (idx < last) {
            byte b0 = data[idx++];
            if ((b0 & 0x80) == 0) {
                // 0xxxxxxx
                // Range:  U-00000000 - U-0000007F
                int val = b0 & 0xff;
                v[s++] = (char) val;
            } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) ||
                    ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) {
                // utfCount is the number of continuation bytes that follow b0.
                int utfCount = 1;
                if ((b0 & 0xf0) == 0xe0) utfCount = 2;
                else if ((b0 & 0xf8) == 0xf0) utfCount = 3;
                else if ((b0 & 0xfc) == 0xf8) utfCount = 4;
                else if ((b0 & 0xfe) == 0xfc) utfCount = 5;

                // 110xxxxx (10xxxxxx)+
                // Range:  U-00000080 - U-000007FF (count == 1)
                // Range:  U-00000800 - U-0000FFFF (count == 2)
                // Range:  U-00010000 - U-001FFFFF (count == 3)
                // Range:  U-00200000 - U-03FFFFFF (count == 4)
                // Range:  U-04000000 - U-7FFFFFFF (count == 5)

                if (idx + utfCount > last) {
                    // Truncated sequence: the lead byte becomes a replacement char and
                    // any remaining bytes are re-examined on later iterations.
                    v[s++] = REPLACEMENT_CHAR;
                    continue;
                }

                // Extract usable bits from b0
                int val = b0 & (0x1f >> (utfCount - 1));
                for (int i = 0; i < utfCount; ++i) {
                    byte b = data[idx++];
                    if ((b & 0xc0) != 0x80) {
                        v[s++] = REPLACEMENT_CHAR;
                        idx--; // Put the input char back
                        continue outer;
                    }
                    // Push new bits in from the right side
                    val <<= 6;
                    val |= b & 0x3f;
                }

                // Note: Java allows overlong char specifications.
                // To disallow them, check that val is greater than
                // or equal to the minimum value for each count:
                //
                // count    min value
                // -----   ----------
                //   1           0x80
                //   2          0x800
                //   3        0x10000
                //   4       0x200000
                //   5      0x4000000

                // Allow surrogate values (0xD800 - 0xDFFF) to
                // be specified using 3-byte UTF values only
                if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) {
                    v[s++] = REPLACEMENT_CHAR;
                    continue;
                }

                // Reject chars greater than the Unicode maximum of U+10FFFF.
                if (val > 0x10FFFF) {
                    v[s++] = REPLACEMENT_CHAR;
                    continue;
                }

                // Encode chars from U+10000 up as surrogate pairs
                if (val < 0x10000) {
                    v[s++] = (char) val;
                } else {
                    int x = val & 0xffff;
                    int u = (val >> 16) & 0x1f;
                    int w = (u - 1) & 0xffff;
                    int hi = 0xd800 | (w << 6) | (x >> 10);
                    int lo = 0xdc00 | (x & 0x3ff);
                    v[s++] = (char) hi;
                    v[s++] = (char) lo;
                }
            } else {
                // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff
                v[s++] = REPLACEMENT_CHAR;
            }
        }

        // If we guessed right we can use the temporary array as-is; otherwise it
        // was too big, so trim it to the decoded length.
        return (s == byteCount) ? v : Arrays.copyOf(v, s);
    }

    /**
     * Decodes all of {@code data} using {@code charset}.
     *
     * @throws NullPointerException if {@code data} or {@code charset} is null
     */
    public static String newStringFromBytes(byte[] data, Charset charset) {
        return newStringFromBytes(data, 0, data.length, charset);
    }

    /**
     * Builds a string from all of {@code data}.
     *
     * @throws NullPointerException if {@code data == null}
     */
    public static String newStringFromChars(char[] data) {
        return newStringFromChars(data, 0, data.length);
    }

    /**
     * Builds a string from {@code charCount} chars of {@code data}, starting at
     * {@code offset}, after validating the range.
     *
     * @throws StringIndexOutOfBoundsException if {@code offset} or
     *         {@code charCount} is negative, or the range exceeds {@code data}
     * @throws NullPointerException if {@code data == null}
     */
    public static String newStringFromChars(char[] data, int offset, int charCount) {
        if ((offset | charCount) < 0 || charCount > data.length - offset) {
            throw new StringIndexOutOfBoundsException(data.length, offset, charCount);
        }
        return newStringFromChars(offset, charCount, data);
    }

    /**
     * Native counterpart of {@link #newStringFromChars(char[], int, int)}; the
     * Java caller in this class validates the range before calling it. Note the
     * different parameter order, which keeps the two overloads distinct.
     */
    static native String newStringFromChars(int offset, int charCount, char[] data);

    /**
     * Native factory that builds a string from {@code toCopy}.
     *
     * <p>NOTE(review): the implementation is not visible from this file.
     */
    public static native String newStringFromString(String toCopy);

    /**
     * Builds a string from the current contents of {@code stringBuffer}. The
     * buffer's monitor is held while its value and length are read.
     */
    public static String newStringFromStringBuffer(StringBuffer stringBuffer) {
        synchronized (stringBuffer) {
            return newStringFromChars(stringBuffer.getValue(), 0, stringBuffer.length());
        }
    }

    // TODO: Implement this method natively.
    /**
     * Builds a string from {@code count} code points of {@code codePoints},
     * starting at {@code offset}. Each code point is converted with
     * {@link Character#toChars(int, char[], int)}.
     *
     * @throws NullPointerException if {@code codePoints == null}
     * @throws StringIndexOutOfBoundsException if {@code offset} or {@code count}
     *         is negative, or the range exceeds {@code codePoints}
     */
    public static String newStringFromCodePoints(int[] codePoints, int offset, int count) {
        if (codePoints == null) {
            throw new NullPointerException("codePoints == null");
        }
        if ((offset | count) < 0 || count > codePoints.length - offset) {
            throw new StringIndexOutOfBoundsException(codePoints.length, offset, count);
        }
        // Worst case is two chars (a surrogate pair) per code point.
        // NOTE(review): count * 2 overflows int when count > Integer.MAX_VALUE / 2.
        char[] value = new char[count * 2];
        int end = offset + count;
        int length = 0;
        for (int i = offset; i < end; i++) {
            length += Character.toChars(codePoints[i], value, length);
        }
        return newStringFromChars(value, 0, length);
    }

    /**
     * Builds a string from the current contents of {@code stringBuilder}. No
     * lock is taken, unlike {@link #newStringFromStringBuffer(StringBuffer)}.
     */
    public static String newStringFromStringBuilder(StringBuilder stringBuilder) {
        return newStringFromChars(stringBuilder.getValue(), 0, stringBuilder.length());
    }
}