diff options
author | Jeff Hao <jeffhao@google.com> | 2015-04-28 02:04:11 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-04-28 02:04:11 +0000 |
commit | 39f72b16952fbe4aff5c2e080ce9f7f6dbb1b93d (patch) | |
tree | 9b30d123fd7df1e6ddded18db55110662ccfd01e /luni | |
parent | 824b1cd012662cfa16a8787bf908a0b5fb13e54b (diff) | |
parent | 83c7414449bc406b581f0cb81ae06e7bce91403c (diff) | |
download | libcore-39f72b16952fbe4aff5c2e080ce9f7f6dbb1b93d.zip libcore-39f72b16952fbe4aff5c2e080ce9f7f6dbb1b93d.tar.gz libcore-39f72b16952fbe4aff5c2e080ce9f7f6dbb1b93d.tar.bz2 |
Merge "Removed offset and value from String and added StringFactory."
Diffstat (limited to 'luni')
-rw-r--r-- | luni/src/main/java/java/lang/AbstractStringBuilder.java | 893 | ||||
-rw-r--r-- | luni/src/main/java/java/lang/CaseMapper.java | 211 | ||||
-rw-r--r-- | luni/src/main/java/libcore/util/CharsetUtils.java | 26 | ||||
-rw-r--r-- | luni/src/main/native/Register.cpp | 1 | ||||
-rw-r--r-- | luni/src/main/native/libcore_util_CharsetUtils.cpp | 250 | ||||
-rw-r--r-- | luni/src/main/native/sub.mk | 1 | ||||
-rw-r--r-- | luni/src/test/java/libcore/java/lang/StringTest.java | 41 |
7 files changed, 13 insertions, 1410 deletions
diff --git a/luni/src/main/java/java/lang/AbstractStringBuilder.java b/luni/src/main/java/java/lang/AbstractStringBuilder.java deleted file mode 100644 index 4d84078..0000000 --- a/luni/src/main/java/java/lang/AbstractStringBuilder.java +++ /dev/null @@ -1,893 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package java.lang; - -import libcore.util.EmptyArray; - -import java.io.InvalidObjectException; -import java.util.Arrays; - -/** - * A modifiable {@link CharSequence sequence of characters} for use in creating - * and modifying Strings. This class is intended as a base class for - * {@link StringBuffer} and {@link StringBuilder}. - * - * @see StringBuffer - * @see StringBuilder - * @since 1.5 - */ -abstract class AbstractStringBuilder { - - static final int INITIAL_CAPACITY = 16; - - private char[] value; - - private int count; - - private boolean shared; - - /* - * Returns the character array. - */ - final char[] getValue() { - return value; - } - - /* - * Returns the underlying buffer and sets the shared flag. - */ - final char[] shareValue() { - shared = true; - return value; - } - - /* - * Restores internal state after deserialization. - */ - final void set(char[] val, int len) throws InvalidObjectException { - if (val == null) { - val = EmptyArray.CHAR; - } - if (val.length < len) { - throw new InvalidObjectException("count out of range"); - } - - shared = false; - value = val; - count = len; - } - - AbstractStringBuilder() { - value = new char[INITIAL_CAPACITY]; - } - - AbstractStringBuilder(int capacity) { - if (capacity < 0) { - throw new NegativeArraySizeException(Integer.toString(capacity)); - } - value = new char[capacity]; - } - - AbstractStringBuilder(String string) { - count = string.length(); - shared = false; - value = new char[count + INITIAL_CAPACITY]; - string._getChars(0, count, value, 0); - } - - private void enlargeBuffer(int min) { - int newCount = ((value.length >> 1) + value.length) + 2; - char[] newData = new char[min > newCount ? min : newCount]; - System.arraycopy(value, 0, newData, 0, count); - value = newData; - shared = false; - } - - final void appendNull() { - int newCount = count + 4; - if (newCount > value.length) { - enlargeBuffer(newCount); - } - value[count++] = 'n'; - value[count++] = 'u'; - value[count++] = 'l'; - value[count++] = 'l'; - } - - final void append0(char[] chars) { - int newCount = count + chars.length; - if (newCount > value.length) { - enlargeBuffer(newCount); - } - System.arraycopy(chars, 0, value, count, chars.length); - count = newCount; - } - - final void append0(char[] chars, int offset, int length) { - Arrays.checkOffsetAndCount(chars.length, offset, length); - int newCount = count + length; - if (newCount > value.length) { - enlargeBuffer(newCount); - } - System.arraycopy(chars, offset, value, count, length); - count = newCount; - } - - final void append0(char ch) { - if (count == value.length) { - enlargeBuffer(count + 1); - } - value[count++] = ch; - } - - final void append0(String string) { - if (string == null) { - appendNull(); - return; - } - int length = string.length(); - int newCount = count + length; - if (newCount > value.length) { - enlargeBuffer(newCount); - } - string._getChars(0, length, value, count); - count = newCount; - } - - final void append0(CharSequence s, int start, int end) { - if (s == null) { - s = "null"; - } - if ((start | end) < 0 || start > end || end > s.length()) { - throw new IndexOutOfBoundsException(); - } - - int length = end - start; - int newCount = count + length; - if (newCount > value.length) { - enlargeBuffer(newCount); - } else if (shared) { - value = value.clone(); - shared = false; - } - - if (s instanceof String) { - ((String) s)._getChars(start, end, value, count); - } else if (s instanceof AbstractStringBuilder) { - AbstractStringBuilder other = (AbstractStringBuilder) s; - System.arraycopy(other.value, start, value, count, length); - } else { - int j = count; // Destination index. - for (int i = start; i < end; i++) { - value[j++] = s.charAt(i); - } - } - - this.count = newCount; - } - - /** - * Returns the number of characters that can be held without growing. - * - * @return the capacity - * @see #ensureCapacity - * @see #length - */ - public int capacity() { - return value.length; - } - - /** - * Returns the character at {@code index}. - * @throws IndexOutOfBoundsException if {@code index < 0} or {@code index >= length()}. - */ - public char charAt(int index) { - if (index < 0 || index >= count) { - throw indexAndLength(index); - } - return value[index]; - } - - private StringIndexOutOfBoundsException indexAndLength(int index) { - throw new StringIndexOutOfBoundsException(count, index); - } - - private StringIndexOutOfBoundsException startEndAndLength(int start, int end) { - throw new StringIndexOutOfBoundsException(count, start, end - start); - } - - final void delete0(int start, int end) { - // NOTE: StringBuilder#delete(int, int) is specified not to throw if - // the end index is >= count, as long as it's >= start. This means - // we have to clamp it to count here. - if (end > count) { - end = count; - } - - if (start < 0 || start > count || start > end) { - throw startEndAndLength(start, end); - } - - // NOTE: StringBuilder#delete(int, int) throws only if start > count - // (start == count is considered valid, oddly enough). Since 'end' is - // already a clamped value, that case is handled here. - if (end == start) { - return; - } - - // At this point we know for sure that end > start. - int length = count - end; - if (length >= 0) { - if (!shared) { - System.arraycopy(value, end, value, start, length); - } else { - char[] newData = new char[value.length]; - System.arraycopy(value, 0, newData, 0, start); - System.arraycopy(value, end, newData, start, length); - value = newData; - shared = false; - } - } - count -= end - start; - } - - final void deleteCharAt0(int index) { - if (index < 0 || index >= count) { - throw indexAndLength(index); - } - - delete0(index, index + 1); - } - - /** - * Ensures that this object has a minimum capacity available before - * requiring the internal buffer to be enlarged. The general policy of this - * method is that if the {@code minimumCapacity} is larger than the current - * {@link #capacity()}, then the capacity will be increased to the largest - * value of either the {@code minimumCapacity} or the current capacity - * multiplied by two plus two. Although this is the general policy, there is - * no guarantee that the capacity will change. - * - * @param min - * the new minimum capacity to set. - */ - public void ensureCapacity(int min) { - if (min > value.length) { - int ourMin = value.length*2 + 2; - enlargeBuffer(Math.max(ourMin, min)); - } - } - - /** - * Copies the requested sequence of characters into {@code dst} passed - * starting at {@code dst}. - * - * @param start - * the inclusive start index of the characters to copy. - * @param end - * the exclusive end index of the characters to copy. - * @param dst - * the {@code char[]} to copy the characters to. - * @param dstStart - * the inclusive start index of {@code dst} to begin copying to. - * @throws IndexOutOfBoundsException - * if the {@code start} is negative, the {@code dstStart} is - * negative, the {@code start} is greater than {@code end}, the - * {@code end} is greater than the current {@link #length()} or - * {@code dstStart + end - begin} is greater than - * {@code dst.length}. - */ - public void getChars(int start, int end, char[] dst, int dstStart) { - if (start > count || end > count || start > end) { - throw startEndAndLength(start, end); - } - System.arraycopy(value, start, dst, dstStart, end - start); - } - - final void insert0(int index, char[] chars) { - if (index < 0 || index > count) { - throw indexAndLength(index); - } - if (chars.length != 0) { - move(chars.length, index); - System.arraycopy(chars, 0, value, index, chars.length); - count += chars.length; - } - } - - final void insert0(int index, char[] chars, int start, int length) { - if (index >= 0 && index <= count) { - // start + length could overflow, start/length maybe MaxInt - if (start >= 0 && length >= 0 && length <= chars.length - start) { - if (length != 0) { - move(length, index); - System.arraycopy(chars, start, value, index, length); - count += length; - } - return; - } - } - throw new StringIndexOutOfBoundsException("this.length=" + count - + "; index=" + index + "; chars.length=" + chars.length - + "; start=" + start + "; length=" + length); - } - - final void insert0(int index, char ch) { - if (index < 0 || index > count) { - // RI compatible exception type - throw new ArrayIndexOutOfBoundsException(count, index); - } - move(1, index); - value[index] = ch; - count++; - } - - final void insert0(int index, String string) { - if (index >= 0 && index <= count) { - if (string == null) { - string = "null"; - } - int min = string.length(); - if (min != 0) { - move(min, index); - string._getChars(0, min, value, index); - count += min; - } - } else { - throw indexAndLength(index); - } - } - - final void insert0(int index, CharSequence s, int start, int end) { - if (s == null) { - s = "null"; - } - if ((index | start | end) < 0 || index > count || start > end || end > s.length()) { - throw new IndexOutOfBoundsException(); - } - insert0(index, s.subSequence(start, end).toString()); - } - - /** - * The current length. - * - * @return the number of characters contained in this instance. - */ - public int length() { - return count; - } - - private void move(int size, int index) { - int newCount; - if (value.length - count >= size) { - if (!shared) { - // index == count case is no-op - System.arraycopy(value, index, value, index + size, count - index); - return; - } - newCount = value.length; - } else { - newCount = Math.max(count + size, value.length*2 + 2); - } - - char[] newData = new char[newCount]; - System.arraycopy(value, 0, newData, 0, index); - // index == count case is no-op - System.arraycopy(value, index, newData, index + size, count - index); - value = newData; - shared = false; - } - - final void replace0(int start, int end, String string) { - if (start >= 0) { - if (end > count) { - end = count; - } - if (end > start) { - int stringLength = string.length(); - int diff = end - start - stringLength; - if (diff > 0) { // replacing with fewer characters - if (!shared) { - // index == count case is no-op - System.arraycopy(value, end, value, start - + stringLength, count - end); - } else { - char[] newData = new char[value.length]; - System.arraycopy(value, 0, newData, 0, start); - // index == count case is no-op - System.arraycopy(value, end, newData, start - + stringLength, count - end); - value = newData; - shared = false; - } - } else if (diff < 0) { - // replacing with more characters...need some room - move(-diff, end); - } else if (shared) { - value = value.clone(); - shared = false; - } - string._getChars(0, stringLength, value, start); - count -= diff; - return; - } - if (start == end) { - if (string == null) { - throw new NullPointerException("string == null"); - } - insert0(start, string); - return; - } - } - throw startEndAndLength(start, end); - } - - final void reverse0() { - if (count < 2) { - return; - } - if (!shared) { - int end = count - 1; - char frontHigh = value[0]; - char endLow = value[end]; - boolean allowFrontSur = true, allowEndSur = true; - for (int i = 0, mid = count / 2; i < mid; i++, --end) { - char frontLow = value[i + 1]; - char endHigh = value[end - 1]; - boolean surAtFront = allowFrontSur && frontLow >= 0xdc00 - && frontLow <= 0xdfff && frontHigh >= 0xd800 - && frontHigh <= 0xdbff; - if (surAtFront && (count < 3)) { - return; - } - boolean surAtEnd = allowEndSur && endHigh >= 0xd800 - && endHigh <= 0xdbff && endLow >= 0xdc00 - && endLow <= 0xdfff; - allowFrontSur = allowEndSur = true; - if (surAtFront == surAtEnd) { - if (surAtFront) { - // both surrogates - value[end] = frontLow; - value[end - 1] = frontHigh; - value[i] = endHigh; - value[i + 1] = endLow; - frontHigh = value[i + 2]; - endLow = value[end - 2]; - i++; - end--; - } else { - // neither surrogates - value[end] = frontHigh; - value[i] = endLow; - frontHigh = frontLow; - endLow = endHigh; - } - } else { - if (surAtFront) { - // surrogate only at the front - value[end] = frontLow; - value[i] = endLow; - endLow = endHigh; - allowFrontSur = false; - } else { - // surrogate only at the end - value[end] = frontHigh; - value[i] = endHigh; - frontHigh = frontLow; - allowEndSur = false; - } - } - } - if ((count & 1) == 1 && (!allowFrontSur || !allowEndSur)) { - value[end] = allowFrontSur ? endLow : frontHigh; - } - } else { - char[] newData = new char[value.length]; - for (int i = 0, end = count; i < count; i++) { - char high = value[i]; - if ((i + 1) < count && high >= 0xd800 && high <= 0xdbff) { - char low = value[i + 1]; - if (low >= 0xdc00 && low <= 0xdfff) { - newData[--end] = low; - i++; - } - } - newData[--end] = high; - } - value = newData; - shared = false; - } - } - - /** - * Sets the character at the {@code index}. - * - * @param index - * the zero-based index of the character to replace. - * @param ch - * the character to set. - * @throws IndexOutOfBoundsException - * if {@code index} is negative or greater than or equal to the - * current {@link #length()}. - */ - public void setCharAt(int index, char ch) { - if (index < 0 || index >= count) { - throw indexAndLength(index); - } - if (shared) { - value = value.clone(); - shared = false; - } - value[index] = ch; - } - - /** - * Sets the current length to a new value. If the new length is larger than - * the current length, then the new characters at the end of this object - * will contain the {@code char} value of {@code \u0000}. - * - * @param length - * the new length of this StringBuffer. - * @throws IndexOutOfBoundsException - * if {@code length < 0}. - * @see #length - */ - public void setLength(int length) { - if (length < 0) { - throw new StringIndexOutOfBoundsException("length < 0: " + length); - } - if (length > value.length) { - enlargeBuffer(length); - } else { - if (shared) { - char[] newData = new char[value.length]; - System.arraycopy(value, 0, newData, 0, count); - value = newData; - shared = false; - } else { - if (count < length) { - Arrays.fill(value, count, length, (char) 0); - } - } - } - count = length; - } - - /** - * Returns the String value of the subsequence from the {@code start} index - * to the current end. - * - * @param start - * the inclusive start index to begin the subsequence. - * @return a String containing the subsequence. - * @throws StringIndexOutOfBoundsException - * if {@code start} is negative or greater than the current - * {@link #length()}. - */ - public String substring(int start) { - if (start >= 0 && start <= count) { - if (start == count) { - return ""; - } - - // Remove String sharing for more performance - return new String(value, start, count - start); - } - throw indexAndLength(start); - } - - /** - * Returns the String value of the subsequence from the {@code start} index - * to the {@code end} index. - * - * @param start - * the inclusive start index to begin the subsequence. - * @param end - * the exclusive end index to end the subsequence. - * @return a String containing the subsequence. - * @throws StringIndexOutOfBoundsException - * if {@code start} is negative, greater than {@code end} or if - * {@code end} is greater than the current {@link #length()}. - */ - public String substring(int start, int end) { - if (start >= 0 && start <= end && end <= count) { - if (start == end) { - return ""; - } - - // Remove String sharing for more performance - return new String(value, start, end - start); - } - throw startEndAndLength(start, end); - } - - /** - * Returns the current String representation. - * - * @return a String containing the characters in this instance. - */ - @Override - public String toString() { - if (count == 0) { - return ""; - } - // Optimize String sharing for more performance - int wasted = value.length - count; - if (wasted >= 256 - || (wasted >= INITIAL_CAPACITY && wasted >= (count >> 1))) { - return new String(value, 0, count); - } - shared = true; - return new String(0, count, value); - } - - /** - * Returns a {@code CharSequence} of the subsequence from the {@code start} - * index to the {@code end} index. - * - * @param start - * the inclusive start index to begin the subsequence. - * @param end - * the exclusive end index to end the subsequence. - * @return a CharSequence containing the subsequence. - * @throws IndexOutOfBoundsException - * if {@code start} is negative, greater than {@code end} or if - * {@code end} is greater than the current {@link #length()}. - * @since 1.4 - */ - public CharSequence subSequence(int start, int end) { - return substring(start, end); - } - - /** - * Searches for the first index of the specified character. The search for - * the character starts at the beginning and moves towards the end. - * - * @param string - * the string to find. - * @return the index of the specified character, -1 if the character isn't - * found. - * @see #lastIndexOf(String) - * @since 1.4 - */ - public int indexOf(String string) { - return indexOf(string, 0); - } - - /** - * Searches for the index of the specified character. The search for the - * character starts at the specified offset and moves towards the end. - * - * @param subString - * the string to find. - * @param start - * the starting offset. - * @return the index of the specified character, -1 if the character isn't - * found - * @see #lastIndexOf(String,int) - * @since 1.4 - */ - public int indexOf(String subString, int start) { - if (start < 0) { - start = 0; - } - int subCount = subString.length(); - if (subCount > 0) { - if (subCount + start > count) { - return -1; - } - // TODO optimize charAt to direct array access - char firstChar = subString.charAt(0); - while (true) { - int i = start; - boolean found = false; - for (; i < count; i++) { - if (value[i] == firstChar) { - found = true; - break; - } - } - if (!found || subCount + i > count) { - return -1; // handles subCount > count || start >= count - } - int o1 = i, o2 = 0; - while (++o2 < subCount && value[++o1] == subString.charAt(o2)) { - // Intentionally empty - } - if (o2 == subCount) { - return i; - } - start = i + 1; - } - } - return (start < count || start == 0) ? start : count; - } - - /** - * Searches for the last index of the specified character. The search for - * the character starts at the end and moves towards the beginning. - * - * @param string - * the string to find. - * @return the index of the specified character, -1 if the character isn't - * found. - * @throws NullPointerException - * if {@code string} is {@code null}. - * @see String#lastIndexOf(java.lang.String) - * @since 1.4 - */ - public int lastIndexOf(String string) { - return lastIndexOf(string, count); - } - - /** - * Searches for the index of the specified character. The search for the - * character starts at the specified offset and moves towards the beginning. - * - * @param subString - * the string to find. - * @param start - * the starting offset. - * @return the index of the specified character, -1 if the character isn't - * found. - * @throws NullPointerException - * if {@code subString} is {@code null}. - * @see String#lastIndexOf(String,int) - * @since 1.4 - */ - public int lastIndexOf(String subString, int start) { - int subCount = subString.length(); - if (subCount <= count && start >= 0) { - if (subCount > 0) { - if (start > count - subCount) { - start = count - subCount; // count and subCount are both - } - // >= 1 - // TODO optimize charAt to direct array access - char firstChar = subString.charAt(0); - while (true) { - int i = start; - boolean found = false; - for (; i >= 0; --i) { - if (value[i] == firstChar) { - found = true; - break; - } - } - if (!found) { - return -1; - } - int o1 = i, o2 = 0; - while (++o2 < subCount - && value[++o1] == subString.charAt(o2)) { - // Intentionally empty - } - if (o2 == subCount) { - return i; - } - start = i - 1; - } - } - return start < count ? start : count; - } - return -1; - } - - /** - * Trims off any extra capacity beyond the current length. Note, this method - * is NOT guaranteed to change the capacity of this object. - * - * @since 1.5 - */ - public void trimToSize() { - if (count < value.length) { - char[] newValue = new char[count]; - System.arraycopy(value, 0, newValue, 0, count); - value = newValue; - shared = false; - } - } - - /** - * Retrieves the Unicode code point value at the {@code index}. - * - * @param index - * the index to the {@code char} code unit. - * @return the Unicode code point value. - * @throws IndexOutOfBoundsException - * if {@code index} is negative or greater than or equal to - * {@link #length()}. - * @see Character - * @see Character#codePointAt(char[], int, int) - * @since 1.5 - */ - public int codePointAt(int index) { - if (index < 0 || index >= count) { - throw indexAndLength(index); - } - return Character.codePointAt(value, index, count); - } - - /** - * Retrieves the Unicode code point value that precedes the {@code index}. - * - * @param index - * the index to the {@code char} code unit within this object. - * @return the Unicode code point value. - * @throws IndexOutOfBoundsException - * if {@code index} is less than 1 or greater than - * {@link #length()}. - * @see Character - * @see Character#codePointBefore(char[], int, int) - * @since 1.5 - */ - public int codePointBefore(int index) { - if (index < 1 || index > count) { - throw indexAndLength(index); - } - return Character.codePointBefore(value, index); - } - - /** - * Calculates the number of Unicode code points between {@code start} - * and {@code end}. - * - * @param start - * the inclusive beginning index of the subsequence. - * @param end - * the exclusive end index of the subsequence. - * @return the number of Unicode code points in the subsequence. - * @throws IndexOutOfBoundsException - * if {@code start} is negative or greater than - * {@code end} or {@code end} is greater than - * {@link #length()}. - * @see Character - * @see Character#codePointCount(char[], int, int) - * @since 1.5 - */ - public int codePointCount(int start, int end) { - if (start < 0 || end > count || start > end) { - throw startEndAndLength(start, end); - } - return Character.codePointCount(value, start, end - start); - } - - /** - * Returns the index that is offset {@code codePointOffset} code points from - * {@code index}. - * - * @param index - * the index to calculate the offset from. - * @param codePointOffset - * the number of code points to count. - * @return the index that is {@code codePointOffset} code points away from - * index. - * @throws IndexOutOfBoundsException - * if {@code index} is negative or greater than - * {@link #length()} or if there aren't enough code points - * before or after {@code index} to match - * {@code codePointOffset}. - * @see Character - * @see Character#offsetByCodePoints(char[], int, int, int, int) - * @since 1.5 - */ - public int offsetByCodePoints(int index, int codePointOffset) { - return Character.offsetByCodePoints(value, 0, count, index, - codePointOffset); - } -} diff --git a/luni/src/main/java/java/lang/CaseMapper.java b/luni/src/main/java/java/lang/CaseMapper.java deleted file mode 100644 index 1da621c..0000000 --- a/luni/src/main/java/java/lang/CaseMapper.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package java.lang; - -import java.util.Locale; -import libcore.icu.ICU; -import libcore.icu.Transliterator; - -/** - * Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html. - */ -class CaseMapper { - private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".toCharArray(); - private static final char[] upperValues2 = "\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".toCharArray(); - - private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130'; - private static final char GREEK_CAPITAL_SIGMA = '\u03a3'; - private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2'; - - /** - * Our current GC makes short-lived objects more expensive than we'd like. When that's fixed, - * this class should be changed so that you instantiate it with the String and its value, - * offset, and count fields. - */ - private CaseMapper() { - } - - /** - * Implements String.toLowerCase. We need 's' so that we can return the original String instance - * if nothing changes. We need 'value', 'offset', and 'count' because they're not otherwise - * accessible. - */ - public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) { - // Punt hard cases to ICU4C. - // Note that Greek isn't a particularly hard case for toLowerCase, only toUpperCase. - String languageCode = locale.getLanguage(); - if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) { - return ICU.toLowerCase(s, locale); - } - - char[] newValue = null; - int newCount = 0; - for (int i = offset, end = offset + count; i < end; ++i) { - char ch = value[i]; - char newCh; - if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) { - // Punt these hard cases. - return ICU.toLowerCase(s, locale); - } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) { - newCh = GREEK_SMALL_FINAL_SIGMA; - } else { - newCh = Character.toLowerCase(ch); - } - if (newValue == null && ch != newCh) { - newValue = new char[count]; // The result can't be longer than the input. - newCount = i - offset; - System.arraycopy(value, offset, newValue, 0, newCount); - } - if (newValue != null) { - newValue[newCount++] = newCh; - } - } - return newValue != null ? new String(0, newCount, newValue) : s; - } - - /** - * True if 'index' is preceded by a sequence consisting of a cased letter and a case-ignorable - * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and - * then a cased letter. - */ - private static boolean isFinalSigma(char[] value, int offset, int count, int index) { - // TODO: we don't skip case-ignorable sequences like we should. - // TODO: we should add a more direct way to test for a cased letter. - if (index <= offset) { - return false; - } - char previous = value[index - 1]; - if (!(Character.isLowerCase(previous) || Character.isUpperCase(previous) || Character.isTitleCase(previous))) { - return false; - } - if (index + 1 >= offset + count) { - return true; - } - char next = value[index + 1]; - if (Character.isLowerCase(next) || Character.isUpperCase(next) || Character.isTitleCase(next)) { - return false; - } - return true; - } - - /** - * Return the index of the specified character into the upperValues table. - * The upperValues table contains three entries at each position. These - * three characters are the upper case conversion. If only two characters - * are used, the third character in the table is \u0000. - * @return the index into the upperValues table, or -1 - */ - private static int upperIndex(int ch) { - int index = -1; - if (ch >= 0xdf) { - if (ch <= 0x587) { - switch (ch) { - case 0xdf: return 0; - case 0x149: return 1; - case 0x1f0: return 2; - case 0x390: return 3; - case 0x3b0: return 4; - case 0x587: return 5; - } - } else if (ch >= 0x1e96) { - if (ch <= 0x1e9a) { - index = 6 + ch - 0x1e96; - } else if (ch >= 0x1f50 && ch <= 0x1ffc) { - index = upperValues2[ch - 0x1f50]; - if (index == 0) { - index = -1; - } - } else if (ch >= 0xfb00) { - if (ch <= 0xfb06) { - index = 90 + ch - 0xfb00; - } else if (ch >= 0xfb13 && ch <= 0xfb17) { - index = 97 + ch - 0xfb13; - } - } - } - } - return index; - } - - private static final ThreadLocal<Transliterator> EL_UPPER = new ThreadLocal<Transliterator>() { - @Override protected Transliterator initialValue() { - return new Transliterator("el-Upper"); - } - }; - - public static String toUpperCase(Locale locale, String s, char[] value, int offset, int count) { - String languageCode = locale.getLanguage(); - if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) { - return ICU.toUpperCase(s, locale); - } - if (languageCode.equals("el")) { - return EL_UPPER.get().transliterate(s); - } - - char[] output = null; - int i = 0; - for (int o = offset, end = offset + count; o < end; o++) { - char ch = value[o]; - if (Character.isHighSurrogate(ch)) { - return ICU.toUpperCase(s, locale); - } - int index = upperIndex(ch); - if (index == -1) { - if (output != null && i >= output.length) { - char[] newoutput = new char[output.length + (count / 6) + 2]; - System.arraycopy(output, 0, newoutput, 0, output.length); - output = newoutput; - } - char upch = Character.toUpperCase(ch); - if (ch != upch) { - if (output == null) { - output = new char[count]; - i = o - offset; - System.arraycopy(value, offset, output, 0, i); - } - output[i++] = upch; - } else if (output != null) { - output[i++] = ch; - } - } else { - int target = index * 3; - char val3 = upperValues[target + 2]; - if (output == null) { - output = new char[count + (count / 6) + 2]; - i = o - offset; - System.arraycopy(value, offset, output, 0, i); - } else if (i + (val3 == 0 ? 1 : 2) >= output.length) { - char[] newoutput = new char[output.length + (count / 6) + 3]; - System.arraycopy(output, 0, newoutput, 0, output.length); - output = newoutput; - } - - char val = upperValues[target]; - output[i++] = val; - val = upperValues[target + 1]; - output[i++] = val; - if (val3 != 0) { - output[i++] = val3; - } - } - } - if (output == null) { - return s; - } - return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i); - } -} diff --git a/luni/src/main/java/libcore/util/CharsetUtils.java b/luni/src/main/java/libcore/util/CharsetUtils.java index 2e426c4..5163dba 100644 --- a/luni/src/main/java/libcore/util/CharsetUtils.java +++ b/luni/src/main/java/libcore/util/CharsetUtils.java @@ -23,33 +23,33 @@ package libcore.util; */ public final class CharsetUtils { /** - * Returns a new byte array containing the bytes corresponding to the given characters, - * encoded in US-ASCII. Unrepresentable characters are replaced by (byte) '?'. + * Returns a new byte array containing the bytes corresponding to the characters in the given + * string, encoded in US-ASCII. Unrepresentable characters are replaced by (byte) '?'. */ - public static native byte[] toAsciiBytes(char[] chars, int offset, int length); + public static native byte[] toAsciiBytes(String s, int offset, int length); /** - * Returns a new byte array containing the bytes corresponding to the given characters, - * encoded in ISO-8859-1. Unrepresentable characters are replaced by (byte) '?'. + * Returns a new byte array containing the bytes corresponding to the characters in the given + * string, encoded in ISO-8859-1. Unrepresentable characters are replaced by (byte) '?'. */ - public static native byte[] toIsoLatin1Bytes(char[] chars, int offset, int length); + public static native byte[] toIsoLatin1Bytes(String s, int offset, int length); /** - * Returns a new byte array containing the bytes corresponding to the given characters, - * encoded in UTF-8. All characters are representable in UTF-8. + * Returns a new byte array containing the bytes corresponding to the characters in the given + * string, encoded in UTF-8. All characters are representable in UTF-8. */ - public static native byte[] toUtf8Bytes(char[] chars, int offset, int length); + public static native byte[] toUtf8Bytes(String s, int offset, int length); /** - * Returns a new byte array containing the bytes corresponding to the given characters, - * encoded in UTF-16BE. All characters are representable in UTF-16BE. + * Returns a new byte array containing the bytes corresponding to the characters in the given + * string, encoded in UTF-16BE. All characters are representable in UTF-16BE. */ - public static byte[] toBigEndianUtf16Bytes(char[] chars, int offset, int length) { + public static byte[] toBigEndianUtf16Bytes(String s, int offset, int length) { byte[] result = new byte[length * 2]; int end = offset + length; int resultIndex = 0; for (int i = offset; i < end; ++i) { - char ch = chars[i]; + char ch = s.charAt(i); result[resultIndex++] = (byte) (ch >> 8); result[resultIndex++] = (byte) ch; } diff --git a/luni/src/main/native/Register.cpp b/luni/src/main/native/Register.cpp index 0f2d0ad..acc1e4f 100644 --- a/luni/src/main/native/Register.cpp +++ b/luni/src/main/native/Register.cpp @@ -69,7 +69,6 @@ jint JNI_OnLoad(JavaVM* vm, void*) { REGISTER(register_libcore_io_AsynchronousCloseMonitor); REGISTER(register_libcore_io_Memory); REGISTER(register_libcore_io_Posix); - REGISTER(register_libcore_util_CharsetUtils); REGISTER(register_org_apache_harmony_dalvik_NativeTestTarget); REGISTER(register_org_apache_harmony_xml_ExpatParser); REGISTER(register_sun_misc_Unsafe); diff --git a/luni/src/main/native/libcore_util_CharsetUtils.cpp b/luni/src/main/native/libcore_util_CharsetUtils.cpp index 57c8172..e69de29 100644 --- a/luni/src/main/native/libcore_util_CharsetUtils.cpp +++ b/luni/src/main/native/libcore_util_CharsetUtils.cpp @@ -1,250 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define LOG_TAG "String" - -#include "JNIHelp.h" -#include "JniConstants.h" -#include "ScopedPrimitiveArray.h" -#include "jni.h" -#include "unicode/utf16.h" - -#include <string.h> - -/** - * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into - * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly - * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s. - * - * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only - * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie - * to the garbage collector (nor hide potentially large allocations from it). - * - * Because a call to append might require an allocation, it might fail. Callers should always - * check the return value of append. - */ -class NativeUnsafeByteSequence { -public: - NativeUnsafeByteSequence(JNIEnv* env) - : mEnv(env), mJavaArray(NULL), mRawArray(NULL), mSize(-1), mOffset(0) - { - } - - ~NativeUnsafeByteSequence() { - // Release our pointer to the raw array, copying changes back to the Java heap. - if (mRawArray != NULL) { - mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0); - } - } - - bool append(jbyte b) { - if (mOffset == mSize && !resize(mSize * 2)) { - return false; - } - mRawArray[mOffset++] = b; - return true; - } - - bool resize(int newSize) { - if (newSize == mSize) { - return true; - } - - // Allocate a new array. - jbyteArray newJavaArray = mEnv->NewByteArray(newSize); - if (newJavaArray == NULL) { - return false; - } - jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, NULL); - if (newRawArray == NULL) { - return false; - } - - // Copy data out of the old array and then let go of it. - // Note that we may be trimming the array. - if (mRawArray != NULL) { - memcpy(newRawArray, mRawArray, mOffset); - mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT); - mEnv->DeleteLocalRef(mJavaArray); - } - - // Point ourselves at the new array. - mJavaArray = newJavaArray; - mRawArray = newRawArray; - mSize = newSize; - return true; - } - - jbyteArray toByteArray() { - // Trim any unused space, if necessary. - bool okay = resize(mOffset); - return okay ? mJavaArray : NULL; - } - -private: - JNIEnv* mEnv; - jbyteArray mJavaArray; - jbyte* mRawArray; - jint mSize; - jint mOffset; - - // Disallow copy and assignment. - NativeUnsafeByteSequence(const NativeUnsafeByteSequence&); - void operator=(const NativeUnsafeByteSequence&); -}; - -static void Charsets_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) { - ScopedByteArrayRO bytes(env, javaBytes); - if (bytes.get() == NULL) { - return; - } - ScopedCharArrayRW chars(env, javaChars); - if (chars.get() == NULL) { - return; - } - - const jbyte* src = &bytes[offset]; - jchar* dst = &chars[0]; - static const jchar REPLACEMENT_CHAR = 0xfffd; - for (int i = length - 1; i >= 0; --i) { - jchar ch = static_cast<jchar>(*src++ & 0xff); - *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR; - } -} - -static void Charsets_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) { - ScopedByteArrayRO bytes(env, javaBytes); - if (bytes.get() == NULL) { - return; - } - ScopedCharArrayRW chars(env, javaChars); - if (chars.get() == NULL) { - return; - } - - const jbyte* src = &bytes[offset]; - jchar* dst = &chars[0]; - for (int i = length - 1; i >= 0; --i) { - *dst++ = static_cast<jchar>(*src++ & 0xff); - } -} - -/** - * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that - * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while - * U+0000 to U+00ff inclusive are identical to ISO-8859-1. - */ -static jbyteArray charsToBytes(JNIEnv* env, jcharArray javaChars, jint offset, jint length, jchar maxValidChar) { - ScopedCharArrayRO chars(env, javaChars); - if (chars.get() == NULL) { - return NULL; - } - - jbyteArray javaBytes = env->NewByteArray(length); - ScopedByteArrayRW bytes(env, javaBytes); - if (bytes.get() == NULL) { - return NULL; - } - - const jchar* src = &chars[offset]; - jbyte* dst = &bytes[0]; - for (int i = length - 1; i >= 0; --i) { - jchar ch = *src++; - if (ch > maxValidChar) { - ch = '?'; - } - *dst++ = static_cast<jbyte>(ch); - } - - return javaBytes; -} - -static jbyteArray Charsets_toAsciiBytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) { - return charsToBytes(env, javaChars, offset, length, 0x7f); -} - -static jbyteArray Charsets_toIsoLatin1Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) { - return charsToBytes(env, javaChars, offset, length, 0xff); -} - -static jbyteArray Charsets_toUtf8Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) { - ScopedCharArrayRO chars(env, javaChars); - if (chars.get() == NULL) { - return NULL; - } - - NativeUnsafeByteSequence out(env); - if (!out.resize(length)) { - return NULL; - } - - const int end = offset + length; - for (int i = offset; i < end; ++i) { - jint ch = chars[i]; - if (ch < 0x80) { - // One byte. - if (!out.append(ch)) { - return NULL; - } - } else if (ch < 0x800) { - // Two bytes. - if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) { - return NULL; - } - } else if (U16_IS_SURROGATE(ch)) { - // A supplementary character. - jchar high = (jchar) ch; - jchar low = (i + 1 != end) ? chars[i + 1] : 0; - if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) { - if (!out.append('?')) { - return NULL; - } - continue; - } - // Now we know we have a *valid* surrogate pair, we can consume the low surrogate. - ++i; - ch = U16_GET_SUPPLEMENTARY(high, low); - // Four bytes. - jbyte b1 = (ch >> 18) | 0xf0; - jbyte b2 = ((ch >> 12) & 0x3f) | 0x80; - jbyte b3 = ((ch >> 6) & 0x3f) | 0x80; - jbyte b4 = (ch & 0x3f) | 0x80; - if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) { - return NULL; - } - } else { - // Three bytes. - jbyte b1 = (ch >> 12) | 0xe0; - jbyte b2 = ((ch >> 6) & 0x3f) | 0x80; - jbyte b3 = (ch & 0x3f) | 0x80; - if (!out.append(b1) || !out.append(b2) || !out.append(b3)) { - return NULL; - } - } - } - return out.toByteArray(); -} - -static JNINativeMethod gMethods[] = { - NATIVE_METHOD(Charsets, asciiBytesToChars, "([BII[C)V"), - NATIVE_METHOD(Charsets, isoLatin1BytesToChars, "([BII[C)V"), - NATIVE_METHOD(Charsets, toAsciiBytes, "([CII)[B"), - NATIVE_METHOD(Charsets, toIsoLatin1Bytes, "([CII)[B"), - NATIVE_METHOD(Charsets, toUtf8Bytes, "([CII)[B"), -}; -void register_libcore_util_CharsetUtils(JNIEnv* env) { - jniRegisterNativeMethods(env, "libcore/util/CharsetUtils", gMethods, NELEM(gMethods)); -} diff --git a/luni/src/main/native/sub.mk b/luni/src/main/native/sub.mk index a90c683..73ed7cb 100644 --- a/luni/src/main/native/sub.mk +++ b/luni/src/main/native/sub.mk @@ -49,7 +49,6 @@ LOCAL_SRC_FILES := \ libcore_io_AsynchronousCloseMonitor.cpp \ libcore_io_Memory.cpp \ libcore_io_Posix.cpp \ - libcore_util_CharsetUtils.cpp \ org_apache_harmony_xml_ExpatParser.cpp \ readlink.cpp \ sun_misc_Unsafe.cpp \ diff --git a/luni/src/test/java/libcore/java/lang/StringTest.java b/luni/src/test/java/libcore/java/lang/StringTest.java index bf162e5..bd52e06 100644 --- a/luni/src/test/java/libcore/java/lang/StringTest.java +++ b/luni/src/test/java/libcore/java/lang/StringTest.java @@ -173,47 +173,6 @@ public class StringTest extends TestCase { } /** - * Tests a widely assumed performance characteristic of String.substring(): - * that it reuses the original's backing array. Although behavior should be - * correct even if this test fails, many applications may suffer - * significant performance degradation. - */ - public void testSubstringSharesBackingArray() throws IllegalAccessException { - String abcdefghij = "ABCDEFGHIJ"; - String cdefg = abcdefghij.substring(2, 7); - assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg)); - } - - /** - * Tests a widely assumed performance characteristic of string's copy - * constructor: that it ensures the backing array is the same length as the - * string. Although behavior should be correct even if this test fails, - * many applications may suffer significant performance degradation. - */ - public void testStringCopiesAvoidHeapRetention() throws IllegalAccessException { - String abcdefghij = "ABCDEFGHIJ"; - assertSame(getBackingArray(abcdefghij), getBackingArray(new String(abcdefghij))); - - String cdefg = abcdefghij.substring(2, 7); - assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg)); - assertEquals(5, getBackingArray(new String(cdefg)).length); - } - - /** - * Uses reflection to return the char[] backing the given string. This - * returns the actual backing array; which must not be modified. - */ - private char[] getBackingArray(String string) throws IllegalAccessException { - for (Field f : String.class.getDeclaredFields()) { - if (!Modifier.isStatic(f.getModifiers()) && f.getType() == char[].class) { - f.setAccessible(true); - return (char[]) f.get(string); - } - } - throw new UnsupportedOperationException("No chars[] field on String!"); - } - - /** * Test that strings interned manually and then later loaded as literals * maintain reference equality. http://b/3098960 */ |