diff options
-rw-r--r-- | icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java | 2 | ||||
-rw-r--r-- | luni/src/main/java/java/lang/String.java | 26 | ||||
-rw-r--r-- | regex/src/main/java/java/util/regex/Matcher.java | 3 | ||||
-rw-r--r-- | regex/src/main/java/java/util/regex/Pattern.java | 52 | ||||
-rw-r--r-- | regex/src/main/java/java/util/regex/Splitter.java | 122 |
5 files changed, 139 insertions, 66 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java b/icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java index 789c75b..a8ce8a6 100644 --- a/icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java +++ b/icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java @@ -17,6 +17,8 @@ package com.ibm.icu4jni.regex; public final class NativeRegEx { + private NativeRegEx() { + } /** * Opens (compiles) an ICU regular expression. diff --git a/luni/src/main/java/java/lang/String.java b/luni/src/main/java/java/lang/String.java index 4fe31f2..0e0381c 100644 --- a/luni/src/main/java/java/lang/String.java +++ b/luni/src/main/java/java/lang/String.java @@ -17,25 +17,20 @@ package java.lang; +import com.ibm.icu4jni.regex.NativeRegEx; import java.io.Serializable; import java.io.UnsupportedEncodingException; -import java.util.Comparator; -import java.util.Formatter; -import java.util.Locale; - -import java.util.regex.Pattern; - import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.UnsupportedCharsetException; import java.security.AccessController; +import java.util.Comparator; +import java.util.Formatter; +import java.util.Locale; +import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; - -// BEGIN android-removed -// import org.apache.harmony.kernel.vm.VM; -// END android-removed import org.apache.harmony.luni.util.PriviAction; /** @@ -2123,7 +2118,8 @@ public final class String implements Serializable, Comparable<String>, } /** - * Splits this string using the supplied regular expression {@code expr}. + * Splits this string using the supplied regular expression {@code expr}, + * as if by {@code split(expr, 0)}. * * @param expr * the regular expression used to divide the string. @@ -2138,13 +2134,14 @@ public final class String implements Serializable, Comparable<String>, * @since 1.4 */ public String[] split(String expr) { - return Pattern.compile(expr).split(this); + return split(expr, 0); } /** * Splits this string using the supplied regular expression {@code expr}. * The parameter {@code max} controls the behavior how many times the - * pattern is applied to the string. + * pattern is applied to the string; see {@link Pattern#split(CharSequence, int)} + * for details. * * @param expr * the regular expression used to divide the string. @@ -2161,7 +2158,8 @@ public final class String implements Serializable, Comparable<String>, * @since 1.4 */ public String[] split(String expr, int max) { - return Pattern.compile(expr).split(this, max); + String[] result = java.util.regex.Splitter.fastSplit(expr, this, max); + return result != null ? result : Pattern.compile(expr).split(this, max); } /** diff --git a/regex/src/main/java/java/util/regex/Matcher.java b/regex/src/main/java/java/util/regex/Matcher.java index be5c782..5abbbd5 100644 --- a/regex/src/main/java/java/util/regex/Matcher.java +++ b/regex/src/main/java/java/util/regex/Matcher.java @@ -206,8 +206,7 @@ public final class Matcher implements MatchResult { throw new IllegalArgumentException(); } - if (start < 0 || end < 0 || start > input.length() || - end > input.length() || start > end) { + if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) { throw new IllegalArgumentException(); } diff --git a/regex/src/main/java/java/util/regex/Pattern.java b/regex/src/main/java/java/util/regex/Pattern.java index c366732..325e3e0 100644 --- a/regex/src/main/java/java/util/regex/Pattern.java +++ b/regex/src/main/java/java/util/regex/Pattern.java @@ -169,8 +169,6 @@ public final class Pattern implements Serializable { * <p>Otherwise, the {@code limit} parameter controls the contents of the * returned array as described below. * - * @param inputSeq - * the input sequence. * @param limit * Determines the maximum number of entries in the resulting * array, and the treatment of trailing empty strings. @@ -188,61 +186,15 @@ public final class Pattern implements Serializable { * special, as described above, and the limit parameter does * not apply there.) * </ul> - * - * @return the resulting array. */ - public String[] split(CharSequence inputSeq, int limit) { - if (inputSeq.length() == 0) { - // Unlike Perl, which considers the result of splitting the empty - // string to be the empty array, Java returns an array containing - // the empty string. - return new String[] { "" }; - } - - int maxLength = limit <= 0 ? Integer.MAX_VALUE : limit; - - String input = inputSeq.toString(); - ArrayList<String> list = new ArrayList<String>(); - - Matcher matcher = new Matcher(this, inputSeq); - int savedPos = 0; - - // Add text preceding each occurrence, if enough space. - while(matcher.find() && list.size() + 1 < maxLength) { - list.add(input.substring(savedPos, matcher.start())); - savedPos = matcher.end(); - } - - // Add trailing text if enough space. - if (list.size() < maxLength) { - if (savedPos < input.length()) { - list.add(input.substring(savedPos)); - } else { - list.add(""); - } - } - - // Remove trailing empty matches in the limit == 0 case. - if (limit == 0) { - int i = list.size() - 1; - while (i >= 0 && "".equals(list.get(i))) { - list.remove(i); - i--; - } - } - - return list.toArray(new String[list.size()]); + public String[] split(CharSequence input, int limit) { + return Splitter.split(this, pattern, input.toString(), limit); } /** * Splits a given input around occurrences of a regular expression. This is * a convenience method that is equivalent to calling the method * {@link #split(java.lang.CharSequence, int)} with a limit of 0. - * - * @param input - * the input sequence. - * - * @return the resulting array. */ public String[] split(CharSequence input) { return split(input, 0); diff --git a/regex/src/main/java/java/util/regex/Splitter.java b/regex/src/main/java/java/util/regex/Splitter.java new file mode 100644 index 0000000..5b4c048 --- /dev/null +++ b/regex/src/main/java/java/util/regex/Splitter.java @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package java.util.regex; + +import java.util.ArrayList; +import java.util.List; + +/* + * Used to make {@code String.split} fast (and to help {@code Pattern.split} too). + * @hide + */ +public class Splitter { + // The RI allows regular expressions beginning with ] or }, but that's probably a bug. + private static final String METACHARACTERS = "\\?*+[](){}^$.|"; + + private Splitter() { + } + + /** + * Returns a result equivalent to {@code s.split(separator, limit)} if it's able + * to compute it more cheaply than ICU, or null if the caller should fall back to + * using ICU. + */ + public static String[] fastSplit(String re, String input, int limit) { + // Can we do it cheaply? + int len = re.length(); + if (len == 0) { + return null; + } + char ch = re.charAt(0); + if (len == 1 && METACHARACTERS.indexOf(ch) == -1) { + // We're looking for a single non-metacharacter. Easy. + } else if (len == 2 && ch == '\\') { + // We're looking for a quoted character. + // Quoted metacharacters are effectively single non-metacharacters. + ch = re.charAt(1); + if (METACHARACTERS.indexOf(ch) == -1) { + return null; + } + } else { + return null; + } + + // We can do this cheaply... + + // Unlike Perl, which considers the result of splitting the empty string to be the empty + // array, Java returns an array containing the empty string. + if (input.isEmpty()) { + return new String[] { "" }; + } + + // Collect text preceding each occurrence of the separator, while there's enough space. + ArrayList<String> list = new ArrayList<String>(); + int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit; + int begin = 0; + int end; + while ((end = input.indexOf(ch, begin)) != -1 && list.size() + 1 < maxSize) { + list.add(input.substring(begin, end)); + begin = end + 1; + } + return finishSplit(list, input, begin, maxSize, limit); + } + + public static String[] split(Pattern pattern, String re, String input, int limit) { + String[] fastResult = fastSplit(re, input, limit); + if (fastResult != null) { + return fastResult; + } + + // Unlike Perl, which considers the result of splitting the empty string to be the empty + // array, Java returns an array containing the empty string. + if (input.isEmpty()) { + return new String[] { "" }; + } + + // Collect text preceding each occurrence of the separator, while there's enough space. + ArrayList<String> list = new ArrayList<String>(); + int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit; + Matcher matcher = new Matcher(pattern, input); + int begin = 0; + while (matcher.find() && list.size() + 1 < maxSize) { + list.add(input.substring(begin, matcher.start())); + begin = matcher.end(); + } + return finishSplit(list, input, begin, maxSize, limit); + } + + private static String[] finishSplit(List<String> list, String input, int begin, int maxSize, int limit) { + // Add trailing text if enough space. + if (list.size() < maxSize) { + if (begin < input.length()) { + list.add(input.substring(begin)); + } else { + list.add(""); + } + } + // Remove trailing empty matches in the limit == 0 case. + if (limit == 0) { + int i = list.size() - 1; + while (i >= 0 && "".equals(list.get(i))) { + list.remove(i); + i--; + } + } + // Convert to an array. + return list.toArray(new String[list.size()]); + } +} |