summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java2
-rw-r--r--luni/src/main/java/java/lang/String.java26
-rw-r--r--regex/src/main/java/java/util/regex/Matcher.java3
-rw-r--r--regex/src/main/java/java/util/regex/Pattern.java52
-rw-r--r--regex/src/main/java/java/util/regex/Splitter.java122
5 files changed, 139 insertions, 66 deletions
diff --git a/icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java b/icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java
index 789c75b..a8ce8a6 100644
--- a/icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java
+++ b/icu/src/main/java/com/ibm/icu4jni/regex/NativeRegEx.java
@@ -17,6 +17,8 @@
package com.ibm.icu4jni.regex;
public final class NativeRegEx {
+ private NativeRegEx() {
+ }
/**
* Opens (compiles) an ICU regular expression.
diff --git a/luni/src/main/java/java/lang/String.java b/luni/src/main/java/java/lang/String.java
index 4fe31f2..0e0381c 100644
--- a/luni/src/main/java/java/lang/String.java
+++ b/luni/src/main/java/java/lang/String.java
@@ -17,25 +17,20 @@
package java.lang;
+import com.ibm.icu4jni.regex.NativeRegEx;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
-import java.util.Comparator;
-import java.util.Formatter;
-import java.util.Locale;
-
-import java.util.regex.Pattern;
-
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.security.AccessController;
+import java.util.Comparator;
+import java.util.Formatter;
+import java.util.Locale;
+import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
-
-// BEGIN android-removed
-// import org.apache.harmony.kernel.vm.VM;
-// END android-removed
import org.apache.harmony.luni.util.PriviAction;
/**
@@ -2123,7 +2118,8 @@ public final class String implements Serializable, Comparable<String>,
}
/**
- * Splits this string using the supplied regular expression {@code expr}.
+ * Splits this string using the supplied regular expression {@code expr},
+ * as if by {@code split(expr, 0)}.
*
* @param expr
* the regular expression used to divide the string.
@@ -2138,13 +2134,14 @@ public final class String implements Serializable, Comparable<String>,
* @since 1.4
*/
public String[] split(String expr) {
- return Pattern.compile(expr).split(this);
+ return split(expr, 0);
}
/**
* Splits this string using the supplied regular expression {@code expr}.
* The parameter {@code max} controls the behavior how many times the
- * pattern is applied to the string.
+ * pattern is applied to the string; see {@link Pattern#split(CharSequence, int)}
+ * for details.
*
* @param expr
* the regular expression used to divide the string.
@@ -2161,7 +2158,8 @@ public final class String implements Serializable, Comparable<String>,
* @since 1.4
*/
public String[] split(String expr, int max) {
- return Pattern.compile(expr).split(this, max);
+ String[] result = java.util.regex.Splitter.fastSplit(expr, this, max);
+ return result != null ? result : Pattern.compile(expr).split(this, max);
}
/**
diff --git a/regex/src/main/java/java/util/regex/Matcher.java b/regex/src/main/java/java/util/regex/Matcher.java
index be5c782..5abbbd5 100644
--- a/regex/src/main/java/java/util/regex/Matcher.java
+++ b/regex/src/main/java/java/util/regex/Matcher.java
@@ -206,8 +206,7 @@ public final class Matcher implements MatchResult {
throw new IllegalArgumentException();
}
- if (start < 0 || end < 0 || start > input.length() ||
- end > input.length() || start > end) {
+ if (start < 0 || end < 0 || start > input.length() || end > input.length() || start > end) {
throw new IllegalArgumentException();
}
diff --git a/regex/src/main/java/java/util/regex/Pattern.java b/regex/src/main/java/java/util/regex/Pattern.java
index c366732..325e3e0 100644
--- a/regex/src/main/java/java/util/regex/Pattern.java
+++ b/regex/src/main/java/java/util/regex/Pattern.java
@@ -169,8 +169,6 @@ public final class Pattern implements Serializable {
* <p>Otherwise, the {@code limit} parameter controls the contents of the
* returned array as described below.
*
- * @param inputSeq
- * the input sequence.
* @param limit
* Determines the maximum number of entries in the resulting
* array, and the treatment of trailing empty strings.
@@ -188,61 +186,15 @@ public final class Pattern implements Serializable {
* special, as described above, and the limit parameter does
* not apply there.)
* </ul>
- *
- * @return the resulting array.
*/
- public String[] split(CharSequence inputSeq, int limit) {
- if (inputSeq.length() == 0) {
- // Unlike Perl, which considers the result of splitting the empty
- // string to be the empty array, Java returns an array containing
- // the empty string.
- return new String[] { "" };
- }
-
- int maxLength = limit <= 0 ? Integer.MAX_VALUE : limit;
-
- String input = inputSeq.toString();
- ArrayList<String> list = new ArrayList<String>();
-
- Matcher matcher = new Matcher(this, inputSeq);
- int savedPos = 0;
-
- // Add text preceding each occurrence, if enough space.
- while(matcher.find() && list.size() + 1 < maxLength) {
- list.add(input.substring(savedPos, matcher.start()));
- savedPos = matcher.end();
- }
-
- // Add trailing text if enough space.
- if (list.size() < maxLength) {
- if (savedPos < input.length()) {
- list.add(input.substring(savedPos));
- } else {
- list.add("");
- }
- }
-
- // Remove trailing empty matches in the limit == 0 case.
- if (limit == 0) {
- int i = list.size() - 1;
- while (i >= 0 && "".equals(list.get(i))) {
- list.remove(i);
- i--;
- }
- }
-
- return list.toArray(new String[list.size()]);
+ public String[] split(CharSequence input, int limit) {
+ return Splitter.split(this, pattern, input.toString(), limit);
}
/**
* Splits a given input around occurrences of a regular expression. This is
* a convenience method that is equivalent to calling the method
* {@link #split(java.lang.CharSequence, int)} with a limit of 0.
- *
- * @param input
- * the input sequence.
- *
- * @return the resulting array.
*/
public String[] split(CharSequence input) {
return split(input, 0);
diff --git a/regex/src/main/java/java/util/regex/Splitter.java b/regex/src/main/java/java/util/regex/Splitter.java
new file mode 100644
index 0000000..5b4c048
--- /dev/null
+++ b/regex/src/main/java/java/util/regex/Splitter.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package java.util.regex;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/*
+ * Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
+ * @hide
+ */
+public class Splitter {
+ // The RI allows regular expressions beginning with ] or }, but that's probably a bug.
+ private static final String METACHARACTERS = "\\?*+[](){}^$.|";
+
+ private Splitter() {
+ }
+
+ /**
+ * Returns a result equivalent to {@code s.split(separator, limit)} if it's able
+ * to compute it more cheaply than ICU, or null if the caller should fall back to
+ * using ICU.
+ */
+ public static String[] fastSplit(String re, String input, int limit) {
+ // Can we do it cheaply?
+ int len = re.length();
+ if (len == 0) {
+ return null;
+ }
+ char ch = re.charAt(0);
+ if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
+ // We're looking for a single non-metacharacter. Easy.
+ } else if (len == 2 && ch == '\\') {
+ // We're looking for a quoted character.
+ // Quoted metacharacters are effectively single non-metacharacters.
+ ch = re.charAt(1);
+ if (METACHARACTERS.indexOf(ch) == -1) {
+ return null;
+ }
+ } else {
+ return null;
+ }
+
+ // We can do this cheaply...
+
+ // Unlike Perl, which considers the result of splitting the empty string to be the empty
+ // array, Java returns an array containing the empty string.
+ if (input.isEmpty()) {
+ return new String[] { "" };
+ }
+
+ // Collect text preceding each occurrence of the separator, while there's enough space.
+ ArrayList<String> list = new ArrayList<String>();
+ int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit;
+ int begin = 0;
+ int end;
+ while ((end = input.indexOf(ch, begin)) != -1 && list.size() + 1 < maxSize) {
+ list.add(input.substring(begin, end));
+ begin = end + 1;
+ }
+ return finishSplit(list, input, begin, maxSize, limit);
+ }
+
+ public static String[] split(Pattern pattern, String re, String input, int limit) {
+ String[] fastResult = fastSplit(re, input, limit);
+ if (fastResult != null) {
+ return fastResult;
+ }
+
+ // Unlike Perl, which considers the result of splitting the empty string to be the empty
+ // array, Java returns an array containing the empty string.
+ if (input.isEmpty()) {
+ return new String[] { "" };
+ }
+
+ // Collect text preceding each occurrence of the separator, while there's enough space.
+ ArrayList<String> list = new ArrayList<String>();
+ int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit;
+ Matcher matcher = new Matcher(pattern, input);
+ int begin = 0;
+ while (matcher.find() && list.size() + 1 < maxSize) {
+ list.add(input.substring(begin, matcher.start()));
+ begin = matcher.end();
+ }
+ return finishSplit(list, input, begin, maxSize, limit);
+ }
+
+ private static String[] finishSplit(List<String> list, String input, int begin, int maxSize, int limit) {
+ // Add trailing text if enough space.
+ if (list.size() < maxSize) {
+ if (begin < input.length()) {
+ list.add(input.substring(begin));
+ } else {
+ list.add("");
+ }
+ }
+ // Remove trailing empty matches in the limit == 0 case.
+ if (limit == 0) {
+ int i = list.size() - 1;
+ while (i >= 0 && "".equals(list.get(i))) {
+ list.remove(i);
+ i--;
+ }
+ }
+ // Convert to an array.
+ return list.toArray(new String[list.size()]);
+ }
+}