summaryrefslogtreecommitdiffstats
path: root/luni/src
diff options
context:
space:
mode:
authorElliott Hughes <enh@google.com>2010-05-10 18:21:25 -0700
committerElliott Hughes <enh@google.com>2010-05-11 16:23:40 -0700
commit5f37da05bb48298568f8abd7c97c3d11552e1867 (patch)
tree2393b2e6f19d2a1ecf7feaae6317b93b8ac394f8 /luni/src
parentd6f5e66e31388c2777da33c30fb2194ff5b427be (diff)
downloadlibcore-5f37da05bb48298568f8abd7c97c3d11552e1867.zip
libcore-5f37da05bb48298568f8abd7c97c3d11552e1867.tar.gz
libcore-5f37da05bb48298568f8abd7c97c3d11552e1867.tar.bz2
Documentation improvements.
Remove a bunch of content-free "package.html" files, and rewrite the Pattern documentation. Change-Id: Ieb4eee940dbbeab21828b8d7b2f172732f9dd6de
Diffstat (limited to 'luni/src')
-rw-r--r--luni/src/main/java/java/io/package.html14
-rw-r--r--luni/src/main/java/java/lang/Character.java50
-rw-r--r--luni/src/main/java/java/lang/String.java103
-rw-r--r--luni/src/main/java/java/lang/annotation/package.html9
-rw-r--r--luni/src/main/java/java/lang/package.html10
-rw-r--r--luni/src/main/java/java/lang/reflect/package.html15
-rw-r--r--luni/src/main/java/java/math/package.html12
-rw-r--r--luni/src/main/java/java/net/package.html8
-rw-r--r--luni/src/main/java/java/nio/channels/package.html9
-rw-r--r--luni/src/main/java/java/nio/channels/spi/package.html7
-rw-r--r--luni/src/main/java/java/nio/charset/package.html14
-rw-r--r--luni/src/main/java/java/nio/charset/spi/package.html7
-rw-r--r--luni/src/main/java/java/nio/package.html15
-rw-r--r--luni/src/main/java/java/text/package.html16
-rw-r--r--luni/src/main/java/java/util/jar/package.html11
-rw-r--r--luni/src/main/java/java/util/logging/package.html9
-rw-r--r--luni/src/main/java/java/util/package.html10
-rw-r--r--luni/src/main/java/java/util/prefs/package.html13
-rw-r--r--luni/src/main/java/java/util/regex/Pattern.java424
-rw-r--r--luni/src/main/java/java/util/regex/package.html881
-rw-r--r--luni/src/main/java/java/util/zip/package.html8
-rw-r--r--luni/src/main/java/javax/sql/package.html8
22 files changed, 338 insertions, 1315 deletions
diff --git a/luni/src/main/java/java/io/package.html b/luni/src/main/java/java/io/package.html
deleted file mode 100644
index d074aa0..0000000
--- a/luni/src/main/java/java/io/package.html
+++ /dev/null
@@ -1,14 +0,0 @@
-<html>
- <body>
- <p>
- Provides input and output facilities by means of streaming,
- file system access and serialization.
- </p>
- <p>
- These classes allow you to get direct access to the file system.
- It also gives the possibility to handle data streams buffered or
- unbuffered. And on top of that there are classes that help to abstract
- data streams as streams of chars, strings or even objects.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/lang/Character.java b/luni/src/main/java/java/lang/Character.java
index 859b6d6..37410fd 100644
--- a/luni/src/main/java/java/lang/Character.java
+++ b/luni/src/main/java/java/lang/Character.java
@@ -50,7 +50,55 @@ import com.ibm.icu4jni.lang.UCharacter;
* On the Java platform a {@code char} value represents either a single BMP code
* point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
* is used to represent all Unicode code points.
- *
+ *
+ * <a name="unicode_categories"><h3>Unicode categories</h3></a>
+ * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
+ * grouped semantically to provide a convenient overview. This table is also useful in
+ * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
+ * <span class="datatable">
+ * <style type="text/css">
+ * .datatable td { padding-right: 20px; }
+ * </style>
+ * <p><table>
+ * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr>
+ * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr>
+ * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr>
+ * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
+ * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr>
+ * <tr> <td><br></td> </tr>
+ * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
+ * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
+ * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
+ * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr>
+ * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr>
+ * <tr> <td><br></td> </tr>
+ * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr>
+ * <tr> <td> Me </td> <td> Enclosing mark </td> <td>{@link #ENCLOSING_MARK}</td> </tr>
+ * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
+ * <tr> <td><br></td> </tr>
+ * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
+ * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr>
+ * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr>
+ * <tr> <td><br></td> </tr>
+ * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr>
+ * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr>
+ * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr>
+ * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
+ * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
+ * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
+ * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr>
+ * <tr> <td><br></td> </tr>
+ * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr>
+ * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
+ * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
+ * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr>
+ * <tr> <td><br></td> </tr>
+ * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr>
+ * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr>
+ * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
+ * </table>
+ * </span>
+ *
* @since 1.0
*/
public final class Character implements Serializable, Comparable<Character> {
diff --git a/luni/src/main/java/java/lang/String.java b/luni/src/main/java/java/lang/String.java
index c7236d5..4c26585 100644
--- a/luni/src/main/java/java/lang/String.java
+++ b/luni/src/main/java/java/lang/String.java
@@ -1958,105 +1958,100 @@ public final class String implements Serializable, Comparable<String>, CharSeque
}
/**
- * Determines whether this string matches a given regular expression.
+ * Tests whether this string matches the given {@code regularExpression}. This method returns
+ * true only if the regular expression matches the <i>entire</i> input string. A common mistake is
+ * to assume that this method behaves like {@link #contains}; if you want to match anywhere
+ * within the input string, you need to add {@code .*} to the beginning and end of your
+ * regular expression. See {@link Pattern#matches}.
+ *
+ * <p>If the same regular expression is to be used for multiple operations, it may be more
+ * efficient to reuse a compiled {@code Pattern}.
*
- * @param expr
- * the regular expression to be matched.
- * @return {@code true} if the expression matches, otherwise {@code false}.
* @throws PatternSyntaxException
* if the syntax of the supplied regular expression is not
* valid.
- * @throws NullPointerException
- * if {@code expr} is {@code null}.
+ * @throws NullPointerException if {@code regularExpression == null}
* @since 1.4
*/
- public boolean matches(String expr) {
- return Pattern.matches(expr, this);
+ public boolean matches(String regularExpression) {
+ return Pattern.matches(regularExpression, this);
}
/**
- * Replace any substrings within this string that match the supplied regular
- * expression {@code expr}, with the string {@code substitute}.
+ * Replaces all matches for {@code regularExpression} within this string with the given
+ * {@code replacement}.
+ * See {@link Pattern} for regular expression syntax.
+ *
+ * <p>If the same regular expression is to be used for multiple operations, it may be more
+ * efficient to reuse a compiled {@code Pattern}.
*
- * @param expr
- * the regular expression to match.
- * @param substitute
- * the string to replace the matching substring with.
- * @return the new string.
* @throws PatternSyntaxException
* if the syntax of the supplied regular expression is not
* valid.
+ * @throws NullPointerException if {@code regularExpression == null}
* @see Pattern
* @since 1.4
*/
- public String replaceAll(String expr, String substitute) {
- return Pattern.compile(expr).matcher(this).replaceAll(substitute);
+ public String replaceAll(String regularExpression, String replacement) {
+ return Pattern.compile(regularExpression).matcher(this).replaceAll(replacement);
}
/**
- * Replace the first substring within this string that matches the supplied
- * regular expression {@code expr}, with the string {@code substitute}.
+ * Replaces the first match for {@code regularExpression} within this string with the given
+ * {@code replacement}.
+ * See {@link Pattern} for regular expression syntax.
+ *
+ * <p>If the same regular expression is to be used for multiple operations, it may be more
+ * efficient to reuse a compiled {@code Pattern}.
*
- * @param expr
- * the regular expression to match.
- * @param substitute
- * the string to replace the matching substring with.
- * @return the new string.
* @throws PatternSyntaxException
* if the syntax of the supplied regular expression is not
* valid.
- * @throws NullPointerException
- * if {@code strbuf} is {@code null}.
+ * @throws NullPointerException if {@code regularExpression == null}
* @see Pattern
* @since 1.4
*/
- public String replaceFirst(String expr, String substitute) {
- return Pattern.compile(expr).matcher(this).replaceFirst(substitute);
+ public String replaceFirst(String regularExpression, String replacement) {
+ return Pattern.compile(regularExpression).matcher(this).replaceFirst(replacement);
}
/**
- * Splits this string using the supplied regular expression {@code expr},
- * as if by {@code split(expr, 0)}.
+ * Splits this string using the supplied {@code regularExpression}.
+ * Equivalent to {@code split(regularExpression, 0)}.
+ * See {@link Pattern#split(CharSequence, int)} for an explanation of {@code limit}.
+ * See {@link Pattern} for regular expression syntax.
*
- * @param expr
- * the regular expression used to divide the string.
- * @return an array of Strings created by separating the string along
- * matches of the regular expression.
- * @throws NullPointerException
- * if {@code expr} is {@code null}.
+ * <p>If the same regular expression is to be used for multiple operations, it may be more
+ * efficient to reuse a compiled {@code Pattern}.
+ *
+ * @throws NullPointerException if {@code regularExpression == null}
* @throws PatternSyntaxException
* if the syntax of the supplied regular expression is not
* valid.
* @see Pattern
* @since 1.4
*/
- public String[] split(String expr) {
- return split(expr, 0);
+ public String[] split(String regularExpression) {
+ return split(regularExpression, 0);
}
/**
- * Splits this string using the supplied regular expression {@code expr}.
- * The parameter {@code max} controls the behavior how many times the
- * pattern is applied to the string; see {@link Pattern#split(CharSequence, int)}
- * for details.
+ * Splits this string using the supplied {@code regularExpression}.
+ * See {@link Pattern#split(CharSequence, int)} for an explanation of {@code limit}.
+ * See {@link Pattern} for regular expression syntax.
*
- * @param expr
- * the regular expression used to divide the string.
- * @param max
- * the number of entries in the resulting array.
- * @return an array of Strings created by separating the string along
- * matches of the regular expression.
- * @throws NullPointerException
- * if {@code expr} is {@code null}.
+ * <p>If the same regular expression is to be used for multiple operations, it may be more
+ * efficient to reuse a compiled {@code Pattern}.
+ *
+ * @throws NullPointerException if {@code regularExpression == null}
* @throws PatternSyntaxException
* if the syntax of the supplied regular expression is not
* valid.
- * @see Pattern#split(CharSequence, int)
* @since 1.4
*/
- public String[] split(String expr, int max) {
- String[] result = java.util.regex.Splitter.fastSplit(expr, this, max);
- return result != null ? result : Pattern.compile(expr).split(this, max);
+ public String[] split(String regularExpression, int limit) {
+ String[] result = java.util.regex.Splitter.fastSplit(regularExpression, this, limit);
+ return result != null ? result : Pattern.compile(regularExpression).split(this, limit);
}
/**
diff --git a/luni/src/main/java/java/lang/annotation/package.html b/luni/src/main/java/java/lang/annotation/package.html
deleted file mode 100644
index 5a150b3..0000000
--- a/luni/src/main/java/java/lang/annotation/package.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<html>
- <body>
- <p>
- Defines interfaces and exceptions necessary for annotation support. Also
- provides some predefined annotations that are used throughout the Android
- libraries.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/lang/package.html b/luni/src/main/java/java/lang/package.html
deleted file mode 100644
index 8075edb..0000000
--- a/luni/src/main/java/java/lang/package.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<html>
- <body>
- <p>
- Provides core classes of the Android environment. This includes
- {@link java.lang.Object}, the ultimate ancestor of all classes,
- as well as several other classes which represent important data types or
- central aspects of the environment hosting the application.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/lang/reflect/package.html b/luni/src/main/java/java/lang/reflect/package.html
deleted file mode 100644
index ec457e2..0000000
--- a/luni/src/main/java/java/lang/reflect/package.html
+++ /dev/null
@@ -1,15 +0,0 @@
-<html>
- <body>
- <p>
- Provides reflective run-time access to the properties of a class, which
- allows such things as querying or modifying an objects's field by its name
- or invoking a method by name.
- </p>
-
- <p>
- The code {@link java.lang.Class} class serves as the entry-point to
- reflection. The classes in this package represent the various language
- elements.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/math/package.html b/luni/src/main/java/java/math/package.html
deleted file mode 100644
index 4c9ba9f..0000000
--- a/luni/src/main/java/java/math/package.html
+++ /dev/null
@@ -1,12 +0,0 @@
-<html>
- <body>
- Provides arbitrary-precision integers and decimals.
- Class {@link java.math.BigInteger} provides integers which are limited
- by the available memory only.
- Class {@link java.math.BigDecimal} provides arbitrary-precision signed
- decimal numbers. These numbers are suitable for currency calculations.
- The user has full control over the rounding behavior (comparable with
- the IEEE754R rounding modes).
- <p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/net/package.html b/luni/src/main/java/java/net/package.html
deleted file mode 100644
index dff1ccd..0000000
--- a/luni/src/main/java/java/net/package.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<html>
- <body>
- <p>
- Provides networking-related functionality, such as streaming and datagram
- sockets, handling of Internet addresses, and dealing with HTTP requests.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/nio/channels/package.html b/luni/src/main/java/java/nio/channels/package.html
deleted file mode 100644
index c16c811..0000000
--- a/luni/src/main/java/java/nio/channels/package.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<html>
- <body>
- <p>
- Channels provide a way to connect to sources of data such as
- files, sockets or other structures that allow input and/or output of
- data. Selectors support multiplexing of non-blocking channels.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/nio/channels/spi/package.html b/luni/src/main/java/java/nio/channels/spi/package.html
deleted file mode 100644
index e7b8a49..0000000
--- a/luni/src/main/java/java/nio/channels/spi/package.html
+++ /dev/null
@@ -1,7 +0,0 @@
-<html>
- <body>
- <p>
- Service-provider classes for nio channels.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/nio/charset/package.html b/luni/src/main/java/java/nio/charset/package.html
deleted file mode 100644
index 6554010..0000000
--- a/luni/src/main/java/java/nio/charset/package.html
+++ /dev/null
@@ -1,14 +0,0 @@
-<html>
- <body>
- <p>
- This package provides translation services between bytes and different
- character sets.
- </p>
- <p>
- An encoder translates characters into bytes and a decoder can
- translate a byte stream into characters. With a charset you can create a
- de-/encoder pair that can be used to translate a byte stream. With the
- service provider package it is possible to use your own charsets.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/nio/charset/spi/package.html b/luni/src/main/java/java/nio/charset/spi/package.html
deleted file mode 100644
index 4e58391..0000000
--- a/luni/src/main/java/java/nio/charset/spi/package.html
+++ /dev/null
@@ -1,7 +0,0 @@
-<html>
- <body>
- <p>
- Service-provider class for nio charset.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/nio/package.html b/luni/src/main/java/java/nio/package.html
deleted file mode 100644
index b521b67..0000000
--- a/luni/src/main/java/java/nio/package.html
+++ /dev/null
@@ -1,15 +0,0 @@
-<html>
- <body>
- <p>
- Provides buffers that help handling data.
- </p>
- <p>
- There are buffers for most primitive data types such as
- <code>FloatBuffer</code>, <code>IntBuffer</code>, etc. These classes
- provide methods to get and put data from the
- buffers, to compact, slice or duplicate them, or to wrap an existing
- array. Buffers also manage the position of the current element in the
- buffer, they can be rewound to the beginning and allow skipping of elements.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/text/package.html b/luni/src/main/java/java/text/package.html
deleted file mode 100644
index b3f387a..0000000
--- a/luni/src/main/java/java/text/package.html
+++ /dev/null
@@ -1,16 +0,0 @@
-<html>
- <body>
- <p>
- The java.text package offers internationalization and localization
- facilities.
- </p>
- <p>
- By using the classes in this package, it is possible to write the
- application in an internationalized way. The benefit of this is that a new
- localization can be provided at any time without having to change the
- code. Support for localization is given for numbers, messages, dates and
- other characteristics of a language like the directionality, sorting order
- or enumeration of characters, words or lines.
- </p>
-</body>
-</html>
diff --git a/luni/src/main/java/java/util/jar/package.html b/luni/src/main/java/java/util/jar/package.html
deleted file mode 100644
index 21103cc..0000000
--- a/luni/src/main/java/java/util/jar/package.html
+++ /dev/null
@@ -1,11 +0,0 @@
-<html>
- <body>
- <p>
- The java.jar package gives access to reading and writing a Java archive,
- or JAR, files. These are
- actually ZIP files with the possibility to add meta-information in the
- form of a MANIFEST file. This manifest can also be used
- to sign a JAR file.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/util/logging/package.html b/luni/src/main/java/java/util/logging/package.html
deleted file mode 100644
index d7ed252..0000000
--- a/luni/src/main/java/java/util/logging/package.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<html>
- <body>
- <p>
- This package allows to add logging to any application. It
- supports different levels of importance of a message that needs to be
- logged. The output written to the target can be filtered by this level.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/util/package.html b/luni/src/main/java/java/util/package.html
deleted file mode 100644
index 3656147..0000000
--- a/luni/src/main/java/java/util/package.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<html>
- <body>
- <p>
- Provides an extensive set of utility classes. This encompasses things
- such as basic container data structures (various forms of lists, sets,
- and maps), classes for dealing with date and time, String-handling, formatting,
- localization, and scheduling repeated tasks.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/util/prefs/package.html b/luni/src/main/java/java/util/prefs/package.html
deleted file mode 100644
index 41cd12c..0000000
--- a/luni/src/main/java/java/util/prefs/package.html
+++ /dev/null
@@ -1,13 +0,0 @@
-<html>
- <body>
- <p>
- This package provides a preferences mechanism, that is, a means of writing
- configuration data (key/value pairs) to a persistent data store and
- retrieving it from there. There are two different kinds of stores
- available: one for storing user data and one for storing system
- configuration data. Since the underlying implementation is dependent
- on the operating system, this package is designed to allow the installation
- of a custom service provider implementation.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/java/util/regex/Pattern.java b/luni/src/main/java/java/util/regex/Pattern.java
index 325e3e0..49edd40 100644
--- a/luni/src/main/java/java/util/regex/Pattern.java
+++ b/luni/src/main/java/java/util/regex/Pattern.java
@@ -16,40 +16,204 @@
package java.util.regex;
+import java.io.IOException;
+import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.ArrayList;
import com.ibm.icu4jni.regex.NativeRegEx;
/**
- * Represents a pattern used for matching, searching, or replacing strings.
- * {@code Pattern}s are specified in terms of regular expressions and compiled
- * using an instance of this class. They are then used in conjunction with a
- * {@link Matcher} to perform the actual search.
- * <p/>
- * A typical use case looks like this:
- * <p/>
- * <pre>
- * Pattern p = Pattern.compile("Hello, A[a-z]*!");
- *
- * Matcher m = p.matcher("Hello, Android!");
- * boolean b1 = m.matches(); // true
- *
- * m.setInput("Hello, Robot!");
- * boolean b2 = m.matches(); // false
- * </pre>
- * <p/>
- * The above code could also be written in a more compact fashion, though this
- * variant is less efficient, since {@code Pattern} and {@code Matcher} objects
- * are created on the fly instead of being reused.
- * fashion:
+ * Patterns are compiled regular expressions. In many cases, convenience methods such as
+ * {@link String#matches String.matches}, {@link String#replaceAll String.replaceAll} and
+ * {@link String#split String.split} will be preferable, but if you need to do a lot of work
+ * with the same regular expression, it may be more efficient to compile it once and reuse it.
+ * The {@code Pattern} class and its companion, {@link Matcher}, are also a lot more powerful
+ * than the small amount of functionality exposed by {@code String}.
+ *
* <pre>
- * boolean b1 = Pattern.matches("Hello, A[a-z]*!", "Hello, Android!"); // true
- * boolean b2 = Pattern.matches("Hello, A[a-z]*!", "Hello, Robot!"); // false
+ * // String convenience methods:
+ * boolean sawFailures = s.matches("Failures: \\d+");
+ * String farewell = s.replaceAll("Hello, (\\S+)", "Goodbye, $1");
+ * String[] fields = s.split(":");
+ *
+ * // Direct use of Pattern:
+ * Pattern p = Pattern.compile("Hello, (\\S+)");
+ * Matcher m = p.matcher(inputString);
+ * while (m.find()) { // Find each match in turn; String can't do this.
+ * String name = m.group(1); // Access a submatch group; String can't do this.
+ * }
* </pre>
- * <p/>
- * Please consult the <a href="package-descr.html">package documentation</a> for an
- * overview of the regular expression syntax used in this class as well as
- * Android-specific implementation details.
+ *
+ * <h3>Regular expression syntax</h3>
+ * <span class="datatable">
+ * <style type="text/css">
+ * .datatable td { padding-right: 20px; }
+ * </style>
+ *
+ * <p>Java supports a subset of Perl 5 regular expression syntax. An important gotcha is that Java
+ * has no regular expression literals, and uses plain old string literals instead. This means that
+ * you need an extra level of escaping. For example, the regular expression {@code \s+} has to
+ * be represented as the string {@code "\\s+"}.
+ *
+ * <h3>Escape sequences</h3>
+ * <p><table>
+ * <tr> <td> \ </td> <td>Quote the following metacharacter (so {@code \.} matches a literal {@code .}).</td> </tr>
+ * <tr> <td> \Q </td> <td>Quote all following metacharacters until {@code \E}.</td> </tr>
+ * <tr> <td> \E </td> <td>Stop quoting metacharacters (started by {@code \Q}).</td> </tr>
+ * <tr> <td> \\ </td> <td>A literal backslash.</td> </tr>
+ * <tr> <td> &#x005c;u<i>hhhh</i> </td> <td>The Unicode character U+hhhh (in hex).</td> </tr>
+ * <tr> <td> \c<i>x</i> </td> <td>The ASCII control character <i>x</i> (so {@code \cI} would be U+0009).</td> </tr>
+ *
+ * <tr> <td> \a </td> <td>The ASCII bell character (U+0007).</td> </tr>
+ * <tr> <td> \e </td> <td>The ASCII ESC character (U+001b).</td> </tr>
+ * <tr> <td> \f </td> <td>The ASCII form feed character (U+000c).</td> </tr>
+ * <tr> <td> \n </td> <td>The ASCII newline character (U+000a).</td> </tr>
+ * <tr> <td> \r </td> <td>The ASCII carriage return character (U+000d).</td> </tr>
+ * <tr> <td> \t </td> <td>The ASCII tab character (U+0009).</td> </tr>
+ * </table>
+ *
+ * <h3>Character classes</h3>
+ * <p>It's possible to construct arbitrary character classes using set operations:
+ * <table>
+ * <tr> <td> [abc] </td> <td>Any one of {@code a}, {@code b}, or {@code c}. (Enumeration.)</td> </tr>
+ * <tr> <td> [a-c] </td> <td>Any one of {@code a}, {@code b}, or {@code c}. (Range.)</td> </tr>
+ * <tr> <td> [^abc] </td> <td>Any character <i>except</i> {@code a}, {@code b}, or {@code c}. (Negation.)</td> </tr>
+ * <tr> <td> [[a-f][0-9]] </td> <td>Any character in either range. (Union.)</td> </tr>
+ * <tr> <td> [[a-z]&&[jkl]] </td> <td>Any character in both ranges. (Intersection.)</td> </tr>
+ * </table>
+ * <p>Most of the time, the built-in character classes are more useful:
+ * <table>
+ * <tr> <td> \d </td> <td>Any digit character.</td> </tr>
+ * <tr> <td> \D </td> <td>Any non-digit character.</td> </tr>
+ * <tr> <td> \s </td> <td>Any whitespace character.</td> </tr>
+ * <tr> <td> \S </td> <td>Any non-whitespace character.</td> </tr>
+ * <tr> <td> \w </td> <td>Any word character.</td> </tr>
+ * <tr> <td> \W </td> <td>Any non-word character.</td> </tr>
+ * <tr> <td> \p{<i>NAME</i>} </td> <td> Any character in the class with the given <i>NAME</i>. </td> </tr>
+ * <tr> <td> \P{<i>NAME</i>} </td> <td> Any character <i>not</i> in the named class. </td> </tr>
+ * </table>
+ * <p>There are a variety of named classes:
+ * <ul>
+ * <li><a href="../../lang/Character.html#unicode_categories">Unicode category names</a>,
+ * prefixed by {@code Is}. For example {@code \p{IsLu}} for all uppercase letters.
+ * <li>POSIX class names. These are 'Alnum', 'Alpha', 'ASCII', 'Blank', 'Cntrl', 'Digit',
+ * 'Graph', 'Lower', 'Print', 'Punct', 'Upper', 'XDigit'.
+ * <li>Unicode block names, as used by {@link java.lang.Character.UnicodeBlock#forName} prefixed
+ * by {@code In}. For example {@code \p{InHebrew}} for all characters in the Hebrew block.
+ * <li>Character method names. These are all non-deprecated methods from {@link java.lang.Character}
+ * whose name starts with {@code is}, but with the {@code is} replaced by {@code java}.
+ * For example, {@code \p{javaLowerCase}}.
+ * </ul>
+ *
+ * <h3>Quantifiers</h3>
+ * <p>Quantifiers match some number of instances of the preceding regular expression.
+ * <table>
+ * <tr> <td> * </td> <td>Zero or more.</td> </tr>
+ * <tr> <td> ? </td> <td>Zero or one.</td> </tr>
+ * <tr> <td> + </td> <td>One or more.</td> </tr>
+ * <tr> <td> {<i>n</i>} </td> <td>Exactly <i>n</i>.</td> </tr>
+ * <tr> <td> {<i>n,</i>} </td> <td>At least <i>n</i>.</td> </tr>
+ * <tr> <td> {<i>n</i>,<i>m</i>} </td> <td>At least <i>n</i> but not more than <i>m</i>.</td> </tr>
+ * </table>
+ * <p>Quantifiers are "greedy" by default, meaning that they will match the longest possible input
+ * sequence. There are also non-greedy quantifiers that match the shortest possible input sequence.
+ * They're the same as the greedy ones but with a trailing {@code ?}:
+ * <table>
+ * <tr> <td> *? </td> <td>Zero or more (non-greedy).</td> </tr>
+ * <tr> <td> ?? </td> <td>Zero or one (non-greedy).</td> </tr>
+ * <tr> <td> +? </td> <td>One or more (non-greedy).</td> </tr>
+ * <tr> <td> {<i>n</i>}? </td> <td>Exactly <i>n</i> (non-greedy).</td> </tr>
+ * <tr> <td> {<i>n,</i>}? </td> <td>At least <i>n</i> (non-greedy).</td> </tr>
+ * <tr> <td> {<i>n</i>,<i>m</i>}? </td> <td>At least <i>n</i> but not more than <i>m</i> (non-greedy).</td> </tr>
+ * </table>
+ * <p>Quantifiers allow backtracking by default. There are also possessive quantifiers to prevent
+ * backtracking. They're the same as the greedy ones but with a trailing {@code +}:
+ * <table>
+ * <tr> <td> *+ </td> <td>Zero or more (possessive).</td> </tr>
+ * <tr> <td> ?+ </td> <td>Zero or one (possessive).</td> </tr>
+ * <tr> <td> ++ </td> <td>One or more (possessive).</td> </tr>
+ * <tr> <td> {<i>n</i>}+ </td> <td>Exactly <i>n</i> (possessive).</td> </tr>
+ * <tr> <td> {<i>n,</i>}+ </td> <td>At least <i>n</i> (possessive).</td> </tr>
+ * <tr> <td> {<i>n</i>,<i>m</i>}+ </td> <td>At least <i>n</i> but not more than <i>m</i> (possessive).</td> </tr>
+ * </table>
+ *
+ * <h3>Zero-width assertions</h3>
+ * <p><table>
+ * <tr> <td> ^ </td> <td>At beginning of line.</td> </tr>
+ * <tr> <td> $ </td> <td>At end of line.</td> </tr>
+ * <tr> <td> \A </td> <td>At beginning of input.</td> </tr>
+ * <tr> <td> \b </td> <td>At word boundary.</td> </tr>
+ * <tr> <td> \B </td> <td>At non-word boundary.</td> </tr>
+ * <tr> <td> \G </td> <td>At end of previous match.</td> </tr>
+ * <tr> <td> \z </td> <td>At end of input.</td> </tr>
+ * <tr> <td> \Z </td> <td>At end of input, or before newline at end.</td> </tr>
+ * </table>
+ *
+ * <h3>Look-around assertions</h3>
+ * <p>Look-around assertions assert that the subpattern does (positive) or doesn't (negative) match
+ * after (look-ahead) or before (look-behind) the current position, without including the matched
+ * text in the containing match. The maximum length of possible matches for look-behind patterns
+ * must not be unbounded.
+ * <p><table>
+ * <tr> <td> (?=<i>a</i>) </td> <td>Zero-width positive look-ahead.</td> </tr>
+ * <tr> <td> (?!<i>a</i>) </td> <td>Zero-width negative look-ahead.</td> </tr>
+ * <tr> <td> (?&lt;=<i>a</i>) </td> <td>Zero-width positive look-behind.</td> </tr>
+ * <tr> <td> (?&lt;!<i>a</i>) </td> <td>Zero-width negative look-behind.</td> </tr>
+ * </table>
+ *
+ * <h3>Groups</h3>
+ *
+ * <p><table>
+ * <tr> <td> (<i>a</i>) </td> <td>A capturing group.</td> </tr>
+ * <tr> <td> (?:<i>a</i>) </td> <td>A non-capturing group.</td> </tr>
+ * <tr> <td> (?&gt;<i>a</i>) </td> <td>An independent non-capturing group. (The first match of the subgroup is the only match tried.)</td> </tr>
+ * <tr> <td> \<i>n</i> </td> <td>The text already matched by capturing group <i>n</i>.</td> </tr>
+ * </table>
+ * <p>Explicit capturing groups are numbered from 1, and available via {@link Matcher#group}.
+ * Group 0 represents the whole match.
+ *
+ * <h3>Operators</h3>
+ * <p><table>
+ * <tr> <td> <i>ab</i> </td> <td>Expression <i>a</i> followed by expression <i>b</i>.</td> </tr>
+ * <tr> <td> <i>a</i>|<i>b</i> </td> <td>Either expression <i>a</i> or expression <i>b</i>.</td> </tr>
+ * </table>
+ *
+ * <a name="flags"><h3>Flags</h3></a>
+ * <p><table>
+ * <tr> <td> (?dimsux-dimsux:<i>a</i>) </td> <td>Evaluates the expression <i>a</i> with the given flags enabled/disabled.</td> </tr>
+ * <tr> <td> (?dimsux-dimsux) </td> <td>Evaluates the rest of the pattern with the given flags enabled/disabled.</td> </tr>
+ * </table>
+ *
+ * <p>The flags are:
+ * <table>
+ * <tr><td>{@code i}</td> <td>{@link #CASE_INSENSITIVE}</td> <td>case insensitive matching</td></tr>
+ * <tr><td>{@code d}</td> <td>{@link #UNIX_LINES}</td> <td>only accept {@code '\n'} as a line terminator</td></tr>
+ * <tr><td>{@code m}</td> <td>{@link #MULTILINE}</td> <td>allow {@code ^} and {@code $} to match beginning/end of any line</td></tr>
+ * <tr><td>{@code s}</td> <td>{@link #DOTALL}</td> <td>allow {@code .} to match {@code '\n'} ("s" for "single line")</td></tr>
+ * <tr><td>{@code u}</td> <td>{@link #UNICODE_CASE}</td> <td>enable Unicode case folding</td></tr>
+ * <tr><td>{@code x}</td> <td>{@link #COMMENTS}</td> <td>allow whitespace and comments</td></tr>
+ * </table>
+ * <p>Either set of flags may be empty. For example, {@code (?i-m)} would turn on case-insensitivity
+ * and turn off multiline mode, {@code (?i)} would just turn on case-insensitivity,
+ * and {@code (?-m)} would just turn off multiline mode.
+ * <p>Note that on Android, {@code UNICODE_CASE} is always on: case-insensitive matching will
+ * always be Unicode-aware.
+ * <p>There are two other flags not settable via this mechanism: {@link #CANON_EQ} and
+ * {@link #LITERAL}. Attempts to use {@link #CANON_EQ} on Android will throw an exception.
+ * </span>
+ *
+ * <h3>Implementation notes</h3>
+ *
+ * The regular expression implementation used in Android is provided by
+ * <a href="http://www.icu-project.org">ICU</a>. The notation for the regular
+ * expressions is mostly a superset of those used in other Java language
+ * implementations. This means that existing applications will normally work as
+ * expected, but in rare cases Android may accept a regular expression that is
+ * not accepted by other implementations.
+ *
+ * <p>In some cases, Android will recognize that a regular expression is a simple
+ * special case that can be handled more efficiently. This is true of both the convenience methods
+ * in {@code String} and the methods in {@code Pattern}.
*
* @see Matcher
*/
@@ -59,34 +223,28 @@ public final class Pattern implements Serializable {
/**
* This constant specifies that a pattern matches Unix line endings ('\n')
- * only against the '.', '^', and '$' meta characters.
+ * only against the '.', '^', and '$' meta characters. Corresponds to {@code (?d)}.
*/
public static final int UNIX_LINES = 0x01;
/**
* This constant specifies that a {@code Pattern} is matched
* case-insensitively. That is, the patterns "a+" and "A+" would both match
- * the string "aAaAaA".
- * <p>
- * Note: For Android, the {@code CASE_INSENSITIVE} constant
- * (currently) always includes the meaning of the {@link #UNICODE_CASE}
- * constant. So if case insensitivity is enabled, this automatically extends
- * to all Unicode characters. The {@code UNICODE_CASE} constant itself has
- * no special consequences.
+ * the string "aAaAaA". See {@link #UNICODE_CASE}. Corresponds to {@code (?i)}.
*/
public static final int CASE_INSENSITIVE = 0x02;
/**
* This constant specifies that a {@code Pattern} may contain whitespace or
* comments. Otherwise comments and whitespace are taken as literal
- * characters.
+ * characters. Corresponds to {@code (?x)}.
*/
public static final int COMMENTS = 0x04;
/**
* This constant specifies that the meta characters '^' and '$' match only
- * the beginning and end end of an input line, respectively. Normally, they
- * match the beginning and the end of the complete input.
+ * the beginning and end of an input line, respectively. Normally, they
+ * match the beginning and the end of the complete input. Corresponds to {@code (?m)}.
*/
public static final int MULTILINE = 0x08;
@@ -99,20 +257,16 @@ public final class Pattern implements Serializable {
/**
* This constant specifies that the '.' meta character matches arbitrary
* characters, including line endings, which is normally not the case.
+ * Corresponds to {@code (?s)}.
*/
public static final int DOTALL = 0x20;
/**
- * This constant specifies that a {@code Pattern} is matched
- * case-insensitively with regard to all Unicode characters. It is used in
- * conjunction with the {@link #CASE_INSENSITIVE} constant to extend its
- * meaning to all Unicode characters.
- * <p>
- * Note: For Android, the {@code CASE_INSENSITIVE} constant
- * (currently) always includes the meaning of the {@code UNICODE_CASE}
- * constant. So if case insensitivity is enabled, this automatically extends
- * to all Unicode characters. The {@code UNICODE_CASE} constant then has no
- * special consequences.
+ * This constant specifies that a {@code Pattern} that uses case-insensitive matching
+ * will use Unicode case folding. On Android, {@code UNICODE_CASE} is always on:
+ * case-insensitive matching will always be Unicode-aware. If your code is intended to
+ * be portable and uses case-insensitive matching on non-ASCII characters, you should
+ * use this flag. Corresponds to {@code (?u)}.
*/
public static final int UNICODE_CASE = 0x40;
@@ -123,15 +277,8 @@ public final class Pattern implements Serializable {
*/
public static final int CANON_EQ = 0x80;
- /**
- * Holds the regular expression.
- */
- private String pattern;
-
- /**
- * Holds the flags used when compiling this pattern.
- */
- private int flags;
+ private final String pattern;
+ private final int flags;
/**
* Holds a handle (a pointer, actually) for the native ICU pattern.
@@ -143,26 +290,20 @@ public final class Pattern implements Serializable {
*/
transient int mGroupCount;
-
/**
- * Returns a {@link Matcher} for the {@code Pattern} and a given input. The
- * {@code Matcher} can be used to match the {@code Pattern} against the
+ * Returns a {@link Matcher} for this pattern applied to the given {@code input}.
+ * The {@code Matcher} can be used to match the {@code Pattern} against the
* whole input, find occurrences of the {@code Pattern} in the input, or
* replace parts of the input.
- *
- * @param input
- * the input to process.
- *
- * @return the resulting {@code Matcher}.
*/
public Matcher matcher(CharSequence input) {
return new Matcher(this, input);
}
/**
- * Splits the given input sequence at occurrences of this {@code Pattern}.
+ * Splits the given {@code input} at occurrences of this pattern.
*
- * <p>If this {@code Pattern} does not occur in the input, the result is an
+ * <p>If this pattern does not occur in the input, the result is an
* array containing the input (converted from a {@code CharSequence} to
* a {@code String}).
*
@@ -192,19 +333,14 @@ public final class Pattern implements Serializable {
}
/**
- * Splits a given input around occurrences of a regular expression. This is
- * a convenience method that is equivalent to calling the method
- * {@link #split(java.lang.CharSequence, int)} with a limit of 0.
+ * Equivalent to {@code split(input, 0)}.
*/
public String[] split(CharSequence input) {
return split(input, 0);
}
/**
- * Returns the regular expression that was compiled into this
- * {@code Pattern}.
- *
- * @return the regular expression.
+ * Returns the regular expression supplied to {@code compile}.
*/
public String pattern() {
return pattern;
@@ -216,44 +352,17 @@ public final class Pattern implements Serializable {
}
/**
- * Returns the flags that have been set for this {@code Pattern}.
- *
- * @return the flags that have been set. A combination of the constants
- * defined in this class.
- *
- * @see #CANON_EQ
- * @see #CASE_INSENSITIVE
- * @see #COMMENTS
- * @see #DOTALL
- * @see #LITERAL
- * @see #MULTILINE
- * @see #UNICODE_CASE
- * @see #UNIX_LINES
+ * Returns the flags supplied to {@code compile}.
*/
public int flags() {
return flags;
}
/**
- * Compiles a regular expression, creating a new {@code Pattern} instance in
- * the process. Allows to set some flags that modify the behavior of the
- * {@code Pattern}.
- *
- * @param pattern
- * the regular expression.
- * @param flags
- * the flags to set. Basically, any combination of the constants
- * defined in this class is valid.
- * <p>
- * Note: Currently, the {@link #CASE_INSENSITIVE} and
- * {@link #UNICODE_CASE} constants have slightly special behavior
- * in Android, and the {@link #CANON_EQ} constant is not
- * supported at all.
+ * Returns a compiled form of the given {@code regularExpression}, as modified by the
+ * given {@code flags}. See the <a href="#flags">flags overview</a> for more on flags.
*
- * @return the new {@code Pattern} instance.
- *
- * @throws PatternSyntaxException
- * if the regular expression is syntactically incorrect.
+ * @throws PatternSyntaxException if the regular expression is syntactically incorrect.
*
* @see #CANON_EQ
* @see #CASE_INSENSITIVE
@@ -264,60 +373,26 @@ public final class Pattern implements Serializable {
* @see #UNICODE_CASE
* @see #UNIX_LINES
*/
- public static Pattern compile(String pattern, int flags) throws PatternSyntaxException {
- return new Pattern(pattern, flags);
+ public static Pattern compile(String regularExpression, int flags) throws PatternSyntaxException {
+ return new Pattern(regularExpression, flags);
}
/**
- * Creates a new {@code Pattern} instance from a given regular expression
- * and flags.
- *
- * @param pattern
- * the regular expression.
- * @param flags
- * the flags to set. Any combination of the constants defined in
- * this class is valid.
- *
- * @throws PatternSyntaxException
- * if the regular expression is syntactically incorrect.
+ * Equivalent to {@code Pattern.compile(pattern, 0)}.
*/
+ public static Pattern compile(String pattern) {
+ return new Pattern(pattern, 0);
+ }
+
private Pattern(String pattern, int flags) throws PatternSyntaxException {
if ((flags & CANON_EQ) != 0) {
throw new UnsupportedOperationException("CANON_EQ flag not supported");
}
-
this.pattern = pattern;
this.flags = flags;
-
compileImpl(pattern, flags);
}
- /**
- * Compiles a regular expression, creating a new Pattern instance in the
- * process. This is actually a convenience method that calls {@link
- * #compile(String, int)} with a {@code flags} value of zero.
- *
- * @param pattern
- * the regular expression.
- *
- * @return the new {@code Pattern} instance.
- *
- * @throws PatternSyntaxException
- * if the regular expression is syntactically incorrect.
- */
- public static Pattern compile(String pattern) {
- return new Pattern(pattern, 0);
- }
-
- /**
- * Compiles the given regular expression using the given flags. Used
- * internally only.
- *
- * @param pattern
- * the regular expression.
- * @param flags
- * the flags.
- */
private void compileImpl(String pattern, int flags) throws PatternSyntaxException {
if (pattern == null) {
throw new NullPointerException();
@@ -336,47 +411,35 @@ public final class Pattern implements Serializable {
}
/**
- * Tries to match a given regular expression against a given input. This is
- * actually nothing but a convenience method that compiles the regular
- * expression into a {@code Pattern}, builds a {@link Matcher} for it, and
- * then does the match. If the same regular expression is used for multiple
- * operations, it is recommended to compile it into a {@code Pattern}
- * explicitly and request a reusable {@code Matcher}.
- *
- * @param regex
- * the regular expression.
- * @param input
- * the input to process.
- *
- * @return true if and only if the {@code Pattern} matches the input.
+ * Tests whether the given {@code regularExpression} matches the given {@code input}.
+ * Equivalent to {@code Pattern.compile(regularExpression).matcher(input).matches()}.
+ * If the same regular expression is to be used for multiple operations, it may be more
+ * efficient to reuse a compiled {@code Pattern}.
*
* @see Pattern#compile(java.lang.String, int)
* @see Matcher#matches()
*/
- public static boolean matches(String regex, CharSequence input) {
- return new Matcher(new Pattern(regex, 0), input).matches();
+ public static boolean matches(String regularExpression, CharSequence input) {
+ return new Matcher(new Pattern(regularExpression, 0), input).matches();
}
/**
- * Quotes a given string using "\Q" and "\E", so that all other
- * meta-characters lose their special meaning. If the string is used for a
- * {@code Pattern} afterwards, it can only be matched literally.
- *
- * @param s
- * the string to quote.
- *
- * @return the quoted string.
+ * Quotes the given {@code string} using "\Q" and "\E", so that all
+ * meta-characters lose their special meaning. This method correctly
+ * escapes embedded instances of "\Q" or "\E". If the entire result
+ * is to be passed verbatim to {@link #compile}, it's usually clearer
+ * to use the {@link #LITERAL} flag instead.
*/
- public static String quote(String s) {
- StringBuilder sb = new StringBuilder().append("\\Q"); //$NON-NLS-1$
+ public static String quote(String string) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("\\Q");
int apos = 0;
int k;
- while ((k = s.indexOf("\\E", apos)) >= 0) { //$NON-NLS-1$
- sb.append(s.substring(apos, k + 2)).append("\\\\E\\Q"); //$NON-NLS-1$
+ while ((k = string.indexOf("\\E", apos)) >= 0) {
+ sb.append(string.substring(apos, k + 2)).append("\\\\E\\Q");
apos = k + 2;
}
-
- return sb.append(s.substring(apos)).append("\\E").toString(); //$NON-NLS-1$
+ return sb.append(string.substring(apos)).append("\\E").toString();
}
@Override
@@ -385,20 +448,13 @@ public final class Pattern implements Serializable {
if (mNativePattern != 0) {
NativeRegEx.close(mNativePattern);
}
- }
- finally {
+ } finally {
super.finalize();
}
}
- /**
- * Serialization support
- */
- private void readObject(java.io.ObjectInputStream s)
- throws java.io.IOException, ClassNotFoundException {
+ private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
s.defaultReadObject();
-
compileImpl(pattern, flags);
}
-
}
diff --git a/luni/src/main/java/java/util/regex/package.html b/luni/src/main/java/java/util/regex/package.html
deleted file mode 100644
index 3ce73eb..0000000
--- a/luni/src/main/java/java/util/regex/package.html
+++ /dev/null
@@ -1,881 +0,0 @@
-<html>
- <head>
- <!--
- /*
- * Copyright (C) 2007 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- -->
- </head>
- <body>
- Provides an implementation of regular expressions, which is useful for
- matching, searching, and replacing strings based on patterns. The two
- fundamental classes are {@link java.util.regex.Pattern} and
- {@link java.util.regex.Matcher}. The former
- takes a pattern described by means of a regular expression and compiles it
- into a special internal representation. The latter matches the compiled
- pattern against a given input.
-
- <h2>Regular expressions</h2>
-
- A regular expression consists of literal text, meta characters, character
- sets, and operators. The latter three have a special meaning when
- encountered during the processing of a pattern.
-
- <ul>
- <li>
- <a href="#metachars">Meta characters</a> are a special means to describe
- single characters in the input text. A common example for a meta
- character is the dot '.', which, when used in a regular expression,
- matches any character.
- </li>
- <li>
- <a href="#charsets">Character sets</a> are a convenient means to
- describe different characters that match a single character in the
- input. Character sets are enclosed in square brackets '[' and ']'
- and use the dash '-' for forming ranges. A typical example is
- "[0-9a-fA-F]", which describes the set of all hexadecimal digits.
- </li>
- <li>
- <a href="#operators">Operators</a> modify or combine whole regular
- expressions, with the result being a regular expression again. An
- example for an operator is the asterisk '*', which, together with the
- regular expression preceding it, matches zero or more repetitions of
- that regular expression. The plus sign '+' is similar, but requires at
- least one occurrence.
- </li>
- </ul>
-
- Meta characters, the '[' and ']' that form a character set, and operators
- normally lose their special meaning when preceded by a backslash '\'. To get
- a backslash by itself, use a double backslash. Note that when using regular
- expressions in Java source code, some care has to be taken to get the
- backslashes right (due to yet another level of escaping being necessary for
- Java).
-
- <p>
-
- The following table gives some basic examples of regular expressions and
- input strings that match them:
-
- <p>
-
- <table>
- <tr>
- <th>
- Regular expression
- </th>
- <th>
- Matched string(s)
- </th>
- </tr>
- <tr>
- <td>
- "Hello, World!"
- </td>
- <td>
- "Hello, World!"
- </td>
- </tr>
- <tr>
- <td>
- "Hello, World."
- </td>
- <td>
- "Hello, World!", "Hello, World?"
- </td>
- </tr>
- <tr>
- <td>
- "Hello, .*d!"
- </td>
- <td>
- "Hello, World!", "Hello, Android!", "Hello, Dad!"
- </td>
- </tr>
- <tr>
- <td>
- "[0-9]+ green bottles"
- </td>
- <td>
- "0 green bottles", "25 green bottles", "1234 green bottles"
- </td>
- </tr>
- </table>
-
- <p>
-
- The following sections describe the various features in detail. There are also
- some <a href="#impnotes">implementation notes</a> at the end.
-
- <p>
-
- <a name="metachars"></a>
- <h2>Meta characters</h2>
-
- The following table lists the meta characters understood in regular
- expressions.
-
- <p>
-
- <!-- ICU-copied documentation begins here -->
-
- <table>
- <tr>
- <th>
- Meta character
- </th>
- <th>
- Description
- </th>
- </tr>
- <tr>
- <td>
- \a
- </td>
- <td>
- Match a BELL, \u0007.
- </td>
- </tr>
- <tr>
- <td>
- \A
- </td>
- <td>
- Match at the beginning of the input. Differs from ^ in that
- \A will not match after a new line within the input.
- </td>
- </tr>
- <tr>
- <td>
- \b, outside of a <a href="#charsets">character set</a>
- </td>
- <td>
- Match if the current position is a word boundary. Boundaries
- occur at the transitions between word (\w) and non-word (\W)
- characters, with combining marks ignored.
- </td>
- </tr>
- <tr>
- <td>
- \b, within a <a href="#charsets">character set</a>
- </td>
- <td>
- Match a BACKSPACE, \u0008.
- </td>
- </tr>
- <tr>
- <td>
- \B
- </td>
- <td>
- Match if the current position is not a word boundary.
- </td>
- </tr>
- <tr>
- <td>
- \cX
- </td>
- <td>
- Match a control-X character (replace X with actual character).
- </td>
- </tr>
- <tr>
- <td>
- \e
- </td>
- <td>
- Match an ESCAPE, \u001B.
- </td>
- </tr>
- <tr>
- <td>
- \E
- </td>
- <td>
- Ends quoting started by \Q. Meta characters, character classes, and
- operators become active again.
- </td>
- </tr>
- <tr>
- <td>
- \f
- </td>
- <td>
- Match a FORM FEED, \u000C.
- </td>
- </tr>
- <tr>
- <td>
- \G
- </td>
- <td>
- Match if the current position is at the end of the previous
- match.
- </td>
- </tr>
- <tr>
- <td>
- \n
- </td>
- <td>
- Match a LINE FEED, \u000A.
- </td>
- </tr>
- <tr>
- <td>
- \N{UNICODE CHARACTER NAME}
- </td>
- <td>
- Match the named Unicode character.
- </td>
- </tr>
- <tr>
- <td>
- \Q
- </td>
- <td>
- Quotes all following characters until \E. The following text is
- treated as literal.
- </td>
- </tr>
- <tr>
- <td>
- \r
- </td>
- <td>
- Match a CARRIAGE RETURN, \u000D.
- </td>
- </tr>
- <tr>
- <td>
- \t
- </td>
- <td>
- Match a HORIZONTAL TABULATION, \u0009.
- </td>
- </tr>
- <tr>
- <td>
- \uhhhh
- </td>
- <td>
- Match the character with the hex value hhhh.
- </td>
- </tr>
- <tr>
- <td>
- \Uhhhhhhhh
- </td>
- <td>
- Match the character with the hex value hhhhhhhh. Exactly
- eight hex digits must be provided, even though the largest Unicode
- code point is \U0010ffff.
- </td>
- </tr>
- <tr>
- <td>
- \x{hhhh}
- </td>
- <td>
- Match the character with the hex value hhhh. From one to six hex
- digits may be supplied.
- </td>
- </tr>
- <tr>
- <td>
- \xhh
- </td>
- <td>
- Match the character with the hex value hh.
- </td>
- </tr>
- <tr>
- <td>
- \Z
- </td>
- <td>
- Match if the current position is at the end of input, but
- before the final line terminator, if one exists.
- </td>
- </tr>
- <tr>
- <td>
- \z
- </td>
- <td>
- Match if the current position is at the end of input.
- </td>
- </tr>
- <tr>
- <td>
- \0n, \0nn, \0nnn
- </td>
- <td>
- Match the character with the octal value n, nn, or nnn. Maximum
- value is 0377.
- </td>
- </tr>
- <tr>
- <td>
- \n
- </td>
- <td>
- Back Reference. Match whatever the nth capturing group
- matched. n must be a number &gt;= 1 and &lt;= the total number of capture
- groups in the pattern. Note: Octal escapes, such as \012, are not
- supported in ICU regular expressions.
- </td>
- </tr>
- <tr>
- <td>
- [character set]
- </td>
- <td>
- Match any one character from the character set. See
- <a href="#charsets">character sets</a> for a full description of what
- may appear between the square brackets.
- </td>
- </tr>
- <tr>
- <td>
- .
- </td>
- <td>
- Match any character.
- </td>
- </tr>
- <tr>
- <td>
- ^
- </td>
- <td>
- Match at the beginning of a line.
- </td>
- </tr>
- <tr>
- <td>
- $
- </td>
- <td>
- Match at the end of a line.
- </td>
- </tr>
- <tr>
- <td>
- \
- </td>
- <td>
- Quotes the following character, so that it loses any special
- meaning it might have.
- </td>
- </tr>
- </table>
-
- <!-- ICU-copied documentation ends here -->
-
- <p>
-
- <a name="charsets"></a>
- <h2>Character sets</h2>
-
- The following table lists the syntax elements allowed inside a character
- set:
-
- <p>
-
- <table>
- <tr>
- <th>
- Element
- </th>
- <th>
- Description
- </th>
- </tr>
- <tr>
- <td>
- [a]
- </td>
- <td>
- The character set consisting of the letter 'a' only.
- </td>
- </tr>
- <tr>
- <td>
- [xyz]
- </td>
- <td>
- The character set consisting of the letters 'x', 'y', and 'z',
- described by explicit enumeration.
- </td>
- </tr>
- <tr>
- <td>
- [x-z]
- </td>
- <td>
- The character set consisting of the letters 'x', 'y', and 'z',
- described by means of a range.
- </td>
- </tr>
- <tr>
- <td>
- [^xyz]
- </td>
- <td>
- The character set consisting of everything but the letters 'x', 'y',
- and 'z'.
- </td>
- </tr>
- <tr>
- <td>
- [[a-f][0-9]]
- </td>
- <td>
- The character set formed by building the union of the two character
- sets [a-f] and [0-9].
- </td>
- </tr>
- <tr>
- <td>
- [[a-z]&amp;&amp;[jkl]]
- </td>
- <td>
- The character set formed by building the intersection of the two
- character sets [a-z] and [jkl]. You can also use a single '&amp;', but
- this regular expression might not be <a href="#impnotes">portable</a>.
- </td>
- </tr>
- <tr>
- <td>
- [[a-z]--[jkl]]
- </td>
- <td>
- The character set formed by building the difference of the two
- character sets [a-z] and [jkl]. You can also use a single '-'. This
- operator is generally not <a href="#impnotes">portable</a>.
- </td>
- </tr>
- </table>
-
- <p>
-
- A couple of frequently used character sets are predefined and named.
- These can be referenced by their name, but behave otherwise similar to
- explicit character sets. The following table lists them:
-
- <p>
-
- <table>
- <tr>
- <th>
- Character set
- </th>
- <th>
- Description
- </th>
- </tr>
- <tr>
- <td>
- \d, \D
- </td>
- <td>
- The set consisting of all digit characters (\d) or the opposite of
- it (\D).
- </td>
- </tr>
- <tr>
- <td>
- \s, \S
- </td>
- <td>
- The set consisting of all space characters (\s) or the opposite of
- it (\S).
- </td>
- </tr>
- <tr>
- <td>
- \w, \W
- </td>
- <td>
- The set consisting of all word characters (\w) or the opposite
- of it (\W).
- </td>
- </tr>
- <tr>
- <td>
- \X
- </td>
- <td>
- The set of all grapheme clusters.
- </td>
- </tr>
- <tr>
- <td>
- \p{NAME}, \P{NAME}
- </td>
- <td>
- The POSIX set with the specified NAME (\p{}) or the opposite
- of it (\P{}). Legal values for NAME are 'Alnum', 'Alpha', 'ASCII',
- 'Blank', 'Cntrl', 'Digit', 'Graph', 'Lower', 'Print', 'Punct',
- 'Upper', 'XDigit'.
- </td>
- </tr>
- <tr>
- <td>
- \p{inBLOCK}, \P{inBLOCK}
- </td>
- <td>
- The character set equivalent to the given Unicode BLOCK (\p{}) or
- the opposite of it (\P{}). An example for a legal BLOCK name is
- 'Hebrew', meaning, unsurprisingly, all Hebrew characters.
- </td>
- </tr>
- <tr>
- <td>
- \p{CATEGORY}, \P{CATEGORY}
- </td>
- <td>
- The character set equivalent to the Unicode CATEGORY (\p{}) or the
- opposite of it (\P{}). An example for a legal CATEGORY name is 'Lu',
- meaning all uppercase letters.
- </td>
- </tr>
- <tr>
- <td>
- \p{javaMETHOD}, \P{javaMETHOD}
- </td>
- <td>
- The character set equivalent to the isMETHOD() operation of the
- {@link java.lang.Character} class (\p{}) or the opposite of it (\P{}).
- </td>
- </tr>
- </table>
-
- <p>
-
- <a name="operators"></a>
- <h2>Operators</h2>
-
- The following table lists the operators understood inside regular
- expressions:
-
- <p>
-
- <!-- ICU-copied documentation begins here -->
-
- <table>
- <tr>
- <th>
- Operator
- </th>
- <th>
- Description
- </th>
- </tr>
- <tr>
- <td>
- |
- </td>
- <td>
- Alternation. A|B matches either A or B.
- </td>
- </tr>
- <tr>
- <td>
- *
- </td>
- <td>
- Match 0 or more times. Match as many times as possible.
- </td>
- </tr>
- <tr>
- <td>
- +
- </td>
- <td>
- Match 1 or more times. Match as many times as possible.
- </td>
- </tr>
- <tr>
- <td>
- ?
- </td>
- <td>
- Match zero or one times. Prefer one.
- </td>
- </tr>
- <tr>
- <td>
- {n}
- </td>
- <td>
- Match exactly n times.
- </td>
- </tr>
- <tr>
- <td>
- {n,}
- </td>
- <td>
- Match at least n times. Match as many times as possible.
- </td>
- </tr>
- <tr>
- <td>
- {n,m}
- </td>
- <td>
- Match between n and m times. Match as many times as possible,
- but not more than m.
- </td>
- </tr>
- <tr>
- <td>
- *?
- </td>
- <td>
- Match 0 or more times. Match as few times as possible.
- </td>
- </tr>
- <tr>
- <td>
- +?
- </td>
- <td>
- Match 1 or more times. Match as few times as possible.
- </td>
- </tr>
- <tr>
- <td>
- ??
- </td>
- <td>
- Match zero or one times. Prefer zero.
- </td>
- </tr>
- <tr>
- <td>
- {n}?
- </td>
- <td>
- Match exactly n times.
- </td>
- </tr>
- <tr>
- <td>
- {n,}?
- </td>
- <td>
- Match at least n times, but no more than required for an
- overall pattern match.
- </td>
- </tr>
- <tr>
- <td>
- {n,m}?
- </td>
- <td>
- Match between n and m times. Match as few times as possible,
- but not less than n.
- </td>
- </tr>
- <tr>
- <td>
- *+
- </td>
- <td>
- Match 0 or more times. Match as many times as possible when
- first encountered, do not retry with fewer even if overall match
- fails (Possessive Match)
- </td>
- </tr>
- <tr>
- <td>
- ++
- </td>
- <td>
- Match 1 or more times. Possessive match.
- </td>
- </tr>
- <tr>
- <td>
- ?+
- </td>
- <td>
- Match zero or one times. Possessive match.
- </td>
- </tr>
- <tr>
- <td>
- {n}+
- </td>
- <td>
- Match exactly n times.
- </td>
- </tr>
- <tr>
- <td>
- {n,}+
- </td>
- <td>
- Match at least n times. Possessive Match.
- </td>
- </tr>
- <tr>
- <td>
- {n,m}+
- </td>
- <td>
- Match between n and m times. Possessive Match.
- </td>
- </tr>
- <tr>
- <td>
- ( ... )
- </td>
- <td>
- Capturing parentheses. Range of input that matched the
- parenthesized subexpression is available after the match.
- </td>
- </tr>
- <tr>
- <td>
- (?: ... )
- </td>
- <td>
- Non-capturing parentheses. Groups the included pattern, but
- does not provide capturing of matching text. Somewhat more efficient
- than capturing parentheses.
- </td>
- </tr>
- <tr>
- <td>
- (?&gt; ... )
- </td>
- <td>
- Atomic-match parentheses. First match of the parenthesized
- subexpression is the only one tried; if it does not lead to an
- overall pattern match, back up the search for a match to a position
- before the "(?&gt;"
- </td>
- </tr>
- <tr>
- <td>
- (?# ... )
- </td>
- <td>
- Free-format comment (?# comment ).
- </td>
- </tr>
- <tr>
- <td>
- (?= ... )
- </td>
- <td>
- Look-ahead assertion. True if the parenthesized pattern
- matches at the current input position, but does not advance the
- input position.
- </td>
- </tr>
- <tr>
- <td>
- (?! ... )
- </td>
- <td>
- Negative look-ahead assertion. True if the parenthesized
- pattern does not match at the current input position. Does not
- advance the input position.
- </td>
- </tr>
- <tr>
- <td>
- (?&lt;= ... )
- </td>
- <td>
- Look-behind assertion. True if the parenthesized pattern
- matches text preceding the current input position, with the last
- character of the match being the input character just before the
- current position. Does not alter the input position. The length of
- possible strings matched by the look-behind pattern must not be
- unbounded (no * or + operators.)
- </td>
- </tr>
- <tr>
- <td>
- (?&lt;! ... )
- </td>
- <td>
- Negative Look-behind assertion. True if the parenthesized
- pattern does not match text preceding the current input position,
- with the last character of the match being the input character just
- before the current position. Does not alter the input position. The
- length of possible strings matched by the look-behind pattern must
- not be unbounded (no * or + operators.)
- </td>
- </tr>
- <tr>
- <td>
- (?ismwx-ismwx: ... )
- </td>
- <td>
- Flag settings. Evaluate the parenthesized expression with the
- specified flags enabled or -disabled.
- </td>
- </tr>
- <tr>
- <td>
- (?ismwx-ismwx)
- </td>
- <td>
- Flag settings. Change the flag settings. Changes apply to the
- portion of the pattern following the setting. For example, (?i)
- changes to a case insensitive match.
- </td>
- </tr>
- </table>
-
- <!-- ICU-copied documentation ends here -->
-
- <p>
-
- <a name="impnotes"></a>
- <h2>Implementation notes</h2>
-
- The regular expression implementation used in Android is provided by
- <a href="http://www.icu-project.org">ICU</a>. The notation for the regular
- expressions is mostly a superset of those used in other Java language
- implementations. This means that existing applications will normally work as
- expected, but in rare cases some regular expression content that is meant to
- be literal might be interpreted with a special meaning. The most notable
- examples are the single '&amp;', which can also be used as the intersection
- operator for <a href="#charsets">character sets</a>, and the difference
- operators '-' and '--'. Also, some of the flags are handled in a
- slightly different way:
-
- <ul>
- <li>
- The {@link java.util.regex.Pattern#CASE_INSENSITIVE} flag silently
- assumes Unicode case-insensitivity. That is, the
- {@link java.util.regex.Pattern#UNICODE_CASE} flag is effectively a
- no-op.
- </li>
- <li>
- The {@link java.util.regex.Pattern#CANON_EQ} flag is not supported at
- all (throws an exception).
- </li>
- </ul>
- </body>
-</html>
diff --git a/luni/src/main/java/java/util/zip/package.html b/luni/src/main/java/java/util/zip/package.html
deleted file mode 100644
index 8b4f91b..0000000
--- a/luni/src/main/java/java/util/zip/package.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<html>
- <body>
- <p>
- This package contains classes for compressing and decompressing data in
- ZIP and GZIP file formats.
- </p>
- </body>
-</html>
diff --git a/luni/src/main/java/javax/sql/package.html b/luni/src/main/java/javax/sql/package.html
deleted file mode 100644
index 7fdf1ce..0000000
--- a/luni/src/main/java/javax/sql/package.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<html>
- <body>
- <p>
- Provides extensions to the standard interface for accessing SQL-based
- databases.
- </p>
- </body>
-</html>