diff options
author | Elliott Hughes <enh@google.com> | 2010-05-10 18:21:25 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2010-05-11 16:23:40 -0700 |
commit | 5f37da05bb48298568f8abd7c97c3d11552e1867 (patch) | |
tree | 2393b2e6f19d2a1ecf7feaae6317b93b8ac394f8 /luni/src | |
parent | d6f5e66e31388c2777da33c30fb2194ff5b427be (diff) | |
download | libcore-5f37da05bb48298568f8abd7c97c3d11552e1867.zip libcore-5f37da05bb48298568f8abd7c97c3d11552e1867.tar.gz libcore-5f37da05bb48298568f8abd7c97c3d11552e1867.tar.bz2 |
Documentation improvements.
Remove a bunch of content-free "package.html" files, and rewrite the Pattern
documentation.
Change-Id: Ieb4eee940dbbeab21828b8d7b2f172732f9dd6de
Diffstat (limited to 'luni/src')
22 files changed, 338 insertions, 1315 deletions
diff --git a/luni/src/main/java/java/io/package.html b/luni/src/main/java/java/io/package.html deleted file mode 100644 index d074aa0..0000000 --- a/luni/src/main/java/java/io/package.html +++ /dev/null @@ -1,14 +0,0 @@ -<html> - <body> - <p> - Provides input and output facilities by means of streaming, - file system access and serialization. - </p> - <p> - These classes allow you to get direct access to the file system. - It also gives the possibility to handle data streams buffered or - unbuffered. And on top of that there are classes that help to abstract - data streams as streams of chars, strings or even objects. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/lang/Character.java b/luni/src/main/java/java/lang/Character.java index 859b6d6..37410fd 100644 --- a/luni/src/main/java/java/lang/Character.java +++ b/luni/src/main/java/java/lang/Character.java @@ -50,7 +50,55 @@ import com.ibm.icu4jni.lang.UCharacter; * On the Java platform a {@code char} value represents either a single BMP code * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type * is used to represent all Unicode code points. - * + * + * <a name="unicode_categories"><h3>Unicode categories</h3></a> + * <p>Here's a list of the Unicode character categories and the corresponding Java constant, + * grouped semantically to provide a convenient overview. This table is also useful in + * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 
+ * <span class="datatable"> + * <style type="text/css"> + * .datatable td { padding-right: 20px; } + * </style> + * <p><table> + * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> + * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> + * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> + * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> + * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> + * <tr> <td><br></td> </tr> + * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> + * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> + * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> + * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> + * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> + * <tr> <td><br></td> </tr> + * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> + * <tr> <td> Me </td> <td> Enclosing mark </td> <td>{@link #ENCLOSING_MARK}</td> </tr> + * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> + * <tr> <td><br></td> </tr> + * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> + * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> + * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> + * <tr> <td><br></td> </tr> + * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> + * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> + * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> + * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link 
#CONNECTOR_PUNCTUATION}</td> </tr> + * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> + * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> + * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> + * <tr> <td><br></td> </tr> + * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> + * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> + * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> + * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> + * <tr> <td><br></td> </tr> + * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> + * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> + * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> + * </table> + * </span> + * * @since 1.0 */ public final class Character implements Serializable, Comparable<Character> { diff --git a/luni/src/main/java/java/lang/String.java b/luni/src/main/java/java/lang/String.java index c7236d5..4c26585 100644 --- a/luni/src/main/java/java/lang/String.java +++ b/luni/src/main/java/java/lang/String.java @@ -1958,105 +1958,100 @@ public final class String implements Serializable, Comparable<String>, CharSeque } /** - * Determines whether this string matches a given regular expression. + * Tests whether this string matches the given {@code regularExpression}. This method returns + * true only if the regular expression matches the <i>entire</i> input string. A common mistake is + * to assume that this method behaves like {@link #contains}; if you want to match anywhere + * within the input string, you need to add {@code .*} to the beginning and end of your + * regular expression. See {@link Pattern#matches}. 
+ * + * <p>If the same regular expression is to be used for multiple operations, it may be more + * efficient to reuse a compiled {@code Pattern}. * - * @param expr - * the regular expression to be matched. - * @return {@code true} if the expression matches, otherwise {@code false}. * @throws PatternSyntaxException * if the syntax of the supplied regular expression is not * valid. - * @throws NullPointerException - * if {@code expr} is {@code null}. + * @throws NullPointerException if {@code regularExpression == null} * @since 1.4 */ - public boolean matches(String expr) { - return Pattern.matches(expr, this); + public boolean matches(String regularExpression) { + return Pattern.matches(regularExpression, this); } /** - * Replace any substrings within this string that match the supplied regular - * expression {@code expr}, with the string {@code substitute}. + * Replaces all matches for {@code regularExpression} within this string with the given + * {@code replacement}. + * See {@link Pattern} for regular expression syntax. + * + * <p>If the same regular expression is to be used for multiple operations, it may be more + * efficient to reuse a compiled {@code Pattern}. * - * @param expr - * the regular expression to match. - * @param substitute - * the string to replace the matching substring with. - * @return the new string. * @throws PatternSyntaxException * if the syntax of the supplied regular expression is not * valid. + * @throws NullPointerException if {@code regularExpression == null} * @see Pattern * @since 1.4 */ - public String replaceAll(String expr, String substitute) { - return Pattern.compile(expr).matcher(this).replaceAll(substitute); + public String replaceAll(String regularExpression, String replacement) { + return Pattern.compile(regularExpression).matcher(this).replaceAll(replacement); } /** - * Replace the first substring within this string that matches the supplied - * regular expression {@code expr}, with the string {@code substitute}. 
+ * Replaces the first match for {@code regularExpression} within this string with the given + * {@code replacement}. + * See {@link Pattern} for regular expression syntax. + * + * <p>If the same regular expression is to be used for multiple operations, it may be more + * efficient to reuse a compiled {@code Pattern}. * - * @param expr - * the regular expression to match. - * @param substitute - * the string to replace the matching substring with. - * @return the new string. * @throws PatternSyntaxException * if the syntax of the supplied regular expression is not * valid. - * @throws NullPointerException - * if {@code strbuf} is {@code null}. + * @throws NullPointerException if {@code regularExpression == null} * @see Pattern * @since 1.4 */ - public String replaceFirst(String expr, String substitute) { - return Pattern.compile(expr).matcher(this).replaceFirst(substitute); + public String replaceFirst(String regularExpression, String replacement) { + return Pattern.compile(regularExpression).matcher(this).replaceFirst(replacement); } /** - * Splits this string using the supplied regular expression {@code expr}, - * as if by {@code split(expr, 0)}. + * Splits this string using the supplied {@code regularExpression}. + * Equivalent to {@code split(regularExpression, 0)}. + * See {@link Pattern#split(CharSequence, int)} for an explanation of {@code limit}. + * See {@link Pattern} for regular expression syntax. * - * @param expr - * the regular expression used to divide the string. - * @return an array of Strings created by separating the string along - * matches of the regular expression. - * @throws NullPointerException - * if {@code expr} is {@code null}. + * <p>If the same regular expression is to be used for multiple operations, it may be more + * efficient to reuse a compiled {@code Pattern}. 
+ * + * @throws NullPointerException if {@code regularExpression == null} * @throws PatternSyntaxException * if the syntax of the supplied regular expression is not * valid. * @see Pattern * @since 1.4 */ - public String[] split(String expr) { - return split(expr, 0); + public String[] split(String regularExpression) { + return split(regularExpression, 0); } /** - * Splits this string using the supplied regular expression {@code expr}. - * The parameter {@code max} controls the behavior how many times the - * pattern is applied to the string; see {@link Pattern#split(CharSequence, int)} - * for details. + * Splits this string using the supplied {@code regularExpression}. + * See {@link Pattern#split(CharSequence, int)} for an explanation of {@code limit}. + * See {@link Pattern} for regular expression syntax. * - * @param expr - * the regular expression used to divide the string. - * @param max - * the number of entries in the resulting array. - * @return an array of Strings created by separating the string along - * matches of the regular expression. - * @throws NullPointerException - * if {@code expr} is {@code null}. + * <p>If the same regular expression is to be used for multiple operations, it may be more + * efficient to reuse a compiled {@code Pattern}. + * + * @throws NullPointerException if {@code regularExpression == null} * @throws PatternSyntaxException * if the syntax of the supplied regular expression is not * valid. - * @see Pattern#split(CharSequence, int) * @since 1.4 */ - public String[] split(String expr, int max) { - String[] result = java.util.regex.Splitter.fastSplit(expr, this, max); - return result != null ? result : Pattern.compile(expr).split(this, max); + public String[] split(String regularExpression, int limit) { + String[] result = java.util.regex.Splitter.fastSplit(regularExpression, this, limit); + return result != null ? 
result : Pattern.compile(regularExpression).split(this, limit); } /** diff --git a/luni/src/main/java/java/lang/annotation/package.html b/luni/src/main/java/java/lang/annotation/package.html deleted file mode 100644 index 5a150b3..0000000 --- a/luni/src/main/java/java/lang/annotation/package.html +++ /dev/null @@ -1,9 +0,0 @@ -<html> - <body> - <p> - Defines interfaces and exceptions necessary for annotation support. Also - provides some predefined annotations that are used throughout the Android - libraries. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/lang/package.html b/luni/src/main/java/java/lang/package.html deleted file mode 100644 index 8075edb..0000000 --- a/luni/src/main/java/java/lang/package.html +++ /dev/null @@ -1,10 +0,0 @@ -<html> - <body> - <p> - Provides core classes of the Android environment. This includes - {@link java.lang.Object}, the ultimate ancestor of all classes, - as well as several other classes which represent important data types or - central aspects of the environment hosting the application. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/lang/reflect/package.html b/luni/src/main/java/java/lang/reflect/package.html deleted file mode 100644 index ec457e2..0000000 --- a/luni/src/main/java/java/lang/reflect/package.html +++ /dev/null @@ -1,15 +0,0 @@ -<html> - <body> - <p> - Provides reflective run-time access to the properties of a class, which - allows such things as querying or modifying an objects's field by its name - or invoking a method by name. - </p> - - <p> - The code {@link java.lang.Class} class serves as the entry-point to - reflection. The classes in this package represent the various language - elements. 
- </p> - </body> -</html> diff --git a/luni/src/main/java/java/math/package.html b/luni/src/main/java/java/math/package.html deleted file mode 100644 index 4c9ba9f..0000000 --- a/luni/src/main/java/java/math/package.html +++ /dev/null @@ -1,12 +0,0 @@ -<html> - <body> - Provides arbitrary-precision integers and decimals. - Class {@link java.math.BigInteger} provides integers which are limited - by the available memory only. - Class {@link java.math.BigDecimal} provides arbitrary-precision signed - decimal numbers. These numbers are suitable for currency calculations. - The user has full control over the rounding behavior (comparable with - the IEEE754R rounding modes). - <p> - </body> -</html> diff --git a/luni/src/main/java/java/net/package.html b/luni/src/main/java/java/net/package.html deleted file mode 100644 index dff1ccd..0000000 --- a/luni/src/main/java/java/net/package.html +++ /dev/null @@ -1,8 +0,0 @@ -<html> - <body> - <p> - Provides networking-related functionality, such as streaming and datagram - sockets, handling of Internet addresses, and dealing with HTTP requests. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/nio/channels/package.html b/luni/src/main/java/java/nio/channels/package.html deleted file mode 100644 index c16c811..0000000 --- a/luni/src/main/java/java/nio/channels/package.html +++ /dev/null @@ -1,9 +0,0 @@ -<html> - <body> - <p> - Channels provide a way to connect to sources of data such as - files, sockets or other structures that allow input and/or output of - data. Selectors support multiplexing of non-blocking channels. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/nio/channels/spi/package.html b/luni/src/main/java/java/nio/channels/spi/package.html deleted file mode 100644 index e7b8a49..0000000 --- a/luni/src/main/java/java/nio/channels/spi/package.html +++ /dev/null @@ -1,7 +0,0 @@ -<html> - <body> - <p> - Service-provider classes for nio channels. 
- </p> - </body> -</html> diff --git a/luni/src/main/java/java/nio/charset/package.html b/luni/src/main/java/java/nio/charset/package.html deleted file mode 100644 index 6554010..0000000 --- a/luni/src/main/java/java/nio/charset/package.html +++ /dev/null @@ -1,14 +0,0 @@ -<html> - <body> - <p> - This package provides translation services between bytes and different - character sets. - </p> - <p> - An encoder translates characters into bytes and a decoder can - translate a byte stream into characters. With a charset you can create a - de-/encoder pair that can be used to translate a byte stream. With the - service provider package it is possible to use your own charsets. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/nio/charset/spi/package.html b/luni/src/main/java/java/nio/charset/spi/package.html deleted file mode 100644 index 4e58391..0000000 --- a/luni/src/main/java/java/nio/charset/spi/package.html +++ /dev/null @@ -1,7 +0,0 @@ -<html> - <body> - <p> - Service-provider class for nio charset. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/nio/package.html b/luni/src/main/java/java/nio/package.html deleted file mode 100644 index b521b67..0000000 --- a/luni/src/main/java/java/nio/package.html +++ /dev/null @@ -1,15 +0,0 @@ -<html> - <body> - <p> - Provides buffers that help handling data. - </p> - <p> - There are buffers for most primitive data types such as - <code>FloatBuffer</code>, <code>IntBuffer</code>, etc. These classes - provide methods to get and put data from the - buffers, to compact, slice or duplicate them, or to wrap an existing - array. Buffers also manage the position of the current element in the - buffer, they can be rewound to the beginning and allow skipping of elements. 
- </p> - </body> -</html> diff --git a/luni/src/main/java/java/text/package.html b/luni/src/main/java/java/text/package.html deleted file mode 100644 index b3f387a..0000000 --- a/luni/src/main/java/java/text/package.html +++ /dev/null @@ -1,16 +0,0 @@ -<html> - <body> - <p> - The java.text package offers internationalization and localization - facilities. - </p> - <p> - By using the classes in this package, it is possible to write the - application in an internationalized way. The benefit of this is that a new - localization can be provided at any time without having to change the - code. Support for localization is given for numbers, messages, dates and - other characteristics of a language like the directionality, sorting order - or enumeration of characters, words or lines. - </p> -</body> -</html> diff --git a/luni/src/main/java/java/util/jar/package.html b/luni/src/main/java/java/util/jar/package.html deleted file mode 100644 index 21103cc..0000000 --- a/luni/src/main/java/java/util/jar/package.html +++ /dev/null @@ -1,11 +0,0 @@ -<html> - <body> - <p> - The java.jar package gives access to reading and writing a Java archive, - or JAR, files. These are - actually ZIP files with the possibility to add meta-information in the - form of a MANIFEST file. This manifest can also be used - to sign a JAR file. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/util/logging/package.html b/luni/src/main/java/java/util/logging/package.html deleted file mode 100644 index d7ed252..0000000 --- a/luni/src/main/java/java/util/logging/package.html +++ /dev/null @@ -1,9 +0,0 @@ -<html> - <body> - <p> - This package allows to add logging to any application. It - supports different levels of importance of a message that needs to be - logged. The output written to the target can be filtered by this level. 
- </p> - </body> -</html> diff --git a/luni/src/main/java/java/util/package.html b/luni/src/main/java/java/util/package.html deleted file mode 100644 index 3656147..0000000 --- a/luni/src/main/java/java/util/package.html +++ /dev/null @@ -1,10 +0,0 @@ -<html> - <body> - <p> - Provides an extensive set of utility classes. This encompasses things - such as basic container data structures (various forms of lists, sets, - and maps), classes for dealing with date and time, String-handling, formatting, - localization, and scheduling repeated tasks. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/util/prefs/package.html b/luni/src/main/java/java/util/prefs/package.html deleted file mode 100644 index 41cd12c..0000000 --- a/luni/src/main/java/java/util/prefs/package.html +++ /dev/null @@ -1,13 +0,0 @@ -<html> - <body> - <p> - This package provides a preferences mechanism, that is, a means of writing - configuration data (key/value pairs) to a persistent data store and - retrieving it from there. There are two different kinds of stores - available: one for storing user data and one for storing system - configuration data. Since the underlying implementation is dependent - on the operating system, this package is designed to allow the installation - of a custom service provider implementation. - </p> - </body> -</html> diff --git a/luni/src/main/java/java/util/regex/Pattern.java b/luni/src/main/java/java/util/regex/Pattern.java index 325e3e0..49edd40 100644 --- a/luni/src/main/java/java/util/regex/Pattern.java +++ b/luni/src/main/java/java/util/regex/Pattern.java @@ -16,40 +16,204 @@ package java.util.regex; +import java.io.IOException; +import java.io.ObjectInputStream; import java.io.Serializable; import java.util.ArrayList; import com.ibm.icu4jni.regex.NativeRegEx; /** - * Represents a pattern used for matching, searching, or replacing strings. 
- * {@code Pattern}s are specified in terms of regular expressions and compiled - * using an instance of this class. They are then used in conjunction with a - * {@link Matcher} to perform the actual search. - * <p/> - * A typical use case looks like this: - * <p/> - * <pre> - * Pattern p = Pattern.compile("Hello, A[a-z]*!"); - * - * Matcher m = p.matcher("Hello, Android!"); - * boolean b1 = m.matches(); // true - * - * m.setInput("Hello, Robot!"); - * boolean b2 = m.matches(); // false - * </pre> - * <p/> - * The above code could also be written in a more compact fashion, though this - * variant is less efficient, since {@code Pattern} and {@code Matcher} objects - * are created on the fly instead of being reused. - * fashion: + * Patterns are compiled regular expressions. In many cases, convenience methods such as + * {@link String#matches String.matches}, {@link String#replaceAll String.replaceAll} and + * {@link String#split String.split} will be preferable, but if you need to do a lot of work + * with the same regular expression, it may be more efficient to compile it once and reuse it. + * The {@code Pattern} class and its companion, {@link Matcher}, are also a lot more powerful + * than the small amount of functionality exposed by {@code String}. + * * <pre> - * boolean b1 = Pattern.matches("Hello, A[a-z]*!", "Hello, Android!"); // true - * boolean b2 = Pattern.matches("Hello, A[a-z]*!", "Hello, Robot!"); // false + * // String convenience methods: + * boolean sawFailures = s.matches("Failures: \\d+"); + * String farewell = s.replaceAll("Hello, (\\S+)", "Goodbye, $1"); + * String[] fields = s.split(":"); + * + * // Direct use of Pattern: + * Pattern p = Pattern.compile("Hello, (\\S+)"); + * Matcher m = p.matcher(inputString); + * while (m.find()) { // Find each match in turn; String can't do this. + * String name = m.group(1); // Access a submatch group; String can't do this. 
+ * } + * </pre> - * <p/> - * Please consult the <a href="package-descr.html">package documentation</a> for an - * overview of the regular expression syntax used in this class as well as - * Android-specific implementation details. + * + * <h3>Regular expression syntax</h3> + * <span class="datatable"> + * <style type="text/css"> + * .datatable td { padding-right: 20px; } + * </style> + * + * <p>Java supports a subset of Perl 5 regular expression syntax. An important gotcha is that Java + * has no regular expression literals, and uses plain old string literals instead. This means that + * you need an extra level of escaping. For example, the regular expression {@code \s+} has to + * be represented as the string {@code "\\s+"}. + * + * <h3>Escape sequences</h3> + * <p><table> + * <tr> <td> \ </td> <td>Quote the following metacharacter (so {@code \.} matches a literal {@code .}).</td> </tr> + * <tr> <td> \Q </td> <td>Quote all following metacharacters until {@code \E}.</td> </tr> + * <tr> <td> \E </td> <td>Stop quoting metacharacters (started by {@code \Q}).</td> </tr> + * <tr> <td> \\ </td> <td>A literal backslash.</td> </tr> + * <tr> <td> &#x005c;u<i>hhhh</i> </td> <td>The Unicode character U+hhhh (in hex).</td> </tr> + * <tr> <td> \c<i>x</i> </td> <td>The ASCII control character <i>x</i> (so {@code \cI} would be U+0009).</td> </tr> + * + * <tr> <td> \a </td> <td>The ASCII bell character (U+0007).</td> </tr> + * <tr> <td> \e </td> <td>The ASCII ESC character (U+001b).</td> </tr> + * <tr> <td> \f </td> <td>The ASCII form feed character (U+000c).</td> </tr> + * <tr> <td> \n </td> <td>The ASCII newline character (U+000a).</td> </tr> + * <tr> <td> \r </td> <td>The ASCII carriage return character (U+000d).</td> </tr> + * <tr> <td> \t </td> <td>The ASCII tab character (U+0009).</td> </tr> + * </table> + * + * <h3>Character classes</h3> + * <p>It's possible to construct arbitrary character classes using set operations: + * <table> + * <tr> <td> [abc] </td> <td>Any one of {@code a}, 
{@code b}, or {@code c}. (Enumeration.)</td> </tr> + * <tr> <td> [a-c] </td> <td>Any one of {@code a}, {@code b}, or {@code c}. (Range.)</td> </tr> + * <tr> <td> [^abc] </td> <td>Any character <i>except</i> {@code a}, {@code b}, or {@code c}. (Negation.)</td> </tr> + * <tr> <td> [[a-f][0-9]] </td> <td>Any character in either range. (Union.)</td> </tr> + * <tr> <td> [[a-z]&&[jkl]] </td> <td>Any character in both ranges. (Intersection.)</td> </tr> + * </table> + * <p>Most of the time, the built-in character classes are more useful: + * <table> + * <tr> <td> \d </td> <td>Any digit character.</td> </tr> + * <tr> <td> \D </td> <td>Any non-digit character.</td> </tr> + * <tr> <td> \s </td> <td>Any whitespace character.</td> </tr> + * <tr> <td> \S </td> <td>Any non-whitespace character.</td> </tr> + * <tr> <td> \w </td> <td>Any word character.</td> </tr> + * <tr> <td> \W </td> <td>Any non-word character.</td> </tr> + * <tr> <td> \p{<i>NAME</i>} </td> <td> Any character in the class with the given <i>NAME</i>. </td> </tr> + * <tr> <td> \P{<i>NAME</i>} </td> <td> Any character <i>not</i> in the named class. </td> </tr> + * </table> + * <p>There are a variety of named classes: + * <ul> + * <li><a href="../../lang/Character.html#unicode_categories">Unicode category names</a>, + * prefixed by {@code Is}. For example {@code \p{IsLu}} for all uppercase letters. + * <li>POSIX class names. These are 'Alnum', 'Alpha', 'ASCII', 'Blank', 'Cntrl', 'Digit', + * 'Graph', 'Lower', 'Print', 'Punct', 'Upper', 'XDigit'. + * <li>Unicode block names, as used by {@link java.lang.Character.UnicodeBlock#forName} prefixed + * by {@code In}. For example {@code \p{InHebrew}} for all characters in the Hebrew block. + * <li>Character method names. These are all non-deprecated methods from {@link java.lang.Character} + * whose name starts with {@code is}, but with the {@code is} replaced by {@code java}. + * For example, {@code \p{javaLowerCase}}. 
+ * </ul> + * + * <h3>Quantifiers</h3> + * <p>Quantifiers match some number of instances of the preceding regular expression. + * <table> + * <tr> <td> * </td> <td>Zero or more.</td> </tr> + * <tr> <td> ? </td> <td>Zero or one.</td> </tr> + * <tr> <td> + </td> <td>One or more.</td> </tr> + * <tr> <td> {<i>n</i>} </td> <td>Exactly <i>n</i>.</td> </tr> + * <tr> <td> {<i>n,</i>} </td> <td>At least <i>n</i>.</td> </tr> + * <tr> <td> {<i>n</i>,<i>m</i>} </td> <td>At least <i>n</i> but not more than <i>m</i>.</td> </tr> + * </table> + * <p>Quantifiers are "greedy" by default, meaning that they will match the longest possible input + * sequence. There are also non-greedy quantifiers that match the shortest possible input sequence. + * They're the same as the greedy ones but with a trailing {@code ?}: + * <table> + * <tr> <td> *? </td> <td>Zero or more (non-greedy).</td> </tr> + * <tr> <td> ?? </td> <td>Zero or one (non-greedy).</td> </tr> + * <tr> <td> +? </td> <td>One or more (non-greedy).</td> </tr> + * <tr> <td> {<i>n</i>}? </td> <td>Exactly <i>n</i> (non-greedy).</td> </tr> + * <tr> <td> {<i>n,</i>}? </td> <td>At least <i>n</i> (non-greedy).</td> </tr> + * <tr> <td> {<i>n</i>,<i>m</i>}? </td> <td>At least <i>n</i> but not more than <i>m</i> (non-greedy).</td> </tr> + * </table> + * <p>Quantifiers allow backtracking by default. There are also possessive quantifiers to prevent + * backtracking. 
They're the same as the greedy ones but with a trailing {@code +}: + * <table> + * <tr> <td> *+ </td> <td>Zero or more (possessive).</td> </tr> + * <tr> <td> ?+ </td> <td>Zero or one (possessive).</td> </tr> + * <tr> <td> ++ </td> <td>One or more (possessive).</td> </tr> + * <tr> <td> {<i>n</i>}+ </td> <td>Exactly <i>n</i> (possessive).</td> </tr> + * <tr> <td> {<i>n,</i>}+ </td> <td>At least <i>n</i> (possessive).</td> </tr> + * <tr> <td> {<i>n</i>,<i>m</i>}+ </td> <td>At least <i>n</i> but not more than <i>m</i> (possessive).</td> </tr> + * </table> + * + * <h3>Zero-width assertions</h3> + * <p><table> + * <tr> <td> ^ </td> <td>At beginning of line.</td> </tr> + * <tr> <td> $ </td> <td>At end of line.</td> </tr> + * <tr> <td> \A </td> <td>At beginning of input.</td> </tr> + * <tr> <td> \b </td> <td>At word boundary.</td> </tr> + * <tr> <td> \B </td> <td>At non-word boundary.</td> </tr> + * <tr> <td> \G </td> <td>At end of previous match.</td> </tr> + * <tr> <td> \z </td> <td>At end of input.</td> </tr> + * <tr> <td> \Z </td> <td>At end of input, or before newline at end.</td> </tr> + * </table> + * + * <h3>Look-around assertions</h3> + * <p>Look-around assertions assert that the subpattern does (positive) or doesn't (negative) match + * after (look-ahead) or before (look-behind) the current position, without including the matched + * text in the containing match. The maximum length of possible matches for look-behind patterns + * must not be unbounded. 
+ * <p><table> + * <tr> <td> (?=<i>a</i>) </td> <td>Zero-width positive look-ahead.</td> </tr> + * <tr> <td> (?!<i>a</i>) </td> <td>Zero-width negative look-ahead.</td> </tr> + * <tr> <td> (?&lt;=<i>a</i>) </td> <td>Zero-width positive look-behind.</td> </tr> + * <tr> <td> (?&lt;!<i>a</i>) </td> <td>Zero-width negative look-behind.</td> </tr> + * </table> + * + * <h3>Groups</h3> + * + * <p><table> + * <tr> <td> (<i>a</i>) </td> <td>A capturing group.</td> </tr> + * <tr> <td> (?:<i>a</i>) </td> <td>A non-capturing group.</td> </tr> + * <tr> <td> (?&gt;<i>a</i>) </td> <td>An independent non-capturing group. (The first match of the subgroup is the only match tried.)</td> </tr> + * <tr> <td> \<i>n</i> </td> <td>The text already matched by capturing group <i>n</i>.</td> </tr> + * </table> + * <p>Explicit capturing groups are numbered from 1, and available via {@link Matcher#group}. + * Group 0 represents the whole match. + * + * <h3>Operators</h3> + * <p><table> + * <tr> <td> <i>ab</i> </td> <td>Expression <i>a</i> followed by expression <i>b</i>.</td> </tr> + * <tr> <td> <i>a</i>|<i>b</i> </td> <td>Either expression <i>a</i> or expression <i>b</i>.</td> </tr> + * </table> + * + * <a name="flags"><h3>Flags</h3></a> + * <p><table> + * <tr> <td> (?dimsux-dimsux:<i>a</i>) </td> <td>Evaluates the expression <i>a</i> with the given flags enabled/disabled.</td> </tr> + * <tr> <td> (?dimsux-dimsux) </td> <td>Evaluates the rest of the pattern with the given flags enabled/disabled.</td> </tr> + * </table> + * + * <p>The flags are: + * <table> + * <tr><td>{@code i}</td> <td>{@link #CASE_INSENSITIVE}</td> <td>case insensitive matching</td></tr> + * <tr><td>{@code d}</td> <td>{@link #UNIX_LINES}</td> <td>only accept {@code '\n'} as a line terminator</td></tr> + * <tr><td>{@code m}</td> <td>{@link #MULTILINE}</td> <td>allow {@code ^} and {@code $} to match beginning/end of any line</td></tr> + * <tr><td>{@code s}</td> <td>{@link #DOTALL}</td> <td>allow {@code .} to match {@code '\n'} ("s" 
for "single line")</td></tr> + * <tr><td>{@code u}</td> <td>{@link #UNICODE_CASE}</td> <td>enable Unicode case folding</td></tr> + * <tr><td>{@code x}</td> <td>{@link #COMMENTS}</td> <td>allow whitespace and comments</td></tr> + * </table> + * <p>Either set of flags may be empty. For example, {@code (?i-m)} would turn on case-insensitivity + * and turn off multiline mode, {@code (?i)} would just turn on case-insensitivity, + * and {@code (?-m)} would just turn off multiline mode. + * <p>Note that on Android, {@code UNICODE_CASE} is always on: case-insensitive matching will + * always be Unicode-aware. + * <p>There are two other flags not settable via this mechanism: {@link #CANON_EQ} and + * {@link #LITERAL}. Attempts to use {@link #CANON_EQ} on Android will throw an exception. + * </span> + * + * <h3>Implementation notes</h3> + * + * The regular expression implementation used in Android is provided by + * <a href="http://www.icu-project.org">ICU</a>. The notation for the regular + * expressions is mostly a superset of those used in other Java language + * implementations. This means that existing applications will normally work as + * expected, but in rare cases Android may accept a regular expression that is + * not accepted by other implementations. + * + * <p>In some cases, Android will recognize that a regular expression is a simple + * special case that can be handled more efficiently. This is true of both the convenience methods + * in {@code String} and the methods in {@code Pattern}. * * @see Matcher */ @@ -59,34 +223,28 @@ public final class Pattern implements Serializable { /** * This constant specifies that a pattern matches Unix line endings ('\n') - * only against the '.', '^', and '$' meta characters. + * only against the '.', '^', and '$' meta characters. Corresponds to {@code (?d)}. */ public static final int UNIX_LINES = 0x01; /** * This constant specifies that a {@code Pattern} is matched * case-insensitively. 
That is, the patterns "a+" and "A+" would both match - * the string "aAaAaA". - * <p> - * Note: For Android, the {@code CASE_INSENSITIVE} constant - * (currently) always includes the meaning of the {@link #UNICODE_CASE} - * constant. So if case insensitivity is enabled, this automatically extends - * to all Unicode characters. The {@code UNICODE_CASE} constant itself has - * no special consequences. + * the string "aAaAaA". See {@link #UNICODE_CASE}. Corresponds to {@code (?i)}. */ public static final int CASE_INSENSITIVE = 0x02; /** * This constant specifies that a {@code Pattern} may contain whitespace or * comments. Otherwise comments and whitespace are taken as literal - * characters. + * characters. Corresponds to {@code (?x)}. */ public static final int COMMENTS = 0x04; /** * This constant specifies that the meta characters '^' and '$' match only - * the beginning and end end of an input line, respectively. Normally, they - * match the beginning and the end of the complete input. + * the beginning and end of an input line, respectively. Normally, they + * match the beginning and the end of the complete input. Corresponds to {@code (?m)}. */ public static final int MULTILINE = 0x08; @@ -99,20 +257,16 @@ public final class Pattern implements Serializable { /** * This constant specifies that the '.' meta character matches arbitrary * characters, including line endings, which is normally not the case. + * Corresponds to {@code (?s)}. */ public static final int DOTALL = 0x20; /** - * This constant specifies that a {@code Pattern} is matched - * case-insensitively with regard to all Unicode characters. It is used in - * conjunction with the {@link #CASE_INSENSITIVE} constant to extend its - * meaning to all Unicode characters. - * <p> - * Note: For Android, the {@code CASE_INSENSITIVE} constant - * (currently) always includes the meaning of the {@code UNICODE_CASE} - * constant. 
So if case insensitivity is enabled, this automatically extends - * to all Unicode characters. The {@code UNICODE_CASE} constant then has no - * special consequences. + * This constant specifies that a {@code Pattern} that uses case-insensitive matching + * will use Unicode case folding. On Android, {@code UNICODE_CASE} is always on: + * case-insensitive matching will always be Unicode-aware. If your code is intended to + * be portable and uses case-insensitive matching on non-ASCII characters, you should + * use this flag. Corresponds to {@code (?u)}. */ public static final int UNICODE_CASE = 0x40; @@ -123,15 +277,8 @@ public final class Pattern implements Serializable { */ public static final int CANON_EQ = 0x80; - /** - * Holds the regular expression. - */ - private String pattern; - - /** - * Holds the flags used when compiling this pattern. - */ - private int flags; + private final String pattern; + private final int flags; /** * Holds a handle (a pointer, actually) for the native ICU pattern. @@ -143,26 +290,20 @@ public final class Pattern implements Serializable { */ transient int mGroupCount; - /** - * Returns a {@link Matcher} for the {@code Pattern} and a given input. The - * {@code Matcher} can be used to match the {@code Pattern} against the + * Returns a {@link Matcher} for this pattern applied to the given {@code input}. + * The {@code Matcher} can be used to match the {@code Pattern} against the * whole input, find occurrences of the {@code Pattern} in the input, or * replace parts of the input. - * - * @param input - * the input to process. - * - * @return the resulting {@code Matcher}. */ public Matcher matcher(CharSequence input) { return new Matcher(this, input); } /** - * Splits the given input sequence at occurrences of this {@code Pattern}. + * Splits the given {@code input} at occurrences of this pattern. 
* - * <p>If this {@code Pattern} does not occur in the input, the result is an + * <p>If this pattern does not occur in the input, the result is an * array containing the input (converted from a {@code CharSequence} to * a {@code String}). * @@ -192,19 +333,14 @@ public final class Pattern implements Serializable { } /** - * Splits a given input around occurrences of a regular expression. This is - * a convenience method that is equivalent to calling the method - * {@link #split(java.lang.CharSequence, int)} with a limit of 0. + * Equivalent to {@code split(input, 0)}. */ public String[] split(CharSequence input) { return split(input, 0); } /** - * Returns the regular expression that was compiled into this - * {@code Pattern}. - * - * @return the regular expression. + * Returns the regular expression supplied to {@code compile}. */ public String pattern() { return pattern; @@ -216,44 +352,17 @@ public final class Pattern implements Serializable { } /** - * Returns the flags that have been set for this {@code Pattern}. - * - * @return the flags that have been set. A combination of the constants - * defined in this class. - * - * @see #CANON_EQ - * @see #CASE_INSENSITIVE - * @see #COMMENTS - * @see #DOTALL - * @see #LITERAL - * @see #MULTILINE - * @see #UNICODE_CASE - * @see #UNIX_LINES + * Returns the flags supplied to {@code compile}. */ public int flags() { return flags; } /** - * Compiles a regular expression, creating a new {@code Pattern} instance in - * the process. Allows to set some flags that modify the behavior of the - * {@code Pattern}. - * - * @param pattern - * the regular expression. - * @param flags - * the flags to set. Basically, any combination of the constants - * defined in this class is valid. - * <p> - * Note: Currently, the {@link #CASE_INSENSITIVE} and - * {@link #UNICODE_CASE} constants have slightly special behavior - * in Android, and the {@link #CANON_EQ} constant is not - * supported at all. 
+ * Returns a compiled form of the given {@code regularExpression}, as modified by the + * given {@code flags}. See the <a href="#flags">flags overview</a> for more on flags. * - * @return the new {@code Pattern} instance. - * - * @throws PatternSyntaxException - * if the regular expression is syntactically incorrect. + * @throws PatternSyntaxException if the regular expression is syntactically incorrect. * * @see #CANON_EQ * @see #CASE_INSENSITIVE @@ -264,60 +373,26 @@ public final class Pattern implements Serializable { * @see #UNICODE_CASE * @see #UNIX_LINES */ - public static Pattern compile(String pattern, int flags) throws PatternSyntaxException { - return new Pattern(pattern, flags); + public static Pattern compile(String regularExpression, int flags) throws PatternSyntaxException { + return new Pattern(regularExpression, flags); } /** - * Creates a new {@code Pattern} instance from a given regular expression - * and flags. - * - * @param pattern - * the regular expression. - * @param flags - * the flags to set. Any combination of the constants defined in - * this class is valid. - * - * @throws PatternSyntaxException - * if the regular expression is syntactically incorrect. + * Equivalent to {@code Pattern.compile(pattern, 0)}. */ + public static Pattern compile(String pattern) { + return new Pattern(pattern, 0); + } + private Pattern(String pattern, int flags) throws PatternSyntaxException { if ((flags & CANON_EQ) != 0) { throw new UnsupportedOperationException("CANON_EQ flag not supported"); } - this.pattern = pattern; this.flags = flags; - compileImpl(pattern, flags); } - /** - * Compiles a regular expression, creating a new Pattern instance in the - * process. This is actually a convenience method that calls {@link - * #compile(String, int)} with a {@code flags} value of zero. - * - * @param pattern - * the regular expression. - * - * @return the new {@code Pattern} instance. 
- * - * @throws PatternSyntaxException - * if the regular expression is syntactically incorrect. - */ - public static Pattern compile(String pattern) { - return new Pattern(pattern, 0); - } - - /** - * Compiles the given regular expression using the given flags. Used - * internally only. - * - * @param pattern - * the regular expression. - * @param flags - * the flags. - */ private void compileImpl(String pattern, int flags) throws PatternSyntaxException { if (pattern == null) { throw new NullPointerException(); @@ -336,47 +411,35 @@ public final class Pattern implements Serializable { } /** - * Tries to match a given regular expression against a given input. This is - * actually nothing but a convenience method that compiles the regular - * expression into a {@code Pattern}, builds a {@link Matcher} for it, and - * then does the match. If the same regular expression is used for multiple - * operations, it is recommended to compile it into a {@code Pattern} - * explicitly and request a reusable {@code Matcher}. - * - * @param regex - * the regular expression. - * @param input - * the input to process. - * - * @return true if and only if the {@code Pattern} matches the input. + * Tests whether the given {@code regularExpression} matches the given {@code input}. + * Equivalent to {@code Pattern.compile(regularExpression).matcher(input).matches()}. + * If the same regular expression is to be used for multiple operations, it may be more + * efficient to reuse a compiled {@code Pattern}. * * @see Pattern#compile(java.lang.String, int) * @see Matcher#matches() */ - public static boolean matches(String regex, CharSequence input) { - return new Matcher(new Pattern(regex, 0), input).matches(); + public static boolean matches(String regularExpression, CharSequence input) { + return new Matcher(new Pattern(regularExpression, 0), input).matches(); } /** - * Quotes a given string using "\Q" and "\E", so that all other - * meta-characters lose their special meaning. 
If the string is used for a - * {@code Pattern} afterwards, it can only be matched literally. - * - * @param s - * the string to quote. - * - * @return the quoted string. + * Quotes the given {@code string} using "\Q" and "\E", so that all + * meta-characters lose their special meaning. This method correctly + * escapes embedded instances of "\Q" or "\E". If the entire result + * is to be passed verbatim to {@link #compile}, it's usually clearer + * to use the {@link #LITERAL} flag instead. */ - public static String quote(String s) { - StringBuilder sb = new StringBuilder().append("\\Q"); //$NON-NLS-1$ + public static String quote(String string) { + StringBuilder sb = new StringBuilder(); + sb.append("\\Q"); int apos = 0; int k; - while ((k = s.indexOf("\\E", apos)) >= 0) { //$NON-NLS-1$ - sb.append(s.substring(apos, k + 2)).append("\\\\E\\Q"); //$NON-NLS-1$ + while ((k = string.indexOf("\\E", apos)) >= 0) { + sb.append(string.substring(apos, k + 2)).append("\\\\E\\Q"); apos = k + 2; } - - return sb.append(s.substring(apos)).append("\\E").toString(); //$NON-NLS-1$ + return sb.append(string.substring(apos)).append("\\E").toString(); } @Override @@ -385,20 +448,13 @@ public final class Pattern implements Serializable { if (mNativePattern != 0) { NativeRegEx.close(mNativePattern); } - } - finally { + } finally { super.finalize(); } } - /** - * Serialization support - */ - private void readObject(java.io.ObjectInputStream s) - throws java.io.IOException, ClassNotFoundException { + private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException { s.defaultReadObject(); - compileImpl(pattern, flags); } - } diff --git a/luni/src/main/java/java/util/regex/package.html b/luni/src/main/java/java/util/regex/package.html deleted file mode 100644 index 3ce73eb..0000000 --- a/luni/src/main/java/java/util/regex/package.html +++ /dev/null @@ -1,881 +0,0 @@ -<html> - <head> - <!-- - /* - * Copyright (C) 2007 The Android Open Source Project - * - * Licensed 
under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --> - </head> - <body> - Provides an implementation of regular expressions, which is useful for - matching, searching, and replacing strings based on patterns. The two - fundamental classes are {@link java.util.regex.Pattern} and - {@link java.util.regex.Matcher}. The former - takes a pattern described by means of a regular expression and compiles it - into a special internal representation. The latter matches the compiled - pattern against a given input. - - <h2>Regular expressions</h2> - - A regular expression consists of literal text, meta characters, character - sets, and operators. The latter three have a special meaning when - encountered during the processing of a pattern. - - <ul> - <li> - <a href="#metachars">Meta characters</a> are a special means to describe - single characters in the input text. A common example for a meta - character is the dot '.', which, when used in a regular expression, - matches any character. - </li> - <li> - <a href="#charsets">Character sets</a> are a convenient means to - describe different characters that match a single character in the - input. Character sets are enclosed in angular brackets '[' and ']' - and use the dash '-' for forming ranges. A typical example is - "[0-9a-fA-F]", which describes the set of all hexadecimal digits. - </li> - <li> - <a href="#operators">Operators</a> modify or combine whole regular - expressions, with the result being a regular expression again. 
An - example for an operator is the asterisk '*', which, together with the - regular expression preceding it, matches zero or more repetitions of - that regular expression. The plus sign '+' is similar, but requires at - least one occurrence. - </li> - </ul> - - Meta characters, the '[' and ']' that form a character set, and operators - normally lose their special meaning when preceded by a backslash '\'. To get - a backslash by itself, use a double backslash. Note that when using regular - expressions in Java source code, some care has to be taken to get the - backslashes right (due to yet another level of escaping being necessary for - Java). - - <p> - - The following table gives some basic examples of regular expressions and - input strings that match them: - - <p> - - <table> - <tr> - <th> - Regular expression - </th> - <th> - Matched string(s) - </th> - </tr> - <tr> - <td> - "Hello, World!" - </td> - <td> - "Hello, World!" - </td> - </tr> - <tr> - <td> - "Hello, World." - </td> - <td> - "Hello, World!", "Hello, World?" - </td> - </tr> - <tr> - <td> - "Hello, .*d!" - </td> - <td> - "Hello, World!", "Hello, Android!", "Hello, Dad!" - </td> - </tr> - <tr> - <td> - "[0-9]+ green bottles" - </td> - <td> - "0 green bottles", "25 green bottles", "1234 green bottles" - </td> - </tr> - </table> - - <p> - - The following section describe the various features in detail. The are also - some <a href="#impnotes">implementation notes</a> at the end. - - <p> - - <a name="metachars"></a> - <h2>Meta characters</h2> - - The following two tables lists the meta characters understood in regular - expressions. - - <p> - - <!-- ICU-copied documentation begins here --> - - <table> - <tr> - <th> - Meta character - </th> - <th> - Description - </th> - </tr> - <tr> - <td> - \a - </td> - <td> - Match a BELL, \u0007. - </td> - </tr> - <tr> - <td> - \A - </td> - <td> - Match at the beginning of the input. Differs from ^ in that - \A will not match after a new line within the input. 
- </td> - </tr> - <tr> - <td> - \b, outside of a <a href="#charsets">character set</a> - </td> - <td> - Match if the current position is a word boundary. Boundaries - occur at the transitions between word (\w) and non-word (\W) - characters, with combining marks ignored. - </td> - </tr> - <tr> - <td> - \b, within a <a href="#charsets">character set</a> - </td> - <td> - Match a BACKSPACE, \u0008. - </td> - </tr> - <tr> - <td> - \B - </td> - <td> - Match if the current position is not a word boundary. - </td> - </tr> - <tr> - <td> - \cX - </td> - <td> - Match a control-X character (replace X with actual character). - </td> - </tr> - <tr> - <td> - \e - </td> - <td> - Match an ESCAPE, \u001B. - </td> - </tr> - <tr> - <td> - \E - </td> - <td> - Ends quoting started by \Q. Meta characters, character classes, and - operators become active again. - </td> - </tr> - <tr> - <td> - \f - </td> - <td> - Match a FORM FEED, \u000C. - </td> - </tr> - <tr> - <td> - \G - </td> - <td> - Match if the current position is at the end of the previous - match. - </td> - </tr> - <tr> - <td> - \n - </td> - <td> - Match a LINE FEED, \u000A. - </td> - </tr> - <tr> - <td> - \N{UNICODE CHARACTER NAME} - </td> - <td> - Match the named Unicode character. - </td> - </tr> - <tr> - <td> - \Q - </td> - <td> - Quotes all following characters until \E. The following text is - treated as literal. - </td> - </tr> - <tr> - <td> - \r - </td> - <td> - Match a CARRIAGE RETURN, \u000D. - </td> - </tr> - <tr> - <td> - \t - </td> - <td> - Match a HORIZONTAL TABULATION, \u0009. - </td> - </tr> - <tr> - <td> - \uhhhh - </td> - <td> - Match the character with the hex value hhhh. - </td> - </tr> - <tr> - <td> - \Uhhhhhhhh - </td> - <td> - Match the character with the hex value hhhhhhhh. Exactly - eight hex digits must be provided, even though the largest Unicode - code point is \U0010ffff. - </td> - </tr> - <tr> - <td> - \x{hhhh} - </td> - <td> - Match the character with the hex value hhhh. 
From one to six hex - digits may be supplied. - </td> - </tr> - <tr> - <td> - \xhh - </td> - <td> - Match the character with the hex value hh. - </td> - </tr> - <tr> - <td> - \Z - </td> - <td> - Match if the current position is at the end of input, but - before the final line terminator, if one exists. - </td> - </tr> - <tr> - <td> - \z - </td> - <td> - Match if the current position is at the end of input. - </td> - </tr> - <tr> - <td> - \0n, \0nn, \0nnn - </td> - <td> - Match the character with the octal value n, nn, or nnn. Maximum - value is 0377. - </td> - </tr> - <tr> - <td> - \n - </td> - <td> - Back Reference. Match whatever the nth capturing group - matched. n must be a number > 1 and < total number of capture - groups in the pattern. Note: Octal escapes, such as \012, are not - supported in ICU regular expressions - </td> - </tr> - <tr> - <td> - [character set] - </td> - <td> - Match any one character from the character set. See - <a href="#charsets">character sets</a> for a full description of what - may appear between the angular brackets. - </td> - </tr> - <tr> - <td> - . - </td> - <td> - Match any character. - </td> - </tr> - <tr> - <td> - ^ - </td> - <td> - Match at the beginning of a line. - </td> - </tr> - <tr> - <td> - $ - </td> - <td> - Match at the end of a line. - </td> - </tr> - <tr> - <td> - \ - </td> - <td> - Quotes the following character, so that is loses any special - meaning it might have. - </td> - </tr> - </table> - - <!-- ICU-copied documentation begins here --> - - <p> - - <a name="charsets"></a> - <h2>Character sets</h2> - - The following table lists the syntax elements allowed inside a character - set: - - <p> - - <table> - <tr> - <th> - Element - </th> - <th> - Description - </th> - </tr> - <tr> - <td> - [a] - </td> - <td> - The character set consisting of the letter 'a' only. - </td> - </tr> - <tr> - <td> - [xyz] - </td> - <td> - The character set consisting of the letters 'x', 'y', and 'z', - described by explicit enumeration. 
- </td> - </tr> - <tr> - <td> - [x-z] - </td> - <td> - The character set consisting of the letters 'x', 'y', and 'z', - described by means of a range. - </td> - </tr> - <tr> - <td> - [^xyz] - </td> - <td> - The character set consisting of everything but the letters 'x', 'y', - and 'z'. - </td> - </tr> - <tr> - <td> - [[a-f][0-9]] - </td> - <td> - The character set formed by building the union of the two character - sets [a-f] and [0-9]. - </td> - </tr> - <tr> - <td> - [[a-z]&&[jkl]] - </td> - <td> - The character set formed by building the intersection of the two - character sets [a-z] and [jkl]. You can also use a single '&', but - this regular expression might not be <a href="#impnotes">portable</a>. - </td> - </tr> - <tr> - <td> - [[a-z]--[jkl]] - </td> - <td> - The character set formed by building the difference of the two - character sets [a-z] and [jkl]. You can also use a single '-'. This - operator is generally not <a href="#impnotes">portable</a>. - </td> - </tr> - </table> - - <p> - - A couple of frequently used character sets are predefined and named. - These can be referenced by their name, but behave otherwise similar to - explicit character sets. The following table lists them: - - <p> - - <table> - <tr> - <th> - Character set - </th> - <th> - Description - </th> - </tr> - <tr> - <td> - \d, \D - </td> - <td> - The set consisting of all digit characters (\d) or the opposite of - it (\D). - </td> - </tr> - <tr> - <td> - \s, \S - </td> - <td> - The set consisting of all space characters (\s) or the opposite of - it (\S). - </td> - </tr> - <tr> - <td> - \w, \W - </td> - <td> - The set consisting of all word characters (\w) or the opposite - of it (\W). - </td> - </tr> - <tr> - <td> - \X - </td> - <td> - The set of all grapheme clusters. 
- </td> - </tr> - <tr> - <td> - \p{NAME}, \P{NAME} - </td> - <td> - The Posix set with the specified NAME (\p{}) or the opposite - of it (\P{}) - Legal values for NAME are 'Alnum', 'Alpha', 'ASCII', - 'Blank', 'Cntrl', 'Digit', 'Graph', 'Lower', 'Print', 'Punct', - 'Upper', 'XDigit' . - </td> - </tr> - <tr> - <td> - \p{inBLOCK}, \P{inBLOCK} - </td> - <td> - The character set equivalent to the given Unicode BLOCK (\p{}) or - the opposite of it (\P{}). An example for a legal BLOCK name is - 'Hebrew', meaning, unsurprisingly, all Hebrew characters. - </td> - </tr> - <tr> - <td> - \p{CATEGORY}, \P{CATEGORY} - </td> - <td> - The character set equivalent to the Unicode CATEGORY (\p{}) or the - opposite of it (\P{}). An example for a legal CATEGORY name is 'Lu', - meaning all uppercase letters. - </td> - </tr> - <tr> - <td> - \p{javaMETHOD}, \P{javaMETHOD} - </td> - <td> - The character set equivalent to the isMETHOD() operation of the - {@link java.lang.Character} class (\p{}) or the opposite of it (\P{}). - </td> - </tr> - </table> - - <p> - - <a name="operators"></a> - <h2>Operators</h2> - - The following table lists the operators understood inside regular - expressions: - - <p> - - <!-- ICU-copied documentation begins here --> - - <table> - <tr> - <th> - Operator - </th> - <th> - Description - </th> - </tr> - <tr> - <td> - | - </td> - <td> - Alternation. A|B matches either A or B. - </td> - </tr> - <tr> - <td> - * - </td> - <td> - Match 0 or more times. Match as many times as possible. - </td> - </tr> - <tr> - <td> - + - </td> - <td> - Match 1 or more times. Match as many times as possible. - </td> - </tr> - <tr> - <td> - ? - </td> - <td> - Match zero or one times. Prefer one. - </td> - </tr> - <tr> - <td> - {n} - </td> - <td> - Match exactly n times. - </td> - </tr> - <tr> - <td> - {n,} - </td> - <td> - Match at least n times. Match as many times as possible. - </td> - </tr> - <tr> - <td> - {n,m} - </td> - <td> - Match between n and m times. 
Match as many times as possible, - but not more than m. - </td> - </tr> - <tr> - <td> - *? - </td> - <td> - Match 0 or more times. Match as few times as possible. - </td> - </tr> - <tr> - <td> - +? - </td> - <td> - Match 1 or more times. Match as few times as possible. - </td> - </tr> - <tr> - <td> - ?? - </td> - <td> - Match zero or one times. Prefer zero. - </td> - </tr> - <tr> - <td> - {n}? - </td> - <td> - Match exactly n times. - </td> - </tr> - <tr> - <td> - {n,}? - </td> - <td> - Match at least n times, but no more than required for an - overall pattern match. - </td> - </tr> - <tr> - <td> - {n,m}? - </td> - <td> - Match between n and m times. Match as few times as possible, - but not less than n. - </td> - </tr> - <tr> - <td> - *+ - </td> - <td> - Match 0 or more times. Match as many times as possible when - first encountered, do not retry with fewer even if overall match - fails (Possessive Match) - </td> - </tr> - <tr> - <td> - ++ - </td> - <td> - Match 1 or more times. Possessive match. - </td> - </tr> - <tr> - <td> - ?+ - </td> - <td> - Match zero or one times. Possessive match. - </td> - </tr> - <tr> - <td> - {n}+ - </td> - <td> - Match exactly n times. - </td> - </tr> - <tr> - <td> - {n,}+ - </td> - <td> - Match at least n times. Possessive Match. - </td> - </tr> - <tr> - <td> - {n,m}+ - </td> - <td> - Match between n and m times. Possessive Match. - </td> - </tr> - <tr> - <td> - ( ... ) - </td> - <td> - Capturing parentheses. Range of input that matched the - parenthesized subexpression is available after the match. - </td> - </tr> - <tr> - <td> - (?: ... ) - </td> - <td> - Non-capturing parentheses. Groups the included pattern, but - does not provide capturing of matching text. Somewhat more efficient - than capturing parentheses. - </td> - </tr> - <tr> - <td> - (?> ... ) - </td> - <td> - Atomic-match parentheses. 
First match of the parenthesized - subexpression is the only one tried; if it does not lead to an - overall pattern match, back up the search for a match to a position - before the "(?>" - </td> - </tr> - <tr> - <td> - (?# ... ) - </td> - <td> - Free-format comment (?# comment ). - </td> - </tr> - <tr> - <td> - (?= ... ) - </td> - <td> - Look-ahead assertion. True if the parenthesized pattern - matches at the current input position, but does not advance the - input position. - </td> - </tr> - <tr> - <td> - (?! ... ) - </td> - <td> - Negative look-ahead assertion. True if the parenthesized - pattern does not match at the current input position. Does not - advance the input position. - </td> - </tr> - <tr> - <td> - (?<= ... ) - </td> - <td> - Look-behind assertion. True if the parenthesized pattern - matches text preceding the current input position, with the last - character of the match being the input character just before the - current position. Does not alter the input position. The length of - possible strings matched by the look-behind pattern must not be - unbounded (no * or + operators.) - </td> - </tr> - <tr> - <td> - (?<! ... ) - </td> - <td> - Negative Look-behind assertion. True if the parenthesized - pattern does not match text preceding the current input position, - with the last character of the match being the input character just - before the current position. Does not alter the input position. The - length of possible strings matched by the look-behind pattern must - not be unbounded (no * or + operators.) - </td> - </tr> - <tr> - <td> - (?ismwx-ismwx: ... ) - </td> - <td> - Flag settings. Evaluate the parenthesized expression with the - specified flags enabled or -disabled. - </td> - </tr> - <tr> - <td> - (?ismwx-ismwx) - </td> - <td> - Flag settings. Change the flag settings. Changes apply to the - portion of the pattern following the setting. For example, (?i) - changes to a case insensitive match. 
- </td> - </tr> - </table> - - <!-- ICU-copied documentation ends here --> - - <p> - - <a name="impnotes"></a> - <h2>Implementation notes</h2> - - The regular expression implementation used in Android is provided by - <a href="http://www.icu-project.org">ICU</a>. The notation for the regular - expressions is mostly a superset of those used in other Java language - implementations. This means that existing applications will normally work as - expected, but in rare cases some regular expression content that is meant to - be literal might be interpreted with a special meaning. The most notable - examples are the single '&', which can also be used as the intersection - operator for <a href="#charsets">character sets</a>, and the intersection - operators '-' and '--'. Also, some of the flags are handled in a - slightly different way: - - <ul> - <li> - The {@link java.util.regex.Pattern#CASE_INSENSITIVE} flag silently - assumes Unicode case-insensitivity. That is, the - {@link java.util.regex.Pattern#UNICODE_CASE} flag is effectively a - no-op. - </li> - <li> - The {@link java.util.regex.Pattern#CANON_EQ} flag is not supported at - all (throws an exception). - </li> - </ul> - </body> -</html> diff --git a/luni/src/main/java/java/util/zip/package.html b/luni/src/main/java/java/util/zip/package.html deleted file mode 100644 index 8b4f91b..0000000 --- a/luni/src/main/java/java/util/zip/package.html +++ /dev/null @@ -1,8 +0,0 @@ -<html> - <body> - <p> - This package contains classes for compressing and decompressing data in - ZIP and GZIP file formats. - </p> - </body> -</html> diff --git a/luni/src/main/java/javax/sql/package.html b/luni/src/main/java/javax/sql/package.html deleted file mode 100644 index 7fdf1ce..0000000 --- a/luni/src/main/java/javax/sql/package.html +++ /dev/null @@ -1,8 +0,0 @@ -<html> - <body> - <p> - Provides extensions to the standard interface for accessing SQL-based - databases. - <p> - </body> -</html> |