summary | refs | log | tree | commit | diff | stats
path: root/core/java/android/util/Patterns.java
diff options
context:
space:
mode:
Diffstat (limited to 'core/java/android/util/Patterns.java')
-rw-r--r-- core/java/android/util/Patterns.java 40
1 files changed, 32 insertions, 8 deletions
diff --git a/core/java/android/util/Patterns.java b/core/java/android/util/Patterns.java
index 2cc91b9..6fc8ae5 100644
--- a/core/java/android/util/Patterns.java
+++ b/core/java/android/util/Patterns.java
@@ -125,15 +125,35 @@ public class Patterns {
+ "|[1-9][0-9]|[0-9]))");
/**
+ * Matches the allowed IRI characters, excluding Chinese characters.
+ * @hide
+ */
+ private static final String GOOD_IRI_HOST_CHAR =
+ "a-zA-Z0-9\u00A0-\u2FFF\u3040-\u4DFF\u9FA6-\uD7FF"
+ + "\uF900-\uFDCF\uFDF0-\uFEFF";
+
+ /**
* RFC 1035 Section 2.3.4 limits the labels to a maximum 63 octets.
*/
- private static final String IRI
- = "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "\\-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";
+ private static final String IRI =
+ "[" + GOOD_IRI_HOST_CHAR + "]([" + GOOD_IRI_HOST_CHAR + "\\-]{0,61}["
+ + GOOD_IRI_HOST_CHAR + "]){0,1}";
private static final String GOOD_GTLD_CHAR =
- "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
+ "a-zA-Z\u00A0-\u2FFF\u3040-\u4DFF\u9FA6-\uD7FF"
+ + "\uF900-\uFDCF\uFDF0-\uFEFF";
private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}";
private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD;
+ // Halfwidth and fullwidth forms
+ private static final String HALF_FULL_WIDTH_CHAR = "\uFF00-\uFFEF";
+ // CJK symbols and punctuation
+ private static final String SYMBOLS_PUNCTUATION_CHAR = "\u3000-\u303F";
+ // Chinese characters
+ private static final String CHINESE_CHAR = "\u4E00-\u9FA5";
+ // Forbidden characters; they must not be included in a matched URL.
+ private static final String FORBIDDEN_CHAR =
+ "[" + SYMBOLS_PUNCTUATION_CHAR + CHINESE_CHAR
+ + HALF_FULL_WIDTH_CHAR + "]";
public static final Pattern DOMAIN_NAME
= Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")");
@@ -149,11 +169,15 @@ public class Patterns {
+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
+ "(?:" + DOMAIN_NAME + ")"
+ "(?:\\:\\d{1,5})?)" // plus option port number
- + "(\\/(?:(?:[" + GOOD_IRI_CHAR + "\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
- + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
- + "(?:\\b|$)"); // and finally, a word boundary or end of
- // input. This is to stop foo.sure from
- // matching as foo.su
+ + "(\\/(?:(?:[" + GOOD_IRI_HOST_CHAR
+ + "\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
+ + "\\-\\.\\+\\!\\*\\'\\(\\)\\_])|(?:\\,[" + GOOD_IRI_HOST_CHAR
+ + "])|(?:\\%[a-fA-F0-9]{2}))*)?"
+ + "(?:(?=" + FORBIDDEN_CHAR
+ + ")|\\b|$)");
+ // And finally, a word boundary or end of input. This is to stop
+ // foo.sure from matching as foo.su.
+ // A forbidden character directly after the URL also ends the match.
public static final Pattern EMAIL_ADDRESS
= Pattern.compile(