summaryrefslogtreecommitdiffstats
path: root/core/java/android/util
diff options
context:
space:
mode:
authorSelim Gurun <sgurun@google.com>2014-02-13 00:01:32 +0100
committerSelim Gurun <sgurun@google.com>2014-03-21 15:29:01 -0700
commitd3ff8bdf92c23d596f3e4701455223f9e3ac8873 (patch)
tree34d6c92d2d2da00d396502b4a0f866d63e3909f9 /core/java/android/util
parent72191f3dc186e7dbe36096fdadad6845c735f8a8 (diff)
downloadframeworks_base-d3ff8bdf92c23d596f3e4701455223f9e3ac8873.zip
frameworks_base-d3ff8bdf92c23d596f3e4701455223f9e3ac8873.tar.gz
frameworks_base-d3ff8bdf92c23d596f3e4701455223f9e3ac8873.tar.bz2
Support for new gTLDs
Bug: 13006774 Add support for new generic Top-Level Domains (gTLDs) from ICANN and deprecate the public APIs that depend on them. Due to the fast profileration of gTLDs, it has become hard to keep this API up-to-date. Further it already fails to recognize internal domains. We thus replace them with simple "any letter sequence" regexps. Change-Id: Ib6cb5360bbb4a02b14bb34acb30d1cc8ddec771b
Diffstat (limited to 'core/java/android/util')
-rw-r--r--core/java/android/util/Patterns.java147
1 files changed, 78 insertions, 69 deletions
diff --git a/core/java/android/util/Patterns.java b/core/java/android/util/Patterns.java
index 0f8da44..7f1d3f9 100644
--- a/core/java/android/util/Patterns.java
+++ b/core/java/android/util/Patterns.java
@@ -25,78 +25,88 @@ import java.util.regex.Pattern;
public class Patterns {
/**
* Regular expression to match all IANA top-level domains.
- * List accurate as of 2011/07/18. List taken from:
+ * List accurate as of 2014/02/13. List taken from:
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
* This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
+ *
+ * @deprecated Due to the recent profileration of gTLDs, this API is
+ * expected to become out-of-date very quickly. Therefore it is now
+ * deprecated.
*/
+ @Deprecated
public static final String TOP_LEVEL_DOMAIN_STR =
- "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
- + "|(biz|b[abdefghijmnorstvwyz])"
- + "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
- + "|d[ejkmoz]"
- + "|(edu|e[cegrstu])"
- + "|f[ijkmor]"
- + "|(gov|g[abdefghilmnpqrstuwy])"
- + "|h[kmnrtu]"
- + "|(info|int|i[delmnoqrst])"
+ "((academy|aero|agency|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ + "|(bargains|berlin|bike|biz|blue|boutique|build|builders|buzz|b[abdefghijmnorstvwyz])"
+ + "|(cab|camera|camp|cards|careers|cat|catering|center|ceo|cheap|cleaning|clothing|club|codes|coffee|com|community|company|computer|condos|construction|contractors|cool|coop|cruises|c[acdfghiklmnoruvwxyz])"
+ + "|(dance|dating|democrat|diamonds|directory|domains|d[ejkmoz])"
+ + "|(edu|education|email|enterprises|equipment|estate|events|expert|exposed|e[cegrstu])"
+ + "|(farm|flights|florist|foundation|futbol|f[ijkmor])"
+ + "|(gallery|gift|glass|gov|graphics|guitars|guru|g[abdefghilmnpqrstuwy])"
+ + "|(holdings|holiday|house|h[kmnrtu])"
+ + "|(immobilien|info|institute|int|international|i[delmnoqrst])"
+ "|(jobs|j[emop])"
- + "|k[eghimnprwyz]"
- + "|l[abcikrstuvy]"
- + "|(mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])"
- + "|(name|net|n[acefgilopruz])"
- + "|(org|om)"
- + "|(pro|p[aefghklmnrstwy])"
+ + "|(kaufen|kim|kitchen|kiwi|k[eghimnprwyz])"
+ + "|(land|lighting|limo|link|luxury|l[abcikrstuvy])"
+ + "|(maison|management|marketing|menu|mil|mobi|moda|monash|museum|m[acdeghklmnopqrstuvwxyz])"
+ + "|(nagoya|name|net|ninja|n[acefgilopruz])"
+ + "|(onl|org|om)"
+ + "|(partners|parts|photo|photography|photos|pics|pink|plumbing|post|pro|productions|properties|p[aefghklmnrstwy])"
+ "|qa"
- + "|r[eosuw]"
- + "|s[abcdeghijklmnortuvyz]"
- + "|(tel|travel|t[cdfghjklmnoprtvwz])"
- + "|u[agksyz]"
- + "|v[aceginu]"
- + "|w[fs]"
- + "|(\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae|\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435|\u0440\u0444|\u0441\u0440\u0431|\u05d8\u05e2\u05e1\u05d8|\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc|\u0625\u062e\u062a\u0628\u0627\u0631|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633|\u0633\u0648\u0631\u064a\u0629|\u0641\u0644\u0633\u0637\u064a\u0646|\u0642\u0637\u0631|\u0645\u0635\u0631|\u092a\u0930\u0940\u0915\u094d\u0937\u093e|\u092d\u093e\u0930\u0924|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd|\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e44\u0e17\u0e22|\u30c6\u30b9\u30c8|\u4e2d\u56fd|\u4e2d\u570b|\u53f0\u6e7e|\u53f0\u7063|\u65b0\u52a0\u5761|\u6d4b\u8bd5|\u6e2c\u8a66|\u9999\u6e2f|\ud14c\uc2a4\ud2b8|\ud55c\uad6d|xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-3e0b707e|xn\\-\\-45brj9c|xn\\-\\-80akhbyknj4f|xn\\-\\-90a3ac|xn\\-\\-9t4b11yi5a|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-deba0ad|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-g6w251d|xn\\-\\-gecrj9c|xn\\-\\-h2brj9c|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-j6w193g|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-s9brj9c|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zckzah|xxx)"
+ + "|(recipes|red|rentals|repair|report|reviews|rich|ruhr|r[eosuw])"
+ + "|(sexy|shiksha|shoes|singles|social|solar|solutions|support|systems|s[abcdeghijklmnortuvxyz])"
+ + "|(tattoo|technology|tel|tienda|tips|today|tokyo|tools|training|travel|t[cdfghjklmnoprtvwz])"
+ + "|(uno|u[agksyz])"
+ + "|(ventures|viajes|villas|vision|voting|voyage|v[aceginu])"
+ + "|(wang|watch|wed|wien|works|w[fs])"
+ + "|(\u043c\u043e\u043d|\u043e\u043d\u043b\u0430\u0439\u043d|\u0440\u0444|\u0441\u0430\u0439\u0442|\u0441\u0440\u0431|\u0443\u043a\u0440|\u049b\u0430\u0437|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0627\u06cc\u0631\u0627\u0646|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633|\u0633\u0648\u0631\u064a\u0629|\u0634\u0628\u0643\u0629|\u0639\u0645\u0627\u0646|\u0641\u0644\u0633\u0637\u064a\u0646|\u0642\u0637\u0631|\u0645\u0635\u0631|\u0645\u0644\u064a\u0633\u064a\u0627|\u092d\u093e\u0930\u0924|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e44\u0e17\u0e22|\u307f\u3093\u306a|\u4e2d\u4fe1|\u4e2d\u56fd|\u4e2d\u570b|\u4e2d\u6587\u7f51|\u516c\u53f8|\u516c\u76ca|\u53f0\u6e7e|\u53f0\u7063|\u5728\u7ebf|\u6211\u7231\u4f60|\u653f\u52a1|\u65b0\u52a0\u5761|\u6e38\u620f|\u79fb\u52a8|\u7f51\u7edc|\u96c6\u56e2|\u9999\u6e2f|\ud55c\uad6d|xn\\-\\-3bst00m|xn\\-\\-3ds443g|xn\\-\\-3e0b707e|xn\\-\\-45brj9c|xn\\-\\-55qw42g|xn\\-\\-55qx5d|xn\\-\\-6frz82g|xn\\-\\-6qq986b3xl|xn\\-\\-80ao21a|xn\\-\\-80asehdb|xn\\-\\-80aswg|xn\\-\\-90a3ac|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-fiq228c5hs|xn\\-\\-fiq64b|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-gecrj9c|xn\\-\\-h2brj9c|xn\\-\\-io0a7i|xn\\-\\-j1amh|xn\\-\\-j6w193g|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-l1acc|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgb9awbf|xn\\-\\-mgba3a4f16a|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-mgbx4cd0ab|xn\\-\\-ngbc5azd|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-q9jyb4c|xn\\-\\-s9brj9c|xn\\-\\-unup4y|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zfr164b|xxx)"
+ "|y[et]"
- + "|z[amw])";
+ + "|(zone|z[amw]))";
/**
* Regular expression pattern to match all IANA top-level domains.
+ * @deprecated This API is deprecated. See {@link #TOP_LEVEL_DOMAIN_STR}.
*/
+ @Deprecated
public static final Pattern TOP_LEVEL_DOMAIN =
Pattern.compile(TOP_LEVEL_DOMAIN_STR);
/**
* Regular expression to match all IANA top-level domains for WEB_URL.
- * List accurate as of 2011/07/18. List taken from:
+ * List accurate as of 2014/02/13. List taken from:
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
* This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
+ *
+ * @deprecated This API is deprecated. See {@link #TOP_LEVEL_DOMAIN_STR}.
*/
+ @Deprecated
public static final String TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL =
"(?:"
- + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
- + "|(?:biz|b[abdefghijmnorstvwyz])"
- + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
- + "|d[ejkmoz]"
- + "|(?:edu|e[cegrstu])"
- + "|f[ijkmor]"
- + "|(?:gov|g[abdefghilmnpqrstuwy])"
- + "|h[kmnrtu]"
- + "|(?:info|int|i[delmnoqrst])"
+ + "(?:academy|aero|agency|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ + "|(?:bargains|berlin|bike|biz|blue|boutique|build|builders|buzz|b[abdefghijmnorstvwyz])"
+ + "|(?:cab|camera|camp|cards|careers|cat|catering|center|ceo|cheap|cleaning|clothing|club|codes|coffee|com|community|company|computer|condos|construction|contractors|cool|coop|cruises|c[acdfghiklmnoruvwxyz])"
+ + "|(?:dance|dating|democrat|diamonds|directory|domains|d[ejkmoz])"
+ + "|(?:edu|education|email|enterprises|equipment|estate|events|expert|exposed|e[cegrstu])"
+ + "|(?:farm|flights|florist|foundation|futbol|f[ijkmor])"
+ + "|(?:gallery|gift|glass|gov|graphics|guitars|guru|g[abdefghilmnpqrstuwy])"
+ + "|(?:holdings|holiday|house|h[kmnrtu])"
+ + "|(?:immobilien|info|institute|int|international|i[delmnoqrst])"
+ "|(?:jobs|j[emop])"
- + "|k[eghimnprwyz]"
- + "|l[abcikrstuvy]"
- + "|(?:mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])"
- + "|(?:name|net|n[acefgilopruz])"
- + "|(?:org|om)"
- + "|(?:pro|p[aefghklmnrstwy])"
+ + "|(?:kaufen|kim|kitchen|kiwi|k[eghimnprwyz])"
+ + "|(?:land|lighting|limo|link|luxury|l[abcikrstuvy])"
+ + "|(?:maison|management|marketing|menu|mil|mobi|moda|monash|museum|m[acdeghklmnopqrstuvwxyz])"
+ + "|(?:nagoya|name|net|ninja|n[acefgilopruz])"
+ + "|(?:onl|org|om)"
+ + "|(?:partners|parts|photo|photography|photos|pics|pink|plumbing|post|pro|productions|properties|p[aefghklmnrstwy])"
+ "|qa"
- + "|r[eosuw]"
- + "|s[abcdeghijklmnortuvyz]"
- + "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
- + "|u[agksyz]"
- + "|v[aceginu]"
- + "|w[fs]"
- + "|(?:\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae|\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435|\u0440\u0444|\u0441\u0440\u0431|\u05d8\u05e2\u05e1\u05d8|\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc|\u0625\u062e\u062a\u0628\u0627\u0631|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633|\u0633\u0648\u0631\u064a\u0629|\u0641\u0644\u0633\u0637\u064a\u0646|\u0642\u0637\u0631|\u0645\u0635\u0631|\u092a\u0930\u0940\u0915\u094d\u0937\u093e|\u092d\u093e\u0930\u0924|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd|\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e44\u0e17\u0e22|\u30c6\u30b9\u30c8|\u4e2d\u56fd|\u4e2d\u570b|\u53f0\u6e7e|\u53f0\u7063|\u65b0\u52a0\u5761|\u6d4b\u8bd5|\u6e2c\u8a66|\u9999\u6e2f|\ud14c\uc2a4\ud2b8|\ud55c\uad6d|xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-3e0b707e|xn\\-\\-45brj9c|xn\\-\\-80akhbyknj4f|xn\\-\\-90a3ac|xn\\-\\-9t4b11yi5a|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-deba0ad|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-g6w251d|xn\\-\\-gecrj9c|xn\\-\\-h2brj9c|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-j6w193g|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-s9brj9c|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zckzah|xxx)"
+ + "|(?:recipes|red|rentals|repair|report|reviews|rich|ruhr|r[eosuw])"
+ + "|(?:sexy|shiksha|shoes|singles|social|solar|solutions|support|systems|s[abcdeghijklmnortuvxyz])"
+ + "|(?:tattoo|technology|tel|tienda|tips|today|tokyo|tools|training|travel|t[cdfghjklmnoprtvwz])"
+ + "|(?:uno|u[agksyz])"
+ + "|(?:ventures|viajes|villas|vision|voting|voyage|v[aceginu])"
+ + "|(?:wang|watch|wed|wien|works|w[fs])"
+ + "|(?:\u043c\u043e\u043d|\u043e\u043d\u043b\u0430\u0439\u043d|\u0440\u0444|\u0441\u0430\u0439\u0442|\u0441\u0440\u0431|\u0443\u043a\u0440|\u049b\u0430\u0437|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0627\u06cc\u0631\u0627\u0646|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633|\u0633\u0648\u0631\u064a\u0629|\u0634\u0628\u0643\u0629|\u0639\u0645\u0627\u0646|\u0641\u0644\u0633\u0637\u064a\u0646|\u0642\u0637\u0631|\u0645\u0635\u0631|\u0645\u0644\u064a\u0633\u064a\u0627|\u092d\u093e\u0930\u0924|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e44\u0e17\u0e22|\u307f\u3093\u306a|\u4e2d\u4fe1|\u4e2d\u56fd|\u4e2d\u570b|\u4e2d\u6587\u7f51|\u516c\u53f8|\u516c\u76ca|\u53f0\u6e7e|\u53f0\u7063|\u5728\u7ebf|\u6211\u7231\u4f60|\u653f\u52a1|\u65b0\u52a0\u5761|\u6e38\u620f|\u79fb\u52a8|\u7f51\u7edc|\u96c6\u56e2|\u9999\u6e2f|\ud55c\uad6d|xn\\-\\-3bst00m|xn\\-\\-3ds443g|xn\\-\\-3e0b707e|xn\\-\\-45brj9c|xn\\-\\-55qw42g|xn\\-\\-55qx5d|xn\\-\\-6frz82g|xn\\-\\-6qq986b3xl|xn\\-\\-80ao21a|xn\\-\\-80asehdb|xn\\-\\-80aswg|xn\\-\\-90a3ac|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-fiq228c5hs|xn\\-\\-fiq64b|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-gecrj9c|xn\\-\\-h2brj9c|xn\\-\\-io0a7i|xn\\-\\-j1amh|xn\\-\\-j6w193g|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-l1acc|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgb9awbf|xn\\-\\-mgba3a4f16a|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-mgbx4cd0ab|xn\\-\\-ngbc5azd|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-q9jyb4c|xn\\-\\-s9brj9c|xn\\-\\-unup4y|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zfr164b|xxx)"
+ "|y[et]"
- + "|z[amw]))";
+ + "|(?:zone|z[amw])))";
/**
* Good characters for Internationalized Resource Identifiers (IRI).
@@ -107,6 +117,24 @@ public class Patterns {
public static final String GOOD_IRI_CHAR =
"a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
+ public static final Pattern IP_ADDRESS
+ = Pattern.compile(
+ "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
+ + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
+ + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ + "|[1-9][0-9]|[0-9]))");
+
+ /**
+ * RFC 1035 Section 2.3.4 limits the labels to a maximum 63 octets.
+ */
+ private static final String IRI
+ = "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "\\-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";
+
+ private static final String HOST_NAME = IRI + "(?:\\." + IRI + ")+";
+
+ public static final Pattern DOMAIN_NAME
+ = Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")");
+
/**
* Regular expression pattern to match most part of RFC 3987
* Internationalized URLs, aka IRIs. Commonly used Unicode characters are
@@ -116,13 +144,7 @@ public class Patterns {
"((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+ "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
- + "((?:(?:[" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,64}\\.)+" // named host
- + TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL
- + "|(?:(?:25[0-5]|2[0-4]" // or ip address
- + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
- + "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
- + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
- + "|[1-9][0-9]|[0-9])))"
+ + "(?:" + DOMAIN_NAME + ")"
+ "(?:\\:\\d{1,5})?)" // plus option port number
+ "(\\/(?:(?:[" + GOOD_IRI_CHAR + "\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
+ "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
@@ -130,19 +152,6 @@ public class Patterns {
// input. This is to stop foo.sure from
// matching as foo.su
- public static final Pattern IP_ADDRESS
- = Pattern.compile(
- "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
- + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
- + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
- + "|[1-9][0-9]|[0-9]))");
-
- public static final Pattern DOMAIN_NAME
- = Pattern.compile(
- "(((([" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]*)*[" + GOOD_IRI_CHAR + "]\\.)+"
- + TOP_LEVEL_DOMAIN + ")|"
- + IP_ADDRESS + ")");
-
public static final Pattern EMAIL_ADDRESS
= Pattern.compile(
"[a-zA-Z0-9\\+\\.\\_\\%\\-\\+]{1,256}" +
@@ -159,7 +168,7 @@ public class Patterns {
* might be phone numbers in arbitrary text, not for validating whether
* something is in fact a phone number. It will miss many things that
* are legitimate phone numbers.
- *
+ *
* <p> The pattern matches the following:
* <ul>
* <li>Optionally, a + sign followed immediately by one or more digits. Spaces, dots, or dashes