diff options
| author | Shimeng (Simon) Wang <swang@google.com> | 2010-02-05 17:57:45 -0800 |
|---|---|---|
| committer | Shimeng (Simon) Wang <swang@google.com> | 2010-02-09 15:13:09 -0800 |
| commit | 3ed6fbd9e141f20ca382306aa6a355cd544158d1 (patch) | |
| tree | d95f2118cb75fdaf03c8190ba4fa4b28dc339808 /common | |
| parent | 88e2a5d3d3b85915eea9a5eddcbe9414bc2976c5 (diff) | |
| download | frameworks_base-3ed6fbd9e141f20ca382306aa6a355cd544158d1.zip frameworks_base-3ed6fbd9e141f20ca382306aa6a355cd544158d1.tar.gz frameworks_base-3ed6fbd9e141f20ca382306aa6a355cd544158d1.tar.bz2 | |
Regenerate the TopLevelDomain from iana.org website.
This is to capture recently added top level domains.
modified: common/java/com/android/common/Patterns.java
modified: common/tests/src/com/android/common/PatternsTest.java
Diffstat (limited to 'common')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | common/java/com/android/common/Patterns.java | 148 |
| -rw-r--r-- | common/tests/src/com/android/common/PatternsTest.java | 26 |
2 files changed, 100 insertions, 74 deletions
```diff
diff --git a/common/java/com/android/common/Patterns.java b/common/java/com/android/common/Patterns.java
index 24a18c0..71c3a5e 100644
--- a/common/java/com/android/common/Patterns.java
+++ b/common/java/com/android/common/Patterns.java
@@ -25,87 +25,87 @@ import java.util.regex.Pattern;
 public class Patterns {
     /**
      * Regular expression pattern to match all IANA top-level domains.
-     * List accurate as of 2007/06/15. List taken from:
+     * List accurate as of 2010/02/05. List taken from:
      * http://data.iana.org/TLD/tlds-alpha-by-domain.txt
-     * This pattern is auto-generated by //device/tools/make-iana-tld-pattern.py
+     * This pattern is auto-generated by development/tools/make-iana-tld-pattern.py
      */
-    public static final Pattern TOP_LEVEL_DOMAIN
-        = Pattern.compile(
-                "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
-                + "|(biz|b[abdefghijmnorstvwyz])"
-                + "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
-                + "|d[ejkmoz]"
-                + "|(edu|e[cegrstu])"
-                + "|f[ijkmor]"
-                + "|(gov|g[abdefghilmnpqrstuwy])"
-                + "|h[kmnrtu]"
-                + "|(info|int|i[delmnoqrst])"
-                + "|(jobs|j[emop])"
-                + "|k[eghimnrwyz]"
-                + "|l[abcikrstuvy]"
-                + "|(mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
-                + "|(name|net|n[acefgilopruz])"
-                + "|(org|om)"
-                + "|(pro|p[aefghklmnrstwy])"
-                + "|qa"
-                + "|r[eouw]"
-                + "|s[abcdeghijklmnortuvyz]"
-                + "|(tel|travel|t[cdfghjklmnoprtvwz])"
-                + "|u[agkmsyz]"
-                + "|v[aceginu]"
-                + "|w[fs]"
-                + "|y[etu]"
-                + "|z[amw])");
+    public static final Pattern TOP_LEVEL_DOMAIN = Pattern.compile(
+        "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+        + "|(biz|b[abdefghijmnorstvwyz])"
+        + "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
+        + "|d[ejkmoz]"
+        + "|(edu|e[cegrstu])"
+        + "|f[ijkmor]"
+        + "|(gov|g[abdefghilmnpqrstuwy])"
+        + "|h[kmnrtu]"
+        + "|(info|int|i[delmnoqrst])"
+        + "|(jobs|j[emop])"
+        + "|k[eghimnprwyz]"
+        + "|l[abcikrstuvy]"
+        + "|(mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])"
+        + "|(name|net|n[acefgilopruz])"
+        + "|(org|om)"
+        + "|(pro|p[aefghklmnrstwy])"
+        + "|qa"
+        + "|r[eosuw]"
+        + "|s[abcdeghijklmnortuvyz]"
+        + "|(tel|travel|t[cdfghjklmnoprtvwz])"
+        + "|u[agksyz]"
+        + "|v[aceginu]"
+        + "|w[fs]"
+        + "|(xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-80akhbyknj4f|xn\\-\\-9t4b11yi5a|xn\\-\\-deba0ad|xn\\-\\-g6w251d|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-zckzah)"
+        + "|y[etu]"
+        + "|z[amw])");
 
     /**
      * Regular expression pattern to match RFC 1738 URLs
-     * List accurate as of 2007/06/15. List taken from:
+     * List accurate as of 2010/02/05. List taken from:
      * http://data.iana.org/TLD/tlds-alpha-by-domain.txt
-     * This pattern is auto-generated by //device/tools/make-iana-tld-pattern.py
+     * This pattern is auto-generated by development/tools/make-iana-tld-pattern.py
      */
-    public static final Pattern WEB_URL
-        = Pattern.compile(
-            "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
-            + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
-            + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
-            + "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
-            + "(?:" // plus top level domain
-            + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
-            + "|(?:biz|b[abdefghijmnorstvwyz])"
-            + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
-            + "|d[ejkmoz]"
-            + "|(?:edu|e[cegrstu])"
-            + "|f[ijkmor]"
-            + "|(?:gov|g[abdefghilmnpqrstuwy])"
-            + "|h[kmnrtu]"
-            + "|(?:info|int|i[delmnoqrst])"
-            + "|(?:jobs|j[emop])"
-            + "|k[eghimnrwyz]"
-            + "|l[abcikrstuvy]"
-            + "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
-            + "|(?:name|net|n[acefgilopruz])"
-            + "|(?:org|om)"
-            + "|(?:pro|p[aefghklmnrstwy])"
-            + "|qa"
-            + "|r[eouw]"
-            + "|s[abcdeghijklmnortuvyz]"
-            + "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
-            + "|u[agkmsyz]"
-            + "|v[aceginu]"
-            + "|w[fs]"
-            + "|y[etu]"
-            + "|z[amw]))"
-            + "|(?:(?:25[0-5]|2[0-4]" // or ip address
-            + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
-            + "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
-            + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
-            + "|[1-9][0-9]|[0-9])))"
-            + "(?:\\:\\d{1,5})?)" // plus option port number
-            + "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
-            + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
-            + "(?:\\b|$)"); // and finally, a word boundary or end of
-                            // input. This is to stop foo.sure from
-                            // matching as foo.su
+    public static final Pattern WEB_URL = Pattern.compile(
+        "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+        + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+        + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
+        + "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
+        + "(?:" // plus top level domain
+        + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+        + "|(?:biz|b[abdefghijmnorstvwyz])"
+        + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
+        + "|d[ejkmoz]"
+        + "|(?:edu|e[cegrstu])"
+        + "|f[ijkmor]"
+        + "|(?:gov|g[abdefghilmnpqrstuwy])"
+        + "|h[kmnrtu]"
+        + "|(?:info|int|i[delmnoqrst])"
+        + "|(?:jobs|j[emop])"
+        + "|k[eghimnprwyz]"
+        + "|l[abcikrstuvy]"
+        + "|(?:mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])"
+        + "|(?:name|net|n[acefgilopruz])"
+        + "|(?:org|om)"
+        + "|(?:pro|p[aefghklmnrstwy])"
+        + "|qa"
+        + "|r[eosuw]"
+        + "|s[abcdeghijklmnortuvyz]"
+        + "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
+        + "|u[agksyz]"
+        + "|v[aceginu]"
+        + "|w[fs]"
+        + "|(?:xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-80akhbyknj4f|xn\\-\\-9t4b11yi5a|xn\\-\\-deba0ad|xn\\-\\-g6w251d|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-zckzah)"
+        + "|y[etu]"
+        + "|z[amw]))"
+        + "|(?:(?:25[0-5]|2[0-4]" // or ip address
+        + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
+        + "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
+        + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+        + "|[1-9][0-9]|[0-9])))"
+        + "(?:\\:\\d{1,5})?)" // plus option port number
+        + "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
+        + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
+        + "(?:\\b|$)"); // and finally, a word boundary or end of
+                        // input. This is to stop foo.sure from
+                        // matching as foo.su
 
     public static final Pattern IP_ADDRESS
         = Pattern.compile(
diff --git a/common/tests/src/com/android/common/PatternsTest.java b/common/tests/src/com/android/common/PatternsTest.java
index 7fabe5e..635601e 100644
--- a/common/tests/src/com/android/common/PatternsTest.java
+++ b/common/tests/src/com/android/common/PatternsTest.java
@@ -31,6 +31,20 @@ public class PatternsTest extends TestCase {
         t = Patterns.TOP_LEVEL_DOMAIN.matcher("com").matches();
         assertTrue("Missed valid TLD", t);
 
+        // One of the new top level domain.
+        t = Patterns.TOP_LEVEL_DOMAIN.matcher("me").matches();
+        assertTrue("Missed valid TLD", t);
+
+        // One of the new top level test domain.
+        t = Patterns.TOP_LEVEL_DOMAIN.matcher("xn--0zwm56d").matches();
+        assertTrue("Missed valid TLD", t);
+
+        t = Patterns.TOP_LEVEL_DOMAIN.matcher("mem").matches();
+        assertFalse("Matched invalid TLD!", t);
+
+        t = Patterns.TOP_LEVEL_DOMAIN.matcher("xn").matches();
+        assertFalse("Matched invalid TLD!", t);
+
         t = Patterns.TOP_LEVEL_DOMAIN.matcher("xer").matches();
         assertFalse("Matched invalid TLD!", t);
     }
@@ -42,6 +56,18 @@ public class PatternsTest extends TestCase {
         t = Patterns.WEB_URL.matcher("http://www.google.com").matches();
         assertTrue("Valid URL", t);
 
+        // Google in one of the new top level domain.
+        t = Patterns.WEB_URL.matcher("http://www.google.me").matches();
+        assertTrue("Valid URL", t);
+        t = Patterns.WEB_URL.matcher("google.me").matches();
+        assertTrue("Valid URL", t);
+
+        // Test url in Chinese: http://xn--fsqu00a.xn--0zwm56d
+        t = Patterns.WEB_URL.matcher("http://xn--fsqu00a.xn--0zwm56d").matches();
+        assertTrue("Valid URL", t);
+        t = Patterns.WEB_URL.matcher("xn--fsqu00a.xn--0zwm56d").matches();
+        assertTrue("Valid URL", t);
+
         t = Patterns.WEB_URL.matcher("ftp://www.example.com").matches();
         assertFalse("Matched invalid protocol", t);
 
```
