diff options
author | Elliott Hughes <enh@google.com> | 2013-05-04 01:21:05 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2013-05-04 01:21:05 +0000 |
commit | f7e1ba4804f7365e6ab47a34e019532b428b1546 (patch) | |
tree | 01be2480fef2d8f0b590f7125a896d6dab72894f | |
parent | a4f2eac75586df51bb508a78d00cb353fa019a45 (diff) | |
parent | 5700a9718eef20f4228ed97d54a59cc70bf40753 (diff) | |
download | libcore-f7e1ba4804f7365e6ab47a34e019532b428b1546.zip libcore-f7e1ba4804f7365e6ab47a34e019532b428b1546.tar.gz libcore-f7e1ba4804f7365e6ab47a34e019532b428b1546.tar.bz2 |
Merge "Various regex test fixes."
10 files changed, 134 insertions, 1621 deletions
diff --git a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java index bc87439..98450a4 100644 --- a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java +++ b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java @@ -607,16 +607,16 @@ public class MatcherTest extends TestCase { assertTrue(matcher.matches()); } - public void testAllCodePoints() { + public void testAllCodePoints_p() { // Regression for HARMONY-3145 int[] codePoint = new int[1]; Pattern p = Pattern.compile("(\\p{all})+"); boolean res = true; int cnt = 0; - String s; - for (int i = 0; i < 0x110000; i++) { + int step = 16; // Ideally 1, but devices are still too slow. + for (int i = 0; i < 0x110000; i += step) { codePoint[0] = i; - s = new String(codePoint, 0, 1); + String s = new String(codePoint, 0, 1); if (!s.matches(p.toString())) { cnt++; res = false; @@ -624,22 +624,25 @@ public class MatcherTest extends TestCase { } assertTrue(res); assertEquals(0, cnt); + } - p = Pattern.compile("(\\P{all})+"); - res = true; - cnt = 0; - - for (int i = 0; i < 0x110000; i++) { + public void testAllCodePoints_P() { + // Regression for HARMONY-3145 + int[] codePoint = new int[1]; + Pattern p = Pattern.compile("(\\P{all})+"); + boolean res = true; + int cnt = 0; + int step = 16; // Ideally 1, but devices are still too slow. + for (int i = 0; i < 0x110000; i += step) { codePoint[0] = i; - s = new String(codePoint, 0, 1); + String s = new String(codePoint, 0, 1); if (!s.matches(p.toString())) { cnt++; res = false; } } - assertFalse(res); - assertEquals(0x110000, cnt); + assertEquals(0x110000 / step, cnt); } /* @@ -735,14 +738,13 @@ public class MatcherTest extends TestCase { "----1 fish 2 fish red fish 5----", false); } - /* - * Test if Matcher's toString conatain pattern information - */ public void testToString() { - String result = Pattern.compile("(\\d{1,3})").matcher( - "aaaa123456789045").toString(); - assertTrue("The result doesn't contain pattern info", result - .contains("(\\d{1,3})")); + Matcher m = Pattern.compile("(\\d{1,3})").matcher("aaaa666456789045"); + assertEquals("java.util.regex.Matcher[pattern=(\\d{1,3}) region=0,16 lastmatch=]", m.toString()); + assertTrue(m.find()); + assertEquals("java.util.regex.Matcher[pattern=(\\d{1,3}) region=0,16 lastmatch=666]", m.toString()); + m.region(4, 8); + assertEquals("java.util.regex.Matcher[pattern=(\\d{1,3}) region=4,8 lastmatch=]", m.toString()); } private void hitEndTest(boolean callFind, String testNo, String regex, diff --git a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/Pattern2Test.java b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/Pattern2Test.java index e2fec73..cf30fc6 100644 --- a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/Pattern2Test.java +++ b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/Pattern2Test.java @@ -287,16 +287,18 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); } catch (PatternSyntaxException e) { } + // icu4c allows 1 to 6 hex digits in \x escapes. + p = Pattern.compile("\\xa"); + p = Pattern.compile("\\xab"); + p = Pattern.compile("\\xabc"); + p = Pattern.compile("\\xabcd"); + p = Pattern.compile("\\xabcde"); + p = Pattern.compile("\\xabcdef"); + // (Further digits would just be treated as characters after the escape.) try { - p = Pattern.compile("\\xa"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\xa;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { + p = Pattern.compile("\\xg"); + fail(); + } catch (PatternSyntaxException expected) { } // Test \0 (octal) sequences (1, 2 and 3 digit) @@ -369,11 +371,18 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); } // Test invalid control escapes - try { + // icu4c 50 accepts this pattern, and treats it as a literal. + //try { p = Pattern.compile("\\c"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } + assertTrue(p.matcher("x\\cy").find()); + // fail(p.matcher("").toString()); + //} catch (PatternSyntaxException e) { + //} + + // But \cH works. + p = Pattern.compile("\\cH"); + assertTrue(p.matcher("x\u0008y").find()); + assertFalse(p.matcher("x\\cHy").find()); // originally contributed test did not check the result // TODO: check what RI does here @@ -467,15 +476,18 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); assertTrue(m.matches()); // Test ranges including the "-" character - p = Pattern.compile("[\\*-/]_+[---]!+[--AP]"); - m = p.matcher("-_-!!A"); - assertTrue(m.matches()); - m = p.matcher("\u002b_-!!!-"); - assertTrue(m.matches()); - m = p.matcher("!_-!@"); - assertFalse(m.matches()); - m = p.matcher(",______-!!!!!!!P"); - assertTrue(m.matches()); + // "---" collides with icu4c's "--" operator, and likely to be user error anyway. + if (false) { + p = Pattern.compile("[\\*-/]_+[---]!+[--AP]"); + m = p.matcher("-_-!!A"); + assertTrue(m.matches()); + m = p.matcher("\u002b_-!!!-"); + assertTrue(m.matches()); + m = p.matcher("!_-!@"); + assertFalse(m.matches()); + m = p.matcher(",______-!!!!!!!P"); + assertTrue(m.matches()); + } // Test nested ranges p = Pattern.compile("[pm[t]][a-z]+[[r]lp]"); @@ -503,13 +515,16 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); assertTrue(m.matches()); // Test error cases with && - p = Pattern.compile("[&&[xyz]]"); - m = p.matcher("&"); - // System.out.println(m.matches()); - m = p.matcher("x"); - // System.out.println(m.matches()); - m = p.matcher("y"); - // System.out.println(m.matches()); + // This is an RI bug that icu4c doesn't have. + if (false) { + p = Pattern.compile("[&&[xyz]]"); + m = p.matcher("&"); + // System.out.println(m.matches()); + m = p.matcher("x"); + // System.out.println(m.matches()); + m = p.matcher("y"); + // System.out.println(m.matches()); + } p = Pattern.compile("[[xyz]&[axy]]"); m = p.matcher("x"); // System.out.println(m.matches()); @@ -521,7 +536,10 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); m = p.matcher("a"); // System.out.println(m.matches()); - p = Pattern.compile("[[xyz]&&]"); + // icu4c rightly considers a missing rhs to && a syntax error. + if (false) { + p = Pattern.compile("[[xyz]&&]"); + } p = Pattern.compile("[[abc]&]"); @@ -795,54 +813,6 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); // TODO } - public void testUnicodeCategories() throws PatternSyntaxException { - // Test Unicode categories using \p and \P - // One letter codes: L, M, N, P, S, Z, C - // Two letter codes: Lu, Nd, Sc, Sm, ... - // See java.lang.Character and Unicode standard for complete list - // TODO - // Test \p{L} - // TODO - - // Test \p{N} - // TODO - - // ... etc - - // Test two letter codes: - // From unicode.org: - // Lu - // Ll - // Lt - // Lm - // Lo - // Mn - // Mc - // Me - // Nd - // Nl - // No - // Pc - // Pd - // Ps - // Pe - // Pi - // Pf - // Po - // Sm - // Sc - // Sk - // So - // Zs - // Zl - // Zp - // Cc - // Cf - // Cs - // Co - // Cn - } - public void testUnicodeBlocks() throws PatternSyntaxException { Pattern p; Matcher m; @@ -881,109 +851,34 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); if (UBlocks[i].low > 0) { m = p.matcher(Character.toString((char) (UBlocks[i].low - 1))); - assertFalse(m.matches()); + assertFalse(UBlocks[i].name, m.matches()); } for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) { m = p.matcher(Character.toString((char) j)); - assertTrue(m.matches()); + assertTrue(UBlocks[i].name, m.matches()); } if (UBlocks[i].high < 0xFFFF) { m = p.matcher(Character.toString((char) (UBlocks[i].high + 1))); - assertFalse(m.matches()); + assertFalse(UBlocks[i].name, m.matches()); } p = Pattern.compile("\\P{In" + UBlocks[i].name + "}"); if (UBlocks[i].low > 0) { m = p.matcher(Character.toString((char) (UBlocks[i].low - 1))); - assertTrue(m.matches()); + assertTrue(UBlocks[i].name, m.matches()); } for (j = UBlocks[i].low; j < UBlocks[i].high; j++) { m = p.matcher(Character.toString((char) j)); - assertFalse(m.matches()); + assertFalse(UBlocks[i].name, m.matches()); } if (UBlocks[i].high < 0xFFFF) { m = p.matcher(Character.toString((char) (UBlocks[i].high + 1))); - assertTrue(m.matches()); + assertTrue(UBlocks[i].name, m.matches()); } } } - public void testCapturingGroups() throws PatternSyntaxException { - // Test simple capturing groups - // TODO - - // Test grouping without capture (?:...) - // TODO - - // Test combination of grouping and capture - // TODO - - // Test \<num> sequence with capturing and non-capturing groups - // TODO - - // Test \<num> with <num> out of range - // TODO - } - - public void testRepeats() { - // Test ? - // TODO - - // Test * - // TODO - - // Test + - // TODO - - // Test {<num>}, including 0, 1 and more - // TODO - - // Test {<num>,}, including 0, 1 and more - // TODO - - // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?) - // TODO - } - - public void testAnchors() throws PatternSyntaxException { - // Test ^, default and MULTILINE - // TODO - - // Test $, default and MULTILINE - // TODO - - // Test \b (word boundary) - // TODO - - // Test \B (not a word boundary) - // TODO - - // Test \A (beginning of string) - // TODO - - // Test \Z (end of string) - // TODO - - // Test \z (end of string) - // TODO - - // Test \G - // TODO - - // Test positive lookahead using (?=...) - // TODO - - // Test negative lookahead using (?!...) - // TODO - - // Test positive lookbehind using (?<=...) - // TODO - - // Test negative lookbehind using (?<!...) - // TODO - } - public void testMisc() throws PatternSyntaxException { Pattern p; Matcher m; @@ -1127,11 +1022,9 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); assertFalse(m.find()); m = p.matcher(""); - // FIXME: This matches the reference behaviour but is - // inconsistent with matching "a" - ie. the end of the - // target string should match against $ always but this - // appears to work with the null string only when not in - // multiline mode (see below) + // This differs from the RI behaviour but seems more correct. + assertTrue(m.find()); + assertTrue(m.group().equals("")); assertFalse(m.find()); p = Pattern.compile("^.*$"); @@ -1371,7 +1264,7 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); /* 3300; 33FF; CJK Compatibility */ new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY /* 3400; 4DB5; CJK Unified Ideographs Extension A */ - new UBInfo(0x3400, 0x4DB5, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A + new UBInfo(0x3400, 0x4DBF, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A /* 4E00; 9FFF; CJK Unified Ideographs */ new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS /* A000; A48F; Yi Syllables */ @@ -1379,7 +1272,7 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); /* A490; A4CF; Yi Radicals */ new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS /* AC00; D7A3; Hangul Syllables */ - new UBInfo(0xAC00, 0xD7A3, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES + new UBInfo(0xAC00, 0xD7AF, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES /* D800; DB7F; High Surrogates */ /* DB80; DBFF; High Private Use Surrogates */ /* DC00; DFFF; Low Surrogates */ @@ -1397,13 +1290,10 @@ p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); /* FE50; FE6F; Small Form Variants */ new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS /* FE70; FEFE; Arabic Presentation Forms-B */ - // new UBInfo (0xFE70,0xFEFE,"InArabicPresentationForms-B"), // - // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B - /* FEFF; FEFF; Specials */ - new UBInfo(0xFEFF, 0xFEFF, "Specials"), // Character.UnicodeBlock.SPECIALS + new UBInfo(0xFE70, 0xFEFF, "ArabicPresentationForms-B"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B /* FF00; FFEF; Halfwidth and Fullwidth Forms */ new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS /* FFF0; FFFD; Specials */ - new UBInfo(0xFFF0, 0xFFFD, "Specials") // Character.UnicodeBlock.SPECIALS + new UBInfo(0xFFF0, 0xFFFF, "Specials") // Character.UnicodeBlock.SPECIALS }; -}
\ No newline at end of file +} diff --git a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/PatternErrorTest.java b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/PatternErrorTest.java index a8eef6d..4abb2a0 100644 --- a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/PatternErrorTest.java +++ b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/PatternErrorTest.java @@ -4,9 +4,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -50,11 +50,13 @@ public class PatternErrorTest extends TestCase { flags |= Pattern.COMMENTS; flags |= Pattern.DOTALL; flags |= Pattern.UNICODE_CASE; + flags &= ~Pattern.CANON_EQ; // Android always throws given this flag. Pattern.compile("foo", flags); // add invalid flags - should get IllegalArgumentException // regression test for HARMONY-4248 flags |= 0xFFFFFFFF; + flags &= ~Pattern.CANON_EQ; // Android always throws given this flag. try { Pattern.compile("foo", flags); fail("Expected IllegalArgumentException to be thrown"); diff --git a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java index 33da926..9977d91 100644 --- a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java +++ b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java @@ -277,6 +277,12 @@ public class PatternTest extends TestCase { } public void testFlagsMethod() { + // icu4c doesn't count inline flags that span the entire regex as being global flags. + // Android just returns those flags actually passed to Pattern.compile. + if (true) { + return; + } + String baseString; Pattern pat; @@ -507,16 +513,22 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } - public void testRegressions() { + public void test_bug_181() { // Bug 181 Pattern.compile("[\\t-\\r]"); + } + public void test_bug_4472() { // HARMONY-4472 Pattern.compile("a*.+"); + } + public void test_bug_187() { // Bug187 - Pattern - .compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?<![|\\00-\\0146[^|\\04\\01\\04\\060\\f\\u224DO\\x1A\\xC4\\00\\02\\0315\\0351\\u84A8\\xCBt\\xCC\\06|\\0141\\00\\=\\e\\f\\x6B\\0026Tb\\040\\x76xJ&&[\\\\-\\]\\05\\07\\02\\u2DAF\\t\\x9C\\e\\0023\\02\\,X\\e|\\u6058flY\\u954C]]]{5}?)(?<=\\p{Sc}{8}+)[^|\\026-\\u89BA|o\\u6277\\t\\07\\x50&&\\p{Punct}]{8,14}+((?<=^\\p{Punct})|(?idmsux-idmsux)||(?>[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?<![\\0111\\0371\\xDF\\u6A49\\07\\u2A4D\\00\\0212\\02Xd-\\xED[^\\a-\\0061|\\0257\\04\\f\\[\\0266\\043\\03\\x2D\\042&&[^\\f-\\]&&\\s]]])|(?>[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?<![^|\\0154U\\u0877\\03\\fy\\n\\|\\0147\\07-\\=[|q\\u69BE\\0243\\rp\\053\\02\\x33I\\u5E39\\u9C40\\052-\\xBC[|\\0064-\\?|\\uFC0C\\x30\\0060\\x45\\\\\\02\\?p\\xD8\\0155\\07\\0367\\04\\uF07B\\000J[^|\\0051-\\{|\\u9E4E\\u7328\\]\\u6AB8\\06\\x71\\a\\]\\e\\|KN\\u06AA\\0000\\063\\u2523&&[\\005\\0277\\x41U\\034\\}R\\u14C7\\u4767\\x09\\n\\054Ev\\0144\\<\\f\\,Q-\\xE4]]]]]{3}+)|(?>^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?<![|\\r-\\,&&[I\\t\\r\\0201\\xDB\\e&&[^|\\02\\06\\00\\<\\a\\u7952\\064\\051\\073\\x41\\?n\\040\\0053\\031&&[\\x15-\\|]]]]{8,11}?)(?![^|\\<-\\uA74B\\xFA\\u7CD2\\024\\07n\\<\\x6A\\0042\\uE4FF\\r\\u896B\\[\\=\\042Y&&^\\p{ASCII}]++)|(?<![R-\\|&&[\\a\\0120A\\u6145\\<\\050-d[|\\e-\\uA07C|\\016-\\u80D9]]]{1,}+)|(?idmsux-idmsux)|(?idmsux-idmsux)|(?idmsux-idmsux:\\B{6,}?)|(?<=\\D{5,8}?)|(?>[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))"); + Pattern.compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?<![|\\00-\\0146[^|\\04\\01\\04\\060\\f\\u224DO\\x1A\\xC4\\00\\02\\0315\\0351\\u84A8\\xCBt\\xCC\\06|\\0141\\00\\=\\e\\f\\x6B\\0026Tb\\040\\x76xJ&&[\\\\-\\]\\05\\07\\02\\u2DAF\\t\\x9C\\e\\0023\\02\\,X\\e|\\u6058flY\\u954C]]]{5}?)(?<=\\p{Sc}{8}+)[^|\\026-\\u89BA|o\\u6277\\t\\07\\x50&&\\p{Punct}]{8,14}+((?<=^\\p{Punct})|(?idmsux-idmsux)||(?>[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?<![\\0111\\0371\\xDF\\u6A49\\07\\u2A4D\\00\\0212\\02Xd-\\xED[^\\a-\\0061|\\0257\\04\\f\\[\\0266\\043\\03\\x2D\\042&&[^\\f-\\]&&\\s]]])|(?>[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?<![^|\\0154U\\u0877\\03\\fy\\n\\|\\0147\\07-\\=[|q\\u69BE\\0243\\rp\\053\\02\\x33I\\u5E39\\u9C40\\052-\\xBC[|\\0064-\\?|\\uFC0C\\x30\\0060\\x45\\\\\\02\\?p\\xD8\\0155\\07\\0367\\04\\uF07B\\000J[^|\\0051-\\{|\\u9E4E\\u7328\\]\\u6AB8\\06\\x71\\a\\]\\e\\|KN\\u06AA\\0000\\063\\u2523&&[\\005\\0277\\x41U\\034\\}R\\u14C7\\u4767\\x09\\n\\054Ev\\0144\\<\\f\\,Q-\\xE4]]]]]{3}+)|(?>^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?<![|\\r-\\,&&[I\\t\\r\\0201\\xDB\\e&&[^|\\02\\06\\00\\<\\a\\u7952\\064\\051\\073\\x41\\?n\\040\\0053\\031&&[\\x15-\\|]]]]{8,11}?)(?![^|\\<-\\uA74B\\xFA\\u7CD2\\024\\07n\\<\\x6A\\0042\\uE4FF\\r\\u896B\\[\\=\\042Y&&^\\p{ASCII}]++)|(?<![R-\\|&&[\\a\\0120A\\u6145\\<\\050-d[|\\e-\\uA07C|\\016-\\u80D9]]]{1,}+)|(?idmsux-idmsux)|(?idmsux-idmsux)|(?idmsux-idmsux:\\B{6,}?)|(?<=\\D{5,8}?)|(?>[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))"); + } + + public void test_bug_5858() { // HARMONY-5858 Pattern.compile("\\u6211", Pattern.LITERAL); } @@ -940,13 +952,11 @@ public class PatternTest extends TestCase { boolean isCompiled = false; try { - Pattern.compile("(?:)", Pattern.CANON_EQ); - Pattern.compile("(?:)", Pattern.CANON_EQ | Pattern.DOTALL); - Pattern - .compile("(?:)", Pattern.CANON_EQ - | Pattern.CASE_INSENSITIVE); - Pattern.compile("(?:)", Pattern.CANON_EQ | Pattern.COMMENTS - | Pattern.UNIX_LINES); + // icu4c doesn't support CANON_EQ. + Pattern.compile("(?:)"/*, Pattern.CANON_EQ*/); + Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.DOTALL); + Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.CASE_INSENSITIVE); + Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.COMMENTS | Pattern.UNIX_LINES); isCompiled = true; } catch (PatternSyntaxException e) { System.out.println(e); @@ -1055,6 +1065,10 @@ public class PatternTest extends TestCase { } public void testCanonEqFlag() { + // icu4c doesn't support CANON_EQ. + if (true) { + return; + } /* * for decompositions see @@ -1251,6 +1265,11 @@ public class PatternTest extends TestCase { } public void testIndexesCanonicalEq() { + // icu4c doesn't support CANON_EQ. + if (true) { + return; + } + String baseString; String testString; Pattern pat; @@ -1280,6 +1299,10 @@ public class PatternTest extends TestCase { } public void testCanonEqFlagWithSupplementaryCharacters() { + // icu4c doesn't support CANON_EQ. + if (true) { + return; + } /* * \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32 diff --git a/luni/src/main/java/java/util/regex/Matcher.java b/luni/src/main/java/java/util/regex/Matcher.java index 320e14c..d58d092 100644 --- a/luni/src/main/java/java/util/regex/Matcher.java +++ b/luni/src/main/java/java/util/regex/Matcher.java @@ -656,6 +656,16 @@ public final class Matcher implements MatchResult { } } + /** + * Returns a string representing this {@code Matcher}. + * The format of this string is unspecified. + */ + @Override public String toString() { + return getClass().getName() + "[pattern=" + pattern() + + " region=" + regionStart() + "," + regionEnd() + + " lastmatch=" + (matchFound ? group() : "") + "]"; + } + private static native void closeImpl(long addr); private static native boolean findImpl(long addr, String s, int startIndex, int[] offsets); private static native boolean findNextImpl(long addr, String s, int[] offsets); diff --git a/luni/src/main/java/java/util/regex/Pattern.java b/luni/src/main/java/java/util/regex/Pattern.java index 44b749e..45bd800 100644 --- a/luni/src/main/java/java/util/regex/Pattern.java +++ b/luni/src/main/java/java/util/regex/Pattern.java @@ -385,6 +385,10 @@ public final class Pattern implements Serializable { if ((flags & CANON_EQ) != 0) { throw new UnsupportedOperationException("CANON_EQ flag not supported"); } + int supportedFlags = CASE_INSENSITIVE | COMMENTS | DOTALL | LITERAL | MULTILINE | UNICODE_CASE | UNIX_LINES; + if ((flags & ~supportedFlags) != 0) { + throw new IllegalArgumentException("Unsupported flags: " + (flags & ~supportedFlags)); + } this.pattern = pattern; this.flags = flags; compile(); diff --git a/luni/src/test/java/libcore/java/util/regex/OldMatcherTest.java b/luni/src/test/java/libcore/java/util/regex/OldMatcherTest.java index 8d6e186..deb0626 100644 --- a/luni/src/test/java/libcore/java/util/regex/OldMatcherTest.java +++ b/luni/src/test/java/libcore/java/util/regex/OldMatcherTest.java @@ -421,44 +421,6 @@ public class OldMatcherTest extends TestCase { } - // BEGIN android-note - // Test took ages, now going in steps of 16 code points to speed things up. - // END android-note - public void testAllCodePoints() { - // Regression for HARMONY-3145 - int[] codePoint = new int[1]; - Pattern p = Pattern.compile("(\\p{all})+"); - boolean res = true; - int cnt = 0; - String s; - for (int i = 0; i < 0x110000; i = i + 0x10) { - codePoint[0] = i; - s = new String(codePoint, 0, 1); - if (!s.matches(p.toString())) { - cnt++; - res = false; - } - } - assertTrue(res); - assertEquals(0, cnt); - - p = Pattern.compile("(\\P{all})+"); - res = true; - cnt = 0; - - for (int i = 0; i < 0x110000; i = i + 0x10) { - codePoint[0] = i; - s = new String(codePoint, 0, 1); - if (!s.matches(p.toString())) { - cnt++; - res = false; - } - } - - assertFalse(res); - assertEquals(0x110000 / 0x10, cnt); - } - public void test_regionStart() { String testPattern = "(abb)"; String testString = "cccabbabbabbabbabb"; diff --git a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/Pattern2Test.java b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/Pattern2Test.java index 389194a..ee8ecfa 100644 --- a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/Pattern2Test.java +++ b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/Pattern2Test.java @@ -28,784 +28,6 @@ import junit.framework.TestCase; */ public class Pattern2Test extends TestCase { - public void testSimpleMatch() throws PatternSyntaxException { - Pattern p = Pattern.compile("foo.*"); - - Matcher m1 = p.matcher("foo123"); - assertTrue(m1.matches()); - assertTrue(m1.find(0)); - assertTrue(m1.lookingAt()); - - Matcher m2 = p.matcher("fox"); - assertFalse(m2.matches()); - assertFalse(m2.find(0)); - assertFalse(m2.lookingAt()); - - assertTrue(Pattern.matches("foo.*", "foo123")); - assertFalse(Pattern.matches("foo.*", "fox")); - - assertFalse(Pattern.matches("bar", "foobar")); - - assertTrue(Pattern.matches("", "")); - } - public void testCursors() { - Pattern p; - Matcher m; - - try { - p = Pattern.compile("foo"); - - m = p.matcher("foobar"); - assertTrue(m.find()); - assertEquals(0, m.start()); - assertEquals(3, m.end()); - assertFalse(m.find()); - - // Note: also testing reset here - m.reset(); - assertTrue(m.find()); - assertEquals(0, m.start()); - assertEquals(3, m.end()); - assertFalse(m.find()); - - m.reset("barfoobar"); - assertTrue(m.find()); - assertEquals(3, m.start()); - assertEquals(6, m.end()); - assertFalse(m.find()); - - m.reset("barfoo"); - assertTrue(m.find()); - assertEquals(3, m.start()); - assertEquals(6, m.end()); - assertFalse(m.find()); - - m.reset("foobarfoobarfoo"); - assertTrue(m.find()); - assertEquals(0, m.start()); - assertEquals(3, m.end()); - assertTrue(m.find()); - assertEquals(6, m.start()); - assertEquals(9, m.end()); - assertTrue(m.find()); - assertEquals(12, m.start()); - assertEquals(15, m.end()); - assertFalse(m.find()); - assertTrue(m.find(0)); - assertEquals(0, m.start()); - assertEquals(3, m.end()); - assertTrue(m.find(4)); - assertEquals(6, m.start()); - assertEquals(9, m.end()); - } catch (PatternSyntaxException e) { - System.out.println(e.getMessage()); - fail(); - } - } - public void testGroups() throws PatternSyntaxException { - Pattern p; - Matcher m; - - p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)"); - - m = p.matcher("p1#q3p2q42p5p71p63#q888"); - assertTrue(m.find()); - assertEquals(0, m.start()); - assertEquals(5, m.end()); - assertEquals(2, m.groupCount()); - assertEquals(0, m.start(0)); - assertEquals(5, m.end(0)); - assertEquals(0, m.start(1)); - assertEquals(2, m.end(1)); - assertEquals(3, m.start(2)); - assertEquals(5, m.end(2)); - assertEquals("p1#q3", m.group()); - assertEquals("p1#q3", m.group(0)); - assertEquals("p1", m.group(1)); - assertEquals("q3", m.group(2)); - - assertTrue(m.find()); - assertEquals(5, m.start()); - assertEquals(10, m.end()); - assertEquals(2, m.groupCount()); - assertEquals(10, m.end(0)); - assertEquals(5, m.start(1)); - assertEquals(7, m.end(1)); - assertEquals(7, m.start(2)); - assertEquals(10, m.end(2)); - assertEquals("p2q42", m.group()); - assertEquals("p2q42", m.group(0)); - assertEquals("p2", m.group(1)); - assertEquals("q42", m.group(2)); - - assertTrue(m.find()); - assertEquals(15, m.start()); - assertEquals(23, m.end()); - assertEquals(2, m.groupCount()); - assertEquals(15, m.start(0)); - assertEquals(23, m.end(0)); - assertEquals(15, m.start(1)); - assertEquals(18, m.end(1)); - assertEquals(19, m.start(2)); - assertEquals(23, m.end(2)); - assertEquals("p63#q888", m.group()); - assertEquals("p63#q888", m.group(0)); - assertEquals("p63", m.group(1)); - assertEquals("q888", m.group(2)); - assertFalse(m.find()); - } - - public void testReplace() throws PatternSyntaxException { - Pattern p; - Matcher m; - - // Note: examples from book, - // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171 - p = Pattern.compile("a*b"); - - m = p.matcher("aabfooaabfooabfoob"); - assertTrue(m.replaceAll("-").equals("-foo-foo-foo-")); - assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob")); - - /* - * p = Pattern.compile ("\\p{Blank}"); - * - * m = p.matcher ("fee fie foe fum"); assertTrue - * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue - * (m.replaceAll("-").equals ("fee-fie-foe-fum")); - */ - - p = Pattern.compile("([bB])yte"); - - m = p.matcher("Byte for byte"); - assertTrue(m.replaceFirst("$1ite").equals("Bite for byte")); - assertTrue(m.replaceAll("$1ite").equals("Bite for bite")); - - p = Pattern.compile("\\d\\d\\d\\d([- ])"); - - m = p.matcher("card #1234-5678-1234"); - assertTrue(m.replaceFirst("xxxx$1").equals("card #xxxx-5678-1234")); - assertTrue(m.replaceAll("xxxx$1").equals("card #xxxx-xxxx-1234")); - - p = Pattern.compile("(up|left)( *)(right|down)"); - - m = p.matcher("left right, up down"); - assertTrue(m.replaceFirst("$3$2$1").equals("right left, up down")); - assertTrue(m.replaceAll("$3$2$1").equals("right left, down up")); - - p = Pattern.compile("([CcPp][hl]e[ea]se)"); - - m = p.matcher("I want cheese. Please."); - assertTrue(m.replaceFirst("<b> $1 </b>").equals( - "I want <b> cheese </b>. Please.")); - assertTrue(m.replaceAll("<b> $1 </b>").equals( - "I want <b> cheese </b>. <b> Please </b>.")); - } - - public void testEscapes() throws PatternSyntaxException { - Pattern p; - Matcher m; - - // Test \\ sequence - p = Pattern.compile("([a-z]+)\\\\([a-z]+);"); - m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;"); - assertTrue(m.find()); - assertEquals("fred", m.group(1)); - assertEquals("ginger", m.group(2)); - assertTrue(m.find()); - assertEquals("abbott", m.group(1)); - assertEquals("costello", m.group(2)); - assertTrue(m.find()); - assertEquals("jekell", m.group(1)); - assertEquals("hyde", m.group(2)); - assertFalse(m.find()); - - // Test \n, \t, \r, \f, \e, \a sequences - p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)"); - m = p.matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh"); - assertTrue(m.find()); - assertEquals("aa", m.group(1)); - assertEquals("bb", m.group(2)); - assertTrue(m.find()); - assertEquals("cc", m.group(1)); - assertEquals("dd", m.group(2)); - assertTrue(m.find()); - assertEquals("ee", m.group(1)); - assertEquals("ff", m.group(2)); - assertTrue(m.find()); - assertEquals("gg", m.group(1)); - assertEquals("hh", m.group(2)); - assertFalse(m.find()); - - // Test \\u and \\x sequences -/* p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); - m = p.matcher("11:;22 ;33-;44!;"); - assertTrue(m.find()); - assertEquals("11", m.group(1)); - assertTrue(m.find()); - assertEquals("22", m.group(1)); - assertTrue(m.find()); - assertEquals("44", m.group(1)); - assertFalse(m.find()); -*/ - // Test invalid unicode sequences -/* try { - p = Pattern.compile("\\u"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\u;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\u002"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\u002;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - // Test invalid hex sequences - try { - p = Pattern.compile("\\x"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\x;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\xa"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\xa;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } -*/ - // Test \0 (octal) sequences (1, 2 and 3 digit) - p = Pattern.compile("([0-9]+)[\\07\\040\\0160];"); - m = p.matcher("11\u0007;22:;33 ;44p;"); - assertTrue(m.find()); - assertEquals("11", m.group(1)); - assertTrue(m.find()); - assertEquals("33", m.group(1)); - assertTrue(m.find()); - assertEquals("44", m.group(1)); - assertFalse(m.find()); - - // Test invalid octal sequences - try { - p = Pattern.compile("\\08"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - //originally contributed test did not check the result - //TODO: check what RI does here -// try { -// p = Pattern.compile("\\0477"); -// fail("PatternSyntaxException expected"); -// } catch (PatternSyntaxException e) { -// } - - try { - p = Pattern.compile("\\0"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\0;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - - // Test \c (control character) sequence - p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];"); - m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;"); - assertTrue(m.find()); - assertEquals("11", m.group(1)); - assertTrue(m.find()); - assertEquals("33", m.group(1)); - assertTrue(m.find()); - assertEquals("55", m.group(1)); - assertTrue(m.find()); - assertEquals("66", m.group(1)); - assertFalse(m.find()); - - // More thorough control escape test - // Ensure that each escape matches exactly the corresponding - // character - // code and no others (well, from 0-255 at least) - int i, j; - for (i = 0; i < 26; i++) { - p = Pattern.compile("\\c" + Character.toString((char) ('A' + i))); - int match_char = -1; - for (j = 0; j < 255; j++) { - m = p.matcher(Character.toString((char) j)); - if (m.matches()) { - assertEquals(-1, match_char); - match_char = j; - } - } - assertTrue(match_char == i + 1); - } - - // Test invalid control escapes -// BEGIN android-removed -// ICU doesn't complain about illegal control sequences -// try { -// p = Pattern.compile("\\c"); -// fail("PatternSyntaxException expected"); -// } catch (PatternSyntaxException e) { -// } -// END android-removed - - //originally contributed test did not check the result - //TODO: check what RI does here -// try { -// p = Pattern.compile("\\c;"); -// fail("PatternSyntaxException expected"); -// } catch (PatternSyntaxException e) { -// } -// -// try { -// p = Pattern.compile("\\ca;"); -// fail("PatternSyntaxException expected"); -// } catch (PatternSyntaxException e) { -// } -// -// try { -// p = Pattern.compile("\\c4;"); -// fail("PatternSyntaxException expected"); -// } catch (PatternSyntaxException e) { -// } - } - public void testCharacterClasses() throws PatternSyntaxException { - Pattern p; - Matcher m; - - // Test one character range - p = Pattern.compile("[p].*[l]"); - m = p.matcher("paul"); - assertTrue(m.matches()); - m = p.matcher("pool"); - assertTrue(m.matches()); - m = p.matcher("pong"); - assertFalse(m.matches()); - m = p.matcher("pl"); - assertTrue(m.matches()); - - // Test two character range - p = Pattern.compile("[pm].*[lp]"); - m = p.matcher("prop"); - assertTrue(m.matches()); - m = p.matcher("mall"); - assertTrue(m.matches()); - m = p.matcher("pong"); - assertFalse(m.matches()); - m = p.matcher("pill"); - assertTrue(m.matches()); - - // Test range including [ and ] - p = Pattern.compile("[<\\[].*[\\]>]"); - m = p.matcher("<foo>"); - assertTrue(m.matches()); - m = p.matcher("[bar]"); - assertTrue(m.matches()); - m = p.matcher("{foobar]"); - assertFalse(m.matches()); - m = p.matcher("<pill]"); - assertTrue(m.matches()); - - // Test range using ^ - p = Pattern.compile("[^bc][a-z]+[tr]"); - m = p.matcher("pat"); - assertTrue(m.matches()); - m = p.matcher("liar"); - assertTrue(m.matches()); - m = p.matcher("car"); - assertFalse(m.matches()); - m = p.matcher("gnat"); - assertTrue(m.matches()); - - // Test character range using - - p = Pattern.compile("[a-z]_+[a-zA-Z]-+[0-9p-z]"); - m = p.matcher("d__F-8"); - assertTrue(m.matches()); - m = p.matcher("c_a-q"); - assertTrue(m.matches()); - m = p.matcher("a__R-a"); - assertFalse(m.matches()); - m = p.matcher("r_____d-----5"); - assertTrue(m.matches()); - - // Test range using unicode characters and unicode and hex escapes - p = Pattern.compile("[\\u1234-\\u2345]_+[a-z]-+[\u0001-\\x11]"); - m = p.matcher("\u2000_q-\u0007"); - assertTrue(m.matches()); - m = p.matcher("\u1234_z-\u0001"); - assertTrue(m.matches()); - m = p.matcher("r_p-q"); - assertFalse(m.matches()); - m = p.matcher("\u2345_____d-----\n"); - assertTrue(m.matches()); - -// BEGIN android-removed -// The "---" collides with ICU's "--" operator and is likely to be a user error -// anyway, so we simply comment this one out. -// // Test ranges including the "-" character -// p = Pattern.compile("[\\*-/]_+[---]!+[--AP]"); -// m = p.matcher("-_-!!A"); -// assertTrue(m.matches()); -// m = p.matcher("\u002b_-!!!-"); -// assertTrue(m.matches()); -// m = p.matcher("!_-!@"); -// assertFalse(m.matches()); -// m = p.matcher(",______-!!!!!!!P"); -// assertTrue(m.matches()); -// END android-removed - - // Test nested ranges - p = Pattern.compile("[pm[t]][a-z]+[[r]lp]"); - m = p.matcher("prop"); - assertTrue(m.matches()); - m = p.matcher("tsar"); - assertTrue(m.matches()); - m = p.matcher("pong"); - assertFalse(m.matches()); - m = p.matcher("moor"); - assertTrue(m.matches()); - - // Test character class intersection with && - // TODO: figure out what x&&y or any class with a null intersection - // set (like [[a-c]&&[d-f]]) might mean. It doesn't mean "match - // nothing" and doesn't mean "match anything" so I'm stumped. - p = Pattern.compile("[[a-p]&&[g-z]]+-+[[a-z]&&q]-+[x&&[a-z]]-+"); - m = p.matcher("h--q--x--"); - assertTrue(m.matches()); - m = p.matcher("hog--q-x-"); - assertTrue(m.matches()); - m = p.matcher("ape--q-x-"); - assertFalse(m.matches()); - m = p.matcher("mop--q-x----"); - assertTrue(m.matches()); - - // Test error cases with && -// BEGIN android-removed -// This is more of a bug, and ICU doesn't have this behavior. -// p = Pattern.compile("[&&[xyz]]"); -// m = p.matcher("&"); -// // System.out.println(m.matches()); -// m = p.matcher("x"); -// // System.out.println(m.matches()); -// m = p.matcher("y"); -// // System.out.println(m.matches()); -// END android-removed - p = Pattern.compile("[[xyz]&[axy]]"); - m = p.matcher("x"); - // System.out.println(m.matches()); - m = p.matcher("z"); - // System.out.println(m.matches()); - m = p.matcher("&"); - // System.out.println(m.matches()); - p = Pattern.compile("[abc[123]&&[345]def]"); - m = p.matcher("a"); - // System.out.println(m.matches()); - -// BEGIN android-removed -// This is more of a bug, and ICU doesn't have this behavior. -// p = Pattern.compile("[[xyz]&&]"); -// END android-removed - p = Pattern.compile("[[abc]&]"); - - try { - p = Pattern.compile("[[abc]&&"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - p = Pattern.compile("[[abc]\\&&[xyz]]"); - - p = Pattern.compile("[[abc]&\\&[xyz]]"); - - // Test 3-way intersection - p = Pattern.compile("[[a-p]&&[g-z]&&[d-k]]"); - m = p.matcher("g"); - assertTrue(m.matches()); - m = p.matcher("m"); - assertFalse(m.matches()); - - // Test nested intersection - p = Pattern.compile("[[[a-p]&&[g-z]]&&[d-k]]"); - m = p.matcher("g"); - assertTrue(m.matches()); - m = p.matcher("m"); - assertFalse(m.matches()); - - // Test character class subtraction with && and ^ - p = Pattern.compile("[[a-z]&&[^aeiou]][aeiou][[^xyz]&&[a-z]]"); - m = p.matcher("pop"); - assertTrue(m.matches()); - m = p.matcher("tag"); - assertTrue(m.matches()); - m = p.matcher("eat"); - assertFalse(m.matches()); - m = p.matcher("tax"); - assertFalse(m.matches()); - m = p.matcher("zip"); - assertTrue(m.matches()); - - // Test . (DOT), with and without DOTALL - // Note: DOT not allowed in character classes - p = Pattern.compile(".+/x.z"); - m = p.matcher("!$/xyz"); - assertTrue(m.matches()); - m = p.matcher("%\n\r/x\nz"); - assertFalse(m.matches()); - p = Pattern.compile(".+/x.z", Pattern.DOTALL); - m = p.matcher("%\n\r/x\nz"); - assertTrue(m.matches()); - - // Test \d (digit) - p = Pattern.compile("\\d+[a-z][\\dx]"); - m = p.matcher("42a6"); - assertTrue(m.matches()); - m = p.matcher("21zx"); - assertTrue(m.matches()); - m = p.matcher("ab6"); - assertFalse(m.matches()); - m = p.matcher("56912f9"); - assertTrue(m.matches()); - - // Test \D (not a digit) - p = Pattern.compile("\\D+[a-z]-[\\D3]"); - m = p.matcher("za-p"); - assertTrue(m.matches()); - m = p.matcher("%!e-3"); - assertTrue(m.matches()); - m = p.matcher("9a-x"); - assertFalse(m.matches()); - m = p.matcher("\u1234pp\ny-3"); - assertTrue(m.matches()); - - // Test \s (whitespace) - p = Pattern.compile("<[a-zA-Z]+\\s+[0-9]+[\\sx][^\\s]>"); - m = p.matcher("<cat \t1\fx>"); - assertTrue(m.matches()); - m = p.matcher("<cat \t1\f >"); - assertFalse(m.matches()); - m = p - .matcher("xyz <foo\n\r22 5> <pp \t\n\f\r \u000b41x\u1234><pp \nx7\rc> zzz"); - assertTrue(m.find()); - assertTrue(m.find()); - assertFalse(m.find()); - - // Test \S (not whitespace) - p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>"); - m = p.matcher("<f $0**\n** 221>"); - assertTrue(m.matches()); - m = p.matcher("<x 441\t221>"); - assertTrue(m.matches()); - m = p.matcher("<z \t9\ng 221>"); - assertFalse(m.matches()); - m = p.matcher("<z 60\ngg\u1234\f221>"); - assertTrue(m.matches()); - p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>"); - m = p.matcher("<f $0**\n** 221x>"); - assertTrue(m.matches()); - m = p.matcher("<x 441\t221z>"); - assertTrue(m.matches()); - m = p.matcher("<x 441\t221 >"); - assertFalse(m.matches()); - m = p.matcher("<x 441\t221c>"); - assertFalse(m.matches()); - m = p.matcher("<z \t9\ng 221x>"); - assertFalse(m.matches()); - m = p.matcher("<z 60\ngg\u1234\f221\u0001>"); - assertTrue(m.matches()); - - // Test \w (ascii word) - p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;"); - m = p.matcher("<f1 99;!foo5/a$7;"); - assertTrue(m.matches()); - m = p.matcher("<f$ 99;!foo5/a$7;"); - assertFalse(m.matches()); - m = p - .matcher("<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789 99;!foo5/a$7;"); - assertTrue(m.matches()); - - // Test \W (not an ascii word) - p = Pattern.compile("<\\W\\w+\\s[0-9]+;[\\W_][^\\W]+\\s[0-9]+;"); - m = p.matcher("<$foo3\n99;_bar\t0;"); - assertTrue(m.matches()); - m = p.matcher("<hh 99;_g 0;"); - assertFalse(m.matches()); - m = p.matcher("<*xx\t00;^zz\f11;"); - assertTrue(m.matches()); - - // Test x|y pattern - // TODO - } - public void testPOSIXGroups() throws PatternSyntaxException { - Pattern p; - Matcher m; - - // Test POSIX groups using \p and \P (in the group and not in the group) - // Groups are Lower, Upper, ASCII, Alpha, Digit, XDigit, Alnum, Punct, - // Graph, Print, Blank, Space, Cntrl - // Test \p{Lower} - /* - * FIXME: Requires complex range processing - * p = Pattern.compile("<\\p{Lower}\\d\\P{Lower}:[\\p{Lower}Z]\\s[^\\P{Lower}]>"); - * m = p.matcher("<a4P:g x>"); assertTrue(m.matches()); m = - * p.matcher("<p4%:Z\tq>"); assertTrue(m.matches()); m = - * p.matcher("<A6#:e e>"); assertFalse(m.matches()); - */ - p = Pattern.compile("\\p{Lower}+"); - m = p.matcher("abcdefghijklmnopqrstuvwxyz"); - assertTrue(m.matches()); - - // Invalid uses of \p{Lower} - try { - p = Pattern.compile("\\p"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\p;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\p{"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\p{;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\p{Lower"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\p{Lower;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - // Test \p{Upper} - /* - * FIXME: Requires complex range processing - * p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>"); - * m = p.matcher("<A4p:G X>"); assertTrue(m.matches()); m = - * p.matcher("<P4%:z\tQ>"); assertTrue(m.matches()); m = - * p.matcher("<a6#:E E>"); assertFalse(m.matches()); - */ - p = Pattern.compile("\\p{Upper}+"); - m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - assertTrue(m.matches()); - - // Invalid uses of \p{Upper} - try { - p = Pattern.compile("\\p{Upper"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\p{Upper;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - // Test \p{ASCII} - /* - * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>"); - * m = p.matcher("<A4\u0080:G X>"); assertTrue(m.matches()); m = - * p.matcher("<P4\u00ff:\u1234\t\n>"); assertTrue(m.matches()); m = - * p.matcher("<\u00846#:E E>"); assertFalse(m.matches()) - */ - int i; - p = Pattern.compile("\\p{ASCII}"); - for (i = 0; i < 0x80; i++) { - m = p.matcher(Character.toString((char) i)); - assertTrue(m.matches()); - } - for (; i < 0xff; i++) { - m = p.matcher(Character.toString((char) i)); - assertFalse(m.matches()); - } - - // Invalid uses of \p{ASCII} - try { - p = Pattern.compile("\\p{ASCII"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - try { - p = Pattern.compile("\\p{ASCII;"); - fail("PatternSyntaxException expected"); - } catch (PatternSyntaxException e) { - } - - // Test \p{Alpha} - // TODO - - // Test \p{Digit} - // TODO - - // Test \p{XDigit} - // TODO - - // Test \p{Alnum} - // TODO - - // Test \p{Punct} - // TODO - - // Test \p{Graph} - // TODO - - // Test \p{Print} - // TODO - - // Test \p{Blank} - // TODO - - // Test \p{Space} - // TODO - - // Test \p{Cntrl} - // TODO - } public void testUnicodeCategories() throws PatternSyntaxException { // Test Unicode categories using \p and \P // One letter codes: L, M, N, P, S, Z, C @@ -906,75 +128,6 @@ public class Pattern2Test extends TestCase { } } - public void testUnicodeBlocks() throws PatternSyntaxException { - Pattern p; - Matcher m; - int i, j; - - // Test Unicode blocks using \p and \P - // FIXME: - // Note that LatinExtended-B and ArabicPresentations-B are unrecognized - // by the reference JDK. - for (i = 0; i < UBlocks.length; i++) { - /* - * p = Pattern.compile("\\p{"+UBlocks[i].name+"}"); - * - * if (UBlocks[i].low > 0) { m = - * p.matcher(Character.toString((char)(UBlocks[i].low-1))); - * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <= - * UBlocks[i].high; j++) { m = - * p.matcher(Character.toString((char)j)); - * assertTrue(m.matches()); } if (UBlocks[i].high < 0xFFFF) { m = - * p.matcher(Character.toString((char)(UBlocks[i].high+1))); - * assertFalse(m.matches()); } - * - * p = Pattern.compile("\\P{"+UBlocks[i].name+"}"); - * - * if (UBlocks[i].low > 0) { m = - * p.matcher(Character.toString((char)(UBlocks[i].low-1))); - * assertTrue(m.matches()); } for (j=UBlocks[i].low; j < - * UBlocks[i].high; j++) { m = - * p.matcher(Character.toString((char)j)); - * assertFalse(m.matches()); } if (UBlocks[i].high < 0xFFFF) { m = - * p.matcher(Character.toString((char)(UBlocks[i].high+1))); - * assertTrue(m.matches()); } - */ - - p = Pattern.compile("\\p{In" + UBlocks[i].name + "}"); -// BEGIN android-changed -// Added the name of the block under test to the assertion to get more output. - - if (UBlocks[i].low > 0) { - m = p.matcher(Character.toString((char) (UBlocks[i].low - 1))); - assertFalse(UBlocks[i].name, m.matches()); - } - for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) { - m = p.matcher(Character.toString((char) j)); - assertTrue(UBlocks[i].name, m.matches()); - } - if (UBlocks[i].high < 0xFFFF) { - m = p.matcher(Character.toString((char) (UBlocks[i].high + 1))); - assertFalse(UBlocks[i].name, m.matches()); - } - - p = Pattern.compile("\\P{In" + UBlocks[i].name + "}"); - - if (UBlocks[i].low > 0) { - m = p.matcher(Character.toString((char) (UBlocks[i].low - 1))); - assertTrue(UBlocks[i].name, m.matches()); - } - for (j = UBlocks[i].low; j < UBlocks[i].high; j++) { - m = p.matcher(Character.toString((char) j)); - assertFalse(UBlocks[i].name, m.matches()); - } - if (UBlocks[i].high < 0xFFFF) { - m = p.matcher(Character.toString((char) (UBlocks[i].high + 1))); - assertTrue(UBlocks[i].name, m.matches()); - } - -// END android-changed - } - } public void testCapturingGroups() throws PatternSyntaxException { Pattern p; Matcher m; @@ -1142,436 +295,4 @@ public class Pattern2Test extends TestCase { // Test negative lookbehind using (?<!...) // TODO } - public void testMisc() throws PatternSyntaxException { - Pattern p; - Matcher m; - - // Test (?>...) - // TODO - - // Test (?onflags-offflags) - // Valid flags are i,m,d,s,u,x - // TODO - - // Test (?onflags-offflags:...) - // TODO - - // Test \Q, \E - p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+"); - m = p.matcher("abc;[a-z]+;\\Q(foo.*);411"); - assertTrue(m.matches()); - m = p.matcher("abc;def;foo42;555"); - assertFalse(m.matches()); - m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123"); - assertFalse(m.matches()); - - p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+"); - m = p.matcher("abc;foo5-(...);123"); - assertTrue(m.matches()); - assertEquals("foo5-(...)", m.group(1)); - m = p.matcher("abc;foo9-(xxx);789"); - assertFalse(m.matches()); - - p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+"); - m = p.matcher("abc;bar0-def$-;123"); - assertTrue(m.matches()); - - // FIXME: - // This should work the same as the pattern above but fails with the - // the reference JDK - p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+"); - m = p.matcher("abc;bar0-def$-;123"); - // assertTrue(m.matches()); - - // FIXME: - // This should work too .. it looks as if just about anything that - // has more - // than one character between \Q and \E is broken in the the reference JDK - p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+"); - m = p.matcher("abc;bar0-def[99]-]0x[;123"); - // assertTrue(m.matches()); - - // This is the same as above but with explicit escapes .. and this - // does work - // on the the reference JDK - p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+"); - m = p.matcher("abc;bar0-def[99]-]0x[;123"); - assertTrue(m.matches()); - - // Test #<comment text> - // TODO - } - public void testCompile1() throws PatternSyntaxException { - Pattern pattern = Pattern - .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*"); - String name = "iso-8859-1"; - assertTrue(pattern.matcher(name).matches()); - } - public void testCompile2() throws PatternSyntaxException { - String findString = "\\Qimport\\E"; - - Pattern pattern = Pattern.compile(findString, 0); - Matcher matcher = pattern.matcher(new String( - "import a.A;\n\n import b.B;\nclass C {}")); - - assertTrue(matcher.find(0)); - } - public void testCompile3() throws PatternSyntaxException { - Pattern p; - Matcher m; - p = Pattern.compile("a$"); - m = p.matcher("a\n"); - assertTrue(m.find()); - assertEquals("a", m.group()); - assertFalse(m.find()); - - p = Pattern.compile("(a$)"); - m = p.matcher("a\n"); - assertTrue(m.find()); - assertEquals("a", m.group()); - assertEquals("a", m.group(1)); - assertFalse(m.find()); - - p = Pattern.compile("^.*$", Pattern.MULTILINE); - - m = p.matcher("a\n"); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertEquals("a", m.group()); - assertFalse(m.find()); - - m = p.matcher("a\nb\n"); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertEquals("a", m.group()); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertEquals("b", m.group()); - assertFalse(m.find()); - - m = p.matcher("a\nb"); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertEquals("a", m.group()); - assertTrue(m.find()); - assertEquals("b", m.group()); - assertFalse(m.find()); - - m = p.matcher("\naa\r\nbb\rcc\n\n"); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertTrue(m.group().equals("")); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertEquals("aa", m.group()); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertEquals("bb", m.group()); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertEquals("cc", m.group()); - assertTrue(m.find()); - // System.out.println("["+m.group()+"]"); - assertTrue(m.group().equals("")); - assertFalse(m.find()); - - m = p.matcher("a"); - assertTrue(m.find()); - assertEquals("a", m.group()); - assertFalse(m.find()); - -// BEGIN android-removed -// Makes no sense to duplicate this weird behavior -// m = p.matcher(""); -// // FIXME: This matches the reference behaviour but is -// // inconsistent with matching "a" - ie. the end of the -// // target string should match against $ always but this -// // appears to work with the null string only when not in -// // multiline mode (see below) -// assertFalse(m.find()); -// END android-removed - - p = Pattern.compile("^.*$"); - m = p.matcher(""); - assertTrue(m.find()); - assertTrue(m.group().equals("")); - assertFalse(m.find()); - } - public void testCompile4() throws PatternSyntaxException { - String findString = "\\Qpublic\\E"; - StringBuffer text = new StringBuffer(" public class Class {\n" - + " public class Class {"); - - Pattern pattern = Pattern.compile(findString, 0); - Matcher matcher = pattern.matcher(text); - - boolean found = matcher.find(); - assertTrue(found); - assertEquals(4, matcher.start()); - if (found) { - // modify text - text.delete(0, text.length()); - text.append("Text have been changed."); - matcher.reset(text); - } - - found = matcher.find(); - assertFalse(found); - } - public void testCompile5() throws PatternSyntaxException { - Pattern p = Pattern.compile("^[0-9]"); - String s[] = p.split("12", -1); - assertEquals("", s[0]); - assertEquals("2", s[1]); - assertEquals(2, s.length); - } - - // public void testCompile6() { - // String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+"; - // String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+"; - // try { - // Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE); - // assertTrue(true); - // } catch (PatternSyntaxException e) { - // System.out.println(e.getMessage()); - // assertTrue(false); - // } - // } - - private static class UBInfo { - public UBInfo(int low, int high, String name) { - this.name = name; - this.low = low; - this.high = high; - } - - public String name; - - public int low, high; - } - - // A table representing the unicode categories - //private static UBInfo[] UCategories = { - // Lu - // Ll - // Lt - // Lm - // Lo - // Mn - // Mc - // Me - // Nd - // Nl - // No - // Pc - // Pd - // Ps - // Pe - // Pi - // Pf - // Po - // Sm - // Sc - // Sk - // So - // Zs - // Zl - // Zp - // Cc - // Cf - // Cs - // Co - // Cn - //}; - - // A table representing the unicode character blocks - private static UBInfo[] UBlocks = { - /* 0000; 007F; Basic Latin */ - new UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN - /* 0080; 00FF; Latin-1 Supplement */ - new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT - /* 0100; 017F; Latin Extended-A */ - new UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A - /* 0180; 024F; Latin Extended-B */ - // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), // - // Character.UnicodeBlock.LATIN_EXTENDED_B - /* 0250; 02AF; IPA Extensions */ - new UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS - /* 02B0; 02FF; Spacing Modifier Letters */ - new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS - /* 0300; 036F; Combining Diacritical Marks */ - new UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS - /* 0370; 03FF; Greek */ - new UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK - /* 0400; 04FF; Cyrillic */ - new UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC - /* 0530; 058F; Armenian */ - new UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN - /* 0590; 05FF; Hebrew */ - new UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW - /* 0600; 06FF; Arabic */ - new UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC - /* 0700; 074F; Syriac */ - new UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC - /* 0780; 07BF; Thaana */ - new UBInfo(0x0780, 0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA - /* 0900; 097F; Devanagari */ - new UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI - /* 0980; 09FF; Bengali */ - new UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI - /* 0A00; 0A7F; Gurmukhi */ - new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI - /* 0A80; 0AFF; Gujarati */ - new UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI - /* 0B00; 0B7F; Oriya */ - new UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA - /* 0B80; 0BFF; Tamil */ - new UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL - /* 0C00; 0C7F; Telugu */ - new UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU - /* 0C80; 0CFF; Kannada */ - new UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA - /* 0D00; 0D7F; Malayalam */ - new UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM - /* 0D80; 0DFF; Sinhala */ - new UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA - /* 0E00; 0E7F; Thai */ - new UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI - /* 0E80; 0EFF; Lao */ - new UBInfo(0x0E80, 0x0EFF, "Lao"), // Character.UnicodeBlock.LAO - /* 0F00; 0FFF; Tibetan */ - new UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN - /* 1000; 109F; Myanmar */ - new UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR - /* 10A0; 10FF; Georgian */ - new UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN - /* 1100; 11FF; Hangul Jamo */ - new UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO - /* 1200; 137F; Ethiopic */ - new UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC - /* 13A0; 13FF; Cherokee */ - new UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE - /* 1400; 167F; Unified Canadian Aboriginal Syllabics */ - new UBInfo(0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS - /* 1680; 169F; Ogham */ - new UBInfo(0x1680, 0x169F, "Ogham"), // Character.UnicodeBlock.OGHAM - /* 16A0; 16FF; Runic */ - new UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC - /* 1780; 17FF; Khmer */ - new UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER - /* 1800; 18AF; Mongolian */ - new UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN - /* 1E00; 1EFF; Latin Extended Additional */ - new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL - /* 1F00; 1FFF; Greek Extended */ - new UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED - /* 2000; 206F; General Punctuation */ - new UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION - /* 2070; 209F; Superscripts and Subscripts */ - new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS - /* 20A0; 20CF; Currency Symbols */ - new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS - /* 20D0; 20FF; Combining Marks for Symbols */ - new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS - /* 2100; 214F; Letterlike Symbols */ - new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS - /* 2150; 218F; Number Forms */ - new UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS - /* 2190; 21FF; Arrows */ - new UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS - /* 2200; 22FF; Mathematical Operators */ - new UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS - /* 2300; 23FF; Miscellaneous Technical */ - new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL - /* 2400; 243F; Control Pictures */ - new UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES - /* 2440; 245F; Optical Character Recognition */ - new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION - /* 2460; 24FF; Enclosed Alphanumerics */ - new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS - /* 2500; 257F; Box Drawing */ - new UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING - /* 2580; 259F; Block Elements */ - new UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS - /* 25A0; 25FF; Geometric Shapes */ - new UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES - /* 2600; 26FF; Miscellaneous Symbols */ - new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS - /* 2700; 27BF; Dingbats */ - new UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS - /* 2800; 28FF; Braille Patterns */ - new UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS - /* 2E80; 2EFF; CJK Radicals Supplement */ - new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT - /* 2F00; 2FDF; Kangxi Radicals */ - new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS - /* 2FF0; 2FFF; Ideographic Description Characters */ - new UBInfo(0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS - /* 3000; 303F; CJK Symbols and Punctuation */ - new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION - /* 3040; 309F; Hiragana */ - new UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA - /* 30A0; 30FF; Katakana */ - new UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA - /* 3100; 312F; Bopomofo */ - new UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO - /* 3130; 318F; Hangul Compatibility Jamo */ - new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO - /* 3190; 319F; Kanbun */ - new UBInfo(0x3190, 0x319F, "Kanbun"), // Character.UnicodeBlock.KANBUN - /* 31A0; 31BF; Bopomofo Extended */ - new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED - /* 3200; 32FF; Enclosed CJK Letters and Months */ - new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS - /* 3300; 33FF; CJK Compatibility */ - new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY - /* 3400; 4DB5; CJK Unified Ideographs Extension A */ -// BEGIN android-changed -// Modified this to reflect current Unicode tables (or maybe it was a typo) - new UBInfo(0x3400, 0x4DBF, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A -// END android-changed - /* 4E00; 9FFF; CJK Unified Ideographs */ - new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS - /* A000; A48F; Yi Syllables */ - new UBInfo(0xA000, 0xA48F, "YiSyllables"), // Character.UnicodeBlock.YI_SYLLABLES - /* A490; A4CF; Yi Radicals */ - new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS - /* AC00; D7A3; Hangul Syllables */ -// BEGIN android-changed -// Modified this to reflect current Unicode tables (or maybe it was a typo) - new UBInfo(0xAC00, 0xD7AF, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES -// END android-changed - /* D800; DB7F; High Surrogates */ - /* DB80; DBFF; High Private Use Surrogates */ - /* DC00; DFFF; Low Surrogates */ - /* E000; F8FF; Private Use */ - /* F900; FAFF; CJK Compatibility Ideographs */ - new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS - /* FB00; FB4F; Alphabetic Presentation Forms */ - new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS - /* FB50; FDFF; Arabic Presentation Forms-A */ - new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A - /* FE20; FE2F; Combining Half Marks */ - new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS - /* FE30; FE4F; CJK Compatibility Forms */ - new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS - /* FE50; FE6F; Small Form Variants */ - new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS - /* FE70; FEFE; Arabic Presentation Forms-B */ - // new UBInfo (0xFE70,0xFEFE,"InArabicPresentationForms-B"), // - // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B - /* FEFF; FEFF; Specials */ -// BEGIN android-changed -// Modified this to reflect current Unicode tables (or maybe it was a typo) -// FEFF is actually still Arabic Presentation Forms B -// new UBInfo(0xFEFF, 0xFEFF, "Specials"), // Character.UnicodeBlock.SPECIALS -// END android-changed - /* FF00; FFEF; Halfwidth and Fullwidth Forms */ - new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS - /* FFF0; FFFD; Specials */ - // BEGIN android-changed -// Modified this to reflect current Unicode tables (or maybe it was a typo) - new UBInfo(0xFFF0, 0xFFFF, "Specials") // Character.UnicodeBlock.SPECIALS -// END android-changed - }; } diff --git a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternErrorTest.java b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternErrorTest.java deleted file mode 100644 index fd2c3ae..0000000 --- a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternErrorTest.java +++ /dev/null @@ -1,83 +0,0 @@ -/* Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.harmony.regex.tests.java.util.regex; - -import java.util.regex.Pattern; -import junit.framework.TestCase; - -/** - * Test boundary and error conditions in java.util.regex.Pattern - * - */ -public class PatternErrorTest extends TestCase { - public void testCompileErrors() throws Exception { - // null regex string - should get NullPointerException - try { - Pattern.compile(null); - fail("NullPointerException expected"); - } catch (NullPointerException e) { - } - - - // empty regex string - no exception should be thrown - Pattern.compile(""); - - // note: invalid regex syntax checked in PatternSyntaxExceptionTest - - // flags = 0 should raise no exception - int flags = 0; - Pattern.compile("foo", flags); - - // check that all valid flags accepted without exception - flags |= Pattern.UNIX_LINES; - flags |= Pattern.CASE_INSENSITIVE; - flags |= Pattern.MULTILINE; - // BEGIN android-changed - // We don't support that flag. - // flags |= Pattern.CANON_EQ; - // END android-changed - flags |= Pattern.COMMENTS; - flags |= Pattern.DOTALL; - flags |= Pattern.UNICODE_CASE; - Pattern.compile("foo", flags); - - // add invalid flags - should get IllegalArgumentException - /* - * TODO: Inconsistency between the reference JDK behaviour and spec - exception is - * not thrown - */ - /* - * Valid test is: - * flags |= 0xFFFFFFFF; - * try { - * Pattern.compile("foo",flags); - * } catch (IllegalArgumentException e) { - * // This is the expected exception - * } catch (Exception e) { - * fail(); - * } - */ - - /* Workaround test is: */ - // BEGIN android-changed - // We don't support that flag. - flags |= ~Pattern.CANON_EQ; - // END android-changed - // No exception expected to match incorrect the reference behaviour - Pattern.compile("foo", flags); - } -} diff --git a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java index a81d294..7dfcabf 100644 --- a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java +++ b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java @@ -1020,24 +1020,6 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } - public void testCompileNonCaptGroup() { - boolean isCompiled = false; - - try { -// BEGIN android-change -// We don't have canonical equivalence. - Pattern pat = Pattern.compile("(?:)"); - pat = Pattern.compile("(?:)", Pattern.DOTALL); - pat = Pattern.compile("(?:)", Pattern.CASE_INSENSITIVE); - pat = Pattern.compile("(?:)", Pattern.COMMENTS | Pattern.UNIX_LINES); -// END android-change - isCompiled = true; - } catch (PatternSyntaxException e) { - System.out.println(e); - } - assertTrue(isCompiled); - } - public void testEmbeddedFlags() { String baseString = "(?i)((?s)a)"; String testString = "A"; |