diff options
| | | |
---|---|---|
author | Jesse Wilson <jessewilson@google.com> | 2011-05-26 15:51:47 -0700 |
committer | Jesse Wilson <jessewilson@google.com> | 2011-05-26 16:48:44 -0700 |
commit | ce257b03a1e5ff6075967e6a84cdb439cb2b01c8 (patch) | |
tree | a1b7030c9b949ce82c1615304a414249ae79ca4f | |
parent | 2d99ef561304174b8ae01a0a68d5b96d5edb9f10 (diff) | |
download | libcore-ce257b03a1e5ff6075967e6a84cdb439cb2b01c8.zip libcore-ce257b03a1e5ff6075967e6a84cdb439cb2b01c8.tar.gz libcore-ce257b03a1e5ff6075967e6a84cdb439cb2b01c8.tar.bz2 |
Clean Up URI's parsing code.
http://b/2753295
Change-Id: I190ee12c14d1b3b17e2c8cb5ef3868618f1ee124
-rw-r--r-- | luni/src/main/java/java/net/URI.java | 207 | ||||
-rw-r--r-- | luni/src/main/java/java/net/URLStreamHandler.java | 27 | ||||
-rw-r--r-- | luni/src/main/java/libcore/net/UriCodec.java | 25 | ||||
-rw-r--r-- | luni/src/main/java/libcore/net/url/UrlUtils.java | 17 | ||||
-rw-r--r-- | luni/src/test/java/libcore/java/net/URITest.java | 4 |
5 files changed, 108 insertions, 172 deletions
diff --git a/luni/src/main/java/java/net/URI.java b/luni/src/main/java/java/net/URI.java index f260b2e..352e25f 100644 --- a/luni/src/main/java/java/net/URI.java +++ b/luni/src/main/java/java/net/URI.java @@ -313,171 +313,100 @@ public final class URI implements Comparable<URI>, Serializable { parseURI(uri.toString(), false); } + /** + * Breaks uri into its component parts. This first splits URI into scheme, + * scheme-specific part and fragment: + * [scheme:][scheme-specific part][#fragment] + * + * Then it breaks the scheme-specific part into authority, path and query: + * [//authority][path][?query] + * + * Finally it delegates to parseAuthority to break the authority into user + * info, host and port: + * [user-info@][host][:port] + */ private void parseURI(String uri, boolean forceServer) throws URISyntaxException { - String temp = uri; - // assign uri string to the input value per spec string = uri; - int index, index1, index2, index3; - // parse into Fragment, Scheme, and SchemeSpecificPart - // then parse SchemeSpecificPart if necessary - // Fragment - index = temp.indexOf('#'); - if (index != -1) { - // remove the fragment from the end - fragment = temp.substring(index + 1); - validateFragment(uri, fragment, index + 1); - temp = temp.substring(0, index); - } - - // Scheme and SchemeSpecificPart - index = index1 = temp.indexOf(':'); - index2 = temp.indexOf('/'); - index3 = temp.indexOf('?'); - - // if a '/' or '?' 
occurs before the first ':' the uri has no - // specified scheme, and is therefore not absolute - if (index != -1 && (index2 >= index || index2 == -1) - && (index3 >= index || index3 == -1)) { - // the characters up to the first ':' comprise the scheme + // "#fragment" + int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length()); + if (fragmentStart < uri.length()) { + fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment"); + } + + // scheme: + int start; + int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart); + if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) { absolute = true; - scheme = temp.substring(0, index); - if (scheme.length() == 0) { - throw new URISyntaxException(uri, "Scheme expected", index); + scheme = validateScheme(uri, colon); + start = colon + 1; + + if (start == fragmentStart) { + throw new URISyntaxException(uri, "Scheme-specific part expected", start); } - validateScheme(uri, scheme, 0); - schemeSpecificPart = temp.substring(index + 1); - if (schemeSpecificPart.length() == 0) { - throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1); + + // URIs with schemes followed by a non-/ char are opaque and need no further parsing. 
+ if (!uri.regionMatches(start, "/", 0, 1)) { + opaque = true; + schemeSpecificPart = ALL_LEGAL_ENCODER.validate( + uri, start, fragmentStart, "scheme specific part"); + return; } } else { absolute = false; - schemeSpecificPart = temp; + start = 0; } - if (scheme == null || schemeSpecificPart.length() > 0 - && schemeSpecificPart.charAt(0) == '/') { - opaque = false; - // the URI is hierarchical + opaque = false; + schemeSpecificPart = uri.substring(start, fragmentStart); - // Query - temp = schemeSpecificPart; - index = temp.indexOf('?'); - if (index != -1) { - query = temp.substring(index + 1); - temp = temp.substring(0, index); - validateQuery(uri, query, index2 + 1 + index); + // "//authority" + int fileStart; + if (uri.regionMatches(start, "//", 0, 2)) { + int authorityStart = start + 2; + fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart); + if (authorityStart == uri.length()) { + throw new URISyntaxException(uri, "Authority expected", uri.length()); } - - // Authority and Path - if (temp.startsWith("//")) { - index = temp.indexOf('/', 2); - if (index != -1) { - authority = temp.substring(2, index); - path = temp.substring(index); - } else { - authority = temp.substring(2); - if (authority.length() == 0 && query == null - && fragment == null) { - throw new URISyntaxException(uri, "Authority expected", uri.length()); - } - - path = ""; - // nothing left, so path is empty (not null, path should - // never be null) - } - - if (authority.length() == 0) { - authority = null; - } else { - validateAuthority(uri, authority, index1 + 3); - } - } else { // no authority specified - path = temp; + if (authorityStart < fileStart) { + authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority"); } - - int pathIndex = 0; - if (index2 > -1) { - pathIndex += index2; - } - if (index > -1) { - pathIndex += index; - } - validatePath(uri, path, pathIndex); - } else { // if not hierarchical, URI is opaque - opaque = true; - 
validateSsp(uri, schemeSpecificPart, index2 + 2 + index); - } - - parseAuthority(forceServer); - } - - private void validateScheme(String uri, String scheme, int index) - throws URISyntaxException { - // first char needs to be an alpha char - char ch = scheme.charAt(0); - if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) { - throw new URISyntaxException(uri, "Illegal character in scheme", 0); + } else { + fileStart = start; } - try { - UriCodec.validateSimple(scheme, "+-."); - } catch (URISyntaxException e) { - throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex()); - } - } + // "path" + int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart); + path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path"); - private void validateSsp(String uri, String ssp, int index) - throws URISyntaxException { - try { - ALL_LEGAL_ENCODER.validate(ssp); - } catch (URISyntaxException e) { - throw new URISyntaxException(uri, - e.getReason() + " in schemeSpecificPart", index + e.getIndex()); + // "?query" + if (queryStart < fragmentStart) { + query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query"); } - } - private void validateAuthority(String uri, String authority, int index) - throws URISyntaxException { - try { - AUTHORITY_ENCODER.validate(authority); - } catch (URISyntaxException e) { - throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex()); - } + parseAuthority(forceServer); } - private void validatePath(String uri, String path, int index) - throws URISyntaxException { - try { - PATH_ENCODER.validate(path); - } catch (URISyntaxException e) { - throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex()); + private String validateScheme(String uri, int end) throws URISyntaxException { + if (end == 0) { + throw new URISyntaxException(uri, "Scheme expected", 0); } - } - - private void validateQuery(String uri, String query, int index) - 
throws URISyntaxException { - try { - ALL_LEGAL_ENCODER.validate(query); - } catch (URISyntaxException e) { - throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex()); + for (int i = 0; i < end; i++) { + if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) { + throw new URISyntaxException(uri, "Illegal character in scheme", 0); + } } - } - private void validateFragment(String uri, String fragment, int index) - throws URISyntaxException { - try { - ALL_LEGAL_ENCODER.validate(fragment); - } catch (URISyntaxException e) { - throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex()); - } + return uri.substring(0, end); } /** - * Parse the authority string into its component parts: user info, - * host, and port. This operation doesn't apply to registry URIs, and - * calling it on such <i>may</i> result in a syntax exception. + * Breaks this URI's authority into user info, host and port parts. + * [user-info@][host][:port] + * If any part of this fails this method will give up and potentially leave + * these fields with their default values. * * @param forceServer true to always throw if the authority cannot be * parsed. If false, this method may still throw for some kinds of @@ -607,7 +536,7 @@ public final class URI implements Comparable<URI>, Serializable { if (ia instanceof Inet4Address) { return true; } - } catch (IllegalArgumentException ex) { + } catch (IllegalArgumentException ignored) { } if (forceServer) { diff --git a/luni/src/main/java/java/net/URLStreamHandler.java b/luni/src/main/java/java/net/URLStreamHandler.java index d5c922e..05895c2 100644 --- a/luni/src/main/java/java/net/URLStreamHandler.java +++ b/luni/src/main/java/java/net/URLStreamHandler.java @@ -101,9 +101,9 @@ public abstract class URLStreamHandler { if (spec.regionMatches(start, "//", 0, 2)) { // Parse the authority from the spec. 
int authorityStart = start + 2; - fileStart = findFirstOf(spec, "/?#", authorityStart, end); + fileStart = UrlUtils.findFirstOf(spec, "/?#", authorityStart, end); authority = spec.substring(authorityStart, fileStart); - int userInfoEnd = findFirstOf(spec, "@", authorityStart, fileStart); + int userInfoEnd = UrlUtils.findFirstOf(spec, "@", authorityStart, fileStart); int hostStart; if (userInfoEnd != fileStart) { userInfo = spec.substring(authorityStart, userInfoEnd); @@ -118,9 +118,9 @@ public abstract class URLStreamHandler { * colons like "[::1]", in which case we look for the port delimiter * colon after the ']' character. */ - int ipv6End = findFirstOf(spec, "]", hostStart, fileStart); + int ipv6End = UrlUtils.findFirstOf(spec, "]", hostStart, fileStart); int colonSearchFrom = (ipv6End != fileStart) ? ipv6End : hostStart; - int hostEnd = findFirstOf(spec, ":", colonSearchFrom, fileStart); + int hostEnd = UrlUtils.findFirstOf(spec, ":", colonSearchFrom, fileStart); host = spec.substring(hostStart, hostEnd); int portStart = hostEnd + 1; if (portStart < fileStart) { @@ -162,12 +162,12 @@ public abstract class URLStreamHandler { ref = spec.substring(pos + 1, nextPos); break; case '?': - nextPos = findFirstOf(spec, "#", pos, end); + nextPos = UrlUtils.findFirstOf(spec, "#", pos, end); query = spec.substring(pos + 1, nextPos); ref = null; break; default: - nextPos = findFirstOf(spec, "?#", pos, end); + nextPos = UrlUtils.findFirstOf(spec, "?#", pos, end); path = relativePath(path, spec.substring(pos, nextPos)); query = null; ref = null; @@ -186,21 +186,6 @@ public abstract class URLStreamHandler { } /** - * Returns the index of the first char of {@code chars} in {@code string} - * bounded between {@code start} and {@code end}. This returns {@code end} - * if none of the characters exist in the requested range. 
- */ - private static int findFirstOf(String string, String chars, int start, int end) { - for (int i = start; i < end; i++) { - char c = string.charAt(i); - if (chars.indexOf(c) != -1) { - return i; - } - } - return end; - } - - /** * Returns a new path by resolving {@code path} relative to {@code base}. */ private static String relativePath(String base, String path) { diff --git a/luni/src/main/java/libcore/net/UriCodec.java b/luni/src/main/java/libcore/net/UriCodec.java index 43e6ea1..e11a014 100644 --- a/luni/src/main/java/libcore/net/UriCodec.java +++ b/luni/src/main/java/libcore/net/UriCodec.java @@ -39,36 +39,39 @@ public abstract class UriCodec { /** * Throws if {@code s} is invalid according to this encoder. */ - public final void validate(String s) throws URISyntaxException { - for (int i = 0; i < s.length();) { - char ch = s.charAt(i); + public final String validate(String uri, int start, int end, String name) + throws URISyntaxException { + for (int i = start; i < end; ) { + char ch = uri.charAt(i); if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || isRetained(ch)) { i++; } else if (ch == '%') { - if (i + 2 >= s.length()) { - throw new URISyntaxException(s, "Incomplete % sequence", i); + if (i + 2 >= end) { + throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i); } - int d1 = hexToInt(s.charAt(i + 1)); - int d2 = hexToInt(s.charAt(i + 2)); + int d1 = hexToInt(uri.charAt(i + 1)); + int d2 = hexToInt(uri.charAt(i + 2)); if (d1 == -1 || d2 == -1) { - throw new URISyntaxException(s, "Invalid % sequence: " + - s.substring(i, i + 3), i); + throw new URISyntaxException(uri, "Invalid % sequence: " + + uri.substring(i, i + 3) + " in " + name, i); } i += 3; } else { - throw new URISyntaxException(s, "Illegal character", i); + throw new URISyntaxException(uri, "Illegal character in " + name, i); } } + return uri.substring(start, end); } /** * Throws if {@code s} contains characters that are not letters, 
digits or * in {@code legal}. */ - public static void validateSimple(String s, String legal) throws URISyntaxException { + public static void validateSimple(String s, String legal) + throws URISyntaxException { for (int i = 0; i < s.length(); i++) { char ch = s.charAt(i); if (!((ch >= 'a' && ch <= 'z') diff --git a/luni/src/main/java/libcore/net/url/UrlUtils.java b/luni/src/main/java/libcore/net/url/UrlUtils.java index 81704fe..5d5f27d 100644 --- a/luni/src/main/java/libcore/net/url/UrlUtils.java +++ b/luni/src/main/java/libcore/net/url/UrlUtils.java @@ -115,7 +115,7 @@ public final class UrlUtils { return spec.substring(0, colon).toLowerCase(Locale.US); } - private static boolean isValidSchemeChar(int index, char c) { + public static boolean isValidSchemeChar(int index, char c) { if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { return true; } @@ -124,4 +124,19 @@ public final class UrlUtils { } return false; } + + /** + * Returns the index of the first char of {@code chars} in {@code string} + * bounded between {@code start} and {@code end}. This returns {@code end} + * if none of the characters exist in the requested range. + */ + public static int findFirstOf(String string, String chars, int start, int end) { + for (int i = start; i < end; i++) { + char c = string.charAt(i); + if (chars.indexOf(c) != -1) { + return i; + } + } + return end; + } } diff --git a/luni/src/test/java/libcore/java/net/URITest.java b/luni/src/test/java/libcore/java/net/URITest.java index 57ec713..3a419c0 100644 --- a/luni/src/test/java/libcore/java/net/URITest.java +++ b/luni/src/test/java/libcore/java/net/URITest.java @@ -531,5 +531,9 @@ public final class URITest extends TestCase { assertEquals(-1, uri.getPort()); } + // TODO: test http://#fragment + // TODO: test http://?query + // TODO: test http:///path + // Adding a new test? Consider adding an equivalent test to URLTest.java } |