summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jesse Wilson <jessewilson@google.com> 2011-05-26 15:51:47 -0700
committer: Jesse Wilson <jessewilson@google.com> 2011-05-26 16:48:44 -0700
commit: ce257b03a1e5ff6075967e6a84cdb439cb2b01c8 (patch)
tree: a1b7030c9b949ce82c1615304a414249ae79ca4f
parent: 2d99ef561304174b8ae01a0a68d5b96d5edb9f10 (diff)
download: libcore-ce257b03a1e5ff6075967e6a84cdb439cb2b01c8.zip
download: libcore-ce257b03a1e5ff6075967e6a84cdb439cb2b01c8.tar.gz
download: libcore-ce257b03a1e5ff6075967e6a84cdb439cb2b01c8.tar.bz2
Clean Up URI's parsing code.
http://b/2753295 Change-Id: I190ee12c14d1b3b17e2c8cb5ef3868618f1ee124
-rw-r--r-- luni/src/main/java/java/net/URI.java 207
-rw-r--r-- luni/src/main/java/java/net/URLStreamHandler.java 27
-rw-r--r-- luni/src/main/java/libcore/net/UriCodec.java 25
-rw-r--r-- luni/src/main/java/libcore/net/url/UrlUtils.java 17
-rw-r--r-- luni/src/test/java/libcore/java/net/URITest.java 4
5 files changed, 108 insertions, 172 deletions
diff --git a/luni/src/main/java/java/net/URI.java b/luni/src/main/java/java/net/URI.java
index f260b2e..352e25f 100644
--- a/luni/src/main/java/java/net/URI.java
+++ b/luni/src/main/java/java/net/URI.java
@@ -313,171 +313,100 @@ public final class URI implements Comparable<URI>, Serializable {
parseURI(uri.toString(), false);
}
+ /**
+ * Breaks uri into its component parts. This first splits URI into scheme,
+ * scheme-specific part and fragment:
+ * [scheme:][scheme-specific part][#fragment]
+ *
+ * Then it breaks the scheme-specific part into authority, path and query:
+ * [//authority][path][?query]
+ *
+ * Finally it delegates to parseAuthority to break the authority into user
+ * info, host and port:
+ * [user-info@][host][:port]
+ */
private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
- String temp = uri;
- // assign uri string to the input value per spec
string = uri;
- int index, index1, index2, index3;
- // parse into Fragment, Scheme, and SchemeSpecificPart
- // then parse SchemeSpecificPart if necessary
- // Fragment
- index = temp.indexOf('#');
- if (index != -1) {
- // remove the fragment from the end
- fragment = temp.substring(index + 1);
- validateFragment(uri, fragment, index + 1);
- temp = temp.substring(0, index);
- }
-
- // Scheme and SchemeSpecificPart
- index = index1 = temp.indexOf(':');
- index2 = temp.indexOf('/');
- index3 = temp.indexOf('?');
-
- // if a '/' or '?' occurs before the first ':' the uri has no
- // specified scheme, and is therefore not absolute
- if (index != -1 && (index2 >= index || index2 == -1)
- && (index3 >= index || index3 == -1)) {
- // the characters up to the first ':' comprise the scheme
+ // "#fragment"
+ int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
+ if (fragmentStart < uri.length()) {
+ fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
+ }
+
+ // scheme:
+ int start;
+ int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
+ if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
absolute = true;
- scheme = temp.substring(0, index);
- if (scheme.length() == 0) {
- throw new URISyntaxException(uri, "Scheme expected", index);
+ scheme = validateScheme(uri, colon);
+ start = colon + 1;
+
+ if (start == fragmentStart) {
+ throw new URISyntaxException(uri, "Scheme-specific part expected", start);
}
- validateScheme(uri, scheme, 0);
- schemeSpecificPart = temp.substring(index + 1);
- if (schemeSpecificPart.length() == 0) {
- throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1);
+
+ // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
+ if (!uri.regionMatches(start, "/", 0, 1)) {
+ opaque = true;
+ schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
+ uri, start, fragmentStart, "scheme specific part");
+ return;
}
} else {
absolute = false;
- schemeSpecificPart = temp;
+ start = 0;
}
- if (scheme == null || schemeSpecificPart.length() > 0
- && schemeSpecificPart.charAt(0) == '/') {
- opaque = false;
- // the URI is hierarchical
+ opaque = false;
+ schemeSpecificPart = uri.substring(start, fragmentStart);
- // Query
- temp = schemeSpecificPart;
- index = temp.indexOf('?');
- if (index != -1) {
- query = temp.substring(index + 1);
- temp = temp.substring(0, index);
- validateQuery(uri, query, index2 + 1 + index);
+ // "//authority"
+ int fileStart;
+ if (uri.regionMatches(start, "//", 0, 2)) {
+ int authorityStart = start + 2;
+ fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
+ if (authorityStart == uri.length()) {
+ throw new URISyntaxException(uri, "Authority expected", uri.length());
}
-
- // Authority and Path
- if (temp.startsWith("//")) {
- index = temp.indexOf('/', 2);
- if (index != -1) {
- authority = temp.substring(2, index);
- path = temp.substring(index);
- } else {
- authority = temp.substring(2);
- if (authority.length() == 0 && query == null
- && fragment == null) {
- throw new URISyntaxException(uri, "Authority expected", uri.length());
- }
-
- path = "";
- // nothing left, so path is empty (not null, path should
- // never be null)
- }
-
- if (authority.length() == 0) {
- authority = null;
- } else {
- validateAuthority(uri, authority, index1 + 3);
- }
- } else { // no authority specified
- path = temp;
+ if (authorityStart < fileStart) {
+ authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
}
-
- int pathIndex = 0;
- if (index2 > -1) {
- pathIndex += index2;
- }
- if (index > -1) {
- pathIndex += index;
- }
- validatePath(uri, path, pathIndex);
- } else { // if not hierarchical, URI is opaque
- opaque = true;
- validateSsp(uri, schemeSpecificPart, index2 + 2 + index);
- }
-
- parseAuthority(forceServer);
- }
-
- private void validateScheme(String uri, String scheme, int index)
- throws URISyntaxException {
- // first char needs to be an alpha char
- char ch = scheme.charAt(0);
- if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
- throw new URISyntaxException(uri, "Illegal character in scheme", 0);
+ } else {
+ fileStart = start;
}
- try {
- UriCodec.validateSimple(scheme, "+-.");
- } catch (URISyntaxException e) {
- throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex());
- }
- }
+ // "path"
+ int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
+ path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");
- private void validateSsp(String uri, String ssp, int index)
- throws URISyntaxException {
- try {
- ALL_LEGAL_ENCODER.validate(ssp);
- } catch (URISyntaxException e) {
- throw new URISyntaxException(uri,
- e.getReason() + " in schemeSpecificPart", index + e.getIndex());
+ // "?query"
+ if (queryStart < fragmentStart) {
+ query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
}
- }
- private void validateAuthority(String uri, String authority, int index)
- throws URISyntaxException {
- try {
- AUTHORITY_ENCODER.validate(authority);
- } catch (URISyntaxException e) {
- throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex());
- }
+ parseAuthority(forceServer);
}
- private void validatePath(String uri, String path, int index)
- throws URISyntaxException {
- try {
- PATH_ENCODER.validate(path);
- } catch (URISyntaxException e) {
- throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex());
+ private String validateScheme(String uri, int end) throws URISyntaxException {
+ if (end == 0) {
+ throw new URISyntaxException(uri, "Scheme expected", 0);
}
- }
-
- private void validateQuery(String uri, String query, int index)
- throws URISyntaxException {
- try {
- ALL_LEGAL_ENCODER.validate(query);
- } catch (URISyntaxException e) {
- throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex());
+ for (int i = 0; i < end; i++) {
+ if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
+ throw new URISyntaxException(uri, "Illegal character in scheme", 0);
+ }
}
- }
- private void validateFragment(String uri, String fragment, int index)
- throws URISyntaxException {
- try {
- ALL_LEGAL_ENCODER.validate(fragment);
- } catch (URISyntaxException e) {
- throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex());
- }
+ return uri.substring(0, end);
}
/**
- * Parse the authority string into its component parts: user info,
- * host, and port. This operation doesn't apply to registry URIs, and
- * calling it on such <i>may</i> result in a syntax exception.
+ * Breaks this URI's authority into user info, host and port parts.
+ * [user-info@][host][:port]
+ * If any part of this fails this method will give up and potentially leave
+ * these fields with their default values.
*
* @param forceServer true to always throw if the authority cannot be
* parsed. If false, this method may still throw for some kinds of
@@ -607,7 +536,7 @@ public final class URI implements Comparable<URI>, Serializable {
if (ia instanceof Inet4Address) {
return true;
}
- } catch (IllegalArgumentException ex) {
+ } catch (IllegalArgumentException ignored) {
}
if (forceServer) {
diff --git a/luni/src/main/java/java/net/URLStreamHandler.java b/luni/src/main/java/java/net/URLStreamHandler.java
index d5c922e..05895c2 100644
--- a/luni/src/main/java/java/net/URLStreamHandler.java
+++ b/luni/src/main/java/java/net/URLStreamHandler.java
@@ -101,9 +101,9 @@ public abstract class URLStreamHandler {
if (spec.regionMatches(start, "//", 0, 2)) {
// Parse the authority from the spec.
int authorityStart = start + 2;
- fileStart = findFirstOf(spec, "/?#", authorityStart, end);
+ fileStart = UrlUtils.findFirstOf(spec, "/?#", authorityStart, end);
authority = spec.substring(authorityStart, fileStart);
- int userInfoEnd = findFirstOf(spec, "@", authorityStart, fileStart);
+ int userInfoEnd = UrlUtils.findFirstOf(spec, "@", authorityStart, fileStart);
int hostStart;
if (userInfoEnd != fileStart) {
userInfo = spec.substring(authorityStart, userInfoEnd);
@@ -118,9 +118,9 @@ public abstract class URLStreamHandler {
* colons like "[::1]", in which case we look for the port delimiter
* colon after the ']' character.
*/
- int ipv6End = findFirstOf(spec, "]", hostStart, fileStart);
+ int ipv6End = UrlUtils.findFirstOf(spec, "]", hostStart, fileStart);
int colonSearchFrom = (ipv6End != fileStart) ? ipv6End : hostStart;
- int hostEnd = findFirstOf(spec, ":", colonSearchFrom, fileStart);
+ int hostEnd = UrlUtils.findFirstOf(spec, ":", colonSearchFrom, fileStart);
host = spec.substring(hostStart, hostEnd);
int portStart = hostEnd + 1;
if (portStart < fileStart) {
@@ -162,12 +162,12 @@ public abstract class URLStreamHandler {
ref = spec.substring(pos + 1, nextPos);
break;
case '?':
- nextPos = findFirstOf(spec, "#", pos, end);
+ nextPos = UrlUtils.findFirstOf(spec, "#", pos, end);
query = spec.substring(pos + 1, nextPos);
ref = null;
break;
default:
- nextPos = findFirstOf(spec, "?#", pos, end);
+ nextPos = UrlUtils.findFirstOf(spec, "?#", pos, end);
path = relativePath(path, spec.substring(pos, nextPos));
query = null;
ref = null;
@@ -186,21 +186,6 @@ public abstract class URLStreamHandler {
}
/**
- * Returns the index of the first char of {@code chars} in {@code string}
- * bounded between {@code start} and {@code end}. This returns {@code end}
- * if none of the characters exist in the requested range.
- */
- private static int findFirstOf(String string, String chars, int start, int end) {
- for (int i = start; i < end; i++) {
- char c = string.charAt(i);
- if (chars.indexOf(c) != -1) {
- return i;
- }
- }
- return end;
- }
-
- /**
* Returns a new path by resolving {@code path} relative to {@code base}.
*/
private static String relativePath(String base, String path) {
diff --git a/luni/src/main/java/libcore/net/UriCodec.java b/luni/src/main/java/libcore/net/UriCodec.java
index 43e6ea1..e11a014 100644
--- a/luni/src/main/java/libcore/net/UriCodec.java
+++ b/luni/src/main/java/libcore/net/UriCodec.java
@@ -39,36 +39,39 @@ public abstract class UriCodec {
/**
* Throws if {@code s} is invalid according to this encoder.
*/
- public final void validate(String s) throws URISyntaxException {
- for (int i = 0; i < s.length();) {
- char ch = s.charAt(i);
+ public final String validate(String uri, int start, int end, String name)
+ throws URISyntaxException {
+ for (int i = start; i < end; ) {
+ char ch = uri.charAt(i);
if ((ch >= 'a' && ch <= 'z')
|| (ch >= 'A' && ch <= 'Z')
|| (ch >= '0' && ch <= '9')
|| isRetained(ch)) {
i++;
} else if (ch == '%') {
- if (i + 2 >= s.length()) {
- throw new URISyntaxException(s, "Incomplete % sequence", i);
+ if (i + 2 >= end) {
+ throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i);
}
- int d1 = hexToInt(s.charAt(i + 1));
- int d2 = hexToInt(s.charAt(i + 2));
+ int d1 = hexToInt(uri.charAt(i + 1));
+ int d2 = hexToInt(uri.charAt(i + 2));
if (d1 == -1 || d2 == -1) {
- throw new URISyntaxException(s, "Invalid % sequence: " +
- s.substring(i, i + 3), i);
+ throw new URISyntaxException(uri, "Invalid % sequence: "
+ + uri.substring(i, i + 3) + " in " + name, i);
}
i += 3;
} else {
- throw new URISyntaxException(s, "Illegal character", i);
+ throw new URISyntaxException(uri, "Illegal character in " + name, i);
}
}
+ return uri.substring(start, end);
}
/**
* Throws if {@code s} contains characters that are not letters, digits or
* in {@code legal}.
*/
- public static void validateSimple(String s, String legal) throws URISyntaxException {
+ public static void validateSimple(String s, String legal)
+ throws URISyntaxException {
for (int i = 0; i < s.length(); i++) {
char ch = s.charAt(i);
if (!((ch >= 'a' && ch <= 'z')
diff --git a/luni/src/main/java/libcore/net/url/UrlUtils.java b/luni/src/main/java/libcore/net/url/UrlUtils.java
index 81704fe..5d5f27d 100644
--- a/luni/src/main/java/libcore/net/url/UrlUtils.java
+++ b/luni/src/main/java/libcore/net/url/UrlUtils.java
@@ -115,7 +115,7 @@ public final class UrlUtils {
return spec.substring(0, colon).toLowerCase(Locale.US);
}
- private static boolean isValidSchemeChar(int index, char c) {
+ public static boolean isValidSchemeChar(int index, char c) {
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
return true;
}
@@ -124,4 +124,19 @@ public final class UrlUtils {
}
return false;
}
+
+ /**
+ * Returns the index of the first char of {@code chars} in {@code string}
+ * bounded between {@code start} and {@code end}. This returns {@code end}
+ * if none of the characters exist in the requested range.
+ */
+ public static int findFirstOf(String string, String chars, int start, int end) {
+ for (int i = start; i < end; i++) {
+ char c = string.charAt(i);
+ if (chars.indexOf(c) != -1) {
+ return i;
+ }
+ }
+ return end;
+ }
}
diff --git a/luni/src/test/java/libcore/java/net/URITest.java b/luni/src/test/java/libcore/java/net/URITest.java
index 57ec713..3a419c0 100644
--- a/luni/src/test/java/libcore/java/net/URITest.java
+++ b/luni/src/test/java/libcore/java/net/URITest.java
@@ -531,5 +531,9 @@ public final class URITest extends TestCase {
assertEquals(-1, uri.getPort());
}
+ // TODO: test http://#fragment
+ // TODO: test http://?query
+ // TODO: test http:///path
+
// Adding a new test? Consider adding an equivalent test to URLTest.java
}