Adding support for hash '#' comments to our JSON parser.

Neither the JSON RFC nor the documentation of Crockford's implementation mentions these comments, but somehow the old parser used to support them. And so we shall also. See bug 2571423. Change-Id: I77d64c5ec53278d8df5fe1873404f1241320504b
author: Jesse Wilson <jessewilson@google.com> 2010-04-06 14:16:28 -0700
committer: Jesse Wilson <jessewilson@google.com> 2010-04-06 15:20:01 -0700
commit: 19554ec99d57e820a4c9da3bcde93d77b537d515 (patch)
tree: b64a8c41b84d39a4fbb30190bba46d96b9472803 /json
parent: f6b1c58bf436ad2091de956d4a027d615aebfade (diff)
download: libcore-19554ec99d57e820a4c9da3bcde93d77b537d515.zip
libcore-19554ec99d57e820a4c9da3bcde93d77b537d515.tar.gz
libcore-19554ec99d57e820a4c9da3bcde93d77b537d515.tar.bz2
3 files changed, 70 insertions, 30 deletions
diff --git a/json/src/main/java/org/json/JSONTokener.java b/json/src/main/java/org/json/JSONTokener.java
index f276173..3a82ab9 100644
--- a/json/src/main/java/org/json/JSONTokener.java
+++ b/json/src/main/java/org/json/JSONTokener.java
@@ -32,8 +32,25 @@ package org.json;
  * String query = object.getString("query");
  * JSONArray locations = object.getJSONArray("locations");</pre>
  *
- * <p>This parser is lenient. A successful parse does not necessarily indicate
- * that the input string is valid JSON.
+ * <p>For best interoperability and performance use JSON that complies with
+ * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons
+ * this parser is lenient, so a successful parse does not indicate that the
+ * input string was valid JSON. All of the following syntax errors will be
+ * ignored:
+ * <ul>
+ *   <li>End-of-line comments starting with {@code //} or {@code #} and ending
+ *       with a newline character.
+ *   <li>C-style comments starting with {@code /*} and ending with
+ *       {@code *}{@code /}. Such comments may not be nested.
+ *   <li>Strings that are unquoted or {@code 'single quoted'}.
+ *   <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}.
+ *   <li>Octal integers prefixed with {@code 0}.
+ *   <li>Array elements separated by {@code ;}.
+ *   <li>Unnecessary array separators. These are interpreted as if null was the
+ *       omitted value.
+ *   <li>Key-value pairs separated by {@code =} or {@code =>}.
+ *   <li>Key-value pairs separated by {@code ;}.
+ * </ul>
  *
  * <p>Each tokener may be used to parse a single JSON string. Instances of this
  * class are not thread safe. Although this class is nonfinal, it was not
@@ -107,11 +124,34 @@ public class JSONTokener {
                     }
 
                     char peek = in.charAt(pos);
-                    if (peek != '*' && peek != '/') {
-                        return c;
+                    switch (peek) {
+                        case '*':
+                            // skip a /* c-style comment */
+                            pos++;
+                            int commentEnd = in.indexOf("*/", pos);
+                            if (commentEnd == -1) {
+                                throw syntaxError("Unterminated comment");
+                            }
+                            pos = commentEnd + 2;
+                            continue;
+
+                        case '/':
+                            // skip a // end-of-line comment
+                            pos++;
+                            skipToEndOfLine();
+                            continue;
+
+                        default:
+                            return c;
                     }
 
-                    skipComment();
+                case '#':
+                    /*
+                     * Skip a # hash end-of-line comment. The JSON RFC doesn't
+                     * specify this behaviour, but it's required to parse
+                     * existing documents. See http://b/2571423.
+                     */
+                    skipToEndOfLine();
                     continue;
 
                 default:
@@ -123,32 +163,16 @@ public class JSONTokener {
     }
 
     /**
-     * Advances the position until it is beyond the current comment. The opening
-     * slash '/' should have already been read, and character at the current
-     * position be an asterisk '*' for a C-style comment or a slash '/' for an
-     * end-of-line comment.
-     *
-     * @throws JSONException if a C-style comment was not terminated.
+     * Advances the position until after the next newline character. If the line
+     * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
+     * caller.
      */
-    private void skipComment() throws JSONException {
-        if (in.charAt(pos++) == '*') {
-            int commentEnd = in.indexOf("*/", pos);
-            if (commentEnd == -1) {
-                throw syntaxError("Unterminated comment");
-            }
-            pos = commentEnd + 2;
-
-        } else {
-            /*
-             * Skip to the next newline character. If the line is terminated by
-             * "\r\n", the '\n' will be consumed as whitespace by the caller.
-             */
-            for (; pos < in.length(); pos++) {
-                char c = in.charAt(pos);
-                if (c == '\r' || c == '\n') {
-                    pos++;
-                    break;
-                }
+    private void skipToEndOfLine() {
+        for (; pos < in.length(); pos++) {
+            char c = in.charAt(pos);
+            if (c == '\r' || c == '\n') {
+                pos++;
+                break;
             }
         }
     }
diff --git a/json/src/test/java/org/json/JSONTokenerTest.java b/json/src/test/java/org/json/JSONTokenerTest.java
index 70b7384..0d4f9d3 100644
--- a/json/src/test/java/org/json/JSONTokenerTest.java
+++ b/json/src/test/java/org/json/JSONTokenerTest.java
@@ -250,6 +250,17 @@ public class JSONTokenerTest extends TestCase {
         assertEquals('E', tokener.nextClean());
     }
 
+    /**
+     * Some applications rely on '#' being parsed as the start of an end-of-line comment.
+     * http://b/2571423
+     */
+    public void testNextCleanHashComments() throws JSONException {
+        JSONTokener tokener = new JSONTokener("A # B */ /* C */ \nD #");
+        assertEquals('A', tokener.nextClean());
+        assertEquals('D', tokener.nextClean());
+        assertEquals('\0', tokener.nextClean());
+    }
+
     public void testNextCleanCommentsTrailingSingleSlash() throws JSONException {
         JSONTokener tokener = new JSONTokener(" / S /");
         assertEquals('/', tokener.nextClean());
diff --git a/json/src/test/java/org/json/ParsingTest.java b/json/src/test/java/org/json/ParsingTest.java
index 16b9116..98d9069 100644
--- a/json/src/test/java/org/json/ParsingTest.java
+++ b/json/src/test/java/org/json/ParsingTest.java
@@ -131,6 +131,11 @@ public class ParsingTest extends TestCase {
 
     public void testParsingWithCommentsAndWhitespace() throws JSONException {
         assertParsed("baz", "  // foo bar \n baz");
+        assertParsed("baz", "  // foo bar \r baz");
+        assertParsed("baz", "  // foo bar \r\n baz");
+        assertParsed("baz", "  # foo bar \n baz");
+        assertParsed("baz", "  # foo bar \r baz");
+        assertParsed("baz", "  # foo bar \r\n baz");
         assertParsed(5, "  /* foo bar \n baz */ 5");
         assertParsed(5, "  /* foo bar \n baz */ 5 // quux");
         assertParsed(5, "  5   ");
author	Jesse Wilson <jessewilson@google.com>	2010-04-06 14:16:28 -0700
committer	Jesse Wilson <jessewilson@google.com>	2010-04-06 15:20:01 -0700
commit	19554ec99d57e820a4c9da3bcde93d77b537d515 (patch)
tree	b64a8c41b84d39a4fbb30190bba46d96b9472803 /json
parent	f6b1c58bf436ad2091de956d4a027d615aebfade (diff)
download	libcore-19554ec99d57e820a4c9da3bcde93d77b537d515.zip libcore-19554ec99d57e820a4c9da3bcde93d77b537d515.tar.gz libcore-19554ec99d57e820a4c9da3bcde93d77b537d515.tar.bz2