Change KXmlParser.next() to use one while loop instead of two.

This cleans up some of the structural problems with the KXmlParser code. It was unclear how different text tokens (text, CDATA and entity tokens) were concatenated in next() but not in nextToken(). This moves the token loop from the next() method into the nextImpl() method (now the private next(boolean) overload), saving redundant calls to peekType(). The code also had an instance field 'token' that was only well-defined during calls to next() and nextToken(). I've removed this field and passed parameters instead.

This fixes some implementation bugs:
 - empty CDATA blocks aren't reported
 - empty entities aren't reported
 - double dashes in comments are forbidden in strict mode

Change-Id: I8c17b61d63e84622556f3751dbf8af282c601d09
http://b/3090550
author: Jesse Wilson <jessewilson@google.com> 2010-11-20 00:44:27 -0800
committer: Jesse Wilson <jessewilson@google.com> 2010-11-20 01:03:25 -0800
commit: bbf35ecae9bb5b69fb0d016a57a666d0a9e0f2fc (patch)
tree: 5125d7df2546c651831325ee178d66c557fce891 /xml/src/main/java
parent: b73fc0c5bd9a5ac9295fd76d097bd56762a6ee17 (diff)
download: libcore-bbf35ecae9bb5b69fb0d016a57a666d0a9e0f2fc.zip
libcore-bbf35ecae9bb5b69fb0d016a57a666d0a9e0f2fc.tar.gz
libcore-bbf35ecae9bb5b69fb0d016a57a666d0a9e0f2fc.tar.bz2
1 files changed, 137 insertions, 121 deletions
diff --git a/xml/src/main/java/org/kxml2/io/KXmlParser.java b/xml/src/main/java/org/kxml2/io/KXmlParser.java
index d3834aa..4f41fe0 100644
--- a/xml/src/main/java/org/kxml2/io/KXmlParser.java
+++ b/xml/src/main/java/org/kxml2/io/KXmlParser.java
@@ -39,6 +39,7 @@ public class KXmlParser implements XmlPullParser {
 
     private static final char[] START_COMMENT = { '<', '!', '-', '-' };
     private static final char[] END_COMMENT = { '-', '-', '>' };
+    private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' };
     private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
     private static final char[] END_CDATA = { ']', ']', '>' };
     private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' };
@@ -105,7 +106,6 @@ public class KXmlParser implements XmlPullParser {
     private String error;
 
     private boolean unresolved;
-    private boolean token;
 
     public final StringPool stringPool = new StringPool();
 
@@ -248,11 +248,15 @@ public class KXmlParser implements XmlPullParser {
         }
     }
 
-    /**
-     * Common base for next() and nextToken(). Clears the state, except from txtPos and whitespace.
-     * Does not set the type variable.
-     */
-    private void nextImpl() throws IOException, XmlPullParserException {
+    public int next() throws XmlPullParserException, IOException {
+        return next(false);
+    }
+
+    public int nextToken() throws XmlPullParserException, IOException {
+        return next(true);
+    }
+
+    private int next(boolean justOneToken) throws IOException, XmlPullParserException {
         if (reader == null) {
             throw new XmlPullParserException("setInput() must be called first.", this, null);
         }
@@ -261,95 +265,124 @@ public class KXmlParser implements XmlPullParser {
             depth--;
         }
 
-        while (true) {
-            attributeCount = -1;
+        // degenerated needs to be handled before error because of possible
+        // processor expectations(!)
 
-            // degenerated needs to be handled before error because of possible
-            // processor expectations(!)
-
-            if (degenerated) {
-                degenerated = false;
-                type = END_TAG;
-                return;
-            }
+        if (degenerated) {
+            degenerated = false;
+            type = END_TAG;
+            return type;
+        }
 
-            if (error != null) {
+        if (error != null) {
+            if (justOneToken) {
                 text = error;
-                error = null;
                 type = COMMENT;
-                return;
+                error = null;
+                return type;
+            } else {
+                error = null;
             }
+        }
 
-            prefix = null;
-            name = null;
-            namespace = null;
+        type = peekType();
 
+        if (type == XML_DECLARATION) {
+            readXmlDeclaration();
             type = peekType();
+        }
+
+        text = null;
+        isWhitespace = true;
+        prefix = null;
+        name = null;
+        namespace = null;
+        attributeCount = -1;
 
+        while (true) {
             switch (type) {
 
+            /*
+             * Return immediately after encountering a start tag, end tag, or
+             * the end of the document.
+             */
+            case START_TAG:
+                parseStartTag(false);
+                return type;
+            case END_TAG:
+                readEndTag();
+                return type;
+            case END_DOCUMENT:
+                return type;
+
+            /*
+             * Return after any text token when we're looking for a single
+             * token. Otherwise concatenate all text between tags.
+             */
             case ENTITY_REF:
-                if (token) {
+                if (justOneToken) {
                     StringBuilder entityTextBuilder = new StringBuilder();
-                    readEntity(entityTextBuilder);
+                    readEntity(entityTextBuilder, true);
                     text = entityTextBuilder.toString();
-                    return;
+                    break;
                 }
                 // fall-through
             case TEXT:
-                text = readValue('<', !token, false);
+                text = readValue('<', !justOneToken, false);
                 if (depth == 0 && isWhitespace) {
                     type = IGNORABLE_WHITESPACE;
                 }
-                return;
-
-            case START_TAG:
-                text = null; // TODO: fix next()/nextToken() so this is handled there
-                parseStartTag(false);
-                return;
-
-            case END_TAG:
-                readEndTag();
-                return;
-
-            case END_DOCUMENT:
-                return;
-
-            case XML_DECLARATION:
-                readXmlDeclaration();
-                continue;
+                break;
+            case CDSECT:
+                read(START_CDATA);
+                text = readUntil(END_CDATA, true);
+                break;
 
+            /*
+             * Comments, processing instructions and declarations are returned
+             * when we're looking for a single token. Otherwise they're skipped.
+             */
+            case COMMENT:
+                String commentText = readComment(justOneToken);
+                if (justOneToken) {
+                    text = commentText;
+                }
+                break;
             case PROCESSING_INSTRUCTION:
                 read(START_PROCESSING_INSTRUCTION);
-                if (token) {
-                    text = readUntil(END_PROCESSING_INSTRUCTION, true);
-                } else {
-                    readUntil(END_PROCESSING_INSTRUCTION, false);
+                String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken);
+                if (justOneToken) {
+                    text = processingInstruction;
                 }
-                return;
-
+                break;
             case DOCDECL:
-                readDoctype(token);
-                return;
-
-            case CDSECT:
-                String oldText = text;
-                read(START_CDATA);
-                text = readUntil(END_CDATA, true);
-                if (oldText != null) {
-                    text = oldText + text; // TODO: fix next()/nextToken() so this is handled there
+                String doctype = readDoctype(justOneToken);
+                if (justOneToken) {
+                    text = doctype;
                 }
-                return;
+                break;
+            }
 
-            case COMMENT:
-                read(START_COMMENT);
-                if (token) {
-                    text = readUntil(END_COMMENT, true);
-                } else {
-                    readUntil(END_COMMENT, false);
-                }
-                return;
+            if (justOneToken) {
+                return type;
+            }
+
+            if (type == IGNORABLE_WHITESPACE) {
+                text = null;
             }
+
+            /*
+             * We've read all that we can of a non-empty text block. Always
+             * report this as text, even if it was a CDATA block or entity
+             * reference.
+             */
+            int peek = peekType();
+            if (text != null && !text.isEmpty() && peek < TEXT) {
+                type = TEXT;
+                return type;
+            }
+
+            type = peek;
         }
     }
 
@@ -362,10 +395,14 @@ public class KXmlParser implements XmlPullParser {
      */
     private String readUntil(char[] delimiter, boolean returnText)
             throws IOException, XmlPullParserException {
-        int previous = -1;
         int start = position;
         StringBuilder result = null;
 
+        if (returnText && text != null) {
+            result = new StringBuilder();
+            result.append(text);
+        }
+
         search:
         while (true) {
             if (position + delimiter.length >= limit) {
@@ -387,7 +424,6 @@ public class KXmlParser implements XmlPullParser {
             // when the VM has better method inlining
             for (int i = 0; i < delimiter.length; i++) {
                 if (buffer[position + i] != delimiter[i]) {
-                    previous = buffer[position];
                     position++;
                     continue search;
                 }
@@ -396,10 +432,6 @@ public class KXmlParser implements XmlPullParser {
             break;
         }
 
-        if (delimiter == END_COMMENT && previous == '-') {
-            checkRelaxed("illegal comment delimiter: --->");
-        }
-
         int end = position;
         position += delimiter.length;
 
@@ -416,7 +448,7 @@ public class KXmlParser implements XmlPullParser {
     /**
      * Returns true if an XML declaration was read.
      */
-    private boolean readXmlDeclaration() throws IOException, XmlPullParserException {
+    private void readXmlDeclaration() throws IOException, XmlPullParserException {
         if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) {
             checkRelaxed("processing instructions must not start with xml");
         }
@@ -455,10 +487,24 @@ public class KXmlParser implements XmlPullParser {
 
         isWhitespace = true;
         text = null;
-        return true;
     }
 
-    private void readDoctype(boolean assignText) throws IOException, XmlPullParserException {
+    private String readComment(boolean returnText) throws IOException, XmlPullParserException {
+        read(START_COMMENT);
+
+        if (relaxed) {
+            return readUntil(END_COMMENT, returnText);
+        }
+
+        String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText);
+        if (peekCharacter() != '>') {
+            throw new XmlPullParserException("Comments may not contain --", this, null);
+        }
+        position++;
+        return commentText;
+    }
+
+    private String readDoctype(boolean returnText) throws IOException, XmlPullParserException {
         read(START_DOCTYPE);
 
         int start = position;
@@ -468,7 +514,7 @@ public class KXmlParser implements XmlPullParser {
 
         while (true) {
             if (position >= limit) {
-                if (start < position && assignText) {
+                if (start < position && returnText) {
                     if (result == null) {
                         result = new StringBuilder();
                     }
@@ -476,7 +522,7 @@ public class KXmlParser implements XmlPullParser {
                 }
                 if (!fillBuffer(1)) {
                     checkRelaxed(UNEXPECTED_EOF);
-                    return;
+                    return null;
                 }
                 start = position;
             }
@@ -496,13 +542,13 @@ public class KXmlParser implements XmlPullParser {
             }
         }
 
-        if (assignText) {
-            if (result == null) {
-                text = stringPool.get(buffer, start, position - start - 1); // omit the '>'
-            } else {
-                result.append(buffer, start, position - start - 1); // omit the '>'
-                text = result.toString();
-            }
+        if (!returnText) {
+            return null;
+        } else if (result == null) {
+            return stringPool.get(buffer, start, position - start - 1); // omit the '>'
+        } else {
+            result.append(buffer, start, position - start - 1); // omit the '>'
+            return result.toString();
         }
     }
 
@@ -690,7 +736,8 @@ public class KXmlParser implements XmlPullParser {
      * resolved entity to {@code out}. If the entity cannot be read or resolved,
      * {@code out} will contain the partial entity reference.
      */
-    private void readEntity(StringBuilder out) throws IOException, XmlPullParserException {
+    private void readEntity(StringBuilder out, boolean isEntityToken)
+            throws IOException, XmlPullParserException {
         int start = out.length();
 
         if (buffer[position++] != '&') {
@@ -728,7 +775,7 @@ public class KXmlParser implements XmlPullParser {
 
         String code = out.substring(start + 1, out.length() - 1);
 
-        if (token && type == ENTITY_REF) {
+        if (isEntityToken) {
             name = code;
         }
 
@@ -753,7 +800,7 @@ public class KXmlParser implements XmlPullParser {
         } else {
             // keep the unresolved entity "&code;" in the text for relaxed clients
             unresolved = true;
-            if (!token) {
+            if (!isEntityToken) {
                 checkRelaxed("unresolved: &" + code + ";");
             }
         }
@@ -795,7 +842,7 @@ public class KXmlParser implements XmlPullParser {
         StringBuilder result = null;
 
         // if a text section was already started, prefix the start
-        if (text != null) {
+        if (!inAttributeValue && text != null) {
             result = new StringBuilder();
             result.append(text);
         }
@@ -858,7 +905,7 @@ public class KXmlParser implements XmlPullParser {
 
             } else if (c == '&') {
                 isWhitespace = false; // TODO: what if the entity resolves to whitespace?
-                readEntity(result);
+                readEntity(result, false);
                 start = position;
                 continue;
 
@@ -1438,37 +1485,6 @@ public class KXmlParser implements XmlPullParser {
         return type;
     }
 
-    public int next() throws XmlPullParserException, IOException {
-        text = null;
-        isWhitespace = true;
-        int minType = 9999;
-        token = false;
-
-        do {
-            nextImpl();
-            if (type < minType) {
-                minType = type;
-            }
-        } while (minType > ENTITY_REF // ignorable
-                || (minType >= TEXT && peekType() >= TEXT));
-
-        type = minType;
-        if (type > TEXT) {
-            type = TEXT;
-        }
-
-        return type;
-    }
-
-    public int nextToken() throws XmlPullParserException, IOException {
-        isWhitespace = true;
-        text = null;
-
-        token = true;
-        nextImpl();
-        return type;
-    }
-
     // utility methods to make XML parsing easier ...
 
     public int nextTag() throws XmlPullParserException, IOException {
author	Jesse Wilson <jessewilson@google.com>	2010-11-20 00:44:27 -0800
committer	Jesse Wilson <jessewilson@google.com>	2010-11-20 01:03:25 -0800
commit	bbf35ecae9bb5b69fb0d016a57a666d0a9e0f2fc (patch)
tree	5125d7df2546c651831325ee178d66c557fce891 /xml/src/main/java
parent	b73fc0c5bd9a5ac9295fd76d097bd56762a6ee17 (diff)
download	libcore-bbf35ecae9bb5b69fb0d016a57a666d0a9e0f2fc.zip libcore-bbf35ecae9bb5b69fb0d016a57a666d0a9e0f2fc.tar.gz libcore-bbf35ecae9bb5b69fb0d016a57a666d0a9e0f2fc.tar.bz2