summaryrefslogtreecommitdiffstats
path: root/xml/src
diff options
context:
space:
mode:
author Jesse Wilson <jessewilson@google.com> 2010-11-13 08:34:48 -0800
committer Android (Google) Code Review <android-gerrit@google.com> 2010-11-13 08:34:48 -0800
commit 4ab2cec301baf9704b1235aa50b544e8d7f53124 (patch)
tree 9c3126e2fd8597522974b196df5a82cd9c7d2110 /xml/src
parent 87987208b0e225bc2022190e57b01e8d57d29193 (diff)
parent fda724de28fe86804e6ef6a0afd7ae5be1529083 (diff)
download libcore-4ab2cec301baf9704b1235aa50b544e8d7f53124.zip
libcore-4ab2cec301baf9704b1235aa50b544e8d7f53124.tar.gz
libcore-4ab2cec301baf9704b1235aa50b544e8d7f53124.tar.bz2
Merge "Optimize KxmlParser." into dalvik-dev
Diffstat (limited to 'xml/src')
-rw-r--r-- xml/src/main/java/org/kxml2/io/KXmlParser.java 1185
-rw-r--r-- xml/src/main/java/org/xmlpull/v1/XmlPullParser.java 2
2 files changed, 664 insertions, 523 deletions
diff --git a/xml/src/main/java/org/kxml2/io/KXmlParser.java b/xml/src/main/java/org/kxml2/io/KXmlParser.java
index 3ee5e43..9ca555b 100644
--- a/xml/src/main/java/org/kxml2/io/KXmlParser.java
+++ b/xml/src/main/java/org/kxml2/io/KXmlParser.java
@@ -36,13 +36,21 @@ import org.xmlpull.v1.XmlPullParserException;
*/
public class KXmlParser implements XmlPullParser {
- private Object location;
+ private static final char[] START_COMMENT = { '<', '!', '-', '-' };
+ private static final char[] END_COMMENT = { '-', '-', '>' };
+ private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
+ private static final char[] END_CDATA = { ']', ']', '>' };
+ private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' };
+ private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' };
+ private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' };
+ // no END_DOCTYPE because doctype must be parsed
+
static final private String UNEXPECTED_EOF = "Unexpected EOF";
static final private String ILLEGAL_TYPE = "Wrong event type";
- static final private int LEGACY = 999;
- static final private int XML_DECL = 998;
+ static final private int XML_DECLARATION = 998;
// general
+ private String location;
private String version;
private Boolean standalone;
@@ -60,33 +68,31 @@ public class KXmlParser implements XmlPullParser {
private Reader reader;
private String encoding;
- private char[] srcBuf;
-
- private int srcPos;
- private int srcCount;
-
- private int line;
- private int column;
-
- // txtbuffer
-
- /** Target buffer for storing incoming text (including aggregated resolved entities) */
- private char[] txtBuf = new char[128];
- /** Write position */
- private int txtPos;
+ private final char[] buffer = new char[8192];
+ private int position = 0;
+ private int limit = 0;
+
+ /*
+ * Track the number of newlines and columns preceding the current buffer. To
+ * compute the line and column of a position in the buffer, compute the line
+ * and column in the buffer and add the preceding values.
+ */
+ private int bufferStartLine;
+ private int bufferStartColumn;
- // Event-related
+ // the current token
private int type;
private boolean isWhitespace;
private String namespace;
private String prefix;
private String name;
+ private String text;
private boolean degenerated;
private int attributeCount;
- /**
+ /*
* The current element's attributes arranged in groups of 4:
* i + 0 = attribute namespace URI
* i + 1 = attribute namespace prefix
@@ -97,21 +103,9 @@ public class KXmlParser implements XmlPullParser {
private String error;
- /**
- * A separate peek buffer seems simpler than managing wrap around in the first level read
- * buffer
- */
- private int[] peek = new int[2];
- private int peekCount;
- private boolean wasCR;
-
private boolean unresolved;
private boolean token;
- public KXmlParser() {
- srcBuf = new char[8192];
- }
-
/**
* Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http:foo"}
* in pulled elements. Most applications will only be interested in the effective namespaces of
@@ -161,7 +155,7 @@ public class KXmlParser implements XmlPullParser {
nspStack[j + 1] = attributes[i + 3];
if (attrName != null && attributes[i + 3].isEmpty()) {
- error("illegal empty namespace");
+ checkRelaxed("illegal empty namespace");
}
if (keepNamespaceAttributes) {
@@ -213,7 +207,7 @@ public class KXmlParser implements XmlPullParser {
int cut = name.indexOf(':');
if (cut == 0) {
- error("illegal tag name: " + name);
+ checkRelaxed("illegal tag name: " + name);
}
if (cut != -1) {
@@ -225,7 +219,7 @@ public class KXmlParser implements XmlPullParser {
if (this.namespace == null) {
if (prefix != null) {
- error("undefined prefix: " + prefix);
+ checkRelaxed("undefined prefix: " + prefix);
}
this.namespace = NO_NAMESPACE;
}
@@ -242,21 +236,13 @@ public class KXmlParser implements XmlPullParser {
return bigger;
}
- private void error(String desc) throws XmlPullParserException {
- if (relaxed) {
- if (error == null) {
- error = "ERR: " + desc;
- }
- } else {
- exception(desc);
+ private void checkRelaxed(String errorMessage) throws XmlPullParserException {
+ if (!relaxed) {
+ throw new XmlPullParserException(errorMessage, this, null);
+ }
+ if (error == null) {
+ error = "Error: " + errorMessage;
}
- }
-
- private void exception(String desc) throws XmlPullParserException {
- throw new XmlPullParserException(
- desc.length() < 100 ? desc : desc.substring(0, 100) + "\n",
- this,
- null);
}
/**
@@ -265,7 +251,7 @@ public class KXmlParser implements XmlPullParser {
*/
private void nextImpl() throws IOException, XmlPullParserException {
if (reader == null) {
- exception("No Input specified");
+ throw new XmlPullParserException("setInput() must be called first.", this, null);
}
if (type == END_TAG) {
@@ -285,9 +271,7 @@ public class KXmlParser implements XmlPullParser {
}
if (error != null) {
- for (int i = 0; i < error.length(); i++) {
- push(error.charAt(i));
- }
+ text = error;
error = null;
type = COMMENT;
return;
@@ -301,229 +285,243 @@ public class KXmlParser implements XmlPullParser {
switch (type) {
- case ENTITY_REF:
- pushEntity();
- return;
-
- case START_TAG:
- parseStartTag(false);
- return;
-
- case END_TAG:
- parseEndTag();
- return;
-
- case END_DOCUMENT:
- return;
-
- case TEXT:
- pushText('<', !token, false);
- if (depth == 0) {
- if (isWhitespace) {
- type = IGNORABLE_WHITESPACE;
- }
- }
+ case ENTITY_REF:
+ if (token) {
+ StringBuilder entityTextBuilder = new StringBuilder();
+ readEntity(entityTextBuilder);
+ text = entityTextBuilder.toString();
return;
+ }
+ // fall-through
+ case TEXT:
+ text = readValue('<', !token, false);
+ if (depth == 0 && isWhitespace) {
+ type = IGNORABLE_WHITESPACE;
+ }
+ return;
- default:
- type = parseLegacy(token);
- if (type != XML_DECL) {
- return;
- }
- }
- }
- }
+ case START_TAG:
+ text = null; // TODO: fix next()/nextToken() so this is handled there
+ parseStartTag(false);
+ return;
- private int parseLegacy(boolean push) throws IOException, XmlPullParserException {
- String req = "";
- int term;
- int result;
- int prev = 0;
+ case END_TAG:
+ readEndTag();
+ return;
- read(); // <
- int c = read();
+ case END_DOCUMENT:
+ return;
- if (c == '?') {
- if ((peek(0) == 'x' || peek(0) == 'X')
- && (peek(1) == 'm' || peek(1) == 'M')) {
+ case XML_DECLARATION:
+ readXmlDeclaration();
+ continue;
- if (push) {
- push(peek(0));
- push(peek(1));
+ case PROCESSING_INSTRUCTION:
+ read(START_PROCESSING_INSTRUCTION);
+ if (token) {
+ text = readUntil(END_PROCESSING_INSTRUCTION, true);
+ } else {
+ readUntil(END_PROCESSING_INSTRUCTION, false);
}
- read();
- read();
-
- if ((peek(0) == 'l' || peek(0) == 'L') && peek(1) <= ' ') {
-
- if (line != 1 || column > 4) {
- error("PI must not start with xml");
- }
-
- parseStartTag(true);
-
- if (attributeCount < 1 || !"version".equals(attributes[2])) {
- error("version expected");
- }
+ return;
- version = attributes[3];
+ case DOCDECL:
+ readDoctype(token);
+ return;
- int pos = 1;
+ case CDSECT:
+ String oldText = text;
+ read(START_CDATA);
+ text = readUntil(END_CDATA, true);
+ if (oldText != null) {
+ text = oldText + text; // TODO: fix next()/nextToken() so this is handled there
+ }
+ return;
- if (pos < attributeCount
- && "encoding".equals(attributes[2 + 4])) {
- encoding = attributes[3 + 4];
- pos++;
- }
+ case COMMENT:
+ read(START_COMMENT);
+ if (token) {
+ text = readUntil(END_COMMENT, true);
+ } else {
+ readUntil(END_COMMENT, false);
+ }
+ return;
+ }
+ }
+ }
- if (pos < attributeCount
- && "standalone".equals(attributes[4 * pos + 2])) {
- String st = attributes[3 + 4 * pos];
- if ("yes".equals(st)) {
- standalone = new Boolean(true);
- } else if ("no".equals(st)) {
- standalone = new Boolean(false);
- } else {
- error("illegal standalone value: " + st);
- }
- pos++;
- }
+ /**
+ * Reads text until the specified delimiter is encountered. Consumes the
+ * text and the delimiter.
+ *
+ * @param returnText true to return the read text excluding the delimiter;
+ * false to return null.
+ */
+ private String readUntil(char[] delimiter, boolean returnText)
+ throws IOException, XmlPullParserException {
+ int previous = -1;
+ int start = position;
+ StringBuilder result = null;
- if (pos != attributeCount) {
- error("illegal xmldecl");
+ search:
+ while (true) {
+ if (position + delimiter.length >= limit) {
+ if (start < position && returnText) {
+ if (result == null) {
+ result = new StringBuilder();
}
-
- isWhitespace = true;
- txtPos = 0;
-
- return XML_DECL;
+ result.append(buffer, start, position - start);
}
+ if (!fillBuffer(delimiter.length)) {
+ checkRelaxed(UNEXPECTED_EOF);
+ type = COMMENT;
+ return null;
+ }
+ start = position;
}
- term = '?';
- result = PROCESSING_INSTRUCTION;
- } else if (c == '!') {
- if (peek(0) == '-') {
- result = COMMENT;
- req = "--";
- term = '-';
- } else if (peek(0) == '[') {
- result = CDSECT;
- req = "[CDATA[";
- term = ']';
- push = true;
- } else {
- result = DOCDECL;
- req = "DOCTYPE";
- term = -1;
+ // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
+ // when the VM has better method inlining
+ for (int i = 0; i < delimiter.length; i++) {
+ if (buffer[position + i] != delimiter[i]) {
+ previous = buffer[position];
+ position++;
+ continue search;
+ }
}
- } else {
- error("illegal: <" + c);
- return COMMENT;
+
+ break;
}
- for (int i = 0; i < req.length(); i++) {
- read(req.charAt(i));
+ if (delimiter == END_COMMENT && previous == '-') {
+ checkRelaxed("illegal comment delimiter: --->");
}
- if (result == DOCDECL) {
- parseDoctype(push);
+ int end = position;
+ position += delimiter.length;
+
+ if (!returnText) {
+ return null;
+ } else if (result == null) {
+ return new String(buffer, start, end - start);
} else {
- while (true) {
- c = read();
- if (c == -1) {
- error(UNEXPECTED_EOF);
- return COMMENT;
- }
+ result.append(buffer, start, end - start);
+ return result.toString();
+ }
+ }
- if (push) {
- push(c);
- }
+ /**
+ * Returns true if an XML declaration was read.
+ */
+ private boolean readXmlDeclaration() throws IOException, XmlPullParserException {
+ if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) {
+ checkRelaxed("processing instructions must not start with xml");
+ }
- if ((term == '?' || c == term)
- && peek(0) == term
- && peek(1) == '>') {
- break;
- }
+ read(START_PROCESSING_INSTRUCTION);
+ parseStartTag(true);
- prev = c;
- }
+ if (attributeCount < 1 || !"version".equals(attributes[2])) {
+ checkRelaxed("version expected");
+ }
- if (term == '-' && prev == '-' && !relaxed) {
- error("illegal comment delimiter: --->");
- }
+ version = attributes[3];
+
+ int pos = 1;
- read();
- read();
+ if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) {
+ encoding = attributes[3 + 4];
+ pos++;
+ }
- if (push && term != '?') {
- txtPos--;
+ if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) {
+ String st = attributes[3 + 4 * pos];
+ if ("yes".equals(st)) {
+ standalone = Boolean.TRUE;
+ } else if ("no".equals(st)) {
+ standalone = Boolean.FALSE;
+ } else {
+ checkRelaxed("illegal standalone value: " + st);
}
+ pos++;
+ }
+ if (pos != attributeCount) {
+ checkRelaxed("unexpected attributes in XML declaration");
}
- return result;
+
+ isWhitespace = true;
+ text = null;
+ return true;
}
- /**
- * precondition: &lt! consumed
- */
- private void parseDoctype(boolean push) throws IOException, XmlPullParserException {
+ private void readDoctype(boolean assignText) throws IOException, XmlPullParserException {
+ read(START_DOCTYPE);
+
+ int start = position;
+ StringBuilder result = null;
int nesting = 1;
boolean quoted = false;
while (true) {
- int i = read();
- switch (i) {
-
- case -1:
- error(UNEXPECTED_EOF);
+ if (position >= limit) {
+ if (start < position && assignText) {
+ if (result == null) {
+ result = new StringBuilder();
+ }
+ result.append(buffer, start, position - start);
+ }
+ if (!fillBuffer(1)) {
+ checkRelaxed(UNEXPECTED_EOF);
return;
+ }
+ start = position;
+ }
- case '\'':
- quoted = !quoted;
- break;
-
- case '<':
- if (!quoted) {
- nesting++;
- }
- break;
+ char i = buffer[position++];
- case '>':
- if (!quoted) {
- if ((--nesting) == 0) {
- return;
- }
- }
+ if (i == '\'') {
+ quoted = !quoted; // TODO: should this include a double quote as well?
+ } else if (i == '<') {
+ if (!quoted) {
+ nesting++;
+ }
+ } else if (i == '>') {
+ if (!quoted && --nesting == 0) {
break;
+ }
}
- if (push) {
- push(i);
+ }
+
+ if (assignText) {
+ if (result == null) {
+ text = new String(buffer, start, position - start - 1); // omit the '>'
+ } else {
+ result.append(buffer, start, position - start - 1); // omit the '>'
+ text = result.toString();
}
}
}
- /**
- * precondition: &lt;/ consumed
- */
- private void parseEndTag() throws IOException, XmlPullParserException {
- read(); // '<'
- read(); // '/'
- name = readName();
+ private void readEndTag() throws IOException, XmlPullParserException {
+ read('<');
+ read('/');
+ name = readName(); // TODO: pass the expected name in as a hint?
skip();
read('>');
- int sp = (depth - 1) << 2;
+ int sp = (depth - 1) * 4;
if (depth == 0) {
- error("element stack empty");
+ checkRelaxed("read end tag " + name + " with no tags open");
type = COMMENT;
return;
}
if (!relaxed) {
if (!name.equals(elementStack[sp + 3])) {
- error("expected: /" + elementStack[sp + 3] + " read: " + name);
+ throw new XmlPullParserException(
+ "expected: /" + elementStack[sp + 3] + " read: " + name, this, null);
}
namespace = elementStack[sp];
@@ -532,41 +530,51 @@ public class KXmlParser implements XmlPullParser {
}
}
- private int peekType() throws IOException {
- switch (peek(0)) {
- case -1:
- return END_DOCUMENT;
- case '&':
- return ENTITY_REF;
- case '<':
- switch (peek(1)) {
- case '/':
- return END_TAG;
- case '?':
- case '!':
- return LEGACY;
- default:
- return START_TAG;
- }
- default:
- return TEXT;
+ /**
+ * Returns the type of the next token.
+ */
+ private int peekType() throws IOException, XmlPullParserException {
+ if (position >= limit && !fillBuffer(1)) {
+ return END_DOCUMENT;
}
- }
- private String get(int pos) {
- return new String(txtBuf, pos, txtPos - pos);
- }
+ if (buffer[position] == '&') {
+ return ENTITY_REF;
- private void push(int c) {
- isWhitespace &= c <= ' ';
+ } else if (buffer[position] == '<') {
+ if (position + 2 >= limit && !fillBuffer(3)) {
+ throw new XmlPullParserException("Dangling <", this, null);
+ }
- if (txtPos == txtBuf.length) {
- char[] bigger = new char[txtPos * 4 / 3 + 4];
- System.arraycopy(txtBuf, 0, bigger, 0, txtPos);
- txtBuf = bigger;
+ if (buffer[position + 1] == '/') {
+ return END_TAG;
+ } else if (buffer[position + 1] == '?') {
+ // we're looking for "<?xml " with case insensitivity
+ if ((position + 5 < limit || fillBuffer(6))
+ && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X')
+ && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M')
+ && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L')
+ && (buffer[position + 5] == ' ')) {
+ return XML_DECLARATION;
+ } else {
+ return PROCESSING_INSTRUCTION;
+ }
+ } else if (buffer[position + 1] == '!') {
+ if (buffer[position + 2] == START_DOCTYPE[2]) {
+ return DOCDECL;
+ } else if (buffer[position + 2] == START_CDATA[2]) {
+ return CDSECT;
+ } else if (buffer[position + 2] == START_COMMENT[2]) {
+ return COMMENT;
+ } else {
+ throw new XmlPullParserException("Unexpected <!", this, null);
+ }
+ } else {
+ return START_TAG;
+ }
+ } else {
+ return TEXT;
}
-
- txtBuf[txtPos++] = (char) c;
}
/**
@@ -574,7 +582,7 @@ public class KXmlParser implements XmlPullParser {
*/
private void parseStartTag(boolean xmldecl) throws IOException, XmlPullParserException {
if (!xmldecl) {
- read();
+ read('<');
}
name = readName();
attributeCount = 0;
@@ -582,84 +590,78 @@ public class KXmlParser implements XmlPullParser {
while (true) {
skip();
- int c = peek(0);
+ if (position >= limit && !fillBuffer(1)) {
+ checkRelaxed(UNEXPECTED_EOF);
+ return;
+ }
+
+ int c = buffer[position];
if (xmldecl) {
if (c == '?') {
- read();
+ position++;
read('>');
return;
}
} else {
if (c == '/') {
degenerated = true;
- read();
+ position++;
skip();
read('>');
break;
- }
-
- if (c == '>' && !xmldecl) {
- read();
+ } else if (c == '>') {
+ position++;
break;
}
}
- if (c == -1) {
- error(UNEXPECTED_EOF);
- return;
- }
-
String attrName = readName();
- if (attrName.length() == 0) {
- error("attr name expected");
- break;
- }
-
- int i = (attributeCount++) << 2;
-
+ int i = (attributeCount++) * 4;
attributes = ensureCapacity(attributes, i + 4);
-
attributes[i++] = "";
attributes[i++] = null;
attributes[i++] = attrName;
skip();
+ if (position >= limit && !fillBuffer(1)) {
+ checkRelaxed(UNEXPECTED_EOF);
+ return;
+ }
+
+ if (buffer[position] == '=') {
+ position++;
- if (peek(0) != '=') {
- if (!relaxed) {
- error("Attr.value missing f. " + attrName);
- }
- attributes[i] = attrName;
- } else {
- read('=');
skip();
- int delimiter = peek(0);
+ if (position >= limit && !fillBuffer(1)) {
+ checkRelaxed(UNEXPECTED_EOF);
+ return;
+ }
+ char delimiter = buffer[position];
- if (delimiter != '\'' && delimiter != '"') {
- if (!relaxed) {
- error("attr value delimiter missing!");
- }
+ if (delimiter == '\'' || delimiter == '"') {
+ position++;
+ } else if (relaxed) {
delimiter = ' ';
} else {
- read();
+ throw new XmlPullParserException("attr value delimiter missing!", this, null);
}
- int p = txtPos;
- pushText(delimiter, true, true);
-
- attributes[i] = get(p);
- txtPos = p;
+ attributes[i] = readValue(delimiter, true, true);
if (delimiter != ' ') {
- read(); // skip endquote
+ position++; // end quote
}
+ } else if (relaxed) {
+ attributes[i] = attrName;
+ } else {
+ checkRelaxed("Attr.value missing f. " + attrName);
+ attributes[i] = attrName;
}
}
- int sp = depth++ << 2;
-
+ int sp = depth++ * 4;
elementStack = ensureCapacity(elementStack, sp + 4);
elementStack[sp + 3] = name;
@@ -683,217 +685,352 @@ public class KXmlParser implements XmlPullParser {
}
/**
- * result: isWhitespace; if the setName parameter is set, the name of the entity is stored in
- * "name"
+ * Reads an entity reference from the buffer, resolves it, and writes the
+ * resolved entity to {@code out}. If the entity cannot be read or resolved,
+ * {@code out} will contain the partial entity reference.
*/
- private void pushEntity() throws IOException, XmlPullParserException {
- push(read()); // &
+ private void readEntity(StringBuilder out) throws IOException, XmlPullParserException {
+ int start = out.length();
+
+ if (buffer[position++] != '&') {
+ throw new AssertionError();
+ }
- int pos = txtPos;
+ out.append('&');
while (true) {
- int c = peek(0);
+ int c = peekCharacter();
+
if (c == ';') {
- read();
+ position++;
break;
- }
- if (c < 128 && (c < '0' || c > '9') && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')
- && c != '_' && c != '-' && c != '#') {
- if (!relaxed) {
- error("unterminated entity ref");
- }
+ } else if (c >= 128
+ || (c >= '0' && c <= '9')
+ || (c >= 'a' && c <= 'z')
+ || (c >= 'A' && c <= 'Z')
+ || c == '_'
+ || c == '-'
+ || c == '#') {
+ position++;
+ out.append((char) c);
+
+ } else if (relaxed) {
+ // intentionally leave the partial reference in 'out'
return;
- }
- push(read());
+ } else {
+ throw new XmlPullParserException("unterminated entity ref", this, null);
+ }
}
- String code = get(pos);
- txtPos = pos - 1;
+ String code = out.substring(start + 1);
+ out.delete(start, out.length());
+
if (token && type == ENTITY_REF) {
name = code;
}
if (code.charAt(0) == '#') {
+ // TODO: check IndexOutOfBoundsException?
+ // TODO: save an intermediate string for 'code' if unneeded?
int c = code.charAt(1) == 'x'
? Integer.parseInt(code.substring(2), 16)
: Integer.parseInt(code.substring(1));
- push(c);
+ // TODO: set unresolved to false?
+ out.append((char) c);
return;
}
- String result = entityMap.get(code);
-
- unresolved = result == null;
+ String resolved = entityMap.get(code);
+ if (resolved != null) {
+ unresolved = false;
+ out.append(resolved);
+ return;
+ }
- if (unresolved) {
- if (!token) {
- error("unresolved: &" + code + ";");
- }
- } else {
- for (int i = 0; i < result.length(); i++) {
- push(result.charAt(i));
- }
+ unresolved = true;
+ if (!token) {
+ checkRelaxed("unresolved: &" + code + ";");
+ // TODO: should the &code; show up in the text in relaxed mode?
}
}
/**
- * types: '<': parse to any token (for nextToken ()) '"': parse to quote ' ': parse to
- * whitespace or '>'
+ * Returns the current text or attribute value. This also has the side
+ * effect of setting isWhitespace to false if a non-whitespace character is
+ * encountered.
+ *
+ * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted
+ * attributes, or a space for unquoted attributes.
*/
- private void pushText(int delimiter, boolean resolveEntities, boolean inAttributeValue)
- throws IOException, XmlPullParserException {
+ private String readValue(char delimiter, boolean resolveEntities,
+ boolean inAttributeValue) throws IOException, XmlPullParserException {
+
+ /*
+ * This method returns all of the characters from the current position
+ * through to an appropriate delimiter.
+ *
+ * If we're lucky (which we usually are), we'll return a single slice of
+ * the buffer. This fast path avoids allocating a string builder.
+ *
+ * There are 5 unlucky characters we could encounter:
+ * - "&": entities must be resolved.
+ * - "<": this isn't permitted in attributes unless relaxed.
+ * - "]": this requires a lookahead to defend against the forbidden
+ * CDATA section delimiter "]]>".
+ * - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it
+ * isn't followed by "\n", we replace "\r" with either a "\n"
+ * in text nodes or a space in attribute values.
+ * - "\n": In attribute values, "\n" must be replaced with a space.
+ *
+ * We could also get unlucky by needing to refill the buffer midway
+ * through the text.
+ */
+
+ int start = position;
+ StringBuilder result = null;
+
+ // if a text section was already started, prefix the start
+ if (text != null) {
+ result = new StringBuilder();
+ result.append(text);
+ }
+
+ while (true) {
- int next = peek(0);
- int cbrCount = 0;
+ /*
+ * Make sure we have at least a single character to read from the
+ * buffer. This mutates the buffer, so save the partial result
+ * to the slow path string builder first.
+ */
+ if (position >= limit) {
+ if (start < position) {
+ if (result == null) {
+ result = new StringBuilder();
+ }
+ result.append(buffer, start, position - start);
+ }
+ if (!fillBuffer(1)) {
+ return result != null ? result.toString() : "";
+ }
+ start = position;
+ }
- while (next != -1 && next != delimiter) { // covers eof, '<', '"'
+ char c = buffer[position];
- if (delimiter == ' ' && (next <= ' ' || next == '>')) {
+ if (c == delimiter
+ || (delimiter == ' ' && (c <= ' ' || c == '>'))
+ || c == '&' && !resolveEntities) {
break;
}
- if (next == '&') {
- if (!resolveEntities) {
- break;
- }
-
- pushEntity();
- } else if (next == '<' && inAttributeValue) {
- error("Illegal: \"<\" inside attribute value");
- } else if (next == '\n' && type == START_TAG) {
- read();
- push(' ');
- } else {
- push(read());
+ if (c != '\r'
+ && (c != '\n' || !inAttributeValue)
+ && c != '&'
+ && c != '<'
+ && (c != ']' || inAttributeValue)) {
+ isWhitespace &= (c <= ' ');
+ position++;
+ continue;
}
/*
- * "]]>" is allowed in attribute values, but is not allowed in
- * regular text between markup.
+ * We've encountered an unlucky character! Convert from fast
+ * path to slow path if we haven't done so already.
*/
- final boolean allowCloseCdata = inAttributeValue;
- if (!allowCloseCdata && (next == '>' && cbrCount >= 2 && delimiter != ']')) {
- error("Illegal: \"]]>\" outside CDATA section");
+ if (result == null) {
+ result = new StringBuilder();
}
+ result.append(buffer, start, position - start);
+
+ if (c == '\r') {
+ if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') {
+ position++;
+ }
+ c = inAttributeValue ? ' ' : '\n';
+
+ } else if (c == '\n') {
+ c = ' ';
+
+ } else if (c == '&') {
+ isWhitespace = false; // TODO: what if the entity resolves to whitespace?
+ readEntity(result);
+ start = position;
+ continue;
+
+ } else if (c == '<') {
+ if (inAttributeValue) {
+ checkRelaxed("Illegal: \"<\" inside attribute value");
+ }
+ isWhitespace = false;
+
+ } else if (c == ']') {
+ if ((position + 2 < limit || fillBuffer(3))
+ && buffer[position + 1] == ']' && buffer[position + 2] == '>') {
+ checkRelaxed("Illegal: \"]]>\" outside CDATA section");
+ }
+ isWhitespace = false;
- if (next == ']') {
- cbrCount++;
} else {
- cbrCount = 0;
+ throw new AssertionError();
}
- next = peek(0);
+ position++;
+ result.append(c);
+ start = position;
}
- }
- private void read(char c) throws IOException, XmlPullParserException {
- int a = read();
- if (a != c) {
- error("expected: '" + c + "' actual: '" + ((char) a) + "'");
+ if (result == null) {
+ return new String(buffer, start, position - start);
+ } else {
+ result.append(buffer, start, position - start);
+ return result.toString();
}
}
- private int read() throws IOException {
- int result;
+ private void read(char expected) throws IOException, XmlPullParserException {
+ int c = peekCharacter();
+ if (c != expected) {
+ checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'");
+ }
+ position++;
+ }
- if (peekCount == 0) {
- result = peek(0);
- } else {
- result = peek[0];
- peek[0] = peek[1];
+ private void read(char[] chars) throws IOException, XmlPullParserException {
+ if (position + chars.length >= limit && !fillBuffer(chars.length)) {
+ checkRelaxed("expected: '" + new String(chars) + "' but was EOF");
+ return;
}
- peekCount--;
- column++;
+ // TODO: replace with Arrays.equals(buffer, position, chars, 0, chars.length)
+ // when the VM has better method inlining
+ for (int i = 0; i < chars.length; i++) {
+ if (buffer[position + i] != chars[i]) {
+ checkRelaxed("expected: \"" + new String(chars) + "\" but was \""
+ + new String(buffer, position, chars.length) + "...\"");
+ }
+ }
- if (result == '\n') {
+ position += chars.length;
+ }
- line++;
- column = 1;
+ private int peekCharacter() throws IOException, XmlPullParserException {
+ if (position < limit || fillBuffer(1)) {
+ return buffer[position];
}
-
- return result;
+ return -1;
}
/**
- * Does never read more than needed
+ * Returns true once {@code limit - position >= minimum}. If the data is
+ * exhausted before that many characters are available, this returns
+ * false.
*/
- private int peek(int pos) throws IOException {
- while (pos >= peekCount) {
- int nw;
- if (srcBuf.length <= 1) {
- nw = reader.read();
- } else if (srcPos < srcCount) {
- nw = srcBuf[srcPos++];
+ private boolean fillBuffer(int minimum) throws IOException {
+ // Before clobbering the old characters, update where buffer starts
+ for (int i = 0; i < position; i++) {
+ if (buffer[i] == '\n') {
+ bufferStartLine++;
+ bufferStartColumn = 0;
} else {
- srcCount = reader.read(srcBuf, 0, srcBuf.length);
- if (srcCount <= 0) {
- nw = -1;
- } else {
- nw = srcBuf[0];
- }
-
- srcPos = 1;
+ bufferStartColumn++;
}
+ }
- if (nw == '\r') {
- wasCR = true;
- peek[peekCount++] = '\n';
- } else {
- if (nw == '\n') {
- if (!wasCR) {
- peek[peekCount++] = '\n';
- }
- } else {
- peek[peekCount++] = nw;
- }
+ if (limit != position) {
+ limit -= position;
+ System.arraycopy(buffer, position, buffer, 0, limit);
+ } else {
+ limit = 0;
+ }
- wasCR = false;
+ position = 0;
+ int total;
+ while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) {
+ limit += total;
+ if (limit >= minimum) {
+ return true;
}
}
-
- return peek[pos];
+ return false;
}
+ /**
+ * Returns an element or attribute name. This is always non-empty for
+ * non-relaxed parsers.
+ */
private String readName() throws IOException, XmlPullParserException {
- int pos = txtPos;
- int c = peek(0);
- if ((c < 'a' || c > 'z')
- && (c < 'A' || c > 'Z')
- && c != '_'
- && c != ':'
- && c < 0x0c0
- && !relaxed) {
- error("name expected");
+ if (position >= limit && !fillBuffer(1)) {
+ checkRelaxed("name expected");
+ return "";
}
- do {
- push(read());
- c = peek(0);
- }
- while ((c >= 'a' && c <= 'z')
+ int start = position;
+ StringBuilder result = null;
+
+ // read the first character
+ char c = buffer[position];
+ if ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
- || (c >= '0' && c <= '9')
|| c == '_'
- || c == '-'
|| c == ':'
- || c == '.'
- || c >= 0x0b7);
+ || c >= '\u00c0' // TODO: check the XML spec
+ || relaxed) {
+ position++;
+ } else {
+ checkRelaxed("name expected");
+ return "";
+ }
- String result = get(pos);
- txtPos = pos;
- return result;
+ while (true) {
+ /*
+ * Make sure we have at least a single character to read from the
+ * buffer. This mutates the buffer, so save the partial result
+ * to the slow path string builder first.
+ */
+ if (position >= limit) {
+ if (result == null) {
+ result = new StringBuilder();
+ }
+ result.append(buffer, start, position - start);
+ if (!fillBuffer(1)) {
+ return result.toString();
+ }
+ start = position;
+ }
+
+ // read another character
+ c = buffer[position];
+ if ((c >= 'a' && c <= 'z')
+ || (c >= 'A' && c <= 'Z')
+ || (c >= '0' && c <= '9')
+ || c == '_'
+ || c == '-'
+ || c == ':'
+ || c == '.'
+ || c >= '\u00b7') { // TODO: check the XML spec
+ position++;
+ continue;
+ }
+
+ // we encountered a non-name character. done!
+ if (result == null) {
+ return new String(buffer, start, position - start);
+ } else {
+ result.append(buffer, start, position - start);
+ return result.toString();
+ }
+ }
}
private void skip() throws IOException {
- while (true) {
- int c = peek(0);
- if (c > ' ' || c == -1) {
+ while (position < limit || fillBuffer(1)) {
+ int c = buffer[position];
+ if (c > ' ') {
break;
}
- read();
+ position++;
}
}
@@ -902,8 +1039,6 @@ public class KXmlParser implements XmlPullParser {
public void setInput(Reader reader) throws XmlPullParserException {
this.reader = reader;
- line = 1;
- column = 0;
type = START_DOCUMENT;
name = null;
namespace = null;
@@ -917,9 +1052,10 @@ public class KXmlParser implements XmlPullParser {
return;
}
- srcPos = 0;
- srcCount = 0;
- peekCount = 0;
+ position = 0;
+ limit = 0;
+ bufferStartLine = 0;
+ bufferStartColumn = 0;
depth = 0;
entityMap = new HashMap<String, String>();
@@ -931,8 +1067,8 @@ public class KXmlParser implements XmlPullParser {
}
public void setInput(InputStream is, String _enc) throws XmlPullParserException {
- srcPos = 0;
- srcCount = 0;
+ position = 0;
+ limit = 0;
String enc = _enc;
if (is == null) {
@@ -941,66 +1077,64 @@ public class KXmlParser implements XmlPullParser {
try {
if (enc == null) {
- // read four bytes
-
- int chk = 0;
-
- while (srcCount < 4) {
+ // read the first four bytes looking for an indication of the encoding in use
+ int firstFourBytes = 0;
+ while (limit < 4) {
int i = is.read();
if (i == -1) {
break;
}
- chk = (chk << 8) | i;
- srcBuf[srcCount++] = (char) i;
+ firstFourBytes = (firstFourBytes << 8) | i;
+ buffer[limit++] = (char) i;
}
- if (srcCount == 4) {
- switch (chk) {
- case 0x00000FEFF:
+ if (limit == 4) {
+ switch (firstFourBytes) {
+ case 0x00000FEFF: // UTF-32BE BOM
enc = "UTF-32BE";
- srcCount = 0;
+ limit = 0;
break;
- case 0x0FFFE0000:
+ case 0x0FFFE0000: // UTF-32LE BOM
enc = "UTF-32LE";
- srcCount = 0;
+ limit = 0;
break;
- case 0x03c:
+ case 0x0000003c: // '<' in UTF-32BE
enc = "UTF-32BE";
- srcBuf[0] = '<';
- srcCount = 1;
+ buffer[0] = '<';
+ limit = 1;
break;
- case 0x03c000000:
+ case 0x03c000000: // '<' in UTF-32LE
enc = "UTF-32LE";
- srcBuf[0] = '<';
- srcCount = 1;
+ buffer[0] = '<';
+ limit = 1;
break;
- case 0x0003c003f:
+ case 0x0003c003f: // "<?" in UTF-16BE
enc = "UTF-16BE";
- srcBuf[0] = '<';
- srcBuf[1] = '?';
- srcCount = 2;
+ buffer[0] = '<';
+ buffer[1] = '?';
+ limit = 2;
break;
- case 0x03c003f00:
+ case 0x03c003f00: // "<?" in UTF-16LE
enc = "UTF-16LE";
- srcBuf[0] = '<';
- srcBuf[1] = '?';
- srcCount = 2;
+ buffer[0] = '<';
+ buffer[1] = '?';
+ limit = 2;
break;
- case 0x03c3f786d:
+ case 0x03c3f786d: // "<?xm" in ASCII etc.
while (true) {
int i = is.read();
if (i == -1) {
break;
}
- srcBuf[srcCount++] = (char) i;
+ buffer[limit++] = (char) i;
if (i == '>') {
- String s = new String(srcBuf, 0, srcCount);
+ String s = new String(buffer, 0, limit);
int i0 = s.indexOf("encoding");
if (i0 != -1) {
while (s.charAt(i0) != '"'
@@ -1016,20 +1150,19 @@ public class KXmlParser implements XmlPullParser {
}
default:
- if ((chk & 0x0ffff0000) == 0x0FEFF0000) {
+ // handle a byte order mark followed by something other than <?
+ if ((firstFourBytes & 0x0ffff0000) == 0x0FEFF0000) {
enc = "UTF-16BE";
- srcBuf[0] =
- (char) ((srcBuf[2] << 8) | srcBuf[3]);
- srcCount = 1;
- } else if ((chk & 0x0ffff0000) == 0x0fffe0000) {
+ buffer[0] = (char) ((buffer[2] << 8) | buffer[3]);
+ limit = 1;
+ } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) {
enc = "UTF-16LE";
- srcBuf[0] =
- (char) ((srcBuf[3] << 8) | srcBuf[2]);
- srcCount = 1;
- } else if ((chk & 0x0ffffff00) == 0x0EFBBBF00) {
+ buffer[0] = (char) ((buffer[3] << 8) | buffer[2]);
+ limit = 1;
+ } else if ((firstFourBytes & 0x0ffffff00) == 0x0EFBBBF00) {
enc = "UTF-8";
- srcBuf[0] = srcBuf[3];
- srcCount = 1;
+ buffer[0] = buffer[3];
+ limit = 1;
}
}
}
@@ -1039,15 +1172,12 @@ public class KXmlParser implements XmlPullParser {
enc = "UTF-8";
}
- int sc = srcCount;
+ int sc = limit;
setInput(new InputStreamReader(is, enc));
encoding = _enc;
- srcCount = sc;
+ limit = sc;
} catch (Exception e) {
- throw new XmlPullParserException(
- "Invalid stream or encoding: " + e.toString(),
- this,
- e);
+ throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e);
}
}
@@ -1094,11 +1224,11 @@ public class KXmlParser implements XmlPullParser {
}
public String getNamespacePrefix(int pos) {
- return nspStack[pos << 1];
+ return nspStack[pos * 2];
}
public String getNamespaceUri(int pos) {
- return nspStack[(pos << 1) + 1];
+ return nspStack[(pos * 2) + 1];
}
public String getNamespace(String prefix) {
@@ -1144,12 +1274,11 @@ public class KXmlParser implements XmlPullParser {
}
buf.append(name);
- int cnt = attributeCount << 2;
+ int cnt = attributeCount * 4;
for (int i = 0; i < cnt; i += 4) {
buf.append(' ');
if (attributes[i + 1] != null) {
- buf.append(
- "{" + attributes[i] + "}" + attributes[i + 1] + ":");
+ buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":");
}
buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'");
}
@@ -1169,7 +1298,7 @@ public class KXmlParser implements XmlPullParser {
buf.append(text);
}
- buf.append("@" + line + ":" + column);
+ buf.append("@" + getLineNumber() + ":" + getColumnNumber());
if (location != null) {
buf.append(" in ");
buf.append(location);
@@ -1181,40 +1310,55 @@ public class KXmlParser implements XmlPullParser {
}
public int getLineNumber() {
- return line;
+ int result = bufferStartLine;
+ for (int i = 0; i < position; i++) {
+ if (buffer[i] == '\n') {
+ result++;
+ }
+ }
+ return result + 1; // the first line is '1'
}
public int getColumnNumber() {
- return column;
+ int result = bufferStartColumn;
+ for (int i = 0; i < position; i++) {
+ if (buffer[i] == '\n') {
+ result = 0;
+ } else {
+ result++;
+ }
+ }
+ return result + 1; // the first column is '1'
}
public boolean isWhitespace() throws XmlPullParserException {
if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) {
- exception(ILLEGAL_TYPE);
+ throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
}
return isWhitespace;
}
public String getText() {
- return type < TEXT
- || (type == ENTITY_REF && unresolved) ? null : get(0);
+ if (type < TEXT || (type == ENTITY_REF && unresolved)) {
+ return null;
+ } else if (text == null) {
+ return "";
+ } else {
+ return text;
+ }
}
public char[] getTextCharacters(int[] poslen) {
- if (type >= TEXT) {
- if (type == ENTITY_REF) {
- poslen[0] = 0;
- poslen[1] = name.length();
- return name.toCharArray();
- }
- poslen[0] = 0;
- poslen[1] = txtPos;
- return txtBuf;
- }
-
- poslen[0] = -1;
- poslen[1] = -1;
- return null;
+ String text = getText();
+ if (text == null) {
+ poslen[0] = -1;
+ poslen[1] = -1;
+ return null;
+ }
+ char[] result = text.toCharArray();
+ poslen[0] = 0;
+ poslen[1] = result.length;
+ return result;
}
public String getNamespace() {
@@ -1231,7 +1375,7 @@ public class KXmlParser implements XmlPullParser {
public boolean isEmptyElementTag() throws XmlPullParserException {
if (type != START_TAG) {
- exception(ILLEGAL_TYPE);
+ throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
}
return degenerated;
}
@@ -1252,33 +1396,32 @@ public class KXmlParser implements XmlPullParser {
if (index >= attributeCount) {
throw new IndexOutOfBoundsException();
}
- return attributes[index << 2];
+ return attributes[index * 4];
}
public String getAttributeName(int index) {
if (index >= attributeCount) {
throw new IndexOutOfBoundsException();
}
- return attributes[(index << 2) + 2];
+ return attributes[(index * 4) + 2];
}
public String getAttributePrefix(int index) {
if (index >= attributeCount) {
throw new IndexOutOfBoundsException();
}
- return attributes[(index << 2) + 1];
+ return attributes[(index * 4) + 1];
}
public String getAttributeValue(int index) {
if (index >= attributeCount) {
throw new IndexOutOfBoundsException();
}
- return attributes[(index << 2) + 3];
+ return attributes[(index * 4) + 3];
}
public String getAttributeValue(String namespace, String name) {
-
- for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {
+ for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) {
if (attributes[i + 2].equals(name)
&& (namespace == null || attributes[i].equals(namespace))) {
return attributes[i + 3];
@@ -1293,8 +1436,7 @@ public class KXmlParser implements XmlPullParser {
}
public int next() throws XmlPullParserException, IOException {
-
- txtPos = 0;
+ text = null;
isWhitespace = true;
int minType = 9999;
token = false;
@@ -1304,9 +1446,7 @@ public class KXmlParser implements XmlPullParser {
if (type < minType) {
minType = type;
}
- // if (curr <= TEXT) type = curr;
- }
- while (minType > ENTITY_REF // ignorable
+ } while (minType > ENTITY_REF // ignorable
|| (minType >= TEXT && peekType() >= TEXT));
type = minType;
@@ -1319,7 +1459,7 @@ public class KXmlParser implements XmlPullParser {
public int nextToken() throws XmlPullParserException, IOException {
isWhitespace = true;
- txtPos = 0;
+ text = null;
token = true;
nextImpl();
@@ -1335,7 +1475,7 @@ public class KXmlParser implements XmlPullParser {
}
if (type != END_TAG && type != START_TAG) {
- exception("unexpected type");
+ throw new XmlPullParserException("unexpected type", this, null);
}
return type;
@@ -1347,14 +1487,14 @@ public class KXmlParser implements XmlPullParser {
if (type != this.type
|| (namespace != null && !namespace.equals(getNamespace()))
|| (name != null && !name.equals(getName()))) {
- exception(
- "expected: " + TYPES[type] + " {" + namespace + "}" + name);
+ throw new XmlPullParserException(
+ "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null);
}
}
public String nextText() throws XmlPullParserException, IOException {
if (type != START_TAG) {
- exception("precondition: START_TAG");
+ throw new XmlPullParserException("precondition: START_TAG", this, null);
}
next();
@@ -1368,7 +1508,7 @@ public class KXmlParser implements XmlPullParser {
}
if (type != END_TAG) {
- exception("END_TAG expected");
+ throw new XmlPullParserException("END_TAG expected", this, null);
}
return result;
@@ -1378,15 +1518,16 @@ public class KXmlParser implements XmlPullParser {
if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
processNsp = value;
} else if (isProp(feature, false, "relaxed")) {
+ // "http://xmlpull.org/v1/doc/features.html#relaxed"
relaxed = value;
} else {
- exception("unsupported feature: " + feature);
+ throw new XmlPullParserException("unsupported feature: " + feature, this, null);
}
}
public void setProperty(String property, Object value) throws XmlPullParserException {
if (isProp(property, true, "location")) {
- location = value;
+ location = String.valueOf(value);
} else {
throw new XmlPullParserException("unsupported property: " + property);
}
diff --git a/xml/src/main/java/org/xmlpull/v1/XmlPullParser.java b/xml/src/main/java/org/xmlpull/v1/XmlPullParser.java
index b2f5e39..48c95a9 100644
--- a/xml/src/main/java/org/xmlpull/v1/XmlPullParser.java
+++ b/xml/src/main/java/org/xmlpull/v1/XmlPullParser.java
@@ -59,7 +59,7 @@ import java.io.Reader;
* getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>&quot;)
* returns String ("1.0") or null if XMLDecl was not read or if property is not supported
* <li><b>standalone</b>:
- * getProperty(&quot;<a href="http://xmlpull.org/v1/doc/features.html#xmldecl-standalone">http://xmlpull.org/v1/doc/features.html#xmldecl-standalone</a>&quot;)
+ * getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone">http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone</a>&quot;)
* returns Boolean: null if there was no standalone declaration
* or if property is not supported
* otherwise returns Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"