diff options
-rw-r--r-- | luni/src/test/java/libcore/xml/KxmlPullParserDtdTest.java | 7 | ||||
-rw-r--r-- | luni/src/test/java/libcore/xml/PullParserDtdTest.java | 123 | ||||
-rw-r--r-- | luni/src/test/java/libcore/xml/PullParserTest.java | 11 | ||||
-rw-r--r-- | xml/src/main/java/org/kxml2/io/KXmlParser.java | 269 |
4 files changed, 350 insertions, 60 deletions
diff --git a/luni/src/test/java/libcore/xml/KxmlPullParserDtdTest.java b/luni/src/test/java/libcore/xml/KxmlPullParserDtdTest.java index 6de11c0..3175b37 100644 --- a/luni/src/test/java/libcore/xml/KxmlPullParserDtdTest.java +++ b/luni/src/test/java/libcore/xml/KxmlPullParserDtdTest.java @@ -18,9 +18,12 @@ package libcore.xml; import org.kxml2.io.KXmlParser; import org.xmlpull.v1.XmlPullParser; +import org.xmlpull.v1.XmlPullParserException; public class KxmlPullParserDtdTest extends PullParserDtdTest { - @Override XmlPullParser newPullParser() { - return new KXmlParser(); + @Override XmlPullParser newPullParser() throws XmlPullParserException { + KXmlParser result = new KXmlParser(); + result.setFeature(XmlPullParser.FEATURE_PROCESS_DOCDECL, true); + return result; } } diff --git a/luni/src/test/java/libcore/xml/PullParserDtdTest.java b/luni/src/test/java/libcore/xml/PullParserDtdTest.java index 9f2a6d2..38b7d8a 100644 --- a/luni/src/test/java/libcore/xml/PullParserDtdTest.java +++ b/luni/src/test/java/libcore/xml/PullParserDtdTest.java @@ -18,6 +18,7 @@ package libcore.xml; import java.io.IOException; import java.io.StringReader; +import java.util.Arrays; import junit.framework.TestCase; import org.xmlpull.v1.XmlPullParser; import org.xmlpull.v1.XmlPullParserException; @@ -27,6 +28,8 @@ import org.xmlpull.v1.XmlPullParserException; */ public abstract class PullParserDtdTest extends TestCase { + private static final int READ_BUFFER_SIZE = 8192; + /** * Android's Expat pull parser permits parameter entities to be declared, * but it doesn't permit such entities to be used. @@ -57,6 +60,19 @@ public abstract class PullParserDtdTest extends TestCase { assertParseFailure(parser); } + public void testGeneralAndParameterEntityWithTheSameName() throws Exception { + String xml = "<!DOCTYPE foo [" + + " <!ENTITY a \"aaa\">" + + " <!ENTITY % a \"bbb\">" + + "]><foo>&a;</foo>"; + XmlPullParser parser = newPullParser(xml); + assertEquals(XmlPullParser.START_TAG, parser.next()); + assertEquals(XmlPullParser.TEXT, parser.next()); + assertEquals("aaa", parser.getText()); + assertEquals(XmlPullParser.END_TAG, parser.next()); + assertEquals(XmlPullParser.END_DOCUMENT, parser.next()); + } + public void testInternalEntities() throws Exception { String xml = "<!DOCTYPE foo [" + " <!ENTITY a \"android\">" @@ -259,16 +275,82 @@ public abstract class PullParserDtdTest extends TestCase { public void testAttributeDefaultValues() throws Exception { String xml = "<!DOCTYPE foo [\n" + + " <!ATTLIST bar\n" + + " baz (a|b|c) \"c\">" + + "]>" + + "<foo>" + + "<bar/>" + + "<bar baz=\"a\"/>" + + "</foo>"; + XmlPullParser parser = newPullParser(xml); + assertEquals(XmlPullParser.START_TAG, parser.next()); + assertEquals(XmlPullParser.START_TAG, parser.next()); + assertEquals("bar", parser.getName()); + assertEquals("c", parser.getAttributeValue(null, "baz")); + assertEquals(XmlPullParser.END_TAG, parser.next()); + assertEquals(XmlPullParser.START_TAG, parser.next()); + assertEquals("bar", parser.getName()); + assertEquals("a", parser.getAttributeValue(null, "baz")); + assertEquals(XmlPullParser.END_TAG, parser.next()); + assertEquals(XmlPullParser.END_TAG, parser.next()); + assertEquals(XmlPullParser.END_DOCUMENT, parser.next()); + } + + public void testAttributeDefaultValueEntitiesExpanded() throws Exception { + String xml = "<!DOCTYPE foo [\n" + + " <!ENTITY g \"ghi\">" + " <!ELEMENT foo ANY>\n" + " <!ATTLIST foo\n" - + " bar (a|b|c) \"c\">" + + " bar CDATA \"abc & def &g; jk\">" + "]>" + "<foo></foo>"; XmlPullParser parser = newPullParser(xml); assertEquals(XmlPullParser.START_TAG, parser.next()); assertEquals("foo", parser.getName()); - assertEquals("c", parser.getAttributeValue(null, "bar")); + assertEquals("abc & def ghi jk", parser.getAttributeValue(null, "bar")); + assertEquals(XmlPullParser.END_TAG, parser.next()); + assertEquals(XmlPullParser.END_DOCUMENT, parser.next()); + } + + public void testAttributeDefaultValuesAndNamespaces() throws Exception { + String xml = "<!DOCTYPE foo [\n" + + " <!ATTLIST foo\n" + + " bar:a CDATA \"android\">" + + "]>" + + "<foo xmlns:bar='http://bar'></foo>"; + XmlPullParser parser = newPullParser(xml); + parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true); + assertEquals(XmlPullParser.START_TAG, parser.next()); + assertEquals("foo", parser.getName()); + // In Expat, namespaces don't apply to default attributes + int index = indexOfAttributeWithName(parser, "bar:a"); + assertEquals("", parser.getAttributeNamespace(index)); + assertEquals("bar:a", parser.getAttributeName(index)); + assertEquals("android", parser.getAttributeValue(index)); + assertEquals("CDATA", parser.getAttributeType(index)); assertEquals(XmlPullParser.END_TAG, parser.next()); + assertEquals(XmlPullParser.END_DOCUMENT, parser.next()); + } + + private int indexOfAttributeWithName(XmlPullParser parser, String name) { + for (int i = 0; i < parser.getAttributeCount(); i++) { + if (parser.getAttributeName(i).equals(name)) { + return i; + } + } + return -1; + } + + public void testAttributeEntitiesExpandedEagerly() throws Exception { + String xml = "<!DOCTYPE foo [\n" + + " <!ELEMENT foo ANY>\n" + + " <!ATTLIST foo\n" + + " bar CDATA \"abc & def &g; jk\">" + + " <!ENTITY g \"ghi\">" + + "]>" + + "<foo></foo>"; + XmlPullParser parser = newPullParser(xml); + assertParseFailure(parser); } public void testRequiredAttributesOmitted() throws Exception { @@ -337,6 +419,35 @@ public abstract class PullParserDtdTest extends TestCase { assertEquals(XmlPullParser.END_DOCUMENT, parser.next()); } + public void testVeryLongEntities() throws Exception { + String a = repeat('a', READ_BUFFER_SIZE + 1); + String b = repeat('b', READ_BUFFER_SIZE + 1); + String c = repeat('c', READ_BUFFER_SIZE + 1); + + String xml = "<!DOCTYPE foo [\n" + + " <!ENTITY " + a + " \"d &" + b + "; e\">" + + " <!ENTITY " + b + " \"f " + c + " g\">" + + "]>" + + "<foo>h &" + a + "; i</foo>"; + XmlPullParser parser = newPullParser(xml); + assertEquals(XmlPullParser.START_TAG, parser.next()); + assertEquals(XmlPullParser.TEXT, parser.next()); + assertEquals("h d f " + c + " g e i", parser.getText()); + assertEquals(XmlPullParser.END_TAG, parser.next()); + assertEquals(XmlPullParser.END_DOCUMENT, parser.next()); + } + + public void testManuallyRegisteredEntitiesWithDoctypeParsing() throws Exception { + String xml = "<foo>&a;</foo>"; + XmlPullParser parser = newPullParser(xml); + try { + parser.defineEntityReplacementText("a", "android"); + fail(); + } catch (UnsupportedOperationException expected) { + } catch (IllegalStateException expected) { + } + } + public void testDoctypeWithNextToken() throws Exception { String xml = "<!DOCTYPE foo [<!ENTITY bb \"bar baz\">]><foo>a&bb;c</foo>"; XmlPullParser parser = newPullParser(xml); @@ -364,6 +475,12 @@ public abstract class PullParserDtdTest extends TestCase { } } + private String repeat(char c, int length) { + char[] chars = new char[length]; + Arrays.fill(chars, c); + return new String(chars); + } + private XmlPullParser newPullParser(String xml) throws XmlPullParserException { XmlPullParser result = newPullParser(); result.setInput(new StringReader(xml)); @@ -373,5 +490,5 @@ public abstract class PullParserDtdTest extends TestCase { /** * Creates a new pull parser. */ - abstract XmlPullParser newPullParser(); + abstract XmlPullParser newPullParser() throws XmlPullParserException; } diff --git a/luni/src/test/java/libcore/xml/PullParserTest.java b/luni/src/test/java/libcore/xml/PullParserTest.java index 47036c6..dc5a133 100644 --- a/luni/src/test/java/libcore/xml/PullParserTest.java +++ b/luni/src/test/java/libcore/xml/PullParserTest.java @@ -229,6 +229,17 @@ public abstract class PullParserTest extends TestCase { assertEquals("yz", parser.getText()); } + public void testCustomEntitiesAreNotEvaluated() throws Exception { + XmlPullParser parser = newPullParser(); + parser.setInput(new StringReader( + "<foo a='&a;'>&a;</foo>")); + parser.defineEntityReplacementText("a", "& &a;"); + assertEquals(XmlPullParser.START_TAG, parser.next()); + assertEquals("& &a;", parser.getAttributeValue(0)); + assertEquals(XmlPullParser.TEXT, parser.next()); + assertEquals("& &a;", parser.getText()); + } + public void testMissingEntities() throws Exception { XmlPullParser parser = newPullParser(); parser.setInput(new StringReader("<foo>&aaa;</foo>")); diff --git a/xml/src/main/java/org/kxml2/io/KXmlParser.java b/xml/src/main/java/org/kxml2/io/KXmlParser.java index 8c0b3b1..5b84d93 100644 --- a/xml/src/main/java/org/kxml2/io/KXmlParser.java +++ b/xml/src/main/java/org/kxml2/io/KXmlParser.java @@ -33,10 +33,26 @@ import org.xmlpull.v1.XmlPullParser; import org.xmlpull.v1.XmlPullParserException; /** - * A pull based XML parser. + * An XML pull parser with limited support for parsing internal DTDs. */ public class KXmlParser implements XmlPullParser { + private final String PROPERTY_XMLDECL_VERSION + = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version"; + private final String PROPERTY_XMLDECL_STANDALONE + = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone"; + private final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location"; + private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed"; + + private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>(); + static { + DEFAULT_ENTITIES.put("lt", "<"); + DEFAULT_ENTITIES.put("gt", ">"); + DEFAULT_ENTITIES.put("amp", "&"); + DEFAULT_ENTITIES.put("apos", "'"); + DEFAULT_ENTITIES.put("quot", "\""); + } + private static final int ELEMENTDECL = 11; private static final int ENTITYDECL = 12; private static final int ATTLISTDECL = 13; @@ -74,10 +90,34 @@ public class KXmlParser implements XmlPullParser { private String version; private Boolean standalone; + /** + * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines + * entity values and default attribute values. These values are parsed at + * inclusion time and may contain both tags and entity references. + * + * <p>If this is false, the user must {@link #defineEntityReplacementText + * define entity values manually}. Such entity values are literal strings + * and will not be parsed. There is no API to define default attributes + * manually. + */ + private boolean processDocDecl; private boolean processNsp; private boolean relaxed; private boolean keepNamespaceAttributes; - private Map<String, String> entityMap; + + /** + * Entities defined in or for this document. This map is created lazily. + */ + private Map<String, char[]> documentEntities; + + /** + * Default attributes in this document. The outer map's key is the element + * name; the inner map's key is the attribute name. Both keys should be + * without namespace adjustments. This map is created lazily. + */ + private Map<String, Map<String, String>> defaultAttributes; + + private int depth; private String[] elementStack = new String[16]; private String[] nspStack = new String[8]; @@ -87,7 +127,8 @@ public class KXmlParser implements XmlPullParser { private Reader reader; private String encoding; - private final char[] buffer = new char[8192]; + private ContentSource nextContentSource; + private char[] buffer = new char[8192]; private int position = 0; private int limit = 0; @@ -136,17 +177,6 @@ public class KXmlParser implements XmlPullParser { this.keepNamespaceAttributes = true; } - private boolean isProp(String n1, boolean prop, String n2) { - if (!n1.startsWith("http://xmlpull.org/v1/doc/")) { - return false; - } - if (prop) { - return n1.substring(42).equals(n2); - } else { - return n1.substring(40).equals(n2); - } - } - private boolean adjustNsp() throws XmlPullParserException { boolean any = false; @@ -792,7 +822,15 @@ public class KXmlParser implements XmlPullParser { } private void defineAttributeDefault(String elementName, String attributeName, String value) { - // TODO: stash this attribute so we can recall it later + if (defaultAttributes == null) { + defaultAttributes = new HashMap<String, Map<String, String>>(); + } + Map<String, String> elementAttributes = defaultAttributes.get(elementName); + if (elementAttributes == null) { + elementAttributes = new HashMap<String, String>(); + defaultAttributes.put(elementName, elementAttributes); + } + elementAttributes.put(attributeName, value); } /** @@ -827,8 +865,11 @@ public class KXmlParser implements XmlPullParser { position++; String value = readValue((char) quote, true, ValueContext.ENTITY_DECLARATION); position++; - if (generalEntity) { - defineEntityReplacementText(name, value); // TODO: test parameter and general entity + if (generalEntity && processDocDecl) { + if (documentEntities == null) { + documentEntities = new HashMap<String, char[]>(); + } + documentEntities.put(name, value.toCharArray()); } } else if (readExternalId(true)) { skip(); @@ -987,9 +1028,9 @@ public class KXmlParser implements XmlPullParser { int i = (attributeCount++) * 4; attributes = ensureCapacity(attributes, i + 4); - attributes[i++] = ""; - attributes[i++] = null; - attributes[i++] = attrName; + attributes[i] = ""; + attributes[i + 1] = null; + attributes[i + 2] = attrName; skip(); if (position >= limit && !fillBuffer(1)) { @@ -1015,16 +1056,16 @@ public class KXmlParser implements XmlPullParser { throw new XmlPullParserException("attr value delimiter missing!", this, null); } - attributes[i] = readValue(delimiter, true, ValueContext.ATTRIBUTE); + attributes[i + 3] = readValue(delimiter, true, ValueContext.ATTRIBUTE); if (delimiter != ' ') { position++; // end quote } } else if (relaxed) { - attributes[i] = attrName; + attributes[i + 3] = attrName; } else { checkRelaxed("Attr.value missing f. " + attrName); - attributes[i] = attrName; + attributes[i + 3] = attrName; } } @@ -1046,6 +1087,25 @@ public class KXmlParser implements XmlPullParser { namespace = ""; } + // For consistency with Expat, add default attributes after fixing namespaces. + if (defaultAttributes != null) { + Map<String, String> elementDefaultAttributes = defaultAttributes.get(name); + if (elementDefaultAttributes != null) { + for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) { + if (getAttributeValue(null, entry.getKey()) != null) { + continue; // an explicit value overrides the default + } + + int i = (attributeCount++) * 4; + attributes = ensureCapacity(attributes, i + 4); + attributes[i] = ""; + attributes[i + 1] = null; + attributes[i + 2] = entry.getKey(); + attributes[i + 3] = entry.getValue(); + } + } + } + elementStack[sp] = namespace; elementStack[sp + 1] = prefix; elementStack[sp + 2] = name; @@ -1099,7 +1159,6 @@ public class KXmlParser implements XmlPullParser { name = code; } - String resolved; if (code.startsWith("#")) { try { int c = code.startsWith("#x") @@ -1108,23 +1167,43 @@ public class KXmlParser implements XmlPullParser { out.delete(start, out.length()); out.appendCodePoint(c); unresolved = false; + return; } catch (NumberFormatException notANumber) { throw new XmlPullParserException("Invalid character reference: &" + code); } catch (IllegalArgumentException invalidCodePoint) { throw new XmlPullParserException("Invalid character reference: &" + code); } - } else if (valueContext == ValueContext.ENTITY_DECLARATION) { - // keep the unresolved &code; in the text - } else if ((resolved = entityMap.get(code)) != null) { + } + + if (valueContext == ValueContext.ENTITY_DECLARATION) { + // keep the unresolved &code; in the text to resolve later + return; + } + + String defaultEntity = DEFAULT_ENTITIES.get(code); + if (defaultEntity != null) { out.delete(start, out.length()); - out.append(resolved); unresolved = false; - } else { - // keep the unresolved entity "&code;" in the text for relaxed clients - unresolved = true; - if (!isEntityToken) { - checkRelaxed("unresolved: &" + code + ";"); + out.append(defaultEntity); + return; + } + + char[] resolved; + if (documentEntities != null && (resolved = documentEntities.get(code)) != null) { + out.delete(start, out.length()); + unresolved = false; + if (processDocDecl) { + pushContentSource(resolved); // parse the entity as XML + } else { + out.append(resolved); // include the entity value as text } + return; + } + + // keep the unresolved entity "&code;" in the text for relaxed clients + unresolved = true; + if (!isEntityToken) { + checkRelaxed("unresolved: &" + code + ";"); } } @@ -1311,7 +1390,18 @@ public class KXmlParser implements XmlPullParser { * exhausted before that many characters are available, this returns * false. */ - private boolean fillBuffer(int minimum) throws IOException { + private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException { + // If we've exhausted the current content source, remove it + while (nextContentSource != null) { + if (position < limit) { + throw new XmlPullParserException("Unbalanced entity!", this, null); + } + popContentSource(); + if (limit - position >= minimum) { + return true; + } + } + // Before clobbering the old characters, update where buffer starts for (int i = 0; i < position; i++) { if (buffer[i] == '\n') { @@ -1408,7 +1498,7 @@ public class KXmlParser implements XmlPullParser { } } - private void skip() throws IOException { + private void skip() throws IOException, XmlPullParserException { while (position < limit || fillBuffer(1)) { int c = buffer[position]; if (c > ' ') { @@ -1441,13 +1531,7 @@ public class KXmlParser implements XmlPullParser { bufferStartLine = 0; bufferStartColumn = 0; depth = 0; - - entityMap = new HashMap<String, String>(); - entityMap.put("amp", "&"); - entityMap.put("apos", "'"); - entityMap.put("gt", ">"); - entityMap.put("lt", "<"); - entityMap.put("quot", "\""); + documentEntities = null; } public void setInput(InputStream is, String _enc) throws XmlPullParserException { @@ -1568,8 +1652,10 @@ public class KXmlParser implements XmlPullParser { public boolean getFeature(String feature) { if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { return processNsp; - } else if (isProp(feature, false, "relaxed")) { + } else if (FEATURE_RELAXED.equals(feature)) { return relaxed; + } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) { + return processDocDecl; } else { return false; } @@ -1581,23 +1667,30 @@ public class KXmlParser implements XmlPullParser { public void defineEntityReplacementText(String entity, String value) throws XmlPullParserException { - if (entityMap == null) { - throw new RuntimeException("entity replacement text must be defined after setInput!"); + if (processDocDecl) { + throw new IllegalStateException( + "Entity replacement text may not be defined with DOCTYPE processing enabled."); + } + if (reader == null) { + throw new IllegalStateException( + "Entity replacement text must be defined after setInput()"); } - entityMap.put(entity, value); + if (documentEntities == null) { + documentEntities = new HashMap<String, char[]>(); + } + documentEntities.put(entity, value.toCharArray()); } public Object getProperty(String property) { - if (isProp(property, true, "xmldecl-version")) { + if (property.equals(PROPERTY_XMLDECL_VERSION)) { return version; - } - if (isProp(property, true, "xmldecl-standalone")) { + } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) { return standalone; - } - if (isProp(property, true, "location")) { + } else if (property.equals(PROPERTY_LOCATION)) { return location != null ? location : reader.toString(); + } else { + return null; } - return null; } public int getNamespaceCount(int depth) { @@ -1870,8 +1963,9 @@ public class KXmlParser implements XmlPullParser { public void setFeature(String feature, boolean value) throws XmlPullParserException { if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { processNsp = value; - } else if (isProp(feature, false, "relaxed")) { - // "http://xmlpull.org/v1/doc/features.html#relaxed" + } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) { + processDocDecl = value; + } else if (FEATURE_RELAXED.equals(feature)) { relaxed = value; } else { throw new XmlPullParserException("unsupported feature: " + feature, this, null); @@ -1879,10 +1973,75 @@ public class KXmlParser implements XmlPullParser { } public void setProperty(String property, Object value) throws XmlPullParserException { - if (isProp(property, true, "location")) { + if (property.equals(PROPERTY_LOCATION)) { location = String.valueOf(value); } else { throw new XmlPullParserException("unsupported property: " + property); } } + + /** + * A chain of buffers containing XML content. Each content source contains + * the parser's primary read buffer or the characters of entities actively + * being parsed. + * + * <p>For example, note the buffers needed to parse this document: + * <pre> {@code + * <!DOCTYPE foo [ + * <!ENTITY baz "ghi"> + * <!ENTITY bar "def &baz; jkl"> + * ]> + * <foo>abc &bar; mno</foo> + * }</pre> + * + * <p>Things get interesting when the bar entity is encountered. At that + * point two buffers are active: + * <ol> + * <li>The value for the bar entity, containing {@code "def &baz; jkl"} + * <li>The parser's primary read buffer, containing {@code " mno</foo>"} + * </ol> + * <p>The parser will return the characters {@code "def "} from the bar + * entity's buffer, and then it will encounter the baz entity. To handle + * that, three buffers will be active: + * <ol> + * <li>The value for the baz entity, containing {@code "ghi"} + * <li>The remaining value for the bar entity, containing {@code " jkl"} + * <li>The parser's primary read buffer, containing {@code " mno</foo>"} + * </ol> + * <p>The parser will then return the characters {@code ghi jkl mno} in that + * sequence by reading each buffer in sequence. + */ + static class ContentSource { + private final ContentSource next; + private final char[] buffer; + private final int position; + private final int limit; + ContentSource(ContentSource next, char[] buffer, int position, int limit) { + this.next = next; + this.buffer = buffer; + this.position = position; + this.limit = limit; + } + } + + /** + * Prepends the characters of {@code newBuffer} to be read before the + * current buffer. + */ + private void pushContentSource(char[] newBuffer) { + nextContentSource = new ContentSource(nextContentSource, buffer, position, limit); + buffer = newBuffer; + position = 0; + limit = newBuffer.length; + } + + /** + * Replaces the current exhausted buffer with the next buffer in the chain. + */ + private void popContentSource() { + buffer = nextContentSource.buffer; + position = nextContentSource.position; + limit = nextContentSource.limit; + nextContentSource = nextContentSource.next; + } } |