diff options
author | Jesse Wilson <jessewilson@google.com> | 2010-11-24 18:26:03 -0800 |
---|---|---|
committer | Jesse Wilson <jessewilson@google.com> | 2010-11-28 23:08:22 -0800 |
commit | 76c85883787791ba605a51d7bea3cfc27b5c5d95 (patch) | |
tree | dec5082628aee6336db0d51589ec8dc5c86da32a /xml/src | |
parent | 7fac047a67ee9a0295e4dc798c7c6880ccd83513 (diff) | |
download | libcore-76c85883787791ba605a51d7bea3cfc27b5c5d95.zip libcore-76c85883787791ba605a51d7bea3cfc27b5c5d95.tar.gz libcore-76c85883787791ba605a51d7bea3cfc27b5c5d95.tar.bz2 |
Resolve entity values recursively and support default attributes.
Change-Id: Ib32040e0ebe8ef52e8d382fb65ab4d08779901b7
http://b/3090550
Diffstat (limited to 'xml/src')
-rw-r--r-- | xml/src/main/java/org/kxml2/io/KXmlParser.java | 269 |
1 files changed, 214 insertions, 55 deletions
diff --git a/xml/src/main/java/org/kxml2/io/KXmlParser.java b/xml/src/main/java/org/kxml2/io/KXmlParser.java index 8c0b3b1..5b84d93 100644 --- a/xml/src/main/java/org/kxml2/io/KXmlParser.java +++ b/xml/src/main/java/org/kxml2/io/KXmlParser.java @@ -33,10 +33,26 @@ import org.xmlpull.v1.XmlPullParser; import org.xmlpull.v1.XmlPullParserException; /** - * A pull based XML parser. + * An XML pull parser with limited support for parsing internal DTDs. */ public class KXmlParser implements XmlPullParser { + private final String PROPERTY_XMLDECL_VERSION + = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version"; + private final String PROPERTY_XMLDECL_STANDALONE + = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone"; + private final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location"; + private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed"; + + private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>(); + static { + DEFAULT_ENTITIES.put("lt", "<"); + DEFAULT_ENTITIES.put("gt", ">"); + DEFAULT_ENTITIES.put("amp", "&"); + DEFAULT_ENTITIES.put("apos", "'"); + DEFAULT_ENTITIES.put("quot", "\""); + } + private static final int ELEMENTDECL = 11; private static final int ENTITYDECL = 12; private static final int ATTLISTDECL = 13; @@ -74,10 +90,34 @@ public class KXmlParser implements XmlPullParser { private String version; private Boolean standalone; + /** + * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines + * entity values and default attribute values. These values are parsed at + * inclusion time and may contain both tags and entity references. + * + * <p>If this is false, the user must {@link #defineEntityReplacementText + * define entity values manually}. Such entity values are literal strings + * and will not be parsed. There is no API to define default attributes + * manually. + */ + private boolean processDocDecl; private boolean processNsp; private boolean relaxed; private boolean keepNamespaceAttributes; - private Map<String, String> entityMap; + + /** + * Entities defined in or for this document. This map is created lazily. + */ + private Map<String, char[]> documentEntities; + + /** + * Default attributes in this document. The outer map's key is the element + * name; the inner map's key is the attribute name. Both keys should be + * without namespace adjustments. This map is created lazily. + */ + private Map<String, Map<String, String>> defaultAttributes; + + private int depth; private String[] elementStack = new String[16]; private String[] nspStack = new String[8]; @@ -87,7 +127,8 @@ public class KXmlParser implements XmlPullParser { private Reader reader; private String encoding; - private final char[] buffer = new char[8192]; + private ContentSource nextContentSource; + private char[] buffer = new char[8192]; private int position = 0; private int limit = 0; @@ -136,17 +177,6 @@ public class KXmlParser implements XmlPullParser { this.keepNamespaceAttributes = true; } - private boolean isProp(String n1, boolean prop, String n2) { - if (!n1.startsWith("http://xmlpull.org/v1/doc/")) { - return false; - } - if (prop) { - return n1.substring(42).equals(n2); - } else { - return n1.substring(40).equals(n2); - } - } - private boolean adjustNsp() throws XmlPullParserException { boolean any = false; @@ -792,7 +822,15 @@ public class KXmlParser implements XmlPullParser { } private void defineAttributeDefault(String elementName, String attributeName, String value) { - // TODO: stash this attribute so we can recall it later + if (defaultAttributes == null) { + defaultAttributes = new HashMap<String, Map<String, String>>(); + } + Map<String, String> elementAttributes = defaultAttributes.get(elementName); + if (elementAttributes == null) { + elementAttributes = new HashMap<String, String>(); + defaultAttributes.put(elementName, elementAttributes); + } + elementAttributes.put(attributeName, value); } /** @@ -827,8 +865,11 @@ public class KXmlParser implements XmlPullParser { position++; String value = readValue((char) quote, true, ValueContext.ENTITY_DECLARATION); position++; - if (generalEntity) { - defineEntityReplacementText(name, value); // TODO: test parameter and general entity + if (generalEntity && processDocDecl) { + if (documentEntities == null) { + documentEntities = new HashMap<String, char[]>(); + } + documentEntities.put(name, value.toCharArray()); } } else if (readExternalId(true)) { skip(); @@ -987,9 +1028,9 @@ public class KXmlParser implements XmlPullParser { int i = (attributeCount++) * 4; attributes = ensureCapacity(attributes, i + 4); - attributes[i++] = ""; - attributes[i++] = null; - attributes[i++] = attrName; + attributes[i] = ""; + attributes[i + 1] = null; + attributes[i + 2] = attrName; skip(); if (position >= limit && !fillBuffer(1)) { @@ -1015,16 +1056,16 @@ public class KXmlParser implements XmlPullParser { throw new XmlPullParserException("attr value delimiter missing!", this, null); } - attributes[i] = readValue(delimiter, true, ValueContext.ATTRIBUTE); + attributes[i + 3] = readValue(delimiter, true, ValueContext.ATTRIBUTE); if (delimiter != ' ') { position++; // end quote } } else if (relaxed) { - attributes[i] = attrName; + attributes[i + 3] = attrName; } else { checkRelaxed("Attr.value missing f. " + attrName); - attributes[i] = attrName; + attributes[i + 3] = attrName; } } @@ -1046,6 +1087,25 @@ public class KXmlParser implements XmlPullParser { namespace = ""; } + // For consistency with Expat, add default attributes after fixing namespaces. + if (defaultAttributes != null) { + Map<String, String> elementDefaultAttributes = defaultAttributes.get(name); + if (elementDefaultAttributes != null) { + for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) { + if (getAttributeValue(null, entry.getKey()) != null) { + continue; // an explicit value overrides the default + } + + int i = (attributeCount++) * 4; + attributes = ensureCapacity(attributes, i + 4); + attributes[i] = ""; + attributes[i + 1] = null; + attributes[i + 2] = entry.getKey(); + attributes[i + 3] = entry.getValue(); + } + } + } + elementStack[sp] = namespace; elementStack[sp + 1] = prefix; elementStack[sp + 2] = name; @@ -1099,7 +1159,6 @@ public class KXmlParser implements XmlPullParser { name = code; } - String resolved; if (code.startsWith("#")) { try { int c = code.startsWith("#x") @@ -1108,23 +1167,43 @@ public class KXmlParser implements XmlPullParser { out.delete(start, out.length()); out.appendCodePoint(c); unresolved = false; + return; } catch (NumberFormatException notANumber) { throw new XmlPullParserException("Invalid character reference: &" + code); } catch (IllegalArgumentException invalidCodePoint) { throw new XmlPullParserException("Invalid character reference: &" + code); } - } else if (valueContext == ValueContext.ENTITY_DECLARATION) { - // keep the unresolved &code; in the text - } else if ((resolved = entityMap.get(code)) != null) { + } + + if (valueContext == ValueContext.ENTITY_DECLARATION) { + // keep the unresolved &code; in the text to resolve later + return; + } + + String defaultEntity = DEFAULT_ENTITIES.get(code); + if (defaultEntity != null) { out.delete(start, out.length()); - out.append(resolved); unresolved = false; - } else { - // keep the unresolved entity "&code;" in the text for relaxed clients - unresolved = true; - if (!isEntityToken) { - checkRelaxed("unresolved: &" + code + ";"); + out.append(defaultEntity); + return; + } + + char[] resolved; + if (documentEntities != null && (resolved = documentEntities.get(code)) != null) { + out.delete(start, out.length()); + unresolved = false; + if (processDocDecl) { + pushContentSource(resolved); // parse the entity as XML + } else { + out.append(resolved); // include the entity value as text } + return; + } + + // keep the unresolved entity "&code;" in the text for relaxed clients + unresolved = true; + if (!isEntityToken) { + checkRelaxed("unresolved: &" + code + ";"); } } @@ -1311,7 +1390,18 @@ public class KXmlParser implements XmlPullParser { * exhausted before that many characters are available, this returns * false. */ - private boolean fillBuffer(int minimum) throws IOException { + private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException { + // If we've exhausted the current content source, remove it + while (nextContentSource != null) { + if (position < limit) { + throw new XmlPullParserException("Unbalanced entity!", this, null); + } + popContentSource(); + if (limit - position >= minimum) { + return true; + } + } + // Before clobbering the old characters, update where buffer starts for (int i = 0; i < position; i++) { if (buffer[i] == '\n') { @@ -1408,7 +1498,7 @@ public class KXmlParser implements XmlPullParser { } } - private void skip() throws IOException { + private void skip() throws IOException, XmlPullParserException { while (position < limit || fillBuffer(1)) { int c = buffer[position]; if (c > ' ') { @@ -1441,13 +1531,7 @@ public class KXmlParser implements XmlPullParser { bufferStartLine = 0; bufferStartColumn = 0; depth = 0; - - entityMap = new HashMap<String, String>(); - entityMap.put("amp", "&"); - entityMap.put("apos", "'"); - entityMap.put("gt", ">"); - entityMap.put("lt", "<"); - entityMap.put("quot", "\""); + documentEntities = null; } public void setInput(InputStream is, String _enc) throws XmlPullParserException { @@ -1568,8 +1652,10 @@ public class KXmlParser implements XmlPullParser { public boolean getFeature(String feature) { if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { return processNsp; - } else if (isProp(feature, false, "relaxed")) { + } else if (FEATURE_RELAXED.equals(feature)) { return relaxed; + } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) { + return processDocDecl; } else { return false; } @@ -1581,23 +1667,30 @@ public class KXmlParser implements XmlPullParser { public void defineEntityReplacementText(String entity, String value) throws XmlPullParserException { - if (entityMap == null) { - throw new RuntimeException("entity replacement text must be defined after setInput!"); + if (processDocDecl) { + throw new IllegalStateException( + "Entity replacement text may not be defined with DOCTYPE processing enabled."); + } + if (reader == null) { + throw new IllegalStateException( + "Entity replacement text must be defined after setInput()"); } - entityMap.put(entity, value); + if (documentEntities == null) { + documentEntities = new HashMap<String, char[]>(); + } + documentEntities.put(entity, value.toCharArray()); } public Object getProperty(String property) { - if (isProp(property, true, "xmldecl-version")) { + if (property.equals(PROPERTY_XMLDECL_VERSION)) { return version; - } - if (isProp(property, true, "xmldecl-standalone")) { + } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) { return standalone; - } - if (isProp(property, true, "location")) { + } else if (property.equals(PROPERTY_LOCATION)) { return location != null ? location : reader.toString(); + } else { + return null; } - return null; } public int getNamespaceCount(int depth) { @@ -1870,8 +1963,9 @@ public class KXmlParser implements XmlPullParser { public void setFeature(String feature, boolean value) throws XmlPullParserException { if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { processNsp = value; - } else if (isProp(feature, false, "relaxed")) { - // "http://xmlpull.org/v1/doc/features.html#relaxed" + } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) { + processDocDecl = value; + } else if (FEATURE_RELAXED.equals(feature)) { relaxed = value; } else { throw new XmlPullParserException("unsupported feature: " + feature, this, null); @@ -1879,10 +1973,75 @@ public class KXmlParser implements XmlPullParser { } public void setProperty(String property, Object value) throws XmlPullParserException { - if (isProp(property, true, "location")) { + if (property.equals(PROPERTY_LOCATION)) { location = String.valueOf(value); } else { throw new XmlPullParserException("unsupported property: " + property); } } + + /** + * A chain of buffers containing XML content. Each content source contains + * the parser's primary read buffer or the characters of entities actively + * being parsed. + * + * <p>For example, note the buffers needed to parse this document: + * <pre> {@code + * <!DOCTYPE foo [ + * <!ENTITY baz "ghi"> + * <!ENTITY bar "def &baz; jkl"> + * ]> + * <foo>abc &bar; mno</foo> + * }</pre> + * + * <p>Things get interesting when the bar entity is encountered. At that + * point two buffers are active: + * <ol> + * <li>The value for the bar entity, containing {@code "def &baz; jkl"} + * <li>The parser's primary read buffer, containing {@code " mno</foo>"} + * </ol> + * <p>The parser will return the characters {@code "def "} from the bar + * entity's buffer, and then it will encounter the baz entity. To handle + * that, three buffers will be active: + * <ol> + * <li>The value for the baz entity, containing {@code "ghi"} + * <li>The remaining value for the bar entity, containing {@code " jkl"} + * <li>The parser's primary read buffer, containing {@code " mno</foo>"} + * </ol> + * <p>The parser will then return the characters {@code ghi jkl mno} in that + * sequence by reading each buffer in sequence. + */ + static class ContentSource { + private final ContentSource next; + private final char[] buffer; + private final int position; + private final int limit; + ContentSource(ContentSource next, char[] buffer, int position, int limit) { + this.next = next; + this.buffer = buffer; + this.position = position; + this.limit = limit; + } + } + + /** + * Prepends the characters of {@code newBuffer} to be read before the + * current buffer. + */ + private void pushContentSource(char[] newBuffer) { + nextContentSource = new ContentSource(nextContentSource, buffer, position, limit); + buffer = newBuffer; + position = 0; + limit = newBuffer.length; + } + + /** + * Replaces the current exhausted buffer with the next buffer in the chain. + */ + private void popContentSource() { + buffer = nextContentSource.buffer; + position = nextContentSource.position; + limit = nextContentSource.limit; + nextContentSource = nextContentSource.next; + } } |