summary | refs | log | tree | commit | diff | stats
path: root/xml/src/main/java
diff options
context:
space:
mode:
author: Jesse Wilson <jessewilson@google.com> 2010-03-10 18:42:32 -0800
committer: Jesse Wilson <jessewilson@google.com> 2010-03-11 14:29:51 -0800
commit: 302f069f38e7890594816ceab517c15bcd59a9b7 (patch)
tree: 5794bfeec4ece6de3f9cb858b3e08fc65279fa5b /xml/src/main/java
parent: 9b23380151a51659fe1e5d96d6e6c1457b4aecb2 (diff)
download: libcore-302f069f38e7890594816ceab517c15bcd59a9b7.zip
libcore-302f069f38e7890594816ceab517c15bcd59a9b7.tar.gz
libcore-302f069f38e7890594816ceab517c15bcd59a9b7.tar.bz2
Implementing getBaseURI() for DOM.
The tests for this uncover problems with the RI's (reference implementation's) behavior. It does bizarre things with URIs that contain a hash ("#"), trashing the absolute URI and returning only a page-relative one (like "#foo").
Change-Id: Ib8af163a7b359e3f72a9c94eb3dd7e81e3a9a95c
Diffstat (limited to 'xml/src/main/java')
-rw-r--r-- xml/src/main/java/org/apache/harmony/xml/dom/NodeImpl.java | 109
1 files changed, 80 insertions, 29 deletions
diff --git a/xml/src/main/java/org/apache/harmony/xml/dom/NodeImpl.java b/xml/src/main/java/org/apache/harmony/xml/dom/NodeImpl.java
index 5940417..2a8e1fa 100644
--- a/xml/src/main/java/org/apache/harmony/xml/dom/NodeImpl.java
+++ b/xml/src/main/java/org/apache/harmony/xml/dom/NodeImpl.java
@@ -16,6 +16,8 @@
package org.apache.harmony.xml.dom;
+import org.apache.xml.serializer.utils.SystemIDResolver;
+import org.apache.xml.utils.URI;
import org.w3c.dom.Attr;
import org.w3c.dom.CharacterData;
import org.w3c.dom.DOMException;
@@ -27,6 +29,7 @@ import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.UserDataHandler;
+import javax.xml.transform.TransformerException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -244,35 +247,83 @@ public abstract class NodeImpl implements Node {
return matchesName(namespaceURI, getNamespaceURI(), wildcard) && matchesName(localName, getLocalName(), wildcard);
}
- public String getBaseURI() {
- /*
- * TODO: implement. For reference, here's Xerces' behaviour:
- *
- * In all cases, the returned URI should be sanitized before it is
- * returned. If the URI is malformed, null should be returned instead.
- *
- * For document nodes, this should return a member field that's
- * initialized by the parser.
- *
- * For element nodes, this should first look for the xml:base attribute.
- * if that exists and is absolute, it should be returned.
- * if that exists and is relative, it should be resolved to the parent's base URI
- * if it doesn't exist, the parent's baseURI should be returned
- *
- * For entity nodes, if a base URI exists that should be returned.
- * Otherwise the document's base URI should be returned
- *
- * For entity references, if a base URI exists that should be returned
- * otherwise it dereferences the entity (via the document) and uses the
- * entity's base URI.
- *
- * For notations, it returns the base URI field.
- *
- * For processing instructions, it returns the parent's base URI.
- *
- * For all other node types, it returns null.
- */
- return null;
+ public final String getBaseURI() {
+ switch (getNodeType()) {
+ case DOCUMENT_NODE:
+ return sanitizeUri(((Document) this).getDocumentURI());
+
+ case ELEMENT_NODE:
+ Element element = (Element) this;
+ String uri = element.getAttributeNS(
+ "http://www.w3.org/XML/1998/namespace", "base"); // or "xml:base"
+
+ // if this node has no base URI, return the parent's.
+ if (uri == null || uri.length() == 0) {
+ return getParentBaseUri();
+ }
+
+ // if this node's URI is absolute, return that
+ if (SystemIDResolver.isAbsoluteURI(uri)) {
+ return uri;
+ }
+
+ // this node has a relative URI. Try to resolve it against the
+ // parent, but if that doesn't work just give up and return null.
+ String parentUri = getParentBaseUri();
+ if (parentUri == null) {
+ return null;
+ }
+ try {
+ return SystemIDResolver.getAbsoluteURI(uri, parentUri);
+ } catch (TransformerException e) {
+ return null; // the spec requires that we swallow exceptions
+ }
+
+ case PROCESSING_INSTRUCTION_NODE:
+ return getParentBaseUri();
+
+ case NOTATION_NODE:
+ case ENTITY_NODE:
+ // When we support these node types, the parser should
+ // initialize a base URI field on these nodes.
+ return null;
+
+ case ENTITY_REFERENCE_NODE:
+ // TODO: get this value from the parser, falling back to the
+ // referenced entity's baseURI if that doesn't exist
+ return null;
+
+ case DOCUMENT_TYPE_NODE:
+ case DOCUMENT_FRAGMENT_NODE:
+ case ATTRIBUTE_NODE:
+ case TEXT_NODE:
+ case CDATA_SECTION_NODE:
+ case COMMENT_NODE:
+ return null;
+
+ default:
+ throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
+ "Unsupported node type " + getNodeType());
+ }
+ }
+
+ private String getParentBaseUri() {
+ Node parentNode = getParentNode();
+ return parentNode != null ? parentNode.getBaseURI() : null;
+ }
+
+ /**
+ * Returns the sanitized input if it is a URI, or {@code null} otherwise.
+ */
+ private String sanitizeUri(String uri) {
+ if (uri == null || uri.length() == 0) {
+ return null;
+ }
+ try {
+ return new URI(uri).toString();
+ } catch (URI.MalformedURIException e) {
+ return null;
+ }
}
public short compareDocumentPosition(Node other)