aboutsummaryrefslogtreecommitdiffstats
path: root/common/src/com/android/utils/PositionXmlParser.java
diff options
context:
space:
mode:
Diffstat (limited to 'common/src/com/android/utils/PositionXmlParser.java')
-rw-r--r--common/src/com/android/utils/PositionXmlParser.java729
1 files changed, 0 insertions, 729 deletions
diff --git a/common/src/com/android/utils/PositionXmlParser.java b/common/src/com/android/utils/PositionXmlParser.java
deleted file mode 100644
index 73574d5..0000000
--- a/common/src/com/android/utils/PositionXmlParser.java
+++ /dev/null
@@ -1,729 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.utils;
-
-import com.android.annotations.NonNull;
-import com.android.annotations.Nullable;
-
-import org.w3c.dom.Attr;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Node;
-import org.w3c.dom.Text;
-import org.xml.sax.Attributes;
-import org.xml.sax.InputSource;
-import org.xml.sax.Locator;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-/**
- * A simple DOM XML parser which can retrieve exact beginning and end offsets
- * (and line and column numbers) for element nodes as well as attribute nodes.
- */
-public class PositionXmlParser {
- private static final String UTF_8 = "UTF-8"; //$NON-NLS-1$
- private static final String UTF_16 = "UTF_16"; //$NON-NLS-1$
- private static final String UTF_16LE = "UTF_16LE"; //$NON-NLS-1$
- private static final String CONTENT_KEY = "contents"; //$NON-NLS-1$
- private final static String POS_KEY = "offsets"; //$NON-NLS-1$
- private static final String NAMESPACE_PREFIX_FEATURE =
- "http://xml.org/sax/features/namespace-prefixes"; //$NON-NLS-1$
- private static final String NAMESPACE_FEATURE =
- "http://xml.org/sax/features/namespaces"; //$NON-NLS-1$
- /** See http://www.w3.org/TR/REC-xml/#NT-EncodingDecl */
- private static final Pattern ENCODING_PATTERN =
- Pattern.compile("encoding=['\"](\\S*)['\"]");//$NON-NLS-1$
-
- /**
- * Parses the XML content from the given input stream.
- *
- * @param input the input stream containing the XML to be parsed
- * @return the corresponding document
- * @throws ParserConfigurationException if a SAX parser is not available
- * @throws SAXException if the document contains a parsing error
- * @throws IOException if something is seriously wrong. This should not
- * happen since the input source is known to be constructed from
- * a string.
- */
- @Nullable
- public Document parse(@NonNull InputStream input)
- throws ParserConfigurationException, SAXException, IOException {
- // Read in all the data
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- byte[] buf = new byte[1024];
- while (true) {
- int r = input.read(buf);
- if (r == -1) {
- break;
- }
- out.write(buf, 0, r);
- }
- input.close();
- return parse(out.toByteArray());
- }
-
- /**
- * Parses the XML content from the given byte array
- *
- * @param data the raw XML data (with unknown encoding)
- * @return the corresponding document
- * @throws ParserConfigurationException if a SAX parser is not available
- * @throws SAXException if the document contains a parsing error
- * @throws IOException if something is seriously wrong. This should not
- * happen since the input source is known to be constructed from
- * a string.
- */
- @Nullable
- public Document parse(@NonNull byte[] data)
- throws ParserConfigurationException, SAXException, IOException {
- String xml = getXmlString(data);
- return parse(xml, new InputSource(new StringReader(xml)), true);
- }
-
- /**
- * Parses the given XML content.
- *
- * @param xml the XML string to be parsed. This must be in the correct
- * encoding already.
- * @return the corresponding document
- * @throws ParserConfigurationException if a SAX parser is not available
- * @throws SAXException if the document contains a parsing error
- * @throws IOException if something is seriously wrong. This should not
- * happen since the input source is known to be constructed from
- * a string.
- */
- @Nullable
- public Document parse(@NonNull String xml)
- throws ParserConfigurationException, SAXException, IOException {
- return parse(xml, new InputSource(new StringReader(xml)), true);
- }
-
- @NonNull
- private Document parse(@NonNull String xml, @NonNull InputSource input, boolean checkBom)
- throws ParserConfigurationException, SAXException, IOException {
- try {
- SAXParserFactory factory = SAXParserFactory.newInstance();
- factory.setFeature(NAMESPACE_FEATURE, true);
- factory.setFeature(NAMESPACE_PREFIX_FEATURE, true);
- SAXParser parser = factory.newSAXParser();
- DomBuilder handler = new DomBuilder(xml);
- parser.parse(input, handler);
- return handler.getDocument();
- } catch (SAXException e) {
- if (checkBom && e.getMessage().contains("Content is not allowed in prolog")) {
- // Byte order mark in the string? Skip it. There are many markers
- // (see http://en.wikipedia.org/wiki/Byte_order_mark) so here we'll
- // just skip those up to the XML prolog beginning character, <
- xml = xml.replaceFirst("^([\\W]+)<","<"); //$NON-NLS-1$ //$NON-NLS-2$
- return parse(xml, new InputSource(new StringReader(xml)), false);
- }
- throw e;
- }
- }
-
- /**
- * Returns the String corresponding to the given byte array of XML data
- * (with unknown encoding). This method attempts to guess the encoding based
- * on the XML prologue.
- * @param data the XML data to be decoded into a string
- * @return a string corresponding to the XML data
- */
- public static String getXmlString(byte[] data) {
- int offset = 0;
-
- String defaultCharset = UTF_8;
- String charset = null;
- // Look for the byte order mark, to see if we need to remove bytes from
- // the input stream (and to determine whether files are big endian or little endian) etc
- // for files which do not specify the encoding.
- // See http://unicode.org/faq/utf_bom.html#BOM for more.
- if (data.length > 4) {
- if (data[0] == (byte)0xef && data[1] == (byte)0xbb && data[2] == (byte)0xbf) {
- // UTF-8
- defaultCharset = charset = UTF_8;
- offset += 3;
- } else if (data[0] == (byte)0xfe && data[1] == (byte)0xff) {
- // UTF-16, big-endian
- defaultCharset = charset = UTF_16;
- offset += 2;
- } else if (data[0] == (byte)0x0 && data[1] == (byte)0x0
- && data[2] == (byte)0xfe && data[3] == (byte)0xff) {
- // UTF-32, big-endian
- defaultCharset = charset = "UTF_32"; //$NON-NLS-1$
- offset += 4;
- } else if (data[0] == (byte)0xff && data[1] == (byte)0xfe
- && data[2] == (byte)0x0 && data[3] == (byte)0x0) {
- // UTF-32, little-endian. We must check for this *before* looking for
- // UTF_16LE since UTF_32LE has the same prefix!
- defaultCharset = charset = "UTF_32LE"; //$NON-NLS-1$
- offset += 4;
- } else if (data[0] == (byte)0xff && data[1] == (byte)0xfe) {
- // UTF-16, little-endian
- defaultCharset = charset = UTF_16LE;
- offset += 2;
- }
- }
- int length = data.length - offset;
-
- // Guess encoding by searching for an encoding= entry in the first line.
- // The prologue, and the encoding names, will always be in ASCII - which means
- // we don't need to worry about strange character encodings for the prologue characters.
- // However, one wrinkle is that the whole file may be encoded in something like UTF-16
- // where there are two bytes per character, so we can't just look for
- // ['e','n','c','o','d','i','n','g'] etc in the byte array since there could be
- // multiple bytes for each character. However, since again the prologue is in ASCII,
- // we can just drop the zeroes.
- boolean seenOddZero = false;
- boolean seenEvenZero = false;
- int prologueStart = -1;
- for (int lineEnd = offset; lineEnd < data.length; lineEnd++) {
- if (data[lineEnd] == 0) {
- if ((lineEnd - offset) % 2 == 0) {
- seenEvenZero = true;
- } else {
- seenOddZero = true;
- }
- } else if (data[lineEnd] == '\n' || data[lineEnd] == '\r') {
- break;
- } else if (data[lineEnd] == '<') {
- prologueStart = lineEnd;
- } else if (data[lineEnd] == '>') {
- // End of prologue. Quick check to see if this is a utf-8 file since that's
- // common
- for (int i = lineEnd - 4; i >= 0; i--) {
- if ((data[i] == 'u' || data[i] == 'U')
- && (data[i + 1] == 't' || data[i + 1] == 'T')
- && (data[i + 2] == 'f' || data[i + 2] == 'F')
- && (data[i + 3] == '-' || data[i + 3] == '_')
- && (data[i + 4] == '8')
- ) {
- charset = UTF_8;
- break;
- }
- }
-
- if (charset == null) {
- StringBuilder sb = new StringBuilder();
- for (int i = prologueStart; i <= lineEnd; i++) {
- if (data[i] != 0) {
- sb.append((char) data[i]);
- }
- }
- String prologue = sb.toString();
- int encodingIndex = prologue.indexOf("encoding"); //$NON-NLS-1$
- if (encodingIndex != -1) {
- Matcher matcher = ENCODING_PATTERN.matcher(prologue);
- if (matcher.find(encodingIndex)) {
- charset = matcher.group(1);
- }
- }
- }
-
- break;
- }
- }
-
- // No prologue on the first line, and no byte order mark: Assume UTF-8/16
- if (charset == null) {
- charset = seenOddZero ? UTF_16LE : seenEvenZero ? UTF_16 : UTF_8;
- }
-
- String xml = null;
- try {
- xml = new String(data, offset, length, charset);
- } catch (UnsupportedEncodingException e) {
- try {
- if (charset != defaultCharset) {
- xml = new String(data, offset, length, defaultCharset);
- }
- } catch (UnsupportedEncodingException u) {
- // Just use the default encoding below
- }
- }
- if (xml == null) {
- xml = new String(data, offset, length);
- }
- return xml;
- }
-
- /**
- * Returns the position for the given node. This is the start position. The
- * end position can be obtained via {@link Position#getEnd()}.
- *
- * @param node the node to look up position for
- * @return the position, or null if the node type is not supported for
- * position info
- */
- @Nullable
- public Position getPosition(@NonNull Node node) {
- return getPosition(node, -1, -1);
- }
-
- /**
- * Returns the position for the given node. This is the start position. The
- * end position can be obtained via {@link Position#getEnd()}. A specific
- * range within the node can be specified with the {@code start} and
- * {@code end} parameters.
- *
- * @param node the node to look up position for
- * @param start the relative offset within the node range to use as the
- * starting position, inclusive, or -1 to not limit the range
- * @param end the relative offset within the node range to use as the ending
- * position, or -1 to not limit the range
- * @return the position, or null if the node type is not supported for
- * position info
- */
- @Nullable
- public Position getPosition(@NonNull Node node, int start, int end) {
- // Look up the position information stored while parsing for the given node.
- // Note however that we only store position information for elements (because
- // there is no SAX callback for individual attributes).
- // Therefore, this method special cases this:
- // -- First, it looks at the owner element and uses its position
- // information as a first approximation.
- // -- Second, it uses that, as well as the original XML text, to search
- // within the node range for an exact text match on the attribute name
- // and if found uses that as the exact node offsets instead.
- if (node instanceof Attr) {
- Attr attr = (Attr) node;
- Position pos = (Position) attr.getOwnerElement().getUserData(POS_KEY);
- if (pos != null) {
- int startOffset = pos.getOffset();
- int endOffset = pos.getEnd().getOffset();
- if (start != -1) {
- startOffset += start;
- if (end != -1) {
- endOffset = start + end;
- }
- }
-
- // Find attribute in the text
- String contents = (String) node.getOwnerDocument().getUserData(CONTENT_KEY);
- if (contents == null) {
- return null;
- }
-
- // Locate the name=value attribute in the source text
- // Fast string check first for the common occurrence
- String name = attr.getName();
- Pattern pattern = Pattern.compile(
- String.format("%1$s\\s*=\\s*[\"'].*[\"']", name)); //$NON-NLS-1$
- Matcher matcher = pattern.matcher(contents);
- if (matcher.find(startOffset) && matcher.start() <= endOffset) {
- int index = matcher.start();
- // Adjust the line and column to this new offset
- int line = pos.getLine();
- int column = pos.getColumn();
- for (int offset = pos.getOffset(); offset < index; offset++) {
- char t = contents.charAt(offset);
- if (t == '\n') {
- line++;
- column = 0;
- } else {
- column++;
- }
- }
-
- Position attributePosition = createPosition(line, column, index);
- // Also set end range for retrieval in getLocation
- attributePosition.setEnd(createPosition(line, column + matcher.end() - index,
- matcher.end()));
- return attributePosition;
- } else {
- // No regexp match either: just fall back to element position
- return pos;
- }
- }
- } else if (node instanceof Text) {
- // Position of parent element, if any
- Position pos = null;
- if (node.getPreviousSibling() != null) {
- pos = (Position) node.getPreviousSibling().getUserData(POS_KEY);
- }
- if (pos == null) {
- pos = (Position) node.getParentNode().getUserData(POS_KEY);
- }
- if (pos != null) {
- // Attempt to point forward to the actual text node
- int startOffset = pos.getOffset();
- int endOffset = pos.getEnd().getOffset();
- int line = pos.getLine();
- int column = pos.getColumn();
-
- // Find attribute in the text
- String contents = (String) node.getOwnerDocument().getUserData(CONTENT_KEY);
- if (contents == null || contents.length() < endOffset) {
- return null;
- }
-
- boolean inAttribute = false;
- for (int offset = startOffset; offset <= endOffset; offset++) {
- char c = contents.charAt(offset);
- if (c == '>' && !inAttribute) {
- // Found the end of the element open tag: this is where the
- // text begins.
-
- // Skip >
- offset++;
- column++;
-
- String text = node.getNodeValue();
- int textIndex = 0;
- int textLength = text.length();
- int newLine = line;
- int newColumn = column;
- if (start != -1) {
- textLength = Math.min(textLength, start);
- for (; textIndex < textLength; textIndex++) {
- char t = text.charAt(textIndex);
- if (t == '\n') {
- newLine++;
- newColumn = 0;
- } else {
- newColumn++;
- }
- }
- } else {
- // Skip text whitespace prefix, if the text node contains
- // non-whitespace characters
- for (; textIndex < textLength; textIndex++) {
- char t = text.charAt(textIndex);
- if (t == '\n') {
- newLine++;
- newColumn = 0;
- } else if (!Character.isWhitespace(t)) {
- break;
- } else {
- newColumn++;
- }
- }
- }
- if (textIndex == text.length()) {
- textIndex = 0; // Whitespace node
- } else {
- line = newLine;
- column = newColumn;
- }
-
- Position attributePosition = createPosition(line, column,
- offset + textIndex);
- // Also set end range for retrieval in getLocation
- if (end != -1) {
- attributePosition.setEnd(createPosition(line, column,
- offset + end));
- } else {
- attributePosition.setEnd(createPosition(line, column,
- offset + textLength));
- }
- return attributePosition;
- } else if (c == '"') {
- inAttribute = !inAttribute;
- } else if (c == '\n') {
- line++;
- column = -1; // pre-subtract column added below
- }
- column++;
- }
-
- return pos;
- }
- }
-
- return (Position) node.getUserData(POS_KEY);
- }
-
- /**
- * SAX parser handler which incrementally builds up a DOM document as we go
- * along, and updates position information along the way. Position
- * information is attached to the DOM nodes by setting user data with the
- * {@link POS_KEY} key.
- */
- private final class DomBuilder extends DefaultHandler {
- private final String mXml;
- private final Document mDocument;
- private Locator mLocator;
- private int mCurrentLine = 0;
- private int mCurrentOffset;
- private int mCurrentColumn;
- private final List<Element> mStack = new ArrayList<Element>();
- private final StringBuilder mPendingText = new StringBuilder();
-
- private DomBuilder(String xml) throws ParserConfigurationException {
- mXml = xml;
-
- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
- factory.setNamespaceAware(true);
- factory.setValidating(false);
- DocumentBuilder docBuilder = factory.newDocumentBuilder();
- mDocument = docBuilder.newDocument();
- mDocument.setUserData(CONTENT_KEY, xml, null);
- }
-
- /** Returns the document parsed by the handler */
- Document getDocument() {
- return mDocument;
- }
-
- @Override
- public void setDocumentLocator(Locator locator) {
- this.mLocator = locator;
- }
-
- @Override
- public void startElement(String uri, String localName, String qName,
- Attributes attributes) throws SAXException {
- try {
- flushText();
- Element element = mDocument.createElement(qName);
- for (int i = 0; i < attributes.getLength(); i++) {
- if (attributes.getURI(i) != null && attributes.getURI(i).length() > 0) {
- Attr attr = mDocument.createAttributeNS(attributes.getURI(i),
- attributes.getQName(i));
- attr.setValue(attributes.getValue(i));
- element.setAttributeNodeNS(attr);
- assert attr.getOwnerElement() == element;
- } else {
- Attr attr = mDocument.createAttribute(attributes.getQName(i));
- attr.setValue(attributes.getValue(i));
- element.setAttributeNode(attr);
- assert attr.getOwnerElement() == element;
- }
- }
-
- Position pos = getCurrentPosition();
-
- // The starting position reported to us by SAX is really the END of the
- // open tag in an element, when all the attributes have been processed.
- // We have to scan backwards to find the real beginning. We'll do that
- // by scanning backwards.
- // -1: Make sure that when we have <foo></foo> we don't consider </foo>
- // the beginning since pos.offset will typically point to the first character
- // AFTER the element open tag, which could be a closing tag or a child open
- // tag
-
- for (int offset = pos.getOffset() - 1; offset >= 0; offset--) {
- char c = mXml.charAt(offset);
- // < cannot appear in attribute values or anywhere else within
- // an element open tag, so we know the first occurrence is the real
- // element start
- if (c == '<') {
- // Adjust line position
- int line = pos.getLine();
- for (int i = offset, n = pos.getOffset(); i < n; i++) {
- if (mXml.charAt(i) == '\n') {
- line--;
- }
- }
-
- // Compute new column position
- int column = 0;
- for (int i = offset - 1; i >= 0; i--, column++) {
- if (mXml.charAt(i) == '\n') {
- break;
- }
- }
-
- pos = createPosition(line, column, offset);
- break;
- }
- }
-
- element.setUserData(POS_KEY, pos, null);
- mStack.add(element);
- } catch (Exception t) {
- throw new SAXException(t);
- }
- }
-
- @Override
- public void endElement(String uri, String localName, String qName) {
- flushText();
- Element element = mStack.remove(mStack.size() - 1);
-
- Position pos = (Position) element.getUserData(POS_KEY);
- assert pos != null;
- pos.setEnd(getCurrentPosition());
-
- if (mStack.isEmpty()) {
- mDocument.appendChild(element);
- } else {
- Element parent = mStack.get(mStack.size() - 1);
- parent.appendChild(element);
- }
- }
-
- /**
- * Returns a position holder for the current position. The most
- * important part of this function is to incrementally compute the
- * offset as well, by counting forwards until it reaches the new line
- * number and column position of the XML parser, counting characters as
- * it goes along.
- */
- private Position getCurrentPosition() {
- int line = mLocator.getLineNumber() - 1;
- int column = mLocator.getColumnNumber() - 1;
-
- // Compute offset incrementally now that we have the new line and column
- // numbers
- int xmlLength = mXml.length();
- while (mCurrentLine < line && mCurrentOffset < xmlLength) {
- char c = mXml.charAt(mCurrentOffset);
- if (c == '\r' && mCurrentOffset < xmlLength - 1) {
- if (mXml.charAt(mCurrentOffset + 1) != '\n') {
- mCurrentLine++;
- mCurrentColumn = 0;
- }
- } else if (c == '\n') {
- mCurrentLine++;
- mCurrentColumn = 0;
- } else {
- mCurrentColumn++;
- }
- mCurrentOffset++;
- }
-
- mCurrentOffset += column - mCurrentColumn;
- if (mCurrentOffset >= xmlLength) {
- // The parser sometimes passes wrong column numbers at the
- // end of the file: Ensure that the offset remains valid.
- mCurrentOffset = xmlLength;
- }
- mCurrentColumn = column;
-
- return createPosition(mCurrentLine, mCurrentColumn, mCurrentOffset);
- }
-
- @Override
- public void characters(char c[], int start, int length) throws SAXException {
- mPendingText.append(c, start, length);
- }
-
- private void flushText() {
- if (mPendingText.length() > 0 && !mStack.isEmpty()) {
- Element element = mStack.get(mStack.size() - 1);
- Node textNode = mDocument.createTextNode(mPendingText.toString());
- element.appendChild(textNode);
- mPendingText.setLength(0);
- }
- }
- }
-
- /**
- * Creates a position while constructing the DOM document. This method
- * allows a subclass to create a custom implementation of the position
- * class.
- *
- * @param line the line number for the position
- * @param column the column number for the position
- * @param offset the character offset
- * @return a new position
- */
- @NonNull
- protected Position createPosition(int line, int column, int offset) {
- return new DefaultPosition(line, column, offset);
- }
-
- protected interface Position {
- /**
- * Linked position: for a begin position this will point to the
- * corresponding end position. For an end position this will be null.
- *
- * @return the end position, or null
- */
- @Nullable
- public Position getEnd();
-
- /**
- * Linked position: for a begin position this will point to the
- * corresponding end position. For an end position this will be null.
- *
- * @param end the end position
- */
- public void setEnd(@NonNull Position end);
-
- /** @return the line number, 0-based */
- public int getLine();
-
- /** @return the offset number, 0-based */
- public int getOffset();
-
- /** @return the column number, 0-based, and -1 if the column number if not known */
- public int getColumn();
- }
-
- protected static class DefaultPosition implements Position {
- /** The line number (0-based where the first line is line 0) */
- private final int mLine;
- private final int mColumn;
- private final int mOffset;
- private Position mEnd;
-
- /**
- * Creates a new {@link Position}
- *
- * @param line the 0-based line number, or -1 if unknown
- * @param column the 0-based column number, or -1 if unknown
- * @param offset the offset, or -1 if unknown
- */
- public DefaultPosition(int line, int column, int offset) {
- this.mLine = line;
- this.mColumn = column;
- this.mOffset = offset;
- }
-
- @Override
- public int getLine() {
- return mLine;
- }
-
- @Override
- public int getOffset() {
- return mOffset;
- }
-
- @Override
- public int getColumn() {
- return mColumn;
- }
-
- @Override
- public Position getEnd() {
- return mEnd;
- }
-
- @Override
- public void setEnd(@NonNull Position end) {
- mEnd = end;
- }
- }
-}