From 6ce8e6ee5da964f724d39655fba0e432cff4c3a6 Mon Sep 17 00:00:00 2001 From: Jesse Wilson Date: Mon, 16 May 2011 15:37:02 -0700 Subject: Skip BOM characters even with an explicit charset. Change-Id: I697448528324cd68196d00ebf82ee8eecb72148d http://code.google.com/p/android/issues/detail?id=16892 --- xml/src/main/java/org/kxml2/io/KXmlParser.java | 169 +++++++++++++------------ 1 file changed, 89 insertions(+), 80 deletions(-) (limited to 'xml/src/main/java') diff --git a/xml/src/main/java/org/kxml2/io/KXmlParser.java b/xml/src/main/java/org/kxml2/io/KXmlParser.java index 4b4f328..d41ced9 100644 --- a/xml/src/main/java/org/kxml2/io/KXmlParser.java +++ b/xml/src/main/java/org/kxml2/io/KXmlParser.java @@ -1606,17 +1606,17 @@ public class KXmlParser implements XmlPullParser, Closeable { documentEntities = null; } - public void setInput(InputStream is, String _enc) throws XmlPullParserException { + public void setInput(InputStream is, String charset) throws XmlPullParserException { position = 0; limit = 0; - String enc = _enc; + boolean detectCharset = (charset == null); if (is == null) { throw new IllegalArgumentException(); } try { - if (enc == null) { + if (detectCharset) { // read the four bytes looking for an indication of the encoding in use int firstFourBytes = 0; while (limit < 4) { @@ -1630,93 +1630,102 @@ public class KXmlParser implements XmlPullParser, Closeable { if (limit == 4) { switch (firstFourBytes) { - case 0x00000FEFF: // UTF-32BE BOM - enc = "UTF-32BE"; - limit = 0; - break; - - case 0x0FFFE0000: // UTF-32LE BOM - enc = "UTF-32LE"; - limit = 0; - break; - - case 0x0000003c: // '>' in UTF-32BE - enc = "UTF-32BE"; - buffer[0] = '<'; - limit = 1; - break; + case 0x00000FEFF: // UTF-32BE BOM + charset = "UTF-32BE"; + limit = 0; + break; - case 0x03c000000: // '<' in UTF-32LE - enc = "UTF-32LE"; - buffer[0] = '<'; - limit = 1; - break; - - case 0x0003c003f: // "') { - String s = new String(buffer, 0, limit); - int i0 = s.indexOf("encoding"); - if (i0 != -1) { - while (s.charAt(i0) != '"' - && s.charAt(i0) != '\'') { - i0++; - } - char deli = s.charAt(i0++); - int i1 = s.indexOf(deli, i0); - enc = s.substring(i0, i1); + case 0x0FFFE0000: // UTF-32LE BOM + charset = "UTF-32LE"; + limit = 0; + break; + + case 0x0000003c: // '<' in UTF-32BE + charset = "UTF-32BE"; + buffer[0] = '<'; + limit = 1; + break; + + case 0x03c000000: // '<' in UTF-32LE + charset = "UTF-32LE"; + buffer[0] = '<'; + limit = 1; + break; + + case 0x0003c003f: // "') { + String s = new String(buffer, 0, limit); + int i0 = s.indexOf("encoding"); + if (i0 != -1) { + while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') { + i0++; } - break; + char deli = s.charAt(i0++); + int i1 = s.indexOf(deli, i0); + charset = s.substring(i0, i1); } + break; } - break; - - default: - // handle a byte order mark followed by something other than + * is still at character 0. + */ + if (!detectCharset && peekCharacter() == 0xfeff) { + limit--; + System.arraycopy(buffer, 1, buffer, 0, limit); + } } catch (Exception e) { throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e); } -- cgit v1.1