diff options
Diffstat (limited to 'xml/src/main/java/org/kxml2/io/KXmlSerializer.java')
-rw-r--r-- | xml/src/main/java/org/kxml2/io/KXmlSerializer.java | 53 |
1 files changed, 38 insertions, 15 deletions
diff --git a/xml/src/main/java/org/kxml2/io/KXmlSerializer.java b/xml/src/main/java/org/kxml2/io/KXmlSerializer.java index 8fa2756..bfdeece 100644 --- a/xml/src/main/java/org/kxml2/io/KXmlSerializer.java +++ b/xml/src/main/java/org/kxml2/io/KXmlSerializer.java @@ -125,14 +125,18 @@ public class KXmlSerializer implements XmlSerializer { // otherwise generate. // Note: tab, newline, and carriage return have already been // handled above. - boolean valid = (c >= 0x20 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xfffd); - if (!valid) { - reportInvalidCharacter(c); - } - if (unicode || c < 127) { - writer.write(c); + boolean allowedInXml = (c >= 0x20 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xfffd); + if (allowedInXml) { + if (unicode || c < 127) { + writer.write(c); + } else { + writer.write("&#" + ((int) c) + ";"); + } + } else if (Character.isHighSurrogate(c) && i < s.length() - 1) { + writeSurrogate(c, s.charAt(i + 1)); + ++i; } else { - writer.write("&#" + ((int) c) + ";"); + reportInvalidCharacter(c); } // END android-changed } @@ -141,7 +145,7 @@ public class KXmlSerializer implements XmlSerializer { // BEGIN android-added private static void reportInvalidCharacter(char ch) { - throw new IllegalArgumentException("Illegal character (" + Integer.toHexString((int) ch) + ")"); + throw new IllegalArgumentException("Illegal character (U+" + Integer.toHexString((int) ch) + ")"); } // END android-added @@ -548,22 +552,41 @@ public class KXmlSerializer implements XmlSerializer { // BEGIN android-changed: ]]> is not allowed within a CDATA, // so break and start a new one when necessary. data = data.replace("]]>", "]]]]><![CDATA[>"); - char[] chars = data.toCharArray(); - // We also aren't allowed any invalid characters. - for (char ch : chars) { - boolean valid = (ch >= 0x20 && ch <= 0xd7ff) || + writer.write("<![CDATA["); + for (int i = 0; i < data.length(); ++i) { + char ch = data.charAt(i); + boolean allowedInCdata = (ch >= 0x20 && ch <= 0xd7ff) || (ch == '\t' || ch == '\n' || ch == '\r') || (ch >= 0xe000 && ch <= 0xfffd); - if (!valid) { + if (allowedInCdata) { + writer.write(ch); + } else if (Character.isHighSurrogate(ch) && i < data.length() - 1) { + // Character entities aren't valid in CDATA, so break out for this. + writer.write("]]>"); + writeSurrogate(ch, data.charAt(++i)); + writer.write("<![CDATA["); + } else { reportInvalidCharacter(ch); } } - writer.write("<![CDATA["); - writer.write(chars, 0, chars.length); writer.write("]]>"); // END android-changed } + // BEGIN android-added + private void writeSurrogate(char high, char low) throws IOException { + if (!Character.isLowSurrogate(low)) { + throw new IllegalArgumentException("Bad surrogate pair (U+" + Integer.toHexString((int) high) + + " U+" + Integer.toHexString((int) low) + ")"); + } + // Java-style surrogate pairs aren't allowed in XML. We could use the > 3-byte encodings, but that + // seems likely to upset anything expecting modified UTF-8 rather than "real" UTF-8. It seems more + // conservative in a Java environment to use an entity reference instead. + int codePoint = Character.toCodePoint(high, low); + writer.write("&#" + codePoint + ";"); + } + // END android-added + public void comment(String comment) throws IOException { check(false); writer.write("<!--"); |