/*
* Copyright (C) 2007 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.text;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import android.content.res.Resources;
import android.graphics.Typeface;
import android.graphics.drawable.Drawable;
import android.text.style.AbsoluteSizeSpan;
import android.text.style.AlignmentSpan;
import android.text.style.CharacterStyle;
import android.text.style.ForegroundColorSpan;
import android.text.style.ImageSpan;
import android.text.style.ParagraphStyle;
import android.text.style.QuoteSpan;
import android.text.style.RelativeSizeSpan;
import android.text.style.StrikethroughSpan;
import android.text.style.StyleSpan;
import android.text.style.SubscriptSpan;
import android.text.style.SuperscriptSpan;
import android.text.style.TypefaceSpan;
import android.text.style.URLSpan;
import android.text.style.UnderlineSpan;
import android.util.Log;
import com.android.internal.util.XmlUtils;
import java.io.IOException;
import java.io.StringReader;
import java.nio.CharBuffer;
/**
* This class processes HTML strings into displayable styled text.
* Not all HTML tags are supported.
*/
public class Html {
/**
* Retrieves images for HTML <img> tags.
*/
public static interface ImageGetter {
/**
* This methos is called when the HTML parser encounters an
* <img> tag. The source
argument is the
* string from the "src" attribute; the return value should be
* a Drawable representation of the image or null
* for a generic replacement image. Make sure you call
* setBounds() on your Drawable if it doesn't already have
* its bounds set.
*/
public Drawable getDrawable(String source);
}
/**
* Is notified when HTML tags are encountered that the parser does
* not know how to interpret.
*/
public static interface TagHandler {
/**
* This method will be called whenn the HTML parser encounters
* a tag that it does not know how to interpret.
*/
public void handleTag(boolean opening, String tag,
Editable output, XMLReader xmlReader);
}
private Html() { }
/**
* Returns displayable styled text from the provided HTML string.
* Any <img> tags in the HTML will display as a generic
* replacement image which your program can then go through and
* replace with real images.
*
*
This uses TagSoup to handle real HTML, including all of the brokenness found in the wild. */ public static Spanned fromHtml(String source) { return fromHtml(source, null, null); } /** * Lazy initialization holder for HTML parser. This class will * a) be preloaded by the zygote, or b) not loaded until absolutely * necessary. */ private static class HtmlParser { private static final HTMLSchema schema = new HTMLSchema(); } /** * Returns displayable styled text from the provided HTML string. * Any <img> tags in the HTML will use the specified ImageGetter * to request a representation of the image (use null if you don't * want this) and the specified TagHandler to handle unknown tags * (specify null if you don't want this). * *
This uses TagSoup to handle real HTML, including all of the brokenness found in the wild. */ public static Spanned fromHtml(String source, ImageGetter imageGetter, TagHandler tagHandler) { Parser parser = new Parser(); try { parser.setProperty(Parser.schemaProperty, HtmlParser.schema); } catch (org.xml.sax.SAXNotRecognizedException e) { // Should not happen. throw new RuntimeException(e); } catch (org.xml.sax.SAXNotSupportedException e) { // Should not happen. throw new RuntimeException(e); } HtmlToSpannedConverter converter = new HtmlToSpannedConverter(source, imageGetter, tagHandler, parser); return converter.convert(); } /** * Returns an HTML representation of the provided Spanned text. */ public static String toHtml(Spanned text) { StringBuilder out = new StringBuilder(); withinHtml(out, text); return out.toString(); } private static void withinHtml(StringBuilder out, Spanned text) { int len = text.length(); int next; for (int i = 0; i < text.length(); i = next) { next = text.nextSpanTransition(i, len, ParagraphStyle.class); ParagraphStyle[] style = text.getSpans(i, next, ParagraphStyle.class); if (style.length > 0) { out.append("
"); } withinBlockquote(out, text, i, next); for (QuoteSpan quote: quotes) { out.append("\n"); } } } private static void withinBlockquote(StringBuilder out, Spanned text, int start, int end) { out.append("
"); int next; for (int i = start; i < end; i = next) { next = TextUtils.indexOf(text, '\n', i, end); if (next < 0) { next = end; } int nl = 0; while (next < end && text.charAt(next) == '\n') { nl++; next++; } withinParagraph(out, text, i, next - nl, nl, next == end); } out.append("
\n"); } private static void withinParagraph(StringBuilder out, Spanned text, int start, int end, int nl, boolean last) { int next; for (int i = start; i < end; i = next) { next = text.nextSpanTransition(i, end, CharacterStyle.class); CharacterStyle[] style = text.getSpans(i, next, CharacterStyle.class); for (int j = 0; j < style.length; j++) { if (style[j] instanceof StyleSpan) { int s = ((StyleSpan) style[j]).getStyle(); if ((s & Typeface.BOLD) != 0) { out.append(""); } if ((s & Typeface.ITALIC) != 0) { out.append(""); } } if (style[j] instanceof TypefaceSpan) { String s = ((TypefaceSpan) style[j]).getFamily(); if (s.equals("monospace")) { out.append(""); } } if (style[j] instanceof SuperscriptSpan) { out.append(""); } if (style[j] instanceof SubscriptSpan) { out.append(""); } if (style[j] instanceof UnderlineSpan) { out.append(""); } if (style[j] instanceof StrikethroughSpan) { out.append("";
if (nl == 1) {
out.append("
\n");
} else if (nl == 2) {
out.append(p);
} else {
for (int i = 2; i < nl; i++) {
out.append("
");
}
out.append(p);
}
}
private static void withinStyle(StringBuilder out, Spanned text,
int start, int end) {
for (int i = start; i < end; i++) {
char c = text.charAt(i);
if (c == '<') {
out.append("<");
} else if (c == '>') {
out.append(">");
} else if (c == '&') {
out.append("&");
} else if (c > 0x7E || c < ' ') {
out.append("" + ((int) c) + ";");
} else if (c == ' ') {
while (i + 1 < end && text.charAt(i + 1) == ' ') {
out.append(" ");
i++;
}
out.append(' ');
} else {
out.append(c);
}
}
}
}
class HtmlToSpannedConverter implements ContentHandler {
private static final float[] HEADER_SIZES = {
1.5f, 1.4f, 1.3f, 1.2f, 1.1f, 1f,
};
private String mSource;
private XMLReader mReader;
private SpannableStringBuilder mSpannableStringBuilder;
private Html.ImageGetter mImageGetter;
private Html.TagHandler mTagHandler;
public HtmlToSpannedConverter(
String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
Parser parser) {
mSource = source;
mSpannableStringBuilder = new SpannableStringBuilder();
mImageGetter = imageGetter;
mTagHandler = tagHandler;
mReader = parser;
}
public Spanned convert() {
mReader.setContentHandler(this);
try {
mReader.parse(new InputSource(new StringReader(mSource)));
} catch (IOException e) {
// We are reading from a string. There should not be IO problems.
throw new RuntimeException(e);
} catch (SAXException e) {
// TagSoup doesn't throw parse exceptions.
throw new RuntimeException(e);
}
// Fix flags and range for paragraph-type markup.
Object[] obj = mSpannableStringBuilder.getSpans(0, mSpannableStringBuilder.length(), ParagraphStyle.class);
for (int i = 0; i < obj.length; i++) {
int start = mSpannableStringBuilder.getSpanStart(obj[i]);
int end = mSpannableStringBuilder.getSpanEnd(obj[i]);
// If the last line of the range is blank, back off by one.
if (end - 2 >= 0) {
if (mSpannableStringBuilder.charAt(end - 1) == '\n' &&
mSpannableStringBuilder.charAt(end - 2) == '\n') {
end--;
}
}
if (end == start) {
mSpannableStringBuilder.removeSpan(obj[i]);
} else {
mSpannableStringBuilder.setSpan(obj[i], start, end, Spannable.SPAN_PARAGRAPH);
}
}
return mSpannableStringBuilder;
}
private void handleStartTag(String tag, Attributes attributes) {
if (tag.equalsIgnoreCase("br")) {
// We don't need to handle this. TagSoup will ensure that there's a for each
// so we can safely emite the linebreaks when we handle the close tag.
} else if (tag.equalsIgnoreCase("p")) {
handleP(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("div")) {
handleP(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("em")) {
start(mSpannableStringBuilder, new Bold());
} else if (tag.equalsIgnoreCase("b")) {
start(mSpannableStringBuilder, new Bold());
} else if (tag.equalsIgnoreCase("strong")) {
start(mSpannableStringBuilder, new Italic());
} else if (tag.equalsIgnoreCase("cite")) {
start(mSpannableStringBuilder, new Italic());
} else if (tag.equalsIgnoreCase("dfn")) {
start(mSpannableStringBuilder, new Italic());
} else if (tag.equalsIgnoreCase("i")) {
start(mSpannableStringBuilder, new Italic());
} else if (tag.equalsIgnoreCase("big")) {
start(mSpannableStringBuilder, new Big());
} else if (tag.equalsIgnoreCase("small")) {
start(mSpannableStringBuilder, new Small());
} else if (tag.equalsIgnoreCase("font")) {
startFont(mSpannableStringBuilder, attributes);
} else if (tag.equalsIgnoreCase("blockquote")) {
handleP(mSpannableStringBuilder);
start(mSpannableStringBuilder, new Blockquote());
} else if (tag.equalsIgnoreCase("tt")) {
start(mSpannableStringBuilder, new Monospace());
} else if (tag.equalsIgnoreCase("a")) {
startA(mSpannableStringBuilder, attributes);
} else if (tag.equalsIgnoreCase("u")) {
start(mSpannableStringBuilder, new Underline());
} else if (tag.equalsIgnoreCase("sup")) {
start(mSpannableStringBuilder, new Super());
} else if (tag.equalsIgnoreCase("sub")) {
start(mSpannableStringBuilder, new Sub());
} else if (tag.length() == 2 &&
Character.toLowerCase(tag.charAt(0)) == 'h' &&
tag.charAt(1) >= '1' && tag.charAt(1) <= '6') {
handleP(mSpannableStringBuilder);
start(mSpannableStringBuilder, new Header(tag.charAt(1) - '1'));
} else if (tag.equalsIgnoreCase("img")) {
startImg(mSpannableStringBuilder, attributes, mImageGetter);
} else if (mTagHandler != null) {
mTagHandler.handleTag(true, tag, mSpannableStringBuilder, mReader);
}
}
private void handleEndTag(String tag) {
if (tag.equalsIgnoreCase("br")) {
handleBr(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("p")) {
handleP(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("div")) {
handleP(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("em")) {
end(mSpannableStringBuilder, Bold.class, new StyleSpan(Typeface.BOLD));
} else if (tag.equalsIgnoreCase("b")) {
end(mSpannableStringBuilder, Bold.class, new StyleSpan(Typeface.BOLD));
} else if (tag.equalsIgnoreCase("strong")) {
end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
} else if (tag.equalsIgnoreCase("cite")) {
end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
} else if (tag.equalsIgnoreCase("dfn")) {
end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
} else if (tag.equalsIgnoreCase("i")) {
end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
} else if (tag.equalsIgnoreCase("big")) {
end(mSpannableStringBuilder, Big.class, new RelativeSizeSpan(1.25f));
} else if (tag.equalsIgnoreCase("small")) {
end(mSpannableStringBuilder, Small.class, new RelativeSizeSpan(0.8f));
} else if (tag.equalsIgnoreCase("font")) {
endFont(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("blockquote")) {
handleP(mSpannableStringBuilder);
end(mSpannableStringBuilder, Blockquote.class, new QuoteSpan());
} else if (tag.equalsIgnoreCase("tt")) {
end(mSpannableStringBuilder, Monospace.class,
new TypefaceSpan("monospace"));
} else if (tag.equalsIgnoreCase("a")) {
endA(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("u")) {
end(mSpannableStringBuilder, Underline.class, new UnderlineSpan());
} else if (tag.equalsIgnoreCase("sup")) {
end(mSpannableStringBuilder, Super.class, new SuperscriptSpan());
} else if (tag.equalsIgnoreCase("sub")) {
end(mSpannableStringBuilder, Sub.class, new SubscriptSpan());
} else if (tag.length() == 2 &&
Character.toLowerCase(tag.charAt(0)) == 'h' &&
tag.charAt(1) >= '1' && tag.charAt(1) <= '6') {
handleP(mSpannableStringBuilder);
endHeader(mSpannableStringBuilder);
} else if (mTagHandler != null) {
mTagHandler.handleTag(false, tag, mSpannableStringBuilder, mReader);
}
}
private static void handleP(SpannableStringBuilder text) {
int len = text.length();
if (len >= 1 && text.charAt(len - 1) == '\n') {
if (len >= 2 && text.charAt(len - 2) == '\n') {
return;
}
text.append("\n");
return;
}
if (len != 0) {
text.append("\n\n");
}
}
private static void handleBr(SpannableStringBuilder text) {
text.append("\n");
}
private static Object getLast(Spanned text, Class kind) {
/*
* This knows that the last returned object from getSpans()
* will be the most recently added.
*/
Object[] objs = text.getSpans(0, text.length(), kind);
if (objs.length == 0) {
return null;
} else {
return objs[objs.length - 1];
}
}
private static void start(SpannableStringBuilder text, Object mark) {
int len = text.length();
text.setSpan(mark, len, len, Spannable.SPAN_MARK_MARK);
}
private static void end(SpannableStringBuilder text, Class kind,
Object repl) {
int len = text.length();
Object obj = getLast(text, kind);
int where = text.getSpanStart(obj);
text.removeSpan(obj);
if (where != len) {
text.setSpan(repl, where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
return;
}
private static void startImg(SpannableStringBuilder text,
Attributes attributes, Html.ImageGetter img) {
String src = attributes.getValue("", "src");
Drawable d = null;
if (img != null) {
d = img.getDrawable(src);
}
if (d == null) {
d = Resources.getSystem().
getDrawable(com.android.internal.R.drawable.unknown_image);
d.setBounds(0, 0, d.getIntrinsicWidth(), d.getIntrinsicHeight());
}
int len = text.length();
text.append("\uFFFC");
text.setSpan(new ImageSpan(d, src), len, text.length(),
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
private static void startFont(SpannableStringBuilder text,
Attributes attributes) {
String color = attributes.getValue("", "color");
String face = attributes.getValue("", "face");
int len = text.length();
text.setSpan(new Font(color, face), len, len, Spannable.SPAN_MARK_MARK);
}
private static void endFont(SpannableStringBuilder text) {
int len = text.length();
Object obj = getLast(text, Font.class);
int where = text.getSpanStart(obj);
text.removeSpan(obj);
if (where != len) {
Font f = (Font) obj;
if (f.mColor != null) {
int c = -1;
if (f.mColor.equalsIgnoreCase("aqua")) {
c = 0x00FFFF;
} else if (f.mColor.equalsIgnoreCase("black")) {
c = 0x000000;
} else if (f.mColor.equalsIgnoreCase("blue")) {
c = 0x0000FF;
} else if (f.mColor.equalsIgnoreCase("fuchsia")) {
c = 0xFF00FF;
} else if (f.mColor.equalsIgnoreCase("green")) {
c = 0x008000;
} else if (f.mColor.equalsIgnoreCase("grey")) {
c = 0x808080;
} else if (f.mColor.equalsIgnoreCase("lime")) {
c = 0x00FF00;
} else if (f.mColor.equalsIgnoreCase("maroon")) {
c = 0x800000;
} else if (f.mColor.equalsIgnoreCase("navy")) {
c = 0x000080;
} else if (f.mColor.equalsIgnoreCase("olive")) {
c = 0x808000;
} else if (f.mColor.equalsIgnoreCase("purple")) {
c = 0x800080;
} else if (f.mColor.equalsIgnoreCase("red")) {
c = 0xFF0000;
} else if (f.mColor.equalsIgnoreCase("silver")) {
c = 0xC0C0C0;
} else if (f.mColor.equalsIgnoreCase("teal")) {
c = 0x008080;
} else if (f.mColor.equalsIgnoreCase("white")) {
c = 0xFFFFFF;
} else if (f.mColor.equalsIgnoreCase("yellow")) {
c = 0xFFFF00;
} else {
try {
c = XmlUtils.convertValueToInt(f.mColor, -1);
} catch (NumberFormatException nfe) {
// Can't understand the color, so just drop it.
}
}
if (c != -1) {
text.setSpan(new ForegroundColorSpan(c | 0xFF000000),
where, len,
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
if (f.mFace != null) {
text.setSpan(new TypefaceSpan(f.mFace), where, len,
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
}
private static void startA(SpannableStringBuilder text, Attributes attributes) {
String href = attributes.getValue("", "href");
int len = text.length();
text.setSpan(new Href(href), len, len, Spannable.SPAN_MARK_MARK);
}
private static void endA(SpannableStringBuilder text) {
int len = text.length();
Object obj = getLast(text, Href.class);
int where = text.getSpanStart(obj);
text.removeSpan(obj);
if (where != len) {
Href h = (Href) obj;
if (h.mHref != null) {
text.setSpan(new URLSpan(h.mHref), where, len,
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
}
private static void endHeader(SpannableStringBuilder text) {
int len = text.length();
Object obj = getLast(text, Header.class);
int where = text.getSpanStart(obj);
text.removeSpan(obj);
// Back off not to change only the text, not the blank line.
while (len > where && text.charAt(len - 1) == '\n') {
len--;
}
if (where != len) {
Header h = (Header) obj;
text.setSpan(new RelativeSizeSpan(HEADER_SIZES[h.mLevel]),
where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
text.setSpan(new StyleSpan(Typeface.BOLD),
where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
public void setDocumentLocator(Locator locator) {
}
public void startDocument() throws SAXException {
}
public void endDocument() throws SAXException {
}
public void startPrefixMapping(String prefix, String uri) throws SAXException {
}
public void endPrefixMapping(String prefix) throws SAXException {
}
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
handleStartTag(localName, attributes);
}
public void endElement(String uri, String localName, String qName) throws SAXException {
handleEndTag(localName);
}
public void characters(char ch[], int start, int length) throws SAXException {
StringBuilder sb = new StringBuilder();
/*
* Ignore whitespace that immediately follows other whitespace;
* newlines count as spaces.
*/
for (int i = 0; i < length; i++) {
char c = ch[i + start];
if (c == ' ' || c == '\n') {
char pred;
int len = sb.length();
if (len == 0) {
len = mSpannableStringBuilder.length();
if (len == 0) {
pred = '\n';
} else {
pred = mSpannableStringBuilder.charAt(len - 1);
}
} else {
pred = sb.charAt(len - 1);
}
if (pred != ' ' && pred != '\n') {
sb.append(' ');
}
} else {
sb.append(c);
}
}
mSpannableStringBuilder.append(sb);
}
public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
}
public void processingInstruction(String target, String data) throws SAXException {
}
public void skippedEntity(String name) throws SAXException {
}
private static class Bold { }
private static class Italic { }
private static class Underline { }
private static class Big { }
private static class Small { }
private static class Monospace { }
private static class Blockquote { }
private static class Super { }
private static class Sub { }
private static class Font {
public String mColor;
public String mFace;
public Font(String color, String face) {
mColor = color;
mFace = face;
}
}
private static class Href {
public String mHref;
public Href(String href) {
mHref = href;
}
}
private static class Header {
private int mLevel;
public Header(int level) {
mLevel = level;
}
}
}