From 58b50f86cdde829d286ffcc1969ed3ce9e2cdc11 Mon Sep 17 00:00:00 2001 From: Daisuke Miyakawa Date: Thu, 8 Apr 2010 16:44:56 +0900 Subject: Refactor vCard parser so that its implementation detail would be hidden from users. Move the current implementation or 2.1 and 3.0 to VCardParserImpl_V21 and VCardParserImpl_V30. - From the view of users, it is strange to make vCard 3.0 inherit vCard 2.1. - Those two specifications still have similar implementation detail, so make implementation of vCard 3.0 inherits vCard 2.1 makes sense from technical view point. Now VCardParser_V21 and VCardParser_V30 are independent class. Remove strict mode in vCard 3.0, since strict parsing is not necessary from the view of actual use, and needs to extra effort to support it. Bug: 2576738 Change-Id: I1d5cb6cffffb53337faa2dd7c519e7254e12da86 --- core/java/android/pim/vcard/VCardParser.java | 56 +- .../android/pim/vcard/VCardParserImpl_V21.java | 996 +++++++++++++++++++++ .../android/pim/vcard/VCardParserImpl_V30.java | 305 +++++++ core/java/android/pim/vcard/VCardParser_V21.java | 995 +------------------- core/java/android/pim/vcard/VCardParser_V30.java | 333 +------ core/java/android/pim/vcard/VCardUtils.java | 4 +- 6 files changed, 1397 insertions(+), 1292 deletions(-) create mode 100644 core/java/android/pim/vcard/VCardParserImpl_V21.java create mode 100644 core/java/android/pim/vcard/VCardParserImpl_V30.java (limited to 'core/java/android/pim') diff --git a/core/java/android/pim/vcard/VCardParser.java b/core/java/android/pim/vcard/VCardParser.java index b38ea93..71ab5c4 100644 --- a/core/java/android/pim/vcard/VCardParser.java +++ b/core/java/android/pim/vcard/VCardParser.java @@ -20,47 +20,34 @@ import android.pim.vcard.exception.VCardException; import java.io.IOException; import java.io.InputStream; -public abstract class VCardParser { - protected final int mParseType; - protected boolean mCanceled; - - public VCardParser() { - this(VCardConfig.PARSE_TYPE_UNKNOWN); - } - - public VCardParser(int parseType) { - mParseType = parseType; - } - +public interface VCardParser { /** - *

+ *

* Parses the given stream and send the vCard data into VCardBuilderBase object. - * + *

. + *

* Note that vCard 2.1 specification allows "CHARSET" parameter, and some career sets * local encoding to it. For example, Japanese phone career uses Shift_JIS, which is * formally allowed in vCard 2.1, but not allowed in vCard 3.0. In vCard 2.1, * In some exreme case, it is allowed for vCard to have different charsets in one vCard. - *

- *

+ *

+ *

* We recommend you use {@link VCardSourceDetector} and detect which kind of source the * vCard comes from and explicitly specify a charset using the result. - *

+ *

* * @param is The source to parse. * @param interepreter A {@link VCardInterpreter} object which used to construct data. * @return Returns true for success. Otherwise returns false. * @throws IOException, VCardException */ - public final boolean parse(InputStream is, VCardInterpreter interepreter) - throws IOException, VCardException { - return parse(is, VCardConfig.DEFAULT_TEMPORARY_CHARSET, interepreter); - } + public boolean parse(InputStream is, VCardInterpreter interepreter) + throws IOException, VCardException; /** - *

+ *

* The method variants which accept charset. - *

+ *

* * @param is The source to parse. * @param charset Charset to be used. @@ -68,22 +55,25 @@ public abstract class VCardParser { * @return Returns true when successful. Otherwise returns false. * @throws IOException, VCardException */ - public abstract boolean parse(InputStream is, String charset, - VCardInterpreter interpreter) + public boolean parse(InputStream is, String charset, VCardInterpreter interpreter) throws IOException, VCardException; /** * The method variants which tells this object the operation is already canceled. + * @hide */ - public abstract void parse(InputStream is, String charset, + // TODO: remove this if possible. + public boolean parse(InputStream is, String charset, VCardInterpreter builder, boolean canceled) throws IOException, VCardException; - + /** - * Cancel parsing. - * Actual cancel is done after the end of the current one vcard entry parsing. + *

+ * Cancel parsing vCard. Useful when you want to stop the parse in the other threads. + *

+ *

+ * Actual cancel is done after parsing the current vcard. + *

*/ - public void cancel() { - mCanceled = true; - } + public abstract void cancel(); } diff --git a/core/java/android/pim/vcard/VCardParserImpl_V21.java b/core/java/android/pim/vcard/VCardParserImpl_V21.java new file mode 100644 index 0000000..c3b725f --- /dev/null +++ b/core/java/android/pim/vcard/VCardParserImpl_V21.java @@ -0,0 +1,996 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package android.pim.vcard; + +import android.pim.vcard.exception.VCardAgentNotSupportedException; +import android.pim.vcard.exception.VCardException; +import android.pim.vcard.exception.VCardInvalidCommentLineException; +import android.pim.vcard.exception.VCardInvalidLineException; +import android.pim.vcard.exception.VCardNestedException; +import android.pim.vcard.exception.VCardVersionException; +import android.util.Log; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; + +/** + *

+ * Basic implementation achieving vCard parsing. Based on vCard 2.1, + *

+ */ +/* package */ class VCardParserImpl_V21 { + private static final String LOG_TAG = "VCardParserImpl_V21"; + + private static final class CustomBufferedReader extends BufferedReader { + private long mTime; + + public CustomBufferedReader(Reader in) { + super(in); + } + + @Override + public String readLine() throws IOException { + long start = System.currentTimeMillis(); + String ret = super.readLine(); + long end = System.currentTimeMillis(); + mTime += end - start; + return ret; + } + + public long getTotalmillisecond() { + return mTime; + } + } + + private static final String sDefaultEncoding = "8BIT"; + + //// Protected members + + protected boolean mCanceled; + + protected VCardInterpreter mInterpreter; + + /** + *

+ * The encoding type for deconding byte streams. This member variable is + * reset to a default encoding every time when a new item comes. + *

+ *

+ * "Encoding" in vCard is different from "Charset". It is mainly used for + * addresses, notes, images. "7BIT", "8BIT", "BASE64", and + * "QUOTED-PRINTABLE" are known examples. + *

+ */ + protected String mCurrentEncoding; + + /** + *

+ * The reader object to be used internally. + *

+ *

+ * Developers should not directly read a line from this object. Use + * getLine() unless there some reason. + *

+ */ + protected BufferedReader mReader; + + /** + *

+ * Set for storing unkonwn TYPE attributes, which is not acceptable in vCard + * specification, but happens to be seen in real world vCard. + *

+ */ + protected final Set mUnknownTypeSet = new HashSet(); + + /** + *

+ * Set for storing unkonwn VALUE attributes, which is not acceptable in + * vCard specification, but happens to be seen in real world vCard. + *

+ */ + protected final Set mUnknownValueSet = new HashSet(); + + // // Private members + + // In some cases, vCard is nested. Currently, we only consider the most + // interior vCard data. + // See v21_foma_1.vcf in test directory for more information. + private int mNestCount; + + // Used only for parsing END:VCARD. + private String mPreviousLine; + + // For measuring performance. + private long mTimeTotal; + + private long mTimeReadStartRecord; + + private long mTimeReadEndRecord; + + private long mTimeStartProperty; + + private long mTimeEndProperty; + + private long mTimeParseItems; + + private long mTimeParseLineAndHandleGroup; + + private long mTimeParsePropertyValues; + + private long mTimeParseAdrOrgN; + + private long mTimeHandleMiscPropertyValue; + + private long mTimeHandleQuotedPrintable; + + private long mTimeHandleBase64; + + /** + *

+ * Same as {{@link #VCardParserImpl_V21(VCardSourceDetector)} with + * {@link VCardConfig#PARSE_TYPE_UNKNOWN}. + *

+ */ + public VCardParserImpl_V21() { + this(VCardConfig.PARSE_TYPE_UNKNOWN); + } + + /** + *

+ * The constructor which uses the estimated type available from a given + * detector. + *

+ */ + public VCardParserImpl_V21(VCardSourceDetector detector) { + this(detector != null ? detector.getEstimatedType() : VCardConfig.PARSE_TYPE_UNKNOWN); + } + + /** + *

+ * The constructor which uses a given parse type like + * {@link VCardConfig#PARSE_TYPE_UNKNOWN}. + *

+ *

+ * This should be used only when you already know the exact type to be used. + *

+ */ + public VCardParserImpl_V21(int parseType) { + if (parseType == VCardConfig.PARSE_TYPE_FOMA) { + mNestCount = 1; + } + } + + /** + *

+ * Parses the file at the given position. + *

+ */ + //
vcard_file = [wsls] vcard [wsls]
+ protected void parseVCardFile() throws IOException, VCardException { + boolean firstReading = true; + while (true) { + if (mCanceled) { + break; + } + if (!parseOneVCard(firstReading)) { + break; + } + firstReading = false; + } + + if (mNestCount > 0) { + boolean useCache = true; + for (int i = 0; i < mNestCount; i++) { + readEndVCard(useCache, true); + useCache = false; + } + } + } + + /** + * @return true when a given property name is a valid property name. + */ + protected boolean isValidPropertyName(final String propertyName) { + if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) || + propertyName.startsWith("X-")) + && !mUnknownTypeSet.contains(propertyName)) { + mUnknownTypeSet.add(propertyName); + Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName); + } + return true; + } + + /** + * @return String. It may be null, or its length may be 0 + * @throws IOException + */ + protected String getLine() throws IOException { + return mReader.readLine(); + } + + /** + * @return String with it's length > 0 + * @throws IOException + * @throws VCardException when the stream reached end of line + */ + protected String getNonEmptyLine() throws IOException, VCardException { + String line; + while (true) { + line = getLine(); + if (line == null) { + throw new VCardException("Reached end of buffer."); + } else if (line.trim().length() > 0) { + return line; + } + } + } + + /* + * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] + * ":" [ws] "VCARD" + */ + private boolean parseOneVCard(boolean firstReading) throws IOException, VCardException { + boolean allowGarbage = false; + if (firstReading) { + if (mNestCount > 0) { + for (int i = 0; i < mNestCount; i++) { + if (!readBeginVCard(allowGarbage)) { + return false; + } + allowGarbage = true; + } + } + } + + if (!readBeginVCard(allowGarbage)) { + return false; + } + long start; + if (mInterpreter != null) { + start = System.currentTimeMillis(); + mInterpreter.startEntry(); + mTimeReadStartRecord += System.currentTimeMillis() - start; + } + start = System.currentTimeMillis(); + parseItems(); + mTimeParseItems += System.currentTimeMillis() - start; + readEndVCard(true, false); + if (mInterpreter != null) { + start = System.currentTimeMillis(); + mInterpreter.endEntry(); + mTimeReadEndRecord += System.currentTimeMillis() - start; + } + return true; + } + + /** + * @return True when successful. False when reaching the end of line + * @throws IOException + * @throws VCardException + */ + protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException { + String line; + do { + while (true) { + line = getLine(); + if (line == null) { + return false; + } else if (line.trim().length() > 0) { + break; + } + } + String[] strArray = line.split(":", 2); + int length = strArray.length; + + // Though vCard 2.1/3.0 specification does not allow lower cases, + // vCard file emitted by some external vCard expoter have such + // invalid Strings. + // So we allow it. + // e.g. BEGIN:vCard + if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN") + && strArray[1].trim().equalsIgnoreCase("VCARD")) { + return true; + } else if (!allowGarbage) { + if (mNestCount > 0) { + mPreviousLine = line; + return false; + } else { + throw new VCardException("Expected String \"BEGIN:VCARD\" did not come " + + "(Instead, \"" + line + "\" came)"); + } + } + } while (allowGarbage); + + throw new VCardException("Reached where must not be reached."); + } + + /** + *

+ * The arguments useCache and allowGarbase are usually true and false + * accordingly when this function is called outside this function itself. + *

+ * + * @param useCache When true, line is obtained from mPreviousline. + * Otherwise, getLine() is used. + * @param allowGarbage When true, ignore non "END:VCARD" line. + * @throws IOException + * @throws VCardException + */ + protected void readEndVCard(boolean useCache, boolean allowGarbage) throws IOException, + VCardException { + String line; + do { + if (useCache) { + // Though vCard specification does not allow lower cases, + // some data may have them, so we allow it. + line = mPreviousLine; + } else { + while (true) { + line = getLine(); + if (line == null) { + throw new VCardException("Expected END:VCARD was not found."); + } else if (line.trim().length() > 0) { + break; + } + } + } + + String[] strArray = line.split(":", 2); + if (strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("END") + && strArray[1].trim().equalsIgnoreCase("VCARD")) { + return; + } else if (!allowGarbage) { + throw new VCardException("END:VCARD != \"" + mPreviousLine + "\""); + } + useCache = false; + } while (allowGarbage); + } + + /* + * items = *CRLF item / item + */ + protected void parseItems() throws IOException, VCardException { + boolean ended = false; + + if (mInterpreter != null) { + long start = System.currentTimeMillis(); + mInterpreter.startProperty(); + mTimeStartProperty += System.currentTimeMillis() - start; + } + ended = parseItem(); + if (mInterpreter != null && !ended) { + long start = System.currentTimeMillis(); + mInterpreter.endProperty(); + mTimeEndProperty += System.currentTimeMillis() - start; + } + + while (!ended) { + // follow VCARD ,it wont reach endProperty + if (mInterpreter != null) { + long start = System.currentTimeMillis(); + mInterpreter.startProperty(); + mTimeStartProperty += System.currentTimeMillis() - start; + } + try { + ended = parseItem(); + } catch (VCardInvalidCommentLineException e) { + Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored."); + ended = false; + } + if (mInterpreter != null && !ended) { + long start = System.currentTimeMillis(); + mInterpreter.endProperty(); + mTimeEndProperty += System.currentTimeMillis() - start; + } + } + } + + /* + * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR" + * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts + * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."] + * "AGENT" [params] ":" vcard CRLF + */ + protected boolean parseItem() throws IOException, VCardException { + mCurrentEncoding = sDefaultEncoding; + + final String line = getNonEmptyLine(); + long start = System.currentTimeMillis(); + + String[] propertyNameAndValue = separateLineAndHandleGroup(line); + if (propertyNameAndValue == null) { + return true; + } + if (propertyNameAndValue.length != 2) { + throw new VCardInvalidLineException("Invalid line \"" + line + "\""); + } + String propertyName = propertyNameAndValue[0].toUpperCase(); + String propertyValue = propertyNameAndValue[1]; + + mTimeParseLineAndHandleGroup += System.currentTimeMillis() - start; + + if (propertyName.equals("ADR") || propertyName.equals("ORG") || propertyName.equals("N")) { + start = System.currentTimeMillis(); + handleMultiplePropertyValue(propertyName, propertyValue); + mTimeParseAdrOrgN += System.currentTimeMillis() - start; + return false; + } else if (propertyName.equals("AGENT")) { + handleAgent(propertyValue); + return false; + } else if (isValidPropertyName(propertyName)) { + if (propertyName.equals("BEGIN")) { + if (propertyValue.equals("VCARD")) { + throw new VCardNestedException("This vCard has nested vCard data in it."); + } else { + throw new VCardException("Unknown BEGIN type: " + propertyValue); + } + } else if (propertyName.equals("VERSION") && !propertyValue.equals(getVersionString())) { + throw new VCardVersionException("Incompatible version: " + propertyValue + " != " + + getVersionString()); + } + start = System.currentTimeMillis(); + handlePropertyValue(propertyName, propertyValue); + mTimeParsePropertyValues += System.currentTimeMillis() - start; + return false; + } + + throw new VCardException("Unknown property name: \"" + propertyName + "\""); + } + + static private final int STATE_GROUP_OR_PROPNAME = 0; + + static private final int STATE_PARAMS = 1; + + // vCard 3.0 specification allows double-quoted param-value, while vCard 2.1 + // does not. + // This is just for safety. + static private final int STATE_PARAMS_IN_DQUOTE = 2; + + protected String[] separateLineAndHandleGroup(String line) throws VCardException { + int state = STATE_GROUP_OR_PROPNAME; + int nameIndex = 0; + + final String[] propertyNameAndValue = new String[2]; + + final int length = line.length(); + if (length > 0 && line.charAt(0) == '#') { + throw new VCardInvalidCommentLineException(); + } + + // This loop is developed so that we don't have to take care of bottle + // neck here. + // Refactor carefully when you need to do so. + for (int i = 0; i < length; i++) { + final char ch = line.charAt(i); + switch (state) { + case STATE_GROUP_OR_PROPNAME: { + if (ch == ':') { + final String propertyName = line.substring(nameIndex, i); + if (propertyName.equalsIgnoreCase("END")) { + mPreviousLine = line; + return null; + } + if (mInterpreter != null) { + mInterpreter.propertyName(propertyName); + } + propertyNameAndValue[0] = propertyName; + if (i < length - 1) { + propertyNameAndValue[1] = line.substring(i + 1); + } else { + propertyNameAndValue[1] = ""; + } + return propertyNameAndValue; + } else if (ch == '.') { + String groupName = line.substring(nameIndex, i); + if (mInterpreter != null) { + mInterpreter.propertyGroup(groupName); + } + nameIndex = i + 1; + } else if (ch == ';') { + String propertyName = line.substring(nameIndex, i); + if (propertyName.equalsIgnoreCase("END")) { + mPreviousLine = line; + return null; + } + if (mInterpreter != null) { + mInterpreter.propertyName(propertyName); + } + propertyNameAndValue[0] = propertyName; + nameIndex = i + 1; + state = STATE_PARAMS; + } + break; + } + case STATE_PARAMS: { + if (ch == '"') { + state = STATE_PARAMS_IN_DQUOTE; + } else if (ch == ';') { + handleParams(line.substring(nameIndex, i)); + nameIndex = i + 1; + } else if (ch == ':') { + handleParams(line.substring(nameIndex, i)); + if (i < length - 1) { + propertyNameAndValue[1] = line.substring(i + 1); + } else { + propertyNameAndValue[1] = ""; + } + return propertyNameAndValue; + } + break; + } + case STATE_PARAMS_IN_DQUOTE: { + if (ch == '"') { + state = STATE_PARAMS; + } + break; + } + } + } + + throw new VCardInvalidLineException("Invalid line: \"" + line + "\""); + } + + /* + * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param / + * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws] + * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "=" + * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "=" + * [ws] word / knowntype + */ + protected void handleParams(String params) throws VCardException { + String[] strArray = params.split("=", 2); + if (strArray.length == 2) { + final String paramName = strArray[0].trim().toUpperCase(); + String paramValue = strArray[1].trim(); + if (paramName.equals("TYPE")) { + handleType(paramValue); + } else if (paramName.equals("VALUE")) { + handleValue(paramValue); + } else if (paramName.equals("ENCODING")) { + handleEncoding(paramValue); + } else if (paramName.equals("CHARSET")) { + handleCharset(paramValue); + } else if (paramName.equals("LANGUAGE")) { + handleLanguage(paramValue); + } else if (paramName.startsWith("X-")) { + handleAnyParam(paramName, paramValue); + } else { + throw new VCardException("Unknown type \"" + paramName + "\""); + } + } else { + handleParamWithoutName(strArray[0]); + } + } + + /** + * vCard 3.0 parser may throw VCardException. + */ + @SuppressWarnings("unused") + protected void handleParamWithoutName(final String paramValue) throws VCardException { + handleType(paramValue); + } + + /* + * ptypeval = knowntype / "X-" word + */ + protected void handleType(final String ptypeval) { + String upperTypeValue = ptypeval; + if (!(getKnownTypeSet().contains(upperTypeValue) || upperTypeValue.startsWith("X-")) + && !mUnknownTypeSet.contains(ptypeval)) { + mUnknownTypeSet.add(ptypeval); + Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval)); + } + if (mInterpreter != null) { + mInterpreter.propertyParamType("TYPE"); + mInterpreter.propertyParamValue(upperTypeValue); + } + } + + /* + * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word + */ + protected void handleValue(final String pvalueval) { + if (!getKnownValueSet().contains(pvalueval.toUpperCase()) && pvalueval.startsWith("X-") + && !mUnknownValueSet.contains(pvalueval)) { + mUnknownValueSet.add(pvalueval); + Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), pvalueval)); + } + if (mInterpreter != null) { + mInterpreter.propertyParamType("VALUE"); + mInterpreter.propertyParamValue(pvalueval); + } + } + + /* + * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" + * word + */ + protected void handleEncoding(String pencodingval) throws VCardException { + if (getAvailableEncodingSet().contains(pencodingval) || + pencodingval.startsWith("X-")) { + if (mInterpreter != null) { + mInterpreter.propertyParamType("ENCODING"); + mInterpreter.propertyParamValue(pencodingval); + } + mCurrentEncoding = pencodingval; + } else { + throw new VCardException("Unknown encoding \"" + pencodingval + "\""); + } + } + + /** + * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC + * 1521), but today's vCard often contains other charset, so we allow them. + */ + protected void handleCharset(String charsetval) { + if (mInterpreter != null) { + mInterpreter.propertyParamType("CHARSET"); + mInterpreter.propertyParamValue(charsetval); + } + } + + /** + * See also Section 7.1 of RFC 1521 + */ + protected void handleLanguage(String langval) throws VCardException { + String[] strArray = langval.split("-"); + if (strArray.length != 2) { + throw new VCardException("Invalid Language: \"" + langval + "\""); + } + String tmp = strArray[0]; + int length = tmp.length(); + for (int i = 0; i < length; i++) { + if (!isAsciiLetter(tmp.charAt(i))) { + throw new VCardException("Invalid Language: \"" + langval + "\""); + } + } + tmp = strArray[1]; + length = tmp.length(); + for (int i = 0; i < length; i++) { + if (!isAsciiLetter(tmp.charAt(i))) { + throw new VCardException("Invalid Language: \"" + langval + "\""); + } + } + if (mInterpreter != null) { + mInterpreter.propertyParamType("LANGUAGE"); + mInterpreter.propertyParamValue(langval); + } + } + + private boolean isAsciiLetter(char ch) { + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { + return true; + } + return false; + } + + /** + * Mainly for "X-" type. This accepts any kind of type without check. + */ + protected void handleAnyParam(String paramName, String paramValue) { + if (mInterpreter != null) { + mInterpreter.propertyParamType(paramName); + mInterpreter.propertyParamValue(paramValue); + } + } + + protected void handlePropertyValue(String propertyName, String propertyValue) + throws IOException, VCardException { + if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { + final long start = System.currentTimeMillis(); + final String result = getQuotedPrintable(propertyValue); + if (mInterpreter != null) { + ArrayList v = new ArrayList(); + v.add(result); + mInterpreter.propertyValues(v); + } + mTimeHandleQuotedPrintable += System.currentTimeMillis() - start; + } else if (mCurrentEncoding.equalsIgnoreCase("BASE64") + || mCurrentEncoding.equalsIgnoreCase("B")) { + final long start = System.currentTimeMillis(); + // It is very rare, but some BASE64 data may be so big that + // OutOfMemoryError occurs. To ignore such cases, use try-catch. + try { + final String result = getBase64(propertyValue); + if (mInterpreter != null) { + ArrayList v = new ArrayList(); + v.add(result); + mInterpreter.propertyValues(v); + } + } catch (OutOfMemoryError error) { + Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!"); + if (mInterpreter != null) { + mInterpreter.propertyValues(null); + } + } + mTimeHandleBase64 += System.currentTimeMillis() - start; + } else { + if (!(mCurrentEncoding == null || mCurrentEncoding.equalsIgnoreCase("7BIT") + || mCurrentEncoding.equalsIgnoreCase("8BIT") || mCurrentEncoding.toUpperCase() + .startsWith("X-"))) { + Log.w(LOG_TAG, "The encoding unsupported by vCard spec: \"" + mCurrentEncoding + + "\"."); + } + + final long start = System.currentTimeMillis(); + if (mInterpreter != null) { + ArrayList v = new ArrayList(); + v.add(maybeUnescapeText(propertyValue)); + mInterpreter.propertyValues(v); + } + mTimeHandleMiscPropertyValue += System.currentTimeMillis() - start; + } + } + + /** + *

+ * Parses and returns Quoted-Printable. + *

+ * + * @param firstString The string following a parameter name and attributes. + * Example: "string" in + * "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r". + * @return whole Quoted-Printable string, including a given argument and + * following lines. Excludes the last empty line following to Quoted + * Printable lines. + * @throws IOException + * @throws VCardException + */ + private String getQuotedPrintable(String firstString) throws IOException, VCardException { + // Specifically, there may be some padding between = and CRLF. + // See the following: + // + // qp-line := *(qp-segment transport-padding CRLF) + // qp-part transport-padding + // qp-segment := qp-section *(SPACE / TAB) "=" + // ; Maximum length of 76 characters + // + // e.g. (from RFC 2045) + // Now's the time = + // for all folk to come= + // to the aid of their country. + if (firstString.trim().endsWith("=")) { + // remove "transport-padding" + int pos = firstString.length() - 1; + while (firstString.charAt(pos) != '=') { + } + StringBuilder builder = new StringBuilder(); + builder.append(firstString.substring(0, pos + 1)); + builder.append("\r\n"); + String line; + while (true) { + line = getLine(); + if (line == null) { + throw new VCardException("File ended during parsing a Quoted-Printable String"); + } + if (line.trim().endsWith("=")) { + // remove "transport-padding" + pos = line.length() - 1; + while (line.charAt(pos) != '=') { + } + builder.append(line.substring(0, pos + 1)); + builder.append("\r\n"); + } else { + builder.append(line); + break; + } + } + return builder.toString(); + } else { + return firstString; + } + } + + protected String getBase64(String firstString) throws IOException, VCardException { + StringBuilder builder = new StringBuilder(); + builder.append(firstString); + + while (true) { + String line = getLine(); + if (line == null) { + throw new VCardException("File ended during parsing BASE64 binary"); + } + if (line.length() == 0) { + break; + } + builder.append(line); + } + + return builder.toString(); + } + + /** + *

+ * Mainly for "ADR", "ORG", and "N" + *

+ */ + /* + * addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr, + * Street, Locality, Region, Postal Code, Country Name orgparts = + * *(strnosemi ";") strnosemi ; First is Organization Name, remainder are + * Organization Units. nameparts = 0*4(strnosemi ";") strnosemi ; Family, + * Given, Middle, Prefix, Suffix. ; Example:Public;John;Q.;Reverend Dr.;III, + * Esq. strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi ; To include a + * semicolon in this string, it must be escaped ; with a "\" character. We + * do not care the number of "strnosemi" here. We are not sure whether we + * should add "\" CRLF to each value. We exclude them for now. + */ + protected void handleMultiplePropertyValue(String propertyName, String propertyValue) + throws IOException, VCardException { + // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some + // softwares/devices + // emit such data. + if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { + propertyValue = getQuotedPrintable(propertyValue); + } + + if (mInterpreter != null) { + mInterpreter.propertyValues(VCardUtils.constructListFromValue(propertyValue, + (getVersion() == VCardConfig.FLAG_V30))); + } + } + + /* + * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an + * error toward the AGENT property. + * // TODO: Support AGENT property. + * item = + * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws] + * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD" + */ + protected void handleAgent(final String propertyValue) throws VCardException { + if (!propertyValue.toUpperCase().contains("BEGIN:VCARD")) { + // Apparently invalid line seen in Windows Mobile 6.5. Ignore them. + return; + } else { + throw new VCardAgentNotSupportedException("AGENT Property is not supported now."); + } + } + + /** + * For vCard 3.0. + */ + protected String maybeUnescapeText(final String text) { + return text; + } + + /** + * Returns unescaped String if the character should be unescaped. Return + * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";" + * while "\x" should not be. + */ + protected String maybeUnescapeCharacter(final char ch) { + return unescapeCharacter(ch); + } + + /* package */ static String unescapeCharacter(final char ch) { + // Original vCard 2.1 specification does not allow transformation + // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous + // implementation of + // this class allowed them, so keep it as is. + if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') { + return String.valueOf(ch); + } else { + return null; + } + } + + private void showPerformanceInfo() { + Log.d(LOG_TAG, "Total parsing time: " + mTimeTotal + " ms"); + if (mReader instanceof CustomBufferedReader) { + Log.d(LOG_TAG, "Total readLine time: " + + ((CustomBufferedReader) mReader).getTotalmillisecond() + " ms"); + } + Log.d(LOG_TAG, "Time for handling the beggining of the record: " + mTimeReadStartRecord + + " ms"); + Log.d(LOG_TAG, "Time for handling the end of the record: " + mTimeReadEndRecord + " ms"); + Log.d(LOG_TAG, "Time for parsing line, and handling group: " + mTimeParseLineAndHandleGroup + + " ms"); + Log.d(LOG_TAG, "Time for parsing ADR, ORG, and N fields:" + mTimeParseAdrOrgN + " ms"); + Log.d(LOG_TAG, "Time for parsing property values: " + mTimeParsePropertyValues + " ms"); + Log.d(LOG_TAG, "Time for handling normal property values: " + mTimeHandleMiscPropertyValue + + " ms"); + Log.d(LOG_TAG, "Time for handling Quoted-Printable: " + mTimeHandleQuotedPrintable + " ms"); + Log.d(LOG_TAG, "Time for handling Base64: " + mTimeHandleBase64 + " ms"); + } + + /** + * @return {@link VCardConfig#FLAG_V21} + */ + protected int getVersion() { + return VCardConfig.FLAG_V21; + } + + /** + * @return {@link VCardConfig#FLAG_V30} + */ + protected String getVersionString() { + return VCardConstants.VERSION_V21; + } + + protected Set getKnownPropertyNameSet() { + return VCardParser_V21.sKnownPropertyNameSet; + } + + protected Set getKnownTypeSet() { + return VCardParser_V21.sKnownTypeSet; + } + + protected Set getKnownValueSet() { + return VCardParser_V21.sKnownValueSet; + } + + protected Set getAvailableEncodingSet() { + return VCardParser_V21.sAvailableEncoding; + } + + protected String getDefaultEncoding() { + return sDefaultEncoding; + } + + + public boolean parse(InputStream is, String inputCharset, VCardInterpreter interpreter) + throws IOException, VCardException { + if (is == null) { + throw new NullPointerException("InputStream must not be null."); + } + if (inputCharset == null) { + inputCharset = VCardConfig.DEFAULT_TEMPORARY_CHARSET; + } + + final InputStreamReader tmpReader = new InputStreamReader(is, inputCharset); + if (VCardConfig.showPerformanceLog()) { + mReader = new CustomBufferedReader(tmpReader); + } else { + mReader = new BufferedReader(tmpReader); + } + + mInterpreter = interpreter; + + long start = System.currentTimeMillis(); + if (mInterpreter != null) { + mInterpreter.start(); + } + parseVCardFile(); + if (mInterpreter != null) { + mInterpreter.end(); + } + mTimeTotal += System.currentTimeMillis() - start; + + if (VCardConfig.showPerformanceLog()) { + showPerformanceInfo(); + } + + return true; + } + + public boolean parse(InputStream is, String charset, + VCardInterpreter builder, boolean canceled) + throws IOException, VCardException { + mCanceled = canceled; + return parse(is, charset, builder); + } + + public final void cancel() { + mCanceled = true; + } +} diff --git a/core/java/android/pim/vcard/VCardParserImpl_V30.java b/core/java/android/pim/vcard/VCardParserImpl_V30.java new file mode 100644 index 0000000..5257cde --- /dev/null +++ b/core/java/android/pim/vcard/VCardParserImpl_V30.java @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package android.pim.vcard; + +import java.io.IOException; +import java.util.Set; + +import android.pim.vcard.exception.VCardException; +import android.util.Log; + +/* package */ class VCardParserImpl_V30 extends VCardParserImpl_V21 { + private static final String LOG_TAG = "VCardParserImpl_V30"; + + private String mPreviousLine; + + private boolean mEmittedAgentWarning = false; + + public VCardParserImpl_V30() { + super(); + } + + public VCardParserImpl_V30(VCardSourceDetector detector) { + this(detector != null ? detector.getEstimatedType() : VCardConfig.PARSE_TYPE_UNKNOWN); + } + + public VCardParserImpl_V30(int parseMode) { + super(parseMode); + } + + @Override + protected int getVersion() { + return VCardConfig.FLAG_V30; + } + + @Override + protected String getVersionString() { + return VCardConstants.VERSION_V30; + } + + @Override + protected String getLine() throws IOException { + if (mPreviousLine != null) { + String ret = mPreviousLine; + mPreviousLine = null; + return ret; + } else { + return mReader.readLine(); + } + } + + /** + * vCard 3.0 requires that the line with space at the beginning of the line + * must be combined with previous line. + */ + @Override + protected String getNonEmptyLine() throws IOException, VCardException { + String line; + StringBuilder builder = null; + while (true) { + line = mReader.readLine(); + if (line == null) { + if (builder != null) { + return builder.toString(); + } else if (mPreviousLine != null) { + String ret = mPreviousLine; + mPreviousLine = null; + return ret; + } + throw new VCardException("Reached end of buffer."); + } else if (line.length() == 0) { + if (builder != null) { + return builder.toString(); + } else if (mPreviousLine != null) { + String ret = mPreviousLine; + mPreviousLine = null; + return ret; + } + } else if (line.charAt(0) == ' ' || line.charAt(0) == '\t') { + if (builder != null) { + // See Section 5.8.1 of RFC 2425 (MIME-DIR document). + // Following is the excerpts from it. + // + // DESCRIPTION:This is a long description that exists on a long line. + // + // Can be represented as: + // + // DESCRIPTION:This is a long description + // that exists on a long line. + // + // It could also be represented as: + // + // DESCRIPTION:This is a long descrip + // tion that exists o + // n a long line. + builder.append(line.substring(1)); + } else if (mPreviousLine != null) { + builder = new StringBuilder(); + builder.append(mPreviousLine); + mPreviousLine = null; + builder.append(line.substring(1)); + } else { + throw new VCardException("Space exists at the beginning of the line"); + } + } else { + if (mPreviousLine == null) { + mPreviousLine = line; + if (builder != null) { + return builder.toString(); + } + } else { + String ret = mPreviousLine; + mPreviousLine = line; + return ret; + } + } + } + } + + /* + * vcard = [group "."] "BEGIN" ":" "VCARD" 1 * CRLF + * 1 * (contentline) + * ;A vCard object MUST include the VERSION, FN and N types. + * [group "."] "END" ":" "VCARD" 1 * CRLF + */ + @Override + protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException { + // TODO: vCard 3.0 supports group. + return super.readBeginVCard(allowGarbage); + } + + @Override + protected void readEndVCard(boolean useCache, boolean allowGarbage) + throws IOException, VCardException { + // TODO: vCard 3.0 supports group. + super.readEndVCard(useCache, allowGarbage); + } + + /** + * vCard 3.0 allows iana-token as paramType, while vCard 2.1 does not. + */ + @Override + protected void handleParams(final String params) throws VCardException { + try { + super.handleParams(params); + } catch (VCardException e) { + // maybe IANA type + String[] strArray = params.split("=", 2); + if (strArray.length == 2) { + handleAnyParam(strArray[0], strArray[1]); + } else { + // Must not come here in the current implementation. + throw new VCardException( + "Unknown params value: " + params); + } + } + } + + @Override + protected void handleAnyParam(final String paramName, final String paramValue) { + super.handleAnyParam(paramName, paramValue); + } + + @Override + protected void handleParamWithoutName(final String paramValue) throws VCardException { + super.handleParamWithoutName(paramValue); + } + + /* + * vCard 3.0 defines + * + * param = param-name "=" param-value *("," param-value) + * param-name = iana-token / x-name + * param-value = ptext / quoted-string + * quoted-string = DQUOTE QSAFE-CHAR DQUOTE + */ + @Override + protected void handleType(final String ptypevalues) { + String[] ptypeArray = ptypevalues.split(","); + mInterpreter.propertyParamType("TYPE"); + for (String value : ptypeArray) { + int length = value.length(); + if (length >= 2 && value.startsWith("\"") && value.endsWith("\"")) { + mInterpreter.propertyParamValue(value.substring(1, value.length() - 1)); + } else { + mInterpreter.propertyParamValue(value); + } + } + } + + @Override + protected void handleAgent(final String propertyValue) { + // The way how vCard 3.0 supports "AGENT" is completely different from vCard 2.1. + // + // e.g. + // AGENT:BEGIN:VCARD\nFN:Joe Friday\nTEL:+1-919-555-7878\n + // TITLE:Area Administrator\, Assistant\n EMAIL\;TYPE=INTERN\n + // ET:jfriday@host.com\nEND:VCARD\n + // + // TODO: fix this. + // + // issue: + // vCard 3.0 also allows this as an example. + // + // AGENT;VALUE=uri: + // CID:JQPUBLIC.part3.960129T083020.xyzMail@host3.com + // + // This is not vCard. Should we support this? + // + // Just ignore the line for now, since we cannot know how to handle it... + if (!mEmittedAgentWarning) { + Log.w(LOG_TAG, "AGENT in vCard 3.0 is not supported yet. Ignore it"); + mEmittedAgentWarning = true; + } + } + + /** + * vCard 3.0 does not require two CRLF at the last of BASE64 data. + * It only requires that data should be MIME-encoded. + */ + @Override + protected String getBase64(final String firstString) + throws IOException, VCardException { + final StringBuilder builder = new StringBuilder(); + builder.append(firstString); + + while (true) { + final String line = getLine(); + if (line == null) { + throw new VCardException( + "File ended during parsing BASE64 binary"); + } + if (line.length() == 0) { + break; + } else if (!line.startsWith(" ") && !line.startsWith("\t")) { + mPreviousLine = line; + break; + } + builder.append(line); + } + + return builder.toString(); + } + + /** + * ESCAPED-CHAR = "\\" / "\;" / "\," / "\n" / "\N") + * ; \\ encodes \, \n or \N encodes newline + * ; \; encodes ;, \, encodes , + * + * Note: Apple escapes ':' into '\:' while does not escape '\' + */ + @Override + protected String maybeUnescapeText(final String text) { + return unescapeText(text); + } + + public static String unescapeText(final String text) { + StringBuilder builder = new StringBuilder(); + final int length = text.length(); + for (int i = 0; i < length; i++) { + char ch = text.charAt(i); + if (ch == '\\' && i < length - 1) { + final char next_ch = text.charAt(++i); + if (next_ch == 'n' || next_ch == 'N') { + builder.append("\n"); + } else { + builder.append(next_ch); + } + } else { + builder.append(ch); + } + } + return builder.toString(); + } + + @Override + protected String maybeUnescapeCharacter(final char ch) { + return unescapeCharacter(ch); + } + + public static String unescapeCharacter(final char ch) { + if (ch == 'n' || ch == 'N') { + return "\n"; + } else { + return String.valueOf(ch); + } + } + + @Override + protected Set getKnownPropertyNameSet() { + return VCardParser_V30.sKnownPropertyNameSet; + } +} diff --git a/core/java/android/pim/vcard/VCardParser_V21.java b/core/java/android/pim/vcard/VCardParser_V21.java index 608523c..d29f60a 100644 --- a/core/java/android/pim/vcard/VCardParser_V21.java +++ b/core/java/android/pim/vcard/VCardParser_V21.java @@ -15,21 +15,12 @@ */ package android.pim.vcard; -import android.pim.vcard.exception.VCardAgentNotSupportedException; import android.pim.vcard.exception.VCardException; -import android.pim.vcard.exception.VCardInvalidCommentLineException; -import android.pim.vcard.exception.VCardInvalidLineException; -import android.pim.vcard.exception.VCardNestedException; -import android.pim.vcard.exception.VCardVersionException; -import android.util.Log; -import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.Set; @@ -50,979 +41,79 @@ import java.util.Set; * (possible in Japanese mobile phones). *

*/ -public class VCardParser_V21 extends VCardParser { - private static final String LOG_TAG = "VCardParser_V21"; - - private static final class CustomBufferedReader extends BufferedReader { - private long mTime; - - public CustomBufferedReader(Reader in) { - super(in); - } - - @Override - public String readLine() throws IOException { - long start = System.currentTimeMillis(); - String ret = super.readLine(); - long end = System.currentTimeMillis(); - mTime += end - start; - return ret; - } - - public long getTotalmillisecond() { - return mTime; - } - } - - /** Store the known-type */ - private static final HashSet sKnownTypeSet = new HashSet( - Arrays.asList("DOM", "INTL", "POSTAL", "PARCEL", "HOME", "WORK", - "PREF", "VOICE", "FAX", "MSG", "CELL", "PAGER", "BBS", - "MODEM", "CAR", "ISDN", "VIDEO", "AOL", "APPLELINK", - "ATTMAIL", "CIS", "EWORLD", "INTERNET", "IBMMAIL", - "MCIMAIL", "POWERSHARE", "PRODIGY", "TLX", "X400", "GIF", - "CGM", "WMF", "BMP", "MET", "PMB", "DIB", "PICT", "TIFF", - "PDF", "PS", "JPEG", "QTIME", "MPEG", "MPEG2", "AVI", - "WAVE", "AIFF", "PCM", "X509", "PGP")); - - /** Store the known-value */ - private static final HashSet sKnownValueSet = new HashSet( - Arrays.asList("INLINE", "URL", "CONTENT-ID", "CID")); - - /** Store the property names available in vCard 2.1 */ - private static final HashSet sAvailablePropertyNameSetV21 = - new HashSet(Arrays.asList( - "BEGIN", "LOGO", "PHOTO", "LABEL", "FN", "TITLE", "SOUND", - "VERSION", "TEL", "EMAIL", "TZ", "GEO", "NOTE", "URL", - "BDAY", "ROLE", "REV", "UID", "KEY", "MAILER")); - +public final class VCardParser_V21 implements VCardParser { /** - * Though vCard 2.1 specification does not allow "B" encoding, some data may have it. - * We allow it for safety... + * A unmodifiable Set storing the property names available in the vCard 2.1 specification. */ - private static final HashSet sAvailableEncodingV21 = - new HashSet(Arrays.asList( - "7BIT", "8BIT", "QUOTED-PRINTABLE", "BASE64", "B")); - - private static final String sDefaultEncoding = "8BIT"; - - - //// Protected members - - protected VCardInterpreter mInterpreter; - - /** - *

- * The encoding type for deconding byte streams. This member variable is reset to - * {{@link #sDefaultEncoding} every time when a new item comes. - *

- *

- * "Encoding" in vCard is different from "Charset". - * It is mainly used for addresses, notes, images. "7BIT", "8BIT", "BASE64", and - * "QUOTED-PRINTABLE" are known examples. - *

- */ - protected String mCurrentEncoding; + public static final Set sKnownPropertyNameSet = + Collections.unmodifiableSet(new HashSet( + Arrays.asList("BEGIN", "LOGO", "PHOTO", "LABEL", "FN", "TITLE", "SOUND", + "VERSION", "TEL", "EMAIL", "TZ", "GEO", "NOTE", "URL", + "BDAY", "ROLE", "REV", "UID", "KEY", "MAILER"))); /** - *

- * The reader object to be used internally. - *

- *

- * Developers should not directly read a line from this object. Use getLine() unless - * there some reason. - *

+ * A unmodifiable Set storing the types known in vCard 2.1. */ - protected BufferedReader mReader; + public static final Set sKnownTypeSet = + Collections.unmodifiableSet(new HashSet( + Arrays.asList("DOM", "INTL", "POSTAL", "PARCEL", "HOME", "WORK", + "PREF", "VOICE", "FAX", "MSG", "CELL", "PAGER", "BBS", + "MODEM", "CAR", "ISDN", "VIDEO", "AOL", "APPLELINK", + "ATTMAIL", "CIS", "EWORLD", "INTERNET", "IBMMAIL", + "MCIMAIL", "POWERSHARE", "PRODIGY", "TLX", "X400", "GIF", + "CGM", "WMF", "BMP", "MET", "PMB", "DIB", "PICT", "TIFF", + "PDF", "PS", "JPEG", "QTIME", "MPEG", "MPEG2", "AVI", + "WAVE", "AIFF", "PCM", "X509", "PGP"))); /** - *

- * Set for storing unkonwn TYPE attributes, which is not acceptable in vCard specification, - * but happens to be seen in real world vCard. - *

+ * A unmodifiable Set storing the values available in the vCard 2.1 specification. */ - protected final Set mUnknownTypeSet = new HashSet(); + public static final Set sKnownValueSet = + Collections.unmodifiableSet(new HashSet( + Arrays.asList("INLINE", "URL", "CONTENT-ID", "CID"))); /** - *

- * Set for storing unkonwn VALUE attributes, which is not acceptable in vCard specification, - * but happens to be seen in real world vCard. - *

+ * Though vCard 2.1 specification does not allow "B" encoding, some data may have it. + * We allow it for safety... */ - protected final Set mUnknownValueSet = new HashSet(); - - - //// Private members - - // In some cases, vCard is nested. Currently, we only consider the most interior vCard data. - // See v21_foma_1.vcf in test directory for more information. - private int mNestCount; - - // Used only for parsing END:VCARD. - private String mPreviousLine; + // TODO: move B to another something and make this member public + /* package */ static final HashSet sAvailableEncoding = + new HashSet(Arrays.asList( + "7BIT", "8BIT", "QUOTED-PRINTABLE", "BASE64", "B")); - // For measuring performance. - private long mTimeTotal; - private long mTimeReadStartRecord; - private long mTimeReadEndRecord; - private long mTimeStartProperty; - private long mTimeEndProperty; - private long mTimeParseItems; - private long mTimeParseLineAndHandleGroup; - private long mTimeParsePropertyValues; - private long mTimeParseAdrOrgN; - private long mTimeHandleMiscPropertyValue; - private long mTimeHandleQuotedPrintable; - private long mTimeHandleBase64; + private final VCardParserImpl_V21 mVCardParserImpl; - /** - *

- * Same as {{@link #VCardParser_V21(VCardSourceDetector)} with - * {@link VCardConfig#PARSE_TYPE_UNKNOWN}. - *

- */ public VCardParser_V21() { - this(VCardConfig.PARSE_TYPE_UNKNOWN); + mVCardParserImpl = new VCardParserImpl_V21(); } - /** - *

- * The constructor which uses the estimated type available from a given detector. - *

- */ public VCardParser_V21(VCardSourceDetector detector) { - this(detector != null ? detector.getEstimatedType() : VCardConfig.PARSE_TYPE_UNKNOWN); + mVCardParserImpl = new VCardParserImpl_V21(detector); } - /** - *

- * The constructor which uses a given parse type like - * {@link VCardConfig#PARSE_TYPE_UNKNOWN}. - *

- *

- * This should be used only when you already know the exact type to be used. - *

- */ public VCardParser_V21(int parseType) { - super(parseType); - if (parseType == VCardConfig.PARSE_TYPE_FOMA) { - mNestCount = 1; - } + mVCardParserImpl = new VCardParserImpl_V21(parseType); } - /** - *

- * Parses the file at the given position. - *

- */ - //
vcard_file = [wsls] vcard [wsls]
- protected void parseVCardFile() throws IOException, VCardException { - boolean firstReading = true; - while (true) { - if (mCanceled) { - break; - } - if (!parseOneVCard(firstReading)) { - break; - } - firstReading = false; - } + //// Implemented methods - if (mNestCount > 0) { - boolean useCache = true; - for (int i = 0; i < mNestCount; i++) { - readEndVCard(useCache, true); - useCache = false; - } - } - } - - /** - * @return {@link VCardConfig#FLAG_V21} - */ - protected int getVersion() { - return VCardConfig.FLAG_V21; - } - - /** - * @return {@link VCardConfig#FLAG_V30} - */ - protected String getVersionString() { - return VCardConstants.VERSION_V21; - } - - /** - * @return true when a given property name is a valid property name. - */ - protected boolean isValidPropertyName(final String propertyName) { - if (!(sAvailablePropertyNameSetV21.contains(propertyName.toUpperCase()) || - propertyName.startsWith("X-")) && - !mUnknownTypeSet.contains(propertyName)) { - mUnknownTypeSet.add(propertyName); - Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName); - } - return true; - } - - /** - * @return true when the encoding is a valid encoding. - */ - protected boolean isValidEncoding(String encoding) { - return sAvailableEncodingV21.contains(encoding.toUpperCase()); - } - - /** - * @return String. It may be null, or its length may be 0 - * @throws IOException - */ - protected String getLine() throws IOException { - return mReader.readLine(); - } - - /** - * @return String with it's length > 0 - * @throws IOException - * @throws VCardException when the stream reached end of line - */ - protected String getNonEmptyLine() throws IOException, VCardException { - String line; - while (true) { - line = getLine(); - if (line == null) { - throw new VCardException("Reached end of buffer."); - } else if (line.trim().length() > 0) { - return line; - } - } - } - - /* - * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF - * items *CRLF - * "END" [ws] ":" [ws] "VCARD" - */ - private boolean parseOneVCard(boolean firstReading) throws IOException, VCardException { - boolean allowGarbage = false; - if (firstReading) { - if (mNestCount > 0) { - for (int i = 0; i < mNestCount; i++) { - if (!readBeginVCard(allowGarbage)) { - return false; - } - allowGarbage = true; - } - } - } - - if (!readBeginVCard(allowGarbage)) { - return false; - } - long start; - if (mInterpreter != null) { - start = System.currentTimeMillis(); - mInterpreter.startEntry(); - mTimeReadStartRecord += System.currentTimeMillis() - start; - } - start = System.currentTimeMillis(); - parseItems(); - mTimeParseItems += System.currentTimeMillis() - start; - readEndVCard(true, false); - if (mInterpreter != null) { - start = System.currentTimeMillis(); - mInterpreter.endEntry(); - mTimeReadEndRecord += System.currentTimeMillis() - start; - } - return true; - } - - /** - * @return True when successful. False when reaching the end of line - * @throws IOException - * @throws VCardException - */ - protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException { - String line; - do { - while (true) { - line = getLine(); - if (line == null) { - return false; - } else if (line.trim().length() > 0) { - break; - } - } - String[] strArray = line.split(":", 2); - int length = strArray.length; - - // Though vCard 2.1/3.0 specification does not allow lower cases, - // vCard file emitted by some external vCard expoter have such invalid Strings. - // So we allow it. - // e.g. BEGIN:vCard - if (length == 2 && - strArray[0].trim().equalsIgnoreCase("BEGIN") && - strArray[1].trim().equalsIgnoreCase("VCARD")) { - return true; - } else if (!allowGarbage) { - if (mNestCount > 0) { - mPreviousLine = line; - return false; - } else { - throw new VCardException( - "Expected String \"BEGIN:VCARD\" did not come " - + "(Instead, \"" + line + "\" came)"); - } - } - } while(allowGarbage); - - throw new VCardException("Reached where must not be reached."); - } - - /** - *

- * The arguments useCache and allowGarbase are usually true and false accordingly when - * this function is called outside this function itself. - *

- * - * @param useCache When true, line is obtained from mPreviousline. Otherwise, getLine() - * is used. - * @param allowGarbage When true, ignore non "END:VCARD" line. - * @throws IOException - * @throws VCardException - */ - protected void readEndVCard(boolean useCache, boolean allowGarbage) + public boolean parse(InputStream is, VCardInterpreter interepreter) throws IOException, VCardException { - String line; - do { - if (useCache) { - // Though vCard specification does not allow lower cases, - // some data may have them, so we allow it. - line = mPreviousLine; - } else { - while (true) { - line = getLine(); - if (line == null) { - throw new VCardException("Expected END:VCARD was not found."); - } else if (line.trim().length() > 0) { - break; - } - } - } - - String[] strArray = line.split(":", 2); - if (strArray.length == 2 && - strArray[0].trim().equalsIgnoreCase("END") && - strArray[1].trim().equalsIgnoreCase("VCARD")) { - return; - } else if (!allowGarbage) { - throw new VCardException("END:VCARD != \"" + mPreviousLine + "\""); - } - useCache = false; - } while (allowGarbage); + return mVCardParserImpl.parse(is, VCardConfig.DEFAULT_TEMPORARY_CHARSET, interepreter); } - - /* - * items = *CRLF item - * / item - */ - protected void parseItems() throws IOException, VCardException { - boolean ended = false; - - if (mInterpreter != null) { - long start = System.currentTimeMillis(); - mInterpreter.startProperty(); - mTimeStartProperty += System.currentTimeMillis() - start; - } - ended = parseItem(); - if (mInterpreter != null && !ended) { - long start = System.currentTimeMillis(); - mInterpreter.endProperty(); - mTimeEndProperty += System.currentTimeMillis() - start; - } - while (!ended) { - // follow VCARD ,it wont reach endProperty - if (mInterpreter != null) { - long start = System.currentTimeMillis(); - mInterpreter.startProperty(); - mTimeStartProperty += System.currentTimeMillis() - start; - } - try { - ended = parseItem(); - } catch (VCardInvalidCommentLineException e) { - Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored."); - ended = false; - } - if (mInterpreter != null && !ended) { - long start = System.currentTimeMillis(); - mInterpreter.endProperty(); - mTimeEndProperty += System.currentTimeMillis() - start; - } - } - } - - /* - * item = [groups "."] name [params] ":" value CRLF - * / [groups "."] "ADR" [params] ":" addressparts CRLF - * / [groups "."] "ORG" [params] ":" orgparts CRLF - * / [groups "."] "N" [params] ":" nameparts CRLF - * / [groups "."] "AGENT" [params] ":" vcard CRLF - */ - protected boolean parseItem() throws IOException, VCardException { - mCurrentEncoding = sDefaultEncoding; - - final String line = getNonEmptyLine(); - long start = System.currentTimeMillis(); - - String[] propertyNameAndValue = separateLineAndHandleGroup(line); - if (propertyNameAndValue == null) { - return true; - } - if (propertyNameAndValue.length != 2) { - throw new VCardInvalidLineException("Invalid line \"" + line + "\""); - } - String propertyName = propertyNameAndValue[0].toUpperCase(); - String propertyValue = propertyNameAndValue[1]; - - mTimeParseLineAndHandleGroup += System.currentTimeMillis() - start; - - if (propertyName.equals("ADR") || propertyName.equals("ORG") || - propertyName.equals("N")) { - start = System.currentTimeMillis(); - handleMultiplePropertyValue(propertyName, propertyValue); - mTimeParseAdrOrgN += System.currentTimeMillis() - start; - return false; - } else if (propertyName.equals("AGENT")) { - handleAgent(propertyValue); - return false; - } else if (isValidPropertyName(propertyName)) { - if (propertyName.equals("BEGIN")) { - if (propertyValue.equals("VCARD")) { - throw new VCardNestedException("This vCard has nested vCard data in it."); - } else { - throw new VCardException("Unknown BEGIN type: " + propertyValue); - } - } else if (propertyName.equals("VERSION") && - !propertyValue.equals(getVersionString())) { - throw new VCardVersionException("Incompatible version: " + - propertyValue + " != " + getVersionString()); - } - start = System.currentTimeMillis(); - handlePropertyValue(propertyName, propertyValue); - mTimeParsePropertyValues += System.currentTimeMillis() - start; - return false; - } - - throw new VCardException("Unknown property name: \"" + propertyName + "\""); - } - - static private final int STATE_GROUP_OR_PROPNAME = 0; - static private final int STATE_PARAMS = 1; - // vCard 3.0 specification allows double-quoted param-value, while vCard 2.1 does not. - // This is just for safety. - static private final int STATE_PARAMS_IN_DQUOTE = 2; - - protected String[] separateLineAndHandleGroup(String line) throws VCardException { - int state = STATE_GROUP_OR_PROPNAME; - int nameIndex = 0; - - final String[] propertyNameAndValue = new String[2]; - - final int length = line.length(); - if (length > 0 && line.charAt(0) == '#') { - throw new VCardInvalidCommentLineException(); - } - - // This loop is developed so that we don't have to take care of bottle neck here. - // Refactor carefully when you need to do so. - for (int i = 0; i < length; i++) { - final char ch = line.charAt(i); - switch (state) { - case STATE_GROUP_OR_PROPNAME: { - if (ch == ':') { - final String propertyName = line.substring(nameIndex, i); - if (propertyName.equalsIgnoreCase("END")) { - mPreviousLine = line; - return null; - } - if (mInterpreter != null) { - mInterpreter.propertyName(propertyName); - } - propertyNameAndValue[0] = propertyName; - if (i < length - 1) { - propertyNameAndValue[1] = line.substring(i + 1); - } else { - propertyNameAndValue[1] = ""; - } - return propertyNameAndValue; - } else if (ch == '.') { - String groupName = line.substring(nameIndex, i); - if (mInterpreter != null) { - mInterpreter.propertyGroup(groupName); - } - nameIndex = i + 1; - } else if (ch == ';') { - String propertyName = line.substring(nameIndex, i); - if (propertyName.equalsIgnoreCase("END")) { - mPreviousLine = line; - return null; - } - if (mInterpreter != null) { - mInterpreter.propertyName(propertyName); - } - propertyNameAndValue[0] = propertyName; - nameIndex = i + 1; - state = STATE_PARAMS; - } - break; - } - case STATE_PARAMS: { - if (ch == '"') { - state = STATE_PARAMS_IN_DQUOTE; - } else if (ch == ';') { - handleParams(line.substring(nameIndex, i)); - nameIndex = i + 1; - } else if (ch == ':') { - handleParams(line.substring(nameIndex, i)); - if (i < length - 1) { - propertyNameAndValue[1] = line.substring(i + 1); - } else { - propertyNameAndValue[1] = ""; - } - return propertyNameAndValue; - } - break; - } - case STATE_PARAMS_IN_DQUOTE: { - if (ch == '"') { - state = STATE_PARAMS; - } - break; - } - } - } - - throw new VCardInvalidLineException("Invalid line: \"" + line + "\""); - } - - /* - * params = ";" [ws] paramlist - * paramlist = paramlist [ws] ";" [ws] param - * / param - * param = "TYPE" [ws] "=" [ws] ptypeval - * / "VALUE" [ws] "=" [ws] pvalueval - * / "ENCODING" [ws] "=" [ws] pencodingval - * / "CHARSET" [ws] "=" [ws] charsetval - * / "LANGUAGE" [ws] "=" [ws] langval - * / "X-" word [ws] "=" [ws] word - * / knowntype - */ - protected void handleParams(String params) throws VCardException { - String[] strArray = params.split("=", 2); - if (strArray.length == 2) { - final String paramName = strArray[0].trim().toUpperCase(); - String paramValue = strArray[1].trim(); - if (paramName.equals("TYPE")) { - handleType(paramValue); - } else if (paramName.equals("VALUE")) { - handleValue(paramValue); - } else if (paramName.equals("ENCODING")) { - handleEncoding(paramValue); - } else if (paramName.equals("CHARSET")) { - handleCharset(paramValue); - } else if (paramName.equals("LANGUAGE")) { - handleLanguage(paramValue); - } else if (paramName.startsWith("X-")) { - handleAnyParam(paramName, paramValue); - } else { - throw new VCardException("Unknown type \"" + paramName + "\""); - } - } else { - handleParamWithoutName(strArray[0]); - } - } - - /** - * vCard 3.0 parser may throw VCardException. - */ - @SuppressWarnings("unused") - protected void handleParamWithoutName(final String paramValue) throws VCardException { - handleType(paramValue); - } - - /** - * ptypeval = knowntype / "X-" word - */ - protected void handleType(final String ptypeval) { - String upperTypeValue = ptypeval; - if (!(sKnownTypeSet.contains(upperTypeValue) || upperTypeValue.startsWith("X-")) && - !mUnknownTypeSet.contains(ptypeval)) { - mUnknownTypeSet.add(ptypeval); - Log.w(LOG_TAG, "TYPE unsupported by vCard 2.1: " + ptypeval); - } - if (mInterpreter != null) { - mInterpreter.propertyParamType("TYPE"); - mInterpreter.propertyParamValue(upperTypeValue); - } - } - - /** - * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word - */ - protected void handleValue(final String pvalueval) { - if (!sKnownValueSet.contains(pvalueval.toUpperCase()) && - pvalueval.startsWith("X-") && - !mUnknownValueSet.contains(pvalueval)) { - mUnknownValueSet.add(pvalueval); - Log.w(LOG_TAG, "VALUE unsupported by vCard 2.1: " + pvalueval); - } - if (mInterpreter != null) { - mInterpreter.propertyParamType("VALUE"); - mInterpreter.propertyParamValue(pvalueval); - } - } - - /** - * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word - */ - protected void handleEncoding(String pencodingval) throws VCardException { - if (isValidEncoding(pencodingval) || - pencodingval.startsWith("X-")) { - if (mInterpreter != null) { - mInterpreter.propertyParamType("ENCODING"); - mInterpreter.propertyParamValue(pencodingval); - } - mCurrentEncoding = pencodingval; - } else { - throw new VCardException("Unknown encoding \"" + pencodingval + "\""); - } - } - - /** - * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521), - * but today's vCard often contains other charset, so we allow them. - */ - protected void handleCharset(String charsetval) { - if (mInterpreter != null) { - mInterpreter.propertyParamType("CHARSET"); - mInterpreter.propertyParamValue(charsetval); - } - } - - /** - * See also Section 7.1 of RFC 1521 - */ - protected void handleLanguage(String langval) throws VCardException { - String[] strArray = langval.split("-"); - if (strArray.length != 2) { - throw new VCardException("Invalid Language: \"" + langval + "\""); - } - String tmp = strArray[0]; - int length = tmp.length(); - for (int i = 0; i < length; i++) { - if (!isAsciiLetter(tmp.charAt(i))) { - throw new VCardException("Invalid Language: \"" + langval + "\""); - } - } - tmp = strArray[1]; - length = tmp.length(); - for (int i = 0; i < length; i++) { - if (!isAsciiLetter(tmp.charAt(i))) { - throw new VCardException("Invalid Language: \"" + langval + "\""); - } - } - if (mInterpreter != null) { - mInterpreter.propertyParamType("LANGUAGE"); - mInterpreter.propertyParamValue(langval); - } - } - - private boolean isAsciiLetter(char ch) { - if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { - return true; - } - return false; - } - - /** - * Mainly for "X-" type. This accepts any kind of type without check. - */ - protected void handleAnyParam(String paramName, String paramValue) { - if (mInterpreter != null) { - mInterpreter.propertyParamType(paramName); - mInterpreter.propertyParamValue(paramValue); - } - } - - protected void handlePropertyValue(String propertyName, String propertyValue) - throws IOException, VCardException { - if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { - final long start = System.currentTimeMillis(); - final String result = getQuotedPrintable(propertyValue); - if (mInterpreter != null) { - ArrayList v = new ArrayList(); - v.add(result); - mInterpreter.propertyValues(v); - } - mTimeHandleQuotedPrintable += System.currentTimeMillis() - start; - } else if (mCurrentEncoding.equalsIgnoreCase("BASE64") || - mCurrentEncoding.equalsIgnoreCase("B")) { - final long start = System.currentTimeMillis(); - // It is very rare, but some BASE64 data may be so big that - // OutOfMemoryError occurs. To ignore such cases, use try-catch. - try { - final String result = getBase64(propertyValue); - if (mInterpreter != null) { - ArrayList v = new ArrayList(); - v.add(result); - mInterpreter.propertyValues(v); - } - } catch (OutOfMemoryError error) { - Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!"); - if (mInterpreter != null) { - mInterpreter.propertyValues(null); - } - } - mTimeHandleBase64 += System.currentTimeMillis() - start; - } else { - if (!(mCurrentEncoding == null || mCurrentEncoding.equalsIgnoreCase("7BIT") - || mCurrentEncoding.equalsIgnoreCase("8BIT") - || mCurrentEncoding.toUpperCase().startsWith("X-"))) { - Log.w(LOG_TAG, "The encoding unsupported by vCard spec: \"" + mCurrentEncoding + "\"."); - } - - final long start = System.currentTimeMillis(); - if (mInterpreter != null) { - ArrayList v = new ArrayList(); - v.add(maybeUnescapeText(propertyValue)); - mInterpreter.propertyValues(v); - } - mTimeHandleMiscPropertyValue += System.currentTimeMillis() - start; - } - } - - /** - *

- * Parses and returns Quoted-Printable. - *

- * - * @param firstString The string following a parameter name and attributes. - * Example: "string" in "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r". - * @return whole Quoted-Printable string, including a given argument and following lines. - * Excludes the last empty line following to Quoted Printable lines. - * @throws IOException - * @throws VCardException - */ - private String getQuotedPrintable(String firstString) throws IOException, VCardException { - // Specifically, there may be some padding between = and CRLF. - // See the following: - // - // qp-line := *(qp-segment transport-padding CRLF) - // qp-part transport-padding - // qp-segment := qp-section *(SPACE / TAB) "=" - // ; Maximum length of 76 characters - // - // e.g. (from RFC 2045) - // Now's the time = - // for all folk to come= - // to the aid of their country. - if (firstString.trim().endsWith("=")) { - // remove "transport-padding" - int pos = firstString.length() - 1; - while(firstString.charAt(pos) != '=') { - } - StringBuilder builder = new StringBuilder(); - builder.append(firstString.substring(0, pos + 1)); - builder.append("\r\n"); - String line; - while (true) { - line = getLine(); - if (line == null) { - throw new VCardException( - "File ended during parsing a Quoted-Printable String"); - } - if (line.trim().endsWith("=")) { - // remove "transport-padding" - pos = line.length() - 1; - while(line.charAt(pos) != '=') { - } - builder.append(line.substring(0, pos + 1)); - builder.append("\r\n"); - } else { - builder.append(line); - break; - } - } - return builder.toString(); - } else { - return firstString; - } - } - - protected String getBase64(String firstString) throws IOException, VCardException { - StringBuilder builder = new StringBuilder(); - builder.append(firstString); - - while (true) { - String line = getLine(); - if (line == null) { - throw new VCardException( - "File ended during parsing BASE64 binary"); - } - if (line.length() == 0) { - break; - } - builder.append(line); - } - - return builder.toString(); - } - - /** - *

- * Mainly for "ADR", "ORG", and "N" - *

- */ - /* - * - * addressparts = 0*6(strnosemi ";") strnosemi - * ; PO Box, Extended Addr, Street, Locality, Region, - * Postal Code, Country Name - * orgparts = *(strnosemi ";") strnosemi - * ; First is Organization Name, - * remainder are Organization Units. - * nameparts = 0*4(strnosemi ";") strnosemi - * ; Family, Given, Middle, Prefix, Suffix. - * ; Example:Public;John;Q.;Reverend Dr.;III, Esq. - * strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi - * ; To include a semicolon in this string, it must be escaped - * ; with a "\" character. - * - * We do not care the number of "strnosemi" here. - * - * We are not sure whether we should add "\" CRLF to each value. We exclude them for now. - */ - protected void handleMultiplePropertyValue(String propertyName, String propertyValue) + public boolean parse(InputStream is, String charset, VCardInterpreter interpreter) throws IOException, VCardException { - // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some softwares/devices - // emit such data. - if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { - propertyValue = getQuotedPrintable(propertyValue); - } - - if (mInterpreter != null) { - mInterpreter.propertyValues(VCardUtils.constructListFromValue( - propertyValue, (getVersion() == VCardConfig.FLAG_V30))); - } - } - - /* - * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an error - * toward the AGENT property. - * - * // TODO: Support AGENT property. - * - * item = ... - * / [groups "."] "AGENT" - * [params] ":" vcard CRLF - * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF - * items *CRLF "END" [ws] ":" [ws] "VCARD" - */ - protected void handleAgent(final String propertyValue) throws VCardException { - if (!propertyValue.toUpperCase().contains("BEGIN:VCARD")) { - // Apparently invalid line seen in Windows Mobile 6.5. Ignore them. - return; - } else { - throw new VCardAgentNotSupportedException("AGENT Property is not supported now."); - } - } - - /** - * For vCard 3.0. - */ - protected String maybeUnescapeText(final String text) { - return text; - } - - /** - * Returns unescaped String if the character should be unescaped. Return null otherwise. - * e.g. In vCard 2.1, "\;" should be unescaped into ";" while "\x" should not be. - */ - protected String maybeUnescapeCharacter(final char ch) { - return unescapeCharacter(ch); - } - - public static String unescapeCharacter(final char ch) { - // Original vCard 2.1 specification does not allow transformation - // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous implementation of - // this class allowed them, so keep it as is. - if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') { - return String.valueOf(ch); - } else { - return null; - } - } - - @Override - public boolean parse(InputStream is, String inputCharset, VCardInterpreter interpreter) - throws IOException, VCardException { - if (is == null) { - throw new NullPointerException("InputStream must not be null."); - } - if (inputCharset == null) { - inputCharset = VCardConfig.DEFAULT_TEMPORARY_CHARSET; - } - - final InputStreamReader tmpReader = new InputStreamReader(is, inputCharset); - if (VCardConfig.showPerformanceLog()) { - mReader = new CustomBufferedReader(tmpReader); - } else { - mReader = new BufferedReader(tmpReader); - } - - mInterpreter = interpreter; - - long start = System.currentTimeMillis(); - if (mInterpreter != null) { - mInterpreter.start(); - } - parseVCardFile(); - if (mInterpreter != null) { - mInterpreter.end(); - } - mTimeTotal += System.currentTimeMillis() - start; - - if (VCardConfig.showPerformanceLog()) { - showPerformanceInfo(); - } - - return true; + return mVCardParserImpl.parse(is, charset, interpreter); } - @Override - public void parse(InputStream is, String charset, VCardInterpreter builder, boolean canceled) + public boolean parse(InputStream is, String charset, + VCardInterpreter interpreter, boolean canceled) throws IOException, VCardException { - mCanceled = canceled; - parse(is, charset, builder); + return mVCardParserImpl.parse(is, charset, interpreter, canceled); } - private void showPerformanceInfo() { - Log.d(LOG_TAG, "Total parsing time: " + mTimeTotal + " ms"); - if (mReader instanceof CustomBufferedReader) { - Log.d(LOG_TAG, "Total readLine time: " + - ((CustomBufferedReader)mReader).getTotalmillisecond() + " ms"); - } - Log.d(LOG_TAG, "Time for handling the beggining of the record: " + - mTimeReadStartRecord + " ms"); - Log.d(LOG_TAG, "Time for handling the end of the record: " + - mTimeReadEndRecord + " ms"); - Log.d(LOG_TAG, "Time for parsing line, and handling group: " + - mTimeParseLineAndHandleGroup + " ms"); - Log.d(LOG_TAG, "Time for parsing ADR, ORG, and N fields:" + mTimeParseAdrOrgN + " ms"); - Log.d(LOG_TAG, "Time for parsing property values: " + mTimeParsePropertyValues + " ms"); - Log.d(LOG_TAG, "Time for handling normal property values: " + - mTimeHandleMiscPropertyValue + " ms"); - Log.d(LOG_TAG, "Time for handling Quoted-Printable: " + - mTimeHandleQuotedPrintable + " ms"); - Log.d(LOG_TAG, "Time for handling Base64: " + mTimeHandleBase64 + " ms"); + public void cancel() { + mVCardParserImpl.cancel(); } } diff --git a/core/java/android/pim/vcard/VCardParser_V30.java b/core/java/android/pim/vcard/VCardParser_V30.java index f5d546b..ec97f19 100644 --- a/core/java/android/pim/vcard/VCardParser_V30.java +++ b/core/java/android/pim/vcard/VCardParser_V30.java @@ -16,343 +16,66 @@ package android.pim.vcard; import android.pim.vcard.exception.VCardException; -import android.util.Log; import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; +import java.util.Set; /** * The class used to parse vCard 3.0. * Please refer to vCard Specification 3.0 (http://tools.ietf.org/html/rfc2426). */ -public class VCardParser_V30 extends VCardParser_V21 { - private static final String LOG_TAG = "VCardParser_V30"; - - private static final HashSet sAcceptablePropsWithParam = new HashSet( - Arrays.asList( +public class VCardParser_V30 implements VCardParser { + public static final Set sKnownPropertyNameSet = + Collections.unmodifiableSet(new HashSet(Arrays.asList( "BEGIN", "LOGO", "PHOTO", "LABEL", "FN", "TITLE", "SOUND", "VERSION", "TEL", "EMAIL", "TZ", "GEO", "NOTE", "URL", "BDAY", "ROLE", "REV", "UID", "KEY", "MAILER", // 2.1 "NAME", "PROFILE", "SOURCE", "NICKNAME", "CLASS", - "SORT-STRING", "CATEGORIES", "PRODID")); // 3.0 + "SORT-STRING", "CATEGORIES", "PRODID"))); // 3.0 // Although "7bit" and "BASE64" is not allowed in vCard 3.0, we allow it for safety. - private static final HashSet sAcceptableEncodingV30 = new HashSet( - Arrays.asList("7BIT", "8BIT", "BASE64", "B")); - - // Although RFC 2426 specifies some property must not have parameters, we allow it, - // since there may be some careers which violates the RFC... - private static final HashSet acceptablePropsWithoutParam = new HashSet(); - - private String mPreviousLine; + public static final Set sAcceptableEncoding = + Collections.unmodifiableSet(new HashSet(Arrays.asList( + "7BIT", "8BIT", "BASE64", "B"))); - private boolean mEmittedAgentWarning = false; - - /** - * True when the caller wants the parser to be strict about the input. - * Currently this is only for testing. - */ - private final boolean mStrictParsing; + private final VCardParserImpl_V30 mVCardParserImpl; public VCardParser_V30() { - super(); - mStrictParsing = false; + mVCardParserImpl = new VCardParserImpl_V30(); } - /** - * @param strictParsing when true, this object throws VCardException when the vcard is not - * valid from the view of vCard 3.0 specification (defined in RFC 2426). Note that this class - * is not fully yet for being used with this flag and may not notice invalid line(s). - * - * @hide currently only for testing! - */ - public VCardParser_V30(boolean strictParsing) { - super(); - mStrictParsing = strictParsing; + public VCardParser_V30(VCardSourceDetector detector) { + mVCardParserImpl = new VCardParserImpl_V30(detector); } - public VCardParser_V30(int parseMode) { - super(parseMode); - mStrictParsing = false; + public VCardParser_V30(int parseType) { + mVCardParserImpl = new VCardParserImpl_V30(parseType); } - @Override - protected int getVersion() { - return VCardConfig.FLAG_V30; - } - - @Override - protected String getVersionString() { - return VCardConstants.VERSION_V30; - } - - @Override - protected boolean isValidPropertyName(String propertyName) { - if (!(sAcceptablePropsWithParam.contains(propertyName) || - acceptablePropsWithoutParam.contains(propertyName) || - propertyName.startsWith("X-")) && - !mUnknownTypeSet.contains(propertyName)) { - mUnknownTypeSet.add(propertyName); - Log.w(LOG_TAG, "Property name unsupported by vCard 3.0: " + propertyName); - } - return true; - } - @Override - protected boolean isValidEncoding(String encoding) { - return sAcceptableEncodingV30.contains(encoding.toUpperCase()); - } - - @Override - protected String getLine() throws IOException { - if (mPreviousLine != null) { - String ret = mPreviousLine; - mPreviousLine = null; - return ret; - } else { - return mReader.readLine(); - } - } - - /** - * vCard 3.0 requires that the line with space at the beginning of the line - * must be combined with previous line. - */ - @Override - protected String getNonEmptyLine() throws IOException, VCardException { - String line; - StringBuilder builder = null; - while (true) { - line = mReader.readLine(); - if (line == null) { - if (builder != null) { - return builder.toString(); - } else if (mPreviousLine != null) { - String ret = mPreviousLine; - mPreviousLine = null; - return ret; - } - throw new VCardException("Reached end of buffer."); - } else if (line.length() == 0) { - if (builder != null) { - return builder.toString(); - } else if (mPreviousLine != null) { - String ret = mPreviousLine; - mPreviousLine = null; - return ret; - } - } else if (line.charAt(0) == ' ' || line.charAt(0) == '\t') { - if (builder != null) { - // See Section 5.8.1 of RFC 2425 (MIME-DIR document). - // Following is the excerpts from it. - // - // DESCRIPTION:This is a long description that exists on a long line. - // - // Can be represented as: - // - // DESCRIPTION:This is a long description - // that exists on a long line. - // - // It could also be represented as: - // - // DESCRIPTION:This is a long descrip - // tion that exists o - // n a long line. - builder.append(line.substring(1)); - } else if (mPreviousLine != null) { - builder = new StringBuilder(); - builder.append(mPreviousLine); - mPreviousLine = null; - builder.append(line.substring(1)); - } else { - throw new VCardException("Space exists at the beginning of the line"); - } - } else { - if (mPreviousLine == null) { - mPreviousLine = line; - if (builder != null) { - return builder.toString(); - } - } else { - String ret = mPreviousLine; - mPreviousLine = line; - return ret; - } - } - } - } - + //// Implemented methods - /** - * vcard = [group "."] "BEGIN" ":" "VCARD" 1 * CRLF - * 1 * (contentline) - * ;A vCard object MUST include the VERSION, FN and N types. - * [group "."] "END" ":" "VCARD" 1 * CRLF - */ - @Override - protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException { - // TODO: vCard 3.0 supports group. - return super.readBeginVCard(allowGarbage); - } - - @Override - protected void readEndVCard(boolean useCache, boolean allowGarbage) + public boolean parse(InputStream is, VCardInterpreter interepreter) throws IOException, VCardException { - // TODO: vCard 3.0 supports group. - super.readEndVCard(useCache, allowGarbage); + return mVCardParserImpl.parse(is, VCardConfig.DEFAULT_TEMPORARY_CHARSET, interepreter); } - /** - * vCard 3.0 allows iana-token as paramType, while vCard 2.1 does not. - */ - @Override - protected void handleParams(String params) throws VCardException { - try { - super.handleParams(params); - } catch (VCardException e) { - // maybe IANA type - String[] strArray = params.split("=", 2); - if (strArray.length == 2) { - handleAnyParam(strArray[0], strArray[1]); - } else { - // Must not come here in the current implementation. - throw new VCardException( - "Unknown params value: " + params); - } - } - } - - @Override - protected void handleAnyParam(String paramName, String paramValue) { - super.handleAnyParam(paramName, paramValue); - } - - @Override - protected void handleParamWithoutName(final String paramValue) throws VCardException { - if (mStrictParsing) { - throw new VCardException("Parameter without name is not acceptable in vCard 3.0"); - } else { - super.handleParamWithoutName(paramValue); - } - } - - /** - * vCard 3.0 defines - * - * param = param-name "=" param-value *("," param-value) - * param-name = iana-token / x-name - * param-value = ptext / quoted-string - * quoted-string = DQUOTE QSAFE-CHAR DQUOTE - */ - @Override - protected void handleType(String ptypevalues) { - String[] ptypeArray = ptypevalues.split(","); - mInterpreter.propertyParamType("TYPE"); - for (String value : ptypeArray) { - int length = value.length(); - if (length >= 2 && value.startsWith("\"") && value.endsWith("\"")) { - mInterpreter.propertyParamValue(value.substring(1, value.length() - 1)); - } else { - mInterpreter.propertyParamValue(value); - } - } - } - - @Override - protected void handleAgent(String propertyValue) { - // The way how vCard 3.0 supports "AGENT" is completely different from vCard 2.1. - // - // e.g. - // AGENT:BEGIN:VCARD\nFN:Joe Friday\nTEL:+1-919-555-7878\n - // TITLE:Area Administrator\, Assistant\n EMAIL\;TYPE=INTERN\n - // ET:jfriday@host.com\nEND:VCARD\n - // - // TODO: fix this. - // - // issue: - // vCard 3.0 also allows this as an example. - // - // AGENT;VALUE=uri: - // CID:JQPUBLIC.part3.960129T083020.xyzMail@host3.com - // - // This is not vCard. Should we support this? - // - // Just ignore the line for now, since we cannot know how to handle it... - if (!mEmittedAgentWarning) { - Log.w(LOG_TAG, "AGENT in vCard 3.0 is not supported yet. Ignore it"); - mEmittedAgentWarning = true; - } - } - - /** - * vCard 3.0 does not require two CRLF at the last of BASE64 data. - * It only requires that data should be MIME-encoded. - */ - @Override - protected String getBase64(String firstString) throws IOException, VCardException { - StringBuilder builder = new StringBuilder(); - builder.append(firstString); - - while (true) { - String line = getLine(); - if (line == null) { - throw new VCardException( - "File ended during parsing BASE64 binary"); - } - if (line.length() == 0) { - break; - } else if (!line.startsWith(" ") && !line.startsWith("\t")) { - mPreviousLine = line; - break; - } - builder.append(line); - } - - return builder.toString(); - } - - /** - * ESCAPED-CHAR = "\\" / "\;" / "\," / "\n" / "\N") - * ; \\ encodes \, \n or \N encodes newline - * ; \; encodes ;, \, encodes , - * - * Note: Apple escapes ':' into '\:' while does not escape '\' - */ - @Override - protected String maybeUnescapeText(String text) { - return unescapeText(text); - } - - public static String unescapeText(String text) { - StringBuilder builder = new StringBuilder(); - int length = text.length(); - for (int i = 0; i < length; i++) { - char ch = text.charAt(i); - if (ch == '\\' && i < length - 1) { - char next_ch = text.charAt(++i); - if (next_ch == 'n' || next_ch == 'N') { - builder.append("\n"); - } else { - builder.append(next_ch); - } - } else { - builder.append(ch); - } - } - return builder.toString(); + public boolean parse(InputStream is, String charset, VCardInterpreter interpreter) + throws IOException, VCardException { + return mVCardParserImpl.parse(is, charset, interpreter); } - @Override - protected String maybeUnescapeCharacter(char ch) { - return unescapeCharacter(ch); + public boolean parse(InputStream is, String charset, + VCardInterpreter interpreter, boolean canceled) + throws IOException, VCardException { + return mVCardParserImpl.parse(is, charset, interpreter, canceled); } - public static String unescapeCharacter(char ch) { - if (ch == 'n' || ch == 'N') { - return "\n"; - } else { - return String.valueOf(ch); - } + public void cancel() { + mVCardParserImpl.cancel(); } } diff --git a/core/java/android/pim/vcard/VCardUtils.java b/core/java/android/pim/vcard/VCardUtils.java index 11b112b..26ebed7 100644 --- a/core/java/android/pim/vcard/VCardUtils.java +++ b/core/java/android/pim/vcard/VCardUtils.java @@ -329,8 +329,8 @@ public class VCardUtils { if (ch == '\\' && i < length - 1) { char nextCh = value.charAt(i + 1); final String unescapedString = - (isV30 ? VCardParser_V30.unescapeCharacter(nextCh) : - VCardParser_V21.unescapeCharacter(nextCh)); + (isV30 ? VCardParserImpl_V30.unescapeCharacter(nextCh) : + VCardParserImpl_V21.unescapeCharacter(nextCh)); if (unescapedString != null) { builder.append(unescapedString); i++; -- cgit v1.1