/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
package com.android.providers.contacts;
import android.database.CrossProcessCursor;
import android.database.Cursor;
import android.provider.ContactsContract.Contacts;
import android.provider.ContactsContract.SearchSnippetColumns;
import android.text.TextUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Cursor wrapper for use when the results include snippets. This wrapper does special processing
* when the snippet field is retrieved, converting the data in that column from the raw form that is
* retrieved from the database into a snippet before returning (all other columns are simply passed
* through).
*
* Note that this wrapper implements {@link CrossProcessCursor}, but will only behave as such if the
* cursor it is wrapping is itself a {@link CrossProcessCursor} or another wrapper around the same.
*/
public class SnippetizingCursorWrapper extends CrossProcessCursorWrapper {
// Pattern for splitting a line into tokens. This matches e-mail addresses as a single token,
// otherwise splitting on any group of non-alphanumeric characters.
Pattern SPLIT_PATTERN = Pattern.compile("([\\w-\\.]+)@((?:[\\w]+\\.)+)([a-zA-Z]{2,4})|[\\w]+");
// Index of the snippet field (if any).
private final int mSnippetIndex;
// Parameters for snippetization.
private final String mQuery;
private final String mStartMatch;
private final String mEndMatch;
private final String mEllipsis;
private final int mMaxTokens;
// Whether to invoke the snippeting logic. If the query consisted of multiple tokens, the DB
// snippet() function was already used, so we should just return the snippet value directly.
private final boolean mDoSnippetizing;
/**
* Creates a cursor wrapper that does special handling on the snippet field (converting the
* raw content retrieved from the database into a snippet).
* @param cursor The cursor to wrap.
* @param query Query string.
* @param startMatch String to insert at the start of matches in the snippet.
* @param endMatch String to insert at the end of matches in the snippet.
* @param ellipsis Ellipsis characters to use at the start or end of the snippet if appropriate.
* @param maxTokens Maximum number of tokens to include in the snippet.
*/
SnippetizingCursorWrapper(Cursor cursor, String query, String startMatch, String endMatch,
String ellipsis, int maxTokens) {
super(cursor);
mSnippetIndex = getColumnIndex(SearchSnippetColumns.SNIPPET);
mQuery = query;
mStartMatch = startMatch;
mEndMatch = endMatch;
mEllipsis = ellipsis;
mMaxTokens = maxTokens;
mDoSnippetizing = mQuery.split(ContactsProvider2.QUERY_TOKENIZER_REGEX).length == 1;
}
@Override
public String getString(int columnIndex) {
String columnContent = super.getString(columnIndex);
// If the snippet column is being retrieved, do our custom snippetization logic.
if (mDoSnippetizing && columnIndex == mSnippetIndex) {
// Retrieve the display name - if it includes the query term, the snippet should be
// left empty.
int displayNameIndex = super.getColumnIndex(Contacts.DISPLAY_NAME);
String displayName = displayNameIndex < 0 ? null : super.getString(displayNameIndex);
return snippetize(columnContent, displayName);
} else {
return columnContent;
}
}
/**
* Creates a snippet for the given content, with the following algorithm:
*
* - Check for the query term as a prefix of any token in the display name; if any match is
* found, no snippet should be computed (return null).
* - Check for empty content (return null if found).
* - Check for a custom snippet (phone number or email matches generate a usable snippet
* via a subquery)
* - Check for a non-match of the query to the content - technically should never happen,
* but if it comes through, we'll just return null.
* - Break the content into multiple lines. For each line that includes the query string:
*
* - Tokenize it into alphanumeric chunks.
* - Do a prefix match for the query against each token.
* - If there's a match, replace the token with a version with the query term surrounded
* by the start and end match strings.
* - If this is the first token in the line that has a match, compute the start and end
* token positions to use for this line (which will be used to create the snippet).
* - If the line just processed had a match, reassemble the tokens (with ellipses, as
* needed) to produce the snippet, and return it.
*
*
*
* @param content The content to snippetize.
* @param displayName Display name for the contact.
* @return The snippet for the content, or null if no snippet should be shown.
*/
// TODO: Tokenization is done based on alphanumeric characters, which may not be appropriate for
// some locales (but this matches what the item view does for display).
private String snippetize(String content, String displayName) {
// If the display name already contains the query term, return empty - snippets should
// not be needed in that case.
String lowerDisplayName = displayName != null ? displayName.toLowerCase() : "";
String lowerQuery = mQuery.toLowerCase();
List nameTokens = new ArrayList();
List nameTokenOffsets = new ArrayList();
split(lowerDisplayName.trim(), nameTokens, nameTokenOffsets);
for (String nameToken : nameTokens) {
if (nameToken.startsWith(lowerQuery)) {
return null;
}
}
// If the content to snippetize is empty, return null.
if (TextUtils.isEmpty(content)) {
return null;
}
// Check to see if a custom snippet was already returned (identified by the string having no
// newlines and beginning and ending with the split delimiters).
String[] contentLines = content.split("\n");
if (contentLines.length == 1 && !TextUtils.isEmpty(contentLines[0])
&& contentLines[0].startsWith(mStartMatch) && contentLines[0].endsWith(mEndMatch)) {
// Custom snippet was retrieved - just return it.
return content;
}
// If the content isn't a custom snippet and doesn't contain the query term, return null.
if (!content.toLowerCase().contains(lowerQuery)) {
return null;
}
// Locate the lines of the content that contain the query term.
for (String contentLine : contentLines) {
if (contentLine.toLowerCase().contains(lowerQuery)) {
// Line contains the query string - now search for it at the start of tokens.
List lineTokens = new ArrayList();
List tokenOffsets = new ArrayList();
split(contentLine.trim(), lineTokens, tokenOffsets);
// As we find matches against the query, we'll populate this list with the marked
// (or unchanged) tokens.
List markedTokens = new ArrayList();
int firstToken = -1;
int lastToken = -1;
for (int i = 0; i < lineTokens.size(); i++) {
String token = lineTokens.get(i);
String lowerToken = token.toLowerCase();
if (lowerToken.startsWith(lowerQuery)) {
// Query term matched; surround the token with match markers.
markedTokens.add(mStartMatch + token + mEndMatch);
// If this is the first token found with a match, mark the token
// positions to use for assembling the snippet.
if (firstToken == -1) {
firstToken =
Math.max(0, i - (int) Math.floor(Math.abs(mMaxTokens) / 2.0));
lastToken =
Math.min(lineTokens.size(), firstToken + Math.abs(mMaxTokens));
}
} else {
markedTokens.add(token);
}
}
// Assemble the snippet by piecing the tokens back together.
if (firstToken > -1) {
StringBuilder sb = new StringBuilder();
if (firstToken > 0) {
sb.append(mEllipsis);
}
for (int i = firstToken; i < lastToken; i++) {
String markedToken = markedTokens.get(i);
String originalToken = lineTokens.get(i);
sb.append(markedToken);
if (i < lastToken - 1) {
// Add the characters that appeared between this token and the next.
sb.append(contentLine.substring(
tokenOffsets.get(i) + originalToken.length(),
tokenOffsets.get(i + 1)));
}
}
if (lastToken < lineTokens.size()) {
sb.append(mEllipsis);
}
return sb.toString();
}
}
}
return null;
}
/**
* Helper method for splitting a string into tokens. The lists passed in are populated with the
* tokens and offsets into the content of each token. The tokenization function parses e-mail
* addresses as a single token; otherwise it splits on any non-alphanumeric character.
* @param content Content to split.
* @param tokens List of token strings to populate.
* @param offsets List of offsets into the content for each token returned.
*/
private void split(String content, List tokens, List offsets) {
Matcher matcher = SPLIT_PATTERN.matcher(content);
while (matcher.find()) {
tokens.add(matcher.group());
offsets.add(matcher.start());
}
}
}