/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */

package com.android.providers.contacts;

import android.database.CrossProcessCursor;
import android.database.Cursor;
import android.provider.ContactsContract.Contacts;
import android.provider.ContactsContract.SearchSnippetColumns;
import android.text.TextUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Cursor wrapper for use when the results include snippets.  This wrapper does special processing
 * when the snippet field is retrieved, converting the data in that column from the raw form that is
 * retrieved from the database into a snippet before returning (all other columns are simply passed
 * through).
 *
 * Note that this wrapper implements {@link CrossProcessCursor}, but will only behave as such if the
 * cursor it is wrapping is itself a {@link CrossProcessCursor} or another wrapper around the same.
 */
public class SnippetizingCursorWrapper extends CrossProcessCursorWrapper {

    // Pattern for splitting a line into tokens.  This matches e-mail addresses as a single token,
    // otherwise splitting on any group of non-alphanumeric characters.  Compiled once and shared
    // by all instances, since a compiled Pattern is immutable.
    static final Pattern SPLIT_PATTERN =
            Pattern.compile("([\\w-\\.]+)@((?:[\\w]+\\.)+)([a-zA-Z]{2,4})|[\\w]+");

    // Index of the snippet field (if any).
    private final int mSnippetIndex;

    // Parameters for snippetization.
    private final String mQuery;
    private final String mStartMatch;
    private final String mEndMatch;
    private final String mEllipsis;
    private final int mMaxTokens;

    // Whether to invoke the snippeting logic.  If the query consisted of multiple tokens, the DB
    // snippet() function was already used, so we should just return the snippet value directly.
    private final boolean mDoSnippetizing;

    /**
     * Creates a cursor wrapper that does special handling on the snippet field (converting the
     * raw content retrieved from the database into a snippet).
     * @param cursor The cursor to wrap.
     * @param query Query string.
     * @param startMatch String to insert at the start of matches in the snippet.
     * @param endMatch String to insert at the end of matches in the snippet.
     * @param ellipsis Ellipsis characters to use at the start or end of the snippet if appropriate.
     * @param maxTokens Maximum number of tokens to include in the snippet.
     */
    SnippetizingCursorWrapper(Cursor cursor, String query, String startMatch,
            String endMatch, String ellipsis, int maxTokens) {
        super(cursor);
        mSnippetIndex = getColumnIndex(SearchSnippetColumns.SNIPPET);
        mQuery = query;
        mStartMatch = startMatch;
        mEndMatch = endMatch;
        mEllipsis = ellipsis;
        mMaxTokens = maxTokens;
        // Only a single-token query is snippetized here.
        mDoSnippetizing = mQuery.split(ContactsProvider2.QUERY_TOKENIZER_REGEX).length == 1;
    }

    /**
     * Returns the value of the requested column.  When snippetizing is enabled and the requested
     * column is the snippet column, the raw column content is converted into a snippet first;
     * every other column is passed through unchanged.
     *
     * @param columnIndex Index of the column to read.
     * @return The column value, or the computed snippet (possibly null) for the snippet column.
     */
    @Override
    public String getString(int columnIndex) {
        String columnContent = super.getString(columnIndex);

        if (!mDoSnippetizing || columnIndex != mSnippetIndex) {
            return columnContent;
        }

        // Retrieve the display name - if it includes the query term, the snippet should be
        // left empty.
        int displayNameIndex = super.getColumnIndex(Contacts.DISPLAY_NAME);
        String displayName = displayNameIndex < 0 ? null : super.getString(displayNameIndex);
        return snippetize(columnContent, displayName);
    }

    /**
     * Creates a snippet for the given content, with the following algorithm:
     * <ul>
     *   <li>If any token of the display name starts with the query, no snippet is produced.</li>
     *   <li>If the content is empty, no snippet is produced.</li>
     *   <li>If the content is a single line that begins with the start-match marker and ends with
     *       the end-match marker, it is treated as an already-built snippet and returned
     *       as-is.</li>
     *   <li>Otherwise the first line having a token that starts with the query is used: matching
     *       tokens are wrapped in the match markers and a window of at most {@code |maxTokens|}
     *       tokens around the first match is returned, with the ellipsis added on whichever side
     *       was truncated.</li>
     * </ul>
     * All comparisons are case-insensitive.
     *
     * @param content The content to snippetize.
     * @param displayName Display name for the contact.
     * @return The snippet for the content, or null if no snippet should be shown.
     */
    // TODO: Tokenization is done based on alphanumeric characters, which may not be appropriate for
    // some locales (but this matches what the item view does for display).
    private String snippetize(String content, String displayName) {
        // If the display name already contains the query term, return empty - snippets should
        // not be needed in that case.
        String lowerDisplayName = displayName != null ? displayName.toLowerCase() : "";
        String lowerQuery = mQuery.toLowerCase();
        List<String> nameTokens = new ArrayList<String>();
        List<Integer> nameTokenOffsets = new ArrayList<Integer>();
        split(lowerDisplayName.trim(), nameTokens, nameTokenOffsets);
        for (String nameToken : nameTokens) {
            if (nameToken.startsWith(lowerQuery)) {
                return null;
            }
        }

        // If the content to snippetize is empty, return null.
        if (TextUtils.isEmpty(content)) {
            return null;
        }

        // Check to see if a custom snippet was already returned (identified by the string having no
        // newlines and beginning and ending with the split delimiters).
        String[] contentLines = content.split("\n");
        if (contentLines.length == 1 && !TextUtils.isEmpty(contentLines[0])
                && contentLines[0].startsWith(mStartMatch)
                && contentLines[0].endsWith(mEndMatch)) {
            // Custom snippet was retrieved - just return it.
            return content;
        }

        // If the content isn't a custom snippet and doesn't contain the query term, return null.
        if (!content.toLowerCase().contains(lowerQuery)) {
            return null;
        }

        // Locate the first line of the content that yields a snippet for the query term.
        for (String contentLine : contentLines) {
            if (contentLine.toLowerCase().contains(lowerQuery)) {
                String snippet = snippetizeLine(contentLine, lowerQuery);
                if (snippet != null) {
                    return snippet;
                }
            }
        }
        return null;
    }

    /**
     * Builds the snippet for a single line of content.
     *
     * @param contentLine The line to build the snippet from.
     * @param lowerQuery The lower-cased query term.
     * @return The snippet, or null if no token in the line starts with the query.
     */
    private String snippetizeLine(String contentLine, String lowerQuery) {
        // Trim once and use the trimmed text for BOTH tokenizing and separator extraction.  The
        // token offsets are relative to the tokenized string, so reading separators from the
        // untrimmed line would be shifted by any leading whitespace.
        String line = contentLine.trim();
        List<String> lineTokens = new ArrayList<String>();
        List<Integer> tokenOffsets = new ArrayList<Integer>();
        split(line, lineTokens, tokenOffsets);

        int tokenCount = lineTokens.size();
        int maxTokens = Math.abs(mMaxTokens);

        // As we find matches against the query, we'll populate this list with the marked
        // (or unchanged) tokens.
        List<String> markedTokens = new ArrayList<String>(tokenCount);
        int firstToken = -1;
        int lastToken = -1;
        for (int i = 0; i < tokenCount; i++) {
            String token = lineTokens.get(i);
            if (!token.toLowerCase().startsWith(lowerQuery)) {
                markedTokens.add(token);
                continue;
            }

            // Query term matched; surround the token with match markers.
            markedTokens.add(mStartMatch + token + mEndMatch);

            // If this is the first token found with a match, mark the token
            // positions to use for assembling the snippet: a window of up to maxTokens
            // tokens, starting half a window before the match.
            if (firstToken == -1) {
                firstToken = Math.max(0, i - maxTokens / 2);
                lastToken = Math.min(tokenCount, firstToken + maxTokens);
            }
        }

        if (firstToken == -1) {
            return null;
        }

        // Assemble the snippet by piecing the tokens back together.
        StringBuilder sb = new StringBuilder();
        if (firstToken > 0) {
            sb.append(mEllipsis);
        }
        for (int i = firstToken; i < lastToken; i++) {
            sb.append(markedTokens.get(i));
            if (i < lastToken - 1) {
                // Add the characters that appeared between this token and the next.
                int separatorStart = tokenOffsets.get(i) + lineTokens.get(i).length();
                int separatorEnd = tokenOffsets.get(i + 1);
                sb.append(line.substring(separatorStart, separatorEnd));
            }
        }
        if (lastToken < tokenCount) {
            sb.append(mEllipsis);
        }
        return sb.toString();
    }

    /**
     * Helper method for splitting a string into tokens.  The lists passed in are populated with the
     * tokens and offsets into the content of each token.  The tokenization function parses e-mail
     * addresses as a single token; otherwise it splits on any non-alphanumeric character.
     * @param content Content to split.
     * @param tokens List of token strings to populate.
     * @param offsets List of offsets into the content for each token returned.
     */
    private void split(String content, List<String> tokens, List<Integer> offsets) {
        Matcher matcher = SPLIT_PATTERN.matcher(content);
        while (matcher.find()) {
            tokens.add(matcher.group());
            offsets.add(matcher.start());
        }
    }
}