diff options
author | Makoto Onuki <omakoto@google.com> | 2012-04-27 13:59:45 -0700 |
---|---|---|
committer | Makoto Onuki <omakoto@google.com> | 2012-04-30 15:03:08 -0700 |
commit | 116d86ddd67330428f9128613b4886fc0ea66221 (patch) | |
tree | 35d01b44ca3c25ab31b61094c43c2fa9af285fdf /src/com/android/providers/contacts/SearchIndexManager.java | |
parent | f76a0fe0629fb626d96081dc0f272433e4920ba3 (diff) | |
download | packages_providers_ContactsProvider-116d86ddd67330428f9128613b4886fc0ea66221.zip packages_providers_ContactsProvider-116d86ddd67330428f9128613b4886fc0ea66221.tar.gz packages_providers_ContactsProvider-116d86ddd67330428f9128613b4886fc0ea66221.tar.bz2 |
Make names with special chars searchable.
It's a better fix than I34bfa864, which was only a quick workaround for
double-barrelled names.
Now names with other special characters are searchable too.
Also, previously, a query "doublebarrelled" wouldn't match "double-barrelled",
but now it will.
Bug 5592553
Change-Id: Id1d44261f577df7abf701311ed1c86fb093547da
Diffstat (limited to 'src/com/android/providers/contacts/SearchIndexManager.java')
-rw-r--r-- | src/com/android/providers/contacts/SearchIndexManager.java | 93 |
1 files changed, 56 insertions, 37 deletions
diff --git a/src/com/android/providers/contacts/SearchIndexManager.java b/src/com/android/providers/contacts/SearchIndexManager.java index 5ca9859..bd4e1cc 100644 --- a/src/com/android/providers/contacts/SearchIndexManager.java +++ b/src/com/android/providers/contacts/SearchIndexManager.java @@ -20,6 +20,8 @@ import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns; import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns; import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns; import com.android.providers.contacts.ContactsDatabaseHelper.Tables; +import com.google.android.collect.Lists; +import com.google.common.annotations.VisibleForTesting; import android.content.ContentValues; import android.database.Cursor; @@ -35,7 +37,9 @@ import android.provider.ContactsContract.RawContacts; import android.text.TextUtils; import android.util.Log; +import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.regex.Pattern; @@ -138,7 +142,7 @@ public class SearchIndexManager { appendContent(value, SEPARATOR_SPACE); } - public void appendContent(String value, int format) { + private void appendContent(String value, int format) { if (TextUtils.isEmpty(value)) { return; } @@ -182,18 +186,33 @@ public class SearchIndexManager { mSbTokens.append(token); } - private static final Pattern PATTERN_HYPHEN = Pattern.compile("\\-"); - public void appendName(String name) { if (TextUtils.isEmpty(name)) { return; } - if (name.indexOf('-') < 0) { - // Common case -- no hyphens in it. - appendNameInternal(name); - } else { - // In order to make hyphenated names searchable, let's split names with '-'. - for (String namePart : PATTERN_HYPHEN.split(name)) { + // First, put the original name. + appendNameInternal(name); + + // Then, if the name contains more than one FTS token, put each token into the index + // too. 
+ // + // This is to make names with special characters searchable, such as "double-barrelled" + // "L'Image". + // + // Here's how it works: + // Because we "normalize" names when putting into the index, if we only put + // "double-barrelled", the index will only contain "doublebarrelled". + // Now, if the user searches for "double-barrelled", the searcher tokenizes it into + // two tokens, "double" and "barrelled". The first one matches "doublebarrelled" + // but the second one doesn't (because we only do the prefix match), so + // "doublebarrelled" doesn't match. + // So, here, we put each token in a name into the index too. In the case above, + // we put also "double" and "barrelled". + // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled" + // will all match "double-barrelled". + final List<String> nameParts = splitIntoFtsTokens(name); + if (nameParts.size() > 1) { + for (String namePart : nameParts) { if (!TextUtils.isEmpty(namePart)) { appendNameInternal(namePart); } @@ -201,6 +220,9 @@ public class SearchIndexManager { } } + /** + * Normalize a name and add to {@link #mSbName} + */ private void appendNameInternal(String name) { if (mSbName.length() != 0) { mSbName.append(' '); @@ -373,6 +395,29 @@ public class SearchIndexManager { } /** + * Token separator that matches SQLite's "simple" tokenizer. + * - Unicode codepoints >= 128: Everything + * - Unicode codepoints < 128: Alphanumeric and "_" + * - Everything else is a separator of tokens + */ + private static final Pattern FTS_TOKEN_SEPARATOR_RE = + Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]"); + + /** + * Tokenize a string in the way as that of SQLite's "simple" tokenizer. 
+ */ + @VisibleForTesting + static List<String> splitIntoFtsTokens(String s) { + final ArrayList<String> ret = Lists.newArrayList(); + for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) { + if (!TextUtils.isEmpty(token)) { + ret.add(token); + } + } + return ret; + } + + /** * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then * returned as a String. @@ -380,35 +425,9 @@ public class SearchIndexManager { * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING */ public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) { - // SQLite's "simple" tokenizer uses the following rules to detect characters: - // - Unicode codepoints >= 128: Everything - // - Unicode codepoints < 128: Alphanumeric and "_" - // Everything else is a separator of tokens - int tokenStart = -1; final StringBuilder result = new StringBuilder(); - for (int i = 0; i <= query.length(); i++) { - final boolean isChar; - if (i == query.length()) { - isChar = false; - } else { - final char ch = query.charAt(i); - if (ch >= 128) { - isChar = true; - } else { - isChar = Character.isLetterOrDigit(ch) || ch == '_'; - } - } - if (isChar) { - if (tokenStart == -1) { - tokenStart = i; - } - } else { - if (tokenStart != -1) { - final String token = query.substring(tokenStart, i); - ftsQueryBuilder.addToken(result, token); - tokenStart = -1; - } - } + for (String token : splitIntoFtsTokens(query)) { + ftsQueryBuilder.addToken(result, token); } return result.toString(); } |