Merge commit 'goog/eclair-dev' into merge3

Merged the new contacts content provider into goog/master. The old and new content providers now live side by side under separate authorities. Conflicts: Android.mk AndroidManifest.xml res/values/strings.xml
author: Evan Millar <emillar@google.com> 2009-07-08 14:58:53 -0700
committer: Evan Millar <emillar@google.com> 2009-07-08 16:46:00 -0700
commit: 28f8857b1b46bde18b85c6d3c2a63ac44c3c2e1c (patch)
tree: 679f09f13a62945e51cbae9f034f987520905664 /src/com/android/providers/contacts/NameSplitter.java
parent: 47a99760251f02a63b1c5bb8a51c7457ee4c2626 (diff)
parent: ca8172420c0913dff96ea607d477d8b8abfe5ddb (diff)
download: packages_providers_ContactsProvider-28f8857b1b46bde18b85c6d3c2a63ac44c3c2e1c.zip
packages_providers_ContactsProvider-28f8857b1b46bde18b85c6d3c2a63ac44c3c2e1c.tar.gz
packages_providers_ContactsProvider-28f8857b1b46bde18b85c6d3c2a63ac44c3c2e1c.tar.bz2
1 files changed, 297 insertions, 0 deletions
diff --git a/src/com/android/providers/contacts/NameSplitter.java b/src/com/android/providers/contacts/NameSplitter.java
new file mode 100644
index 0000000..aad3bc5
--- /dev/null
+++ b/src/com/android/providers/contacts/NameSplitter.java
@@ -0,0 +1,297 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.android.providers.contacts;
+
+import java.util.HashSet;
+import java.util.StringTokenizer;
+
+/**
+ * Splits a full name into a prefix, given names, a middle name, a family name
+ * and a suffix. The logic only supports having a single family name. If the
+ * full name has multiple family names the output will be incorrect.
+ * <p>
+ * Core algorithm, in the order the steps are applied:
+ * <ol>
+ * <li>Break the full name into tokens on spaces, commas and dots, remembering
+ * which tokens were followed by a dot.</li>
+ * <li>Remove the prefix (Mr., Pastor, Reverend, Sir).</li>
+ * <li>Remove the suffix (III, Ph.D., M.D.).</li>
+ * <li>Assign the last remaining token as the family name. If the token before
+ * it is a known last name prefix (von, St.), make it part of the family
+ * name.</li>
+ * <li>Assign the token before the family name as the middle name, unless it
+ * is preceded by a conjunction.</li>
+ * <li>Assign the rest of the tokens as the given names.</li>
+ * </ol>
+ * Only the first {@code NameTokenizer.MAX_TOKENS} tokens of a full name are
+ * considered; anything after them is ignored.
+ */
+public class NameSplitter {
+
+    private final HashSet<String> mPrefixesSet;
+    private final HashSet<String> mSuffixesSet;
+    private final int mMaxSuffixLength;
+    private final HashSet<String> mLastNamePrefixesSet;
+    private final HashSet<String> mConjunctions;
+
+    /**
+     * Holder for the components produced by
+     * {@link NameSplitter#split(Name, String)}. Every component of a new
+     * instance is {@code null}.
+     */
+    public static class Name {
+        private String prefix;
+        private String givenNames;
+        private String middleName;
+        private String familyName;
+        private String suffix;
+
+        /** Returns the prefix, e.g. "Mr.", or {@code null} if none was stored. */
+        public String getPrefix() {
+            return prefix;
+        }
+
+        /** Returns the given names, or {@code null} if none were stored. */
+        public String getGivenNames() {
+            return givenNames;
+        }
+
+        /** Returns the middle name, or {@code null} if none was stored. */
+        public String getMiddleName() {
+            return middleName;
+        }
+
+        /** Returns the family name, or {@code null} if none was stored. */
+        public String getFamilyName() {
+            return familyName;
+        }
+
+        /** Returns the suffix, e.g. "M.D.", or {@code null} if none was stored. */
+        public String getSuffix() {
+            return suffix;
+        }
+    }
+
+    /**
+     * Breaks a full name into at most {@link #MAX_TOKENS} tokens and records
+     * which of them were followed by a dot. The parse steps consume tokens by
+     * moving {@code mStartPointer} forward and {@code mEndPointer} backward;
+     * the tokens still unassigned are those in
+     * {@code [mStartPointer, mEndPointer)}.
+     */
+    private static class NameTokenizer {
+        private static final int MAX_TOKENS = 10;
+        private final String[] mTokens;
+        // Bit i is set when token i was followed by a dot in the full name.
+        private int mDotBitmask;
+        private int mStartPointer;
+        private int mEndPointer;
+
+        public NameTokenizer(String fullName) {
+            mTokens = new String[MAX_TOKENS];
+
+            // Delimiters are returned as tokens so that dots can be told apart
+            // from spaces and commas.
+            final StringTokenizer tokenizer = new StringTokenizer(fullName, " .,", true);
+            while (tokenizer.hasMoreTokens()) {
+                final String token = tokenizer.nextToken();
+                final char first = token.charAt(0);
+                if (first == ' ' || first == ',') {
+                    continue;
+                }
+
+                if (first == '.') {
+                    // A dot belongs to the token before it. A dot that comes
+                    // before any token carries no information and is dropped
+                    // rather than being stored as a name token.
+                    if (mEndPointer > 0) {
+                        mDotBitmask |= (1 << (mEndPointer - 1));
+                    }
+                    continue;
+                }
+
+                if (mEndPointer == MAX_TOKENS) {
+                    // The array is full. Stopping only here, and not as soon as
+                    // the last slot is filled, lets a dot that follows the last
+                    // stored token still be recorded.
+                    break;
+                }
+                mTokens[mEndPointer] = token;
+                mEndPointer++;
+            }
+        }
+
+        /**
+         * Returns true if the token is followed by a dot in the original full name.
+         */
+        public boolean hasDot(int index) {
+            return (mDotBitmask & (1 << index)) != 0;
+        }
+
+        /**
+         * Returns the number of tokens that have not been assigned to a name
+         * component yet.
+         */
+        public int remaining() {
+            return mEndPointer - mStartPointer;
+        }
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param commonPrefixes comma-separated list of common prefixes,
+     *            e.g. "Mr, Ms, Mrs"
+     * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
+     *            e.g. "d', st, st., von"
+     * @param commonSuffixes comma-separated list of common suffixes,
+     *            e.g. "Jr, M.D., MD, D.D.S."
+     * @param commonConjunctions comma-separated list of common conjunctions,
+     *            e.g. "AND, Or"
+     */
+    public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
+            String commonSuffixes, String commonConjunctions) {
+        mPrefixesSet = convertToSet(commonPrefixes);
+        mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
+        mSuffixesSet = convertToSet(commonSuffixes);
+        mConjunctions = convertToSet(commonConjunctions);
+
+        // The longest suffix bounds how much text parseSuffix has to examine.
+        int maxLength = 0;
+        for (String suffix : mSuffixesSet) {
+            maxLength = Math.max(maxLength, suffix.length());
+        }
+        mMaxSuffixLength = maxLength;
+    }
+
+    /**
+     * Converts a comma-separated list of Strings to a set of Strings. Trims strings
+     * and converts them to upper case. Entries that are empty after trimming are
+     * skipped. A {@code null} list yields an empty set.
+     */
+    private static HashSet<String> convertToSet(String strings) {
+        final HashSet<String> set = new HashSet<String>();
+        if (strings == null) {
+            return set;
+        }
+
+        for (String entry : strings.split(",")) {
+            final String trimmed = entry.trim();
+            if (trimmed.length() > 0) {
+                set.add(trimmed.toUpperCase());
+            }
+        }
+        return set;
+    }
+
+    /**
+     * Parses a full name and stores the parsed components in the Name object.
+     * Components that are not found in the full name are left as they were, so
+     * pass a new {@link Name} unless keeping earlier values is intended.
+     *
+     * @param name receives the parsed components
+     * @param fullName the name to parse; if {@code null}, nothing is done
+     */
+    public void split(Name name, String fullName) {
+        if (fullName == null) {
+            return;
+        }
+
+        // Order matters: each step consumes tokens from one end and the next
+        // step works on what is left.
+        final NameTokenizer tokens = new NameTokenizer(fullName);
+        parsePrefix(name, tokens);
+        parseSuffix(name, tokens);
+        parseLastName(name, tokens);
+        parseMiddleName(name, tokens);
+        parseGivenNames(name, tokens);
+    }
+
+    /**
+     * Parses the first word from the name if it is a prefix. The dot that
+     * followed the prefix in the full name, if any, is kept ("Mr.").
+     */
+    private void parsePrefix(Name name, NameTokenizer tokens) {
+        if (tokens.remaining() == 0) {
+            return;
+        }
+
+        final int first = tokens.mStartPointer;
+        String firstToken = tokens.mTokens[first];
+        if (!mPrefixesSet.contains(firstToken.toUpperCase())) {
+            return;
+        }
+
+        if (tokens.hasDot(first)) {
+            firstToken += '.';
+        }
+        name.prefix = firstToken;
+        tokens.mStartPointer++;
+    }
+
+    /**
+     * Parses the last word(s) from the name if it is a suffix.
+     */
+    private void parseSuffix(Name name, NameTokenizer tokens) {
+        if (tokens.remaining() == 0) {
+            return;
+        }
+
+        int pos = tokens.mEndPointer - 1;
+        String candidate = tokens.mTokens[pos];
+        if (candidate.length() > mMaxSuffixLength) {
+            return;
+        }
+
+        // Suffixes listed without dots, like "Jr" and "MD". The bare token is
+        // stored even if it was followed by a dot.
+        String normalized = candidate.toUpperCase();
+        if (mSuffixesSet.contains(normalized)) {
+            name.suffix = candidate;
+            tokens.mEndPointer = pos;
+            return;
+        }
+
+        if (tokens.hasDot(pos)) {
+            candidate += '.';
+        }
+        normalized += ".";
+
+        // Take care of suffixes like M.D. and D.D.S. by prepending earlier
+        // tokens one at a time. The lookup key always joins tokens with dots,
+        // while the stored text keeps the dots or spaces of the full name.
+        while (normalized.length() <= mMaxSuffixLength) {
+            if (mSuffixesSet.contains(normalized)) {
+                name.suffix = candidate;
+                tokens.mEndPointer = pos;
+                return;
+            }
+
+            if (pos == tokens.mStartPointer) {
+                break;
+            }
+
+            pos--;
+            final String previous = tokens.mTokens[pos];
+            candidate = previous + (tokens.hasDot(pos) ? "." : " ") + candidate;
+            normalized = previous.toUpperCase() + "." + normalized;
+        }
+    }
+
+    /**
+     * Assigns the last remaining word as the family name, together with the
+     * word before it if that word is a known last name prefix.
+     */
+    private void parseLastName(Name name, NameTokenizer tokens) {
+        if (tokens.remaining() == 0) {
+            return;
+        }
+
+        final int lastIndex = tokens.mEndPointer - 1;
+        name.familyName = tokens.mTokens[lastIndex];
+        tokens.mEndPointer = lastIndex;
+
+        if (tokens.remaining() == 0) {
+            return;
+        }
+
+        // Take care of last names like "von Cliburn" and "St. John". The
+        // prefix list may contain the prefix with or without its dot.
+        final int prefixIndex = tokens.mEndPointer - 1;
+        String lastNamePrefix = tokens.mTokens[prefixIndex];
+        final String normalized = lastNamePrefix.toUpperCase();
+        if (mLastNamePrefixesSet.contains(normalized)
+                || mLastNamePrefixesSet.contains(normalized + ".")) {
+            if (tokens.hasDot(prefixIndex)) {
+                lastNamePrefix += '.';
+            }
+            name.familyName = lastNamePrefix + " " + name.familyName;
+            tokens.mEndPointer = prefixIndex;
+        }
+    }
+
+    /**
+     * Assigns the last remaining word as the middle name when at least two
+     * words remain. With three or more words remaining, the word is left for
+     * the given names if it is preceded by a conjunction ("John and Mary").
+     * The dot that followed the middle name in the full name, if any, is kept
+     * ("Q.").
+     */
+    private void parseMiddleName(Name name, NameTokenizer tokens) {
+        final int count = tokens.remaining();
+        if (count < 2) {
+            return;
+        }
+
+        final int middleIndex = tokens.mEndPointer - 1;
+        if (count > 2
+                && mConjunctions.contains(tokens.mTokens[middleIndex - 1].toUpperCase())) {
+            return;
+        }
+
+        String middle = tokens.mTokens[middleIndex];
+        if (tokens.hasDot(middleIndex)) {
+            middle += '.';
+        }
+        name.middleName = middle;
+        tokens.mEndPointer = middleIndex;
+    }
+
+    /**
+     * Assigns all remaining words as the given names, separated by single
+     * spaces. When several words remain, each keeps the dot that followed it
+     * in the full name; a single remaining word is stored without a dot.
+     */
+    private void parseGivenNames(Name name, NameTokenizer tokens) {
+        final int count = tokens.remaining();
+        if (count == 0) {
+            return;
+        }
+
+        if (count == 1) {
+            name.givenNames = tokens.mTokens[tokens.mStartPointer];
+            return;
+        }
+
+        final StringBuilder sb = new StringBuilder();
+        for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
+            if (i != tokens.mStartPointer) {
+                sb.append(' ');
+            }
+            sb.append(tokens.mTokens[i]);
+            if (tokens.hasDot(i)) {
+                sb.append('.');
+            }
+        }
+        name.givenNames = sb.toString();
+    }
+}
author	Evan Millar <emillar@google.com>	2009-07-08 14:58:53 -0700
committer	Evan Millar <emillar@google.com>	2009-07-08 16:46:00 -0700
commit	28f8857b1b46bde18b85c6d3c2a63ac44c3c2e1c (patch)
tree	679f09f13a62945e51cbae9f034f987520905664 /src/com/android/providers/contacts/NameSplitter.java
parent	47a99760251f02a63b1c5bb8a51c7457ee4c2626 (diff)
parent	ca8172420c0913dff96ea607d477d8b8abfe5ddb (diff)
download	packages_providers_ContactsProvider-28f8857b1b46bde18b85c6d3c2a63ac44c3c2e1c.zip packages_providers_ContactsProvider-28f8857b1b46bde18b85c6d3c2a63ac44c3c2e1c.tar.gz packages_providers_ContactsProvider-28f8857b1b46bde18b85c6d3c2a63ac44c3c2e1c.tar.bz2