summaryrefslogtreecommitdiffstats
path: root/src/com/android/providers/contacts/NameNormalizer.java
blob: e48727b96801f94bd3c14f88803d8f5b9294fd2e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.android.providers.contacts;

import com.android.providers.contacts.util.Hex;
import com.google.common.annotations.VisibleForTesting;

import java.text.CollationKey;
import java.text.Collator;
import java.text.RuleBasedCollator;
import java.util.Locale;

/**
 * Converts a name to a normalized form by removing all characters that are neither letters
 * nor digits and normalizing UNICODE according to http://unicode.org/unicode/reports/tr15
 */
public class NameNormalizer {

    /** Held by the collator getters while the cached state below is checked and rebuilt. */
    private static final Object sCollatorLock = new Object();

    /** Locale the cached collators were built for; {@code null} until the first lookup. */
    private static Locale sCollatorLocale;

    /** PRIMARY-strength collator with canonical decomposition, used by {@link #normalize}. */
    private static RuleBasedCollator sCachedCompressingCollator;

    /** SECONDARY-strength collator, used by {@link #compareComplexity}. */
    private static RuleBasedCollator sCachedComplexityCollator;

    /**
     * Ensure that the cached collators are for the current locale.
     *
     * Rebuilds both collators whenever the default locale differs from the one they were
     * created for. Both callers in this class invoke this while holding
     * {@link #sCollatorLock}.
     */
    private static void ensureCollators() {
        final Locale locale = Locale.getDefault();
        if (locale.equals(sCollatorLocale)) {
            return;
        }
        sCollatorLocale = locale;

        sCachedCompressingCollator = (RuleBasedCollator) Collator.getInstance(locale);
        sCachedCompressingCollator.setStrength(Collator.PRIMARY);
        sCachedCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);

        sCachedComplexityCollator = (RuleBasedCollator) Collator.getInstance(locale);
        sCachedComplexityCollator.setStrength(Collator.SECONDARY);
    }

    /**
     * Returns the PRIMARY-strength collator for the current default locale, creating or
     * refreshing the cached instance if the locale has changed.
     */
    @VisibleForTesting
    static RuleBasedCollator getCompressingCollator() {
        synchronized (sCollatorLock) {
            ensureCollators();
            return sCachedCompressingCollator;
        }
    }

    /**
     * Returns the SECONDARY-strength collator for the current default locale, creating or
     * refreshing the cached instance if the locale has changed.
     */
    @VisibleForTesting
    static RuleBasedCollator getComplexityCollator() {
        synchronized (sCollatorLock) {
            ensureCollators();
            return sCachedComplexityCollator;
        }
    }

    /**
     * Converts the supplied name to a string that can be used to perform approximate matching
     * of names.  It ignores non-letter, non-digit characters, and removes accents.
     *
     * @param name the name to normalize; {@code null} is treated as an empty name
     * @return the hex encoding of the collation key of the cleaned-up name
     */
    public static String normalize(String name) {
        CollationKey key = getCompressingCollator().getCollationKey(lettersAndDigitsOnly(name));
        return Hex.encodeHex(key.toByteArray(), true);
    }

    /**
     * Compares "complexity" of two names, which is determined by the presence
     * of mixed case characters, accents and, if all else is equal, length.
     *
     * @param name1 the first name; {@code null} is treated as an empty name
     * @param name2 the second name; {@code null} is treated as an empty name
     * @return a negative value, zero or a positive value if {@code name1} is respectively
     *         less complex than, as complex as, or more complex than {@code name2}
     */
    public static int compareComplexity(String name1, String name2) {
        String clean1 = lettersAndDigitsOnly(name1);
        String clean2 = lettersAndDigitsOnly(name2);
        int diff = getComplexityCollator().compare(clean1, clean2);
        if (diff != 0) {
            return diff;
        }
        // compareTo sorts uppercase first. We know that there are no non-case
        // differences from the above test, so we can negate here to get the
        // lowercase-first comparison we really want...
        diff = -clean1.compareTo(clean2);
        if (diff != 0) {
            return diff;
        }
        // Final tie-break on the raw (uncleaned) lengths. A null name counts as length 0,
        // consistent with lettersAndDigitsOnly() mapping null to the empty string; calling
        // length() on the raw argument directly would throw for null input.
        final int length1 = (name1 == null) ? 0 : name1.length();
        final int length2 = (name2 == null) ? 0 : name2.length();
        return length1 - length2;
    }

    /**
     * Returns a string containing just the letters and digits from the original string.
     * Returns empty string if the original string is null.
     *
     * The string is examined one Unicode code point at a time, so letters and digits
     * outside the Basic Multilingual Plane (encoded as surrogate pairs) are retained
     * rather than being discarded half by half. Unpaired surrogates are dropped.
     */
    private static String lettersAndDigitsOnly(String name) {
        if (name == null) {
            return "";
        }
        char[] letters = name.toCharArray();
        int length = 0;
        for (int i = 0; i < letters.length; ) {
            final int codePoint = Character.codePointAt(letters, i);
            final int charCount = Character.charCount(codePoint);
            if (Character.isLetterOrDigit(codePoint)) {
                // Compact in place. The write position never passes the read position,
                // so no unread character is overwritten.
                length += Character.toChars(codePoint, letters, length);
            }
            i += charCount;
        }

        if (length != letters.length) {
            return new String(letters, 0, length);
        }

        return name;
    }
}