From d6c0a1330d3e51706d70351a34631f1f4e8fb02f Mon Sep 17 00:00:00 2001 From: Zheng Fu Date: Thu, 26 Mar 2015 13:40:37 -0700 Subject: New contact aggregator (part 1) Summary of the changes: 1. In RawContactMatcher, add rawContactId and accountId to the MatchScore inner class, so that the match score is kept at the raw-contact level instead of being accumulated at the contact level. 2. Add a wrapper, RawContactMatchingCandidates, around the MatchScore list to facilitate the aggregation. 3. In ContactAggregator2, add a findRawContactMatchingCandidates() method to find the set of matching raw contacts for a given raw_contact_id. This method uses the logic of updateMatchScoresForSuggestionsBasedOnDataMatches(), so that it returns the set of candidate raw contacts whose matching score is above the threshold. 4. The second stage, "Pair-wise comparison and find the connected component of all the raw contacts in RawContactMatchingCandidates", is done in the new method reAggregateRawContacts(). 5. Some new methods still throw UnsupportedOperationException; they will be implemented in part 2. 6. Tests will be implemented in follow-up CLs. 
Bug:19908937 Change-Id: I53483a29c24401e2f38a727168e7431cef86370a --- .../aggregation/util/RawContactMatcher.java | 147 +++++++-------------- .../util/RawContactMatchingCandidates.java | 113 ++++++++++++++++ 2 files changed, 161 insertions(+), 99 deletions(-) create mode 100644 src/com/android/providers/contacts/aggregation/util/RawContactMatchingCandidates.java (limited to 'src/com/android/providers/contacts/aggregation/util') diff --git a/src/com/android/providers/contacts/aggregation/util/RawContactMatcher.java b/src/com/android/providers/contacts/aggregation/util/RawContactMatcher.java index 5540a24..3b0150c 100644 --- a/src/com/android/providers/contacts/aggregation/util/RawContactMatcher.java +++ b/src/com/android/providers/contacts/aggregation/util/RawContactMatcher.java @@ -135,7 +135,8 @@ public class RawContactMatcher { * Populates the cells of the score matrix and score span matrix * corresponding to the {@code candidateNameType} and {@code nameType}. */ - private static void setScoreRange(int candidateNameType, int nameType, int scoreFrom, int scoreTo) { + private static void setScoreRange(int candidateNameType, int nameType, int scoreFrom, + int scoreTo) { int index = nameType * NameLookupType.TYPE_COUNT + candidateNameType; sMinScore[index] = scoreFrom; sMaxScore[index] = scoreTo; @@ -160,23 +161,29 @@ public class RawContactMatcher { } /** - * Captures the max score and match count for a specific contact. Used in an - * contactId - MatchScore map. + * Captures the max score and match count for a specific raw contact. Used in an + * rawContactId - MatchScore map. 
*/ public static class MatchScore implements Comparable { + private long mRawContactId; private long mContactId; + private long mAccountId; private boolean mKeepIn; private boolean mKeepOut; private int mPrimaryScore; private int mSecondaryScore; private int mMatchCount; - public MatchScore(long contactId) { + public MatchScore(long rawContactId, long contactId, long accountId) { + this.mRawContactId = rawContactId; this.mContactId = contactId; + this.mAccountId = accountId; } - public void reset(long contactId) { + public void reset(long rawContactId, long contactId, long accountId) { + this.mRawContactId = rawContactId; this.mContactId = contactId; + this.mAccountId = accountId; mKeepIn = false; mKeepOut = false; mPrimaryScore = 0; @@ -184,10 +191,18 @@ public class RawContactMatcher { mMatchCount = 0; } + public long getRawContactId() { + return mRawContactId; + } + public long getContactId() { return mContactId; } + public long getAccountId() { + return mAccountId; + } + public void updatePrimaryScore(int score) { if (score > mPrimaryScore) { mPrimaryScore = score; @@ -236,8 +251,8 @@ public class RawContactMatcher { @Override public String toString() { - return mContactId + ": " + mPrimaryScore + "/" + mSecondaryScore + "(" + mMatchCount - + ")"; + return mRawContactId + "/" + mContactId + "/" + mAccountId + ": " + mPrimaryScore + + "/" + mSecondaryScore + "(" + mMatchCount + ")"; } } @@ -248,18 +263,18 @@ public class RawContactMatcher { private final NameDistance mNameDistanceConservative = new NameDistance(); private final NameDistance mNameDistanceApproximate = new NameDistance(MAX_MATCHED_NAME_LENGTH); - private MatchScore getMatchingScore(long contactId) { - MatchScore matchingScore = mScores.get(contactId); + private MatchScore getMatchingScore(long rawContactId, long contactId, long accountId) { + MatchScore matchingScore = mScores.get(rawContactId); if (matchingScore == null) { if (mScoreList.size() > mScoreCount) { matchingScore = 
mScoreList.get(mScoreCount); - matchingScore.reset(contactId); + matchingScore.reset(rawContactId, contactId, accountId); } else { - matchingScore = new MatchScore(contactId); + matchingScore = new MatchScore(rawContactId, contactId, accountId); mScoreList.add(matchingScore); } mScoreCount++; - mScores.put(contactId, matchingScore); + mScores.put(rawContactId, matchingScore); } return matchingScore; } @@ -267,8 +282,8 @@ public class RawContactMatcher { /** * Marks the contact as a full match, because we found an Identity match */ - public void matchIdentity(long contactId) { - updatePrimaryScore(contactId, MAX_SCORE); + public void matchIdentity(long rawContactId, long contactId, long accountId) { + updatePrimaryScore(rawContactId, contactId, accountId, MAX_SCORE); } /** @@ -278,15 +293,15 @@ public class RawContactMatcher { * of name we found and, if the match is approximate, the distance between the candidate and * actual name. */ - public void matchName(long contactId, int candidateNameType, String candidateName, - int nameType, String name, int algorithm) { + public void matchName(long rawContactId, long contactId, long accountId, int + candidateNameType, String candidateName, int nameType, String name, int algorithm) { int maxScore = getMaxScore(candidateNameType, nameType); if (maxScore == 0) { return; } if (candidateName.equals(name)) { - updatePrimaryScore(contactId, maxScore); + updatePrimaryScore(rawContactId, contactId, accountId, maxScore); return; } @@ -326,35 +341,36 @@ public class RawContactMatcher { score = 0; } - updatePrimaryScore(contactId, score); + updatePrimaryScore(rawContactId, contactId, accountId, score); } - public void updateScoreWithPhoneNumberMatch(long contactId) { - updateSecondaryScore(contactId, PHONE_MATCH_SCORE); + public void updateScoreWithPhoneNumberMatch(long rawContactId, long contactId, long accountId) { + updateSecondaryScore(rawContactId, contactId, accountId, PHONE_MATCH_SCORE); } - public void 
updateScoreWithEmailMatch(long contactId) { - updateSecondaryScore(contactId, EMAIL_MATCH_SCORE); + public void updateScoreWithEmailMatch(long rawContactId, long contactId, long accountId) { + updateSecondaryScore(rawContactId, contactId, accountId, EMAIL_MATCH_SCORE); } - public void updateScoreWithNicknameMatch(long contactId) { - updateSecondaryScore(contactId, NICKNAME_MATCH_SCORE); + public void updateScoreWithNicknameMatch(long rawContactId, long contactId, long accountId) { + updateSecondaryScore(rawContactId, contactId, accountId, NICKNAME_MATCH_SCORE); } - private void updatePrimaryScore(long contactId, int score) { - getMatchingScore(contactId).updatePrimaryScore(score); + private void updatePrimaryScore(long rawContactId, long contactId, long accountId, int score) { + getMatchingScore(rawContactId, contactId, accountId).updatePrimaryScore(score); } - private void updateSecondaryScore(long contactId, int score) { - getMatchingScore(contactId).updateSecondaryScore(score); + private void updateSecondaryScore(long rawContactId, long contactId, long accountId, + int score) { + getMatchingScore(rawContactId, contactId, accountId).updateSecondaryScore(score); } - public void keepIn(long contactId) { - getMatchingScore(contactId).keepIn(); + public void keepIn(long rawContactId, long contactId, long accountId) { + getMatchingScore(rawContactId, contactId, accountId).keepIn(); } - public void keepOut(long contactId) { - getMatchingScore(contactId).keepOut(); + public void keepOut(long rawContactId, long contactId, long accountId) { + getMatchingScore(rawContactId, contactId, accountId).keepOut(); } public void clear() { @@ -363,73 +379,6 @@ public class RawContactMatcher { } /** - * Returns a list of IDs for contacts that are matched on secondary data elements - * (phone number, email address, nickname). We still need to obtain the approximate - * primary score for those contacts to determine if any of them should be aggregated. - *

- * May return null. - */ - public List prepareSecondaryMatchCandidates(int threshold) { - ArrayList contactIds = null; - - for (int i = 0; i < mScoreCount; i++) { - MatchScore score = mScoreList.get(i); - if (score.mKeepOut) { - continue; - } - - int s = score.mSecondaryScore; - if (s >= threshold) { - if (contactIds == null) { - contactIds = new ArrayList(); - } - contactIds.add(score.mContactId); - } - score.mPrimaryScore = NO_DATA_SCORE; - } - return contactIds; - } - - /** - * Returns the contactId with the best match score over the specified threshold or -1 - * if no such contact is found. If multiple contacts are found, and - * {@code allowMultipleMatches} is {@code true}, it returns the first one found, but if - * {@code allowMultipleMatches} is {@code false} it'll return {@link #MULTIPLE_MATCHES}. - */ - public long pickBestMatch(int threshold, boolean allowMultipleMatches) { - long contactId = -1; - int maxScore = 0; - for (int i = 0; i < mScoreCount; i++) { - MatchScore score = mScoreList.get(i); - if (score.mKeepOut) { - continue; - } - - if (score.mKeepIn) { - return score.mContactId; - } - - int s = score.mPrimaryScore; - if (s == NO_DATA_SCORE) { - s = score.mSecondaryScore; - } - - if (s >= threshold) { - if (contactId != -1 && !allowMultipleMatches) { - return MULTIPLE_MATCHES; - } - // In order to make it stable, let's jut pick the one with the lowest ID - // if multiple candidates are found. - if ((s > maxScore) || ((s == maxScore) && (contactId > score.mContactId))) { - contactId = score.mContactId; - maxScore = s; - } - } - } - return contactId; - } - - /** * Returns matches in the order of descending score. 
*/ public List pickBestMatches(int threshold) { diff --git a/src/com/android/providers/contacts/aggregation/util/RawContactMatchingCandidates.java b/src/com/android/providers/contacts/aggregation/util/RawContactMatchingCandidates.java new file mode 100644 index 0000000..39125b4 --- /dev/null +++ b/src/com/android/providers/contacts/aggregation/util/RawContactMatchingCandidates.java @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +package com.android.providers.contacts.aggregation.util; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static com.android.internal.util.Preconditions.checkNotNull; + +/** + * Matching candidates for a raw contact, used in the contact aggregator. 
+ */ +public class RawContactMatchingCandidates { + private List mBestMatches; + private Set mRawContactIds = null; + private Map mRawContactToContact = null; + private Map mRawContactToAccount = null; + + public RawContactMatchingCandidates(List mBestMatches) { + checkNotNull(mBestMatches); + this.mBestMatches = mBestMatches; + } + + public RawContactMatchingCandidates() { + mBestMatches = new ArrayList(); + } + + public int getCount() { + return mBestMatches.size(); + } + + public void add(RawContactMatcher.MatchScore score) { + mBestMatches.add(score); + if (mRawContactIds != null) { + mRawContactIds.add(score.getRawContactId()); + } + if (mRawContactToAccount != null) { + mRawContactToAccount.put(score.getRawContactId(), score.getAccountId()); + } + if (mRawContactToContact != null) { + mRawContactToContact.put(score.getRawContactId(), score.getContactId()); + } + } + + public Set getRawContactIdSet() { + if (mRawContactIds == null) { + createRawContactIdSet(); + } + return mRawContactIds; + } + + public Map getRawContactToAccount() { + if (mRawContactToAccount == null) { + createRawContactToAccountMap(); + } + return mRawContactToAccount; + } + + public Long getContactId(Long rawContactId) { + if (mRawContactToContact == null) { + createRawContactToContactMap(); + } + return mRawContactToContact.get(rawContactId); + } + + public Long getAccountId(Long rawContactId) { + if (mRawContactToAccount == null) { + createRawContactToAccountMap(); + } + return mRawContactToAccount.get(rawContactId); + } + + private void createRawContactToContactMap() { + mRawContactToContact = new HashMap(); + for (int i = 0; i < mBestMatches.size(); i++) { + mRawContactToContact.put(mBestMatches.get(i).getRawContactId(), + mBestMatches.get(i).getContactId()); + } + } + + private void createRawContactToAccountMap() { + mRawContactToAccount = new HashMap(); + for (int i = 0; i < mBestMatches.size(); i++) { + mRawContactToAccount.put(mBestMatches.get(i).getRawContactId(), + 
mBestMatches.get(i).getAccountId()); + } + } + + private void createRawContactIdSet() { + mRawContactIds = new HashSet(); + for (int i = 0; i < mBestMatches.size(); i++) { + mRawContactIds.add(mBestMatches.get(i).getRawContactId()); + } + } +} -- cgit v1.1