diff options
author | John Reck <jreck@google.com> | 2012-02-23 17:43:14 -0800 |
---|---|---|
committer | John Reck <jreck@google.com> | 2012-02-23 17:43:14 -0800 |
commit | b91bac35c4fbd25546c7e06d233f332f45dd0649 (patch) | |
tree | d5a63b5309a0ea0add792a92890c6f63f14e231a /Source | |
parent | 773979f92560dd1aead375c82fd75b584a141e5d (diff) | |
download | external_webkit-b91bac35c4fbd25546c7e06d233f332f45dd0649.zip external_webkit-b91bac35c4fbd25546c7e06d233f332f45dd0649.tar.gz external_webkit-b91bac35c4fbd25546c7e06d233f332f45dd0649.tar.bz2 |
Support phone & email address detection
Change-Id: Ib5d671eb67d48ea29a0a16a99101eba904a8951c
Diffstat (limited to 'Source')
-rw-r--r-- | Source/WebKit/Android.mk | 3 | ||||
-rw-r--r-- | Source/WebKit/android/content/PhoneEmailDetector.cpp | 369 | ||||
-rw-r--r-- | Source/WebKit/android/content/PhoneEmailDetector.h | 75 | ||||
-rw-r--r-- | Source/WebKit/android/jni/AndroidHitTestResult.cpp | 5 |
4 files changed, 451 insertions, 1 deletion
diff --git a/Source/WebKit/Android.mk b/Source/WebKit/Android.mk index 6066f83..34fd14e 100644 --- a/Source/WebKit/Android.mk +++ b/Source/WebKit/Android.mk @@ -50,8 +50,9 @@ LOCAL_SRC_FILES += \ \ android/icu/unicode/ucnv.cpp \ \ - android/content/content_detector.cpp \ android/content/address_detector.cpp \ + android/content/content_detector.cpp \ + android/content/PhoneEmailDetector.cpp \ \ android/jni/AndroidHitTestResult.cpp \ android/jni/CacheManager.cpp \ diff --git a/Source/WebKit/android/content/PhoneEmailDetector.cpp b/Source/WebKit/android/content/PhoneEmailDetector.cpp new file mode 100644 index 0000000..d188c0b --- /dev/null +++ b/Source/WebKit/android/content/PhoneEmailDetector.cpp @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2011 Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#undef WEBKIT_IMPLEMENTATION +#undef LOG + +#include "base/utf_string_conversions.h" +#include "net/base/escape.h" +#include "PhoneEmailDetector.h" +#include "WebString.h" + +#define LOG_TAG "PhoneNumberDetector" +#include <cutils/log.h> + +#define PHONE_PATTERN "(200) /-.\\ 100 -. 0000" + +static const char kTelSchemaPrefix[] = "tel:"; +static const char kEmailSchemaPrefix[] = "mailto:"; + +void FindReset(FindState* state); +void FindResetNumber(FindState* state); +FoundState FindPartialNumber(const UChar* chars, unsigned length, + FindState* s); +struct FindState; + +static FoundState FindPartialEMail(const UChar* , unsigned length, FindState* ); +static bool IsDomainChar(UChar ch); +static bool IsMailboxChar(UChar ch); + +PhoneEmailDetector::PhoneEmailDetector() + : m_foundResult(FOUND_NONE) +{ +} + +bool PhoneEmailDetector::FindContent(const string16::const_iterator& begin, + const string16::const_iterator& end, + size_t* start_pos, + size_t* end_pos) +{ + FindReset(&m_findState); + m_foundResult = FindPartialNumber(begin, end - begin, &m_findState); + if (m_foundResult == FOUND_COMPLETE) + m_prefix = kTelSchemaPrefix; + else { + FindReset(&m_findState); + m_foundResult = FindPartialEMail(begin, end - begin, &m_findState); + m_prefix = kEmailSchemaPrefix; + } + *start_pos = m_findState.mStartResult; + *end_pos = m_findState.mEndResult; + return m_foundResult == FOUND_COMPLETE; +} + +std::string 
PhoneEmailDetector::GetContentText(const WebKit::WebRange& range) +{ + if (m_foundResult == FOUND_COMPLETE) { + if (m_prefix == kTelSchemaPrefix) + return UTF16ToUTF8(m_findState.mStore); + else + return UTF16ToUTF8(range.toPlainText()); + } + return std::string(); +} + +GURL PhoneEmailDetector::GetIntentURL(const std::string& content_text) +{ + return GURL(m_prefix + + EscapeQueryParamValue(content_text, true)); +} + +void FindReset(FindState* state) +{ + memset(state, 0, sizeof(FindState)); + state->mCurrent = ' '; + FindResetNumber(state); +} + +void FindResetNumber(FindState* state) +{ + state->mOpenParen = false; + state->mPattern = (char*) PHONE_PATTERN; + state->mStorePtr = state->mStore; +} + +FoundState FindPartialNumber(const UChar* chars, unsigned length, + FindState* s) +{ + char* pattern = s->mPattern; + UChar* store = s->mStorePtr; + const UChar* start = chars; + const UChar* end = chars + length; + const UChar* lastDigit = 0; + string16 search16(chars, length); + std::string searchSpace = UTF16ToUTF8(search16); + do { + bool initialized = s->mInitialized; + while (chars < end) { + if (initialized == false) { + s->mBackTwo = s->mBackOne; + s->mBackOne = s->mCurrent; + } + UChar ch = s->mCurrent = *chars; + do { + char patternChar = *pattern; + switch (patternChar) { + case '2': + if (initialized == false) { + s->mStartResult = chars - start; + initialized = true; + } + case '0': + case '1': + if (ch < patternChar || ch > '9') + goto resetPattern; + *store++ = ch; + pattern++; + lastDigit = chars; + goto nextChar; + case '\0': + if (WTF::isASCIIDigit(ch) == false) { + *store = '\0'; + goto checkMatch; + } + goto resetPattern; + case ' ': + if (ch == patternChar) + goto nextChar; + break; + case '(': + if (ch == patternChar) { + s->mStartResult = chars - start; + initialized = true; + s->mOpenParen = true; + } + goto commonPunctuation; + case ')': + if ((ch == patternChar) ^ s->mOpenParen) + goto resetPattern; + default: + commonPunctuation: + if (ch == 
patternChar) { + pattern++; + goto nextChar; + } + } + } while (++pattern); // never false + nextChar: + chars++; + } + break; +resetPattern: + if (s->mContinuationNode) + return FOUND_NONE; + FindResetNumber(s); + pattern = s->mPattern; + store = s->mStorePtr; + } while (++chars < end); +checkMatch: + if (WTF::isASCIIDigit(s->mBackOne != '1' ? s->mBackOne : s->mBackTwo)) { + return FOUND_NONE; + } + *store = '\0'; + s->mStorePtr = store; + s->mPattern = pattern; + s->mEndResult = lastDigit - start + 1; + char pState = pattern[0]; + return pState == '\0' ? FOUND_COMPLETE : pState == '(' || (WTF::isASCIIDigit(pState) && WTF::isASCIIDigit(pattern[-1])) ? + FOUND_NONE : FOUND_PARTIAL; +} + +FoundState FindPartialEMail(const UChar* chars, unsigned length, + FindState* s) +{ + // the following tables were generated by tests/browser/focusNavigation/BrowserDebug.cpp + // hand-edit at your own risk + static const int domainTwoLetter[] = { + 0x02df797c, // a followed by: [cdefgilmnoqrstuwxz] + 0x036e73fb, // b followed by: [abdefghijmnorstvwyz] + 0x03b67ded, // c followed by: [acdfghiklmnorsuvxyz] + 0x02005610, // d followed by: [ejkmoz] + 0x001e00d4, // e followed by: [ceghrstu] + 0x00025700, // f followed by: [ijkmor] + 0x015fb9fb, // g followed by: [abdefghilmnpqrstuwy] + 0x001a3400, // h followed by: [kmnrtu] + 0x000f7818, // i followed by: [delmnoqrst] + 0x0000d010, // j followed by: [emop] + 0x0342b1d0, // k followed by: [eghimnprwyz] + 0x013e0507, // l followed by: [abcikrstuvy] + 0x03fffccd, // m followed by: [acdghklmnopqrstuvwxyz] + 0x0212c975, // n followed by: [acefgilopruz] + 0x00001000, // o followed by: [m] + 0x014e3cf1, // p followed by: [aefghklmnrstwy] + 0x00000001, // q followed by: [a] + 0x00504010, // r followed by: [eouw] + 0x032a7fdf, // s followed by: [abcdeghijklmnortvyz] + 0x026afeec, // t followed by: [cdfghjklmnoprtvwz] + 0x03041441, // u followed by: [agkmsyz] + 0x00102155, // v followed by: [aceginu] + 0x00040020, // w followed by: [fs] + 
0x00000000, // x + 0x00180010, // y followed by: [etu] + 0x00401001, // z followed by: [amw] + }; + + static char const* const longDomainNames[] = { + "\x03" "ero" "\x03" "rpa", // aero, arpa + "\x02" "iz", // biz + "\x02" "at" "\x02" "om" "\x03" "oop", // cat, com, coop + NULL, // d + "\x02" "du", // edu + NULL, // f + "\x02" "ov", // gov + NULL, // h + "\x03" "nfo" "\x02" "nt", // info, int + "\x03" "obs", // jobs + NULL, // k + NULL, // l + "\x02" "il" "\x03" "obi" "\x05" "useum", // mil, mobi, museum + "\x03" "ame" "\x02" "et", // name, net + "\x02" "rg", // , org + "\x02" "ro", // pro + NULL, // q + NULL, // r + NULL, // s + "\x05" "ravel", // travel + NULL, // u + NULL, // v + NULL, // w + NULL, // x + NULL, // y + NULL, // z + }; + + const UChar* start = chars; + const UChar* end = chars + length; + while (chars < end) { + UChar ch = *chars++; + if (ch != '@') + continue; + const UChar* atLocation = chars - 1; + // search for domain + ch = *chars++ | 0x20; // convert uppercase to lower + if (ch < 'a' || ch > 'z') + continue; + while (chars < end) { + ch = *chars++; + if (IsDomainChar(ch) == false) + goto nextAt; + if (ch != '.') + continue; + UChar firstLetter = *chars++ | 0x20; // first letter of the domain + if (chars >= end) + return FOUND_NONE; // only one letter; must be at least two + firstLetter -= 'a'; + if (firstLetter > 'z' - 'a') + continue; // non-letter followed '.' 
+ int secondLetterMask = domainTwoLetter[firstLetter]; + ch = *chars | 0x20; // second letter of the domain + ch -= 'a'; + if (ch >= 'z' - 'a') + continue; + bool secondMatch = (secondLetterMask & 1 << ch) != 0; + const char* wordMatch = longDomainNames[firstLetter]; + int wordIndex = 0; + while (wordMatch != NULL) { + int len = *wordMatch++; + char match; + do { + match = wordMatch[wordIndex]; + if (match < 0x20) + goto foundDomainStart; + if (chars[wordIndex] != match) + break; + wordIndex++; + } while (true); + wordMatch += len; + if (*wordMatch == '\0') + break; + wordIndex = 0; + } + if (secondMatch) { + wordIndex = 1; + foundDomainStart: + chars += wordIndex; + if (chars < end) { + ch = *chars; + if (ch != '.') { + if (IsDomainChar(ch)) + goto nextDot; + } else if (chars + 1 < end && IsDomainChar(chars[1])) + goto nextDot; + } + // found domain. Search backwards from '@' for beginning of email address + s->mEndResult = chars - start; + chars = atLocation; + if (chars <= start) + goto nextAt; + ch = *--chars; + if (ch == '.') + goto nextAt; // mailbox can't end in period + do { + if (IsMailboxChar(ch) == false) { + chars++; + break; + } + if (chars == start) + break; + ch = *--chars; + } while (true); + UChar firstChar = *chars; + if (firstChar == '.' || firstChar == '@') // mailbox can't start with period or be empty + goto nextAt; + s->mStartResult = chars - start; + return FOUND_COMPLETE; + } + nextDot: + ; + } +nextAt: + chars = atLocation + 1; + } + return FOUND_NONE; +} + +bool IsDomainChar(UChar ch) +{ + static const unsigned body[] = {0x03ff6000, 0x07fffffe, 0x07fffffe}; // 0-9 . - A-Z a-z + ch -= 0x20; + if (ch > 'z' - 0x20) + return false; + return (body[ch >> 5] & 1 << (ch & 0x1f)) != 0; +} + +bool IsMailboxChar(UChar ch) +{ + // According to http://en.wikipedia.org/wiki/Email_address + // ! # $ % & ' * + - . / 0-9 = ? 
+ // A-Z ^ _ + // ` a-z { | } ~ + static const unsigned body[] = {0xa3ffecfa, 0xc7fffffe, 0x7fffffff}; + ch -= 0x20; + if (ch > '~' - 0x20) + return false; + return (body[ch >> 5] & 1 << (ch & 0x1f)) != 0; +} diff --git a/Source/WebKit/android/content/PhoneEmailDetector.h b/Source/WebKit/android/content/PhoneEmailDetector.h new file mode 100644 index 0000000..b61de62 --- /dev/null +++ b/Source/WebKit/android/content/PhoneEmailDetector.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2011 Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "content/content_detector.h" +#include "PlatformString.h" + +#define NAVIGATION_MAX_PHONE_LENGTH 14 + +struct FindState { + int mStartResult; + int mEndResult; + char* mPattern; + UChar mStore[NAVIGATION_MAX_PHONE_LENGTH + 1]; + UChar* mStorePtr; + UChar mBackOne; + UChar mBackTwo; + UChar mCurrent; + bool mOpenParen; + bool mInitialized; + bool mContinuationNode; +}; + +enum FoundState { + FOUND_NONE, + FOUND_PARTIAL, + FOUND_COMPLETE +}; + +// Searches for phone numbers (US only) or email addresses based off of the navcache code +class PhoneEmailDetector : public ContentDetector { +public: + PhoneEmailDetector(); + virtual ~PhoneEmailDetector() {} + +private: + // Implementation of ContentDetector. + virtual bool FindContent(const string16::const_iterator& begin, + const string16::const_iterator& end, + size_t* start_pos, + size_t* end_pos); + + virtual std::string GetContentText(const WebKit::WebRange& range); + virtual GURL GetIntentURL(const std::string& content_text); + virtual size_t GetMaximumContentLength() { + return NAVIGATION_MAX_PHONE_LENGTH * 4; + } + + DISALLOW_COPY_AND_ASSIGN(PhoneEmailDetector); + + FindState m_findState; + FoundState m_foundResult; + const char* m_prefix; +}; diff --git a/Source/WebKit/android/jni/AndroidHitTestResult.cpp b/Source/WebKit/android/jni/AndroidHitTestResult.cpp index bd3f458..f5dcc48 100644 --- a/Source/WebKit/android/jni/AndroidHitTestResult.cpp +++ b/Source/WebKit/android/jni/AndroidHitTestResult.cpp @@ -29,6 +29,7 @@ #include "AndroidHitTestResult.h" #include "content/address_detector.h" +#include "content/PhoneEmailDetector.h" #include "android/WebHitTestInfo.h" #include "Document.h" #include "Element.h" @@ -149,8 +150,12 @@ void AndroidHitTestResult::buildHighlightRects() void AndroidHitTestResult::searchContentDetectors() { AddressDetector address; + PhoneEmailDetector phoneEmail; WebKit::WebHitTestInfo webHitTest(m_hitTestResult); m_searchResult = address.FindTappedContent(webHitTest); + if 
(!m_searchResult.valid) { + m_searchResult = phoneEmail.FindTappedContent(webHitTest); + } if (m_searchResult.valid) { m_highlightRects.clear(); RefPtr<Range> range = (PassRefPtr<Range>) m_searchResult.range; |