summaryrefslogtreecommitdiffstats
path: root/WebCore/editing/TextIterator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/editing/TextIterator.cpp')
-rw-r--r--WebCore/editing/TextIterator.cpp216
1 files changed, 18 insertions, 198 deletions
diff --git a/WebCore/editing/TextIterator.cpp b/WebCore/editing/TextIterator.cpp
index a3edd38..2ea16fb 100644
--- a/WebCore/editing/TextIterator.cpp
+++ b/WebCore/editing/TextIterator.cpp
@@ -38,8 +38,6 @@
#include "RenderTableRow.h"
#include "RenderTextControl.h"
#include "RenderTextFragment.h"
-#include "TextBoundaries.h"
-#include "TextBreakIterator.h"
#include "VisiblePosition.h"
#include "visible_units.h"
@@ -58,18 +56,14 @@ using namespace HTMLNames;
// Buffer that knows how to compare with a search target.
// Keeps enough of the previous text to be able to search in the future, but no more.
// Non-breaking spaces are always equal to normal spaces.
-// Case folding is also done if the CaseInsensitive option is specified.
-// Matches are further filtered if the AtWordStarts option is specified, although some
-// matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
+// Case folding is also done if <isCaseSensitive> is false.
class SearchBuffer : public Noncopyable {
public:
- SearchBuffer(const String& target, FindOptions);
+ SearchBuffer(const String& target, bool isCaseSensitive);
~SearchBuffer();
// Returns number of characters appended; guaranteed to be in the range [1, length].
size_t append(const UChar*, size_t length);
- bool needsMoreContext() const;
- void prependContext(const UChar*, size_t length);
void reachedBreak();
// Result is the size in characters of what was found.
@@ -81,16 +75,11 @@ public:
private:
bool isBadMatch(const UChar*, size_t length) const;
- bool isWordStartMatch(size_t start, size_t length) const;
String m_target;
- FindOptions m_options;
-
Vector<UChar> m_buffer;
size_t m_overlap;
- size_t m_prefixLength;
bool m_atBreak;
- bool m_needsMoreContext;
bool m_targetRequiresKanaWorkaround;
Vector<UChar> m_normalizedTarget;
@@ -103,7 +92,7 @@ private:
size_t length() const;
String m_target;
- FindOptions m_options;
+ bool m_isCaseSensitive;
Vector<UChar> m_buffer;
Vector<bool> m_isCharacterStartBuffer;
@@ -1839,46 +1828,9 @@ static void normalizeCharacters(const UChar* characters, unsigned length, Vector
ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
}
-static bool isNonLatin1Separator(UChar32 character)
-{
- ASSERT_ARG(character, character >= 256);
-
- return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK);
-}
-
-static inline bool isSeparator(UChar32 character)
-{
- static const bool latin1SeparatorTable[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // : ; < = > ?
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // [ \ ] ^ _
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // `
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // { | } ~
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
- };
-
- if (character < 256)
- return latin1SeparatorTable[character];
-
- return isNonLatin1Separator(character);
-}
-
-inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
+inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
: m_target(target)
- , m_options(options)
- , m_prefixLength(0)
, m_atBreak(true)
- , m_needsMoreContext(options & AtWordStarts)
, m_targetRequiresKanaWorkaround(containsKanaLetters(m_target))
{
ASSERT(!m_target.isEmpty());
@@ -1892,17 +1844,6 @@ inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize));
m_overlap = m_buffer.capacity() / 4;
- if ((m_options & AtWordStarts) && targetLength) {
- UChar32 targetFirstCharacter;
- U16_GET(m_target.characters(), 0, 0, targetLength, targetFirstCharacter);
- // Characters in the separator category never really occur at the beginning of a word,
- // so if the target begins with such a character, we just ignore the AtWordStart option.
- if (isSeparator(targetFirstCharacter)) {
- m_options &= ~AtWordStarts;
- m_needsMoreContext = false;
- }
- }
-
// Grab the single global searcher.
// If we ever have a reason to do more than once search buffer at once, we'll have
// to move to multiple searchers.
@@ -1911,7 +1852,7 @@ inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
UStringSearch* searcher = WebCore::searcher();
UCollator* collator = usearch_getCollator(searcher);
- UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
+ UCollationStrength strength = isCaseSensitive ? UCOL_TERTIARY : UCOL_PRIMARY;
if (ucol_getStrength(collator) != strength) {
ucol_setStrength(collator, strength);
usearch_reset(searcher);
@@ -1937,11 +1878,9 @@ inline size_t SearchBuffer::append(const UChar* characters, size_t length)
if (m_atBreak) {
m_buffer.shrink(0);
- m_prefixLength = 0;
m_atBreak = false;
} else if (m_buffer.size() == m_buffer.capacity()) {
memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
- m_prefixLength -= min(m_prefixLength, m_buffer.size() - m_overlap);
m_buffer.shrink(m_overlap);
}
@@ -1953,35 +1892,6 @@ inline size_t SearchBuffer::append(const UChar* characters, size_t length)
return usableLength;
}
-inline bool SearchBuffer::needsMoreContext() const
-{
- return m_needsMoreContext;
-}
-
-inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
-{
- ASSERT(m_needsMoreContext);
- ASSERT(m_prefixLength == m_buffer.size());
-
- if (!length)
- return;
-
- m_atBreak = false;
-
- size_t wordBoundaryContextStart = length;
- if (wordBoundaryContextStart) {
- U16_BACK_1(characters, 0, wordBoundaryContextStart);
- wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
- }
-
- size_t usableLength = min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
- m_buffer.prepend(characters + length - usableLength, usableLength);
- m_prefixLength += usableLength;
-
- if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
- m_needsMoreContext = false;
-}
-
inline bool SearchBuffer::atBreak() const
{
return m_atBreak;
@@ -2052,55 +1962,6 @@ inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) con
}
}
-inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
-{
- ASSERT(m_options & AtWordStarts);
-
- if (!start)
- return true;
-
- if (m_options & TreatMedialCapitalAsWordStart) {
- int size = m_buffer.size();
- int offset = start;
- UChar32 firstCharacter;
- U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
- UChar32 previousCharacter;
- U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
-
- if (isSeparator(firstCharacter)) {
- // The start of a separator run is a word start (".org" in "webkit.org").
- if (!isSeparator(previousCharacter))
- return true;
- } else if (isASCIIUpper(firstCharacter)) {
- // The start of an uppercase run is a word start ("Kit" in "WebKit").
- if (!isASCIIUpper(previousCharacter))
- return true;
- // The last character of an uppercase run followed by a non-separator, non-digit
- // is a word start ("Request" in "XMLHTTPRequest").
- offset = start;
- U16_FWD_1(m_buffer.data(), offset, size);
- UChar32 nextCharacter = 0;
- if (offset < size)
- U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
- if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
- return true;
- } else if (isASCIIDigit(firstCharacter)) {
- // The start of a digit run is a word start ("2" in "WebKit2").
- if (!isASCIIDigit(previousCharacter))
- return true;
- } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
- // The start of a non-separator, non-uppercase, non-digit run is a word start,
- // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
- return true;
- }
- }
-
- size_t wordBreakSearchStart = start + length;
- while (wordBreakSearchStart > start)
- wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
- return wordBreakSearchStart == start;
-}
-
inline size_t SearchBuffer::search(size_t& start)
{
size_t size = m_buffer.size();
@@ -2118,10 +1979,7 @@ inline size_t SearchBuffer::search(size_t& start)
usearch_setText(searcher, m_buffer.data(), size, &status);
ASSERT(status == U_ZERO_ERROR);
- usearch_setOffset(searcher, m_prefixLength, &status);
- ASSERT(status == U_ZERO_ERROR);
-
- int matchStart = usearch_next(searcher, &status);
+ int matchStart = usearch_first(searcher, &status);
ASSERT(status == U_ZERO_ERROR);
nextMatch:
@@ -2134,18 +1992,8 @@ nextMatch:
// The same match may appear later, matching more characters,
// possibly including a combining character that's not yet in the buffer.
if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
- size_t overlap = m_overlap;
- if (m_options & AtWordStarts) {
- // Ensure that there is sufficient context before matchStart the next time around for
- // determining if it is at a word boundary.
- int wordBoundaryContextStart = matchStart;
- U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
- wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
- overlap = min(size - 1, max(overlap, size - wordBoundaryContextStart));
- }
- memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
- m_prefixLength -= min(m_prefixLength, size - overlap);
- m_buffer.shrink(overlap);
+ memcpy(m_buffer.data(), m_buffer.data() + size - m_overlap, m_overlap * sizeof(UChar));
+ m_buffer.shrink(m_overlap);
return 0;
}
@@ -2153,7 +2001,7 @@ nextMatch:
ASSERT(matchStart + matchedLength <= size);
// If this match is "bad", move on to the next match.
- if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
+ if (isBadMatch(m_buffer.data() + matchStart, matchedLength)) {
matchStart = usearch_next(searcher, &status);
ASSERT(status == U_ZERO_ERROR);
goto nextMatch;
@@ -2161,7 +2009,6 @@ nextMatch:
size_t newSize = size - (matchStart + 1);
memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
- m_prefixLength -= min<size_t>(m_prefixLength, matchStart + 1);
m_buffer.shrink(newSize);
start = size - matchStart;
@@ -2170,9 +2017,9 @@ nextMatch:
#else // !ICU_UNICODE
-inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
- : m_target(options & CaseInsensitive ? target.foldCase() : target)
- , m_options(options)
+inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
+ : m_target(isCaseSensitive ? target : target.foldCase())
+ , m_isCaseSensitive(isCaseSensitive)
, m_buffer(m_target.length())
, m_isCharacterStartBuffer(m_target.length())
, m_isBufferFull(false)
@@ -2211,7 +2058,7 @@ inline void SearchBuffer::append(UChar c, bool isStart)
inline size_t SearchBuffer::append(const UChar* characters, size_t length)
{
ASSERT(length);
- if (!(m_options & CaseInsensitive)) {
+ if (m_isCaseSensitive) {
append(characters[0], true);
return 1;
}
@@ -2231,16 +2078,6 @@ inline size_t SearchBuffer::append(const UChar* characters, size_t length)
return 1;
}
-inline bool SearchBuffer::needsMoreContext() const
-{
- return false;
-}
-
-void SearchBuffer::prependContext(const UChar*, size_t)
-{
- ASSERT_NOT_REACHED();
-}
-
inline size_t SearchBuffer::search(size_t& start)
{
if (!m_isBufferFull)
@@ -2495,24 +2332,12 @@ static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward)
return result.release();
}
-static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
+static size_t findPlainText(CharacterIterator& it, const String& target, bool forward, bool caseSensitive, size_t& matchStart)
{
matchStart = 0;
size_t matchLength = 0;
- SearchBuffer buffer(target, options);
-
- if (buffer.needsMoreContext()) {
- RefPtr<Range> startRange = it.range();
- RefPtr<Range> beforeStartRange = startRange->ownerDocument()->createRange();
- ExceptionCode ec = 0;
- beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), ec);
- for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
- buffer.prependContext(backwardsIterator.characters(), backwardsIterator.length());
- if (!buffer.needsMoreContext())
- break;
- }
- }
+ SearchBuffer buffer(target, caseSensitive);
while (!it.atEnd()) {
it.advance(buffer.append(it.characters(), it.length()));
@@ -2526,7 +2351,7 @@ tryAgain:
matchLength = newMatchLength;
// If searching forward, stop on the first match.
// If searching backward, don't stop, so we end up with the last match.
- if (!(options & Backwards))
+ if (forward)
break;
goto tryAgain;
}
@@ -2541,19 +2366,14 @@ tryAgain:
PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive)
{
- return findPlainText(range, target, (forward ? 0 : Backwards) | (caseSensitive ? 0 : CaseInsensitive));
-}
-
-PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
-{
// First, find the text.
size_t matchStart;
size_t matchLength;
{
CharacterIterator findIterator(range, TextIteratorEntersTextControls);
- matchLength = findPlainText(findIterator, target, options, matchStart);
+ matchLength = findPlainText(findIterator, target, forward, caseSensitive, matchStart);
if (!matchLength)
- return collapsedToBoundary(range, !(options & Backwards));
+ return collapsedToBoundary(range, forward);
}
// Then, find the document position of the start and the end of the text.