diff options
Diffstat (limited to 'WebCore/editing/TextIterator.cpp')
-rw-r--r-- | WebCore/editing/TextIterator.cpp | 525 |
1 files changed, 381 insertions, 144 deletions
diff --git a/WebCore/editing/TextIterator.cpp b/WebCore/editing/TextIterator.cpp index 093e851..f617889 100644 --- a/WebCore/editing/TextIterator.cpp +++ b/WebCore/editing/TextIterator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. * Copyright (C) 2005 Alexey Proskuryakov. * * Redistribution and use in source and binary forms, with or without @@ -38,10 +38,15 @@ #include "RenderTableCell.h" #include "RenderTableRow.h" #include "RenderTextControl.h" +#include "VisiblePosition.h" #include "visible_units.h" -using namespace std; +#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION +#include <unicode/usearch.h> +#endif + using namespace WTF::Unicode; +using namespace std; namespace WebCore { @@ -49,26 +54,45 @@ using namespace HTMLNames; // Buffer that knows how to compare with a search target. // Keeps enough of the previous text to be able to search in the future, but no more. -class CircularSearchBuffer : Noncopyable { +// Non-breaking spaces are always equal to normal spaces. +// Case folding is also done if <isCaseSensitive> is false. +class SearchBuffer : Noncopyable { public: - CircularSearchBuffer(const String& target, bool isCaseSensitive); + SearchBuffer(const String& target, bool isCaseSensitive); + ~SearchBuffer(); + + // Returns number of characters appended; guaranteed to be in the range [1, length]. + size_t append(const UChar*, size_t length); + void reachedBreak(); - void clear() { m_cursor = 0; m_isBufferFull = false; } - void append(UChar); + // Result is the size in characters of what was found. + // And <startOffset> is the number of characters back to the start of what was found. + size_t search(size_t& startOffset); + bool atBreak() const; - bool isMatch() const; - unsigned length() const; +#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION + +private: + String m_target; + Vector<UChar> m_buffer; + size_t m_overlap; + bool m_atBreak; + +#else private: void append(UChar, bool isCharacterStart); + size_t length() const; String m_target; bool m_isCaseSensitive; - Vector<UChar> m_characterBuffer; + Vector<UChar> m_buffer; Vector<bool> m_isCharacterStartBuffer; bool m_isBufferFull; - unsigned m_cursor; + size_t m_cursor; + +#endif }; // -------- @@ -98,15 +122,13 @@ TextIterator::TextIterator(const Range* r, bool emitCharactersBetweenAllVisibleP if (!r) return; - ExceptionCode ec = 0; - // get and validate the range endpoints - Node *startContainer = r->startContainer(ec); - int startOffset = r->startOffset(ec); - Node *endContainer = r->endContainer(ec); - int endOffset = r->endOffset(ec); - if (ec) + Node* startContainer = r->startContainer(); + if (!startContainer) return; + int startOffset = r->startOffset(); + Node* endContainer = r->endContainer(); + int endOffset = r->endOffset(); // Callers should be handing us well-formed ranges. If we discover that this isn't // the case, we could consider changing this assertion to an early return. @@ -264,7 +286,7 @@ static inline bool compareBoxStart(const InlineTextBox *first, const InlineTextB bool TextIterator::handleTextNode() { - RenderText* renderer = static_cast<RenderText*>(m_node->renderer()); + RenderText* renderer = toRenderText(m_node->renderer()); if (renderer->style()->visibility() != VISIBLE) return false; @@ -311,12 +333,12 @@ bool TextIterator::handleTextNode() void TextIterator::handleTextBox() { - RenderText *renderer = static_cast<RenderText *>(m_node->renderer()); + RenderText *renderer = toRenderText(m_node->renderer()); String str = renderer->text(); int start = m_offset; int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX; while (m_textBox) { - int textBoxStart = m_textBox->m_start; + int textBoxStart = m_textBox->start(); int runStart = max(textBoxStart, start); // Check for collapsed space at the start of this run. @@ -324,10 +346,16 @@ void TextIterator::handleTextBox() bool needSpace = m_lastTextNodeEndedWithCollapsedSpace || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0); if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) { - emitCharacter(' ', m_node, 0, runStart, runStart); + if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') { + unsigned spaceRunStart = runStart - 1; + while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ') + --spaceRunStart; + emitText(m_node, spaceRunStart, spaceRunStart + 1); + } else + emitCharacter(' ', m_node, 0, runStart, runStart); return; } - int textBoxEnd = textBoxStart + m_textBox->m_len; + int textBoxEnd = textBoxStart + m_textBox->len(); int runEnd = min(textBoxEnd, end); // Determine what the next text box will be, but don't advance yet @@ -360,7 +388,7 @@ void TextIterator::handleTextBox() return; // Advance and return - int nextRunStart = nextTextBox ? nextTextBox->m_start : str.length(); + int nextRunStart = nextTextBox ? nextTextBox->start() : str.length(); if (nextRunStart > runEnd) m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end m_textBox = nextTextBox; @@ -508,7 +536,7 @@ static bool shouldEmitExtraNewlineForNode(Node* node) // result even without margin collapsing. For example: <div><p>text</p></div> // will work right even if both the <div> and the <p> have bottom margins. RenderObject* r = node->renderer(); - if (!r) + if (!r || !r->isBox()) return false; // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears @@ -522,7 +550,7 @@ static bool shouldEmitExtraNewlineForNode(Node* node) || node->hasTagName(pTag)) { RenderStyle* style = r->style(); if (style) { - int bottomMargin = r->collapsedMarginBottom(); + int bottomMargin = toRenderBox(r)->collapsedMarginBottom(); int fontSize = style->fontDescription().computedPixelSize(); if (bottomMargin * 2 >= fontSize) return true; @@ -532,6 +560,7 @@ static bool shouldEmitExtraNewlineForNode(Node* node) return false; } +// Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). bool TextIterator::shouldRepresentNodeOffsetZero() { if (m_emitCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable()) @@ -554,23 +583,29 @@ bool TextIterator::shouldRepresentNodeOffsetZero() // is whether the inline vs block flow changed since the previous visible element. // I think we're already in a special enough case that that won't be needed, tho. - // If we are at the start, obviously no newline is needed. + // No character needed if this is the first node in the range. if (m_node == m_startContainer) return false; - // If we are outside the start container's subtree, assume we need a newline. + // If we are outside the start container's subtree, assume we need to emit. // FIXME: m_startContainer could be an inline block if (!m_node->isDescendantOf(m_startContainer)) return true; // If we started as m_startContainer offset 0 and the current node is a descendant of // the start container, we already had enough context to correctly decide whether to - // emit a newline after a preceding block. We chose not to emit (m_haveEmitted is false), + // emit after a preceding block. We chose not to emit (m_haveEmitted is false), // so don't second guess that now. // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably // immaterial since we likely would have already emitted something by now. if (m_startOffset == 0) return false; + + // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning. + // Additionally, if the range we are iterating over contains huge sections of unrendered content, + // we would create VisiblePositions on every call to this function without this check. + if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE) + return false; // The currPos.isNotNull() check is needed because positions in non-html content // (like svg) do not have visible positions, and we don't want to emit for them either. @@ -676,10 +711,11 @@ void TextIterator::emitCharacter(UChar c, Node *textNode, Node *offsetBaseNode, m_lastCharacter = c; } -void TextIterator::emitText(Node *textNode, int textStartOffset, int textEndOffset) +void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset) { - RenderText* renderer = static_cast<RenderText*>(m_node->renderer()); + RenderText* renderer = toRenderText(m_node->renderer()); String str = renderer->text(); + ASSERT(str.characters()); m_positionNode = textNode; m_positionOffsetBaseNode = 0; @@ -741,19 +777,12 @@ SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range *r) if (!r) return; - int exception = 0; - Node *startNode = r->startContainer(exception); - if (exception) - return; - Node *endNode = r->endContainer(exception); - if (exception) - return; - int startOffset = r->startOffset(exception); - if (exception) - return; - int endOffset = r->endOffset(exception); - if (exception) + Node* startNode = r->startContainer(); + if (!startNode) return; + Node* endNode = r->endContainer(); + int startOffset = r->startOffset(); + int endOffset = r->endOffset(); if (!startNode->offsetInCharacters()) { if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) { @@ -867,7 +896,7 @@ bool SimplifiedBackwardsTextIterator::handleTextNode() { m_lastTextNode = m_node; - RenderText *renderer = static_cast<RenderText *>(m_node->renderer()); + RenderText *renderer = toRenderText(m_node->renderer()); String str = renderer->text(); if (!renderer->firstTextBox() && str.length() > 0) @@ -968,12 +997,13 @@ PassRefPtr<Range> CharacterIterator::range() const if (m_textIterator.length() <= 1) { ASSERT(m_runOffset == 0); } else { - int exception = 0; - Node *n = r->startContainer(exception); - ASSERT(n == r->endContainer(exception)); - int offset = r->startOffset(exception) + m_runOffset; - r->setStart(n, offset, exception); - r->setEnd(n, offset + 1, exception); + Node* n = r->startContainer(); + ASSERT(n == r->endContainer()); + int offset = r->startOffset() + m_runOffset; + ExceptionCode ec = 0; + r->setStart(n, offset, ec); + r->setEnd(n, offset + 1, ec); + ASSERT(!ec); } } return r.release(); @@ -1037,6 +1067,20 @@ String CharacterIterator::string(int numChars) return String::adopt(result); } +static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length) +{ + it.advance(offset); + RefPtr<Range> start = it.range(); + + if (length > 1) + it.advance(length - 1); + RefPtr<Range> end = it.range(); + + return Range::create(start->startContainer()->document(), + start->startContainer(), start->startOffset(), + end->endContainer(), end->endOffset()); +} + // -------- WordAwareIterator::WordAwareIterator() @@ -1105,7 +1149,7 @@ void WordAwareIterator::advance() } m_buffer.append(m_textIterator.characters(), m_textIterator.length()); int exception = 0; - m_range->setEnd(m_textIterator.range()->endContainer(exception), m_textIterator.range()->endOffset(exception), exception); + m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception); } } @@ -1129,10 +1173,160 @@ const UChar* WordAwareIterator::characters() const // -------- -CircularSearchBuffer::CircularSearchBuffer(const String& s, bool isCaseSensitive) - : m_target(isCaseSensitive ? s : s.foldCase()) +#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION + +static const size_t minimumSearchBufferSize = 8192; + +#ifndef NDEBUG +static bool searcherInUse; +#endif + +static UStringSearch* createSearcher() +{ + // Provide a non-empty pattern and non-empty text so usearch_open will not fail, + // but it doesn't matter exactly what it is, since we don't perform any searches + // without setting both the pattern and the text. + + // Pass empty string for the locale for now to get the Unicode Collation Algorithm, + // rather than something locale-specific. + + UErrorCode status = U_ZERO_ERROR; + UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, "", 0, &status); + ASSERT(status == U_ZERO_ERROR); + return searcher; +} + +static UStringSearch* searcher() +{ + static UStringSearch* searcher = createSearcher(); + return searcher; +} + +static inline void lockSearcher() +{ +#ifndef NDEBUG + ASSERT(!searcherInUse); + searcherInUse = true; +#endif +} + +static inline void unlockSearcher() +{ +#ifndef NDEBUG + ASSERT(searcherInUse); + searcherInUse = false; +#endif +} + +inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive) + : m_target(target) + , m_atBreak(true) +{ + ASSERT(!m_target.isEmpty()); + + size_t targetLength = target.length(); + m_buffer.reserveCapacity(max(targetLength * 8, minimumSearchBufferSize)); + m_overlap = m_buffer.capacity() / 4; + + // Grab the single global searcher. + // If we ever have a reason to do more than once search buffer at once, we'll have + // to move to multiple searchers. + lockSearcher(); + + UStringSearch* searcher = WebCore::searcher(); + UCollator* collator = usearch_getCollator(searcher); + + UCollationStrength strength = isCaseSensitive ? UCOL_TERTIARY : UCOL_PRIMARY; + if (ucol_getStrength(collator) != strength) { + ucol_setStrength(collator, strength); + usearch_reset(searcher); + } + + UErrorCode status = U_ZERO_ERROR; + usearch_setPattern(searcher, m_target.characters(), targetLength, &status); + ASSERT(status == U_ZERO_ERROR); +} + +inline SearchBuffer::~SearchBuffer() +{ + unlockSearcher(); +} + +inline size_t SearchBuffer::append(const UChar* characters, size_t length) +{ + ASSERT(length); + + if (m_atBreak) { + m_buffer.shrink(0); + m_atBreak = false; + } else if (m_buffer.size() == m_buffer.capacity()) { + memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); + m_buffer.shrink(m_overlap); + } + + size_t usableLength = min(m_buffer.capacity() - m_buffer.size(), length); + ASSERT(usableLength); + m_buffer.append(characters, usableLength); + return usableLength; +} + +inline bool SearchBuffer::atBreak() const +{ + return m_atBreak; +} + +inline void SearchBuffer::reachedBreak() +{ + m_atBreak = true; +} + +inline size_t SearchBuffer::search(size_t& start) +{ + size_t size = m_buffer.size(); + if (m_atBreak) { + if (!size) + return 0; + } else { + if (size != m_buffer.capacity()) + return 0; + } + + UStringSearch* searcher = WebCore::searcher(); + + UErrorCode status = U_ZERO_ERROR; + usearch_setText(searcher, m_buffer.data(), size, &status); + ASSERT(status == U_ZERO_ERROR); + + int matchStart = usearch_first(searcher, &status); + ASSERT(status == U_ZERO_ERROR); + if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { + ASSERT(matchStart == USEARCH_DONE); + return 0; + } + + // Matches that start in the overlap area are only tentative. + // The same match may appear later, matching more characters, + // possibly including a combining character that's not yet in the buffer. + if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { + memcpy(m_buffer.data(), m_buffer.data() + size - m_overlap, m_overlap * sizeof(UChar)); + m_buffer.shrink(m_overlap); + return 0; + } + + size_t newSize = size - (matchStart + 1); + memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); + m_buffer.shrink(newSize); + + start = size - matchStart; + return usearch_getMatchedLength(searcher); +} + +#else // !ICU_UNICODE + +inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive) + : m_target(isCaseSensitive ? target : target.foldCase()) , m_isCaseSensitive(isCaseSensitive) - , m_characterBuffer(m_target.length()) + , m_buffer(m_target.length()) , m_isCharacterStartBuffer(m_target.length()) , m_isBufferFull(false) , m_cursor(0) @@ -1141,9 +1335,24 @@ CircularSearchBuffer::CircularSearchBuffer(const String& s, bool isCaseSensitive m_target.replace(noBreakSpace, ' '); } -inline void CircularSearchBuffer::append(UChar c, bool isStart) +inline SearchBuffer::~SearchBuffer() { - m_characterBuffer[m_cursor] = c == noBreakSpace ? ' ' : c; +} + +inline void SearchBuffer::reachedBreak() +{ + m_cursor = 0; + m_isBufferFull = false; +} + +inline bool SearchBuffer::atBreak() const +{ + return !m_cursor && !m_isBufferFull; +} + +inline void SearchBuffer::append(UChar c, bool isStart) +{ + m_buffer[m_cursor] = c == noBreakSpace ? ' ' : c; m_isCharacterStartBuffer[m_cursor] = isStart; if (++m_cursor == m_target.length()) { m_cursor = 0; @@ -1151,16 +1360,17 @@ inline void CircularSearchBuffer::append(UChar c, bool isStart) } } -inline void CircularSearchBuffer::append(UChar c) +inline size_t SearchBuffer::append(const UChar* characters, size_t length) { + ASSERT(length); if (m_isCaseSensitive) { - append(c, true); - return; + append(characters[0], true); + return 1; } const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough UChar foldedCharacters[maxFoldedCharacters]; bool error; - int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, &c, 1, &error); + int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error); ASSERT(!error); ASSERT(numFoldedCharacters); ASSERT(numFoldedCharacters <= maxFoldedCharacters); @@ -1170,34 +1380,48 @@ inline void CircularSearchBuffer::append(UChar c) for (int i = 1; i < numFoldedCharacters; ++i) append(foldedCharacters[i], false); } + return 1; } -inline bool CircularSearchBuffer::isMatch() const +inline size_t SearchBuffer::search(size_t& start) { if (!m_isBufferFull) - return false; + return 0; if (!m_isCharacterStartBuffer[m_cursor]) - return false; + return 0; + + size_t tailSpace = m_target.length() - m_cursor; + if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0) + return 0; + if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0) + return 0; + + start = length(); + + // Now that we've found a match once, we don't want to find it again, because those + // are the SearchBuffer semantics, allowing for a buffer where you append more than one + // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if + // we want to get rid of that in the future we could track this with a separate boolean + // or even move the characters to the start of the buffer and set m_isBufferFull to false. + m_isCharacterStartBuffer[m_cursor] = false; - unsigned tailSpace = m_target.length() - m_cursor; - return memcmp(&m_characterBuffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) == 0 - && memcmp(&m_characterBuffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) == 0; + return start; } // Returns the number of characters that were appended to the buffer (what we are searching in). // That's not necessarily the same length as the passed-in target string, because case folding // can make two strings match even though they're not the same length. -unsigned CircularSearchBuffer::length() const +size_t SearchBuffer::length() const { - ASSERT(isMatch()); - - unsigned bufferSize = m_target.length(); - unsigned length = 0; - for (unsigned i = 0; i < bufferSize; ++i) + size_t bufferSize = m_target.length(); + size_t length = 0; + for (size_t i = 0; i < bufferSize; ++i) length += m_isCharacterStartBuffer[i]; return length; } +#endif // !ICU_UNICODE + // -------- int TextIterator::rangeLength(const Range *r, bool forSelectionPreservation) @@ -1211,23 +1435,8 @@ int TextIterator::rangeLength(const Range *r, bool forSelectionPreservation) PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) { - CharacterIterator chars(entireRange); - - chars.advance(characterOffset); - RefPtr<Range> start = chars.range(); - - chars.advance(characterCount); - RefPtr<Range> end = chars.range(); - - ExceptionCode ec = 0; - RefPtr<Range> result(Range::create(entireRange->ownerDocument(), - start->startContainer(ec), - start->startOffset(ec), - end->startContainer(ec), - end->startOffset(ec))); - ASSERT(!ec); - - return result.release(); + CharacterIterator entireRangeIterator(entireRange); + return characterSubrange(entireRangeIterator, characterOffset, characterCount); } PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int rangeLocation, int rangeLength, bool forSelectionPreservation) @@ -1244,13 +1453,13 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>. if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) { - int exception = 0; textRunRange = it.range(); - resultRange->setStart(textRunRange->startContainer(exception), 0, exception); - ASSERT(exception == 0); - resultRange->setEnd(textRunRange->startContainer(exception), 0, exception); - ASSERT(exception == 0); + ExceptionCode ec = 0; + resultRange->setStart(textRunRange->startContainer(), 0, ec); + ASSERT(!ec); + resultRange->setEnd(textRunRange->startContainer(), 0, ec); + ASSERT(!ec); return resultRange.release(); } @@ -1267,12 +1476,13 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r if (foundStart || foundEnd) { // FIXME: This is a workaround for the fact that the end of a run is often at the wrong // position for emitted '\n's. - if (len == 1 && it.characters()[0] == UChar('\n')) { + if (len == 1 && it.characters()[0] == '\n') { Position runStart = textRunRange->startPosition(); Position runEnd = VisiblePosition(runStart).next().deepEquivalent(); if (runEnd.isNotNull()) { ExceptionCode ec = 0; textRunRange->setEnd(runEnd.node(), runEnd.offset(), ec); + ASSERT(!ec); } } } @@ -1280,27 +1490,27 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r if (foundStart) { startRangeFound = true; int exception = 0; - if (textRunRange->startContainer(exception)->isTextNode()) { + if (textRunRange->startContainer()->isTextNode()) { int offset = rangeLocation - docTextPosition; - resultRange->setStart(textRunRange->startContainer(exception), offset + textRunRange->startOffset(exception), exception); + resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); } else { if (rangeLocation == docTextPosition) - resultRange->setStart(textRunRange->startContainer(exception), textRunRange->startOffset(exception), exception); + resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception); else - resultRange->setStart(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception); + resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception); } } if (foundEnd) { int exception = 0; - if (textRunRange->startContainer(exception)->isTextNode()) { + if (textRunRange->startContainer()->isTextNode()) { int offset = rangeEnd - docTextPosition; - resultRange->setEnd(textRunRange->startContainer(exception), offset + textRunRange->startOffset(exception), exception); + resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); } else { if (rangeEnd == docTextPosition) - resultRange->setEnd(textRunRange->startContainer(exception), textRunRange->startOffset(exception), exception); + resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception); else - resultRange->setEnd(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception); + resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); } docTextPosition += len; break; @@ -1313,7 +1523,7 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds int exception = 0; - resultRange->setEnd(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception); + resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); } return resultRange.release(); @@ -1321,7 +1531,7 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r // -------- -UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength) +UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString) { UChar* result = 0; @@ -1376,13 +1586,17 @@ exit: free(textSegments->at(i).first); delete textSegments; } + + if (isDisplayString && r->ownerDocument()) + r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength); + return result; } String plainText(const Range* r) { unsigned length; - UChar* buf = plainTextToMallocAllocatedBuffer(r, length); + UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false); if (!buf) return ""; String result(buf, length); @@ -1390,56 +1604,79 @@ String plainText(const Range* r) return result; } -PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive) +static inline bool isAllCollapsibleWhitespace(const String& string) { - // FIXME: Can we do Boyer-Moore or equivalent instead for speed? + const UChar* characters = string.characters(); + unsigned length = string.length(); + for (unsigned i = 0; i < length; ++i) { + if (!isCollapsibleWhitespace(characters[i])) + return false; + } + return true; +} +static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward) +{ ExceptionCode ec = 0; RefPtr<Range> result = range->cloneRange(ec); + ASSERT(!ec); result->collapse(!forward, ec); + ASSERT(!ec); + return result.release(); +} - // FIXME: This code does not allow \n at the moment because of issues with <br>. - // Once we fix those, we can remove this check. - if (target.isEmpty() || target.find('\n') != -1) - return result.release(); - - unsigned matchStart = 0; - unsigned matchLength = 0; - { - CircularSearchBuffer searchBuffer(target, caseSensitive); - CharacterIterator it(range, false, true); - for (;;) { - if (searchBuffer.isMatch()) { - // Note that we found a match, and where we found it. - unsigned matchEnd = it.characterOffset(); - matchLength = searchBuffer.length(); - ASSERT(matchLength); - ASSERT(matchEnd >= matchLength); - matchStart = matchEnd - matchLength; - // If searching forward, stop on the first match. - // If searching backward, don't stop, so we end up with the last match. - if (forward) - break; - } - if (it.atBreak()) { - if (it.atEnd()) - break; - searchBuffer.clear(); - } - searchBuffer.append(it.characters()[0]); - it.advance(1); +static size_t findPlainText(CharacterIterator& it, const String& target, bool forward, bool caseSensitive, size_t& matchStart) +{ + matchStart = 0; + size_t matchLength = 0; + + SearchBuffer buffer(target, caseSensitive); + + while (!it.atEnd()) { + it.advance(buffer.append(it.characters(), it.length())); +tryAgain: + size_t matchStartOffset; + if (size_t newMatchLength = buffer.search(matchStartOffset)) { + // Note that we found a match, and where we found it. + size_t lastCharacterInBufferOffset = it.characterOffset(); + ASSERT(lastCharacterInBufferOffset >= matchStartOffset); + matchStart = lastCharacterInBufferOffset - matchStartOffset; + matchLength = newMatchLength; + // If searching forward, stop on the first match. + // If searching backward, don't stop, so we end up with the last match. + if (forward) + break; + goto tryAgain; + } + if (it.atBreak() && !buffer.atBreak()) { + buffer.reachedBreak(); + goto tryAgain; } } - if (matchLength) { - CharacterIterator it(range, false, true); - it.advance(matchStart); - result->setStart(it.range()->startContainer(ec), it.range()->startOffset(ec), ec); - it.advance(matchLength - 1); - result->setEnd(it.range()->endContainer(ec), it.range()->endOffset(ec), ec); + return matchLength; +} + +PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive) +{ + // We can't search effectively for a string that's entirely made of collapsible + // whitespace, so we won't even try. This also takes care of the empty string case. + if (isAllCollapsibleWhitespace(target)) + return collapsedToBoundary(range, forward); + + // First, find the text. + size_t matchStart; + size_t matchLength; + { + CharacterIterator findIterator(range, false, true); + matchLength = findPlainText(findIterator, target, forward, caseSensitive, matchStart); + if (!matchLength) + return collapsedToBoundary(range, forward); } - return result.release(); + // Then, find the document position of the start and the end of the text. + CharacterIterator computeRangeIterator(range, false, true); + return characterSubrange(computeRangeIterator, matchStart, matchLength); } } |