summaryrefslogtreecommitdiffstats
path: root/WebCore/editing/TextIterator.cpp
diff options
context:
space:
mode:
authorThe Android Open Source Project <initial-contribution@android.com>2009-03-05 14:34:32 -0800
committerThe Android Open Source Project <initial-contribution@android.com>2009-03-05 14:34:32 -0800
commit635860845790a19bf50bbc51ba8fb66a96dde068 (patch)
treeef6ad9ff73a5b57f65249d4232a202fa77e6a140 /WebCore/editing/TextIterator.cpp
parent8e35f3cfc7fba1d1c829dc557ebad6409cbe16a2 (diff)
downloadexternal_webkit-635860845790a19bf50bbc51ba8fb66a96dde068.zip
external_webkit-635860845790a19bf50bbc51ba8fb66a96dde068.tar.gz
external_webkit-635860845790a19bf50bbc51ba8fb66a96dde068.tar.bz2
auto import from //depot/cupcake/@136594
Diffstat (limited to 'WebCore/editing/TextIterator.cpp')
-rw-r--r--WebCore/editing/TextIterator.cpp525
1 files changed, 381 insertions, 144 deletions
diff --git a/WebCore/editing/TextIterator.cpp b/WebCore/editing/TextIterator.cpp
index 093e851..f617889 100644
--- a/WebCore/editing/TextIterator.cpp
+++ b/WebCore/editing/TextIterator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
* Copyright (C) 2005 Alexey Proskuryakov.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,10 +38,15 @@
#include "RenderTableCell.h"
#include "RenderTableRow.h"
#include "RenderTextControl.h"
+#include "VisiblePosition.h"
#include "visible_units.h"
-using namespace std;
+#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
+#include <unicode/usearch.h>
+#endif
+
using namespace WTF::Unicode;
+using namespace std;
namespace WebCore {
@@ -49,26 +54,45 @@ using namespace HTMLNames;
// Buffer that knows how to compare with a search target.
// Keeps enough of the previous text to be able to search in the future, but no more.
-class CircularSearchBuffer : Noncopyable {
+// Non-breaking spaces are always equal to normal spaces.
+// Case folding is also done if <isCaseSensitive> is false.
+class SearchBuffer : Noncopyable {
public:
- CircularSearchBuffer(const String& target, bool isCaseSensitive);
+ SearchBuffer(const String& target, bool isCaseSensitive);
+ ~SearchBuffer();
+
+ // Returns number of characters appended; guaranteed to be in the range [1, length].
+ size_t append(const UChar*, size_t length);
+ void reachedBreak();
- void clear() { m_cursor = 0; m_isBufferFull = false; }
- void append(UChar);
+ // Result is the size in characters of what was found.
+ // And <startOffset> is the number of characters back to the start of what was found.
+ size_t search(size_t& startOffset);
+ bool atBreak() const;
- bool isMatch() const;
- unsigned length() const;
+#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
+
+private:
+ String m_target;
+ Vector<UChar> m_buffer;
+ size_t m_overlap;
+ bool m_atBreak;
+
+#else
private:
void append(UChar, bool isCharacterStart);
+ size_t length() const;
String m_target;
bool m_isCaseSensitive;
- Vector<UChar> m_characterBuffer;
+ Vector<UChar> m_buffer;
Vector<bool> m_isCharacterStartBuffer;
bool m_isBufferFull;
- unsigned m_cursor;
+ size_t m_cursor;
+
+#endif
};
// --------
@@ -98,15 +122,13 @@ TextIterator::TextIterator(const Range* r, bool emitCharactersBetweenAllVisibleP
if (!r)
return;
- ExceptionCode ec = 0;
-
// get and validate the range endpoints
- Node *startContainer = r->startContainer(ec);
- int startOffset = r->startOffset(ec);
- Node *endContainer = r->endContainer(ec);
- int endOffset = r->endOffset(ec);
- if (ec)
+ Node* startContainer = r->startContainer();
+ if (!startContainer)
return;
+ int startOffset = r->startOffset();
+ Node* endContainer = r->endContainer();
+ int endOffset = r->endOffset();
// Callers should be handing us well-formed ranges. If we discover that this isn't
// the case, we could consider changing this assertion to an early return.
@@ -264,7 +286,7 @@ static inline bool compareBoxStart(const InlineTextBox *first, const InlineTextB
bool TextIterator::handleTextNode()
{
- RenderText* renderer = static_cast<RenderText*>(m_node->renderer());
+ RenderText* renderer = toRenderText(m_node->renderer());
if (renderer->style()->visibility() != VISIBLE)
return false;
@@ -311,12 +333,12 @@ bool TextIterator::handleTextNode()
void TextIterator::handleTextBox()
{
- RenderText *renderer = static_cast<RenderText *>(m_node->renderer());
+ RenderText *renderer = toRenderText(m_node->renderer());
String str = renderer->text();
int start = m_offset;
int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
while (m_textBox) {
- int textBoxStart = m_textBox->m_start;
+ int textBoxStart = m_textBox->start();
int runStart = max(textBoxStart, start);
// Check for collapsed space at the start of this run.
@@ -324,10 +346,16 @@ void TextIterator::handleTextBox()
bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
|| (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) {
- emitCharacter(' ', m_node, 0, runStart, runStart);
+ if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
+ unsigned spaceRunStart = runStart - 1;
+ while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
+ --spaceRunStart;
+ emitText(m_node, spaceRunStart, spaceRunStart + 1);
+ } else
+ emitCharacter(' ', m_node, 0, runStart, runStart);
return;
}
- int textBoxEnd = textBoxStart + m_textBox->m_len;
+ int textBoxEnd = textBoxStart + m_textBox->len();
int runEnd = min(textBoxEnd, end);
// Determine what the next text box will be, but don't advance yet
@@ -360,7 +388,7 @@ void TextIterator::handleTextBox()
return;
// Advance and return
- int nextRunStart = nextTextBox ? nextTextBox->m_start : str.length();
+ int nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
if (nextRunStart > runEnd)
m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
m_textBox = nextTextBox;
@@ -508,7 +536,7 @@ static bool shouldEmitExtraNewlineForNode(Node* node)
// result even without margin collapsing. For example: <div><p>text</p></div>
// will work right even if both the <div> and the <p> have bottom margins.
RenderObject* r = node->renderer();
- if (!r)
+ if (!r || !r->isBox())
return false;
// NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
@@ -522,7 +550,7 @@ static bool shouldEmitExtraNewlineForNode(Node* node)
|| node->hasTagName(pTag)) {
RenderStyle* style = r->style();
if (style) {
- int bottomMargin = r->collapsedMarginBottom();
+ int bottomMargin = toRenderBox(r)->collapsedMarginBottom();
int fontSize = style->fontDescription().computedPixelSize();
if (bottomMargin * 2 >= fontSize)
return true;
@@ -532,6 +560,7 @@ static bool shouldEmitExtraNewlineForNode(Node* node)
return false;
}
+// Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
bool TextIterator::shouldRepresentNodeOffsetZero()
{
if (m_emitCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable())
@@ -554,23 +583,29 @@ bool TextIterator::shouldRepresentNodeOffsetZero()
// is whether the inline vs block flow changed since the previous visible element.
// I think we're already in a special enough case that that won't be needed, tho.
- // If we are at the start, obviously no newline is needed.
+ // No character needed if this is the first node in the range.
if (m_node == m_startContainer)
return false;
- // If we are outside the start container's subtree, assume we need a newline.
+ // If we are outside the start container's subtree, assume we need to emit.
// FIXME: m_startContainer could be an inline block
if (!m_node->isDescendantOf(m_startContainer))
return true;
// If we started as m_startContainer offset 0 and the current node is a descendant of
// the start container, we already had enough context to correctly decide whether to
- // emit a newline after a preceding block. We chose not to emit (m_haveEmitted is false),
+ // emit after a preceding block. We chose not to emit (m_haveEmitted is false),
// so don't second guess that now.
// NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
// immaterial since we likely would have already emitted something by now.
if (m_startOffset == 0)
return false;
+
+ // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
+ // Additionally, if the range we are iterating over contains huge sections of unrendered content,
+ // we would create VisiblePositions on every call to this function without this check.
+ if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE)
+ return false;
// The currPos.isNotNull() check is needed because positions in non-html content
// (like svg) do not have visible positions, and we don't want to emit for them either.
@@ -676,10 +711,11 @@ void TextIterator::emitCharacter(UChar c, Node *textNode, Node *offsetBaseNode,
m_lastCharacter = c;
}
-void TextIterator::emitText(Node *textNode, int textStartOffset, int textEndOffset)
+void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
{
- RenderText* renderer = static_cast<RenderText*>(m_node->renderer());
+ RenderText* renderer = toRenderText(m_node->renderer());
String str = renderer->text();
+ ASSERT(str.characters());
m_positionNode = textNode;
m_positionOffsetBaseNode = 0;
@@ -741,19 +777,12 @@ SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range *r)
if (!r)
return;
- int exception = 0;
- Node *startNode = r->startContainer(exception);
- if (exception)
- return;
- Node *endNode = r->endContainer(exception);
- if (exception)
- return;
- int startOffset = r->startOffset(exception);
- if (exception)
- return;
- int endOffset = r->endOffset(exception);
- if (exception)
+ Node* startNode = r->startContainer();
+ if (!startNode)
return;
+ Node* endNode = r->endContainer();
+ int startOffset = r->startOffset();
+ int endOffset = r->endOffset();
if (!startNode->offsetInCharacters()) {
if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
@@ -867,7 +896,7 @@ bool SimplifiedBackwardsTextIterator::handleTextNode()
{
m_lastTextNode = m_node;
- RenderText *renderer = static_cast<RenderText *>(m_node->renderer());
+ RenderText *renderer = toRenderText(m_node->renderer());
String str = renderer->text();
if (!renderer->firstTextBox() && str.length() > 0)
@@ -968,12 +997,13 @@ PassRefPtr<Range> CharacterIterator::range() const
if (m_textIterator.length() <= 1) {
ASSERT(m_runOffset == 0);
} else {
- int exception = 0;
- Node *n = r->startContainer(exception);
- ASSERT(n == r->endContainer(exception));
- int offset = r->startOffset(exception) + m_runOffset;
- r->setStart(n, offset, exception);
- r->setEnd(n, offset + 1, exception);
+ Node* n = r->startContainer();
+ ASSERT(n == r->endContainer());
+ int offset = r->startOffset() + m_runOffset;
+ ExceptionCode ec = 0;
+ r->setStart(n, offset, ec);
+ r->setEnd(n, offset + 1, ec);
+ ASSERT(!ec);
}
}
return r.release();
@@ -1037,6 +1067,20 @@ String CharacterIterator::string(int numChars)
return String::adopt(result);
}
+static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length)
+{
+ it.advance(offset);
+ RefPtr<Range> start = it.range();
+
+ if (length > 1)
+ it.advance(length - 1);
+ RefPtr<Range> end = it.range();
+
+ return Range::create(start->startContainer()->document(),
+ start->startContainer(), start->startOffset(),
+ end->endContainer(), end->endOffset());
+}
+
// --------
WordAwareIterator::WordAwareIterator()
@@ -1105,7 +1149,7 @@ void WordAwareIterator::advance()
}
m_buffer.append(m_textIterator.characters(), m_textIterator.length());
int exception = 0;
- m_range->setEnd(m_textIterator.range()->endContainer(exception), m_textIterator.range()->endOffset(exception), exception);
+ m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception);
}
}
@@ -1129,10 +1173,160 @@ const UChar* WordAwareIterator::characters() const
// --------
-CircularSearchBuffer::CircularSearchBuffer(const String& s, bool isCaseSensitive)
- : m_target(isCaseSensitive ? s : s.foldCase())
+#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
+
+static const size_t minimumSearchBufferSize = 8192;
+
+#ifndef NDEBUG
+static bool searcherInUse;
+#endif
+
+static UStringSearch* createSearcher()
+{
+ // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
+ // but it doesn't matter exactly what it is, since we don't perform any searches
+ // without setting both the pattern and the text.
+
+ // Pass empty string for the locale for now to get the Unicode Collation Algorithm,
+ // rather than something locale-specific.
+
+ UErrorCode status = U_ZERO_ERROR;
+ UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, "", 0, &status);
+ ASSERT(status == U_ZERO_ERROR);
+ return searcher;
+}
+
+static UStringSearch* searcher()
+{
+ static UStringSearch* searcher = createSearcher();
+ return searcher;
+}
+
+static inline void lockSearcher()
+{
+#ifndef NDEBUG
+ ASSERT(!searcherInUse);
+ searcherInUse = true;
+#endif
+}
+
+static inline void unlockSearcher()
+{
+#ifndef NDEBUG
+ ASSERT(searcherInUse);
+ searcherInUse = false;
+#endif
+}
+
+inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
+ : m_target(target)
+ , m_atBreak(true)
+{
+ ASSERT(!m_target.isEmpty());
+
+ size_t targetLength = target.length();
+ m_buffer.reserveCapacity(max(targetLength * 8, minimumSearchBufferSize));
+ m_overlap = m_buffer.capacity() / 4;
+
+ // Grab the single global searcher.
+ // If we ever have a reason to do more than once search buffer at once, we'll have
+ // to move to multiple searchers.
+ lockSearcher();
+
+ UStringSearch* searcher = WebCore::searcher();
+ UCollator* collator = usearch_getCollator(searcher);
+
+ UCollationStrength strength = isCaseSensitive ? UCOL_TERTIARY : UCOL_PRIMARY;
+ if (ucol_getStrength(collator) != strength) {
+ ucol_setStrength(collator, strength);
+ usearch_reset(searcher);
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+ usearch_setPattern(searcher, m_target.characters(), targetLength, &status);
+ ASSERT(status == U_ZERO_ERROR);
+}
+
+inline SearchBuffer::~SearchBuffer()
+{
+ unlockSearcher();
+}
+
+inline size_t SearchBuffer::append(const UChar* characters, size_t length)
+{
+ ASSERT(length);
+
+ if (m_atBreak) {
+ m_buffer.shrink(0);
+ m_atBreak = false;
+ } else if (m_buffer.size() == m_buffer.capacity()) {
+ memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
+ m_buffer.shrink(m_overlap);
+ }
+
+ size_t usableLength = min(m_buffer.capacity() - m_buffer.size(), length);
+ ASSERT(usableLength);
+ m_buffer.append(characters, usableLength);
+ return usableLength;
+}
+
+inline bool SearchBuffer::atBreak() const
+{
+ return m_atBreak;
+}
+
+inline void SearchBuffer::reachedBreak()
+{
+ m_atBreak = true;
+}
+
+inline size_t SearchBuffer::search(size_t& start)
+{
+ size_t size = m_buffer.size();
+ if (m_atBreak) {
+ if (!size)
+ return 0;
+ } else {
+ if (size != m_buffer.capacity())
+ return 0;
+ }
+
+ UStringSearch* searcher = WebCore::searcher();
+
+ UErrorCode status = U_ZERO_ERROR;
+ usearch_setText(searcher, m_buffer.data(), size, &status);
+ ASSERT(status == U_ZERO_ERROR);
+
+ int matchStart = usearch_first(searcher, &status);
+ ASSERT(status == U_ZERO_ERROR);
+ if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
+ ASSERT(matchStart == USEARCH_DONE);
+ return 0;
+ }
+
+ // Matches that start in the overlap area are only tentative.
+ // The same match may appear later, matching more characters,
+ // possibly including a combining character that's not yet in the buffer.
+ if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
+ memcpy(m_buffer.data(), m_buffer.data() + size - m_overlap, m_overlap * sizeof(UChar));
+ m_buffer.shrink(m_overlap);
+ return 0;
+ }
+
+ size_t newSize = size - (matchStart + 1);
+ memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
+ m_buffer.shrink(newSize);
+
+ start = size - matchStart;
+ return usearch_getMatchedLength(searcher);
+}
+
+#else // !ICU_UNICODE
+
+inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
+ : m_target(isCaseSensitive ? target : target.foldCase())
, m_isCaseSensitive(isCaseSensitive)
- , m_characterBuffer(m_target.length())
+ , m_buffer(m_target.length())
, m_isCharacterStartBuffer(m_target.length())
, m_isBufferFull(false)
, m_cursor(0)
@@ -1141,9 +1335,24 @@ CircularSearchBuffer::CircularSearchBuffer(const String& s, bool isCaseSensitive
m_target.replace(noBreakSpace, ' ');
}
-inline void CircularSearchBuffer::append(UChar c, bool isStart)
+inline SearchBuffer::~SearchBuffer()
{
- m_characterBuffer[m_cursor] = c == noBreakSpace ? ' ' : c;
+}
+
+inline void SearchBuffer::reachedBreak()
+{
+ m_cursor = 0;
+ m_isBufferFull = false;
+}
+
+inline bool SearchBuffer::atBreak() const
+{
+ return !m_cursor && !m_isBufferFull;
+}
+
+inline void SearchBuffer::append(UChar c, bool isStart)
+{
+ m_buffer[m_cursor] = c == noBreakSpace ? ' ' : c;
m_isCharacterStartBuffer[m_cursor] = isStart;
if (++m_cursor == m_target.length()) {
m_cursor = 0;
@@ -1151,16 +1360,17 @@ inline void CircularSearchBuffer::append(UChar c, bool isStart)
}
}
-inline void CircularSearchBuffer::append(UChar c)
+inline size_t SearchBuffer::append(const UChar* characters, size_t length)
{
+ ASSERT(length);
if (m_isCaseSensitive) {
- append(c, true);
- return;
+ append(characters[0], true);
+ return 1;
}
const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough
UChar foldedCharacters[maxFoldedCharacters];
bool error;
- int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, &c, 1, &error);
+ int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error);
ASSERT(!error);
ASSERT(numFoldedCharacters);
ASSERT(numFoldedCharacters <= maxFoldedCharacters);
@@ -1170,34 +1380,48 @@ inline void CircularSearchBuffer::append(UChar c)
for (int i = 1; i < numFoldedCharacters; ++i)
append(foldedCharacters[i], false);
}
+ return 1;
}
-inline bool CircularSearchBuffer::isMatch() const
+inline size_t SearchBuffer::search(size_t& start)
{
if (!m_isBufferFull)
- return false;
+ return 0;
if (!m_isCharacterStartBuffer[m_cursor])
- return false;
+ return 0;
+
+ size_t tailSpace = m_target.length() - m_cursor;
+ if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0)
+ return 0;
+ if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0)
+ return 0;
+
+ start = length();
+
+ // Now that we've found a match once, we don't want to find it again, because those
+ // are the SearchBuffer semantics, allowing for a buffer where you append more than one
+ // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if
+ // we want to get rid of that in the future we could track this with a separate boolean
+ // or even move the characters to the start of the buffer and set m_isBufferFull to false.
+ m_isCharacterStartBuffer[m_cursor] = false;
- unsigned tailSpace = m_target.length() - m_cursor;
- return memcmp(&m_characterBuffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) == 0
- && memcmp(&m_characterBuffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) == 0;
+ return start;
}
// Returns the number of characters that were appended to the buffer (what we are searching in).
// That's not necessarily the same length as the passed-in target string, because case folding
// can make two strings match even though they're not the same length.
-unsigned CircularSearchBuffer::length() const
+size_t SearchBuffer::length() const
{
- ASSERT(isMatch());
-
- unsigned bufferSize = m_target.length();
- unsigned length = 0;
- for (unsigned i = 0; i < bufferSize; ++i)
+ size_t bufferSize = m_target.length();
+ size_t length = 0;
+ for (size_t i = 0; i < bufferSize; ++i)
length += m_isCharacterStartBuffer[i];
return length;
}
+#endif // !ICU_UNICODE
+
// --------
int TextIterator::rangeLength(const Range *r, bool forSelectionPreservation)
@@ -1211,23 +1435,8 @@ int TextIterator::rangeLength(const Range *r, bool forSelectionPreservation)
PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
{
- CharacterIterator chars(entireRange);
-
- chars.advance(characterOffset);
- RefPtr<Range> start = chars.range();
-
- chars.advance(characterCount);
- RefPtr<Range> end = chars.range();
-
- ExceptionCode ec = 0;
- RefPtr<Range> result(Range::create(entireRange->ownerDocument(),
- start->startContainer(ec),
- start->startOffset(ec),
- end->startContainer(ec),
- end->startOffset(ec)));
- ASSERT(!ec);
-
- return result.release();
+ CharacterIterator entireRangeIterator(entireRange);
+ return characterSubrange(entireRangeIterator, characterOffset, characterCount);
}
PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int rangeLocation, int rangeLength, bool forSelectionPreservation)
@@ -1244,13 +1453,13 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r
// FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>.
if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {
- int exception = 0;
textRunRange = it.range();
- resultRange->setStart(textRunRange->startContainer(exception), 0, exception);
- ASSERT(exception == 0);
- resultRange->setEnd(textRunRange->startContainer(exception), 0, exception);
- ASSERT(exception == 0);
+ ExceptionCode ec = 0;
+ resultRange->setStart(textRunRange->startContainer(), 0, ec);
+ ASSERT(!ec);
+ resultRange->setEnd(textRunRange->startContainer(), 0, ec);
+ ASSERT(!ec);
return resultRange.release();
}
@@ -1267,12 +1476,13 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r
if (foundStart || foundEnd) {
// FIXME: This is a workaround for the fact that the end of a run is often at the wrong
// position for emitted '\n's.
- if (len == 1 && it.characters()[0] == UChar('\n')) {
+ if (len == 1 && it.characters()[0] == '\n') {
Position runStart = textRunRange->startPosition();
Position runEnd = VisiblePosition(runStart).next().deepEquivalent();
if (runEnd.isNotNull()) {
ExceptionCode ec = 0;
textRunRange->setEnd(runEnd.node(), runEnd.offset(), ec);
+ ASSERT(!ec);
}
}
}
@@ -1280,27 +1490,27 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r
if (foundStart) {
startRangeFound = true;
int exception = 0;
- if (textRunRange->startContainer(exception)->isTextNode()) {
+ if (textRunRange->startContainer()->isTextNode()) {
int offset = rangeLocation - docTextPosition;
- resultRange->setStart(textRunRange->startContainer(exception), offset + textRunRange->startOffset(exception), exception);
+ resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
} else {
if (rangeLocation == docTextPosition)
- resultRange->setStart(textRunRange->startContainer(exception), textRunRange->startOffset(exception), exception);
+ resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception);
else
- resultRange->setStart(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception);
+ resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception);
}
}
if (foundEnd) {
int exception = 0;
- if (textRunRange->startContainer(exception)->isTextNode()) {
+ if (textRunRange->startContainer()->isTextNode()) {
int offset = rangeEnd - docTextPosition;
- resultRange->setEnd(textRunRange->startContainer(exception), offset + textRunRange->startOffset(exception), exception);
+ resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
} else {
if (rangeEnd == docTextPosition)
- resultRange->setEnd(textRunRange->startContainer(exception), textRunRange->startOffset(exception), exception);
+ resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception);
else
- resultRange->setEnd(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception);
+ resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
}
docTextPosition += len;
break;
@@ -1313,7 +1523,7 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r
if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds
int exception = 0;
- resultRange->setEnd(textRunRange->endContainer(exception), textRunRange->endOffset(exception), exception);
+ resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
}
return resultRange.release();
@@ -1321,7 +1531,7 @@ PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element *scope, int r
// --------
-UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength)
+UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString)
{
UChar* result = 0;
@@ -1376,13 +1586,17 @@ exit:
free(textSegments->at(i).first);
delete textSegments;
}
+
+ if (isDisplayString && r->ownerDocument())
+ r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength);
+
return result;
}
String plainText(const Range* r)
{
unsigned length;
- UChar* buf = plainTextToMallocAllocatedBuffer(r, length);
+ UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false);
if (!buf)
return "";
String result(buf, length);
@@ -1390,56 +1604,79 @@ String plainText(const Range* r)
return result;
}
-PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive)
+static inline bool isAllCollapsibleWhitespace(const String& string)
{
- // FIXME: Can we do Boyer-Moore or equivalent instead for speed?
+ const UChar* characters = string.characters();
+ unsigned length = string.length();
+ for (unsigned i = 0; i < length; ++i) {
+ if (!isCollapsibleWhitespace(characters[i]))
+ return false;
+ }
+ return true;
+}
+static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward)
+{
ExceptionCode ec = 0;
RefPtr<Range> result = range->cloneRange(ec);
+ ASSERT(!ec);
result->collapse(!forward, ec);
+ ASSERT(!ec);
+ return result.release();
+}
- // FIXME: This code does not allow \n at the moment because of issues with <br>.
- // Once we fix those, we can remove this check.
- if (target.isEmpty() || target.find('\n') != -1)
- return result.release();
-
- unsigned matchStart = 0;
- unsigned matchLength = 0;
- {
- CircularSearchBuffer searchBuffer(target, caseSensitive);
- CharacterIterator it(range, false, true);
- for (;;) {
- if (searchBuffer.isMatch()) {
- // Note that we found a match, and where we found it.
- unsigned matchEnd = it.characterOffset();
- matchLength = searchBuffer.length();
- ASSERT(matchLength);
- ASSERT(matchEnd >= matchLength);
- matchStart = matchEnd - matchLength;
- // If searching forward, stop on the first match.
- // If searching backward, don't stop, so we end up with the last match.
- if (forward)
- break;
- }
- if (it.atBreak()) {
- if (it.atEnd())
- break;
- searchBuffer.clear();
- }
- searchBuffer.append(it.characters()[0]);
- it.advance(1);
+static size_t findPlainText(CharacterIterator& it, const String& target, bool forward, bool caseSensitive, size_t& matchStart)
+{
+ matchStart = 0;
+ size_t matchLength = 0;
+
+ SearchBuffer buffer(target, caseSensitive);
+
+ while (!it.atEnd()) {
+ it.advance(buffer.append(it.characters(), it.length()));
+tryAgain:
+ size_t matchStartOffset;
+ if (size_t newMatchLength = buffer.search(matchStartOffset)) {
+ // Note that we found a match, and where we found it.
+ size_t lastCharacterInBufferOffset = it.characterOffset();
+ ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
+ matchStart = lastCharacterInBufferOffset - matchStartOffset;
+ matchLength = newMatchLength;
+ // If searching forward, stop on the first match.
+ // If searching backward, don't stop, so we end up with the last match.
+ if (forward)
+ break;
+ goto tryAgain;
+ }
+ if (it.atBreak() && !buffer.atBreak()) {
+ buffer.reachedBreak();
+ goto tryAgain;
}
}
- if (matchLength) {
- CharacterIterator it(range, false, true);
- it.advance(matchStart);
- result->setStart(it.range()->startContainer(ec), it.range()->startOffset(ec), ec);
- it.advance(matchLength - 1);
- result->setEnd(it.range()->endContainer(ec), it.range()->endOffset(ec), ec);
+ return matchLength;
+}
+
+PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive)
+{
+ // We can't search effectively for a string that's entirely made of collapsible
+ // whitespace, so we won't even try. This also takes care of the empty string case.
+ if (isAllCollapsibleWhitespace(target))
+ return collapsedToBoundary(range, forward);
+
+ // First, find the text.
+ size_t matchStart;
+ size_t matchLength;
+ {
+ CharacterIterator findIterator(range, false, true);
+ matchLength = findPlainText(findIterator, target, forward, caseSensitive, matchStart);
+ if (!matchLength)
+ return collapsedToBoundary(range, forward);
}
- return result.release();
+ // Then, find the document position of the start and the end of the text.
+ CharacterIterator computeRangeIterator(range, false, true);
+ return characterSubrange(computeRangeIterator, matchStart, matchLength);
}
}