Diffstat (limited to 'WebCore/html/HTMLTokenizer.cpp')
-rw-r--r--  WebCore/html/HTMLTokenizer.cpp  85
1 file changed, 52 insertions(+), 33 deletions(-)
diff --git a/WebCore/html/HTMLTokenizer.cpp b/WebCore/html/HTMLTokenizer.cpp
index a52aba6..a18701a 100644
--- a/WebCore/html/HTMLTokenizer.cpp
+++ b/WebCore/html/HTMLTokenizer.cpp
@@ -28,7 +28,6 @@
#include "config.h"
#include "HTMLTokenizer.h"
-#include "AtomicString.h"
#include "HTMLEntityParser.h"
#include "HTMLToken.h"
#include "HTMLNames.h"
@@ -36,6 +35,7 @@
#include <wtf/ASCIICType.h>
#include <wtf/CurrentTime.h>
#include <wtf/UnusedParam.h>
+#include <wtf/text/AtomicString.h>
#include <wtf/text/CString.h>
#include <wtf/unicode/Unicode.h>
@@ -152,7 +152,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
do { \
m_state = stateName; \
if (!m_inputStreamPreprocessor.advance(source, m_lineNumber)) \
- return shouldEmitBufferedCharacterToken(source); \
+ return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
@@ -165,7 +165,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
do { \
m_state = stateName; \
if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
- return shouldEmitBufferedCharacterToken(source); \
+ return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
@@ -202,7 +202,7 @@ bool HTMLTokenizer::emitAndReconsumeIn(SegmentedString&, State state)
// Check if we have buffered characters to emit first before emitting the EOF.
bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
{
- if (shouldEmitBufferedCharacterToken(source))
+ if (haveBufferedCharacterToken())
return true;
m_state = DataState;
source.advance(m_lineNumber);
@@ -229,7 +229,7 @@ bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
return true; \
if (source.isEmpty() \
|| !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
- return shouldEmitBufferedCharacterToken(source); \
+ return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
@@ -260,7 +260,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
}
if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber))
- return shouldEmitBufferedCharacterToken(source);
+ return haveBufferedCharacterToken();
UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
@@ -308,7 +308,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
BEGIN_STATE(CharacterReferenceInDataState) {
if (!processEntity(source))
- return shouldEmitBufferedCharacterToken(source);
+ return haveBufferedCharacterToken();
SWITCH_TO(DataState);
}
END_STATE()
@@ -329,7 +329,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
BEGIN_STATE(CharacterReferenceInRCDATAState) {
if (!processEntity(source))
- return shouldEmitBufferedCharacterToken(source);
+ return haveBufferedCharacterToken();
SWITCH_TO(RCDATAState);
}
END_STATE()
@@ -864,6 +864,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
return emitAndResumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
@@ -873,6 +874,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
parseError();
m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(cc);
ADVANCE_TO(AttributeNameState);
}
@@ -880,19 +882,24 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
BEGIN_STATE(AttributeNameState) {
- if (isTokenizerWhitespace(cc))
+ if (isTokenizerWhitespace(cc)) {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeNameState);
- else if (cc == '/')
+ } else if (cc == '/') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(SelfClosingStartTagState);
- else if (cc == '=')
+ } else if (cc == '=') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(BeforeAttributeValueState);
- else if (cc == '>')
+ } else if (cc == '>') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
return emitAndResumeIn(source, DataState);
- else if (isASCIIUpper(cc)) {
+ } else if (isASCIIUpper(cc)) {
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
@@ -914,6 +921,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
return emitAndResumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
@@ -923,6 +931,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
if (cc == '"' || cc == '\'' || cc == '<')
parseError();
m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(cc);
ADVANCE_TO(AttributeNameState);
}
@@ -932,13 +941,16 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
BEGIN_STATE(BeforeAttributeValueState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeAttributeValueState);
- else if (cc == '"')
+ else if (cc == '"') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
ADVANCE_TO(AttributeValueDoubleQuotedState);
- else if (cc == '&')
+ } else if (cc == '&') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(AttributeValueUnquotedState);
- else if (cc == '\'')
+ } else if (cc == '\'') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
ADVANCE_TO(AttributeValueSingleQuotedState);
- else if (cc == '>') {
+ } else if (cc == '>') {
parseError();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
@@ -947,6 +959,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
} else {
if (cc == '<' || cc == '=' || cc == '`')
parseError();
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed());
m_token->appendToAttributeValue(cc);
ADVANCE_TO(AttributeValueUnquotedState);
}
@@ -954,13 +967,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
BEGIN_STATE(AttributeValueDoubleQuotedState) {
- if (cc == '"')
+ if (cc == '"') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeValueQuotedState);
- else if (cc == '&') {
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '"';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
m_token->appendToAttributeValue(cc);
@@ -970,13 +985,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
BEGIN_STATE(AttributeValueSingleQuotedState) {
- if (cc == '\'')
+ if (cc == '\'') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeValueQuotedState);
- else if (cc == '&') {
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '\'';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
m_token->appendToAttributeValue(cc);
@@ -986,15 +1003,18 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
BEGIN_STATE(AttributeValueUnquotedState) {
- if (isTokenizerWhitespace(cc))
+ if (isTokenizerWhitespace(cc)) {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(BeforeAttributeNameState);
- else if (cc == '&') {
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '>';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
- } else if (cc == '>')
+ } else if (cc == '>') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
return emitAndResumeIn(source, DataState);
- else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
@@ -1009,7 +1029,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
bool notEnoughCharacters = false;
unsigned value = consumeHTMLEntity(source, notEnoughCharacters, m_additionalAllowedCharacter);
if (notEnoughCharacters)
- return shouldEmitBufferedCharacterToken(source);
+ return haveBufferedCharacterToken();
if (!value)
m_token->appendToAttributeValue('&');
else if (value < 0xFFFF)
@@ -1093,14 +1113,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
m_token->beginComment();
SWITCH_TO(CommentStartState);
} else if (result == SegmentedString::NotEnoughCharacters)
- return shouldEmitBufferedCharacterToken(source);
+ return haveBufferedCharacterToken();
} else if (cc == 'D' || cc == 'd') {
SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERTIgnoringCase(source, "doctype");
SWITCH_TO(DOCTYPEState);
} else if (result == SegmentedString::NotEnoughCharacters)
- return shouldEmitBufferedCharacterToken(source);
+ return haveBufferedCharacterToken();
}
notImplemented();
// FIXME: We're still missing the bits about the insertion mode being in foreign content:
@@ -1313,14 +1333,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
advanceStringAndASSERTIgnoringCase(source, "public");
SWITCH_TO(AfterDOCTYPEPublicKeywordState);
} else if (result == SegmentedString::NotEnoughCharacters)
- return shouldEmitBufferedCharacterToken(source);
+ return haveBufferedCharacterToken();
} else if (cc == 'S' || cc == 's') {
SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERTIgnoringCase(source, "system");
SWITCH_TO(AfterDOCTYPESystemKeywordState);
} else if (result == SegmentedString::NotEnoughCharacters)
- return shouldEmitBufferedCharacterToken(source);
+ return haveBufferedCharacterToken();
}
parseError();
m_token->setForceQuirks();
@@ -1629,10 +1649,9 @@ inline void HTMLTokenizer::parseError()
notImplemented();
}
-inline bool HTMLTokenizer::shouldEmitBufferedCharacterToken(const SegmentedString& source)
+inline bool HTMLTokenizer::haveBufferedCharacterToken()
{
- return source.isClosed() && m_token->type() == HTMLToken::Character;
+ return m_token->type() == HTMLToken::Character;
}
}
-
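
The beginAttributeName/endAttributeName and beginAttributeValue/endAttributeValue calls added above record the offsets returned by SegmentedString::numberOfCharactersConsumed() against the attribute currently being built, so later consumers can map each attribute name and value back to its position in the source text. The sketch below is a minimal illustration of what that bookkeeping might look like on the token side; the Range struct, member names, and Vector capacities are assumptions for illustration, not the declarations actually used by HTMLToken.

    #include <wtf/Vector.h>
    #include <wtf/unicode/Unicode.h>

    // Illustrative sketch only: the real HTMLToken declarations may differ.
    class TokenWithAttributeRanges {
    public:
        struct Range {
            int m_start;
            int m_end;
        };

        struct Attribute {
            Range m_nameRange;
            Range m_valueRange;
            WTF::Vector<UChar, 32> m_name;
            WTF::Vector<UChar, 32> m_value;
        };

        void addNewAttribute()
        {
            m_attributes.grow(m_attributes.size() + 1);
            m_currentAttribute = &m_attributes.last();
        }

        // The tokenizer calls these with source.numberOfCharactersConsumed().
        // For quoted values it passes offset + 1 so the recorded range starts
        // after the opening quote (see BeforeAttributeValueState above).
        void beginAttributeName(int offset) { m_currentAttribute->m_nameRange.m_start = offset; }
        void endAttributeName(int offset) { m_currentAttribute->m_nameRange.m_end = offset; }
        void beginAttributeValue(int offset) { m_currentAttribute->m_valueRange.m_start = offset; }
        void endAttributeValue(int offset) { m_currentAttribute->m_valueRange.m_end = offset; }

        void appendToAttributeName(UChar character) { m_currentAttribute->m_name.append(character); }
        void appendToAttributeValue(UChar character) { m_currentAttribute->m_value.append(character); }

    private:
        WTF::Vector<Attribute, 10> m_attributes;
        Attribute* m_currentAttribute;
    };

Recording the offsets while tokenizing avoids a later re-scan of the input to locate each attribute. The separate rename from shouldEmitBufferedCharacterToken(source) to haveBufferedCharacterToken() drops the source.isClosed() check, so buffered character tokens are reported regardless of whether the input stream has closed.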