diff options
Diffstat (limited to 'WebCore/html/HTMLTokenizer.cpp')
-rw-r--r-- | WebCore/html/HTMLTokenizer.cpp | 85 |
1 files changed, 52 insertions, 33 deletions
diff --git a/WebCore/html/HTMLTokenizer.cpp b/WebCore/html/HTMLTokenizer.cpp index a52aba6..a18701a 100644 --- a/WebCore/html/HTMLTokenizer.cpp +++ b/WebCore/html/HTMLTokenizer.cpp @@ -28,7 +28,6 @@ #include "config.h" #include "HTMLTokenizer.h" -#include "AtomicString.h" #include "HTMLEntityParser.h" #include "HTMLToken.h" #include "HTMLNames.h" @@ -36,6 +35,7 @@ #include <wtf/ASCIICType.h> #include <wtf/CurrentTime.h> #include <wtf/UnusedParam.h> +#include <wtf/text/AtomicString.h> #include <wtf/text/CString.h> #include <wtf/unicode/Unicode.h> @@ -152,7 +152,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source) do { \ m_state = stateName; \ if (!m_inputStreamPreprocessor.advance(source, m_lineNumber)) \ - return shouldEmitBufferedCharacterToken(source); \ + return haveBufferedCharacterToken(); \ cc = m_inputStreamPreprocessor.nextInputCharacter(); \ goto stateName; \ } while (false) @@ -165,7 +165,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source) do { \ m_state = stateName; \ if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \ - return shouldEmitBufferedCharacterToken(source); \ + return haveBufferedCharacterToken(); \ cc = m_inputStreamPreprocessor.nextInputCharacter(); \ goto stateName; \ } while (false) @@ -202,7 +202,7 @@ bool HTMLTokenizer::emitAndReconsumeIn(SegmentedString&, State state) // Check if we have buffered characters to emit first before emitting the EOF. bool HTMLTokenizer::emitEndOfFile(SegmentedString& source) { - if (shouldEmitBufferedCharacterToken(source)) + if (haveBufferedCharacterToken()) return true; m_state = DataState; source.advance(m_lineNumber); @@ -229,7 +229,7 @@ bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) return true; \ if (source.isEmpty() \ || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \ - return shouldEmitBufferedCharacterToken(source); \ + return haveBufferedCharacterToken(); \ cc = m_inputStreamPreprocessor.nextInputCharacter(); \ goto stateName; \ } while (false) @@ -260,7 +260,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) } if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) - return shouldEmitBufferedCharacterToken(source); + return haveBufferedCharacterToken(); UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody @@ -308,7 +308,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) BEGIN_STATE(CharacterReferenceInDataState) { if (!processEntity(source)) - return shouldEmitBufferedCharacterToken(source); + return haveBufferedCharacterToken(); SWITCH_TO(DataState); } END_STATE() @@ -329,7 +329,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) BEGIN_STATE(CharacterReferenceInRCDATAState) { if (!processEntity(source)) - return shouldEmitBufferedCharacterToken(source); + return haveBufferedCharacterToken(); SWITCH_TO(RCDATAState); } END_STATE() @@ -864,6 +864,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) return emitAndResumeIn(source, DataState); else if (isASCIIUpper(cc)) { m_token->addNewAttribute(); + m_token->beginAttributeName(source.numberOfCharactersConsumed()); m_token->appendToAttributeName(toLowerCase(cc)); ADVANCE_TO(AttributeNameState); } else if (cc == InputStreamPreprocessor::endOfFileMarker) { @@ -873,6 +874,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') parseError(); m_token->addNewAttribute(); + m_token->beginAttributeName(source.numberOfCharactersConsumed()); m_token->appendToAttributeName(cc); ADVANCE_TO(AttributeNameState); } @@ -880,19 +882,24 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) END_STATE() BEGIN_STATE(AttributeNameState) { - if (isTokenizerWhitespace(cc)) + if (isTokenizerWhitespace(cc)) { + m_token->endAttributeName(source.numberOfCharactersConsumed()); ADVANCE_TO(AfterAttributeNameState); - else if (cc == '/') + } else if (cc == '/') { + m_token->endAttributeName(source.numberOfCharactersConsumed()); ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '=') + } else if (cc == '=') { + m_token->endAttributeName(source.numberOfCharactersConsumed()); ADVANCE_TO(BeforeAttributeValueState); - else if (cc == '>') + } else if (cc == '>') { + m_token->endAttributeName(source.numberOfCharactersConsumed()); return emitAndResumeIn(source, DataState); - else if (isASCIIUpper(cc)) { + } else if (isASCIIUpper(cc)) { m_token->appendToAttributeName(toLowerCase(cc)); ADVANCE_TO(AttributeNameState); } else if (cc == InputStreamPreprocessor::endOfFileMarker) { parseError(); + m_token->endAttributeName(source.numberOfCharactersConsumed()); RECONSUME_IN(DataState); } else { if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') @@ -914,6 +921,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) return emitAndResumeIn(source, DataState); else if (isASCIIUpper(cc)) { m_token->addNewAttribute(); + m_token->beginAttributeName(source.numberOfCharactersConsumed()); m_token->appendToAttributeName(toLowerCase(cc)); ADVANCE_TO(AttributeNameState); } else if (cc == InputStreamPreprocessor::endOfFileMarker) { @@ -923,6 +931,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) if (cc == '"' || cc == '\'' || cc == '<') parseError(); m_token->addNewAttribute(); + m_token->beginAttributeName(source.numberOfCharactersConsumed()); m_token->appendToAttributeName(cc); ADVANCE_TO(AttributeNameState); } @@ -932,13 +941,16 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) BEGIN_STATE(BeforeAttributeValueState) { if (isTokenizerWhitespace(cc)) ADVANCE_TO(BeforeAttributeValueState); - else if (cc == '"') + else if (cc == '"') { + m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); ADVANCE_TO(AttributeValueDoubleQuotedState); - else if (cc == '&') + } else if (cc == '&') { + m_token->beginAttributeValue(source.numberOfCharactersConsumed()); RECONSUME_IN(AttributeValueUnquotedState); - else if (cc == '\'') + } else if (cc == '\'') { + m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); ADVANCE_TO(AttributeValueSingleQuotedState); - else if (cc == '>') { + } else if (cc == '>') { parseError(); return emitAndResumeIn(source, DataState); } else if (cc == InputStreamPreprocessor::endOfFileMarker) { @@ -947,6 +959,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) } else { if (cc == '<' || cc == '=' || cc == '`') parseError(); + m_token->beginAttributeValue(source.numberOfCharactersConsumed()); m_token->appendToAttributeValue(cc); ADVANCE_TO(AttributeValueUnquotedState); } @@ -954,13 +967,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) END_STATE() BEGIN_STATE(AttributeValueDoubleQuotedState) { - if (cc == '"') + if (cc == '"') { + m_token->endAttributeValue(source.numberOfCharactersConsumed()); ADVANCE_TO(AfterAttributeValueQuotedState); - else if (cc == '&') { + } else if (cc == '&') { m_additionalAllowedCharacter = '"'; ADVANCE_TO(CharacterReferenceInAttributeValueState); } else if (cc == InputStreamPreprocessor::endOfFileMarker) { parseError(); + m_token->endAttributeValue(source.numberOfCharactersConsumed()); RECONSUME_IN(DataState); } else { m_token->appendToAttributeValue(cc); @@ -970,13 +985,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) END_STATE() BEGIN_STATE(AttributeValueSingleQuotedState) { - if (cc == '\'') + if (cc == '\'') { + m_token->endAttributeValue(source.numberOfCharactersConsumed()); ADVANCE_TO(AfterAttributeValueQuotedState); - else if (cc == '&') { + } else if (cc == '&') { m_additionalAllowedCharacter = '\''; ADVANCE_TO(CharacterReferenceInAttributeValueState); } else if (cc == InputStreamPreprocessor::endOfFileMarker) { parseError(); + m_token->endAttributeValue(source.numberOfCharactersConsumed()); RECONSUME_IN(DataState); } else { m_token->appendToAttributeValue(cc); @@ -986,15 +1003,18 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) END_STATE() BEGIN_STATE(AttributeValueUnquotedState) { - if (isTokenizerWhitespace(cc)) + if (isTokenizerWhitespace(cc)) { + m_token->endAttributeValue(source.numberOfCharactersConsumed()); ADVANCE_TO(BeforeAttributeNameState); - else if (cc == '&') { + } else if (cc == '&') { m_additionalAllowedCharacter = '>'; ADVANCE_TO(CharacterReferenceInAttributeValueState); - } else if (cc == '>') + } else if (cc == '>') { + m_token->endAttributeValue(source.numberOfCharactersConsumed()); return emitAndResumeIn(source, DataState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { + } else if (cc == InputStreamPreprocessor::endOfFileMarker) { parseError(); + m_token->endAttributeValue(source.numberOfCharactersConsumed()); RECONSUME_IN(DataState); } else { if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') @@ -1009,7 +1029,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) bool notEnoughCharacters = false; unsigned value = consumeHTMLEntity(source, notEnoughCharacters, m_additionalAllowedCharacter); if (notEnoughCharacters) - return shouldEmitBufferedCharacterToken(source); + return haveBufferedCharacterToken(); if (!value) m_token->appendToAttributeValue('&'); else if (value < 0xFFFF) @@ -1093,14 +1113,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) m_token->beginComment(); SWITCH_TO(CommentStartState); } else if (result == SegmentedString::NotEnoughCharacters) - return shouldEmitBufferedCharacterToken(source); + return haveBufferedCharacterToken(); } else if (cc == 'D' || cc == 'd') { SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString); if (result == SegmentedString::DidMatch) { advanceStringAndASSERTIgnoringCase(source, "doctype"); SWITCH_TO(DOCTYPEState); } else if (result == SegmentedString::NotEnoughCharacters) - return shouldEmitBufferedCharacterToken(source); + return haveBufferedCharacterToken(); } notImplemented(); // FIXME: We're still missing the bits about the insertion mode being in foreign content: @@ -1313,14 +1333,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) advanceStringAndASSERTIgnoringCase(source, "public"); SWITCH_TO(AfterDOCTYPEPublicKeywordState); } else if (result == SegmentedString::NotEnoughCharacters) - return shouldEmitBufferedCharacterToken(source); + return haveBufferedCharacterToken(); } else if (cc == 'S' || cc == 's') { SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString); if (result == SegmentedString::DidMatch) { advanceStringAndASSERTIgnoringCase(source, "system"); SWITCH_TO(AfterDOCTYPESystemKeywordState); } else if (result == SegmentedString::NotEnoughCharacters) - return shouldEmitBufferedCharacterToken(source); + return haveBufferedCharacterToken(); } parseError(); m_token->setForceQuirks(); @@ -1629,10 +1649,9 @@ inline void HTMLTokenizer::parseError() notImplemented(); } -inline bool HTMLTokenizer::shouldEmitBufferedCharacterToken(const SegmentedString& source) +inline bool HTMLTokenizer::haveBufferedCharacterToken() { - return source.isClosed() && m_token->type() == HTMLToken::Character; + return m_token->type() == HTMLToken::Character; } } - |