diff options
author | Steve Block <steveblock@google.com> | 2011-05-06 11:45:16 +0100 |
---|---|---|
committer | Steve Block <steveblock@google.com> | 2011-05-12 13:44:10 +0100 |
commit | cad810f21b803229eb11403f9209855525a25d57 (patch) | |
tree | 29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /WebCore/html/parser/HTMLTokenizer.cpp | |
parent | 121b0cf4517156d0ac5111caf9830c51b69bae8f (diff) | |
download | external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2 |
Merge WebKit at r75315: Initial merge by git.
Change-Id: I570314b346ce101c935ed22a626b48c2af266b84
Diffstat (limited to 'WebCore/html/parser/HTMLTokenizer.cpp')
-rw-r--r-- | WebCore/html/parser/HTMLTokenizer.cpp | 1698 |
1 files changed, 0 insertions, 1698 deletions
diff --git a/WebCore/html/parser/HTMLTokenizer.cpp b/WebCore/html/parser/HTMLTokenizer.cpp deleted file mode 100644 index 305fca2..0000000 --- a/WebCore/html/parser/HTMLTokenizer.cpp +++ /dev/null @@ -1,1698 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. All Rights Reserved. - * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ - * Copyright (C) 2010 Google, Inc. All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "HTMLTokenizer.h" - -#include "HTMLEntityParser.h" -#include "HTMLToken.h" -#include "HTMLTreeBuilder.h" -#include "HTMLNames.h" -#include "NotImplemented.h" -#include <wtf/ASCIICType.h> -#include <wtf/CurrentTime.h> -#include <wtf/UnusedParam.h> -#include <wtf/text/AtomicString.h> -#include <wtf/text/CString.h> -#include <wtf/unicode/Unicode.h> - -using namespace WTF; - -namespace WebCore { - -using namespace HTMLNames; - -const UChar HTMLTokenizer::InputStreamPreprocessor::endOfFileMarker = 0; - -namespace { - -inline UChar toLowerCase(UChar cc) -{ - ASSERT(isASCIIUpper(cc)); - const int lowerCaseOffset = 0x20; - return cc + lowerCaseOffset; -} - -inline bool isTokenizerWhitespace(UChar cc) -{ - return cc == ' ' || cc == '\x0A' || cc == '\x09' || cc == '\x0C'; -} - -inline void advanceStringAndASSERTIgnoringCase(SegmentedString& source, const char* expectedCharacters) -{ - while (*expectedCharacters) - source.advanceAndASSERTIgnoringCase(*expectedCharacters++); -} - -inline void advanceStringAndASSERT(SegmentedString& source, const char* expectedCharacters) -{ - while (*expectedCharacters) - source.advanceAndASSERT(*expectedCharacters++); -} - -inline bool vectorEqualsString(const Vector<UChar, 32>& vector, const String& string) -{ - if (vector.size() != string.length()) - return false; - const UChar* stringData = string.characters(); - const UChar* vectorData = vector.data(); - // FIXME: Is there a higher-level function we should be calling here? - return !memcmp(stringData, vectorData, vector.size() * sizeof(UChar)); -} - -inline bool isEndTagBufferingState(HTMLTokenizer::State state) -{ - switch (state) { - case HTMLTokenizer::RCDATAEndTagOpenState: - case HTMLTokenizer::RCDATAEndTagNameState: - case HTMLTokenizer::RAWTEXTEndTagOpenState: - case HTMLTokenizer::RAWTEXTEndTagNameState: - case HTMLTokenizer::ScriptDataEndTagOpenState: - case HTMLTokenizer::ScriptDataEndTagNameState: - case HTMLTokenizer::ScriptDataEscapedEndTagOpenState: - case HTMLTokenizer::ScriptDataEscapedEndTagNameState: - return true; - default: - return false; - } -} - -} - -HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks) - : m_inputStreamPreprocessor(this) - , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks) -{ - reset(); -} - -HTMLTokenizer::~HTMLTokenizer() -{ -} - -void HTMLTokenizer::reset() -{ - m_state = DataState; - m_token = 0; - m_lineNumber = 0; - m_skipLeadingNewLineForListing = false; - m_forceNullCharacterReplacement = false; - m_shouldAllowCDATA = false; - m_additionalAllowedCharacter = '\0'; -} - -inline bool HTMLTokenizer::processEntity(SegmentedString& source) -{ - bool notEnoughCharacters = false; - Vector<UChar, 16> decodedEntity; - bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters); - if (notEnoughCharacters) - return false; - if (!success) { - ASSERT(decodedEntity.isEmpty()); - bufferCharacter('&'); - } else { - Vector<UChar>::const_iterator iter = decodedEntity.begin(); - for (; iter != decodedEntity.end(); ++iter) - bufferCharacter(*iter); - } - return true; -} - -#if COMPILER(MSVC) -// We need to disable the "unreachable code" warning because we want to assert -// that some code points aren't reached in the state machine. -#pragma warning(disable: 4702) -#endif - -#define BEGIN_STATE(stateName) case stateName: stateName: -#define END_STATE() ASSERT_NOT_REACHED(); break; - -// We use this macro when the HTML5 spec says "reconsume the current input -// character in the <mumble> state." -#define RECONSUME_IN(stateName) \ - do { \ - m_state = stateName; \ - goto stateName; \ - } while (false) - -// We use this macro when the HTML5 spec says "consume the next input -// character ... and switch to the <mumble> state." -#define ADVANCE_TO(stateName) \ - do { \ - m_state = stateName; \ - if (!m_inputStreamPreprocessor.advance(source, m_lineNumber)) \ - return haveBufferedCharacterToken(); \ - cc = m_inputStreamPreprocessor.nextInputCharacter(); \ - goto stateName; \ - } while (false) - -// Sometimes there's more complicated logic in the spec that separates when -// we consume the next input character and when we switch to a particular -// state. We handle those cases by advancing the source directly and using -// this macro to switch to the indicated state. -#define SWITCH_TO(stateName) \ - do { \ - m_state = stateName; \ - if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \ - return haveBufferedCharacterToken(); \ - cc = m_inputStreamPreprocessor.nextInputCharacter(); \ - goto stateName; \ - } while (false) - - -inline void HTMLTokenizer::saveEndTagNameIfNeeded() -{ - ASSERT(m_token->type() != HTMLToken::Uninitialized); - if (m_token->type() == HTMLToken::StartTag) - m_appropriateEndTagName = m_token->name(); -} - -// We use this function when the HTML5 spec says "Emit the current <mumble> -// token. Switch to the <mumble> state." We use the word "resume" instead of -// switch to indicate that this macro actually returns and that we'll end up -// in the state when we "resume" (i.e., are called again). -bool HTMLTokenizer::emitAndResumeIn(SegmentedString& source, State state) -{ - m_state = state; - source.advance(m_lineNumber); - saveEndTagNameIfNeeded(); - return true; -} - -// Identical to emitAndResumeIn, except does not advance. -bool HTMLTokenizer::emitAndReconsumeIn(SegmentedString&, State state) -{ - m_state = state; - saveEndTagNameIfNeeded(); - return true; -} - -// Used to emit the EndOfFile token. -// Check if we have buffered characters to emit first before emitting the EOF. -bool HTMLTokenizer::emitEndOfFile(SegmentedString& source) -{ - if (haveBufferedCharacterToken()) - return true; - m_state = DataState; - source.advance(m_lineNumber); - m_token->clear(); - m_token->makeEndOfFile(); - return true; -} - -bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) -{ - ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized); - source.advance(m_lineNumber); - if (m_token->type() == HTMLToken::Character) - return true; - m_token->beginEndTag(m_bufferedEndTagName); - m_bufferedEndTagName.clear(); - return false; -} - -#define FLUSH_AND_ADVANCE_TO(stateName) \ - do { \ - m_state = stateName; \ - if (flushBufferedEndTag(source)) \ - return true; \ - if (source.isEmpty() \ - || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \ - return haveBufferedCharacterToken(); \ - cc = m_inputStreamPreprocessor.nextInputCharacter(); \ - goto stateName; \ - } while (false) - -bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, State state) -{ - m_state = state; - flushBufferedEndTag(source); - return true; -} - -bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) -{ - // If we have a token in progress, then we're supposed to be called back - // with the same token so we can finish it. - ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized); - m_token = &token; - - if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { - // FIXME: This should call flushBufferedEndTag(). - // We started an end tag during our last iteration. - m_token->beginEndTag(m_bufferedEndTagName); - m_bufferedEndTagName.clear(); - if (m_state == DataState) { - // We're back in the data state, so we must be done with the tag. - return true; - } - } - - if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) - return haveBufferedCharacterToken(); - UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); - - // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody - // Note that this logic is different than the generic \r\n collapsing - // handled in the input stream preprocessor. This logic is here as an - // "authoring convenience" so folks can write: - // - // <pre> - // lorem ipsum - // lorem ipsum - // </pre> - // - // without getting an extra newline at the start of their <pre> element. - if (m_skipLeadingNewLineForListing) { - m_skipLeadingNewLineForListing = false; - if (cc == '\n') { - if (m_state == DataState) - ADVANCE_TO(DataState); - if (m_state == RCDATAState) - ADVANCE_TO(RCDATAState); - // When parsing text/plain documents, we run the tokenizer in the - // PLAINTEXTState and ignore m_skipLeadingNewLineForListing. - ASSERT(m_state == PLAINTEXTState); - } - } - - // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 - switch (m_state) { - BEGIN_STATE(DataState) { - if (cc == '&') - ADVANCE_TO(CharacterReferenceInDataState); - else if (cc == '<') { - if (m_token->type() == HTMLToken::Character) { - // We have a bunch of character tokens queued up that we - // are emitting lazily here. - return true; - } - ADVANCE_TO(TagOpenState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) - return emitEndOfFile(source); - else { - bufferCharacter(cc); - ADVANCE_TO(DataState); - } - } - END_STATE() - - BEGIN_STATE(CharacterReferenceInDataState) { - if (!processEntity(source)) - return haveBufferedCharacterToken(); - SWITCH_TO(DataState); - } - END_STATE() - - BEGIN_STATE(RCDATAState) { - if (cc == '&') - ADVANCE_TO(CharacterReferenceInRCDATAState); - else if (cc == '<') - ADVANCE_TO(RCDATALessThanSignState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) - return emitEndOfFile(source); - else { - bufferCharacter(cc); - ADVANCE_TO(RCDATAState); - } - } - END_STATE() - - BEGIN_STATE(CharacterReferenceInRCDATAState) { - if (!processEntity(source)) - return haveBufferedCharacterToken(); - SWITCH_TO(RCDATAState); - } - END_STATE() - - BEGIN_STATE(RAWTEXTState) { - if (cc == '<') - ADVANCE_TO(RAWTEXTLessThanSignState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) - return emitEndOfFile(source); - else { - bufferCharacter(cc); - ADVANCE_TO(RAWTEXTState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataState) { - if (cc == '<') - ADVANCE_TO(ScriptDataLessThanSignState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) - return emitEndOfFile(source); - else { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataState); - } - } - END_STATE() - - BEGIN_STATE(PLAINTEXTState) { - if (cc == InputStreamPreprocessor::endOfFileMarker) - return emitEndOfFile(source); - else - bufferCharacter(cc); - ADVANCE_TO(PLAINTEXTState); - } - END_STATE() - - BEGIN_STATE(TagOpenState) { - if (cc == '!') - ADVANCE_TO(MarkupDeclarationOpenState); - else if (cc == '/') - ADVANCE_TO(EndTagOpenState); - else if (isASCIIUpper(cc)) { - m_token->beginStartTag(toLowerCase(cc)); - ADVANCE_TO(TagNameState); - } else if (isASCIILower(cc)) { - m_token->beginStartTag(cc); - ADVANCE_TO(TagNameState); - } else if (cc == '?') { - parseError(); - // The spec consumes the current character before switching - // to the bogus comment state, but it's easier to implement - // if we reconsume the current character. - RECONSUME_IN(BogusCommentState); - } else { - parseError(); - bufferCharacter('<'); - RECONSUME_IN(DataState); - } - } - END_STATE() - - BEGIN_STATE(EndTagOpenState) { - if (isASCIIUpper(cc)) { - m_token->beginEndTag(toLowerCase(cc)); - ADVANCE_TO(TagNameState); - } else if (isASCIILower(cc)) { - m_token->beginEndTag(cc); - ADVANCE_TO(TagNameState); - } else if (cc == '>') { - parseError(); - ADVANCE_TO(DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - bufferCharacter('<'); - bufferCharacter('/'); - RECONSUME_IN(DataState); - } else { - parseError(); - RECONSUME_IN(BogusCommentState); - } - } - END_STATE() - - BEGIN_STATE(TagNameState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeAttributeNameState); - else if (cc == '/') - ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (m_usePreHTML5ParserQuirks && cc == '<') - return emitAndReconsumeIn(source, DataState); - else if (isASCIIUpper(cc)) { - m_token->appendToName(toLowerCase(cc)); - ADVANCE_TO(TagNameState); - } if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - m_token->appendToName(cc); - ADVANCE_TO(TagNameState); - } - } - END_STATE() - - BEGIN_STATE(RCDATALessThanSignState) { - if (cc == '/') { - m_temporaryBuffer.clear(); - ASSERT(m_bufferedEndTagName.isEmpty()); - ADVANCE_TO(RCDATAEndTagOpenState); - } else { - bufferCharacter('<'); - RECONSUME_IN(RCDATAState); - } - } - END_STATE() - - BEGIN_STATE(RCDATAEndTagOpenState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(toLowerCase(cc)); - ADVANCE_TO(RCDATAEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(cc); - ADVANCE_TO(RCDATAEndTagNameState); - } else { - bufferCharacter('<'); - bufferCharacter('/'); - RECONSUME_IN(RCDATAState); - } - } - END_STATE() - - BEGIN_STATE(RCDATAEndTagNameState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(toLowerCase(cc)); - ADVANCE_TO(RCDATAEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(cc); - ADVANCE_TO(RCDATAEndTagNameState); - } else { - if (isTokenizerWhitespace(cc)) { - if (isAppropriateEndTag()) - FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); - } else if (cc == '/') { - if (isAppropriateEndTag()) - FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); - } else if (cc == '>') { - if (isAppropriateEndTag()) - return flushEmitAndResumeIn(source, DataState); - } - bufferCharacter('<'); - bufferCharacter('/'); - m_token->appendToCharacter(m_temporaryBuffer); - m_bufferedEndTagName.clear(); - RECONSUME_IN(RCDATAState); - } - } - END_STATE() - - BEGIN_STATE(RAWTEXTLessThanSignState) { - if (cc == '/') { - m_temporaryBuffer.clear(); - ASSERT(m_bufferedEndTagName.isEmpty()); - ADVANCE_TO(RAWTEXTEndTagOpenState); - } else { - bufferCharacter('<'); - RECONSUME_IN(RAWTEXTState); - } - } - END_STATE() - - BEGIN_STATE(RAWTEXTEndTagOpenState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(toLowerCase(cc)); - ADVANCE_TO(RAWTEXTEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(cc); - ADVANCE_TO(RAWTEXTEndTagNameState); - } else { - bufferCharacter('<'); - bufferCharacter('/'); - RECONSUME_IN(RAWTEXTState); - } - } - END_STATE() - - BEGIN_STATE(RAWTEXTEndTagNameState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(toLowerCase(cc)); - ADVANCE_TO(RAWTEXTEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(cc); - ADVANCE_TO(RAWTEXTEndTagNameState); - } else { - if (isTokenizerWhitespace(cc)) { - if (isAppropriateEndTag()) - FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); - } else if (cc == '/') { - if (isAppropriateEndTag()) - FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); - } else if (cc == '>') { - if (isAppropriateEndTag()) - return flushEmitAndResumeIn(source, DataState); - } - bufferCharacter('<'); - bufferCharacter('/'); - m_token->appendToCharacter(m_temporaryBuffer); - m_bufferedEndTagName.clear(); - RECONSUME_IN(RAWTEXTState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataLessThanSignState) { - if (cc == '/') { - m_temporaryBuffer.clear(); - ASSERT(m_bufferedEndTagName.isEmpty()); - ADVANCE_TO(ScriptDataEndTagOpenState); - } else if (cc == '!') { - bufferCharacter('<'); - bufferCharacter('!'); - ADVANCE_TO(ScriptDataEscapeStartState); - } else { - bufferCharacter('<'); - RECONSUME_IN(ScriptDataState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataEndTagOpenState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(toLowerCase(cc)); - ADVANCE_TO(ScriptDataEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(cc); - ADVANCE_TO(ScriptDataEndTagNameState); - } else { - bufferCharacter('<'); - bufferCharacter('/'); - RECONSUME_IN(ScriptDataState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataEndTagNameState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(toLowerCase(cc)); - ADVANCE_TO(ScriptDataEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(cc); - ADVANCE_TO(ScriptDataEndTagNameState); - } else { - if (isTokenizerWhitespace(cc)) { - if (isAppropriateEndTag()) - FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); - } else if (cc == '/') { - if (isAppropriateEndTag()) - FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); - } else if (cc == '>') { - if (isAppropriateEndTag()) - return flushEmitAndResumeIn(source, DataState); - } - bufferCharacter('<'); - bufferCharacter('/'); - m_token->appendToCharacter(m_temporaryBuffer); - m_bufferedEndTagName.clear(); - RECONSUME_IN(ScriptDataState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataEscapeStartState) { - if (cc == '-') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataEscapeStartDashState); - } else - RECONSUME_IN(ScriptDataState); - } - END_STATE() - - BEGIN_STATE(ScriptDataEscapeStartDashState) { - if (cc == '-') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataEscapedDashDashState); - } else - RECONSUME_IN(ScriptDataState); - } - END_STATE() - - BEGIN_STATE(ScriptDataEscapedState) { - if (cc == '-') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataEscapedDashState); - } else if (cc == '<') - ADVANCE_TO(ScriptDataEscapedLessThanSignState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataEscapedDashState) { - if (cc == '-') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataEscapedDashDashState); - } else if (cc == '<') - ADVANCE_TO(ScriptDataEscapedLessThanSignState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataEscapedDashDashState) { - if (cc == '-') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataEscapedDashDashState); - } else if (cc == '<') - ADVANCE_TO(ScriptDataEscapedLessThanSignState); - else if (cc == '>') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataState); - } if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataEscapedLessThanSignState) { - if (cc == '/') { - m_temporaryBuffer.clear(); - ASSERT(m_bufferedEndTagName.isEmpty()); - ADVANCE_TO(ScriptDataEscapedEndTagOpenState); - } else if (isASCIIUpper(cc)) { - bufferCharacter('<'); - bufferCharacter(cc); - m_temporaryBuffer.clear(); - m_temporaryBuffer.append(toLowerCase(cc)); - ADVANCE_TO(ScriptDataDoubleEscapeStartState); - } else if (isASCIILower(cc)) { - bufferCharacter('<'); - bufferCharacter(cc); - m_temporaryBuffer.clear(); - m_temporaryBuffer.append(cc); - ADVANCE_TO(ScriptDataDoubleEscapeStartState); - } else { - bufferCharacter('<'); - RECONSUME_IN(ScriptDataEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataEscapedEndTagOpenState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(toLowerCase(cc)); - ADVANCE_TO(ScriptDataEscapedEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(cc); - ADVANCE_TO(ScriptDataEscapedEndTagNameState); - } else { - bufferCharacter('<'); - bufferCharacter('/'); - RECONSUME_IN(ScriptDataEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataEscapedEndTagNameState) { - if (isASCIIUpper(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(toLowerCase(cc)); - ADVANCE_TO(ScriptDataEscapedEndTagNameState); - } else if (isASCIILower(cc)) { - m_temporaryBuffer.append(cc); - addToPossibleEndTag(cc); - ADVANCE_TO(ScriptDataEscapedEndTagNameState); - } else { - if (isTokenizerWhitespace(cc)) { - if (isAppropriateEndTag()) - FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); - } else if (cc == '/') { - if (isAppropriateEndTag()) - FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); - } else if (cc == '>') { - if (isAppropriateEndTag()) - return flushEmitAndResumeIn(source, DataState); - } - bufferCharacter('<'); - bufferCharacter('/'); - m_token->appendToCharacter(m_temporaryBuffer); - m_bufferedEndTagName.clear(); - RECONSUME_IN(ScriptDataEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataDoubleEscapeStartState) { - if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') { - bufferCharacter(cc); - if (temporaryBufferIs(scriptTag.localName())) - ADVANCE_TO(ScriptDataDoubleEscapedState); - else - ADVANCE_TO(ScriptDataEscapedState); - } else if (isASCIIUpper(cc)) { - bufferCharacter(cc); - m_temporaryBuffer.append(toLowerCase(cc)); - ADVANCE_TO(ScriptDataDoubleEscapeStartState); - } else if (isASCIILower(cc)) { - bufferCharacter(cc); - m_temporaryBuffer.append(cc); - ADVANCE_TO(ScriptDataDoubleEscapeStartState); - } else - RECONSUME_IN(ScriptDataEscapedState); - } - END_STATE() - - BEGIN_STATE(ScriptDataDoubleEscapedState) { - if (cc == '-') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedDashState); - } else if (cc == '<') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataDoubleEscapedDashState) { - if (cc == '-') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); - } else if (cc == '<') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) { - if (cc == '-') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); - } else if (cc == '<') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); - } else if (cc == '>') { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - bufferCharacter(cc); - ADVANCE_TO(ScriptDataDoubleEscapedState); - } - } - END_STATE() - - BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) { - if (cc == '/') { - bufferCharacter(cc); - m_temporaryBuffer.clear(); - ADVANCE_TO(ScriptDataDoubleEscapeEndState); - } else - RECONSUME_IN(ScriptDataDoubleEscapedState); - } - END_STATE() - - BEGIN_STATE(ScriptDataDoubleEscapeEndState) { - if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') { - bufferCharacter(cc); - if (temporaryBufferIs(scriptTag.localName())) - ADVANCE_TO(ScriptDataEscapedState); - else - ADVANCE_TO(ScriptDataDoubleEscapedState); - } else if (isASCIIUpper(cc)) { - bufferCharacter(cc); - m_temporaryBuffer.append(toLowerCase(cc)); - ADVANCE_TO(ScriptDataDoubleEscapeEndState); - } else if (isASCIILower(cc)) { - bufferCharacter(cc); - m_temporaryBuffer.append(cc); - ADVANCE_TO(ScriptDataDoubleEscapeEndState); - } else - RECONSUME_IN(ScriptDataDoubleEscapedState); - } - END_STATE() - - BEGIN_STATE(BeforeAttributeNameState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeAttributeNameState); - else if (cc == '/') - ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (m_usePreHTML5ParserQuirks && cc == '<') - return emitAndReconsumeIn(source, DataState); - else if (isASCIIUpper(cc)) { - m_token->addNewAttribute(); - m_token->beginAttributeName(source.numberOfCharactersConsumed()); - m_token->appendToAttributeName(toLowerCase(cc)); - ADVANCE_TO(AttributeNameState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') - parseError(); - m_token->addNewAttribute(); - m_token->beginAttributeName(source.numberOfCharactersConsumed()); - m_token->appendToAttributeName(cc); - ADVANCE_TO(AttributeNameState); - } - } - END_STATE() - - BEGIN_STATE(AttributeNameState) { - if (isTokenizerWhitespace(cc)) { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - ADVANCE_TO(AfterAttributeNameState); - } else if (cc == '/') { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - ADVANCE_TO(SelfClosingStartTagState); - } else if (cc == '=') { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - ADVANCE_TO(BeforeAttributeValueState); - } else if (cc == '>') { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - return emitAndResumeIn(source, DataState); - } else if (m_usePreHTML5ParserQuirks && cc == '<') { - m_token->endAttributeName(source.numberOfCharactersConsumed()); - return emitAndReconsumeIn(source, DataState); - } else if (isASCIIUpper(cc)) { - m_token->appendToAttributeName(toLowerCase(cc)); - ADVANCE_TO(AttributeNameState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->endAttributeName(source.numberOfCharactersConsumed()); - RECONSUME_IN(DataState); - } else { - if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') - parseError(); - m_token->appendToAttributeName(cc); - ADVANCE_TO(AttributeNameState); - } - } - END_STATE() - - BEGIN_STATE(AfterAttributeNameState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(AfterAttributeNameState); - else if (cc == '/') - ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '=') - ADVANCE_TO(BeforeAttributeValueState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (m_usePreHTML5ParserQuirks && cc == '<') - return emitAndReconsumeIn(source, DataState); - else if (isASCIIUpper(cc)) { - m_token->addNewAttribute(); - m_token->beginAttributeName(source.numberOfCharactersConsumed()); - m_token->appendToAttributeName(toLowerCase(cc)); - ADVANCE_TO(AttributeNameState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - if (cc == '"' || cc == '\'' || cc == '<') - parseError(); - m_token->addNewAttribute(); - m_token->beginAttributeName(source.numberOfCharactersConsumed()); - m_token->appendToAttributeName(cc); - ADVANCE_TO(AttributeNameState); - } - } - END_STATE() - - BEGIN_STATE(BeforeAttributeValueState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeAttributeValueState); - else if (cc == '"') { - m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); - ADVANCE_TO(AttributeValueDoubleQuotedState); - } else if (cc == '&') { - m_token->beginAttributeValue(source.numberOfCharactersConsumed()); - RECONSUME_IN(AttributeValueUnquotedState); - } else if (cc == '\'') { - m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); - ADVANCE_TO(AttributeValueSingleQuotedState); - } else if (cc == '>') { - parseError(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - if (cc == '<' || cc == '=' || cc == '`') - parseError(); - m_token->beginAttributeValue(source.numberOfCharactersConsumed()); - m_token->appendToAttributeValue(cc); - ADVANCE_TO(AttributeValueUnquotedState); - } - } - END_STATE() - - BEGIN_STATE(AttributeValueDoubleQuotedState) { - if (cc == '"') { - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - ADVANCE_TO(AfterAttributeValueQuotedState); - } else if (cc == '&') { - m_additionalAllowedCharacter = '"'; - ADVANCE_TO(CharacterReferenceInAttributeValueState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - RECONSUME_IN(DataState); - } else { - m_token->appendToAttributeValue(cc); - ADVANCE_TO(AttributeValueDoubleQuotedState); - } - } - END_STATE() - - BEGIN_STATE(AttributeValueSingleQuotedState) { - if (cc == '\'') { - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - ADVANCE_TO(AfterAttributeValueQuotedState); - } else if (cc == '&') { - m_additionalAllowedCharacter = '\''; - ADVANCE_TO(CharacterReferenceInAttributeValueState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - RECONSUME_IN(DataState); - } else { - m_token->appendToAttributeValue(cc); - ADVANCE_TO(AttributeValueSingleQuotedState); - } - } - END_STATE() - - BEGIN_STATE(AttributeValueUnquotedState) { - if (isTokenizerWhitespace(cc)) { - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - ADVANCE_TO(BeforeAttributeNameState); - } else if (cc == '&') { - m_additionalAllowedCharacter = '>'; - ADVANCE_TO(CharacterReferenceInAttributeValueState); - } else if (cc == '>') { - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->endAttributeValue(source.numberOfCharactersConsumed()); - RECONSUME_IN(DataState); - } else { - if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') - parseError(); - m_token->appendToAttributeValue(cc); - ADVANCE_TO(AttributeValueUnquotedState); - } - } - END_STATE() - - BEGIN_STATE(CharacterReferenceInAttributeValueState) { - bool notEnoughCharacters = false; - Vector<UChar, 16> decodedEntity; - bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter); - if (notEnoughCharacters) - return haveBufferedCharacterToken(); - if (!success) { - ASSERT(decodedEntity.isEmpty()); - m_token->appendToAttributeValue('&'); - } else { - Vector<UChar>::const_iterator iter = decodedEntity.begin(); - for (; iter != decodedEntity.end(); ++iter) - m_token->appendToAttributeValue(*iter); - } - // We're supposed to switch back to the attribute value state that - // we were in when we were switched into this state. Rather than - // keeping track of this explictly, we observe that the previous - // state can be determined by m_additionalAllowedCharacter. - if (m_additionalAllowedCharacter == '"') - SWITCH_TO(AttributeValueDoubleQuotedState); - else if (m_additionalAllowedCharacter == '\'') - SWITCH_TO(AttributeValueSingleQuotedState); - else if (m_additionalAllowedCharacter == '>') - SWITCH_TO(AttributeValueUnquotedState); - else - ASSERT_NOT_REACHED(); - } - END_STATE() - - BEGIN_STATE(AfterAttributeValueQuotedState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeAttributeNameState); - else if (cc == '/') - ADVANCE_TO(SelfClosingStartTagState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (m_usePreHTML5ParserQuirks && cc == '<') - return emitAndReconsumeIn(source, DataState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - parseError(); - RECONSUME_IN(BeforeAttributeNameState); - } - } - END_STATE() - - BEGIN_STATE(SelfClosingStartTagState) { - if (cc == '>') { - m_token->setSelfClosing(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - RECONSUME_IN(DataState); - } else { - parseError(); - RECONSUME_IN(BeforeAttributeNameState); - } - } - END_STATE() - - BEGIN_STATE(BogusCommentState) { - m_token->beginComment(); - RECONSUME_IN(ContinueBogusCommentState); - } - END_STATE() - - BEGIN_STATE(ContinueBogusCommentState) { - if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) - return emitAndReconsumeIn(source, DataState); - else { - m_token->appendToComment(cc); - ADVANCE_TO(ContinueBogusCommentState); - } - } - END_STATE() - - BEGIN_STATE(MarkupDeclarationOpenState) { - DEFINE_STATIC_LOCAL(String, dashDashString, ("--")); - DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype")); - DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA[")); - if (cc == '-') { - SegmentedString::LookAheadResult result = source.lookAhead(dashDashString); - if (result == SegmentedString::DidMatch) { - source.advanceAndASSERT('-'); - source.advanceAndASSERT('-'); - m_token->beginComment(); - SWITCH_TO(CommentStartState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } else if (cc == 'D' || cc == 'd') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString); - if (result == SegmentedString::DidMatch) { - advanceStringAndASSERTIgnoringCase(source, "doctype"); - SWITCH_TO(DOCTYPEState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } else if (cc == '[' && shouldAllowCDATA()) { - SegmentedString::LookAheadResult result = source.lookAhead(cdataString); - if (result == SegmentedString::DidMatch) { - advanceStringAndASSERT(source, "[CDATA["); - SWITCH_TO(CDATASectionState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } - parseError(); - RECONSUME_IN(BogusCommentState); - } - END_STATE() - - BEGIN_STATE(CommentStartState) { - if (cc == '-') - ADVANCE_TO(CommentStartDashState); - else if (cc == '>') { - parseError(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToComment(cc); - ADVANCE_TO(CommentState); - } - } - END_STATE() - - BEGIN_STATE(CommentStartDashState) { - if (cc == '-') - ADVANCE_TO(CommentEndState); - else if (cc == '>') { - parseError(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToComment('-'); - m_token->appendToComment(cc); - ADVANCE_TO(CommentState); - } - } - END_STATE() - - BEGIN_STATE(CommentState) { - if (cc == '-') - ADVANCE_TO(CommentEndDashState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToComment(cc); - ADVANCE_TO(CommentState); - } - } - END_STATE() - - BEGIN_STATE(CommentEndDashState) { - if (cc == '-') - ADVANCE_TO(CommentEndState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToComment('-'); - m_token->appendToComment(cc); - ADVANCE_TO(CommentState); - } - } - END_STATE() - - BEGIN_STATE(CommentEndState) { - if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == '!') { - parseError(); - ADVANCE_TO(CommentEndBangState); - } else if (cc == '-') { - parseError(); - m_token->appendToComment('-'); - ADVANCE_TO(CommentEndState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - m_token->appendToComment('-'); - m_token->appendToComment('-'); - m_token->appendToComment(cc); - ADVANCE_TO(CommentState); - } - } - END_STATE() - - BEGIN_STATE(CommentEndBangState) { - if (cc == '-') { - m_token->appendToComment('-'); - m_token->appendToComment('-'); - m_token->appendToComment('!'); - ADVANCE_TO(CommentEndDashState); - } else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToComment('-'); - m_token->appendToComment('-'); - m_token->appendToComment('!'); - m_token->appendToComment(cc); - ADVANCE_TO(CommentState); - } - } - END_STATE() - - BEGIN_STATE(DOCTYPEState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeDOCTYPENameState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->beginDOCTYPE(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - RECONSUME_IN(BeforeDOCTYPENameState); - } - } - END_STATE() - - BEGIN_STATE(BeforeDOCTYPENameState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeDOCTYPENameState); - else if (isASCIIUpper(cc)) { - m_token->beginDOCTYPE(toLowerCase(cc)); - ADVANCE_TO(DOCTYPENameState); - } else if (cc == '>') { - parseError(); - m_token->beginDOCTYPE(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->beginDOCTYPE(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->beginDOCTYPE(cc); - ADVANCE_TO(DOCTYPENameState); - } - } - END_STATE() - - BEGIN_STATE(DOCTYPENameState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(AfterDOCTYPENameState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (isASCIIUpper(cc)) { - m_token->appendToName(toLowerCase(cc)); - ADVANCE_TO(DOCTYPENameState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToName(cc); - ADVANCE_TO(DOCTYPENameState); - } - } - END_STATE() - - BEGIN_STATE(AfterDOCTYPENameState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(AfterDOCTYPENameState); - if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - DEFINE_STATIC_LOCAL(String, publicString, ("public")); - DEFINE_STATIC_LOCAL(String, systemString, ("system")); - if (cc == 'P' || cc == 'p') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString); - if (result == SegmentedString::DidMatch) { - advanceStringAndASSERTIgnoringCase(source, "public"); - SWITCH_TO(AfterDOCTYPEPublicKeywordState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } else if (cc == 'S' || cc == 's') { - SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString); - if (result == SegmentedString::DidMatch) { - advanceStringAndASSERTIgnoringCase(source, "system"); - SWITCH_TO(AfterDOCTYPESystemKeywordState); - } else if (result == SegmentedString::NotEnoughCharacters) - return haveBufferedCharacterToken(); - } - parseError(); - m_token->setForceQuirks(); - ADVANCE_TO(BogusDOCTYPEState); - } - } - END_STATE() - - BEGIN_STATE(AfterDOCTYPEPublicKeywordState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); - else if (cc == '"') { - parseError(); - m_token->setPublicIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); - } else if (cc == '\'') { - parseError(); - m_token->setPublicIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); - } else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - m_token->setForceQuirks(); - ADVANCE_TO(BogusDOCTYPEState); - } - } - END_STATE() - - BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); - else if (cc == '"') { - m_token->setPublicIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); - } else if (cc == '\'') { - m_token->setPublicIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); - } else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - m_token->setForceQuirks(); - ADVANCE_TO(BogusDOCTYPEState); - } - } - END_STATE() - - BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) { - if (cc == '"') - ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); - else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToPublicIdentifier(cc); - ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); - } - } - END_STATE() - - BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) { - if (cc == '\'') - ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); - else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToPublicIdentifier(cc); - ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); - } - } - END_STATE() - - BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == '"') { - parseError(); - m_token->setSystemIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } else if (cc == '\'') { - parseError(); - m_token->setSystemIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - m_token->setForceQuirks(); - ADVANCE_TO(BogusDOCTYPEState); - } - } - END_STATE() - - BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == '"') { - m_token->setSystemIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } else if (cc == '\'') { - m_token->setSystemIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - m_token->setForceQuirks(); - ADVANCE_TO(BogusDOCTYPEState); - } - } - END_STATE() - - BEGIN_STATE(AfterDOCTYPESystemKeywordState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); - else if (cc == '"') { - parseError(); - m_token->setSystemIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } else if (cc == '\'') { - parseError(); - m_token->setSystemIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - m_token->setForceQuirks(); - ADVANCE_TO(BogusDOCTYPEState); - } - } - END_STATE() - - BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); - if (cc == '"') { - m_token->setSystemIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } else if (cc == '\'') { - m_token->setSystemIdentifierToEmptyString(); - ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - m_token->setForceQuirks(); - ADVANCE_TO(BogusDOCTYPEState); - } - } - END_STATE() - - BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) { - if (cc == '"') - ADVANCE_TO(AfterDOCTYPESystemIdentifierState); - else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToSystemIdentifier(cc); - ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); - } - } - END_STATE() - - BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) { - if (cc == '\'') - ADVANCE_TO(AfterDOCTYPESystemIdentifierState); - else if (cc == '>') { - parseError(); - m_token->setForceQuirks(); - return emitAndResumeIn(source, DataState); - } else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToSystemIdentifier(cc); - ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); - } - } - END_STATE() - - BEGIN_STATE(AfterDOCTYPESystemIdentifierState) { - if (isTokenizerWhitespace(cc)) - ADVANCE_TO(AfterDOCTYPESystemIdentifierState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - m_token->setForceQuirks(); - return emitAndReconsumeIn(source, DataState); - } else { - parseError(); - ADVANCE_TO(BogusDOCTYPEState); - } - } - END_STATE() - - BEGIN_STATE(BogusDOCTYPEState) { - if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) - return emitAndReconsumeIn(source, DataState); - ADVANCE_TO(BogusDOCTYPEState); - } - END_STATE() - - BEGIN_STATE(CDATASectionState) { - if (cc == ']') - ADVANCE_TO(CDATASectionRightSquareBracketState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) - RECONSUME_IN(DataState); - else { - bufferCharacter(cc); - ADVANCE_TO(CDATASectionState); - } - } - END_STATE() - - BEGIN_STATE(CDATASectionRightSquareBracketState) { - if (cc == ']') - ADVANCE_TO(CDATASectionDoubleRightSquareBracketState); - else { - bufferCharacter(']'); - RECONSUME_IN(CDATASectionState); - } - } - - BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) { - if (cc == '>') - ADVANCE_TO(DataState); - else { - bufferCharacter(']'); - bufferCharacter(']'); - RECONSUME_IN(CDATASectionState); - } - } - END_STATE() - - } - - ASSERT_NOT_REACHED(); - return false; -} - -void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame) -{ - if (tagName == textareaTag || tagName == titleTag) - setState(RCDATAState); - else if (tagName == plaintextTag) - setState(PLAINTEXTState); - else if (tagName == scriptTag) - setState(ScriptDataState); - else if (tagName == styleTag - || tagName == iframeTag - || tagName == xmpTag - || (tagName == noembedTag && HTMLTreeBuilder::pluginsEnabled(frame)) - || tagName == noframesTag - || (tagName == noscriptTag && HTMLTreeBuilder::scriptEnabled(frame))) - setState(RAWTEXTState); -} - -inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) -{ - return vectorEqualsString(m_temporaryBuffer, expectedString); -} - -inline void HTMLTokenizer::addToPossibleEndTag(UChar cc) -{ - ASSERT(isEndTagBufferingState(m_state)); - m_bufferedEndTagName.append(cc); -} - -inline bool HTMLTokenizer::isAppropriateEndTag() -{ - return m_bufferedEndTagName == m_appropriateEndTagName; -} - -inline void HTMLTokenizer::bufferCharacter(UChar character) -{ - ASSERT(character != InputStreamPreprocessor::endOfFileMarker); - m_token->ensureIsCharacterToken(); - m_token->appendToCharacter(character); -} - -inline void HTMLTokenizer::parseError() -{ - notImplemented(); -} - -inline bool HTMLTokenizer::haveBufferedCharacterToken() -{ - return m_token->type() == HTMLToken::Character; -} - -} |