summaryrefslogtreecommitdiffstats
path: root/WebCore/html/parser/HTMLTokenizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/html/parser/HTMLTokenizer.cpp')
-rw-r--r--WebCore/html/parser/HTMLTokenizer.cpp64
1 files changed, 35 insertions, 29 deletions
diff --git a/WebCore/html/parser/HTMLTokenizer.cpp b/WebCore/html/parser/HTMLTokenizer.cpp
index f5405ff..305fca2 100644
--- a/WebCore/html/parser/HTMLTokenizer.cpp
+++ b/WebCore/html/parser/HTMLTokenizer.cpp
@@ -30,6 +30,7 @@
#include "HTMLEntityParser.h"
#include "HTMLToken.h"
+#include "HTMLTreeBuilder.h"
#include "HTMLNames.h"
#include "NotImplemented.h"
#include <wtf/ASCIICType.h>
@@ -102,8 +103,9 @@ inline bool isEndTagBufferingState(HTMLTokenizer::State state)
}
-HTMLTokenizer::HTMLTokenizer()
+// Constructs a tokenizer, remembering in m_usePreHTML5ParserQuirks whether
+// pre-HTML5 parser quirks were requested. When that flag is set, several tag
+// and attribute states in nextToken() respond to a '<' character by calling
+// emitAndReconsumeIn(source, DataState) instead of falling through to their
+// other branches. reset() is called once the members are initialized.
+HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks)
: m_inputStreamPreprocessor(this)
+ , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
{
reset();
}
@@ -171,7 +173,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
// Sometimes there's more complicated logic in the spec that separates when
// we consume the next input character and when we switch to a particular
-// state. We handle those cases by advancing the source directly and using
+// state. We handle those cases by advancing the source directly and using
// this macro to switch to the indicated state.
#define SWITCH_TO(stateName) \
do { \
@@ -277,7 +279,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
// Note that this logic is different than the generic \r\n collapsing
- // handled in the input stream preprocessor. This logic is here as an
+ // handled in the input stream preprocessor. This logic is here as an
// "authoring convenience" so folks can write:
//
// <pre>
@@ -435,6 +437,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->appendToName(toLowerCase(cc));
ADVANCE_TO(TagNameState);
@@ -876,6 +880,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
m_token->beginAttributeName(source.numberOfCharactersConsumed());
@@ -908,6 +914,9 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
} else if (cc == '>') {
m_token->endAttributeName(source.numberOfCharactersConsumed());
return emitAndResumeIn(source, DataState);
+ } else if (m_usePreHTML5ParserQuirks && cc == '<') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ return emitAndReconsumeIn(source, DataState);
} else if (isASCIIUpper(cc)) {
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
@@ -933,6 +942,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
ADVANCE_TO(BeforeAttributeValueState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
m_token->beginAttributeName(source.numberOfCharactersConsumed());
@@ -1054,7 +1065,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
m_token->appendToAttributeValue(*iter);
}
// We're supposed to switch back to the attribute value state that
- // we were in when we were switched into this state. Rather than
+ // we were in when we were switched into this state. Rather than
// keeping track of this explicitly, we observe that the previous
// state can be determined by m_additionalAllowedCharacter.
if (m_additionalAllowedCharacter == '"')
@@ -1075,6 +1086,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
@@ -1213,13 +1226,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
BEGIN_STATE(CommentEndState) {
if (cc == '>')
return emitAndResumeIn(source, DataState);
- else if (isTokenizerWhitespace(cc)) {
- parseError();
- m_token->appendToComment('-');
- m_token->appendToComment('-');
- m_token->appendToComment(cc);
- ADVANCE_TO(CommentEndSpaceState);
- } else if (cc == '!') {
+ else if (cc == '!') {
parseError();
ADVANCE_TO(CommentEndBangState);
} else if (cc == '-') {
@@ -1260,24 +1267,6 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
}
END_STATE()
- BEGIN_STATE(CommentEndSpaceState) {
- if (isTokenizerWhitespace(cc)) {
- m_token->appendToComment(cc);
- ADVANCE_TO(CommentEndSpaceState);
- } else if (cc == '-')
- ADVANCE_TO(CommentEndDashState);
- else if (cc == '>')
- return emitAndResumeIn(source, DataState);
- else if (cc == InputStreamPreprocessor::endOfFileMarker) {
- parseError();
- return emitAndReconsumeIn(source, DataState);
- } else {
- m_token->appendToComment(cc);
- ADVANCE_TO(CommentState);
- }
- }
- END_STATE()
-
BEGIN_STATE(DOCTYPEState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeDOCTYPENameState);
@@ -1656,6 +1645,23 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
return false;
}
+// Switches the tokenizer state according to the tag name just seen:
+//   textarea, title                      -> RCDATAState
+//   plaintext                            -> PLAINTEXTState
+//   script                               -> ScriptDataState
+//   style, iframe, xmp, noframes         -> RAWTEXTState
+//   noembed  (only if pluginsEnabled)    -> RAWTEXTState
+//   noscript (only if scriptEnabled)     -> RAWTEXTState
+// Any other tag name leaves the current state untouched. The frame argument
+// is only forwarded to HTMLTreeBuilder::pluginsEnabled() and
+// HTMLTreeBuilder::scriptEnabled().
+void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame)
+{
+ if (tagName == textareaTag || tagName == titleTag)
+ setState(RCDATAState);
+ else if (tagName == plaintextTag)
+ setState(PLAINTEXTState);
+ else if (tagName == scriptTag)
+ setState(ScriptDataState);
+ // noembed and noscript switch to RAWTEXTState only when the corresponding
+ // feature (plugins / script) is reported as enabled for the frame.
+ else if (tagName == styleTag
+ || tagName == iframeTag
+ || tagName == xmpTag
+ || (tagName == noembedTag && HTMLTreeBuilder::pluginsEnabled(frame))
+ || tagName == noframesTag
+ || (tagName == noscriptTag && HTMLTreeBuilder::scriptEnabled(frame)))
+ setState(RAWTEXTState);
+}
+
inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
{
return vectorEqualsString(m_temporaryBuffer, expectedString);