From 68513a70bcd92384395513322f1b801e7bf9c729 Mon Sep 17 00:00:00 2001 From: Steve Block Date: Wed, 29 Sep 2010 17:32:26 +0100 Subject: Merge WebKit at r67908: Initial merge by Git Change-Id: I43a553e7b3299b28cb6ee8aa035ed70fe342b972 --- WebCore/html/parser/HTMLConstructionSite.cpp | 20 +-- WebCore/html/parser/HTMLDocumentParser.cpp | 29 ++-- WebCore/html/parser/HTMLDocumentParser.h | 4 +- WebCore/html/parser/HTMLElementStack.cpp | 27 +++- WebCore/html/parser/HTMLElementStack.h | 2 +- WebCore/html/parser/HTMLParserIdioms.cpp | 144 ++++++++++++++++++++ WebCore/html/parser/HTMLParserIdioms.h | 66 +++++++++ WebCore/html/parser/HTMLPreloadScanner.cpp | 12 +- WebCore/html/parser/HTMLScriptRunner.cpp | 37 ++--- WebCore/html/parser/HTMLScriptRunner.h | 2 +- WebCore/html/parser/HTMLTokenizer.cpp | 64 +++++---- WebCore/html/parser/HTMLTokenizer.h | 36 +++-- WebCore/html/parser/HTMLTreeBuilder.cpp | 187 +++++++------------------- WebCore/html/parser/HTMLTreeBuilder.h | 15 --- WebCore/html/parser/HTMLViewSourceParser.cpp | 12 +- WebCore/html/parser/NestingLevelIncrementer.h | 50 +++++++ 16 files changed, 430 insertions(+), 277 deletions(-) create mode 100644 WebCore/html/parser/HTMLParserIdioms.cpp create mode 100644 WebCore/html/parser/HTMLParserIdioms.h create mode 100644 WebCore/html/parser/NestingLevelIncrementer.h (limited to 'WebCore/html/parser') diff --git a/WebCore/html/parser/HTMLConstructionSite.cpp b/WebCore/html/parser/HTMLConstructionSite.cpp index 0172b3d..6215bba 100644 --- a/WebCore/html/parser/HTMLConstructionSite.cpp +++ b/WebCore/html/parser/HTMLConstructionSite.cpp @@ -114,19 +114,19 @@ PassRefPtr HTMLConstructionSite::attach(ContainerNode* parent, PassRe void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr prpChild) { + // FIXME: It's unfortunate that we need to hold a reference to child + // here to call attach(). We should investigate whether we can rely on + // |site.parent| to hold a ref at this point. 
RefPtr child = prpChild; - if (site.nextChild) { + if (site.nextChild) site.parent->parserInsertBefore(child, site.nextChild); - if (site.parent->attached() && !child->attached()) - child->attach(); - return; - } - site.parent->parserAddChild(child); - // It's slightly unfortunate that we need to hold a reference to child - // here to call attach(). We should investigate whether we can rely on - // |site.parent| to hold a ref at this point. - if (site.parent->attached() && !child->attached()) + else + site.parent->parserAddChild(child); + + // JavaScript run from beforeload (or DOM Mutation or event handlers) + // might have removed the child, in which case we should not attach it. + if (child->parentNode() && site.parent->attached() && !child->attached()) child->attach(); } diff --git a/WebCore/html/parser/HTMLDocumentParser.cpp b/WebCore/html/parser/HTMLDocumentParser.cpp index a442d54..2da403f 100644 --- a/WebCore/html/parser/HTMLDocumentParser.cpp +++ b/WebCore/html/parser/HTMLDocumentParser.cpp @@ -36,6 +36,8 @@ #include "HTMLScriptRunner.h" #include "HTMLTreeBuilder.h" #include "HTMLDocument.h" +#include "NestingLevelIncrementer.h" +#include "Settings.h" #include "XSSAuditor.h" #include @@ -53,23 +55,6 @@ using namespace HTMLNames; namespace { -class NestingLevelIncrementer : public Noncopyable { -public: - explicit NestingLevelIncrementer(int& counter) - : m_counter(&counter) - { - ++(*m_counter); - } - - ~NestingLevelIncrementer() - { - --(*m_counter); - } - -private: - int* m_counter; -}; - // This is a direct transcription of step 4 from: // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors) @@ -99,7 +84,7 @@ HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bo HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors) : ScriptableDocumentParser(document) - , 
m_tokenizer(HTMLTokenizer::create()) + , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(document))) , m_scriptRunner(HTMLScriptRunner::create(document, this)) , m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), document, reportErrors)) , m_parserScheduler(HTMLParserScheduler::create(this)) @@ -112,7 +97,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors // minimize code duplication between these constructors. HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission) : ScriptableDocumentParser(fragment->document()) - , m_tokenizer(HTMLTokenizer::create()) + , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document()))) , m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), fragment, contextElement, scriptingPermission)) , m_endWasDelayed(false) , m_writeNestingLevel(0) @@ -547,5 +532,11 @@ void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFra ASSERT(!parser->processingData()); // Make sure we're done. parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. 
} + +bool HTMLDocumentParser::usePreHTML5ParserQuirks(Document* document) +{ + ASSERT(document); + return document->settings() && document->settings()->usePreHTML5ParserQuirks(); +} } diff --git a/WebCore/html/parser/HTMLDocumentParser.h b/WebCore/html/parser/HTMLDocumentParser.h index 6d5b6d7..e65a582 100644 --- a/WebCore/html/parser/HTMLDocumentParser.h +++ b/WebCore/html/parser/HTMLDocumentParser.h @@ -66,6 +66,8 @@ public: void resumeParsingAfterYield(); static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, FragmentScriptingPermission = FragmentScriptingAllowed); + + static bool usePreHTML5ParserQuirks(Document*); protected: virtual void insert(const SegmentedString&); @@ -139,7 +141,7 @@ private: OwnPtr m_parserScheduler; bool m_endWasDelayed; - int m_writeNestingLevel; + unsigned m_writeNestingLevel; }; } diff --git a/WebCore/html/parser/HTMLElementStack.cpp b/WebCore/html/parser/HTMLElementStack.cpp index b6f4111..123778d 100644 --- a/WebCore/html/parser/HTMLElementStack.cpp +++ b/WebCore/html/parser/HTMLElementStack.cpp @@ -28,6 +28,8 @@ #include "Element.h" #include "HTMLNames.h" +#include "MathMLNames.h" +#include "SVGNames.h" #include #if ENABLE(SVG) @@ -92,6 +94,19 @@ inline bool isTableRowScopeMarker(Element* element) || element->hasTagName(htmlTag); } +inline bool isForeignContentScopeMarker(Element* element) +{ + return element->hasTagName(MathMLNames::miTag) + || element->hasTagName(MathMLNames::moTag) + || element->hasTagName(MathMLNames::mnTag) + || element->hasTagName(MathMLNames::msTag) + || element->hasTagName(MathMLNames::mtextTag) + || element->hasTagName(SVGNames::foreignObjectTag) + || element->hasTagName(SVGNames::descTag) + || element->hasTagName(SVGNames::titleTag) + || element->namespaceURI() == HTMLNames::xhtmlNamespaceURI; +} + inline bool isButtonScopeMarker(Element* element) { return isScopeMarker(element) @@ -186,12 +201,6 @@ void HTMLElementStack::pop() popCommon(); } -void 
HTMLElementStack::popUntilElementWithNamespace(const AtomicString& namespaceURI) -{ - while (top()->namespaceURI() != namespaceURI) - pop(); -} - void HTMLElementStack::popUntil(const AtomicString& tagName) { while (!top()->hasLocalName(tagName)) { @@ -247,6 +256,12 @@ void HTMLElementStack::popUntilTableRowScopeMarker() pop(); } +void HTMLElementStack::popUntilForeignContentScopeMarker() +{ + while (!isForeignContentScopeMarker(top())) + pop(); +} + void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr element) { ASSERT(!m_top); // should always be the bottom of the stack. diff --git a/WebCore/html/parser/HTMLElementStack.h b/WebCore/html/parser/HTMLElementStack.h index 73cfcb1..47fa603 100644 --- a/WebCore/html/parser/HTMLElementStack.h +++ b/WebCore/html/parser/HTMLElementStack.h @@ -90,7 +90,6 @@ public: void pop(); void popUntil(const AtomicString& tagName); - void popUntilElementWithNamespace(const AtomicString& namespaceURI); void popUntil(Element*); void popUntilPopped(const AtomicString& tagName); void popUntilPopped(Element*); @@ -98,6 +97,7 @@ public: void popUntilTableScopeMarker(); // "clear the stack back to a table context" in the spec. void popUntilTableBodyScopeMarker(); // "clear the stack back to a table body context" in the spec. void popUntilTableRowScopeMarker(); // "clear the stack back to a table row context" in the spec. + void popUntilForeignContentScopeMarker(); void popHTMLHeadElement(); void popHTMLBodyElement(); void popAll(); diff --git a/WebCore/html/parser/HTMLParserIdioms.cpp b/WebCore/html/parser/HTMLParserIdioms.cpp new file mode 100644 index 0000000..a558cf5 --- /dev/null +++ b/WebCore/html/parser/HTMLParserIdioms.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "HTMLParserIdioms.h" + +#include +#include +#include + +namespace WebCore { + +String stripLeadingAndTrailingHTMLSpaces(const String& string) +{ + const UChar* characters = string.characters(); + unsigned length = string.length(); + + unsigned numLeadingSpaces; + for (numLeadingSpaces = 0; numLeadingSpaces < length; ++numLeadingSpaces) { + if (isNotHTMLSpace(characters[numLeadingSpaces])) + break; + } + + if (numLeadingSpaces == length) + return emptyAtom; + + unsigned numTrailingSpaces; + for (numTrailingSpaces = 0; numTrailingSpaces < length; ++numTrailingSpaces) { + if (isNotHTMLSpace(characters[length - numTrailingSpaces - 1])) + break; + } + + ASSERT(numLeadingSpaces + numTrailingSpaces < length); + + return string.substring(numLeadingSpaces, length - numTrailingSpaces); +} + +String serializeForNumberType(double number) +{ + // According to HTML5, "the best representation of the number n as a floating + // point number" is a string produced by applying ToString() to n. + NumberToStringBuffer buffer; + unsigned length = numberToString(number, buffer); + return String(buffer, length); +} + +bool parseToDoubleForNumberType(const String& string, double* result) +{ + // See HTML5 2.4.4.3 `Real numbers.' + + // String::toDouble() accepts leading + and whitespace characters, which are not valid here. + UChar firstCharacter = string[0]; + if (firstCharacter != '-' && !isASCIIDigit(firstCharacter)) + return false; + + bool valid = false; + double value = string.toDouble(&valid); + if (!valid) + return false; + + // NaN and infinity are considered valid by String::toDouble, but not valid here. + if (!isfinite(value)) + return false; + + if (result) { + // The following expression converts -0 to +0. + *result = value ? 
value : 0; + } + + return true; +} + +// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers +bool parseHTMLInteger(const String& input, int& value) +{ + // Step 1 + // Step 2 + const UChar* position = input.characters(); + const UChar* end = position + input.length(); + + // Step 3 + int sign = 1; + + // Step 4 + while (position < end) { + if (!isHTMLSpace(*position)) + break; + ++position; + } + + // Step 5 + if (position == end) + return false; + ASSERT(position < end); + + // Step 6 + if (*position == '-') { + sign = -1; + ++position; + } else if (*position == '+') + ++position; + if (position == end) + return false; + ASSERT(position < end); + + // Step 7 + if (!isASCIIDigit(*position)) + return false; + + // Step 8 + Vector digits; + while (position < end) { + if (!isASCIIDigit(*position)) + break; + digits.append(*position++); + } + + // Step 9 + value = sign * charactersToIntStrict(digits.data(), digits.size()); + return true; +} + +} diff --git a/WebCore/html/parser/HTMLParserIdioms.h b/WebCore/html/parser/HTMLParserIdioms.h new file mode 100644 index 0000000..f4704f7 --- /dev/null +++ b/WebCore/html/parser/HTMLParserIdioms.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HTMLParserIdioms_h +#define HTMLParserIdioms_h + +#include +#include + +namespace WebCore { + +// Space characters as defined by the HTML specification. +bool isHTMLSpace(UChar); +bool isNotHTMLSpace(UChar); + +// Strip leading and trailing whitespace as defined by the HTML specification. +String stripLeadingAndTrailingHTMLSpaces(const String&); + +// An implementation of the HTML specification's algorithm to convert a number to a string for number and range types. +String serializeForNumberType(double); + +// Convert the specified string to a double. If the conversion fails, the return value is false. +// Leading or trailing illegal characters cause failure, as does passing an empty string. +// The double* parameter may be 0 to check if the string can be parsed without getting the result. +bool parseToDoubleForNumberType(const String&, double*); + +// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers +bool parseHTMLInteger(const String&, int&); + +// Inline implementations of some of the functions declared above. + +inline bool isHTMLSpace(UChar character) +{ + // FIXME: Consider branch permutations as we did in isASCIISpace. 
+ return character == '\t' || character == '\x0A' || character == '\x0C' || character == '\x0D' || character == ' '; +} + +inline bool isNotHTMLSpace(UChar character) +{ + return !isHTMLSpace(character); +} + +} + +#endif diff --git a/WebCore/html/parser/HTMLPreloadScanner.cpp b/WebCore/html/parser/HTMLPreloadScanner.cpp index 5283fa3..7859dd8 100644 --- a/WebCore/html/parser/HTMLPreloadScanner.cpp +++ b/WebCore/html/parser/HTMLPreloadScanner.cpp @@ -31,8 +31,8 @@ #include "CSSHelper.h" #include "CachedResourceLoader.h" #include "Document.h" +#include "HTMLDocumentParser.h" #include "HTMLTokenizer.h" -#include "HTMLTreeBuilder.h" #include "HTMLLinkElement.h" #include "HTMLNames.h" @@ -121,7 +121,7 @@ private: HTMLPreloadScanner::HTMLPreloadScanner(Document* document) : m_document(document) , m_cssScanner(document) - , m_tokenizer(HTMLTokenizer::create()) + , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document))) , m_bodySeen(false) , m_inStyle(false) { @@ -157,13 +157,7 @@ void HTMLPreloadScanner::processToken() return; PreloadTask task(m_token); - m_tokenizer->setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer->state(), task.tagName(), m_document->frame())); - if (task.tagName() == scriptTag) { - // The tree builder handles scriptTag separately from the other tokenizer - // state adjustments, so we need to handle it separately too. 
- ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState); - m_tokenizer->setState(HTMLTokenizer::ScriptDataState); - } + m_tokenizer->updateStateFor(task.tagName(), m_document->frame()); if (task.tagName() == bodyTag) m_bodySeen = true; diff --git a/WebCore/html/parser/HTMLScriptRunner.cpp b/WebCore/html/parser/HTMLScriptRunner.cpp index e1fc120..4f54f42 100644 --- a/WebCore/html/parser/HTMLScriptRunner.cpp +++ b/WebCore/html/parser/HTMLScriptRunner.cpp @@ -35,6 +35,7 @@ #include "HTMLScriptRunnerHost.h" #include "HTMLInputStream.h" #include "HTMLNames.h" +#include "NestingLevelIncrementer.h" #include "NotImplemented.h" #include "ScriptElement.h" #include "ScriptSourceCode.h" @@ -43,24 +44,6 @@ namespace WebCore { using namespace HTMLNames; -// FIXME: Factor out to avoid duplication with HTMLDocumentParser. -class NestingLevelIncrementer : public Noncopyable { -public: - explicit NestingLevelIncrementer(unsigned& nestingLevel) - : m_nestingLevel(&nestingLevel) - { - ++(*m_nestingLevel); - } - - ~NestingLevelIncrementer() - { - --(*m_nestingLevel); - } - -private: - unsigned* m_nestingLevel; -}; - HTMLScriptRunner::HTMLScriptRunner(Document* document, HTMLScriptRunnerHost* host) : m_document(document) , m_host(host) @@ -155,20 +138,16 @@ void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendi if (errorOccurred) scriptElement->dispatchEvent(createScriptErrorEvent()); else { - executeScript(scriptElement.get(), sourceCode); + executeScript(sourceCode); scriptElement->dispatchEvent(createScriptLoadEvent()); } } ASSERT(!m_scriptNestingLevel); } -void HTMLScriptRunner::executeScript(Element* element, const ScriptSourceCode& sourceCode) const +void HTMLScriptRunner::executeScript(const ScriptSourceCode& sourceCode) const { ASSERT(m_document); - ScriptElement* scriptElement = toScriptElement(element); - ASSERT(scriptElement); - if (!scriptElement->shouldExecuteAsJavaScript()) - return; ASSERT(isExecutingScript()); if (!m_document->frame()) 
return; @@ -317,9 +296,11 @@ void HTMLScriptRunner::runScript(Element* script, int startingLineNumber) InsertionPointRecord insertionPointRecord(m_host->inputStream()); NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel); - // Check script type and language, current code uses ScriptElement::shouldExecuteAsJavaScript(), but that may not be HTML5 compliant. - notImplemented(); // event for support - + ScriptElement* scriptElement = toScriptElement(script); + ASSERT(scriptElement); + if (!scriptElement->shouldExecuteAsJavaScript()) + return; + if (script->hasAttribute(srcAttr)) { if (script->hasAttribute(asyncAttr)) // Async takes precendence over defer. return; // Asynchronous scripts handle themselves. @@ -335,7 +316,7 @@ void HTMLScriptRunner::runScript(Element* script, int startingLineNumber) // ASSERT(document()->haveStylesheetsLoaded()); ASSERT(isExecutingScript()); ScriptSourceCode sourceCode(script->textContent(), documentURLForScriptExecution(m_document), startingLineNumber); - executeScript(script, sourceCode); + executeScript(sourceCode); } } } diff --git a/WebCore/html/parser/HTMLScriptRunner.h b/WebCore/html/parser/HTMLScriptRunner.h index 47c96fd..be21dd2 100644 --- a/WebCore/html/parser/HTMLScriptRunner.h +++ b/WebCore/html/parser/HTMLScriptRunner.h @@ -68,7 +68,7 @@ private: void executeParsingBlockingScript(); void executePendingScriptAndDispatchEvent(PendingScript&); - void executeScript(Element*, const ScriptSourceCode&) const; + void executeScript(const ScriptSourceCode&) const; bool haveParsingBlockingScript() const; bool executeParsingBlockingScripts(); diff --git a/WebCore/html/parser/HTMLTokenizer.cpp b/WebCore/html/parser/HTMLTokenizer.cpp index f5405ff..305fca2 100644 --- a/WebCore/html/parser/HTMLTokenizer.cpp +++ b/WebCore/html/parser/HTMLTokenizer.cpp @@ -30,6 +30,7 @@ #include "HTMLEntityParser.h" #include "HTMLToken.h" +#include "HTMLTreeBuilder.h" #include "HTMLNames.h" #include "NotImplemented.h" #include @@ -102,8 
+103,9 @@ inline bool isEndTagBufferingState(HTMLTokenizer::State state) } -HTMLTokenizer::HTMLTokenizer() +HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks) : m_inputStreamPreprocessor(this) + , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks) { reset(); } @@ -171,7 +173,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source) // Sometimes there's more complicated logic in the spec that separates when // we consume the next input character and when we switch to a particular -// state. We handle those cases by advancing the source directly and using +// state. We handle those cases by advancing the source directly and using // this macro to switch to the indicated state. #define SWITCH_TO(stateName) \ do { \ @@ -277,7 +279,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody // Note that this logic is different than the generic \r\n collapsing - // handled in the input stream preprocessor. This logic is here as an + // handled in the input stream preprocessor. This logic is here as an // "authoring convenience" so folks can write: // //
@@ -435,6 +437,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
             ADVANCE_TO(SelfClosingStartTagState);
         else if (cc == '>')
             return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
         else if (isASCIIUpper(cc)) {
             m_token->appendToName(toLowerCase(cc));
             ADVANCE_TO(TagNameState);
@@ -876,6 +880,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
             ADVANCE_TO(SelfClosingStartTagState);
         else if (cc == '>')
             return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
         else if (isASCIIUpper(cc)) {
             m_token->addNewAttribute();
             m_token->beginAttributeName(source.numberOfCharactersConsumed());
@@ -908,6 +914,9 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
         } else if (cc == '>') {
             m_token->endAttributeName(source.numberOfCharactersConsumed());
             return emitAndResumeIn(source, DataState);
+        } else if (m_usePreHTML5ParserQuirks && cc == '<') {
+            m_token->endAttributeName(source.numberOfCharactersConsumed());
+            return emitAndReconsumeIn(source, DataState);
         } else if (isASCIIUpper(cc)) {
             m_token->appendToAttributeName(toLowerCase(cc));
             ADVANCE_TO(AttributeNameState);
@@ -933,6 +942,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
             ADVANCE_TO(BeforeAttributeValueState);
         else if (cc == '>')
             return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
         else if (isASCIIUpper(cc)) {
             m_token->addNewAttribute();
             m_token->beginAttributeName(source.numberOfCharactersConsumed());
@@ -1054,7 +1065,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
                 m_token->appendToAttributeValue(*iter);
         }
         // We're supposed to switch back to the attribute value state that
-        // we were in when we were switched into this state.  Rather than
+        // we were in when we were switched into this state. Rather than
         // keeping track of this explictly, we observe that the previous
         // state can be determined by m_additionalAllowedCharacter.
         if (m_additionalAllowedCharacter == '"')
@@ -1075,6 +1086,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
             ADVANCE_TO(SelfClosingStartTagState);
         else if (cc == '>')
             return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
             parseError();
             RECONSUME_IN(DataState);
@@ -1213,13 +1226,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
     BEGIN_STATE(CommentEndState) {
         if (cc == '>')
             return emitAndResumeIn(source, DataState);
-        else if (isTokenizerWhitespace(cc)) {
-            parseError();
-            m_token->appendToComment('-');
-            m_token->appendToComment('-');
-            m_token->appendToComment(cc);
-            ADVANCE_TO(CommentEndSpaceState);
-        } else if (cc == '!') {
+        else if (cc == '!') {
             parseError();
             ADVANCE_TO(CommentEndBangState);
         } else if (cc == '-') {
@@ -1260,24 +1267,6 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
     }
     END_STATE()
 
-    BEGIN_STATE(CommentEndSpaceState) {
-        if (isTokenizerWhitespace(cc)) {
-            m_token->appendToComment(cc);
-            ADVANCE_TO(CommentEndSpaceState);
-        } else if (cc == '-')
-            ADVANCE_TO(CommentEndDashState);
-        else if (cc == '>')
-            return emitAndResumeIn(source, DataState);
-        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
-            parseError();
-            return emitAndReconsumeIn(source, DataState);
-        } else {
-            m_token->appendToComment(cc);
-            ADVANCE_TO(CommentState);
-        }
-    }
-    END_STATE()
-
     BEGIN_STATE(DOCTYPEState) {
         if (isTokenizerWhitespace(cc))
             ADVANCE_TO(BeforeDOCTYPENameState);
@@ -1656,6 +1645,23 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
     return false;
 }
 
+void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame)
+{
+    if (tagName == textareaTag || tagName == titleTag)
+        setState(RCDATAState);
+    else if (tagName == plaintextTag)
+        setState(PLAINTEXTState);
+    else if (tagName == scriptTag)
+        setState(ScriptDataState);
+    else if (tagName == styleTag
+        || tagName == iframeTag
+        || tagName == xmpTag
+        || (tagName == noembedTag && HTMLTreeBuilder::pluginsEnabled(frame))
+        || tagName == noframesTag
+        || (tagName == noscriptTag && HTMLTreeBuilder::scriptEnabled(frame)))
+        setState(RAWTEXTState);
+}
+
 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
 {
     return vectorEqualsString(m_temporaryBuffer, expectedString);
diff --git a/WebCore/html/parser/HTMLTokenizer.h b/WebCore/html/parser/HTMLTokenizer.h
index bab77f3..f16b049 100644
--- a/WebCore/html/parser/HTMLTokenizer.h
+++ b/WebCore/html/parser/HTMLTokenizer.h
@@ -36,6 +36,7 @@
 namespace WebCore {
 
 class Element;
+class Frame;
 class HTMLToken;
 
 class HTMLTokenizer : public Noncopyable {
@@ -96,7 +97,6 @@ public:
         CommentEndDashState,
         CommentEndState,
         CommentEndBangState,
-        CommentEndSpaceState,
         DOCTYPEState,
         BeforeDOCTYPENameState,
         DOCTYPENameState,
@@ -119,12 +119,12 @@ public:
         CDATASectionDoubleRightSquareBracketState,
     };
 
-    static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer); }
+    static PassOwnPtr<HTMLTokenizer> create(bool usePreHTML5ParserQuirks) { return adoptPtr(new HTMLTokenizer(usePreHTML5ParserQuirks)); }
     ~HTMLTokenizer();
 
     void reset();
 
-    // This function returns true if it emits a token.  Otherwise, callers
+    // This function returns true if it emits a token. Otherwise, callers
     // must provide the same (in progress) token on the next call (unless
     // they call reset() first).
     bool nextToken(SegmentedString&, HTMLToken&);
@@ -135,6 +135,22 @@ public:
     State state() const { return m_state; }
     void setState(State state) { m_state = state; }
 
+    // Updates the tokenizer's state according to the given tag name. This is
+    // an approximation of how the tree builder would update the tokenizer's
+    // state. This method is useful for approximating HTML tokenization. To
+    // get exactly the correct tokenization, you need the real tree builder.
+    //
+    // The main failures in the approximation are as follows:
+    //
+    //  * The first set of character tokens emitted for a <pre> element might
+    //    contain an extra leading newline.
+    //  * The replacement of U+0000 with U+FFFD will not be sensitive to the
+    //    tree builder's insertion mode.
+    //  * CDATA sections in foreign content will be tokenized as bogus comments
+    //    instead of as character tokens.
+    //
+    void updateStateFor(const AtomicString& tagName, Frame*);
+
     // Hack to skip leading newline in <pre>/<listing> for authoring ease.
     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
     void setSkipLeadingNewLineForListing(bool value) { m_skipLeadingNewLineForListing = value; }
@@ -177,8 +193,8 @@ private:
 
             // Every branch in this function is expensive, so we have a
             // fast-reject branch for characters that don't require special
-            // handling.  Please run the parser benchmark whenever you touch
-            // this function.  It's very hot.
+            // handling. Please run the parser benchmark whenever you touch
+            // this function. It's very hot.
             static const UChar specialCharacterMask = '\n' | '\r' | '\0';
             if (m_nextInputCharacter & ~specialCharacterMask) {
                 m_skipNextNewLine = false;
@@ -238,7 +254,7 @@ private:
         bool m_skipNextNewLine;
     };
 
-    HTMLTokenizer();
+    HTMLTokenizer(bool usePreHTML5ParserQuirks);
 
     inline bool processEntity(SegmentedString&);
 
@@ -257,7 +273,7 @@ private:
     inline bool temporaryBufferIs(const String&);
 
     // Sometimes we speculatively consume input characters and we don't
-    // know whether they represent end tags or RCDATA, etc.  These
+    // know whether they represent end tags or RCDATA, etc. These
     // functions help manage these state.
     inline void addToPossibleEndTag(UChar cc);
     inline void saveEndTagNameIfNeeded();
@@ -269,7 +285,7 @@ private:
 
     Vector m_appropriateEndTagName;
 
-    // m_token is owned by the caller.  If nextToken is not on the stack,
+    // m_token is owned by the caller. If nextToken is not on the stack,
     // this member might be pointing to unallocated memory.
     HTMLToken* m_token;
     int m_lineNumber;
@@ -282,7 +298,7 @@ private:
     Vector m_temporaryBuffer;
 
     // We occationally want to emit both a character token and an end tag
-    // token (e.g., when lexing script).  We buffer the name of the end tag
+    // token (e.g., when lexing script). We buffer the name of the end tag
     // token here so we remember it next time we re-enter the tokenizer.
     Vector m_bufferedEndTagName;
 
@@ -291,6 +307,8 @@ private:
 
     // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
     InputStreamPreprocessor m_inputStreamPreprocessor;
+    
+    bool m_usePreHTML5ParserQuirks;
 };
 
 }
diff --git a/WebCore/html/parser/HTMLTreeBuilder.cpp b/WebCore/html/parser/HTMLTreeBuilder.cpp
index 406bb6c..afac2a0 100644
--- a/WebCore/html/parser/HTMLTreeBuilder.cpp
+++ b/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -26,16 +26,17 @@
 #include "config.h"
 #include "HTMLTreeBuilder.h"
 
+#include "CharacterNames.h"
 #include "Comment.h"
 #include "DocumentFragment.h"
 #include "DocumentType.h"
-#include "Element.h"
 #include "Frame.h"
 #include "HTMLDocument.h"
 #include "HTMLElementFactory.h"
 #include "HTMLFormElement.h"
 #include "HTMLHtmlElement.h"
 #include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
 #include "HTMLScriptElement.h"
 #include "HTMLToken.h"
 #include "HTMLTokenizer.h"
@@ -44,15 +45,10 @@
 #include "NotImplemented.h"
 #include "SVGNames.h"
 #include "ScriptController.h"
-#include "Settings.h"
 #include "Text.h"
 #include "XLinkNames.h"
 #include "XMLNSNames.h"
 #include "XMLNames.h"
-// FIXME: Remove this include once we find a home for the free functions that
-// are using it.
-#include 
-#include 
 
 namespace WebCore {
 
@@ -62,42 +58,19 @@ static const int uninitializedLineNumberValue = -1;
 
 namespace {
 
-inline bool isTreeBuilderWhitepace(UChar c)
+inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
 {
-    // FIXME: Consider branch permutations.
-    return c == '\t' || c == '\x0A' || c == '\x0C' || c == '\x0D' || c == ' ';
-}
-
-inline bool isNotTreeBuilderWhitepace(UChar c)
-{
-    return !isTreeBuilderWhitepace(c);
-}
-
-inline bool isTreeBuilderWhitepaceOrReplacementCharacter(UChar c)
-{
-    return isTreeBuilderWhitepace(c) || c == 0xFFFD;
-}
-
-template
-inline bool isAllSpecialCharacters(const String& string)
-{
-    const UChar* characters = string.characters();
-    const unsigned length = string.length();
-    for (unsigned i = 0; i < length; ++i) {
-        if (!isSpecialCharacter(characters[i]))
-            return false;
-    }
-    return true;
+    return isHTMLSpace(character) || character == replacementCharacter;
 }
 
 inline bool isAllWhitespace(const String& string)
 {
-    return isAllSpecialCharacters<isTreeBuilderWhitepace>(string);
+    return string.isAllSpecialCharacters<isHTMLSpace>();
 }
 
 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
 {
-    return isAllSpecialCharacters<isTreeBuilderWhitepaceOrReplacementCharacter>(string);
+    return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
 }
 
 bool isNumberedHeaderTag(const AtomicString& tagName)
@@ -132,11 +105,14 @@ bool isTableBodyContextTag(const AtomicString& tagName)
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
 bool isSpecialNode(Node* node)
 {
+    if (node->hasTagName(SVGNames::foreignObjectTag))
+        return true;
     if (node->namespaceURI() != xhtmlNamespaceURI)
         return false;
-    // FIXME: This list is out of sync with the spec.
     const AtomicString& tagName = node->localName();
     return tagName == addressTag
+        || tagName == appletTag
+        || tagName == areaTag
         || tagName == articleTag
         || tagName == asideTag
         || tagName == baseTag
@@ -146,6 +122,7 @@ bool isSpecialNode(Node* node)
         || tagName == bodyTag
         || tagName == brTag
         || tagName == buttonTag
+        || tagName == captionTag
         || tagName == centerTag
         || tagName == colTag
         || tagName == colgroupTag
@@ -158,6 +135,7 @@ bool isSpecialNode(Node* node)
         || tagName == dtTag
         || tagName == embedTag
         || tagName == fieldsetTag
+        || tagName == figcaptionTag
         || tagName == figureTag
         || tagName == footerTag
         || tagName == formTag
@@ -176,12 +154,14 @@ bool isSpecialNode(Node* node)
         || tagName == liTag
         || tagName == linkTag
         || tagName == listingTag
+        || tagName == marqueeTag
         || tagName == menuTag
         || tagName == metaTag
         || tagName == navTag
         || tagName == noembedTag
         || tagName == noframesTag
         || tagName == noscriptTag
+        || tagName == objectTag
         || tagName == olTag
         || tagName == pTag
         || tagName == paramTag
@@ -191,8 +171,12 @@ bool isSpecialNode(Node* node)
         || tagName == sectionTag
         || tagName == selectTag
         || tagName == styleTag
+        || tagName == summaryTag
+        || tagName == tableTag
         || isTableBodyContextTag(tagName)
+        || tagName == tdTag
         || tagName == textareaTag
+        || tagName == thTag
         || tagName == titleTag
         || tagName == trTag
         || tagName == ulTag
@@ -268,17 +252,17 @@ public:
 
     void skipLeadingWhitespace()
     {
-        skipLeading<isTreeBuilderWhitepace>();
+        skipLeading<isHTMLSpace>();
     }
 
     String takeLeadingWhitespace()
     {
-        return takeLeading<isTreeBuilderWhitepace>();
+        return takeLeading<isHTMLSpace>();
     }
 
     String takeLeadingNonWhitespace()
     {
-        return takeLeading<isNotTreeBuilderWhitepace>();
+        return takeLeading<isNotHTMLSpace>();
     }
 
     String takeRemaining()
@@ -301,7 +285,7 @@ public:
         Vector whitespace;
         do {
             UChar cc = *m_current++;
-            if (isTreeBuilderWhitepace(cc))
+            if (isHTMLSpace(cc))
                 whitespace.append(cc);
         } while (m_current < m_end);
         // Returning the null string when there aren't any whitespace
@@ -402,7 +386,7 @@ HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
 }
 
 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
-    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL()))
+    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI()))
     , m_fragment(fragment)
     , m_contextElement(contextElement)
     , m_scriptingPermission(scriptingPermission)
@@ -441,25 +425,6 @@ PassRefPtr HTMLTreeBuilder::takeScriptToProcess(int& scriptStartLine)
     return m_scriptToProcess.release();
 }
 
-HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State state, const AtomicString& tagName, Frame* frame)
-{
-    if (tagName == textareaTag || tagName == titleTag)
-        return HTMLTokenizer::RCDATAState;
-
-    if (tagName == styleTag
-        || tagName == iframeTag
-        || tagName == xmpTag
-        || (tagName == noembedTag && pluginsEnabled(frame))
-        || tagName == noframesTag
-        || (tagName == noscriptTag && scriptEnabled(frame)))
-        return HTMLTokenizer::RAWTEXTState;
-
-    if (tagName == plaintextTag)
-        return HTMLTokenizer::PLAINTEXTState;
-
-    return state;
-}
-
 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
 {
     AtomicHTMLToken token(rawToken);
@@ -1121,8 +1086,6 @@ void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
         parseError(token);
         if (m_tree.form())
             return;
-        // FIXME: This deviates from the spec:
-        //        http://www.w3.org/Bugs/Public/show_bug.cgi?id=10216
         m_tree.insertHTMLFormElement(token, true);
         m_tree.openElements()->pop();
         return;
@@ -1477,7 +1440,6 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
         processStartTag(token);
         break;
     case InForeignContentMode: {
-        // FIXME: We're missing a bunch of if branches here.
         if (shouldProcessUsingSecondaryInsertionMode(token, m_tree.currentElement())) {
             processUsingSecondaryInsertionModeAndAdjustInsertionMode(token);
             return;
@@ -1522,8 +1484,10 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
             || token.name() == ulTag
             || token.name() == varTag
             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
-            m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI);
-            setInsertionMode(m_secondaryInsertionMode);
+            parseError(token);
+            m_tree.openElements()->popUntilForeignContentScopeMarker();
+            if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
+                setInsertionMode(m_secondaryInsertionMode);
             processStartTag(token);
             return;
         }
@@ -1539,7 +1503,7 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
         break;
     }
     case TextMode:
-        notImplemented();
+        ASSERT_NOT_REACHED();
         break;
     }
 }
@@ -1601,21 +1565,6 @@ HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElem
     return 0;
 }
 
-// FIXME: This should have a whitty name.
-// FIXME: This must be implemented in many other places in WebCore.
-void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent)
-{
-    Node* child = oldParent->firstChild();
-    while (child) {
-        Node* nextChild = child->nextSibling();
-        oldParent->parserRemoveChild(child);
-        newParent->parserAddChild(child);
-        if (newParent->attached() && !child->attached())
-            child->attach();
-        child = nextChild;
-    }
-}
-
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
 {
@@ -1708,7 +1657,7 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
         // 8
         RefPtr newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
         // 9
-        reparentChildren(furthestBlock->element(), newElement.get());
+        newElement->takeAllChildrenFrom(furthestBlock->element());
         // 10
         Element* furthestBlockElement = furthestBlock->element();
         // FIXME: All this creation / parserAddChild / attach business should
@@ -1886,9 +1835,6 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
         m_tree.openElements()->popUntilPopped(token.name());
         m_tree.activeFormattingElements()->clearToLastMarker();
         setInsertionMode(InRowMode);
-        // FIXME: The fragment case of this ASSERT is a spec bug:
-        // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10338
-        ASSERT(m_tree.currentElement()->hasTagName(trTag) || (isParsingFragment() && m_fragmentContext.contextElement()->hasTagName(trTag)));
         return;
     }
     if (token.name() == bodyTag
@@ -1902,8 +1848,6 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
         || isTableBodyContextTag(token.name())) {
         if (!m_tree.openElements()->inTableScope(token.name())) {
             ASSERT(isParsingFragment());
-            // FIXME: It is unclear what the exact ASSERT should be.
-            // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10098
             parseError(token);
             return;
         }
@@ -2020,10 +1964,6 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
         return;
     }
-    if (token.name() == "sarcasm") {
-        notImplemented(); // Take a deep breath.
-        return;
-    }
     if (isFormattingTag(token.name())) {
         callTheAdoptionAgency(token);
         return;
@@ -2608,14 +2548,15 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
         // Fall through
     case InBodyMode:
     case InCellMode:
-        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode);
-        notImplemented(); // Emit parse error based on what elemtns are still open.
+    case InCaptionMode:
+    case InRowMode:
+        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
+        notImplemented(); // Emit parse error based on what elements are still open.
         break;
     case AfterBodyMode:
     case AfterAfterBodyMode:
         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
-        notImplemented();
-        break;
+        return;
     case InHeadNoscriptMode:
         ASSERT(insertionMode() == InHeadNoscriptMode);
         defaultForInHeadNoscript();
@@ -2647,9 +2588,11 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
         return;
     case InForeignContentMode:
         parseError(token);
-        // FIXME: Following the spec would infinitely recurse on <svg><svg>
-        // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10115
-        m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI);
+        m_tree.openElements()->popUntilForeignContentScopeMarker();
+        // FIXME: The spec adds the following condition before setting the
+        //        insertion mode.  However, this condition causes an infinite loop.
+        //        See http://www.w3.org/Bugs/Public/show_bug.cgi?id=10621
+        //        if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
         setInsertionMode(m_secondaryInsertionMode);
         processEndOfFile(token);
         return;
@@ -2658,10 +2601,13 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
         processEndOfFile(token);
         return;
     case TextMode:
-    case InCaptionMode:
-    case InRowMode:
-        notImplemented();
-        break;
+        parseError(token);
+        if (m_tree.currentElement()->hasTagName(scriptTag))
+            notImplemented(); // mark the script element as "already started".
+        m_tree.openElements()->pop();
+        setInsertionMode(m_originalInsertionMode);
+        processEndOfFile(token);
+        return;
     }
     ASSERT(m_tree.openElements()->top());
     m_tree.openElements()->popAll();
@@ -2813,9 +2759,7 @@ bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
 {
     if (!frame)
         return false;
-    if (ScriptController* scriptController = frame->script())
-        return scriptController->canExecuteScripts(NotAboutToExecuteScript);
-    return false;
+    return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
 }
 
 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
@@ -2825,41 +2769,4 @@ bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
 }
 
-// FIXME: Move this function to a more appropriate place.
-String serializeForNumberType(double number)
-{
-    // According to HTML5, "the best representation of the number n as a floating
-    // point number" is a string produced by applying ToString() to n.
-    NumberToStringBuffer buffer;
-    unsigned length = numberToString(number, buffer);
-    return String(buffer, length);
-}
-
-// FIXME: Move this function to a more appropriate place.
-bool parseToDoubleForNumberType(const String& src, double* out)
-{
-    // See HTML5 2.4.4.3 `Real numbers.'
-
-    if (src.isEmpty())
-        return false;
-    // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5.
-    // So, check the first character.
-    if (src[0] != '-' && (src[0] < '0' || src[0] > '9'))
-        return false;
-
-    bool valid = false;
-    double value = src.toDouble(&valid);
-    if (!valid)
-        return false;
-    // NaN and Infinity are not valid numbers according to the standard.
-    if (!isfinite(value))
-        return false;
-    // -0 -> 0
-    if (!value)
-        value = 0;
-    if (out)
-        *out = value;
-    return true;
-}
-
 }
diff --git a/WebCore/html/parser/HTMLTreeBuilder.h b/WebCore/html/parser/HTMLTreeBuilder.h
index 4634f0a..d522ea8 100644
--- a/WebCore/html/parser/HTMLTreeBuilder.h
+++ b/WebCore/html/parser/HTMLTreeBuilder.h
@@ -76,8 +76,6 @@ public:
     // Done, close any open tags, etc.
     void finished();
 
-    static HTMLTokenizer::State adjustedLexerState(HTMLTokenizer::State, const AtomicString& tagName, Frame*);
-
     static bool scriptEnabled(Frame*);
     static bool pluginsEnabled(Frame*);
 
@@ -170,7 +168,6 @@ private:
     PassRefPtr attributesForIsindexInput(AtomicHTMLToken&);
 
     HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
-    void reparentChildren(Element* oldParent, Element* newParent);
     void callTheAdoptionAgency(AtomicHTMLToken&);
 
     void closeTheCell();
@@ -257,18 +254,6 @@ private:
     int m_lastScriptElementStartLine;
 };
 
-// FIXME: Move these functions to a more appropriate place.
-
-// Converts the specified string to a floating number.
-// If the conversion fails, the return value is false. Take care that leading
-// or trailing unnecessary characters make failures.  This returns false for an
-// empty string input.
-// The double* parameter may be 0.
-bool parseToDoubleForNumberType(const String&, double*);
-// Converts the specified number to a string. This is an implementation of
-// HTML5's "algorithm to convert a number to a string" for NUMBER/RANGE types.
-String serializeForNumberType(double);
-
 }
 
 #endif
diff --git a/WebCore/html/parser/HTMLViewSourceParser.cpp b/WebCore/html/parser/HTMLViewSourceParser.cpp
index 8a7984d..ace8590 100644
--- a/WebCore/html/parser/HTMLViewSourceParser.cpp
+++ b/WebCore/html/parser/HTMLViewSourceParser.cpp
@@ -26,15 +26,15 @@
 #include "config.h"
 #include "HTMLViewSourceParser.h"
 
+#include "HTMLDocumentParser.h"
 #include "HTMLNames.h"
-#include "HTMLTreeBuilder.h"
 #include "HTMLViewSourceDocument.h"
 
 namespace WebCore {
 
 HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument* document)
     : DecodedDataDocumentParser(document)
-    , m_tokenizer(HTMLTokenizer::create())
+    , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document)))
 {
 }
 
@@ -87,13 +87,7 @@ void HTMLViewSourceParser::updateTokenizerState()
         return;
 
     AtomicString tagName(m_token.name().data(), m_token.name().size());
-    m_tokenizer->setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer->state(), tagName, document()->frame()));
-    if (tagName == HTMLNames::scriptTag) {
-        // The tree builder handles scriptTag separately from the other tokenizer
-        // state adjustments, so we need to handle it separately too.
-        ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
-        m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
-    }
+    m_tokenizer->updateStateFor(tagName, document()->frame());
 }
 
 void HTMLViewSourceParser::finish()
diff --git a/WebCore/html/parser/NestingLevelIncrementer.h b/WebCore/html/parser/NestingLevelIncrementer.h
new file mode 100644
index 0000000..c597876
--- /dev/null
+++ b/WebCore/html/parser/NestingLevelIncrementer.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NestingLevelIncrementer_h
+#define NestingLevelIncrementer_h
+
+namespace WebCore {
+
+class NestingLevelIncrementer : public Noncopyable { // Increments a counter on construction and decrements it on destruction. NOTE(review): no #include declaring Noncopyable is visible in this header; confirm includers provide it.
+public:
+    explicit NestingLevelIncrementer(unsigned& nestingLevel) // Stores the address of the caller's counter and increments it immediately.
+        : m_nestingLevel(&nestingLevel)
+    {
+        ++(*m_nestingLevel);
+    }
+            
+    ~NestingLevelIncrementer() // Reverses the constructor's increment on the same counter.
+    {
+        --(*m_nestingLevel);
+    }
+            
+private:
+    unsigned* m_nestingLevel; // Points at the caller's counter; the destructor dereferences it, so the counter must still exist then.
+};
+
+}
+
+#endif
-- 
cgit v1.1