16 files changed, 430 insertions, 277 deletions
diff --git a/WebCore/html/parser/HTMLConstructionSite.cpp b/WebCore/html/parser/HTMLConstructionSite.cpp
index 0172b3d..6215bba 100644
--- a/WebCore/html/parser/HTMLConstructionSite.cpp
+++ b/WebCore/html/parser/HTMLConstructionSite.cpp
@@ -114,19 +114,19 @@ PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRe
 
 void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
 {
+    // FIXME: It's unfortunate that we need to hold a reference to child
+    // here to call attach().  We should investigate whether we can rely on
+    // |site.parent| to hold a ref at this point.
     RefPtr<Node> child = prpChild;
 
-    if (site.nextChild) {
+    if (site.nextChild)
         site.parent->parserInsertBefore(child, site.nextChild);
-        if (site.parent->attached() && !child->attached())
-            child->attach();
-        return;
-    }
-    site.parent->parserAddChild(child);
-    // It's slightly unfortunate that we need to hold a reference to child
-    // here to call attach().  We should investigate whether we can rely on
-    // |site.parent| to hold a ref at this point.
-    if (site.parent->attached() && !child->attached())
+    else
+        site.parent->parserAddChild(child);
+
+    // JavaScript run from beforeload (or DOM Mutation or event handlers)
+    // might have removed the child, in which case we should not attach it.
+    if (child->parentNode() && site.parent->attached() && !child->attached())
         child->attach();
 }
 
diff --git a/WebCore/html/parser/HTMLDocumentParser.cpp b/WebCore/html/parser/HTMLDocumentParser.cpp
index a442d54..2da403f 100644
--- a/WebCore/html/parser/HTMLDocumentParser.cpp
+++ b/WebCore/html/parser/HTMLDocumentParser.cpp
@@ -36,6 +36,8 @@
 #include "HTMLScriptRunner.h"
 #include "HTMLTreeBuilder.h"
 #include "HTMLDocument.h"
+#include "NestingLevelIncrementer.h"
+#include "Settings.h"
 #include "XSSAuditor.h"
 #include <wtf/CurrentTime.h>
 
@@ -53,23 +55,6 @@ using namespace HTMLNames;
 
 namespace {
 
-class NestingLevelIncrementer : public Noncopyable {
-public:
-    explicit NestingLevelIncrementer(int& counter)
-        : m_counter(&counter)
-    {
-        ++(*m_counter);
-    }
-
-    ~NestingLevelIncrementer()
-    {
-        --(*m_counter);
-    }
-
-private:
-    int* m_counter;
-};
-
 // This is a direct transcription of step 4 from:
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
 HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors)
@@ -99,7 +84,7 @@ HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bo
 
 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
     : ScriptableDocumentParser(document)
-    , m_tokenizer(HTMLTokenizer::create())
+    , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(document)))
     , m_scriptRunner(HTMLScriptRunner::create(document, this))
     , m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), document, reportErrors))
     , m_parserScheduler(HTMLParserScheduler::create(this))
@@ -112,7 +97,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors
 // minimize code duplication between these constructors.
 HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
     : ScriptableDocumentParser(fragment->document())
-    , m_tokenizer(HTMLTokenizer::create())
+    , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document())))
     , m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), fragment, contextElement, scriptingPermission))
     , m_endWasDelayed(false)
     , m_writeNestingLevel(0)
@@ -547,5 +532,11 @@ void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFra
     ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151>
     parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
 }
+    
+bool HTMLDocumentParser::usePreHTML5ParserQuirks(Document* document)
+{
+    ASSERT(document); // A null document is a caller bug; it is dereferenced below.
+    return document->settings() && document->settings()->usePreHTML5ParserQuirks(); // A document without Settings reports false.
+}
 
 }
diff --git a/WebCore/html/parser/HTMLDocumentParser.h b/WebCore/html/parser/HTMLDocumentParser.h
index 6d5b6d7..e65a582 100644
--- a/WebCore/html/parser/HTMLDocumentParser.h
+++ b/WebCore/html/parser/HTMLDocumentParser.h
@@ -66,6 +66,8 @@ public:
     void resumeParsingAfterYield();
 
     static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, FragmentScriptingPermission = FragmentScriptingAllowed);
+    
+    static bool usePreHTML5ParserQuirks(Document*);
 
 protected:
     virtual void insert(const SegmentedString&);
@@ -139,7 +141,7 @@ private:
     OwnPtr<HTMLParserScheduler> m_parserScheduler;
 
     bool m_endWasDelayed;
-    int m_writeNestingLevel;
+    unsigned m_writeNestingLevel;
 };
 
 }
diff --git a/WebCore/html/parser/HTMLElementStack.cpp b/WebCore/html/parser/HTMLElementStack.cpp
index b6f4111..123778d 100644
--- a/WebCore/html/parser/HTMLElementStack.cpp
+++ b/WebCore/html/parser/HTMLElementStack.cpp
@@ -28,6 +28,8 @@
 
 #include "Element.h"
 #include "HTMLNames.h"
+#include "MathMLNames.h"
+#include "SVGNames.h"
 #include <wtf/PassOwnPtr.h>
 
 #if ENABLE(SVG)
@@ -92,6 +94,19 @@ inline bool isTableRowScopeMarker(Element* element)
         || element->hasTagName(htmlTag);
 }
 
+inline bool isForeignContentScopeMarker(Element* element)
+{
+    return element->hasTagName(MathMLNames::miTag) // MathML markers: mi, mo, mn, ms, mtext.
+        || element->hasTagName(MathMLNames::moTag)
+        || element->hasTagName(MathMLNames::mnTag)
+        || element->hasTagName(MathMLNames::msTag)
+        || element->hasTagName(MathMLNames::mtextTag)
+        || element->hasTagName(SVGNames::foreignObjectTag) // SVG markers: foreignObject, desc, title.
+        || element->hasTagName(SVGNames::descTag)
+        || element->hasTagName(SVGNames::titleTag)
+        || element->namespaceURI() == HTMLNames::xhtmlNamespaceURI; // Any element in the XHTML namespace is a marker too.
+}
+
 inline bool isButtonScopeMarker(Element* element)
 {
     return isScopeMarker(element)
@@ -186,12 +201,6 @@ void HTMLElementStack::pop()
     popCommon();
 }
 
-void HTMLElementStack::popUntilElementWithNamespace(const AtomicString& namespaceURI)
-{
-    while (top()->namespaceURI() != namespaceURI)
-        pop();
-}
-
 void HTMLElementStack::popUntil(const AtomicString& tagName)
 {
     while (!top()->hasLocalName(tagName)) {
@@ -247,6 +256,12 @@ void HTMLElementStack::popUntilTableRowScopeMarker()
         pop();
 }
 
+void HTMLElementStack::popUntilForeignContentScopeMarker()
+{
+    while (!isForeignContentScopeMarker(top())) // The marker itself is left on the stack.
+        pop(); // NOTE(review): presumably the root <html> element (XHTML namespace) guarantees termination - confirm.
+}
+
 void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr<Element> element)
 {
     ASSERT(!m_top); // <html> should always be the bottom of the stack.
diff --git a/WebCore/html/parser/HTMLElementStack.h b/WebCore/html/parser/HTMLElementStack.h
index 73cfcb1..47fa603 100644
--- a/WebCore/html/parser/HTMLElementStack.h
+++ b/WebCore/html/parser/HTMLElementStack.h
@@ -90,7 +90,6 @@ public:
 
     void pop();
     void popUntil(const AtomicString& tagName);
-    void popUntilElementWithNamespace(const AtomicString& namespaceURI);
     void popUntil(Element*);
     void popUntilPopped(const AtomicString& tagName);
     void popUntilPopped(Element*);
@@ -98,6 +97,7 @@ public:
     void popUntilTableScopeMarker(); // "clear the stack back to a table context" in the spec.
     void popUntilTableBodyScopeMarker(); // "clear the stack back to a table body context" in the spec.
     void popUntilTableRowScopeMarker(); // "clear the stack back to a table row context" in the spec.
+    void popUntilForeignContentScopeMarker();
     void popHTMLHeadElement();
     void popHTMLBodyElement();
     void popAll();
diff --git a/WebCore/html/parser/HTMLParserIdioms.cpp b/WebCore/html/parser/HTMLParserIdioms.cpp
new file mode 100644
index 0000000..a558cf5
--- /dev/null
+++ b/WebCore/html/parser/HTMLParserIdioms.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.  Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2.  Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLParserIdioms.h"
+
+#include <wtf/MathExtras.h>
+#include <wtf/dtoa.h>
+#include <wtf/text/AtomicString.h>
+
+namespace WebCore {
+
+// Returns |string| without its leading and trailing HTML space characters.
+// An empty string, or one made up only of HTML spaces, yields the empty string (emptyAtom).
+String stripLeadingAndTrailingHTMLSpaces(const String& string)
+{
+    const UChar* characters = string.characters();
+    unsigned length = string.length();
+
+    unsigned numLeadingSpaces = 0;
+    while (numLeadingSpaces < length && isHTMLSpace(characters[numLeadingSpaces]))
+        ++numLeadingSpaces;
+
+    if (numLeadingSpaces == length)
+        return emptyAtom;
+
+    unsigned numTrailingSpaces = 0;
+    while (numTrailingSpaces < length && isHTMLSpace(characters[length - numTrailingSpaces - 1]))
+        ++numTrailingSpaces;
+
+    ASSERT(numLeadingSpaces + numTrailingSpaces < length);
+
+    // String::substring() takes (position, length), not (start, end), so the
+    // leading run has to be subtracted from the length as well as the trailing one.
+    return string.substring(numLeadingSpaces, length - numLeadingSpaces - numTrailingSpaces);
+}
+
+String serializeForNumberType(double number)
+{
+    // HTML5 defines "the best representation of the number n as a floating
+    // point number" as the string obtained by applying ToString() to n.
+    NumberToStringBuffer digits;
+    const unsigned digitCount = numberToString(number, digits);
+    return String(digits, digitCount);
+}
+
+bool parseToDoubleForNumberType(const String& string, double* result)
+{
+    // Implements the `Real numbers' rules of HTML5 2.4.4.3.
+
+    // A valid number starts with '-' or a digit; String::toDouble() would also
+    // tolerate a leading '+' or whitespace, so screen the first character here.
+    const UChar leadingCharacter = string[0];
+    const bool startsLikeNumber = leadingCharacter == '-' || isASCIIDigit(leadingCharacter);
+    if (!startsLikeNumber)
+        return false;
+
+    // String::toDouble() reports success for NaN and infinity, which are not
+    // valid here, so require a finite value on top of a clean conversion.
+    bool converted = false;
+    const double parsed = string.toDouble(&converted);
+    if (!converted || !isfinite(parsed))
+        return false;
+
+    // A null |result| means the caller only wants validation.
+    // A zero value is always stored as +0, never as -0.
+    if (result)
+        *result = parsed ? parsed : 0;
+
+    return true;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
+bool parseHTMLInteger(const String& input, int& value)
+{
+    // Steps 1 and 2
+    const UChar* position = input.characters();
+    const UChar* end = position + input.length();
+
+    // Step 3
+    int sign = 1;
+
+    // Step 4
+    while (position < end) {
+        if (!isHTMLSpace(*position))
+            break;
+        ++position;
+    }
+
+    // Step 5
+    if (position == end)
+        return false;
+    ASSERT(position < end);
+
+    // Step 6
+    if (*position == '-') {
+        sign = -1;
+        ++position;
+    } else if (*position == '+')
+        ++position;
+    if (position == end)
+        return false;
+    ASSERT(position < end);
+
+    // Step 7
+    if (!isASCIIDigit(*position))
+        return false;
+
+    // Step 8
+    Vector<UChar, 16> digits;
+    while (position < end) {
+        if (!isASCIIDigit(*position))
+            break;
+        digits.append(*position++);
+    }
+
+    // Step 9 (a digit run that does not fit in an int is reported as a failure, not as a bogus value)
+    bool ok = false;
+    value = sign * charactersToIntStrict(digits.data(), digits.size(), &ok);
+    return ok;
+}
+
+}
diff --git a/WebCore/html/parser/HTMLParserIdioms.h b/WebCore/html/parser/HTMLParserIdioms.h
new file mode 100644
index 0000000..f4704f7
--- /dev/null
+++ b/WebCore/html/parser/HTMLParserIdioms.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.  Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2.  Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLParserIdioms_h
+#define HTMLParserIdioms_h
+
+#include <wtf/Forward.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Space characters as defined by the HTML specification.
+bool isHTMLSpace(UChar);
+bool isNotHTMLSpace(UChar);
+
+// Strip leading and trailing whitespace as defined by the HTML specification. 
+String stripLeadingAndTrailingHTMLSpaces(const String&);
+
+// An implementation of the HTML specification's algorithm to convert a number to a string for number and range types.
+String serializeForNumberType(double);
+
+// Convert the specified string to a double. If the conversion fails, the return value is false.
+// Leading or trailing illegal characters cause failure, as does passing an empty string.
+// The double* parameter may be 0 to check if the string can be parsed without getting the result.
+bool parseToDoubleForNumberType(const String&, double*);
+
+// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
+bool parseHTMLInteger(const String&, int&);
+
+// Inline implementations of some of the functions declared above.
+
+inline bool isHTMLSpace(UChar character)
+{
+    // FIXME: Consider branch permutations as we did in isASCIISpace.
+    return character == '\t' || character == '\n' || character == '\f' || character == '\r' || character == ' '; // U+0009, U+000A, U+000C, U+000D, U+0020
+}
+
+inline bool isNotHTMLSpace(UChar character)
+{
+    return !isHTMLSpace(character); // Exact negation of isHTMLSpace().
+}
+
+}
+
+#endif
diff --git a/WebCore/html/parser/HTMLPreloadScanner.cpp b/WebCore/html/parser/HTMLPreloadScanner.cpp
index 5283fa3..7859dd8 100644
--- a/WebCore/html/parser/HTMLPreloadScanner.cpp
+++ b/WebCore/html/parser/HTMLPreloadScanner.cpp
@@ -31,8 +31,8 @@
 #include "CSSHelper.h"
 #include "CachedResourceLoader.h"
 #include "Document.h"
+#include "HTMLDocumentParser.h"
 #include "HTMLTokenizer.h"
-#include "HTMLTreeBuilder.h"
 #include "HTMLLinkElement.h"
 #include "HTMLNames.h"
 
@@ -121,7 +121,7 @@ private:
 HTMLPreloadScanner::HTMLPreloadScanner(Document* document)
     : m_document(document)
     , m_cssScanner(document)
-    , m_tokenizer(HTMLTokenizer::create())
+    , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document)))
     , m_bodySeen(false)
     , m_inStyle(false)
 {
@@ -157,13 +157,7 @@ void HTMLPreloadScanner::processToken()
         return;
 
     PreloadTask task(m_token);
-    m_tokenizer->setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer->state(), task.tagName(), m_document->frame()));
-    if (task.tagName() == scriptTag) {
-        // The tree builder handles scriptTag separately from the other tokenizer
-        // state adjustments, so we need to handle it separately too.
-        ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
-        m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
-    }
+    m_tokenizer->updateStateFor(task.tagName(), m_document->frame());
 
     if (task.tagName() == bodyTag)
         m_bodySeen = true;
diff --git a/WebCore/html/parser/HTMLScriptRunner.cpp b/WebCore/html/parser/HTMLScriptRunner.cpp
index e1fc120..4f54f42 100644
--- a/WebCore/html/parser/HTMLScriptRunner.cpp
+++ b/WebCore/html/parser/HTMLScriptRunner.cpp
@@ -35,6 +35,7 @@
 #include "HTMLScriptRunnerHost.h"
 #include "HTMLInputStream.h"
 #include "HTMLNames.h"
+#include "NestingLevelIncrementer.h"
 #include "NotImplemented.h"
 #include "ScriptElement.h"
 #include "ScriptSourceCode.h"
@@ -43,24 +44,6 @@ namespace WebCore {
 
 using namespace HTMLNames;
 
-// FIXME: Factor out to avoid duplication with HTMLDocumentParser.
-class NestingLevelIncrementer : public Noncopyable {
-public:
-    explicit NestingLevelIncrementer(unsigned& nestingLevel)
-        : m_nestingLevel(&nestingLevel)
-    {
-        ++(*m_nestingLevel);
-    }
-
-    ~NestingLevelIncrementer()
-    {
-        --(*m_nestingLevel);
-    }
-
-private:
-    unsigned* m_nestingLevel;
-};
-
 HTMLScriptRunner::HTMLScriptRunner(Document* document, HTMLScriptRunnerHost* host)
     : m_document(document)
     , m_host(host)
@@ -155,20 +138,16 @@ void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendi
         if (errorOccurred)
             scriptElement->dispatchEvent(createScriptErrorEvent());
         else {
-            executeScript(scriptElement.get(), sourceCode);
+            executeScript(sourceCode);
             scriptElement->dispatchEvent(createScriptLoadEvent());
         }
     }
     ASSERT(!m_scriptNestingLevel);
 }
 
-void HTMLScriptRunner::executeScript(Element* element, const ScriptSourceCode& sourceCode) const
+void HTMLScriptRunner::executeScript(const ScriptSourceCode& sourceCode) const
 {
     ASSERT(m_document);
-    ScriptElement* scriptElement = toScriptElement(element);
-    ASSERT(scriptElement);
-    if (!scriptElement->shouldExecuteAsJavaScript())
-        return;
     ASSERT(isExecutingScript());
     if (!m_document->frame())
         return;
@@ -317,9 +296,11 @@ void HTMLScriptRunner::runScript(Element* script, int startingLineNumber)
         InsertionPointRecord insertionPointRecord(m_host->inputStream());
         NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel);
 
-        // Check script type and language, current code uses ScriptElement::shouldExecuteAsJavaScript(), but that may not be HTML5 compliant.
-        notImplemented(); // event for support
-
+        ScriptElement* scriptElement = toScriptElement(script);
+        ASSERT(scriptElement);
+        if (!scriptElement->shouldExecuteAsJavaScript())
+            return;
+        
         if (script->hasAttribute(srcAttr)) {
             if (script->hasAttribute(asyncAttr)) // Async takes precendence over defer.
                 return; // Asynchronous scripts handle themselves.
@@ -335,7 +316,7 @@ void HTMLScriptRunner::runScript(Element* script, int startingLineNumber)
             // ASSERT(document()->haveStylesheetsLoaded());
             ASSERT(isExecutingScript());
             ScriptSourceCode sourceCode(script->textContent(), documentURLForScriptExecution(m_document), startingLineNumber);
-            executeScript(script, sourceCode);
+            executeScript(sourceCode);
         }
     }
 }
diff --git a/WebCore/html/parser/HTMLScriptRunner.h b/WebCore/html/parser/HTMLScriptRunner.h
index 47c96fd..be21dd2 100644
--- a/WebCore/html/parser/HTMLScriptRunner.h
+++ b/WebCore/html/parser/HTMLScriptRunner.h
@@ -68,7 +68,7 @@ private:
 
     void executeParsingBlockingScript();
     void executePendingScriptAndDispatchEvent(PendingScript&);
-    void executeScript(Element*, const ScriptSourceCode&) const;
+    void executeScript(const ScriptSourceCode&) const;
     bool haveParsingBlockingScript() const;
     bool executeParsingBlockingScripts();
 
diff --git a/WebCore/html/parser/HTMLTokenizer.cpp b/WebCore/html/parser/HTMLTokenizer.cpp
index f5405ff..305fca2 100644
--- a/WebCore/html/parser/HTMLTokenizer.cpp
+++ b/WebCore/html/parser/HTMLTokenizer.cpp
@@ -30,6 +30,7 @@
 
 #include "HTMLEntityParser.h"
 #include "HTMLToken.h"
+#include "HTMLTreeBuilder.h"
 #include "HTMLNames.h"
 #include "NotImplemented.h"
 #include <wtf/ASCIICType.h>
@@ -102,8 +103,9 @@ inline bool isEndTagBufferingState(HTMLTokenizer::State state)
 
 }
 
-HTMLTokenizer::HTMLTokenizer()
+HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks)
     : m_inputStreamPreprocessor(this)
+    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
 {
     reset();
 }
@@ -171,7 +173,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
 
 // Sometimes there's more complicated logic in the spec that separates when
 // we consume the next input character and when we switch to a particular
-// state.  We handle those cases by advancing the source directly and using
+// state. We handle those cases by advancing the source directly and using
 // this macro to switch to the indicated state.
 #define SWITCH_TO(stateName)                                               \
     do {                                                                   \
@@ -277,7 +279,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
 
     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
     // Note that this logic is different than the generic \r\n collapsing
-    // handled in the input stream preprocessor.  This logic is here as an
+    // handled in the input stream preprocessor. This logic is here as an
     // "authoring convenience" so folks can write:
     //
     // <pre>
@@ -435,6 +437,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
             ADVANCE_TO(SelfClosingStartTagState);
         else if (cc == '>')
             return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
         else if (isASCIIUpper(cc)) {
             m_token->appendToName(toLowerCase(cc));
             ADVANCE_TO(TagNameState);
@@ -876,6 +880,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
             ADVANCE_TO(SelfClosingStartTagState);
         else if (cc == '>')
             return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
         else if (isASCIIUpper(cc)) {
             m_token->addNewAttribute();
             m_token->beginAttributeName(source.numberOfCharactersConsumed());
@@ -908,6 +914,9 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
         } else if (cc == '>') {
             m_token->endAttributeName(source.numberOfCharactersConsumed());
             return emitAndResumeIn(source, DataState);
+        } else if (m_usePreHTML5ParserQuirks && cc == '<') {
+            m_token->endAttributeName(source.numberOfCharactersConsumed());
+            return emitAndReconsumeIn(source, DataState);
         } else if (isASCIIUpper(cc)) {
             m_token->appendToAttributeName(toLowerCase(cc));
             ADVANCE_TO(AttributeNameState);
@@ -933,6 +942,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
             ADVANCE_TO(BeforeAttributeValueState);
         else if (cc == '>')
             return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
         else if (isASCIIUpper(cc)) {
             m_token->addNewAttribute();
             m_token->beginAttributeName(source.numberOfCharactersConsumed());
@@ -1054,7 +1065,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
                 m_token->appendToAttributeValue(*iter);
         }
         // We're supposed to switch back to the attribute value state that
-        // we were in when we were switched into this state.  Rather than
+        // we were in when we were switched into this state. Rather than
         // keeping track of this explictly, we observe that the previous
         // state can be determined by m_additionalAllowedCharacter.
         if (m_additionalAllowedCharacter == '"')
@@ -1075,6 +1086,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
             ADVANCE_TO(SelfClosingStartTagState);
         else if (cc == '>')
             return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
             parseError();
             RECONSUME_IN(DataState);
@@ -1213,13 +1226,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
     BEGIN_STATE(CommentEndState) {
         if (cc == '>')
             return emitAndResumeIn(source, DataState);
-        else if (isTokenizerWhitespace(cc)) {
-            parseError();
-            m_token->appendToComment('-');
-            m_token->appendToComment('-');
-            m_token->appendToComment(cc);
-            ADVANCE_TO(CommentEndSpaceState);
-        } else if (cc == '!') {
+        else if (cc == '!') {
             parseError();
             ADVANCE_TO(CommentEndBangState);
         } else if (cc == '-') {
@@ -1260,24 +1267,6 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
     }
     END_STATE()
 
-    BEGIN_STATE(CommentEndSpaceState) {
-        if (isTokenizerWhitespace(cc)) {
-            m_token->appendToComment(cc);
-            ADVANCE_TO(CommentEndSpaceState);
-        } else if (cc == '-')
-            ADVANCE_TO(CommentEndDashState);
-        else if (cc == '>')
-            return emitAndResumeIn(source, DataState);
-        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
-            parseError();
-            return emitAndReconsumeIn(source, DataState);
-        } else {
-            m_token->appendToComment(cc);
-            ADVANCE_TO(CommentState);
-        }
-    }
-    END_STATE()
-
     BEGIN_STATE(DOCTYPEState) {
         if (isTokenizerWhitespace(cc))
             ADVANCE_TO(BeforeDOCTYPENameState);
@@ -1656,6 +1645,23 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
     return false;
 }
 
+void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame)
+{ // Tag names not matched below leave the tokenizer state unchanged.
+    if (tagName == textareaTag || tagName == titleTag)
+        setState(RCDATAState);
+    else if (tagName == plaintextTag)
+        setState(PLAINTEXTState);
+    else if (tagName == scriptTag)
+        setState(ScriptDataState);
+    else if (tagName == styleTag
+        || tagName == iframeTag
+        || tagName == xmpTag
+        || (tagName == noembedTag && HTMLTreeBuilder::pluginsEnabled(frame)) // <noembed> switches to RAWTEXT only when plugins are enabled.
+        || tagName == noframesTag
+        || (tagName == noscriptTag && HTMLTreeBuilder::scriptEnabled(frame))) // <noscript> switches to RAWTEXT only when script is enabled.
+        setState(RAWTEXTState);
+}
+
 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
 {
     return vectorEqualsString(m_temporaryBuffer, expectedString);
diff --git a/WebCore/html/parser/HTMLTokenizer.h b/WebCore/html/parser/HTMLTokenizer.h
index bab77f3..f16b049 100644
--- a/WebCore/html/parser/HTMLTokenizer.h
+++ b/WebCore/html/parser/HTMLTokenizer.h
@@ -36,6 +36,7 @@
 namespace WebCore {
 
 class Element;
+class Frame;
 class HTMLToken;
 
 class HTMLTokenizer : public Noncopyable {
@@ -96,7 +97,6 @@ public:
         CommentEndDashState,
         CommentEndState,
         CommentEndBangState,
-        CommentEndSpaceState,
         DOCTYPEState,
         BeforeDOCTYPENameState,
         DOCTYPENameState,
@@ -119,12 +119,12 @@ public:
         CDATASectionDoubleRightSquareBracketState,
     };
 
-    static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer); }
+    static PassOwnPtr<HTMLTokenizer> create(bool usePreHTML5ParserQuirks) { return adoptPtr(new HTMLTokenizer(usePreHTML5ParserQuirks)); }
     ~HTMLTokenizer();
 
     void reset();
 
-    // This function returns true if it emits a token.  Otherwise, callers
+    // This function returns true if it emits a token. Otherwise, callers
     // must provide the same (in progress) token on the next call (unless
     // they call reset() first).
     bool nextToken(SegmentedString&, HTMLToken&);
@@ -135,6 +135,22 @@ public:
     State state() const { return m_state; }
     void setState(State state) { m_state = state; }
 
+    // Updates the tokenizer's state according to the given tag name. This is
+    // an approximation of how the tree builder would update the tokenizer's
+    // state. This method is useful for approximating HTML tokenization. To
+    // get exactly the correct tokenization, you need the real tree builder.
+    //
+    // The main failures in the approximation are as follows:
+    //
+    //  * The first set of character tokens emitted for a <pre> element might
+    //    contain an extra leading newline.
+    //  * The replacement of U+0000 with U+FFFD will not be sensitive to the
+    //    tree builder's insertion mode.
+    //  * CDATA sections in foreign content will be tokenized as bogus comments
+    //    instead of as character tokens.
+    //
+    void updateStateFor(const AtomicString& tagName, Frame*);
+
     // Hack to skip leading newline in <pre>/<listing> for authoring ease.
     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
     void setSkipLeadingNewLineForListing(bool value) { m_skipLeadingNewLineForListing = value; }
@@ -177,8 +193,8 @@ private:
 
             // Every branch in this function is expensive, so we have a
             // fast-reject branch for characters that don't require special
-            // handling.  Please run the parser benchmark whenever you touch
-            // this function.  It's very hot.
+            // handling. Please run the parser benchmark whenever you touch
+            // this function. It's very hot.
             static const UChar specialCharacterMask = '\n' | '\r' | '\0';
             if (m_nextInputCharacter & ~specialCharacterMask) {
                 m_skipNextNewLine = false;
@@ -238,7 +254,7 @@ private:
         bool m_skipNextNewLine;
     };
 
-    HTMLTokenizer();
+    HTMLTokenizer(bool usePreHTML5ParserQuirks);
 
     inline bool processEntity(SegmentedString&);
 
@@ -257,7 +273,7 @@ private:
     inline bool temporaryBufferIs(const String&);
 
     // Sometimes we speculatively consume input characters and we don't
-    // know whether they represent end tags or RCDATA, etc.  These
+    // know whether they represent end tags or RCDATA, etc. These
     // functions help manage these state.
     inline void addToPossibleEndTag(UChar cc);
     inline void saveEndTagNameIfNeeded();
@@ -269,7 +285,7 @@ private:
 
     Vector<UChar, 32> m_appropriateEndTagName;
 
-    // m_token is owned by the caller.  If nextToken is not on the stack,
+    // m_token is owned by the caller. If nextToken is not on the stack,
     // this member might be pointing to unallocated memory.
     HTMLToken* m_token;
     int m_lineNumber;
@@ -282,7 +298,7 @@ private:
     Vector<UChar, 32> m_temporaryBuffer;
 
     // We occationally want to emit both a character token and an end tag
-    // token (e.g., when lexing script).  We buffer the name of the end tag
+    // token (e.g., when lexing script). We buffer the name of the end tag
     // token here so we remember it next time we re-enter the tokenizer.
     Vector<UChar, 32> m_bufferedEndTagName;
 
@@ -291,6 +307,8 @@ private:
 
     // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
     InputStreamPreprocessor m_inputStreamPreprocessor;
+
+    bool m_usePreHTML5ParserQuirks;
 };
 
 }
diff --git a/WebCore/html/parser/HTMLTreeBuilder.cpp b/WebCore/html/parser/HTMLTreeBuilder.cpp
index 406bb6c..afac2a0 100644
--- a/WebCore/html/parser/HTMLTreeBuilder.cpp
+++ b/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -26,16 +26,17 @@
 #include "config.h"
 #include "HTMLTreeBuilder.h"
 
+#include "CharacterNames.h"
 #include "Comment.h"
 #include "DocumentFragment.h"
 #include "DocumentType.h"
-#include "Element.h"
 #include "Frame.h"
 #include "HTMLDocument.h"
 #include "HTMLElementFactory.h"
 #include "HTMLFormElement.h"
 #include "HTMLHtmlElement.h"
 #include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
 #include "HTMLScriptElement.h"
 #include "HTMLToken.h"
 #include "HTMLTokenizer.h"
@@ -44,15 +45,10 @@
 #include "NotImplemented.h"
 #include "SVGNames.h"
 #include "ScriptController.h"
-#include "Settings.h"
 #include "Text.h"
 #include "XLinkNames.h"
 #include "XMLNSNames.h"
 #include "XMLNames.h"
-// FIXME: Remove this include once we find a home for the free functions that
-// are using it.
-#include <wtf/dtoa.h>
-#include <wtf/UnusedParam.h>
 
 namespace WebCore {
 
@@ -62,42 +58,19 @@ static const int uninitializedLineNumberValue = -1;
 
 namespace {
 
-inline bool isTreeBuilderWhitepace(UChar c)
+inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
 {
-    // FIXME: Consider branch permutations.
-    return c == '\t' || c == '\x0A' || c == '\x0C' || c == '\x0D' || c == ' ';
-}
-
-inline bool isNotTreeBuilderWhitepace(UChar c)
-{
-    return !isTreeBuilderWhitepace(c);
-}
-
-inline bool isTreeBuilderWhitepaceOrReplacementCharacter(UChar c)
-{
-    return isTreeBuilderWhitepace(c) || c == 0xFFFD;
-}
-
-template<bool isSpecialCharacter(UChar c)>
-inline bool isAllSpecialCharacters(const String& string)
-{
-    const UChar* characters = string.characters();
-    const unsigned length = string.length();
-    for (unsigned i = 0; i < length; ++i) {
-        if (!isSpecialCharacter(characters[i]))
-            return false;
-    }
-    return true;
+    return isHTMLSpace(character) || character == replacementCharacter;
 }
 
 inline bool isAllWhitespace(const String& string)
 {
-    return isAllSpecialCharacters<isTreeBuilderWhitepace>(string);
+    return string.isAllSpecialCharacters<isHTMLSpace>();
 }
 
 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
 {
-    return isAllSpecialCharacters<isTreeBuilderWhitepaceOrReplacementCharacter>(string);
+    return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
 }
 
 bool isNumberedHeaderTag(const AtomicString& tagName)
@@ -132,11 +105,14 @@ bool isTableBodyContextTag(const AtomicString& tagName)
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
 bool isSpecialNode(Node* node)
 {
+    if (node->hasTagName(SVGNames::foreignObjectTag))
+        return true;
     if (node->namespaceURI() != xhtmlNamespaceURI)
         return false;
-    // FIXME: This list is out of sync with the spec.
     const AtomicString& tagName = node->localName();
     return tagName == addressTag
+        || tagName == appletTag
+        || tagName == areaTag
         || tagName == articleTag
         || tagName == asideTag
         || tagName == baseTag
@@ -146,6 +122,7 @@ bool isSpecialNode(Node* node)
         || tagName == bodyTag
         || tagName == brTag
         || tagName == buttonTag
+        || tagName == captionTag
         || tagName == centerTag
         || tagName == colTag
         || tagName == colgroupTag
@@ -158,6 +135,7 @@ bool isSpecialNode(Node* node)
         || tagName == dtTag
         || tagName == embedTag
         || tagName == fieldsetTag
+        || tagName == figcaptionTag
         || tagName == figureTag
         || tagName == footerTag
         || tagName == formTag
@@ -176,12 +154,14 @@ bool isSpecialNode(Node* node)
         || tagName == liTag
         || tagName == linkTag
         || tagName == listingTag
+        || tagName == marqueeTag
         || tagName == menuTag
         || tagName == metaTag
         || tagName == navTag
         || tagName == noembedTag
         || tagName == noframesTag
         || tagName == noscriptTag
+        || tagName == objectTag
         || tagName == olTag
         || tagName == pTag
         || tagName == paramTag
@@ -191,8 +171,12 @@ bool isSpecialNode(Node* node)
         || tagName == sectionTag
         || tagName == selectTag
         || tagName == styleTag
+        || tagName == summaryTag
+        || tagName == tableTag
         || isTableBodyContextTag(tagName)
+        || tagName == tdTag
         || tagName == textareaTag
+        || tagName == thTag
         || tagName == titleTag
         || tagName == trTag
         || tagName == ulTag
@@ -268,17 +252,17 @@ public:
 
     void skipLeadingWhitespace()
     {
-        skipLeading<isTreeBuilderWhitepace>();
+        skipLeading<isHTMLSpace>();
     }
 
     String takeLeadingWhitespace()
     {
-        return takeLeading<isTreeBuilderWhitepace>();
+        return takeLeading<isHTMLSpace>();
     }
 
     String takeLeadingNonWhitespace()
     {
-        return takeLeading<isNotTreeBuilderWhitepace>();
+        return takeLeading<isNotHTMLSpace>();
     }
 
     String takeRemaining()
@@ -301,7 +285,7 @@ public:
         Vector<UChar> whitespace;
         do {
             UChar cc = *m_current++;
-            if (isTreeBuilderWhitepace(cc))
+            if (isHTMLSpace(cc))
                 whitespace.append(cc);
         } while (m_current < m_end);
         // Returning the null string when there aren't any whitespace
@@ -402,7 +386,7 @@ HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
 }
 
 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
-    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL()))
+    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI()))
     , m_fragment(fragment)
     , m_contextElement(contextElement)
     , m_scriptingPermission(scriptingPermission)
@@ -441,25 +425,6 @@ PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(int& scriptStartLine)
     return m_scriptToProcess.release();
 }
 
-HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State state, const AtomicString& tagName, Frame* frame)
-{
-    if (tagName == textareaTag || tagName == titleTag)
-        return HTMLTokenizer::RCDATAState;
-
-    if (tagName == styleTag
-        || tagName == iframeTag
-        || tagName == xmpTag
-        || (tagName == noembedTag && pluginsEnabled(frame))
-        || tagName == noframesTag
-        || (tagName == noscriptTag && scriptEnabled(frame)))
-        return HTMLTokenizer::RAWTEXTState;
-
-    if (tagName == plaintextTag)
-        return HTMLTokenizer::PLAINTEXTState;
-
-    return state;
-}
-
 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
 {
     AtomicHTMLToken token(rawToken);
@@ -1121,8 +1086,6 @@ void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
         parseError(token);
         if (m_tree.form())
             return;
-        // FIXME: This deviates from the spec:
-        //        http://www.w3.org/Bugs/Public/show_bug.cgi?id=10216
         m_tree.insertHTMLFormElement(token, true);
         m_tree.openElements()->pop();
         return;
@@ -1477,7 +1440,6 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
         processStartTag(token);
         break;
     case InForeignContentMode: {
-        // FIXME: We're missing a bunch of if branches here.
         if (shouldProcessUsingSecondaryInsertionMode(token, m_tree.currentElement())) {
             processUsingSecondaryInsertionModeAndAdjustInsertionMode(token);
             return;
@@ -1522,8 +1484,10 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
             || token.name() == ulTag
             || token.name() == varTag
             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
-            m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI);
-            setInsertionMode(m_secondaryInsertionMode);
+            parseError(token);
+            m_tree.openElements()->popUntilForeignContentScopeMarker();
+            if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
+                setInsertionMode(m_secondaryInsertionMode);
             processStartTag(token);
             return;
         }
@@ -1539,7 +1503,7 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
         break;
     }
     case TextMode:
-        notImplemented();
+        ASSERT_NOT_REACHED();
         break;
     }
 }
@@ -1601,21 +1565,6 @@ HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElem
     return 0;
 }
 
-// FIXME: This should have a whitty name.
-// FIXME: This must be implemented in many other places in WebCore.
-void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent)
-{
-    Node* child = oldParent->firstChild();
-    while (child) {
-        Node* nextChild = child->nextSibling();
-        oldParent->parserRemoveChild(child);
-        newParent->parserAddChild(child);
-        if (newParent->attached() && !child->attached())
-            child->attach();
-        child = nextChild;
-    }
-}
-
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
 {
@@ -1708,7 +1657,7 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
         // 8
         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
         // 9
-        reparentChildren(furthestBlock->element(), newElement.get());
+        newElement->takeAllChildrenFrom(furthestBlock->element());
         // 10
         Element* furthestBlockElement = furthestBlock->element();
         // FIXME: All this creation / parserAddChild / attach business should
@@ -1886,9 +1835,6 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
         m_tree.openElements()->popUntilPopped(token.name());
         m_tree.activeFormattingElements()->clearToLastMarker();
         setInsertionMode(InRowMode);
-        // FIXME: The fragment case of this ASSERT is a spec bug:
-        // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10338
-        ASSERT(m_tree.currentElement()->hasTagName(trTag) || (isParsingFragment() && m_fragmentContext.contextElement()->hasTagName(trTag)));
         return;
     }
     if (token.name() == bodyTag
@@ -1902,8 +1848,6 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
         || isTableBodyContextTag(token.name())) {
         if (!m_tree.openElements()->inTableScope(token.name())) {
             ASSERT(isParsingFragment());
-            // FIXME: It is unclear what the exact ASSERT should be.
-            // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10098
             parseError(token);
             return;
         }
@@ -2020,10 +1964,6 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
         return;
     }
-    if (token.name() == "sarcasm") {
-        notImplemented(); // Take a deep breath.
-        return;
-    }
     if (isFormattingTag(token.name())) {
         callTheAdoptionAgency(token);
         return;
@@ -2608,14 +2548,15 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
         // Fall through
     case InBodyMode:
     case InCellMode:
-        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode);
-        notImplemented(); // Emit parse error based on what elemtns are still open.
+    case InCaptionMode:
+    case InRowMode:
+        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
+        notImplemented(); // Emit parse error based on what elements are still open.
         break;
     case AfterBodyMode:
     case AfterAfterBodyMode:
         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
-        notImplemented();
-        break;
+        return;
     case InHeadNoscriptMode:
         ASSERT(insertionMode() == InHeadNoscriptMode);
         defaultForInHeadNoscript();
@@ -2647,9 +2588,11 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
         return;
     case InForeignContentMode:
         parseError(token);
-        // FIXME: Following the spec would infinitely recurse on <svg><svg>
-        // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10115
-        m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI);
+        m_tree.openElements()->popUntilForeignContentScopeMarker();
+        // FIXME: The spec adds the following condition before setting the
+        //        insertion mode.  However, this condition causes an infinite loop.
+        //        See http://www.w3.org/Bugs/Public/show_bug.cgi?id=10621
+        //        if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
         setInsertionMode(m_secondaryInsertionMode);
         processEndOfFile(token);
         return;
@@ -2658,10 +2601,13 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
         processEndOfFile(token);
         return;
     case TextMode:
-    case InCaptionMode:
-    case InRowMode:
-        notImplemented();
-        break;
+        parseError(token);
+        if (m_tree.currentElement()->hasTagName(scriptTag))
+            notImplemented(); // mark the script element as "already started".
+        m_tree.openElements()->pop();
+        setInsertionMode(m_originalInsertionMode);
+        processEndOfFile(token);
+        return;
     }
     ASSERT(m_tree.openElements()->top());
     m_tree.openElements()->popAll();
@@ -2813,9 +2759,7 @@ bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
 {
     if (!frame)
         return false;
-    if (ScriptController* scriptController = frame->script())
-        return scriptController->canExecuteScripts(NotAboutToExecuteScript);
-    return false;
+    return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
 }
 
 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
@@ -2825,41 +2769,4 @@ bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
 }
 
-// FIXME: Move this function to a more appropriate place.
-String serializeForNumberType(double number)
-{
-    // According to HTML5, "the best representation of the number n as a floating
-    // point number" is a string produced by applying ToString() to n.
-    NumberToStringBuffer buffer;
-    unsigned length = numberToString(number, buffer);
-    return String(buffer, length);
-}
-
-// FIXME: Move this function to a more appropriate place.
-bool parseToDoubleForNumberType(const String& src, double* out)
-{
-    // See HTML5 2.4.4.3 `Real numbers.'
-
-    if (src.isEmpty())
-        return false;
-    // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5.
-    // So, check the first character.
-    if (src[0] != '-' && (src[0] < '0' || src[0] > '9'))
-        return false;
-
-    bool valid = false;
-    double value = src.toDouble(&valid);
-    if (!valid)
-        return false;
-    // NaN and Infinity are not valid numbers according to the standard.
-    if (!isfinite(value))
-        return false;
-    // -0 -> 0
-    if (!value)
-        value = 0;
-    if (out)
-        *out = value;
-    return true;
-}
-
 }
diff --git a/WebCore/html/parser/HTMLTreeBuilder.h b/WebCore/html/parser/HTMLTreeBuilder.h
index 4634f0a..d522ea8 100644
--- a/WebCore/html/parser/HTMLTreeBuilder.h
+++ b/WebCore/html/parser/HTMLTreeBuilder.h
@@ -76,8 +76,6 @@ public:
     // Done, close any open tags, etc.
     void finished();
 
-    static HTMLTokenizer::State adjustedLexerState(HTMLTokenizer::State, const AtomicString& tagName, Frame*);
-
     static bool scriptEnabled(Frame*);
     static bool pluginsEnabled(Frame*);
 
@@ -170,7 +168,6 @@ private:
     PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&);
 
     HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
-    void reparentChildren(Element* oldParent, Element* newParent);
     void callTheAdoptionAgency(AtomicHTMLToken&);
 
     void closeTheCell();
@@ -257,18 +254,6 @@ private:
     int m_lastScriptElementStartLine;
 };
 
-// FIXME: Move these functions to a more appropriate place.
-
-// Converts the specified string to a floating number.
-// If the conversion fails, the return value is false. Take care that leading
-// or trailing unnecessary characters make failures.  This returns false for an
-// empty string input.
-// The double* parameter may be 0.
-bool parseToDoubleForNumberType(const String&, double*);
-// Converts the specified number to a string. This is an implementation of
-// HTML5's "algorithm to convert a number to a string" for NUMBER/RANGE types.
-String serializeForNumberType(double);
-
 }
 
 #endif
diff --git a/WebCore/html/parser/HTMLViewSourceParser.cpp b/WebCore/html/parser/HTMLViewSourceParser.cpp
index 8a7984d..ace8590 100644
--- a/WebCore/html/parser/HTMLViewSourceParser.cpp
+++ b/WebCore/html/parser/HTMLViewSourceParser.cpp
@@ -26,15 +26,15 @@
 #include "config.h"
 #include "HTMLViewSourceParser.h"
 
+#include "HTMLDocumentParser.h"
 #include "HTMLNames.h"
-#include "HTMLTreeBuilder.h"
 #include "HTMLViewSourceDocument.h"
 
 namespace WebCore {
 
 HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument* document)
     : DecodedDataDocumentParser(document)
-    , m_tokenizer(HTMLTokenizer::create())
+    , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document)))
 {
 }
 
@@ -87,13 +87,7 @@ void HTMLViewSourceParser::updateTokenizerState()
         return;
 
     AtomicString tagName(m_token.name().data(), m_token.name().size());
-    m_tokenizer->setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer->state(), tagName, document()->frame()));
-    if (tagName == HTMLNames::scriptTag) {
-        // The tree builder handles scriptTag separately from the other tokenizer
-        // state adjustments, so we need to handle it separately too.
-        ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
-        m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
-    }
+    m_tokenizer->updateStateFor(tagName, document()->frame());
 }
 
 void HTMLViewSourceParser::finish()
diff --git a/WebCore/html/parser/NestingLevelIncrementer.h b/WebCore/html/parser/NestingLevelIncrementer.h
new file mode 100644
index 0000000..c597876
--- /dev/null
+++ b/WebCore/html/parser/NestingLevelIncrementer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NestingLevelIncrementer_h
+#define NestingLevelIncrementer_h
+
+#include <wtf/Noncopyable.h>
+
+namespace WebCore {
+
+// Increments the referenced counter on construction and decrements it again
+// on destruction. Only a pointer to the counter is stored, so the counter
+// must outlive the incrementer.
+class NestingLevelIncrementer : public Noncopyable {
+public:
+    explicit NestingLevelIncrementer(unsigned& nestingLevel)
+        : m_nestingLevel(&nestingLevel)
+    {
+        ++(*m_nestingLevel);
+    }
+
+    ~NestingLevelIncrementer()
+    {
+        --(*m_nestingLevel);
+    }
+
+private:
+    unsigned* m_nestingLevel;
+};
+
+}
+
+#endif