Merge WebKit at r75315: Initial merge by git.

Change-Id: I570314b346ce101c935ed22a626b48c2af266b84
author: Steve Block <steveblock@google.com> 2011-05-06 11:45:16 +0100
committer: Steve Block <steveblock@google.com> 2011-05-12 13:44:10 +0100
commit: cad810f21b803229eb11403f9209855525a25d57 (patch)
tree: 29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /Source/WebCore/html/parser
parent: 121b0cf4517156d0ac5111caf9830c51b69bae8f (diff)
download: external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip
external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz
external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2
41 files changed, 13215 insertions, 0 deletions
diff --git a/Source/WebCore/html/parser/CSSPreloadScanner.cpp b/Source/WebCore/html/parser/CSSPreloadScanner.cpp
new file mode 100644
index 0000000..23364f9
--- /dev/null
+++ b/Source/WebCore/html/parser/CSSPreloadScanner.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2008, 2010 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "CSSPreloadScanner.h"
+
+#include "CachedCSSStyleSheet.h"
+#include "CachedResourceLoader.h"
+#include "Document.h"
+#include "HTMLParserIdioms.h"
+#include "HTMLToken.h"
+
+namespace WebCore {
+
+// Creates a scanner in the Initial state that will issue preloads through
+// |document|. m_scanningBody is given a defined value here so the member is
+// never indeterminate; scan() overwrites it before any rule is emitted.
+CSSPreloadScanner::CSSPreloadScanner(Document* document)
+    : m_state(Initial)
+    , m_scanningBody(false)
+    , m_document(document)
+{
+}
+
+// Discards any partially collected rule name / rule value and puts the state
+// machine back in the Initial state.
+void CSSPreloadScanner::reset()
+{
+    m_ruleValue.clear();
+    m_rule.clear();
+    m_state = Initial;
+}
+
+// Feeds every character of |token| through the state machine. |scanningBody|
+// is remembered and forwarded to preload() by emitRule().
+void CSSPreloadScanner::scan(const HTMLToken& token, bool scanningBody)
+{
+    m_scanningBody = scanningBody;
+
+    const HTMLToken::DataVector& data = token.characters();
+    HTMLToken::DataVector::const_iterator current = data.begin();
+    const HTMLToken::DataVector::const_iterator end = data.end();
+    while (current != end) {
+        tokenize(*current);
+        ++current;
+    }
+}
+
+// Advances the state machine by one character.
+//
+// Only @-rules are of interest (emitRule() acts on "import"), so this is not
+// a real CSS tokenizer: it skips /* ... */ comments, collects the rule name
+// after '@' into m_rule, collects the first whitespace-delimited value into
+// m_ruleValue, and calls emitRule() when the terminating ';' is reached.
+// Searching for other types of resources is probably low payoff.
+inline void CSSPreloadScanner::tokenize(UChar c)
+{
+    switch (m_state) {
+    case Initial:
+        if (c == '/')
+            m_state = MaybeComment;
+        else if (c == '@')
+            m_state = RuleStart;
+        break;
+    case MaybeComment:
+        // A '/' only opens a comment when immediately followed by '*'.
+        m_state = (c == '*') ? Comment : Initial;
+        break;
+    case Comment:
+        if (c == '*')
+            m_state = MaybeCommentEnd;
+        break;
+    case MaybeCommentEnd:
+        // A run of '*' keeps us here; '/' closes the comment; anything else
+        // means we are still inside it.
+        if (c == '/')
+            m_state = Initial;
+        else if (c != '*')
+            m_state = Comment;
+        break;
+    case RuleStart: {
+        const bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+        if (!isLetter) {
+            m_state = Initial;
+            break;
+        }
+        // Start a fresh rule, dropping anything left from an abandoned one.
+        m_rule.clear();
+        m_ruleValue.clear();
+        m_rule.append(c);
+        m_state = Rule;
+        break;
+    }
+    case Rule:
+        if (c == ';')
+            m_state = Initial;
+        else if (isHTMLSpace(c))
+            m_state = AfterRule;
+        else
+            m_rule.append(c);
+        break;
+    case AfterRule:
+        if (isHTMLSpace(c))
+            break;
+        if (c == ';') {
+            m_state = Initial;
+            break;
+        }
+        m_state = RuleValue;
+        m_ruleValue.append(c);
+        break;
+    case RuleValue:
+        if (c == ';') {
+            emitRule();
+            m_state = Initial;
+        } else if (isHTMLSpace(c))
+            m_state = AfterRuleValue;
+        else
+            m_ruleValue.append(c);
+        break;
+    case AfterRuleValue:
+        if (isHTMLSpace(c))
+            break;
+        if (c == ';')
+            emitRule();
+        // FIXME: media rules
+        // Anything other than ';' after the value abandons the rule.
+        m_state = Initial;
+        break;
+    }
+}
+
+// Narrows the window [offset, offset + length) of |characters| so that it no
+// longer starts or ends with HTML whitespace.
+static void trimHTMLSpaces(const UChar* characters, size_t& offset, size_t& length)
+{
+    while (length && isHTMLSpace(characters[offset])) {
+        ++offset;
+        --length;
+    }
+    while (length && isHTMLSpace(characters[offset + length - 1]))
+        --length;
+}
+
+// Extracts the text of a quoted CSS string, optionally wrapped in url(...),
+// from |characters|. Whitespace is trimmed around the whole value, inside the
+// url() parentheses and inside the quotes. Returns a null String when what
+// remains is not enclosed in a matching pair of ' or " quotes (so an unquoted
+// url(...) value yields a null String).
+static String parseCSSStringOrURL(const UChar* characters, size_t length)
+{
+    size_t start = 0;
+    size_t remaining = length;
+    trimHTMLSpaces(characters, start, remaining);
+
+    // Strip a case-insensitive "url(" prefix together with its closing ')'.
+    const bool isURLFunction = remaining >= 5
+        && (characters[start] == 'u' || characters[start] == 'U')
+        && (characters[start + 1] == 'r' || characters[start + 1] == 'R')
+        && (characters[start + 2] == 'l' || characters[start + 2] == 'L')
+        && characters[start + 3] == '('
+        && characters[start + remaining - 1] == ')';
+    if (isURLFunction) {
+        start += 4;
+        remaining -= 5;
+    }
+    trimHTMLSpaces(characters, start, remaining);
+
+    // The value must now be wrapped in a matching pair of quotes.
+    if (remaining < 2)
+        return String();
+    const UChar quote = characters[start];
+    if (quote != '\'' && quote != '"')
+        return String();
+    if (characters[start + remaining - 1] != quote)
+        return String();
+    ++start;
+    remaining -= 2;
+    trimHTMLSpaces(characters, start, remaining);
+
+    return String(characters + start, remaining);
+}
+
+// Called when a complete "@name value;" has been collected. If the rule name
+// is "import" (case-insensitive) and its value parses to a non-empty string,
+// asks the document's resource loader to preload it as a style sheet. The
+// collected name and value are cleared in every case.
+void CSSPreloadScanner::emitRule()
+{
+    const bool isImportRule = equalIgnoringCase("import", m_rule.data(), m_rule.size());
+    if (isImportRule) {
+        String url = parseCSSStringOrURL(m_ruleValue.data(), m_ruleValue.size());
+        if (!url.isEmpty())
+            m_document->cachedResourceLoader()->preload(CachedResource::CSSStyleSheet, url, String(), m_scanningBody);
+    }
+    m_rule.clear();
+    m_ruleValue.clear();
+}
+
+}
diff --git a/Source/WebCore/html/parser/CSSPreloadScanner.h b/Source/WebCore/html/parser/CSSPreloadScanner.h
new file mode 100644
index 0000000..7ac282f
--- /dev/null
+++ b/Source/WebCore/html/parser/CSSPreloadScanner.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef CSSPreloadScanner_h
+#define CSSPreloadScanner_h
+
+#include "PlatformString.h"
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+class Document;
+class HTMLToken;
+
+// Character-driven scanner that looks for @import rules in style sheet text
+// and asks the document's resource loader to preload the referenced sheets.
+// It is a small state machine, not a real CSS tokenizer.
+class CSSPreloadScanner : public Noncopyable {
+public:
+    CSSPreloadScanner(Document*);
+
+    // Returns to the Initial state and drops any partially collected rule.
+    void reset();
+    // Runs every character of the token through the state machine.
+    // |scanningBody| is forwarded to the preload request.
+    void scan(const HTMLToken&, bool scanningBody);
+
+private:
+    enum State {
+        Initial, // Outside any comment or rule.
+        MaybeComment, // Saw '/'.
+        Comment, // Inside /* ... */.
+        MaybeCommentEnd, // Inside a comment, just saw '*'.
+        RuleStart, // Saw '@'.
+        Rule, // Collecting the rule name into m_rule.
+        AfterRule, // Whitespace between rule name and value.
+        RuleValue, // Collecting the rule value into m_ruleValue.
+        AfterRuleValue // Whitespace after the value, waiting for ';'.
+    };
+
+    inline void tokenize(UChar c);
+    void emitRule();
+
+    State m_state;
+    Vector<UChar, 16> m_rule; // Name of the @-rule currently being collected.
+    Vector<UChar> m_ruleValue; // Raw value text following the rule name.
+
+    bool m_scanningBody; // Set by scan(); passed through to preload().
+    Document* m_document;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.cpp b/Source/WebCore/html/parser/HTMLConstructionSite.cpp
new file mode 100644
index 0000000..c46b9b9
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLConstructionSite.cpp
@@ -0,0 +1,464 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLTreeBuilder.h"
+
+#include "Comment.h"
+#include "DocumentFragment.h"
+#include "DocumentType.h"
+#include "Element.h"
+#include "Frame.h"
+#include "HTMLDocument.h"
+#include "HTMLElementFactory.h"
+#include "HTMLFormElement.h"
+#include "HTMLHtmlElement.h"
+#include "HTMLNames.h"
+#include "HTMLScriptElement.h"
+#include "HTMLToken.h"
+#include "HTMLTokenizer.h"
+#include "LocalizedStrings.h"
+#if ENABLE(MATHML)
+#include "MathMLNames.h"
+#endif
+#include "NotImplemented.h"
+#if ENABLE(SVG)
+#include "SVGNames.h"
+#endif
+#include "ScriptController.h"
+#include "Settings.h"
+#include "Text.h"
+#include <wtf/UnusedParam.h>
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+// Returns true if |element| is one of dd, dt, li, option, optgroup, p, rp or
+// rt, i.e. an element that the generateImpliedEndTags* helpers pop.
+bool hasImpliedEndTag(Element* element)
+{
+    if (element->hasTagName(ddTag) || element->hasTagName(dtTag))
+        return true;
+    if (element->hasTagName(liTag))
+        return true;
+    if (element->hasTagName(optionTag) || element->hasTagName(optgroupTag))
+        return true;
+    if (element->hasTagName(pTag))
+        return true;
+    return element->hasTagName(rpTag) || element->hasTagName(rtTag);
+}
+
+// Returns true if |tagName| is table, tbody, tfoot, thead or tr: the current
+// node types for which shouldFosterParent() redirects insertions.
+bool causesFosterParenting(const QualifiedName& tagName)
+{
+    if (tagName == tableTag)
+        return true;
+    if (tagName == tbodyTag || tagName == tfootTag || tagName == theadTag)
+        return true;
+    return tagName == trTag;
+}
+
+} // namespace
+
+// Appends |prpChild| to |parent| (or foster parents it when
+// shouldFosterParent() says so), attaches it if appropriate, and hands the
+// reference back to the caller.
+template<typename ChildType>
+PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRefPtr<ChildType> prpChild)
+{
+    RefPtr<ChildType> node = prpChild;
+
+    // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
+    // redirection to the foster parent but HTMLConstructionSite::attachAtSite
+    // doesn't.  It feels like we're missing a concept somehow.
+    if (shouldFosterParent()) {
+        fosterParent(node.get());
+        ASSERT(node->attached() || !node->parentNode() || !node->parentNode()->attached());
+        return node.release();
+    }
+
+    parent->parserAddChild(node);
+
+    // An event handler (DOM Mutation, beforeload, et al.) could have removed
+    // the node again, in which case it must not be attached.
+    //
+    // It's slightly unfortunate that we need to hold a reference to the node
+    // here to call attach().  We should investigate whether we can rely on
+    // |parent| to hold a ref at this point.  In the common case (at least
+    // for elements), however, we'll get to use this ref in the stack of
+    // open elements.
+    if (node->parentNode() && parent->attached() && !node->attached())
+        node->attach();
+    return node.release();
+}
+
+// Inserts |prpChild| at |site|: before site.nextChild when one is given,
+// otherwise as the last child of site.parent. The node is then attached if
+// it is still in the tree, the parent is attached and the node is not.
+void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
+{
+    // FIXME: It's unfortunate that we need to hold a reference to the node
+    // here to call attach().  We should investigate whether we can rely on
+    // |site.parent| to hold a ref at this point.
+    RefPtr<Node> node = prpChild;
+
+    if (!site.nextChild)
+        site.parent->parserAddChild(node);
+    else
+        site.parent->parserInsertBefore(node, site.nextChild);
+
+    // JavaScript run from beforeload (or DOM Mutation or event handlers)
+    // might have removed the node, in which case we should not attach it.
+    if (!node->parentNode())
+        return;
+    if (site.parent->attached() && !node->attached())
+        node->attach();
+}
+
+// Stores the target document and the fragment-related settings. Foster-parent
+// redirection starts disabled; it is toggled by RedirectToFosterParentGuard.
+HTMLConstructionSite::HTMLConstructionSite(Document* document, FragmentScriptingPermission scriptingPermission, bool isParsingFragment)
+    : m_document(document)
+    , m_fragmentScriptingPermission(scriptingPermission)
+    , m_isParsingFragment(isParsingFragment)
+    , m_redirectAttachToFosterParent(false)
+{
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+HTMLConstructionSite::~HTMLConstructionSite()
+{
+}
+
+// Drops the pointer to the document.
+void HTMLConstructionSite::detach()
+{
+    m_document = 0;
+}
+
+// Installs |form| as the current form element. No form may be set already.
+void HTMLConstructionSite::setForm(HTMLFormElement* form)
+{
+    // This method should only be needed for HTMLTreeBuilder in the fragment case.
+    ASSERT(!m_form);
+    m_form = form;
+}
+
+// Hands the current form element to the caller and clears m_form.
+PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
+{
+    return m_form.release();
+}
+
+// Tells the frame's loader that the document element is available. Does
+// nothing when parsing a fragment or when the document has no frame.
+void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
+{
+    ASSERT(m_document);
+    if (m_isParsingFragment)
+        return;
+    Frame* frame = m_document->frame();
+    if (!frame)
+        return;
+    frame->loader()->dispatchDocumentElementAvailable();
+}
+
+// Creates the <html> element from |token|, appends it to the document and
+// pushes it onto the stack of open elements.
+void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
+{
+    RefPtr<HTMLHtmlElement> htmlElement = HTMLHtmlElement::create(m_document);
+    htmlElement->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
+    m_openElements.pushHTMLHtmlElement(attach<Element>(m_document, htmlElement.get()));
+#if ENABLE(OFFLINE_WEB_APPLICATIONS)
+    htmlElement->insertedByParser();
+#endif
+    dispatchDocumentElementAvailableIfNeeded();
+}
+
+// Copies onto |element| every attribute of |token| that |element| does not
+// already have. Attributes already present on the element are left untouched.
+void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
+{
+    if (!token.attributes())
+        return;
+
+    NamedNodeMap* existing = element->attributes(false);
+    for (unsigned index = 0; index < token.attributes()->length(); ++index) {
+        Attribute* tokenAttribute = token.attributes()->attributeItem(index);
+        if (existing->getAttributeItem(tokenAttribute->name()))
+            continue;
+        element->setAttribute(tokenAttribute->name(), tokenAttribute->value());
+    }
+}
+
+// Merges the attributes of a stray <html> start tag into the existing <html>
+// element.
+void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
+{
+    // FIXME: parse error
+    Element* htmlElement = m_openElements.htmlElement();
+    mergeAttributesFromTokenIntoElement(token, htmlElement);
+}
+
+// Merges the attributes of a stray <body> start tag into the existing <body>
+// element.
+void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
+{
+    // FIXME: parse error
+    Element* bodyElement = m_openElements.bodyElement();
+    mergeAttributesFromTokenIntoElement(token, bodyElement);
+}
+
+// Appends a DocumentType node built from |token| to the document, then
+// selects the compatibility mode: quirks mode when the token forces it,
+// otherwise whatever the doctype implies.
+void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::DOCTYPE);
+    RefPtr<DocumentType> doctype = DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier()));
+    attach(m_document, doctype.release());
+
+    if (!token.forceQuirks()) {
+        m_document->setCompatibilityModeFromDoctype();
+        return;
+    }
+    m_document->setCompatibilityMode(Document::QuirksMode);
+}
+
+// Appends a comment node built from |token| to the current element.
+void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::Comment);
+    Element* parent = currentElement();
+    attach(parent, Comment::create(parent->document(), token.comment()));
+}
+
+// Appends a comment node built from |token| directly to the document.
+void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::Comment);
+    RefPtr<Comment> comment = Comment::create(m_document, token.comment());
+    attach(m_document, comment.release());
+}
+
+// Appends a comment node built from |token| to the <html> element.
+void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::Comment);
+    Element* htmlElement = m_openElements.htmlElement();
+    RefPtr<Comment> comment = Comment::create(htmlElement->document(), token.comment());
+    attach(htmlElement, comment.release());
+}
+
+// Convenience wrapper: attach() with the current element as the parent.
+PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
+{
+    Element* parent = currentElement();
+    return attach(parent, child);
+}
+
+// Creates the <head> element from |token|, inserts it under the current
+// element, remembers it in m_head and pushes it onto the open element stack.
+void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
+{
+    ASSERT(!shouldFosterParent());
+    RefPtr<Element> created = createHTMLElement(token);
+    m_head = attachToCurrent(created.release());
+    m_openElements.pushHTMLHeadElement(m_head);
+}
+
+// Creates the <body> element from |token|, inserts it under the current
+// element and pushes it onto the open element stack.
+void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
+{
+    ASSERT(!shouldFosterParent());
+    RefPtr<Element> body = attachToCurrent(createHTMLElement(token));
+    m_openElements.pushHTMLBodyElement(body.release());
+}
+
+// Creates a <form> element from |token|, records whether it is demoted,
+// inserts and pushes it, and makes it the current form (m_form).
+void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
+{
+    RefPtr<Element> created = createHTMLElement(token);
+    ASSERT(created->hasTagName(formTag));
+    RefPtr<HTMLFormElement> formElement = static_pointer_cast<HTMLFormElement>(created.release());
+    formElement->setDemoted(isDemoted);
+    m_openElements.push(attachToCurrent(formElement.release()));
+
+    // The element just pushed is now the current element.
+    Element* top = currentElement();
+    ASSERT(top->isHTMLElement());
+    ASSERT(top->hasTagName(formTag));
+    m_form = static_cast<HTMLFormElement*>(top);
+}
+
+// Creates an HTML element from |token|, inserts it under the current element
+// and pushes it onto the stack of open elements.
+void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
+{
+    RefPtr<Element> inserted = attachToCurrent(createHTMLElement(token));
+    m_openElements.push(inserted.release());
+}
+
+// Creates an HTML element from |token| and inserts it under the current
+// element without pushing it onto the stack of open elements.
+void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    RefPtr<Element> inserted = attachToCurrent(createHTMLElement(token));
+    // HTMLElementStack normally calls finishParsingChildren, but a
+    // self-closing element never enters the stack, so nobody else will tell
+    // it that its children are done. Do it here.
+    inserted->finishParsingChildren();
+    // FIXME: Do we want to acknowledge the token's self-closing flag?
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
+}
+
+// Inserts an HTML element for |token| and also records it in the list of
+// active formatting elements.
+void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
+{
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
+    // Possible active formatting elements include:
+    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
+    insertHTMLElement(token);
+    Element* formattingElement = currentElement();
+    m_activeFormattingElements.append(formattingElement);
+}
+
+// Creates a <script> element, inserts it under the current element and
+// pushes it. The token's attributes are applied only when the fragment
+// scripting permission is FragmentScriptingAllowed.
+void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
+{
+    RefPtr<HTMLScriptElement> script = HTMLScriptElement::create(scriptTag, currentElement()->document(), true);
+    const bool scriptingAllowed = m_fragmentScriptingPermission == FragmentScriptingAllowed;
+    if (scriptingAllowed)
+        script->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
+    m_openElements.push(attachToCurrent(script.release()));
+}
+
+// Creates an element for |token| in |namespaceURI| and inserts it under the
+// current element. It is pushed onto the stack of open elements unless the
+// token is self-closing.
+void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
+
+    RefPtr<Element> foreignElement = attachToCurrent(createElement(token, namespaceURI));
+    if (token.selfClosing())
+        return;
+    m_openElements.push(foreignElement);
+}
+
+// Inserts |characters| at the current insertion point (or the foster site
+// when foster parenting applies). If the node just before the insertion
+// point is a text node the characters are appended to it; otherwise a new
+// Text node is created.
+void HTMLConstructionSite::insertTextNode(const String& characters)
+{
+    AttachmentSite site;
+    site.parent = currentElement();
+    site.nextChild = 0;
+    if (shouldFosterParent())
+        findFosterSite(site);
+
+    Node* previousChild;
+    if (site.nextChild)
+        previousChild = site.nextChild->previousSibling();
+    else
+        previousChild = site.parent->lastChild();
+
+    if (!previousChild || !previousChild->isTextNode()) {
+        attachAtSite(site, Text::create(site.parent->document(), characters));
+        return;
+    }
+
+    // FIXME: We're only supposed to append to this text node if it
+    // was the last text node inserted by the parser.
+    static_cast<CharacterData*>(previousChild)->parserAppendData(characters);
+}
+
+// Creates (without inserting) an element named after |token| in
+// |namespaceURI|, owned by the current element's document, and moves the
+// token's attributes onto it.
+PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
+{
+    QualifiedName qualifiedName(nullAtom, token.name(), namespaceURI);
+    RefPtr<Element> created = currentElement()->document()->createElement(qualifiedName, true);
+    created->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
+    return created.release();
+}
+
+// Creates (without inserting) an HTML element named after |token|, passing
+// the current form to the element factory, and moves the token's attributes
+// onto it.
+PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
+{
+    QualifiedName qualifiedName(nullAtom, token.name(), xhtmlNamespaceURI);
+    // FIXME: This can't use HTMLConstructionSite::createElement because we
+    // have to pass the current form element.  We should rework form association
+    // to occur after construction to allow better code sharing here.
+    RefPtr<Element> created = HTMLElementFactory::createHTMLElement(qualifiedName, currentElement()->document(), form(), true);
+    created->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
+    ASSERT(created->isHTMLElement());
+    return created.release();
+}
+
+// Creates a new HTML element modelled on the element held by |record|.
+PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
+{
+    Element* savedElement = record->element();
+    return createHTMLElementFromSavedElement(savedElement);
+}
+
+namespace {
+
+// Builds a new attribute map holding a mapped copy (same name and value) of
+// every attribute of |element|. Returns 0 if the element has no attribute
+// map.
+PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
+{
+    NamedNodeMap* source = element->attributes(true);
+    if (!source)
+        return 0;
+
+    RefPtr<NamedNodeMap> copy = NamedNodeMap::create();
+    size_t index = 0;
+    while (index < source->length()) {
+        Attribute* original = source->attributeItem(index);
+        RefPtr<Attribute> duplicate = Attribute::createMapped(original->name(), original->value());
+        copy->addAttribute(duplicate);
+        ++index;
+    }
+    return copy.release();
+}
+
+}
+
+// Creates a new HTML element with the same local name as |element| and a
+// copy of its current attributes, by synthesizing a start tag token.
+PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
+{
+    // FIXME: This method is wrong.  We should be using the original token.
+    // Using an Element* causes us to fail examples like this:
+    // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
+    // When reconstructTheActiveFormattingElements calls this method to open
+    // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
+    // spec implies it should be "1".  Minefield matches the HTML5 spec here.
+
+    ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
+    AtomicHTMLToken syntheticToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
+    return createHTMLElement(syntheticToken);
+}
+
+// Walks the active formatting elements from the end towards the front,
+// looking for the last entry that is a marker or is still on the stack of
+// open elements. |firstUnopenElementIndex| receives the index just after
+// that entry (0 if there is none). Returns true when that index refers to an
+// existing entry, false when the list is empty or nothing needs reopening.
+bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
+{
+    if (m_activeFormattingElements.isEmpty())
+        return false;
+
+    // |next| is always one past the entry being examined.
+    for (unsigned next = m_activeFormattingElements.size(); next; --next) {
+        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(next - 1);
+        if (entry.isMarker() || m_openElements.contains(entry.element())) {
+            firstUnopenElementIndex = next;
+            return firstUnopenElementIndex < m_activeFormattingElements.size();
+        }
+    }
+    firstUnopenElementIndex = 0;
+    return true;
+}
+
+// For every active formatting element from the first unopen one onwards,
+// creates a fresh copy, inserts and pushes it, and makes the list entry
+// point at the new element.
+void HTMLConstructionSite::reconstructTheActiveFormattingElements()
+{
+    unsigned firstUnopenElementIndex;
+    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
+        return;
+
+    unsigned entryIndex = firstUnopenElementIndex;
+    ASSERT(entryIndex < m_activeFormattingElements.size());
+    while (entryIndex < m_activeFormattingElements.size()) {
+        HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(entryIndex);
+        RefPtr<Element> copy = createHTMLElementFromSavedElement(entry.element());
+        m_openElements.push(attachToCurrent(copy.release()));
+        entry.replaceElement(currentElement());
+        ++entryIndex;
+    }
+}
+
+// Pops open elements while the current element has an implied end tag and
+// its local name is not |tagName|.
+void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
+{
+    for (;;) {
+        Element* current = currentElement();
+        if (!hasImpliedEndTag(current) || current->hasLocalName(tagName))
+            break;
+        m_openElements.pop();
+    }
+}
+
+// Pops open elements while the current element has an implied end tag.
+void HTMLConstructionSite::generateImpliedEndTags()
+{
+    for (;;) {
+        if (!hasImpliedEndTag(currentElement()))
+            break;
+        m_openElements.pop();
+    }
+}
+
+// Fills |site| with the foster parent insertion point:
+//  - no <table> on the stack (fragment case): append to the bottom element;
+//  - the topmost <table> has a parent: insert into that parent, before the
+//    table;
+//  - otherwise: append to the element of the table record's next() entry on
+//    the stack.
+void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
+{
+    HTMLElementStack::ElementRecord* tableRecord = m_openElements.topmost(tableTag.localName());
+    if (!tableRecord) {
+        // Fragment case
+        site.parent = m_openElements.bottom(); // <html> element
+        site.nextChild = 0;
+        return;
+    }
+
+    Element* table = tableRecord->element();
+    ContainerNode* tableParent = table->parentNode();
+    if (tableParent) {
+        site.parent = tableParent;
+        site.nextChild = table;
+        return;
+    }
+
+    site.parent = tableRecord->next()->element();
+    site.nextChild = 0;
+}
+
+// True when foster-parent redirection is switched on and the current element
+// is one of the table-related elements that trigger it.
+bool HTMLConstructionSite::shouldFosterParent() const
+{
+    if (!m_redirectAttachToFosterParent)
+        return false;
+    return causesFosterParenting(currentElement()->tagQName());
+}
+
+// Inserts |node| at the foster parent site computed by findFosterSite().
+void HTMLConstructionSite::fosterParent(Node* node)
+{
+    AttachmentSite fosterSite;
+    findFosterSite(fosterSite);
+    attachAtSite(fosterSite, node);
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.h b/Source/WebCore/html/parser/HTMLConstructionSite.h
new file mode 100644
index 0000000..8b09bf5
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLConstructionSite.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLConstructionSite_h
+#define HTMLConstructionSite_h
+
+#include "FragmentScriptingPermission.h"
+#include "HTMLElementStack.h"
+#include "HTMLFormattingElementList.h"
+#include "NotImplemented.h"
+#include <wtf/Noncopyable.h>
+#include <wtf/PassRefPtr.h>
+#include <wtf/RefPtr.h>
+
+namespace WebCore {
+
+class AtomicHTMLToken;
+class Document;
+class Element;
+
+// Performs the DOM mutations requested by the HTML tree builder: creating
+// nodes from tokens, inserting them at the current insertion point (or the
+// foster parent site), and maintaining the stack of open elements and the
+// list of active formatting elements.
+class HTMLConstructionSite : public Noncopyable {
+public:
+    HTMLConstructionSite(Document*, FragmentScriptingPermission, bool isParsingFragment);
+    ~HTMLConstructionSite();
+
+    // Clears the document pointer.
+    void detach();
+
+    // Node insertion. Each of these builds a node from the token (or string)
+    // and inserts it; most element variants also push the new element onto
+    // the stack of open elements.
+    void insertDoctype(AtomicHTMLToken&);
+    void insertComment(AtomicHTMLToken&);
+    void insertCommentOnDocument(AtomicHTMLToken&);
+    void insertCommentOnHTMLHtmlElement(AtomicHTMLToken&);
+    void insertHTMLElement(AtomicHTMLToken&);
+    void insertSelfClosingHTMLElement(AtomicHTMLToken&);
+    void insertFormattingElement(AtomicHTMLToken&);
+    void insertHTMLHeadElement(AtomicHTMLToken&);
+    void insertHTMLBodyElement(AtomicHTMLToken&);
+    void insertHTMLFormElement(AtomicHTMLToken&, bool isDemoted = false);
+    void insertScriptElement(AtomicHTMLToken&);
+    void insertTextNode(const String&);
+    void insertForeignElement(AtomicHTMLToken&, const AtomicString& namespaceURI);
+
+    // The first creates the <html> element; the other two merge the token's
+    // attributes into the existing <html> / <body> element.
+    void insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken&);
+    void insertHTMLHtmlStartTagInBody(AtomicHTMLToken&);
+    void insertHTMLBodyStartTagInBody(AtomicHTMLToken&);
+
+    // Element creation without insertion.
+    PassRefPtr<Element> createHTMLElement(AtomicHTMLToken&);
+    PassRefPtr<Element> createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord*);
+
+    bool shouldFosterParent() const;
+    void fosterParent(Node*);
+
+    bool indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const;
+    void reconstructTheActiveFormattingElements();
+
+    void generateImpliedEndTags();
+    void generateImpliedEndTagsWithExclusion(const AtomicString& tagName);
+
+    Element* currentElement() const { return m_openElements.top(); }
+    Element* oneBelowTop() const { return m_openElements.oneBelowTop(); }
+
+    // These hand out mutable pointers from const methods; the members are
+    // declared mutable for that purpose.
+    HTMLElementStack* openElements() const { return &m_openElements; }
+    HTMLFormattingElementList* activeFormattingElements() const { return &m_activeFormattingElements; }
+
+    Element* head() const { return m_head.get(); }
+
+    void setForm(HTMLFormElement*);
+    HTMLFormElement* form() const { return m_form.get(); }
+    PassRefPtr<HTMLFormElement> takeForm();
+
+    // Scoped switch: turns foster-parent redirection on for its lifetime and
+    // restores the previous setting on destruction.
+    class RedirectToFosterParentGuard : public Noncopyable {
+    public:
+        RedirectToFosterParentGuard(HTMLConstructionSite& tree)
+            : m_tree(tree)
+            , m_wasRedirectingBefore(tree.m_redirectAttachToFosterParent)
+        {
+            m_tree.m_redirectAttachToFosterParent = true;
+        }
+
+        ~RedirectToFosterParentGuard()
+        {
+            m_tree.m_redirectAttachToFosterParent = m_wasRedirectingBefore;
+        }
+
+    private:
+        HTMLConstructionSite& m_tree;
+        bool m_wasRedirectingBefore;
+    };
+
+private:
+    // An insertion point: insert into |parent| before |nextChild|, or append
+    // to |parent| when |nextChild| is 0.
+    struct AttachmentSite {
+        ContainerNode* parent;
+        Node* nextChild;
+    };
+
+    template<typename ChildType>
+    PassRefPtr<ChildType> attach(ContainerNode* parent, PassRefPtr<ChildType> child);
+    PassRefPtr<Element> attachToCurrent(PassRefPtr<Element>);
+
+    void attachAtSite(const AttachmentSite&, PassRefPtr<Node> child);
+    void findFosterSite(AttachmentSite&);
+
+    PassRefPtr<Element> createHTMLElementFromSavedElement(Element*);
+    PassRefPtr<Element> createElement(AtomicHTMLToken&, const AtomicString& namespaceURI);
+
+    void mergeAttributesFromTokenIntoElement(AtomicHTMLToken&, Element*);
+    void dispatchDocumentElementAvailableIfNeeded();
+
+    Document* m_document;
+    RefPtr<Element> m_head;
+    RefPtr<HTMLFormElement> m_form;
+    mutable HTMLElementStack m_openElements;
+    mutable HTMLFormattingElementList m_activeFormattingElements;
+
+    FragmentScriptingPermission m_fragmentScriptingPermission;
+    bool m_isParsingFragment;
+
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-intable
+    // In the "in table" insertion mode, we sometimes get into a state where
+    // "whenever a node would be inserted into the current node, it must instead
+    // be foster parented."  This flag tracks whether we're in that state.
+    bool m_redirectAttachToFosterParent;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.cpp b/Source/WebCore/html/parser/HTMLDocumentParser.cpp
new file mode 100644
index 0000000..93e1309
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLDocumentParser.cpp
@@ -0,0 +1,549 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLDocumentParser.h"
+
+#include "DocumentFragment.h"
+#include "Element.h"
+#include "Frame.h"
+#include "HTMLNames.h"
+#include "HTMLParserScheduler.h"
+#include "HTMLTokenizer.h"
+#include "HTMLPreloadScanner.h"
+#include "HTMLScriptRunner.h"
+#include "HTMLTreeBuilder.h"
+#include "HTMLDocument.h"
+#include "InspectorInstrumentation.h"
+#include "NestingLevelIncrementer.h"
+#include "Settings.h"
+#include "XSSAuditor.h"
+#include <wtf/CurrentTime.h>
+
+#ifdef ANDROID_INSTRUMENT
+#include "TimeCounter.h"
+#endif
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+// This is a direct transcription of step 4 from:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
+HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors)
+{
+    if (!contextElement)
+        return HTMLTokenizer::DataState;
+
+    const QualifiedName& contextTag = contextElement->tagQName();
+
+    if (contextTag.matches(titleTag) || contextTag.matches(textareaTag))
+        return HTMLTokenizer::RCDATAState;
+    if (contextTag.matches(styleTag)
+        || contextTag.matches(xmpTag)
+        || contextTag.matches(iframeTag)
+        || (contextTag.matches(noembedTag) && HTMLTreeBuilder::pluginsEnabled(contextElement->document()->frame()))
+        || (contextTag.matches(noscriptTag) && HTMLTreeBuilder::scriptEnabled(contextElement->document()->frame()))
+        || contextTag.matches(noframesTag))
+        return reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState;
+    if (contextTag.matches(scriptTag))
+        return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState;
+    if (contextTag.matches(plaintextTag))
+        return HTMLTokenizer::PLAINTEXTState;
+    return HTMLTokenizer::DataState;
+}
+
+} // namespace
+
+HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
+    : ScriptableDocumentParser(document)
+    , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(document)))
+    , m_scriptRunner(HTMLScriptRunner::create(document, this))
+    , m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, usePreHTML5ParserQuirks(document)))
+    , m_parserScheduler(HTMLParserScheduler::create(this))
+    , m_endWasDelayed(false)
+    , m_writeNestingLevel(0)
+{
+}
+
+// FIXME: Member variables should be grouped into self-initializing structs to
+// minimize code duplication between these constructors.
+HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
+    : ScriptableDocumentParser(fragment->document())
+    , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document())))
+    , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks(fragment->document())))
+    , m_endWasDelayed(false)
+    , m_writeNestingLevel(0)
+{
+    bool reportErrors = false; // For now document fragment parsing never reports errors.
+    m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors));
+}
+
+HTMLDocumentParser::~HTMLDocumentParser()
+{
+    ASSERT(!m_parserScheduler);
+    ASSERT(!m_writeNestingLevel);
+    ASSERT(!m_preloadScanner);
+}
+
+void HTMLDocumentParser::detach()
+{
+    DocumentParser::detach();
+    if (m_scriptRunner)
+        m_scriptRunner->detach();
+    m_treeBuilder->detach();
+    // FIXME: It seems wrong that we would have a preload scanner here.
+    // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do.
+    m_preloadScanner.clear();
+    m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
+}
+
+void HTMLDocumentParser::stopParsing()
+{
+    DocumentParser::stopParsing();
+    m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
+}
+
+// This kicks off "Once the user agent stops parsing" as described by:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
+void HTMLDocumentParser::prepareToStopParsing()
+{
+    ASSERT(!hasInsertionPoint());
+
+    // pumpTokenizer can cause this parser to be detached from the Document,
+    // but we need to ensure it isn't deleted yet.
+    RefPtr<HTMLDocumentParser> protect(this);
+
+    // NOTE: This pump should only ever emit buffered character tokens,
+    // so ForceSynchronous vs. AllowYield should be meaningless.
+    pumpTokenizerIfPossible(ForceSynchronous);
+    
+    if (isStopped())
+        return;
+
+    DocumentParser::prepareToStopParsing();
+
+    // We will not have a scriptRunner when parsing a DocumentFragment.
+    if (m_scriptRunner)
+        document()->setReadyState(Document::Interactive);
+
+    attemptToRunDeferredScriptsAndEnd();
+}
+
+bool HTMLDocumentParser::processingData() const
+{
+    return isScheduledForResume() || inWrite();
+}
+
+void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode)
+{
+    if (isStopped() || m_treeBuilder->isPaused())
+        return;
+
+    // Once a resume is scheduled, HTMLParserScheduler controls when we next pump.
+    if (isScheduledForResume()) {
+        ASSERT(mode == AllowYield);
+        return;
+    }
+
+    pumpTokenizer(mode);
+}
+
+bool HTMLDocumentParser::isScheduledForResume() const
+{
+    return m_parserScheduler && m_parserScheduler->isScheduledForResume();
+}
+
+// Used by HTMLParserScheduler
+void HTMLDocumentParser::resumeParsingAfterYield()
+{
+    // pumpTokenizer can cause this parser to be detached from the Document,
+    // but we need to ensure it isn't deleted yet.
+    RefPtr<HTMLDocumentParser> protect(this);
+
+    // We should never be here unless we can pump immediately.  Call pumpTokenizer()
+    // directly so that ASSERTS will fire if we're wrong.
+    pumpTokenizer(AllowYield);
+    endIfDelayed();
+}
+
+bool HTMLDocumentParser::runScriptsForPausedTreeBuilder()
+{
+    ASSERT(m_treeBuilder->isPaused());
+
+    TextPosition1 scriptStartPosition = TextPosition1::belowRangePosition();
+    RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition);
+    // We will not have a scriptRunner when parsing a DocumentFragment.
+    if (!m_scriptRunner)
+        return true;
+    return m_scriptRunner->execute(scriptElement.release(), scriptStartPosition);
+}
+
+void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
+{
+    ASSERT(!isStopped());
+    ASSERT(!m_treeBuilder->isPaused());
+    ASSERT(!isScheduledForResume());
+    // ASSERT that this object is both attached to the Document and protected.
+    ASSERT(refCount() >= 2);
+
+    // We tell the InspectorInstrumentation about every pump, even if we
+    // end up pumping nothing.  It can filter out empty pumps itself.
+    // FIXME: m_input.current().length() is only accurate if we
+    // end up parsing the whole buffer in this pump.  We should pass how
+    // much we parsed as part of didWriteHTML instead of willWriteHTML.
+    InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().length(), m_tokenizer->lineNumber());
+
+    HTMLParserScheduler::PumpSession session;
+    // FIXME: This loop body is now too long and needs cleanup.
+    while (mode == ForceSynchronous || m_parserScheduler->shouldContinueParsing(session)) {
+        // FIXME: It's wrong for the HTMLDocumentParser to reach back to the
+        //        Frame, but this approach is how the old parser handled
+        //        stopping when the page assigns window.location.  What really
+        //        should happen is that assigning window.location causes the
+        //        parser to stop parsing cleanly.  The problem is we're not
+        //        prepared to do that at every point where we run JavaScript.
+        if (!m_treeBuilder->isParsingFragment()
+            && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending())
+            break;
+        if (!m_tokenizer->nextToken(m_input.current(), m_token))
+            break;
+
+        m_treeBuilder->constructTreeFromToken(m_token);
+        m_token.clear();
+
+        // JavaScript may have stopped or detached the parser.
+        if (isStopped())
+            return;
+
+        // The parser will pause itself when waiting on a script to load or run.
+        if (!m_treeBuilder->isPaused())
+            continue;
+
+        // If we're paused waiting for a script, we try to execute scripts before continuing.
+        bool shouldContinueParsing = runScriptsForPausedTreeBuilder();
+        m_treeBuilder->setPaused(!shouldContinueParsing);
+
+        // JavaScript may have stopped or detached the parser.
+        if (isStopped())
+            return;
+
+        if (!shouldContinueParsing)
+            break;
+    }
+
+    // Ensure we haven't been totally deref'ed after pumping. Any caller of this
+    // function should be holding a RefPtr to this to ensure we weren't deleted.
+    ASSERT(refCount() >= 1);
+
+    if (isWaitingForScripts()) {
+        ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
+        if (!m_preloadScanner) {
+            m_preloadScanner.set(new HTMLPreloadScanner(document()));
+            m_preloadScanner->appendToEnd(m_input.current());
+        }
+        m_preloadScanner->scan();
+    }
+
+    InspectorInstrumentation::didWriteHTML(cookie, m_tokenizer->lineNumber());
+}
+
+bool HTMLDocumentParser::hasInsertionPoint()
+{
+    return m_input.hasInsertionPoint();
+}
+
+void HTMLDocumentParser::insert(const SegmentedString& source)
+{
+    if (isStopped())
+        return;
+
+#ifdef ANDROID_INSTRUMENT
+    android::TimeCounter::start(android::TimeCounter::ParsingTimeCounter);
+#endif
+
+    // pumpTokenizer can cause this parser to be detached from the Document,
+    // but we need to ensure it isn't deleted yet.
+    RefPtr<HTMLDocumentParser> protect(this);
+
+    {
+        NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel);
+
+        SegmentedString excludedLineNumberSource(source);
+        excludedLineNumberSource.setExcludeLineNumbers();
+        m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
+        pumpTokenizerIfPossible(ForceSynchronous);
+    }
+
+    endIfDelayed();
+}
+
+void HTMLDocumentParser::append(const SegmentedString& source)
+{
+    if (isStopped())
+        return;
+
+    // pumpTokenizer can cause this parser to be detached from the Document,
+    // but we need to ensure it isn't deleted yet.
+    RefPtr<HTMLDocumentParser> protect(this);
+
+    {
+        NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel);
+
+        m_input.appendToEnd(source);
+        if (m_preloadScanner)
+            m_preloadScanner->appendToEnd(source);
+
+        if (m_writeNestingLevel > 1) {
+            // We've gotten data off the network in a nested write.
+            // We don't want to consume any more of the input stream now.  Do
+            // not worry.  We'll consume this data in a less-nested write().
+#ifdef ANDROID_INSTRUMENT
+            android::TimeCounter::record(android::TimeCounter::ParsingTimeCounter, __FUNCTION__);
+#endif
+            return;
+        }
+
+        pumpTokenizerIfPossible(AllowYield);
+    }
+
+    endIfDelayed();
+#ifdef ANDROID_INSTRUMENT
+    android::TimeCounter::record(android::TimeCounter::ParsingTimeCounter, __FUNCTION__);
+#endif
+}
+
+void HTMLDocumentParser::end()
+{
+    ASSERT(!isDetached());
+    ASSERT(!isScheduledForResume());
+
+    // Informs the rest of WebCore that parsing is really finished (and deletes this).
+    m_treeBuilder->finished();
+}
+
+void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd()
+{
+    ASSERT(isStopping());
+    ASSERT(!hasInsertionPoint());
+    if (m_scriptRunner && !m_scriptRunner->executeScriptsWaitingForParsing())
+        return;
+    end();
+}
+
+void HTMLDocumentParser::attemptToEnd()
+{
+    // finish() indicates we will not receive any more data. If we are waiting on
+    // an external script to load, we can't finish parsing quite yet.
+
+    if (shouldDelayEnd()) {
+        m_endWasDelayed = true;
+        return;
+    }
+    prepareToStopParsing();
+}
+
+void HTMLDocumentParser::endIfDelayed()
+{
+    // If we've already been detached, don't bother ending.
+    if (isDetached())
+        return;
+
+    if (!m_endWasDelayed || shouldDelayEnd())
+        return;
+
+    m_endWasDelayed = false;
+    prepareToStopParsing();
+}
+
+void HTMLDocumentParser::finish()
+{
+    // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
+    // make sense to call any methods on DocumentParser once it's been stopped.
+    // However, FrameLoader::stop calls Document::finishParsing unconditionally
+    // which in turn calls m_parser->finish().
+
+    // We're not going to get any more data off the network, so we tell the
+    // input stream we've reached the end of file.  finish() can be called more
+    // than once, if the first time does not call end().
+    if (!m_input.haveSeenEndOfFile())
+        m_input.markEndOfFile();
+    attemptToEnd();
+}
+
+bool HTMLDocumentParser::finishWasCalled()
+{
+    return m_input.haveSeenEndOfFile();
+}
+
+// This function is virtual and just for the DocumentParser interface.
+bool HTMLDocumentParser::isExecutingScript() const
+{
+    return inScriptExecution();
+}
+
+// This function is non-virtual and used throughout the implementation.
+bool HTMLDocumentParser::inScriptExecution() const
+{
+    if (!m_scriptRunner)
+        return false;
+    return m_scriptRunner->isExecutingScript();
+}
+
+int HTMLDocumentParser::lineNumber() const
+{
+    return m_tokenizer->lineNumber();
+}
+
+TextPosition0 HTMLDocumentParser::textPosition() const
+{
+    const SegmentedString& currentString = m_input.current();
+    WTF::ZeroBasedNumber line = currentString.currentLine();
+    WTF::ZeroBasedNumber column = currentString.currentColumn();
+    ASSERT(m_tokenizer->lineNumber() == line.zeroBasedInt());
+
+    return TextPosition0(line, column);
+}
+
+bool HTMLDocumentParser::isWaitingForScripts() const
+{
+    return m_treeBuilder->isPaused();
+}
+
+void HTMLDocumentParser::resumeParsingAfterScriptExecution()
+{
+    ASSERT(!inScriptExecution());
+    ASSERT(!m_treeBuilder->isPaused());
+
+    m_preloadScanner.clear();
+    pumpTokenizerIfPossible(AllowYield);
+    endIfDelayed();
+}
+
+void HTMLDocumentParser::watchForLoad(CachedResource* cachedScript)
+{
+    ASSERT(!cachedScript->isLoaded());
+    // addClient would call notifyFinished if the load were complete.
+    // Callers do not expect to be re-entered from this call, so they should
+    // not pass an already-loaded CachedResource.
+    cachedScript->addClient(this);
+}
+
+void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript)
+{
+    cachedScript->removeClient(this);
+}
+
+bool HTMLDocumentParser::shouldLoadExternalScriptFromSrc(const AtomicString& srcValue)
+{
+    if (!xssAuditor())
+        return true;
+    return xssAuditor()->canLoadExternalScriptFromSrc(srcValue);
+}
+
+void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource)
+{
+    // pumpTokenizer can cause this parser to be detached from the Document,
+    // but we need to ensure it isn't deleted yet.
+    RefPtr<HTMLDocumentParser> protect(this);
+
+    ASSERT(m_scriptRunner);
+    ASSERT(!inScriptExecution());
+    if (isStopping()) {
+        attemptToRunDeferredScriptsAndEnd();
+        return;
+    }
+
+    ASSERT(m_treeBuilder->isPaused());
+    // Note: We only ever wait on one script at a time, so we always know this
+    // is the one we were waiting on and can un-pause the tree builder.
+    m_treeBuilder->setPaused(false);
+    bool shouldContinueParsing = m_scriptRunner->executeScriptsWaitingForLoad(cachedResource);
+    m_treeBuilder->setPaused(!shouldContinueParsing);
+    if (shouldContinueParsing)
+        resumeParsingAfterScriptExecution();
+}
+
+void HTMLDocumentParser::executeScriptsWaitingForStylesheets()
+{
+    // Document only calls this when the Document owns the DocumentParser
+    // so this will not be called in the DocumentFragment case.
+    ASSERT(m_scriptRunner);
+    // Ignore calls unless we have a script blocking the parser waiting on a
+    // stylesheet load.  Otherwise we are currently parsing and this
+    // is a re-entrant call from encountering a </ style> tag.
+    if (!m_scriptRunner->hasScriptsWaitingForStylesheets())
+        return;
+
+    // pumpTokenizer can cause this parser to be detached from the Document,
+    // but we need to ensure it isn't deleted yet.
+    RefPtr<HTMLDocumentParser> protect(this);
+
+    ASSERT(!m_scriptRunner->isExecutingScript());
+    ASSERT(m_treeBuilder->isPaused());
+    // Note: We only ever wait on one script at a time, so we always know this
+    // is the one we were waiting on and can un-pause the tree builder.
+    m_treeBuilder->setPaused(false);
+    bool shouldContinueParsing = m_scriptRunner->executeScriptsWaitingForStylesheets();
+    m_treeBuilder->setPaused(!shouldContinueParsing);
+    if (shouldContinueParsing)
+        resumeParsingAfterScriptExecution();
+}
+
+ScriptController* HTMLDocumentParser::script() const
+{
+    return document()->frame() ? document()->frame()->script() : 0;
+}
+
+void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
+{
+    RefPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, scriptingPermission);
+    parser->insert(source); // Use insert() so that the parser will not yield.
+    parser->finish();
+    ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151>
+    parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
+}
+    
+bool HTMLDocumentParser::usePreHTML5ParserQuirks(Document* document)
+{
+    ASSERT(document);
+    return document->settings() && document->settings()->usePreHTML5ParserQuirks();
+}
+
+void HTMLDocumentParser::suspendScheduledTasks()
+{
+    if (m_parserScheduler)
+        m_parserScheduler->suspend();
+}
+
+void HTMLDocumentParser::resumeScheduledTasks()
+{
+    if (m_parserScheduler)
+        m_parserScheduler->resume();
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.h b/Source/WebCore/html/parser/HTMLDocumentParser.h
new file mode 100644
index 0000000..80ca727
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLDocumentParser.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLDocumentParser_h
+#define HTMLDocumentParser_h
+
+#include "CachedResourceClient.h"
+#include "FragmentScriptingPermission.h"
+#include "HTMLInputStream.h"
+#include "HTMLScriptRunnerHost.h"
+#include "HTMLToken.h"
+#include "ScriptableDocumentParser.h"
+#include "SegmentedString.h"
+#include "Timer.h"
+#include <wtf/OwnPtr.h>
+
+namespace WebCore {
+
+class Document;
+class DocumentFragment;
+class HTMLDocument;
+class HTMLParserScheduler;
+class HTMLTokenizer;
+class HTMLScriptRunner;
+class HTMLTreeBuilder;
+class HTMLPreloadScanner;
+class ScriptController;
+class ScriptSourceCode;
+
+class HTMLDocumentParser :  public ScriptableDocumentParser, HTMLScriptRunnerHost, CachedResourceClient {
+public:
+    static PassRefPtr<HTMLDocumentParser> create(HTMLDocument* document, bool reportErrors)
+    {
+        return adoptRef(new HTMLDocumentParser(document, reportErrors));
+    }
+    static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission permission)
+    {
+        return adoptRef(new HTMLDocumentParser(fragment, contextElement, permission));
+    }
+
+    virtual ~HTMLDocumentParser();
+
+    // Exposed for HTMLParserScheduler
+    void resumeParsingAfterYield();
+
+    static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, FragmentScriptingPermission = FragmentScriptingAllowed);
+    
+    static bool usePreHTML5ParserQuirks(Document*);
+
+    HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
+
+    virtual TextPosition0 textPosition() const;
+    virtual void suspendScheduledTasks();
+    virtual void resumeScheduledTasks();
+
+protected:
+    virtual void insert(const SegmentedString&);
+    virtual void append(const SegmentedString&);
+    virtual void finish();
+
+    HTMLDocumentParser(HTMLDocument*, bool reportErrors);
+    HTMLDocumentParser(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
+
+    HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
+
+private:
+    // DocumentParser
+    virtual void detach();
+    virtual bool hasInsertionPoint();
+    virtual bool finishWasCalled();
+    virtual bool processingData() const;
+    virtual void prepareToStopParsing();
+    virtual void stopParsing();
+    virtual bool isWaitingForScripts() const;
+    virtual bool isExecutingScript() const;
+    virtual void executeScriptsWaitingForStylesheets();
+    virtual int lineNumber() const;
+
+    // HTMLScriptRunnerHost
+    virtual void watchForLoad(CachedResource*);
+    virtual void stopWatchingForLoad(CachedResource*);
+    virtual bool shouldLoadExternalScriptFromSrc(const AtomicString&);
+    virtual HTMLInputStream& inputStream() { return m_input; }
+
+    // CachedResourceClient
+    virtual void notifyFinished(CachedResource*);
+
+    enum SynchronousMode {
+        AllowYield,
+        ForceSynchronous,
+    };
+    void pumpTokenizer(SynchronousMode);
+    void pumpTokenizerIfPossible(SynchronousMode);
+
+    bool runScriptsForPausedTreeBuilder();
+    void resumeParsingAfterScriptExecution();
+
+    void begin();
+    void attemptToEnd();
+    void endIfDelayed();
+    void attemptToRunDeferredScriptsAndEnd();
+    void end();
+
+    bool isScheduledForResume() const;
+    bool inScriptExecution() const;
+    bool inWrite() const { return m_writeNestingLevel > 0; }
+    bool shouldDelayEnd() const { return inWrite() || isWaitingForScripts() || inScriptExecution() || isScheduledForResume(); }
+
+    ScriptController* script() const;
+
+    HTMLInputStream m_input;
+
+    // We hold m_token here because it might be partially complete.
+    HTMLToken m_token;
+
+    OwnPtr<HTMLTokenizer> m_tokenizer;
+    OwnPtr<HTMLScriptRunner> m_scriptRunner;
+    OwnPtr<HTMLTreeBuilder> m_treeBuilder;
+    OwnPtr<HTMLPreloadScanner> m_preloadScanner;
+    OwnPtr<HTMLParserScheduler> m_parserScheduler;
+
+    bool m_endWasDelayed;
+    unsigned m_writeNestingLevel;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLElementStack.cpp b/Source/WebCore/html/parser/HTMLElementStack.cpp
new file mode 100644
index 0000000..6aab0f7
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLElementStack.cpp
@@ -0,0 +1,569 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "HTMLElementStack.h"
+
+#include "Element.h"
+#include "HTMLNames.h"
+#include "MathMLNames.h"
+#include "SVGNames.h"
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+inline bool isNumberedHeaderElement(Element* element)
+{
+    return element->hasTagName(h1Tag)
+        || element->hasTagName(h2Tag)
+        || element->hasTagName(h3Tag)
+        || element->hasTagName(h4Tag)
+        || element->hasTagName(h5Tag)
+        || element->hasTagName(h6Tag);
+}
+
+inline bool isScopeMarker(Element* element)
+{
+    return element->hasTagName(appletTag)
+        || element->hasTagName(captionTag)
+        || element->hasTagName(htmlTag)
+        || element->hasTagName(marqueeTag)
+        || element->hasTagName(objectTag)
+        || element->hasTagName(tableTag)
+        || element->hasTagName(tdTag)
+        || element->hasTagName(thTag)
+        || element->hasTagName(MathMLNames::miTag)
+        || element->hasTagName(MathMLNames::moTag)
+        || element->hasTagName(MathMLNames::mnTag)
+        || element->hasTagName(MathMLNames::msTag)
+        || element->hasTagName(MathMLNames::mtextTag)
+        || element->hasTagName(MathMLNames::annotation_xmlTag)
+        || element->hasTagName(SVGNames::foreignObjectTag)
+        || element->hasTagName(SVGNames::descTag)
+        || element->hasTagName(SVGNames::titleTag);
+}
+
+inline bool isListItemScopeMarker(Element* element)
+{
+    return isScopeMarker(element)
+        || element->hasTagName(olTag)
+        || element->hasTagName(ulTag);
+}
+
+inline bool isTableScopeMarker(Element* element)
+{
+    return element->hasTagName(tableTag)
+        || element->hasTagName(htmlTag);
+}
+
+inline bool isTableBodyScopeMarker(Element* element)
+{
+    return element->hasTagName(tbodyTag)
+        || element->hasTagName(tfootTag)
+        || element->hasTagName(theadTag)
+        || element->hasTagName(htmlTag);
+}
+
+inline bool isTableRowScopeMarker(Element* element)
+{
+    return element->hasTagName(trTag)
+        || element->hasTagName(htmlTag);
+}
+
+inline bool isForeignContentScopeMarker(Element* element)
+{
+    return element->hasTagName(MathMLNames::miTag)
+        || element->hasTagName(MathMLNames::moTag)
+        || element->hasTagName(MathMLNames::mnTag)
+        || element->hasTagName(MathMLNames::msTag)
+        || element->hasTagName(MathMLNames::mtextTag)
+        || element->hasTagName(SVGNames::foreignObjectTag)
+        || element->hasTagName(SVGNames::descTag)
+        || element->hasTagName(SVGNames::titleTag)
+        || element->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
+}
+
+inline bool isButtonScopeMarker(Element* element)
+{
+    return isScopeMarker(element)
+        || element->hasTagName(buttonTag);
+}
+
+inline bool isSelectScopeMarker(Element* element)
+{
+    return !element->hasTagName(optgroupTag)
+        && !element->hasTagName(optionTag);
+}
+
+}
+
+HTMLElementStack::ElementRecord::ElementRecord(PassRefPtr<Element> element, PassOwnPtr<ElementRecord> next)
+    : m_element(element)
+    , m_next(next)
+{
+    ASSERT(m_element);
+}
+
+HTMLElementStack::ElementRecord::~ElementRecord()
+{
+}
+
+void HTMLElementStack::ElementRecord::replaceElement(PassRefPtr<Element> element)
+{
+    ASSERT(element);
+    // FIXME: Should this call finishParsingChildren?
+    m_element = element;
+}
+
+bool HTMLElementStack::ElementRecord::isAbove(ElementRecord* other) const
+{
+    for (ElementRecord* below = next(); below; below = below->next()) {
+        if (below == other)
+            return true;
+    }
+    return false;
+}
+
+HTMLElementStack::HTMLElementStack()
+    : m_htmlElement(0)
+    , m_headElement(0)
+    , m_bodyElement(0)
+{
+}
+
+HTMLElementStack::~HTMLElementStack()
+{
+}
+
+bool HTMLElementStack::hasOnlyOneElement() const
+{
+    return !topRecord()->next();
+}
+
+bool HTMLElementStack::secondElementIsHTMLBodyElement() const
+{
+    // This is used in the fragment case of <body> and <frameset> in the "in body"
+    // insertion mode.
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+    ASSERT(m_htmlElement);
+    // If we have a body element, it must always be the second element on the
+    // stack, as we always start with an html element, and any other element
+    // would cause the implicit creation of a body element.
+    return !!m_bodyElement;
+}
+
+void HTMLElementStack::popHTMLHeadElement()
+{
+    ASSERT(top() == m_headElement);
+    m_headElement = 0;
+    popCommon();
+}
+
+void HTMLElementStack::popHTMLBodyElement()
+{
+    ASSERT(top() == m_bodyElement);
+    m_bodyElement = 0;
+    popCommon();
+}
+
+void HTMLElementStack::popAll() // Empties the whole stack and forgets the remembered <html>, <head> and <body>.
+{
+    m_htmlElement = 0;
+    m_headElement = 0;
+    m_bodyElement = 0;
+    while (m_top) {
+        top()->finishParsingChildren(); // Same notification popCommon() sends, but without popCommon()'s assertions.
+        m_top = m_top->releaseNext(); // The record beneath becomes the new top.
+    }
+}
+
+void HTMLElementStack::pop() // Pops the top element; asserts that it is not a <head> element.
+{
+    ASSERT(!top()->hasTagName(HTMLNames::headTag));
+    popCommon(); // popCommon() carries the remaining <html>/<body> assertions.
+}
+
+// Pops elements until the one on top has local name |tagName|. The matching
+// element itself stays on the stack.
+void HTMLElementStack::popUntil(const AtomicString& tagName)
+{
+    for (;;) {
+        if (top()->hasLocalName(tagName))
+            return;
+        // pop() will ASSERT at <body> if callers fail to check that there is an
+        // element with localName |tagName| on the stack of open elements.
+        pop();
+    }
+}
+
+void HTMLElementStack::popUntilPopped(const AtomicString& tagName) // Like popUntil(tagName), but the matching element is popped as well.
+{
+    popUntil(tagName);
+    pop(); // Removes the matching element that popUntil() left on top.
+}
+
+// Pops elements up to and including the first one (from the top) that
+// isNumberedHeaderElement() accepts.
+void HTMLElementStack::popUntilNumberedHeaderElementPopped()
+{
+    bool poppedHeader;
+    do {
+        // Classify the top element before it is removed.
+        poppedHeader = isNumberedHeaderElement(top());
+        pop();
+    } while (!poppedHeader);
+}
+
+// Pops elements until |element| is on top. |element| itself stays on the
+// stack.
+void HTMLElementStack::popUntil(Element* element)
+{
+    for (;;) {
+        if (top() == element)
+            return;
+        pop();
+    }
+}
+
+void HTMLElementStack::popUntilPopped(Element* element) // Like popUntil(element), but |element| is popped as well.
+{
+    popUntil(element);
+    pop(); // Removes |element|, which popUntil() left on top.
+}
+
+// Pops elements until isTableScopeMarker() accepts the one on top, which is
+// left on the stack.
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-context
+void HTMLElementStack::popUntilTableScopeMarker()
+{
+    while (true) {
+        Element* current = top();
+        if (isTableScopeMarker(current))
+            break;
+        pop();
+    }
+}
+
+// Pops elements until isTableBodyScopeMarker() accepts the one on top, which
+// is left on the stack.
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-body-context
+void HTMLElementStack::popUntilTableBodyScopeMarker()
+{
+    while (true) {
+        Element* current = top();
+        if (isTableBodyScopeMarker(current))
+            break;
+        pop();
+    }
+}
+
+// Pops elements until isTableRowScopeMarker() accepts the one on top, which
+// is left on the stack.
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-row-context
+void HTMLElementStack::popUntilTableRowScopeMarker()
+{
+    while (true) {
+        Element* current = top();
+        if (isTableRowScopeMarker(current))
+            break;
+        pop();
+    }
+}
+
+// Pops elements until isForeignContentScopeMarker() accepts the one on top,
+// which is left on the stack.
+void HTMLElementStack::popUntilForeignContentScopeMarker()
+{
+    while (true) {
+        Element* current = top();
+        if (isForeignContentScopeMarker(current))
+            break;
+        pop();
+    }
+}
+
+void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr<Element> element) // Pushes <html> onto the empty stack and remembers it.
+{
+    ASSERT(!m_top); // <html> should always be the bottom of the stack.
+    ASSERT(element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!m_htmlElement);
+    m_htmlElement = element.get(); // Recorded before pushCommon(), which asserts that m_htmlElement is set.
+    pushCommon(element);
+}
+
+void HTMLElementStack::pushHTMLHeadElement(PassRefPtr<Element> element) // Pushes <head> and remembers it in m_headElement.
+{
+    ASSERT(element->hasTagName(HTMLNames::headTag));
+    ASSERT(!m_headElement); // Only one <head> may be remembered at a time.
+    m_headElement = element.get(); // Raw pointer taken before |element| is handed on to pushCommon().
+    pushCommon(element);
+}
+
+void HTMLElementStack::pushHTMLBodyElement(PassRefPtr<Element> element) // Pushes <body> and remembers it in m_bodyElement.
+{
+    ASSERT(element->hasTagName(HTMLNames::bodyTag));
+    ASSERT(!m_bodyElement); // Only one <body> may be remembered at a time.
+    m_bodyElement = element.get(); // Raw pointer taken before |element| is handed on to pushCommon().
+    pushCommon(element);
+}
+
+void HTMLElementStack::push(PassRefPtr<Element> element) // Generic push; <html>, <head> and <body> have dedicated push functions.
+{
+    ASSERT(!element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!element->hasTagName(HTMLNames::headTag));
+    ASSERT(!element->hasTagName(HTMLNames::bodyTag));
+    ASSERT(m_htmlElement); // <html> must already have been pushed.
+    pushCommon(element);
+}
+
+// Inserts |element| into the stack directly above |recordBelow|. When
+// |recordBelow| is the current top this is an ordinary push(); otherwise the
+// new record is spliced in between |recordBelow| and the record that used to
+// sit on top of it. <html>, <head> and <body> may not be inserted this way.
+void HTMLElementStack::insertAbove(PassRefPtr<Element> element, ElementRecord* recordBelow)
+{
+    ASSERT(element);
+    ASSERT(recordBelow);
+    ASSERT(m_top);
+    ASSERT(!element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!element->hasTagName(HTMLNames::headTag));
+    ASSERT(!element->hasTagName(HTMLNames::bodyTag));
+    ASSERT(m_htmlElement);
+    if (recordBelow == m_top) {
+        push(element);
+        return;
+    }
+
+    // Walk down from the top to the record whose successor is |recordBelow|.
+    ElementRecord* recordAbove = m_top.get();
+    while (recordAbove && recordAbove->next() != recordBelow)
+        recordAbove = recordAbove->next();
+
+    if (!recordAbove) {
+        // |recordBelow| is not part of this stack.
+        ASSERT_NOT_REACHED();
+        return;
+    }
+
+    // The new record takes over the old successor chain of |recordAbove|.
+    recordAbove->setNext(adoptPtr(new ElementRecord(element, recordAbove->releaseNext())));
+    recordAbove->next()->element()->beginParsingChildren();
+}
+
+HTMLElementStack::ElementRecord* HTMLElementStack::topRecord() const // Returns the record on top of the stack.
+{
+    ASSERT(m_top); // Must not be called on an empty stack.
+    return m_top.get();
+}
+
+// Returns the element held by the record directly beneath the top one.
+Element* HTMLElementStack::oneBelowTop() const
+{
+    // We should never be calling this if it could be 0.
+    ASSERT(m_top);
+    ElementRecord* second = m_top->next();
+    ASSERT(second);
+    return second->element();
+}
+
+Element* HTMLElementStack::bottom() const // The bottom of the stack is the remembered <html> element.
+{
+    return htmlElement(); // htmlElement() asserts that <html> has been pushed.
+}
+
+// Removes the remembered <head> element from wherever it sits in the stack
+// and forgets it. |element| must be the remembered <head>.
+void HTMLElementStack::removeHTMLHeadElement(Element* element)
+{
+    ASSERT(m_headElement == element);
+    if (m_top->element() != element) {
+        // Not on top: forget it and unlink its record from the middle.
+        m_headElement = 0;
+        removeNonTopCommon(element);
+        return;
+    }
+    // On top: this is an ordinary <head> pop.
+    popHTMLHeadElement();
+}
+
+// Removes |element| from wherever it sits in the stack. <head> has its own
+// entry point, removeHTMLHeadElement().
+void HTMLElementStack::remove(Element* element)
+{
+    ASSERT(!element->hasTagName(HTMLNames::headTag));
+    if (m_top->element() != element) {
+        // Not on top: unlink its record from the middle of the stack.
+        removeNonTopCommon(element);
+        return;
+    }
+    pop();
+}
+
+// Returns the first record, searching from the top down, that holds
+// |element|, or 0 when no record does.
+HTMLElementStack::ElementRecord* HTMLElementStack::find(Element* element) const
+{
+    ElementRecord* record = m_top.get();
+    while (record && record->element() != element)
+        record = record->next();
+    return record;
+}
+
+// Returns the first record, searching from the top down, whose element has
+// local name |tagName|, or 0 when no record does.
+HTMLElementStack::ElementRecord* HTMLElementStack::topmost(const AtomicString& tagName) const
+{
+    ElementRecord* record = m_top.get();
+    while (record && !record->element()->hasLocalName(tagName))
+        record = record->next();
+    return record;
+}
+
+// Reports whether any record on the stack holds |element|.
+bool HTMLElementStack::contains(Element* element) const
+{
+    ElementRecord* match = find(element);
+    return match != 0;
+}
+
+// Reports whether any element on the stack has local name |tagName|.
+bool HTMLElementStack::contains(const AtomicString& tagName) const
+{
+    ElementRecord* match = topmost(tagName);
+    return match != 0;
+}
+
+// Shared implementation of the "in ... scope" queries. Walks down from |top|
+// and returns true at the first element whose local name is |targetTag|, or
+// false at the first element accepted by |isMarker|. The name test is made
+// before the marker test, so a marker that itself matches counts as a hit.
+template <bool isMarker(Element*)>
+bool inScopeCommon(HTMLElementStack::ElementRecord* top, const AtomicString& targetTag)
+{
+    HTMLElementStack::ElementRecord* record = top;
+    while (record) {
+        Element* candidate = record->element();
+        if (candidate->hasLocalName(targetTag))
+            return true;
+        if (isMarker(candidate))
+            return false;
+        record = record->next();
+    }
+    ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker.
+    return false;
+}
+
+// Walks down from the top and returns false at the first element whose
+// namespace is not xhtmlNamespaceURI, or true once a scope marker is reached.
+// The marker itself is namespace-checked before it ends the walk.
+bool HTMLElementStack::hasOnlyHTMLElementsInScope() const
+{
+    ElementRecord* record = m_top.get();
+    while (record) {
+        Element* current = record->element();
+        if (current->namespaceURI() != xhtmlNamespaceURI)
+            return false;
+        if (isScopeMarker(current))
+            return true;
+        record = record->next();
+    }
+    ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker.
+    return true;
+}
+
+// Walks down from the top and returns true at the first element accepted by
+// isNumberedHeaderElement(), or false once a scope marker is reached first.
+bool HTMLElementStack::hasNumberedHeaderElementInScope() const
+{
+    ElementRecord* record = m_top.get();
+    while (record) {
+        Element* current = record->element();
+        if (isNumberedHeaderElement(current))
+            return true;
+        if (isScopeMarker(current))
+            return false;
+        record = record->next();
+    }
+    ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker.
+    return false;
+}
+
+// Walks down from the top and returns true when |targetElement| itself is
+// met before a scope marker ends the walk. The identity test is made first,
+// so a marker that is |targetElement| counts as a hit.
+bool HTMLElementStack::inScope(Element* targetElement) const
+{
+    ElementRecord* record = m_top.get();
+    while (record) {
+        Element* current = record->element();
+        if (current == targetElement)
+            return true;
+        if (isScopeMarker(current))
+            return false;
+        record = record->next();
+    }
+    ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker.
+    return false;
+}
+
+bool HTMLElementStack::inScope(const AtomicString& targetTag) const // Local-name search from the top, bounded by isScopeMarker().
+{
+    return inScopeCommon<isScopeMarker>(m_top.get(), targetTag);
+}
+
+bool HTMLElementStack::inScope(const QualifiedName& tagName) const // Convenience overload: only the local name of |tagName| is compared.
+{
+    // FIXME: Is localName() right for non-html elements?
+    return inScope(tagName.localName());
+}
+
+bool HTMLElementStack::inListItemScope(const AtomicString& targetTag) const // Local-name search from the top, bounded by isListItemScopeMarker().
+{
+    return inScopeCommon<isListItemScopeMarker>(m_top.get(), targetTag);
+}
+
+bool HTMLElementStack::inListItemScope(const QualifiedName& tagName) const // Convenience overload: only the local name of |tagName| is compared.
+{
+    // FIXME: Is localName() right for non-html elements?
+    return inListItemScope(tagName.localName());
+}
+
+bool HTMLElementStack::inTableScope(const AtomicString& targetTag) const // Local-name search from the top, bounded by isTableScopeMarker().
+{
+    return inScopeCommon<isTableScopeMarker>(m_top.get(), targetTag);
+}
+
+bool HTMLElementStack::inTableScope(const QualifiedName& tagName) const // Convenience overload: only the local name of |tagName| is compared.
+{
+    // FIXME: Is localName() right for non-html elements?
+    return inTableScope(tagName.localName());
+}
+
+bool HTMLElementStack::inButtonScope(const AtomicString& targetTag) const // Local-name search from the top, bounded by isButtonScopeMarker().
+{
+    return inScopeCommon<isButtonScopeMarker>(m_top.get(), targetTag);
+}
+
+bool HTMLElementStack::inButtonScope(const QualifiedName& tagName) const // Convenience overload: only the local name of |tagName| is compared.
+{
+    // FIXME: Is localName() right for non-html elements?
+    return inButtonScope(tagName.localName());
+}
+
+bool HTMLElementStack::inSelectScope(const AtomicString& targetTag) const // Local-name search from the top, bounded by isSelectScopeMarker().
+{
+    return inScopeCommon<isSelectScopeMarker>(m_top.get(), targetTag);
+}
+
+bool HTMLElementStack::inSelectScope(const QualifiedName& tagName) const // Convenience overload: only the local name of |tagName| is compared.
+{
+    // FIXME: Is localName() right for non-html elements?
+    return inSelectScope(tagName.localName());
+}
+
+Element* HTMLElementStack::htmlElement() const // Returns the remembered <html> element.
+{
+    ASSERT(m_htmlElement); // Callers must not ask before <html> was pushed (or after popAll()).
+    return m_htmlElement;
+}
+
+Element* HTMLElementStack::headElement() const // Returns the remembered <head> element.
+{
+    ASSERT(m_headElement); // Callers must not ask while no <head> is remembered.
+    return m_headElement;
+}
+
+Element* HTMLElementStack::bodyElement() const // Returns the remembered <body> element.
+{
+    ASSERT(m_bodyElement); // Callers must not ask while no <body> is remembered.
+    return m_bodyElement;
+}
+
+void HTMLElementStack::pushCommon(PassRefPtr<Element> element) // Shared tail of every push function: links a new top record and notifies the element.
+{
+    ASSERT(m_htmlElement);
+    m_top = adoptPtr(new ElementRecord(element, m_top.release())); // The previous top becomes next() of the new record.
+    top()->beginParsingChildren(); // top() is now the element that was just pushed.
+}
+
+void HTMLElementStack::popCommon() // Shared tail of every pop function: notifies the top element and unlinks its record.
+{
+    ASSERT(!top()->hasTagName(HTMLNames::htmlTag)); // <html> is never popped through this path.
+    ASSERT(!top()->hasTagName(HTMLNames::headTag) || !m_headElement); // A <head> on top may only go once m_headElement has been cleared.
+    ASSERT(!top()->hasTagName(HTMLNames::bodyTag) || !m_bodyElement); // Likewise for <body> and m_bodyElement.
+    top()->finishParsingChildren();
+    m_top = m_top->releaseNext(); // The record beneath becomes the new top.
+}
+
+// Unlinks the record holding |element| from somewhere below the top of the
+// stack and calls finishParsingChildren() on the element. The caller handles
+// the case where |element| is on top (see the ASSERT below); <html> and <body>
+// are never removed through this path.
+void HTMLElementStack::removeNonTopCommon(Element* element)
+{
+    ASSERT(!element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!element->hasTagName(HTMLNames::bodyTag));
+    ASSERT(top() != element);
+    // Each iteration inspects the record beneath |pos|, so stop once |pos| has
+    // no successor. Without this bound, a missing |element| would dereference
+    // the null next() of the last record before ASSERT_NOT_REACHED() is hit.
+    for (ElementRecord* pos = m_top.get(); pos && pos->next(); pos = pos->next()) {
+        if (pos->next()->element() == element) {
+            // FIXME: Is it OK to call finishParsingChildren()
+            // when the children aren't actually finished?
+            element->finishParsingChildren();
+            // Splice the matching record out: |pos| adopts its successor chain.
+            pos->setNext(pos->next()->releaseNext());
+            return;
+        }
+    }
+    ASSERT_NOT_REACHED();
+}
+
+#ifndef NDEBUG
+
+// Debug-only helper: calls showNode() on every element, from the top of the
+// stack down to the bottom.
+void HTMLElementStack::show()
+{
+    ElementRecord* record = m_top.get();
+    while (record) {
+        record->element()->showNode();
+        record = record->next();
+    }
+}
+
+#endif
+
+}
diff --git a/Source/WebCore/html/parser/HTMLElementStack.h b/Source/WebCore/html/parser/HTMLElementStack.h
new file mode 100644
index 0000000..8a8e160
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLElementStack.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLElementStack_h
+#define HTMLElementStack_h
+
+#include <wtf/Forward.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/OwnPtr.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/RefPtr.h>
+
+namespace WebCore {
+
+class Element;
+class QualifiedName;
+
+// NOTE: The HTML5 spec uses a backwards (grows downward) stack.  We're using
+// more standard (grows upwards) stack terminology here.
+class HTMLElementStack : public Noncopyable {
+public:
+    HTMLElementStack();
+    ~HTMLElementStack();
+
+    class ElementRecord : public Noncopyable { // One stack entry: an element plus the record beneath it.
+    public:
+        ~ElementRecord(); // Public for ~PassOwnPtr()
+    
+        Element* element() const { return m_element.get(); }
+        void replaceElement(PassRefPtr<Element>); // Swaps the held element; the link to the record beneath is unchanged.
+
+        bool isAbove(ElementRecord*) const; // True if the given record is reachable through next() from this one.
+
+        ElementRecord* next() const { return m_next.get(); } // The record directly beneath this one, or null at the bottom.
+
+    private:
+        friend class HTMLElementStack; // Only the stack constructs records and relinks them.
+
+        ElementRecord(PassRefPtr<Element>, PassOwnPtr<ElementRecord>);
+
+        PassOwnPtr<ElementRecord> releaseNext() { return m_next.release(); }
+        void setNext(PassOwnPtr<ElementRecord> next) { m_next = next; }
+
+        RefPtr<Element> m_element;
+        OwnPtr<ElementRecord> m_next; // Each record owns the one beneath it.
+    };
+
+    // Inlining this function is a (small) performance win on the parsing
+    // benchmark.
+    Element* top() const
+    {
+        ASSERT(m_top->element()); // Dereferences m_top, so the stack must not be empty.
+        return m_top->element();
+    }
+
+    Element* oneBelowTop() const; // Element of the second record from the top; asserts that it exists.
+    ElementRecord* topRecord() const; // Asserts that the stack is not empty.
+    Element* bottom() const; // Same as htmlElement().
+    ElementRecord* find(Element*) const; // First record from the top holding the element, or 0.
+    ElementRecord* topmost(const AtomicString& tagName) const; // First record from the top whose element has that local name, or 0.
+
+    void insertAbove(PassRefPtr<Element>, ElementRecord*); // Inserts a new record directly above the given one.
+
+    void push(PassRefPtr<Element>); // For anything except <html>, <head> and <body>.
+    void pushHTMLHtmlElement(PassRefPtr<Element>);
+    void pushHTMLHeadElement(PassRefPtr<Element>);
+    void pushHTMLBodyElement(PassRefPtr<Element>);
+
+    void pop(); // Asserts that the top element is not <head>.
+    void popUntil(const AtomicString& tagName); // Leaves the matching element on top.
+    void popUntil(Element*); // Leaves the element on top.
+    void popUntilPopped(const AtomicString& tagName); // Also pops the matching element.
+    void popUntilPopped(Element*); // Also pops the element.
+    void popUntilNumberedHeaderElementPopped();
+    void popUntilTableScopeMarker(); // "clear the stack back to a table context" in the spec.
+    void popUntilTableBodyScopeMarker(); // "clear the stack back to a table body context" in the spec.
+    void popUntilTableRowScopeMarker(); // "clear the stack back to a table row context" in the spec.
+    void popUntilForeignContentScopeMarker();
+    void popHTMLHeadElement();
+    void popHTMLBodyElement();
+    void popAll(); // Empties the stack, <html> included.
+
+    void remove(Element*); // Removes the element from any position; not for <head>.
+    void removeHTMLHeadElement(Element*); // Like remove(), but for the remembered <head>, which it also forgets.
+
+    bool contains(Element*) const;
+    bool contains(const AtomicString& tagName) const;
+
+    bool inScope(Element*) const;
+    bool inScope(const AtomicString& tagName) const;
+    bool inScope(const QualifiedName&) const; // The QualifiedName overloads compare the local name only.
+    bool inListItemScope(const AtomicString& tagName) const;
+    bool inListItemScope(const QualifiedName&) const;
+    bool inTableScope(const AtomicString& tagName) const;
+    bool inTableScope(const QualifiedName&) const;
+    bool inButtonScope(const AtomicString& tagName) const;
+    bool inButtonScope(const QualifiedName&) const;
+    bool inSelectScope(const AtomicString& tagName) const;
+    bool inSelectScope(const QualifiedName&) const;
+
+    bool hasOnlyHTMLElementsInScope() const;
+    bool hasNumberedHeaderElementInScope() const;
+
+    bool hasOnlyOneElement() const;
+    bool secondElementIsHTMLBodyElement() const; // Answered from the remembered <body> pointer.
+
+    Element* htmlElement() const; // These three getters assert that the element is currently remembered.
+    Element* headElement() const;
+    Element* bodyElement() const;
+
+#ifndef NDEBUG
+    void show(); // Calls showNode() on each element, top first.
+#endif
+
+private:
+    void pushCommon(PassRefPtr<Element>); // Shared by all push functions.
+    void popCommon(); // Shared by all pop functions except popAll().
+    void removeNonTopCommon(Element*); // Unlinks a record that is not on top.
+
+    OwnPtr<ElementRecord> m_top; // Head of the singly linked list of records; null when the stack is empty.
+
+    // We remember <html>, <head> and <body> as they are pushed.  Their
+    // ElementRecords keep them alive.  <html> is never popped.
+    // FIXME: We don't currently require type-specific information about
+    // these elements so we haven't yet bothered to plumb the types all the
+    // way down through createElement, etc.
+    Element* m_htmlElement;
+    Element* m_headElement;
+    Element* m_bodyElement;
+};
+
+} // namespace WebCore
+
+#endif // HTMLElementStack_h
diff --git a/Source/WebCore/html/parser/HTMLEntityNames.in b/Source/WebCore/html/parser/HTMLEntityNames.in
new file mode 100644
index 0000000..2d42ab2
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntityNames.in
@@ -0,0 +1,2138 @@
+"AElig;","U+000C6"
+"AElig","U+000C6"
+"AMP;","U+00026"
+"AMP","U+00026"
+"Aacute;","U+000C1"
+"Aacute","U+000C1"
+"Abreve;","U+00102"
+"Acirc;","U+000C2"
+"Acirc","U+000C2"
+"Acy;","U+00410"
+"Afr;","U+1D504"
+"Agrave;","U+000C0"
+"Agrave","U+000C0"
+"Alpha;","U+00391"
+"Amacr;","U+00100"
+"And;","U+02A53"
+"Aogon;","U+00104"
+"Aopf;","U+1D538"
+"ApplyFunction;","U+02061"
+"Aring;","U+000C5"
+"Aring","U+000C5"
+"Ascr;","U+1D49C"
+"Assign;","U+02254"
+"Atilde;","U+000C3"
+"Atilde","U+000C3"
+"Auml;","U+000C4"
+"Auml","U+000C4"
+"Backslash;","U+02216"
+"Barv;","U+02AE7"
+"Barwed;","U+02306"
+"Bcy;","U+00411"
+"Because;","U+02235"
+"Bernoullis;","U+0212C"
+"Beta;","U+00392"
+"Bfr;","U+1D505"
+"Bopf;","U+1D539"
+"Breve;","U+002D8"
+"Bscr;","U+0212C"
+"Bumpeq;","U+0224E"
+"CHcy;","U+00427"
+"COPY;","U+000A9"
+"COPY","U+000A9"
+"Cacute;","U+00106"
+"Cap;","U+022D2"
+"CapitalDifferentialD;","U+02145"
+"Cayleys;","U+0212D"
+"Ccaron;","U+0010C"
+"Ccedil;","U+000C7"
+"Ccedil","U+000C7"
+"Ccirc;","U+00108"
+"Cconint;","U+02230"
+"Cdot;","U+0010A"
+"Cedilla;","U+000B8"
+"CenterDot;","U+000B7"
+"Cfr;","U+0212D"
+"Chi;","U+003A7"
+"CircleDot;","U+02299"
+"CircleMinus;","U+02296"
+"CirclePlus;","U+02295"
+"CircleTimes;","U+02297"
+"ClockwiseContourIntegral;","U+02232"
+"CloseCurlyDoubleQuote;","U+0201D"
+"CloseCurlyQuote;","U+02019"
+"Colon;","U+02237"
+"Colone;","U+02A74"
+"Congruent;","U+02261"
+"Conint;","U+0222F"
+"ContourIntegral;","U+0222E"
+"Copf;","U+02102"
+"Coproduct;","U+02210"
+"CounterClockwiseContourIntegral;","U+02233"
+"Cross;","U+02A2F"
+"Cscr;","U+1D49E"
+"Cup;","U+022D3"
+"CupCap;","U+0224D"
+"DD;","U+02145"
+"DDotrahd;","U+02911"
+"DJcy;","U+00402"
+"DScy;","U+00405"
+"DZcy;","U+0040F"
+"Dagger;","U+02021"
+"Darr;","U+021A1"
+"Dashv;","U+02AE4"
+"Dcaron;","U+0010E"
+"Dcy;","U+00414"
+"Del;","U+02207"
+"Delta;","U+00394"
+"Dfr;","U+1D507"
+"DiacriticalAcute;","U+000B4"
+"DiacriticalDot;","U+002D9"
+"DiacriticalDoubleAcute;","U+002DD"
+"DiacriticalGrave;","U+00060"
+"DiacriticalTilde;","U+002DC"
+"Diamond;","U+022C4"
+"DifferentialD;","U+02146"
+"Dopf;","U+1D53B"
+"Dot;","U+000A8"
+"DotDot;","U+020DC"
+"DotEqual;","U+02250"
+"DoubleContourIntegral;","U+0222F"
+"DoubleDot;","U+000A8"
+"DoubleDownArrow;","U+021D3"
+"DoubleLeftArrow;","U+021D0"
+"DoubleLeftRightArrow;","U+021D4"
+"DoubleLeftTee;","U+02AE4"
+"DoubleLongLeftArrow;","U+027F8"
+"DoubleLongLeftRightArrow;","U+027FA"
+"DoubleLongRightArrow;","U+027F9"
+"DoubleRightArrow;","U+021D2"
+"DoubleRightTee;","U+022A8"
+"DoubleUpArrow;","U+021D1"
+"DoubleUpDownArrow;","U+021D5"
+"DoubleVerticalBar;","U+02225"
+"DownArrow;","U+02193"
+"DownArrowBar;","U+02913"
+"DownArrowUpArrow;","U+021F5"
+"DownBreve;","U+00311"
+"DownLeftRightVector;","U+02950"
+"DownLeftTeeVector;","U+0295E"
+"DownLeftVector;","U+021BD"
+"DownLeftVectorBar;","U+02956"
+"DownRightTeeVector;","U+0295F"
+"DownRightVector;","U+021C1"
+"DownRightVectorBar;","U+02957"
+"DownTee;","U+022A4"
+"DownTeeArrow;","U+021A7"
+"Downarrow;","U+021D3"
+"Dscr;","U+1D49F"
+"Dstrok;","U+00110"
+"ENG;","U+0014A"
+"ETH;","U+000D0"
+"ETH","U+000D0"
+"Eacute;","U+000C9"
+"Eacute","U+000C9"
+"Ecaron;","U+0011A"
+"Ecirc;","U+000CA"
+"Ecirc","U+000CA"
+"Ecy;","U+0042D"
+"Edot;","U+00116"
+"Efr;","U+1D508"
+"Egrave;","U+000C8"
+"Egrave","U+000C8"
+"Element;","U+02208"
+"Emacr;","U+00112"
+"EmptySmallSquare;","U+025FB"
+"EmptyVerySmallSquare;","U+025AB"
+"Eogon;","U+00118"
+"Eopf;","U+1D53C"
+"Epsilon;","U+00395"
+"Equal;","U+02A75"
+"EqualTilde;","U+02242"
+"Equilibrium;","U+021CC"
+"Escr;","U+02130"
+"Esim;","U+02A73"
+"Eta;","U+00397"
+"Euml;","U+000CB"
+"Euml","U+000CB"
+"Exists;","U+02203"
+"ExponentialE;","U+02147"
+"Fcy;","U+00424"
+"Ffr;","U+1D509"
+"FilledSmallSquare;","U+025FC"
+"FilledVerySmallSquare;","U+025AA"
+"Fopf;","U+1D53D"
+"ForAll;","U+02200"
+"Fouriertrf;","U+02131"
+"Fscr;","U+02131"
+"GJcy;","U+00403"
+"GT;","U+0003E"
+"GT","U+0003E"
+"Gamma;","U+00393"
+"Gammad;","U+003DC"
+"Gbreve;","U+0011E"
+"Gcedil;","U+00122"
+"Gcirc;","U+0011C"
+"Gcy;","U+00413"
+"Gdot;","U+00120"
+"Gfr;","U+1D50A"
+"Gg;","U+022D9"
+"Gopf;","U+1D53E"
+"GreaterEqual;","U+02265"
+"GreaterEqualLess;","U+022DB"
+"GreaterFullEqual;","U+02267"
+"GreaterGreater;","U+02AA2"
+"GreaterLess;","U+02277"
+"GreaterSlantEqual;","U+02A7E"
+"GreaterTilde;","U+02273"
+"Gscr;","U+1D4A2"
+"Gt;","U+0226B"
+"HARDcy;","U+0042A"
+"Hacek;","U+002C7"
+"Hat;","U+0005E"
+"Hcirc;","U+00124"
+"Hfr;","U+0210C"
+"HilbertSpace;","U+0210B"
+"Hopf;","U+0210D"
+"HorizontalLine;","U+02500"
+"Hscr;","U+0210B"
+"Hstrok;","U+00126"
+"HumpDownHump;","U+0224E"
+"HumpEqual;","U+0224F"
+"IEcy;","U+00415"
+"IJlig;","U+00132"
+"IOcy;","U+00401"
+"Iacute;","U+000CD"
+"Iacute","U+000CD"
+"Icirc;","U+000CE"
+"Icirc","U+000CE"
+"Icy;","U+00418"
+"Idot;","U+00130"
+"Ifr;","U+02111"
+"Igrave;","U+000CC"
+"Igrave","U+000CC"
+"Im;","U+02111"
+"Imacr;","U+0012A"
+"ImaginaryI;","U+02148"
+"Implies;","U+021D2"
+"Int;","U+0222C"
+"Integral;","U+0222B"
+"Intersection;","U+022C2"
+"InvisibleComma;","U+02063"
+"InvisibleTimes;","U+02062"
+"Iogon;","U+0012E"
+"Iopf;","U+1D540"
+"Iota;","U+00399"
+"Iscr;","U+02110"
+"Itilde;","U+00128"
+"Iukcy;","U+00406"
+"Iuml;","U+000CF"
+"Iuml","U+000CF"
+"Jcirc;","U+00134"
+"Jcy;","U+00419"
+"Jfr;","U+1D50D"
+"Jopf;","U+1D541"
+"Jscr;","U+1D4A5"
+"Jsercy;","U+00408"
+"Jukcy;","U+00404"
+"KHcy;","U+00425"
+"KJcy;","U+0040C"
+"Kappa;","U+0039A"
+"Kcedil;","U+00136"
+"Kcy;","U+0041A"
+"Kfr;","U+1D50E"
+"Kopf;","U+1D542"
+"Kscr;","U+1D4A6"
+"LJcy;","U+00409"
+"LT;","U+0003C"
+"LT","U+0003C"
+"Lacute;","U+00139"
+"Lambda;","U+0039B"
+"Lang;","U+027EA"
+"Laplacetrf;","U+02112"
+"Larr;","U+0219E"
+"Lcaron;","U+0013D"
+"Lcedil;","U+0013B"
+"Lcy;","U+0041B"
+"LeftAngleBracket;","U+027E8"
+"LeftArrow;","U+02190"
+"LeftArrowBar;","U+021E4"
+"LeftArrowRightArrow;","U+021C6"
+"LeftCeiling;","U+02308"
+"LeftDoubleBracket;","U+027E6"
+"LeftDownTeeVector;","U+02961"
+"LeftDownVector;","U+021C3"
+"LeftDownVectorBar;","U+02959"
+"LeftFloor;","U+0230A"
+"LeftRightArrow;","U+02194"
+"LeftRightVector;","U+0294E"
+"LeftTee;","U+022A3"
+"LeftTeeArrow;","U+021A4"
+"LeftTeeVector;","U+0295A"
+"LeftTriangle;","U+022B2"
+"LeftTriangleBar;","U+029CF"
+"LeftTriangleEqual;","U+022B4"
+"LeftUpDownVector;","U+02951"
+"LeftUpTeeVector;","U+02960"
+"LeftUpVector;","U+021BF"
+"LeftUpVectorBar;","U+02958"
+"LeftVector;","U+021BC"
+"LeftVectorBar;","U+02952"
+"Leftarrow;","U+021D0"
+"Leftrightarrow;","U+021D4"
+"LessEqualGreater;","U+022DA"
+"LessFullEqual;","U+02266"
+"LessGreater;","U+02276"
+"LessLess;","U+02AA1"
+"LessSlantEqual;","U+02A7D"
+"LessTilde;","U+02272"
+"Lfr;","U+1D50F"
+"Ll;","U+022D8"
+"Lleftarrow;","U+021DA"
+"Lmidot;","U+0013F"
+"LongLeftArrow;","U+027F5"
+"LongLeftRightArrow;","U+027F7"
+"LongRightArrow;","U+027F6"
+"Longleftarrow;","U+027F8"
+"Longleftrightarrow;","U+027FA"
+"Longrightarrow;","U+027F9"
+"Lopf;","U+1D543"
+"LowerLeftArrow;","U+02199"
+"LowerRightArrow;","U+02198"
+"Lscr;","U+02112"
+"Lsh;","U+021B0"
+"Lstrok;","U+00141"
+"Lt;","U+0226A"
+"Map;","U+02905"
+"Mcy;","U+0041C"
+"MediumSpace;","U+0205F"
+"Mellintrf;","U+02133"
+"Mfr;","U+1D510"
+"MinusPlus;","U+02213"
+"Mopf;","U+1D544"
+"Mscr;","U+02133"
+"Mu;","U+0039C"
+"NJcy;","U+0040A"
+"Nacute;","U+00143"
+"Ncaron;","U+00147"
+"Ncedil;","U+00145"
+"Ncy;","U+0041D"
+"NegativeMediumSpace;","U+0200B"
+"NegativeThickSpace;","U+0200B"
+"NegativeThinSpace;","U+0200B"
+"NegativeVeryThinSpace;","U+0200B"
+"NestedGreaterGreater;","U+0226B"
+"NestedLessLess;","U+0226A"
+"NewLine;","U+0000A"
+"Nfr;","U+1D511"
+"NoBreak;","U+02060"
+"NonBreakingSpace;","U+000A0"
+"Nopf;","U+02115"
+"Not;","U+02AEC"
+"NotCongruent;","U+02262"
+"NotCupCap;","U+0226D"
+"NotDoubleVerticalBar;","U+02226"
+"NotElement;","U+02209"
+"NotEqual;","U+02260"
+"NotExists;","U+02204"
+"NotGreater;","U+0226F"
+"NotGreaterEqual;","U+02271"
+"NotGreaterLess;","U+02279"
+"NotGreaterTilde;","U+02275"
+"NotLeftTriangle;","U+022EA"
+"NotLeftTriangleEqual;","U+022EC"
+"NotLess;","U+0226E"
+"NotLessEqual;","U+02270"
+"NotLessGreater;","U+02278"
+"NotLessTilde;","U+02274"
+"NotPrecedes;","U+02280"
+"NotPrecedesSlantEqual;","U+022E0"
+"NotReverseElement;","U+0220C"
+"NotRightTriangle;","U+022EB"
+"NotRightTriangleEqual;","U+022ED"
+"NotSquareSubsetEqual;","U+022E2"
+"NotSquareSupersetEqual;","U+022E3"
+"NotSubsetEqual;","U+02288"
+"NotSucceeds;","U+02281"
+"NotSucceedsSlantEqual;","U+022E1"
+"NotSupersetEqual;","U+02289"
+"NotTilde;","U+02241"
+"NotTildeEqual;","U+02244"
+"NotTildeFullEqual;","U+02247"
+"NotTildeTilde;","U+02249"
+"NotVerticalBar;","U+02224"
+"Nscr;","U+1D4A9"
+"Ntilde;","U+000D1"
+"Ntilde","U+000D1"
+"Nu;","U+0039D"
+"OElig;","U+00152"
+"Oacute;","U+000D3"
+"Oacute","U+000D3"
+"Ocirc;","U+000D4"
+"Ocirc","U+000D4"
+"Ocy;","U+0041E"
+"Odblac;","U+00150"
+"Ofr;","U+1D512"
+"Ograve;","U+000D2"
+"Ograve","U+000D2"
+"Omacr;","U+0014C"
+"Omega;","U+003A9"
+"Omicron;","U+0039F"
+"Oopf;","U+1D546"
+"OpenCurlyDoubleQuote;","U+0201C"
+"OpenCurlyQuote;","U+02018"
+"Or;","U+02A54"
+"Oscr;","U+1D4AA"
+"Oslash;","U+000D8"
+"Oslash","U+000D8"
+"Otilde;","U+000D5"
+"Otilde","U+000D5"
+"Otimes;","U+02A37"
+"Ouml;","U+000D6"
+"Ouml","U+000D6"
+"OverBar;","U+0203E"
+"OverBrace;","U+023DE"
+"OverBracket;","U+023B4"
+"OverParenthesis;","U+023DC"
+"PartialD;","U+02202"
+"Pcy;","U+0041F"
+"Pfr;","U+1D513"
+"Phi;","U+003A6"
+"Pi;","U+003A0"
+"PlusMinus;","U+000B1"
+"Poincareplane;","U+0210C"
+"Popf;","U+02119"
+"Pr;","U+02ABB"
+"Precedes;","U+0227A"
+"PrecedesEqual;","U+02AAF"
+"PrecedesSlantEqual;","U+0227C"
+"PrecedesTilde;","U+0227E"
+"Prime;","U+02033"
+"Product;","U+0220F"
+"Proportion;","U+02237"
+"Proportional;","U+0221D"
+"Pscr;","U+1D4AB"
+"Psi;","U+003A8"
+"QUOT;","U+00022"
+"QUOT","U+00022"
+"Qfr;","U+1D514"
+"Qopf;","U+0211A"
+"Qscr;","U+1D4AC"
+"RBarr;","U+02910"
+"REG;","U+000AE"
+"REG","U+000AE"
+"Racute;","U+00154"
+"Rang;","U+027EB"
+"Rarr;","U+021A0"
+"Rarrtl;","U+02916"
+"Rcaron;","U+00158"
+"Rcedil;","U+00156"
+"Rcy;","U+00420"
+"Re;","U+0211C"
+"ReverseElement;","U+0220B"
+"ReverseEquilibrium;","U+021CB"
+"ReverseUpEquilibrium;","U+0296F"
+"Rfr;","U+0211C"
+"Rho;","U+003A1"
+"RightAngleBracket;","U+027E9"
+"RightArrow;","U+02192"
+"RightArrowBar;","U+021E5"
+"RightArrowLeftArrow;","U+021C4"
+"RightCeiling;","U+02309"
+"RightDoubleBracket;","U+027E7"
+"RightDownTeeVector;","U+0295D"
+"RightDownVector;","U+021C2"
+"RightDownVectorBar;","U+02955"
+"RightFloor;","U+0230B"
+"RightTee;","U+022A2"
+"RightTeeArrow;","U+021A6"
+"RightTeeVector;","U+0295B"
+"RightTriangle;","U+022B3"
+"RightTriangleBar;","U+029D0"
+"RightTriangleEqual;","U+022B5"
+"RightUpDownVector;","U+0294F"
+"RightUpTeeVector;","U+0295C"
+"RightUpVector;","U+021BE"
+"RightUpVectorBar;","U+02954"
+"RightVector;","U+021C0"
+"RightVectorBar;","U+02953"
+"Rightarrow;","U+021D2"
+"Ropf;","U+0211D"
+"RoundImplies;","U+02970"
+"Rrightarrow;","U+021DB"
+"Rscr;","U+0211B"
+"Rsh;","U+021B1"
+"RuleDelayed;","U+029F4"
+"SHCHcy;","U+00429"
+"SHcy;","U+00428"
+"SOFTcy;","U+0042C"
+"Sacute;","U+0015A"
+"Sc;","U+02ABC"
+"Scaron;","U+00160"
+"Scedil;","U+0015E"
+"Scirc;","U+0015C"
+"Scy;","U+00421"
+"Sfr;","U+1D516"
+"ShortDownArrow;","U+02193"
+"ShortLeftArrow;","U+02190"
+"ShortRightArrow;","U+02192"
+"ShortUpArrow;","U+02191"
+"Sigma;","U+003A3"
+"SmallCircle;","U+02218"
+"Sopf;","U+1D54A"
+"Sqrt;","U+0221A"
+"Square;","U+025A1"
+"SquareIntersection;","U+02293"
+"SquareSubset;","U+0228F"
+"SquareSubsetEqual;","U+02291"
+"SquareSuperset;","U+02290"
+"SquareSupersetEqual;","U+02292"
+"SquareUnion;","U+02294"
+"Sscr;","U+1D4AE"
+"Star;","U+022C6"
+"Sub;","U+022D0"
+"Subset;","U+022D0"
+"SubsetEqual;","U+02286"
+"Succeeds;","U+0227B"
+"SucceedsEqual;","U+02AB0"
+"SucceedsSlantEqual;","U+0227D"
+"SucceedsTilde;","U+0227F"
+"SuchThat;","U+0220B"
+"Sum;","U+02211"
+"Sup;","U+022D1"
+"Superset;","U+02283"
+"SupersetEqual;","U+02287"
+"Supset;","U+022D1"
+"THORN;","U+000DE"
+"THORN","U+000DE"
+"TRADE;","U+02122"
+"TSHcy;","U+0040B"
+"TScy;","U+00426"
+"Tab;","U+00009"
+"Tau;","U+003A4"
+"Tcaron;","U+00164"
+"Tcedil;","U+00162"
+"Tcy;","U+00422"
+"Tfr;","U+1D517"
+"Therefore;","U+02234"
+"Theta;","U+00398"
+"ThinSpace;","U+02009"
+"Tilde;","U+0223C"
+"TildeEqual;","U+02243"
+"TildeFullEqual;","U+02245"
+"TildeTilde;","U+02248"
+"Topf;","U+1D54B"
+"TripleDot;","U+020DB"
+"Tscr;","U+1D4AF"
+"Tstrok;","U+00166"
+"Uacute;","U+000DA"
+"Uacute","U+000DA"
+"Uarr;","U+0219F"
+"Uarrocir;","U+02949"
+"Ubrcy;","U+0040E"
+"Ubreve;","U+0016C"
+"Ucirc;","U+000DB"
+"Ucirc","U+000DB"
+"Ucy;","U+00423"
+"Udblac;","U+00170"
+"Ufr;","U+1D518"
+"Ugrave;","U+000D9"
+"Ugrave","U+000D9"
+"Umacr;","U+0016A"
+"UnderBar;","U+0005F"
+"UnderBrace;","U+023DF"
+"UnderBracket;","U+023B5"
+"UnderParenthesis;","U+023DD"
+"Union;","U+022C3"
+"UnionPlus;","U+0228E"
+"Uogon;","U+00172"
+"Uopf;","U+1D54C"
+"UpArrow;","U+02191"
+"UpArrowBar;","U+02912"
+"UpArrowDownArrow;","U+021C5"
+"UpDownArrow;","U+02195"
+"UpEquilibrium;","U+0296E"
+"UpTee;","U+022A5"
+"UpTeeArrow;","U+021A5"
+"Uparrow;","U+021D1"
+"Updownarrow;","U+021D5"
+"UpperLeftArrow;","U+02196"
+"UpperRightArrow;","U+02197"
+"Upsi;","U+003D2"
+"Upsilon;","U+003A5"
+"Uring;","U+0016E"
+"Uscr;","U+1D4B0"
+"Utilde;","U+00168"
+"Uuml;","U+000DC"
+"Uuml","U+000DC"
+"VDash;","U+022AB"
+"Vbar;","U+02AEB"
+"Vcy;","U+00412"
+"Vdash;","U+022A9"
+"Vdashl;","U+02AE6"
+"Vee;","U+022C1"
+"Verbar;","U+02016"
+"Vert;","U+02016"
+"VerticalBar;","U+02223"
+"VerticalLine;","U+0007C"
+"VerticalSeparator;","U+02758"
+"VerticalTilde;","U+02240"
+"VeryThinSpace;","U+0200A"
+"Vfr;","U+1D519"
+"Vopf;","U+1D54D"
+"Vscr;","U+1D4B1"
+"Vvdash;","U+022AA"
+"Wcirc;","U+00174"
+"Wedge;","U+022C0"
+"Wfr;","U+1D51A"
+"Wopf;","U+1D54E"
+"Wscr;","U+1D4B2"
+"Xfr;","U+1D51B"
+"Xi;","U+0039E"
+"Xopf;","U+1D54F"
+"Xscr;","U+1D4B3"
+"YAcy;","U+0042F"
+"YIcy;","U+00407"
+"YUcy;","U+0042E"
+"Yacute;","U+000DD"
+"Yacute","U+000DD"
+"Ycirc;","U+00176"
+"Ycy;","U+0042B"
+"Yfr;","U+1D51C"
+"Yopf;","U+1D550"
+"Yscr;","U+1D4B4"
+"Yuml;","U+00178"
+"ZHcy;","U+00416"
+"Zacute;","U+00179"
+"Zcaron;","U+0017D"
+"Zcy;","U+00417"
+"Zdot;","U+0017B"
+"ZeroWidthSpace;","U+0200B"
+"Zeta;","U+00396"
+"Zfr;","U+02128"
+"Zopf;","U+02124"
+"Zscr;","U+1D4B5"
+"aacute;","U+000E1"
+"aacute","U+000E1"
+"abreve;","U+00103"
+"ac;","U+0223E"
+"acd;","U+0223F"
+"acirc;","U+000E2"
+"acirc","U+000E2"
+"acute;","U+000B4"
+"acute","U+000B4"
+"acy;","U+00430"
+"aelig;","U+000E6"
+"aelig","U+000E6"
+"af;","U+02061"
+"afr;","U+1D51E"
+"agrave;","U+000E0"
+"agrave","U+000E0"
+"alefsym;","U+02135"
+"aleph;","U+02135"
+"alpha;","U+003B1"
+"amacr;","U+00101"
+"amalg;","U+02A3F"
+"amp;","U+00026"
+"amp","U+00026"
+"and;","U+02227"
+"andand;","U+02A55"
+"andd;","U+02A5C"
+"andslope;","U+02A58"
+"andv;","U+02A5A"
+"ang;","U+02220"
+"ange;","U+029A4"
+"angle;","U+02220"
+"angmsd;","U+02221"
+"angmsdaa;","U+029A8"
+"angmsdab;","U+029A9"
+"angmsdac;","U+029AA"
+"angmsdad;","U+029AB"
+"angmsdae;","U+029AC"
+"angmsdaf;","U+029AD"
+"angmsdag;","U+029AE"
+"angmsdah;","U+029AF"
+"angrt;","U+0221F"
+"angrtvb;","U+022BE"
+"angrtvbd;","U+0299D"
+"angsph;","U+02222"
+"angst;","U+000C5"
+"angzarr;","U+0237C"
+"aogon;","U+00105"
+"aopf;","U+1D552"
+"ap;","U+02248"
+"apE;","U+02A70"
+"apacir;","U+02A6F"
+"ape;","U+0224A"
+"apid;","U+0224B"
+"apos;","U+00027"
+"approx;","U+02248"
+"approxeq;","U+0224A"
+"aring;","U+000E5"
+"aring","U+000E5"
+"ascr;","U+1D4B6"
+"ast;","U+0002A"
+"asymp;","U+02248"
+"asympeq;","U+0224D"
+"atilde;","U+000E3"
+"atilde","U+000E3"
+"auml;","U+000E4"
+"auml","U+000E4"
+"awconint;","U+02233"
+"awint;","U+02A11"
+"bNot;","U+02AED"
+"backcong;","U+0224C"
+"backepsilon;","U+003F6"
+"backprime;","U+02035"
+"backsim;","U+0223D"
+"backsimeq;","U+022CD"
+"barvee;","U+022BD"
+"barwed;","U+02305"
+"barwedge;","U+02305"
+"bbrk;","U+023B5"
+"bbrktbrk;","U+023B6"
+"bcong;","U+0224C"
+"bcy;","U+00431"
+"bdquo;","U+0201E"
+"becaus;","U+02235"
+"because;","U+02235"
+"bemptyv;","U+029B0"
+"bepsi;","U+003F6"
+"bernou;","U+0212C"
+"beta;","U+003B2"
+"beth;","U+02136"
+"between;","U+0226C"
+"bfr;","U+1D51F"
+"bigcap;","U+022C2"
+"bigcirc;","U+025EF"
+"bigcup;","U+022C3"
+"bigodot;","U+02A00"
+"bigoplus;","U+02A01"
+"bigotimes;","U+02A02"
+"bigsqcup;","U+02A06"
+"bigstar;","U+02605"
+"bigtriangledown;","U+025BD"
+"bigtriangleup;","U+025B3"
+"biguplus;","U+02A04"
+"bigvee;","U+022C1"
+"bigwedge;","U+022C0"
+"bkarow;","U+0290D"
+"blacklozenge;","U+029EB"
+"blacksquare;","U+025AA"
+"blacktriangle;","U+025B4"
+"blacktriangledown;","U+025BE"
+"blacktriangleleft;","U+025C2"
+"blacktriangleright;","U+025B8"
+"blank;","U+02423"
+"blk12;","U+02592"
+"blk14;","U+02591"
+"blk34;","U+02593"
+"block;","U+02588"
+"bnot;","U+02310"
+"bopf;","U+1D553"
+"bot;","U+022A5"
+"bottom;","U+022A5"
+"bowtie;","U+022C8"
+"boxDL;","U+02557"
+"boxDR;","U+02554"
+"boxDl;","U+02556"
+"boxDr;","U+02553"
+"boxH;","U+02550"
+"boxHD;","U+02566"
+"boxHU;","U+02569"
+"boxHd;","U+02564"
+"boxHu;","U+02567"
+"boxUL;","U+0255D"
+"boxUR;","U+0255A"
+"boxUl;","U+0255C"
+"boxUr;","U+02559"
+"boxV;","U+02551"
+"boxVH;","U+0256C"
+"boxVL;","U+02563"
+"boxVR;","U+02560"
+"boxVh;","U+0256B"
+"boxVl;","U+02562"
+"boxVr;","U+0255F"
+"boxbox;","U+029C9"
+"boxdL;","U+02555"
+"boxdR;","U+02552"
+"boxdl;","U+02510"
+"boxdr;","U+0250C"
+"boxh;","U+02500"
+"boxhD;","U+02565"
+"boxhU;","U+02568"
+"boxhd;","U+0252C"
+"boxhu;","U+02534"
+"boxminus;","U+0229F"
+"boxplus;","U+0229E"
+"boxtimes;","U+022A0"
+"boxuL;","U+0255B"
+"boxuR;","U+02558"
+"boxul;","U+02518"
+"boxur;","U+02514"
+"boxv;","U+02502"
+"boxvH;","U+0256A"
+"boxvL;","U+02561"
+"boxvR;","U+0255E"
+"boxvh;","U+0253C"
+"boxvl;","U+02524"
+"boxvr;","U+0251C"
+"bprime;","U+02035"
+"breve;","U+002D8"
+"brvbar;","U+000A6"
+"brvbar","U+000A6"
+"bscr;","U+1D4B7"
+"bsemi;","U+0204F"
+"bsim;","U+0223D"
+"bsime;","U+022CD"
+"bsol;","U+0005C"
+"bsolb;","U+029C5"
+"bsolhsub;","U+027C8"
+"bull;","U+02022"
+"bullet;","U+02022"
+"bump;","U+0224E"
+"bumpE;","U+02AAE"
+"bumpe;","U+0224F"
+"bumpeq;","U+0224F"
+"cacute;","U+00107"
+"cap;","U+02229"
+"capand;","U+02A44"
+"capbrcup;","U+02A49"
+"capcap;","U+02A4B"
+"capcup;","U+02A47"
+"capdot;","U+02A40"
+"caret;","U+02041"
+"caron;","U+002C7"
+"ccaps;","U+02A4D"
+"ccaron;","U+0010D"
+"ccedil;","U+000E7"
+"ccedil","U+000E7"
+"ccirc;","U+00109"
+"ccups;","U+02A4C"
+"ccupssm;","U+02A50"
+"cdot;","U+0010B"
+"cedil;","U+000B8"
+"cedil","U+000B8"
+"cemptyv;","U+029B2"
+"cent;","U+000A2"
+"cent","U+000A2"
+"centerdot;","U+000B7"
+"cfr;","U+1D520"
+"chcy;","U+00447"
+"check;","U+02713"
+"checkmark;","U+02713"
+"chi;","U+003C7"
+"cir;","U+025CB"
+"cirE;","U+029C3"
+"circ;","U+002C6"
+"circeq;","U+02257"
+"circlearrowleft;","U+021BA"
+"circlearrowright;","U+021BB"
+"circledR;","U+000AE"
+"circledS;","U+024C8"
+"circledast;","U+0229B"
+"circledcirc;","U+0229A"
+"circleddash;","U+0229D"
+"cire;","U+02257"
+"cirfnint;","U+02A10"
+"cirmid;","U+02AEF"
+"cirscir;","U+029C2"
+"clubs;","U+02663"
+"clubsuit;","U+02663"
+"colon;","U+0003A"
+"colone;","U+02254"
+"coloneq;","U+02254"
+"comma;","U+0002C"
+"commat;","U+00040"
+"comp;","U+02201"
+"compfn;","U+02218"
+"complement;","U+02201"
+"complexes;","U+02102"
+"cong;","U+02245"
+"congdot;","U+02A6D"
+"conint;","U+0222E"
+"copf;","U+1D554"
+"coprod;","U+02210"
+"copy;","U+000A9"
+"copy","U+000A9"
+"copysr;","U+02117"
+"crarr;","U+021B5"
+"cross;","U+02717"
+"cscr;","U+1D4B8"
+"csub;","U+02ACF"
+"csube;","U+02AD1"
+"csup;","U+02AD0"
+"csupe;","U+02AD2"
+"ctdot;","U+022EF"
+"cudarrl;","U+02938"
+"cudarrr;","U+02935"
+"cuepr;","U+022DE"
+"cuesc;","U+022DF"
+"cularr;","U+021B6"
+"cularrp;","U+0293D"
+"cup;","U+0222A"
+"cupbrcap;","U+02A48"
+"cupcap;","U+02A46"
+"cupcup;","U+02A4A"
+"cupdot;","U+0228D"
+"cupor;","U+02A45"
+"curarr;","U+021B7"
+"curarrm;","U+0293C"
+"curlyeqprec;","U+022DE"
+"curlyeqsucc;","U+022DF"
+"curlyvee;","U+022CE"
+"curlywedge;","U+022CF"
+"curren;","U+000A4"
+"curren","U+000A4"
+"curvearrowleft;","U+021B6"
+"curvearrowright;","U+021B7"
+"cuvee;","U+022CE"
+"cuwed;","U+022CF"
+"cwconint;","U+02232"
+"cwint;","U+02231"
+"cylcty;","U+0232D"
+"dArr;","U+021D3"
+"dHar;","U+02965"
+"dagger;","U+02020"
+"daleth;","U+02138"
+"darr;","U+02193"
+"dash;","U+02010"
+"dashv;","U+022A3"
+"dbkarow;","U+0290F"
+"dblac;","U+002DD"
+"dcaron;","U+0010F"
+"dcy;","U+00434"
+"dd;","U+02146"
+"ddagger;","U+02021"
+"ddarr;","U+021CA"
+"ddotseq;","U+02A77"
+"deg;","U+000B0"
+"deg","U+000B0"
+"delta;","U+003B4"
+"demptyv;","U+029B1"
+"dfisht;","U+0297F"
+"dfr;","U+1D521"
+"dharl;","U+021C3"
+"dharr;","U+021C2"
+"diam;","U+022C4"
+"diamond;","U+022C4"
+"diamondsuit;","U+02666"
+"diams;","U+02666"
+"die;","U+000A8"
+"digamma;","U+003DD"
+"disin;","U+022F2"
+"div;","U+000F7"
+"divide;","U+000F7"
+"divide","U+000F7"
+"divideontimes;","U+022C7"
+"divonx;","U+022C7"
+"djcy;","U+00452"
+"dlcorn;","U+0231E"
+"dlcrop;","U+0230D"
+"dollar;","U+00024"
+"dopf;","U+1D555"
+"dot;","U+002D9"
+"doteq;","U+02250"
+"doteqdot;","U+02251"
+"dotminus;","U+02238"
+"dotplus;","U+02214"
+"dotsquare;","U+022A1"
+"doublebarwedge;","U+02306"
+"downarrow;","U+02193"
+"downdownarrows;","U+021CA"
+"downharpoonleft;","U+021C3"
+"downharpoonright;","U+021C2"
+"drbkarow;","U+02910"
+"drcorn;","U+0231F"
+"drcrop;","U+0230C"
+"dscr;","U+1D4B9"
+"dscy;","U+00455"
+"dsol;","U+029F6"
+"dstrok;","U+00111"
+"dtdot;","U+022F1"
+"dtri;","U+025BF"
+"dtrif;","U+025BE"
+"duarr;","U+021F5"
+"duhar;","U+0296F"
+"dwangle;","U+029A6"
+"dzcy;","U+0045F"
+"dzigrarr;","U+027FF"
+"eDDot;","U+02A77"
+"eDot;","U+02251"
+"eacute;","U+000E9"
+"eacute","U+000E9"
+"easter;","U+02A6E"
+"ecaron;","U+0011B"
+"ecir;","U+02256"
+"ecirc;","U+000EA"
+"ecirc","U+000EA"
+"ecolon;","U+02255"
+"ecy;","U+0044D"
+"edot;","U+00117"
+"ee;","U+02147"
+"efDot;","U+02252"
+"efr;","U+1D522"
+"eg;","U+02A9A"
+"egrave;","U+000E8"
+"egrave","U+000E8"
+"egs;","U+02A96"
+"egsdot;","U+02A98"
+"el;","U+02A99"
+"elinters;","U+023E7"
+"ell;","U+02113"
+"els;","U+02A95"
+"elsdot;","U+02A97"
+"emacr;","U+00113"
+"empty;","U+02205"
+"emptyset;","U+02205"
+"emptyv;","U+02205"
+"emsp13;","U+02004"
+"emsp14;","U+02005"
+"emsp;","U+02003"
+"eng;","U+0014B"
+"ensp;","U+02002"
+"eogon;","U+00119"
+"eopf;","U+1D556"
+"epar;","U+022D5"
+"eparsl;","U+029E3"
+"eplus;","U+02A71"
+"epsi;","U+003B5"
+"epsilon;","U+003B5"
+"epsiv;","U+003F5"
+"eqcirc;","U+02256"
+"eqcolon;","U+02255"
+"eqsim;","U+02242"
+"eqslantgtr;","U+02A96"
+"eqslantless;","U+02A95"
+"equals;","U+0003D"
+"equest;","U+0225F"
+"equiv;","U+02261"
+"equivDD;","U+02A78"
+"eqvparsl;","U+029E5"
+"erDot;","U+02253"
+"erarr;","U+02971"
+"escr;","U+0212F"
+"esdot;","U+02250"
+"esim;","U+02242"
+"eta;","U+003B7"
+"eth;","U+000F0"
+"eth","U+000F0"
+"euml;","U+000EB"
+"euml","U+000EB"
+"euro;","U+020AC"
+"excl;","U+00021"
+"exist;","U+02203"
+"expectation;","U+02130"
+"exponentiale;","U+02147"
+"fallingdotseq;","U+02252"
+"fcy;","U+00444"
+"female;","U+02640"
+"ffilig;","U+0FB03"
+"fflig;","U+0FB00"
+"ffllig;","U+0FB04"
+"ffr;","U+1D523"
+"filig;","U+0FB01"
+"flat;","U+0266D"
+"fllig;","U+0FB02"
+"fltns;","U+025B1"
+"fnof;","U+00192"
+"fopf;","U+1D557"
+"forall;","U+02200"
+"fork;","U+022D4"
+"forkv;","U+02AD9"
+"fpartint;","U+02A0D"
+"frac12;","U+000BD"
+"frac12","U+000BD"
+"frac13;","U+02153"
+"frac14;","U+000BC"
+"frac14","U+000BC"
+"frac15;","U+02155"
+"frac16;","U+02159"
+"frac18;","U+0215B"
+"frac23;","U+02154"
+"frac25;","U+02156"
+"frac34;","U+000BE"
+"frac34","U+000BE"
+"frac35;","U+02157"
+"frac38;","U+0215C"
+"frac45;","U+02158"
+"frac56;","U+0215A"
+"frac58;","U+0215D"
+"frac78;","U+0215E"
+"frasl;","U+02044"
+"frown;","U+02322"
+"fscr;","U+1D4BB"
+"gE;","U+02267"
+"gEl;","U+02A8C"
+"gacute;","U+001F5"
+"gamma;","U+003B3"
+"gammad;","U+003DD"
+"gap;","U+02A86"
+"gbreve;","U+0011F"
+"gcirc;","U+0011D"
+"gcy;","U+00433"
+"gdot;","U+00121"
+"ge;","U+02265"
+"gel;","U+022DB"
+"geq;","U+02265"
+"geqq;","U+02267"
+"geqslant;","U+02A7E"
+"ges;","U+02A7E"
+"gescc;","U+02AA9"
+"gesdot;","U+02A80"
+"gesdoto;","U+02A82"
+"gesdotol;","U+02A84"
+"gesles;","U+02A94"
+"gfr;","U+1D524"
+"gg;","U+0226B"
+"ggg;","U+022D9"
+"gimel;","U+02137"
+"gjcy;","U+00453"
+"gl;","U+02277"
+"glE;","U+02A92"
+"gla;","U+02AA5"
+"glj;","U+02AA4"
+"gnE;","U+02269"
+"gnap;","U+02A8A"
+"gnapprox;","U+02A8A"
+"gne;","U+02A88"
+"gneq;","U+02A88"
+"gneqq;","U+02269"
+"gnsim;","U+022E7"
+"gopf;","U+1D558"
+"grave;","U+00060"
+"gscr;","U+0210A"
+"gsim;","U+02273"
+"gsime;","U+02A8E"
+"gsiml;","U+02A90"
+"gt;","U+0003E"
+"gt","U+0003E"
+"gtcc;","U+02AA7"
+"gtcir;","U+02A7A"
+"gtdot;","U+022D7"
+"gtlPar;","U+02995"
+"gtquest;","U+02A7C"
+"gtrapprox;","U+02A86"
+"gtrarr;","U+02978"
+"gtrdot;","U+022D7"
+"gtreqless;","U+022DB"
+"gtreqqless;","U+02A8C"
+"gtrless;","U+02277"
+"gtrsim;","U+02273"
+"hArr;","U+021D4"
+"hairsp;","U+0200A"
+"half;","U+000BD"
+"hamilt;","U+0210B"
+"hardcy;","U+0044A"
+"harr;","U+02194"
+"harrcir;","U+02948"
+"harrw;","U+021AD"
+"hbar;","U+0210F"
+"hcirc;","U+00125"
+"hearts;","U+02665"
+"heartsuit;","U+02665"
+"hellip;","U+02026"
+"hercon;","U+022B9"
+"hfr;","U+1D525"
+"hksearow;","U+02925"
+"hkswarow;","U+02926"
+"hoarr;","U+021FF"
+"homtht;","U+0223B"
+"hookleftarrow;","U+021A9"
+"hookrightarrow;","U+021AA"
+"hopf;","U+1D559"
+"horbar;","U+02015"
+"hscr;","U+1D4BD"
+"hslash;","U+0210F"
+"hstrok;","U+00127"
+"hybull;","U+02043"
+"hyphen;","U+02010"
+"iacute;","U+000ED"
+"iacute","U+000ED"
+"ic;","U+02063"
+"icirc;","U+000EE"
+"icirc","U+000EE"
+"icy;","U+00438"
+"iecy;","U+00435"
+"iexcl;","U+000A1"
+"iexcl","U+000A1"
+"iff;","U+021D4"
+"ifr;","U+1D526"
+"igrave;","U+000EC"
+"igrave","U+000EC"
+"ii;","U+02148"
+"iiiint;","U+02A0C"
+"iiint;","U+0222D"
+"iinfin;","U+029DC"
+"iiota;","U+02129"
+"ijlig;","U+00133"
+"imacr;","U+0012B"
+"image;","U+02111"
+"imagline;","U+02110"
+"imagpart;","U+02111"
+"imath;","U+00131"
+"imof;","U+022B7"
+"imped;","U+001B5"
+"in;","U+02208"
+"incare;","U+02105"
+"infin;","U+0221E"
+"infintie;","U+029DD"
+"inodot;","U+00131"
+"int;","U+0222B"
+"intcal;","U+022BA"
+"integers;","U+02124"
+"intercal;","U+022BA"
+"intlarhk;","U+02A17"
+"intprod;","U+02A3C"
+"iocy;","U+00451"
+"iogon;","U+0012F"
+"iopf;","U+1D55A"
+"iota;","U+003B9"
+"iprod;","U+02A3C"
+"iquest;","U+000BF"
+"iquest","U+000BF"
+"iscr;","U+1D4BE"
+"isin;","U+02208"
+"isinE;","U+022F9"
+"isindot;","U+022F5"
+"isins;","U+022F4"
+"isinsv;","U+022F3"
+"isinv;","U+02208"
+"it;","U+02062"
+"itilde;","U+00129"
+"iukcy;","U+00456"
+"iuml;","U+000EF"
+"iuml","U+000EF"
+"jcirc;","U+00135"
+"jcy;","U+00439"
+"jfr;","U+1D527"
+"jmath;","U+00237"
+"jopf;","U+1D55B"
+"jscr;","U+1D4BF"
+"jsercy;","U+00458"
+"jukcy;","U+00454"
+"kappa;","U+003BA"
+"kappav;","U+003F0"
+"kcedil;","U+00137"
+"kcy;","U+0043A"
+"kfr;","U+1D528"
+"kgreen;","U+00138"
+"khcy;","U+00445"
+"kjcy;","U+0045C"
+"kopf;","U+1D55C"
+"kscr;","U+1D4C0"
+"lAarr;","U+021DA"
+"lArr;","U+021D0"
+"lAtail;","U+0291B"
+"lBarr;","U+0290E"
+"lE;","U+02266"
+"lEg;","U+02A8B"
+"lHar;","U+02962"
+"lacute;","U+0013A"
+"laemptyv;","U+029B4"
+"lagran;","U+02112"
+"lambda;","U+003BB"
+"lang;","U+027E8"
+"langd;","U+02991"
+"langle;","U+027E8"
+"lap;","U+02A85"
+"laquo;","U+000AB"
+"laquo","U+000AB"
+"larr;","U+02190"
+"larrb;","U+021E4"
+"larrbfs;","U+0291F"
+"larrfs;","U+0291D"
+"larrhk;","U+021A9"
+"larrlp;","U+021AB"
+"larrpl;","U+02939"
+"larrsim;","U+02973"
+"larrtl;","U+021A2"
+"lat;","U+02AAB"
+"latail;","U+02919"
+"late;","U+02AAD"
+"lbarr;","U+0290C"
+"lbbrk;","U+02772"
+"lbrace;","U+0007B"
+"lbrack;","U+0005B"
+"lbrke;","U+0298B"
+"lbrksld;","U+0298F"
+"lbrkslu;","U+0298D"
+"lcaron;","U+0013E"
+"lcedil;","U+0013C"
+"lceil;","U+02308"
+"lcub;","U+0007B"
+"lcy;","U+0043B"
+"ldca;","U+02936"
+"ldquo;","U+0201C"
+"ldquor;","U+0201E"
+"ldrdhar;","U+02967"
+"ldrushar;","U+0294B"
+"ldsh;","U+021B2"
+"le;","U+02264"
+"leftarrow;","U+02190"
+"leftarrowtail;","U+021A2"
+"leftharpoondown;","U+021BD"
+"leftharpoonup;","U+021BC"
+"leftleftarrows;","U+021C7"
+"leftrightarrow;","U+02194"
+"leftrightarrows;","U+021C6"
+"leftrightharpoons;","U+021CB"
+"leftrightsquigarrow;","U+021AD"
+"leftthreetimes;","U+022CB"
+"leg;","U+022DA"
+"leq;","U+02264"
+"leqq;","U+02266"
+"leqslant;","U+02A7D"
+"les;","U+02A7D"
+"lescc;","U+02AA8"
+"lesdot;","U+02A7F"
+"lesdoto;","U+02A81"
+"lesdotor;","U+02A83"
+"lesges;","U+02A93"
+"lessapprox;","U+02A85"
+"lessdot;","U+022D6"
+"lesseqgtr;","U+022DA"
+"lesseqqgtr;","U+02A8B"
+"lessgtr;","U+02276"
+"lesssim;","U+02272"
+"lfisht;","U+0297C"
+"lfloor;","U+0230A"
+"lfr;","U+1D529"
+"lg;","U+02276"
+"lgE;","U+02A91"
+"lhard;","U+021BD"
+"lharu;","U+021BC"
+"lharul;","U+0296A"
+"lhblk;","U+02584"
+"ljcy;","U+00459"
+"ll;","U+0226A"
+"llarr;","U+021C7"
+"llcorner;","U+0231E"
+"llhard;","U+0296B"
+"lltri;","U+025FA"
+"lmidot;","U+00140"
+"lmoust;","U+023B0"
+"lmoustache;","U+023B0"
+"lnE;","U+02268"
+"lnap;","U+02A89"
+"lnapprox;","U+02A89"
+"lne;","U+02A87"
+"lneq;","U+02A87"
+"lneqq;","U+02268"
+"lnsim;","U+022E6"
+"loang;","U+027EC"
+"loarr;","U+021FD"
+"lobrk;","U+027E6"
+"longleftarrow;","U+027F5"
+"longleftrightarrow;","U+027F7"
+"longmapsto;","U+027FC"
+"longrightarrow;","U+027F6"
+"looparrowleft;","U+021AB"
+"looparrowright;","U+021AC"
+"lopar;","U+02985"
+"lopf;","U+1D55D"
+"loplus;","U+02A2D"
+"lotimes;","U+02A34"
+"lowast;","U+02217"
+"lowbar;","U+0005F"
+"loz;","U+025CA"
+"lozenge;","U+025CA"
+"lozf;","U+029EB"
+"lpar;","U+00028"
+"lparlt;","U+02993"
+"lrarr;","U+021C6"
+"lrcorner;","U+0231F"
+"lrhar;","U+021CB"
+"lrhard;","U+0296D"
+"lrm;","U+0200E"
+"lrtri;","U+022BF"
+"lsaquo;","U+02039"
+"lscr;","U+1D4C1"
+"lsh;","U+021B0"
+"lsim;","U+02272"
+"lsime;","U+02A8D"
+"lsimg;","U+02A8F"
+"lsqb;","U+0005B"
+"lsquo;","U+02018"
+"lsquor;","U+0201A"
+"lstrok;","U+00142"
+"lt;","U+0003C"
+"lt","U+0003C"
+"ltcc;","U+02AA6"
+"ltcir;","U+02A79"
+"ltdot;","U+022D6"
+"lthree;","U+022CB"
+"ltimes;","U+022C9"
+"ltlarr;","U+02976"
+"ltquest;","U+02A7B"
+"ltrPar;","U+02996"
+"ltri;","U+025C3"
+"ltrie;","U+022B4"
+"ltrif;","U+025C2"
+"lurdshar;","U+0294A"
+"luruhar;","U+02966"
+"mDDot;","U+0223A"
+"macr;","U+000AF"
+"macr","U+000AF"
+"male;","U+02642"
+"malt;","U+02720"
+"maltese;","U+02720"
+"map;","U+021A6"
+"mapsto;","U+021A6"
+"mapstodown;","U+021A7"
+"mapstoleft;","U+021A4"
+"mapstoup;","U+021A5"
+"marker;","U+025AE"
+"mcomma;","U+02A29"
+"mcy;","U+0043C"
+"mdash;","U+02014"
+"measuredangle;","U+02221"
+"mfr;","U+1D52A"
+"mho;","U+02127"
+"micro;","U+000B5"
+"micro","U+000B5"
+"mid;","U+02223"
+"midast;","U+0002A"
+"midcir;","U+02AF0"
+"middot;","U+000B7"
+"middot","U+000B7"
+"minus;","U+02212"
+"minusb;","U+0229F"
+"minusd;","U+02238"
+"minusdu;","U+02A2A"
+"mlcp;","U+02ADB"
+"mldr;","U+02026"
+"mnplus;","U+02213"
+"models;","U+022A7"
+"mopf;","U+1D55E"
+"mp;","U+02213"
+"mscr;","U+1D4C2"
+"mstpos;","U+0223E"
+"mu;","U+003BC"
+"multimap;","U+022B8"
+"mumap;","U+022B8"
+"nLeftarrow;","U+021CD"
+"nLeftrightarrow;","U+021CE"
+"nRightarrow;","U+021CF"
+"nVDash;","U+022AF"
+"nVdash;","U+022AE"
+"nabla;","U+02207"
+"nacute;","U+00144"
+"nap;","U+02249"
+"napos;","U+00149"
+"napprox;","U+02249"
+"natur;","U+0266E"
+"natural;","U+0266E"
+"naturals;","U+02115"
+"nbsp;","U+000A0"
+"nbsp","U+000A0"
+"ncap;","U+02A43"
+"ncaron;","U+00148"
+"ncedil;","U+00146"
+"ncong;","U+02247"
+"ncup;","U+02A42"
+"ncy;","U+0043D"
+"ndash;","U+02013"
+"ne;","U+02260"
+"neArr;","U+021D7"
+"nearhk;","U+02924"
+"nearr;","U+02197"
+"nearrow;","U+02197"
+"nequiv;","U+02262"
+"nesear;","U+02928"
+"nexist;","U+02204"
+"nexists;","U+02204"
+"nfr;","U+1D52B"
+"nge;","U+02271"
+"ngeq;","U+02271"
+"ngsim;","U+02275"
+"ngt;","U+0226F"
+"ngtr;","U+0226F"
+"nhArr;","U+021CE"
+"nharr;","U+021AE"
+"nhpar;","U+02AF2"
+"ni;","U+0220B"
+"nis;","U+022FC"
+"nisd;","U+022FA"
+"niv;","U+0220B"
+"njcy;","U+0045A"
+"nlArr;","U+021CD"
+"nlarr;","U+0219A"
+"nldr;","U+02025"
+"nle;","U+02270"
+"nleftarrow;","U+0219A"
+"nleftrightarrow;","U+021AE"
+"nleq;","U+02270"
+"nless;","U+0226E"
+"nlsim;","U+02274"
+"nlt;","U+0226E"
+"nltri;","U+022EA"
+"nltrie;","U+022EC"
+"nmid;","U+02224"
+"nopf;","U+1D55F"
+"not;","U+000AC"
+"not","U+000AC"
+"notin;","U+02209"
+"notinva;","U+02209"
+"notinvb;","U+022F7"
+"notinvc;","U+022F6"
+"notni;","U+0220C"
+"notniva;","U+0220C"
+"notnivb;","U+022FE"
+"notnivc;","U+022FD"
+"npar;","U+02226"
+"nparallel;","U+02226"
+"npolint;","U+02A14"
+"npr;","U+02280"
+"nprcue;","U+022E0"
+"nprec;","U+02280"
+"nrArr;","U+021CF"
+"nrarr;","U+0219B"
+"nrightarrow;","U+0219B"
+"nrtri;","U+022EB"
+"nrtrie;","U+022ED"
+"nsc;","U+02281"
+"nsccue;","U+022E1"
+"nscr;","U+1D4C3"
+"nshortmid;","U+02224"
+"nshortparallel;","U+02226"
+"nsim;","U+02241"
+"nsime;","U+02244"
+"nsimeq;","U+02244"
+"nsmid;","U+02224"
+"nspar;","U+02226"
+"nsqsube;","U+022E2"
+"nsqsupe;","U+022E3"
+"nsub;","U+02284"
+"nsube;","U+02288"
+"nsubseteq;","U+02288"
+"nsucc;","U+02281"
+"nsup;","U+02285"
+"nsupe;","U+02289"
+"nsupseteq;","U+02289"
+"ntgl;","U+02279"
+"ntilde;","U+000F1"
+"ntilde","U+000F1"
+"ntlg;","U+02278"
+"ntriangleleft;","U+022EA"
+"ntrianglelefteq;","U+022EC"
+"ntriangleright;","U+022EB"
+"ntrianglerighteq;","U+022ED"
+"nu;","U+003BD"
+"num;","U+00023"
+"numero;","U+02116"
+"numsp;","U+02007"
+"nvDash;","U+022AD"
+"nvHarr;","U+02904"
+"nvdash;","U+022AC"
+"nvinfin;","U+029DE"
+"nvlArr;","U+02902"
+"nvrArr;","U+02903"
+"nwArr;","U+021D6"
+"nwarhk;","U+02923"
+"nwarr;","U+02196"
+"nwarrow;","U+02196"
+"nwnear;","U+02927"
+"oS;","U+024C8"
+"oacute;","U+000F3"
+"oacute","U+000F3"
+"oast;","U+0229B"
+"ocir;","U+0229A"
+"ocirc;","U+000F4"
+"ocirc","U+000F4"
+"ocy;","U+0043E"
+"odash;","U+0229D"
+"odblac;","U+00151"
+"odiv;","U+02A38"
+"odot;","U+02299"
+"odsold;","U+029BC"
+"oelig;","U+00153"
+"ofcir;","U+029BF"
+"ofr;","U+1D52C"
+"ogon;","U+002DB"
+"ograve;","U+000F2"
+"ograve","U+000F2"
+"ogt;","U+029C1"
+"ohbar;","U+029B5"
+"ohm;","U+003A9"
+"oint;","U+0222E"
+"olarr;","U+021BA"
+"olcir;","U+029BE"
+"olcross;","U+029BB"
+"oline;","U+0203E"
+"olt;","U+029C0"
+"omacr;","U+0014D"
+"omega;","U+003C9"
+"omicron;","U+003BF"
+"omid;","U+029B6"
+"ominus;","U+02296"
+"oopf;","U+1D560"
+"opar;","U+029B7"
+"operp;","U+029B9"
+"oplus;","U+02295"
+"or;","U+02228"
+"orarr;","U+021BB"
+"ord;","U+02A5D"
+"order;","U+02134"
+"orderof;","U+02134"
+"ordf;","U+000AA"
+"ordf","U+000AA"
+"ordm;","U+000BA"
+"ordm","U+000BA"
+"origof;","U+022B6"
+"oror;","U+02A56"
+"orslope;","U+02A57"
+"orv;","U+02A5B"
+"oscr;","U+02134"
+"oslash;","U+000F8"
+"oslash","U+000F8"
+"osol;","U+02298"
+"otilde;","U+000F5"
+"otilde","U+000F5"
+"otimes;","U+02297"
+"otimesas;","U+02A36"
+"ouml;","U+000F6"
+"ouml","U+000F6"
+"ovbar;","U+0233D"
+"par;","U+02225"
+"para;","U+000B6"
+"para","U+000B6"
+"parallel;","U+02225"
+"parsim;","U+02AF3"
+"parsl;","U+02AFD"
+"part;","U+02202"
+"pcy;","U+0043F"
+"percnt;","U+00025"
+"period;","U+0002E"
+"permil;","U+02030"
+"perp;","U+022A5"
+"pertenk;","U+02031"
+"pfr;","U+1D52D"
+"phi;","U+003C6"
+"phiv;","U+003D5"
+"phmmat;","U+02133"
+"phone;","U+0260E"
+"pi;","U+003C0"
+"pitchfork;","U+022D4"
+"piv;","U+003D6"
+"planck;","U+0210F"
+"planckh;","U+0210E"
+"plankv;","U+0210F"
+"plus;","U+0002B"
+"plusacir;","U+02A23"
+"plusb;","U+0229E"
+"pluscir;","U+02A22"
+"plusdo;","U+02214"
+"plusdu;","U+02A25"
+"pluse;","U+02A72"
+"plusmn;","U+000B1"
+"plusmn","U+000B1"
+"plussim;","U+02A26"
+"plustwo;","U+02A27"
+"pm;","U+000B1"
+"pointint;","U+02A15"
+"popf;","U+1D561"
+"pound;","U+000A3"
+"pound","U+000A3"
+"pr;","U+0227A"
+"prE;","U+02AB3"
+"prap;","U+02AB7"
+"prcue;","U+0227C"
+"pre;","U+02AAF"
+"prec;","U+0227A"
+"precapprox;","U+02AB7"
+"preccurlyeq;","U+0227C"
+"preceq;","U+02AAF"
+"precnapprox;","U+02AB9"
+"precneqq;","U+02AB5"
+"precnsim;","U+022E8"
+"precsim;","U+0227E"
+"prime;","U+02032"
+"primes;","U+02119"
+"prnE;","U+02AB5"
+"prnap;","U+02AB9"
+"prnsim;","U+022E8"
+"prod;","U+0220F"
+"profalar;","U+0232E"
+"profline;","U+02312"
+"profsurf;","U+02313"
+"prop;","U+0221D"
+"propto;","U+0221D"
+"prsim;","U+0227E"
+"prurel;","U+022B0"
+"pscr;","U+1D4C5"
+"psi;","U+003C8"
+"puncsp;","U+02008"
+"qfr;","U+1D52E"
+"qint;","U+02A0C"
+"qopf;","U+1D562"
+"qprime;","U+02057"
+"qscr;","U+1D4C6"
+"quaternions;","U+0210D"
+"quatint;","U+02A16"
+"quest;","U+0003F"
+"questeq;","U+0225F"
+"quot;","U+00022"
+"quot","U+00022"
+"rAarr;","U+021DB"
+"rArr;","U+021D2"
+"rAtail;","U+0291C"
+"rBarr;","U+0290F"
+"rHar;","U+02964"
+"racute;","U+00155"
+"radic;","U+0221A"
+"raemptyv;","U+029B3"
+"rang;","U+027E9"
+"rangd;","U+02992"
+"range;","U+029A5"
+"rangle;","U+027E9"
+"raquo;","U+000BB"
+"raquo","U+000BB"
+"rarr;","U+02192"
+"rarrap;","U+02975"
+"rarrb;","U+021E5"
+"rarrbfs;","U+02920"
+"rarrc;","U+02933"
+"rarrfs;","U+0291E"
+"rarrhk;","U+021AA"
+"rarrlp;","U+021AC"
+"rarrpl;","U+02945"
+"rarrsim;","U+02974"
+"rarrtl;","U+021A3"
+"rarrw;","U+0219D"
+"ratail;","U+0291A"
+"ratio;","U+02236"
+"rationals;","U+0211A"
+"rbarr;","U+0290D"
+"rbbrk;","U+02773"
+"rbrace;","U+0007D"
+"rbrack;","U+0005D"
+"rbrke;","U+0298C"
+"rbrksld;","U+0298E"
+"rbrkslu;","U+02990"
+"rcaron;","U+00159"
+"rcedil;","U+00157"
+"rceil;","U+02309"
+"rcub;","U+0007D"
+"rcy;","U+00440"
+"rdca;","U+02937"
+"rdldhar;","U+02969"
+"rdquo;","U+0201D"
+"rdquor;","U+0201D"
+"rdsh;","U+021B3"
+"real;","U+0211C"
+"realine;","U+0211B"
+"realpart;","U+0211C"
+"reals;","U+0211D"
+"rect;","U+025AD"
+"reg;","U+000AE"
+"reg","U+000AE"
+"rfisht;","U+0297D"
+"rfloor;","U+0230B"
+"rfr;","U+1D52F"
+"rhard;","U+021C1"
+"rharu;","U+021C0"
+"rharul;","U+0296C"
+"rho;","U+003C1"
+"rhov;","U+003F1"
+"rightarrow;","U+02192"
+"rightarrowtail;","U+021A3"
+"rightharpoondown;","U+021C1"
+"rightharpoonup;","U+021C0"
+"rightleftarrows;","U+021C4"
+"rightleftharpoons;","U+021CC"
+"rightrightarrows;","U+021C9"
+"rightsquigarrow;","U+0219D"
+"rightthreetimes;","U+022CC"
+"ring;","U+002DA"
+"risingdotseq;","U+02253"
+"rlarr;","U+021C4"
+"rlhar;","U+021CC"
+"rlm;","U+0200F"
+"rmoust;","U+023B1"
+"rmoustache;","U+023B1"
+"rnmid;","U+02AEE"
+"roang;","U+027ED"
+"roarr;","U+021FE"
+"robrk;","U+027E7"
+"ropar;","U+02986"
+"ropf;","U+1D563"
+"roplus;","U+02A2E"
+"rotimes;","U+02A35"
+"rpar;","U+00029"
+"rpargt;","U+02994"
+"rppolint;","U+02A12"
+"rrarr;","U+021C9"
+"rsaquo;","U+0203A"
+"rscr;","U+1D4C7"
+"rsh;","U+021B1"
+"rsqb;","U+0005D"
+"rsquo;","U+02019"
+"rsquor;","U+02019"
+"rthree;","U+022CC"
+"rtimes;","U+022CA"
+"rtri;","U+025B9"
+"rtrie;","U+022B5"
+"rtrif;","U+025B8"
+"rtriltri;","U+029CE"
+"ruluhar;","U+02968"
+"rx;","U+0211E"
+"sacute;","U+0015B"
+"sbquo;","U+0201A"
+"sc;","U+0227B"
+"scE;","U+02AB4"
+"scap;","U+02AB8"
+"scaron;","U+00161"
+"sccue;","U+0227D"
+"sce;","U+02AB0"
+"scedil;","U+0015F"
+"scirc;","U+0015D"
+"scnE;","U+02AB6"
+"scnap;","U+02ABA"
+"scnsim;","U+022E9"
+"scpolint;","U+02A13"
+"scsim;","U+0227F"
+"scy;","U+00441"
+"sdot;","U+022C5"
+"sdotb;","U+022A1"
+"sdote;","U+02A66"
+"seArr;","U+021D8"
+"searhk;","U+02925"
+"searr;","U+02198"
+"searrow;","U+02198"
+"sect;","U+000A7"
+"sect","U+000A7"
+"semi;","U+0003B"
+"seswar;","U+02929"
+"setminus;","U+02216"
+"setmn;","U+02216"
+"sext;","U+02736"
+"sfr;","U+1D530"
+"sfrown;","U+02322"
+"sharp;","U+0266F"
+"shchcy;","U+00449"
+"shcy;","U+00448"
+"shortmid;","U+02223"
+"shortparallel;","U+02225"
+"shy;","U+000AD"
+"shy","U+000AD"
+"sigma;","U+003C3"
+"sigmaf;","U+003C2"
+"sigmav;","U+003C2"
+"sim;","U+0223C"
+"simdot;","U+02A6A"
+"sime;","U+02243"
+"simeq;","U+02243"
+"simg;","U+02A9E"
+"simgE;","U+02AA0"
+"siml;","U+02A9D"
+"simlE;","U+02A9F"
+"simne;","U+02246"
+"simplus;","U+02A24"
+"simrarr;","U+02972"
+"slarr;","U+02190"
+"smallsetminus;","U+02216"
+"smashp;","U+02A33"
+"smeparsl;","U+029E4"
+"smid;","U+02223"
+"smile;","U+02323"
+"smt;","U+02AAA"
+"smte;","U+02AAC"
+"softcy;","U+0044C"
+"sol;","U+0002F"
+"solb;","U+029C4"
+"solbar;","U+0233F"
+"sopf;","U+1D564"
+"spades;","U+02660"
+"spadesuit;","U+02660"
+"spar;","U+02225"
+"sqcap;","U+02293"
+"sqcup;","U+02294"
+"sqsub;","U+0228F"
+"sqsube;","U+02291"
+"sqsubset;","U+0228F"
+"sqsubseteq;","U+02291"
+"sqsup;","U+02290"
+"sqsupe;","U+02292"
+"sqsupset;","U+02290"
+"sqsupseteq;","U+02292"
+"squ;","U+025A1"
+"square;","U+025A1"
+"squarf;","U+025AA"
+"squf;","U+025AA"
+"srarr;","U+02192"
+"sscr;","U+1D4C8"
+"ssetmn;","U+02216"
+"ssmile;","U+02323"
+"sstarf;","U+022C6"
+"star;","U+02606"
+"starf;","U+02605"
+"straightepsilon;","U+003F5"
+"straightphi;","U+003D5"
+"strns;","U+000AF"
+"sub;","U+02282"
+"subE;","U+02AC5"
+"subdot;","U+02ABD"
+"sube;","U+02286"
+"subedot;","U+02AC3"
+"submult;","U+02AC1"
+"subnE;","U+02ACB"
+"subne;","U+0228A"
+"subplus;","U+02ABF"
+"subrarr;","U+02979"
+"subset;","U+02282"
+"subseteq;","U+02286"
+"subseteqq;","U+02AC5"
+"subsetneq;","U+0228A"
+"subsetneqq;","U+02ACB"
+"subsim;","U+02AC7"
+"subsub;","U+02AD5"
+"subsup;","U+02AD3"
+"succ;","U+0227B"
+"succapprox;","U+02AB8"
+"succcurlyeq;","U+0227D"
+"succeq;","U+02AB0"
+"succnapprox;","U+02ABA"
+"succneqq;","U+02AB6"
+"succnsim;","U+022E9"
+"succsim;","U+0227F"
+"sum;","U+02211"
+"sung;","U+0266A"
+"sup1;","U+000B9"
+"sup1","U+000B9"
+"sup2;","U+000B2"
+"sup2","U+000B2"
+"sup3;","U+000B3"
+"sup3","U+000B3"
+"sup;","U+02283"
+"supE;","U+02AC6"
+"supdot;","U+02ABE"
+"supdsub;","U+02AD8"
+"supe;","U+02287"
+"supedot;","U+02AC4"
+"suphsol;","U+027C9"
+"suphsub;","U+02AD7"
+"suplarr;","U+0297B"
+"supmult;","U+02AC2"
+"supnE;","U+02ACC"
+"supne;","U+0228B"
+"supplus;","U+02AC0"
+"supset;","U+02283"
+"supseteq;","U+02287"
+"supseteqq;","U+02AC6"
+"supsetneq;","U+0228B"
+"supsetneqq;","U+02ACC"
+"supsim;","U+02AC8"
+"supsub;","U+02AD4"
+"supsup;","U+02AD6"
+"swArr;","U+021D9"
+"swarhk;","U+02926"
+"swarr;","U+02199"
+"swarrow;","U+02199"
+"swnwar;","U+0292A"
+"szlig;","U+000DF"
+"szlig","U+000DF"
+"target;","U+02316"
+"tau;","U+003C4"
+"tbrk;","U+023B4"
+"tcaron;","U+00165"
+"tcedil;","U+00163"
+"tcy;","U+00442"
+"tdot;","U+020DB"
+"telrec;","U+02315"
+"tfr;","U+1D531"
+"there4;","U+02234"
+"therefore;","U+02234"
+"theta;","U+003B8"
+"thetasym;","U+003D1"
+"thetav;","U+003D1"
+"thickapprox;","U+02248"
+"thicksim;","U+0223C"
+"thinsp;","U+02009"
+"thkap;","U+02248"
+"thksim;","U+0223C"
+"thorn;","U+000FE"
+"thorn","U+000FE"
+"tilde;","U+002DC"
+"times;","U+000D7"
+"times","U+000D7"
+"timesb;","U+022A0"
+"timesbar;","U+02A31"
+"timesd;","U+02A30"
+"tint;","U+0222D"
+"toea;","U+02928"
+"top;","U+022A4"
+"topbot;","U+02336"
+"topcir;","U+02AF1"
+"topf;","U+1D565"
+"topfork;","U+02ADA"
+"tosa;","U+02929"
+"tprime;","U+02034"
+"trade;","U+02122"
+"triangle;","U+025B5"
+"triangledown;","U+025BF"
+"triangleleft;","U+025C3"
+"trianglelefteq;","U+022B4"
+"triangleq;","U+0225C"
+"triangleright;","U+025B9"
+"trianglerighteq;","U+022B5"
+"tridot;","U+025EC"
+"trie;","U+0225C"
+"triminus;","U+02A3A"
+"triplus;","U+02A39"
+"trisb;","U+029CD"
+"tritime;","U+02A3B"
+"trpezium;","U+023E2"
+"tscr;","U+1D4C9"
+"tscy;","U+00446"
+"tshcy;","U+0045B"
+"tstrok;","U+00167"
+"twixt;","U+0226C"
+"twoheadleftarrow;","U+0219E"
+"twoheadrightarrow;","U+021A0"
+"uArr;","U+021D1"
+"uHar;","U+02963"
+"uacute;","U+000FA"
+"uacute","U+000FA"
+"uarr;","U+02191"
+"ubrcy;","U+0045E"
+"ubreve;","U+0016D"
+"ucirc;","U+000FB"
+"ucirc","U+000FB"
+"ucy;","U+00443"
+"udarr;","U+021C5"
+"udblac;","U+00171"
+"udhar;","U+0296E"
+"ufisht;","U+0297E"
+"ufr;","U+1D532"
+"ugrave;","U+000F9"
+"ugrave","U+000F9"
+"uharl;","U+021BF"
+"uharr;","U+021BE"
+"uhblk;","U+02580"
+"ulcorn;","U+0231C"
+"ulcorner;","U+0231C"
+"ulcrop;","U+0230F"
+"ultri;","U+025F8"
+"umacr;","U+0016B"
+"uml;","U+000A8"
+"uml","U+000A8"
+"uogon;","U+00173"
+"uopf;","U+1D566"
+"uparrow;","U+02191"
+"updownarrow;","U+02195"
+"upharpoonleft;","U+021BF"
+"upharpoonright;","U+021BE"
+"uplus;","U+0228E"
+"upsi;","U+003C5"
+"upsih;","U+003D2"
+"upsilon;","U+003C5"
+"upuparrows;","U+021C8"
+"urcorn;","U+0231D"
+"urcorner;","U+0231D"
+"urcrop;","U+0230E"
+"uring;","U+0016F"
+"urtri;","U+025F9"
+"uscr;","U+1D4CA"
+"utdot;","U+022F0"
+"utilde;","U+00169"
+"utri;","U+025B5"
+"utrif;","U+025B4"
+"uuarr;","U+021C8"
+"uuml;","U+000FC"
+"uuml","U+000FC"
+"uwangle;","U+029A7"
+"vArr;","U+021D5"
+"vBar;","U+02AE8"
+"vBarv;","U+02AE9"
+"vDash;","U+022A8"
+"vangrt;","U+0299C"
+"varepsilon;","U+003F5"
+"varkappa;","U+003F0"
+"varnothing;","U+02205"
+"varphi;","U+003D5"
+"varpi;","U+003D6"
+"varpropto;","U+0221D"
+"varr;","U+02195"
+"varrho;","U+003F1"
+"varsigma;","U+003C2"
+"vartheta;","U+003D1"
+"vartriangleleft;","U+022B2"
+"vartriangleright;","U+022B3"
+"vcy;","U+00432"
+"vdash;","U+022A2"
+"vee;","U+02228"
+"veebar;","U+022BB"
+"veeeq;","U+0225A"
+"vellip;","U+022EE"
+"verbar;","U+0007C"
+"vert;","U+0007C"
+"vfr;","U+1D533"
+"vltri;","U+022B2"
+"vopf;","U+1D567"
+"vprop;","U+0221D"
+"vrtri;","U+022B3"
+"vscr;","U+1D4CB"
+"vzigzag;","U+0299A"
+"wcirc;","U+00175"
+"wedbar;","U+02A5F"
+"wedge;","U+02227"
+"wedgeq;","U+02259"
+"weierp;","U+02118"
+"wfr;","U+1D534"
+"wopf;","U+1D568"
+"wp;","U+02118"
+"wr;","U+02240"
+"wreath;","U+02240"
+"wscr;","U+1D4CC"
+"xcap;","U+022C2"
+"xcirc;","U+025EF"
+"xcup;","U+022C3"
+"xdtri;","U+025BD"
+"xfr;","U+1D535"
+"xhArr;","U+027FA"
+"xharr;","U+027F7"
+"xi;","U+003BE"
+"xlArr;","U+027F8"
+"xlarr;","U+027F5"
+"xmap;","U+027FC"
+"xnis;","U+022FB"
+"xodot;","U+02A00"
+"xopf;","U+1D569"
+"xoplus;","U+02A01"
+"xotime;","U+02A02"
+"xrArr;","U+027F9"
+"xrarr;","U+027F6"
+"xscr;","U+1D4CD"
+"xsqcup;","U+02A06"
+"xuplus;","U+02A04"
+"xutri;","U+025B3"
+"xvee;","U+022C1"
+"xwedge;","U+022C0"
+"yacute;","U+000FD"
+"yacute","U+000FD"
+"yacy;","U+0044F"
+"ycirc;","U+00177"
+"ycy;","U+0044B"
+"yen;","U+000A5"
+"yen","U+000A5"
+"yfr;","U+1D536"
+"yicy;","U+00457"
+"yopf;","U+1D56A"
+"yscr;","U+1D4CE"
+"yucy;","U+0044E"
+"yuml;","U+000FF"
+"yuml","U+000FF"
+"zacute;","U+0017A"
+"zcaron;","U+0017E"
+"zcy;","U+00437"
+"zdot;","U+0017C"
+"zeetrf;","U+02128"
+"zeta;","U+003B6"
+"zfr;","U+1D537"
+"zhcy;","U+00436"
+"zigrarr;","U+021DD"
+"zopf;","U+1D56B"
+"zscr;","U+1D4CF"
+"zwj;","U+0200D"
+"zwnj;","U+0200C"
diff --git a/Source/WebCore/html/parser/HTMLEntityParser.cpp b/Source/WebCore/html/parser/HTMLEntityParser.cpp
new file mode 100644
index 0000000..6a422b8
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntityParser.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "HTMLEntityParser.h"
+
+#include "HTMLEntitySearch.h"
+#include "HTMLEntityTable.h"
+#include <wtf/Vector.h>
+
+using namespace WTF;
+
+namespace WebCore {
+
+namespace {
+
+static const UChar windowsLatin1ExtensionArray[32] = { // Replacement values for code points 0x80-0x9F; adjustEntity() indexes this with (value - 0x80).
+    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
+    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
+    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
+    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
+};
+
+// Remaps a code point in the 0x80-0x9F range through
+// windowsLatin1ExtensionArray; any other value is returned as-is (narrowed
+// to UChar, so callers are expected to pass BMP values only).
+inline UChar adjustEntity(UChar32 value)
+{
+    const bool outsideExtensionRange = value < 0x0080 || value > 0x009F;
+    if (outsideExtensionRange)
+        return value;
+    const int tableIndex = value - 0x80;
+    return windowsLatin1ExtensionArray[tableIndex];
+}
+
+// Sanitizes the value of a numeric character reference: zero, surrogates and
+// values above 0x10FFFF become U+FFFD; BMP values go through adjustEntity();
+// everything else is returned unchanged.
+inline UChar32 legalEntityFor(UChar32 value)
+{
+    // FIXME: A number of specific entity values generate parse errors.
+    const bool isNull = !value;
+    const bool isBeyondUnicode = value > 0x10FFFF;
+    const bool isSurrogate = value >= 0xD800 && value <= 0xDFFF;
+    if (isNull || isBeyondUnicode || isSurrogate)
+        return 0xFFFD;
+    if (!U_IS_BMP(value))
+        return value;
+    return adjustEntity(value);
+}
+
+// Appends |value| to |decodedEntity| as UTF-16: a single code unit for BMP
+// values, a lead/trail surrogate pair otherwise.  Always returns true.
+inline bool convertToUTF16(UChar32 value, Vector<UChar, 16>& decodedEntity)
+{
+    if (!U_IS_BMP(value)) {
+        decodedEntity.append(U16_LEAD(value));
+        decodedEntity.append(U16_TRAIL(value));
+        return true;
+    }
+    UChar character = static_cast<UChar>(value);
+    ASSERT(character == value);
+    decodedEntity.append(character);
+    return true;
+}
+
+// True for the ASCII characters 0-9, a-f and A-F.
+inline bool isHexDigit(UChar cc)
+{
+    if (cc >= '0' && cc <= '9')
+        return true;
+    if (cc >= 'a' && cc <= 'f')
+        return true;
+    return cc >= 'A' && cc <= 'F';
+}
+
+// True for the ASCII characters 0-9, a-z and A-Z.
+inline bool isAlphaNumeric(UChar cc)
+{
+    if (cc >= '0' && cc <= '9')
+        return true;
+    if (cc >= 'a' && cc <= 'z')
+        return true;
+    return cc >= 'A' && cc <= 'Z';
+}
+
+// Hands |consumedCharacters| back to |source|: one or two characters are
+// returned with push(), any other count with a single prepend().
+void unconsumeCharacters(SegmentedString& source, const Vector<UChar, 10>& consumedCharacters)
+{
+    switch (consumedCharacters.size()) {
+    case 1:
+        source.push(consumedCharacters[0]);
+        return;
+    case 2:
+        source.push(consumedCharacters[0]);
+        source.push(consumedCharacters[1]);
+        return;
+    default:
+        source.prepend(SegmentedString(String(consumedCharacters.data(), consumedCharacters.size())));
+        return;
+    }
+}
+
+}
+
+// Tries to consume one character reference from the front of |source| (the
+// leading '&' is not handled here).
+//
+// On success the decoded UTF-16 code units are appended to |decodedEntity|
+// and true is returned.  On failure false is returned and the characters
+// that were consumed are handed back to |source|.  |notEnoughCharacters| is
+// set to true when |source| ran out before a decision could be made.
+//
+// |additionalAllowedCharacter| is either 0 or one of '"', '\'' and '>'.  When
+// it is non-zero, a reference that starts with that character is rejected,
+// and a named reference without a trailing ';' is rejected if it is followed
+// by an alphanumeric character or '='.
+bool consumeHTMLEntity(SegmentedString& source, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
+{
+    ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
+    ASSERT(!notEnoughCharacters);
+    ASSERT(decodedEntity.isEmpty());
+
+    enum EntityState {
+        Initial,
+        Number,
+        MaybeHexLowerCaseX,
+        MaybeHexUpperCaseX,
+        Hex,
+        Decimal,
+        Named
+    };
+    EntityState entityState = Initial;
+    UChar32 result = 0;
+    // Numeric references saturate just above this value so that an
+    // arbitrarily long run of digits can neither overflow |result| nor wrap
+    // around to a valid code point; legalEntityFor() maps anything above it
+    // to U+FFFD.
+    const UChar32 maximumCodePoint = 0x10FFFF;
+    Vector<UChar, 10> consumedCharacters;
+
+    while (!source.isEmpty()) {
+        UChar cc = *source;
+        switch (entityState) {
+        case Initial: {
+            if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&')
+                return false;
+            if (additionalAllowedCharacter && cc == additionalAllowedCharacter)
+                return false;
+            if (cc == '#') {
+                entityState = Number;
+                break;
+            }
+            if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
+                // Re-dispatch on the same character in the Named state.
+                entityState = Named;
+                continue;
+            }
+            return false;
+        }
+        case Number: {
+            if (cc == 'x') {
+                entityState = MaybeHexLowerCaseX;
+                break;
+            }
+            if (cc == 'X') {
+                entityState = MaybeHexUpperCaseX;
+                break;
+            }
+            if (cc >= '0' && cc <= '9') {
+                entityState = Decimal;
+                continue;
+            }
+            source.push('#');
+            return false;
+        }
+        case MaybeHexLowerCaseX: {
+            if (isHexDigit(cc)) {
+                entityState = Hex;
+                continue;
+            }
+            source.push('#');
+            source.push('x');
+            return false;
+        }
+        case MaybeHexUpperCaseX: {
+            if (isHexDigit(cc)) {
+                entityState = Hex;
+                continue;
+            }
+            source.push('#');
+            source.push('X');
+            return false;
+        }
+        case Hex: {
+            if (cc >= '0' && cc <= '9')
+                result = result * 16 + cc - '0';
+            else if (cc >= 'a' && cc <= 'f')
+                result = result * 16 + 10 + cc - 'a';
+            else if (cc >= 'A' && cc <= 'F')
+                result = result * 16 + 10 + cc - 'A';
+            else {
+                // The terminating ';' is optional but is consumed if present.
+                if (cc == ';')
+                    source.advanceAndASSERT(cc);
+                return convertToUTF16(legalEntityFor(result), decodedEntity);
+            }
+            if (result > maximumCodePoint)
+                result = maximumCodePoint + 1;
+            break;
+        }
+        case Decimal: {
+            if (cc >= '0' && cc <= '9')
+                result = result * 10 + cc - '0';
+            else {
+                // The terminating ';' is optional but is consumed if present.
+                if (cc == ';')
+                    source.advanceAndASSERT(cc);
+                return convertToUTF16(legalEntityFor(result), decodedEntity);
+            }
+            if (result > maximumCodePoint)
+                result = maximumCodePoint + 1;
+            break;
+        }
+        case Named: {
+            HTMLEntitySearch entitySearch;
+            while (!source.isEmpty()) {
+                cc = *source;
+                entitySearch.advance(cc);
+                if (!entitySearch.isEntityPrefix())
+                    break;
+                consumedCharacters.append(cc);
+                source.advanceAndASSERT(cc);
+            }
+            notEnoughCharacters = source.isEmpty();
+            if (notEnoughCharacters) {
+                // We can't decide on an entity because there might be a
+                // longer entity that we could match if we had more data.
+                unconsumeCharacters(source, consumedCharacters);
+                return false;
+            }
+            if (!entitySearch.mostRecentMatch()) {
+                ASSERT(!entitySearch.currentValue());
+                unconsumeCharacters(source, consumedCharacters);
+                return false;
+            }
+            if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) {
+                // We've consumed too many characters.  We need to walk the
+                // source back to the point at which we had consumed an
+                // actual entity.
+                unconsumeCharacters(source, consumedCharacters);
+                consumedCharacters.clear();
+                const int length = entitySearch.mostRecentMatch()->length;
+                const UChar* reference = entitySearch.mostRecentMatch()->entity;
+                for (int i = 0; i < length; ++i) {
+                    cc = *source;
+                    ASSERT_UNUSED(reference, cc == *reference++);
+                    consumedCharacters.append(cc);
+                    source.advanceAndASSERT(cc);
+                    ASSERT(!source.isEmpty());
+                }
+                cc = *source;
+            }
+            // |cc| is now the first character after the matched name.
+            if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
+                || !additionalAllowedCharacter
+                || !(isAlphaNumeric(cc) || cc == '=')) {
+                return convertToUTF16(entitySearch.mostRecentMatch()->value, decodedEntity);
+            }
+            unconsumeCharacters(source, consumedCharacters);
+            return false;
+        }
+        }
+        consumedCharacters.append(cc);
+        source.advanceAndASSERT(cc);
+    }
+    // Ran out of input in the middle of a reference: give everything back
+    // and ask the caller for more data.
+    ASSERT(source.isEmpty());
+    notEnoughCharacters = true;
+    unconsumeCharacters(source, consumedCharacters);
+    return false;
+}
+
+UChar decodeNamedEntity(const char* name) // Looks up |name| (given without its trailing ';') and returns its value as one UChar, or 0 when there is none.
+{
+    HTMLEntitySearch search;
+    while (*name) {
+        search.advance(*name++);
+        if (!search.isEntityPrefix())
+            return 0; // |name| is not the start of any table entry.
+    }
+    search.advance(';'); // Only the ';'-terminated form of the name is looked up.
+    UChar32 entityValue = search.currentValue(); // 0 unless the search landed exactly on an entry.
+    if (U16_LENGTH(entityValue) != 1) {
+        // Callers need to move off this API if the entity table has values
+        // which do not fit in a 16 bit UChar!
+        ASSERT_NOT_REACHED();
+        return 0;
+    }
+    return static_cast<UChar>(entityValue);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/html/parser/HTMLEntityParser.h b/Source/WebCore/html/parser/HTMLEntityParser.h
new file mode 100644
index 0000000..f02e849
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntityParser.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLEntityParser_h
+#define HTMLEntityParser_h
+
+#include "SegmentedString.h"
+
+namespace WebCore {
+
+bool consumeHTMLEntity(SegmentedString&, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter = '\0'); // Returns true and appends the decoded code units to decodedEntity on success; sets notEnoughCharacters when the input ran out before a decision could be made.
+
+// Used by the XML parser.  Not suitable for use in HTML parsing.  Use consumeHTMLEntity instead.
+UChar decodeNamedEntity(const char*); // The name is passed without its trailing ';'; returns 0 when no single-UChar value is found.
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLEntitySearch.cpp b/Source/WebCore/html/parser/HTMLEntitySearch.cpp
new file mode 100644
index 0000000..56fb91a
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntitySearch.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "HTMLEntitySearch.h"
+
+#include "HTMLEntityTable.h"
+
+namespace WebCore {
+
+namespace {
+    
+// Returns the entry midway between |left| and |right|, rounding toward
+// |left|.
+const HTMLEntityTableEntry* halfway(const HTMLEntityTableEntry* left, const HTMLEntityTableEntry* right)
+{
+    return left + (right - left) / 2;
+}
+
+}
+    
+HTMLEntitySearch::HTMLEntitySearch() // Starts with no characters consumed, no match, and the whole table as the candidate range.
+    : m_currentLength(0)
+    , m_currentValue(0)
+    , m_mostRecentMatch(0)
+    , m_first(HTMLEntityTable::firstEntry())
+    , m_last(HTMLEntityTable::lastEntry())
+{
+}
+
+// Classifies |entry| relative to the characters consumed so far followed by
+// |nextCharacter|, looking only at the character at index m_currentLength.
+HTMLEntitySearch::CompareResult HTMLEntitySearch::compare(const HTMLEntityTableEntry* entry, UChar nextCharacter) const
+{
+    // An entry that has no character at this index counts as Before.
+    if (entry->length <= m_currentLength)
+        return Before;
+    const UChar entryCharacter = entry->entity[m_currentLength];
+    if (entryCharacter < nextCharacter)
+        return Before;
+    if (entryCharacter > nextCharacter)
+        return After;
+    return Prefix;
+}
+
+const HTMLEntityTableEntry* HTMLEntitySearch::findFirst(UChar nextCharacter) const // Returns the lower end of the candidate range after appending nextCharacter; searches [m_first, m_last] by repeated halving.
+{
+    const HTMLEntityTableEntry* left = m_first;
+    const HTMLEntityTableEntry* right = m_last;
+    if (left == right)
+        return left; // Single candidate: advance() checks whether it still matches.
+    CompareResult result = compare(left, nextCharacter);
+    if (result == Prefix)
+        return left;
+    if (result == After)
+        return right; // Returning m_last makes the range collapse to one entry, which advance() then tests.
+    while (left + 1 < right) { // From here on |left| always compares Before.
+        const HTMLEntityTableEntry* probe = halfway(left, right);
+        result = compare(probe, nextCharacter);
+        if (result == Before)
+            left = probe;
+        else {
+            ASSERT(result == After || result == Prefix);
+            right = probe;
+        }
+    }
+    ASSERT(left + 1 == right);
+    return right; // The entry right after the last one known to compare Before.
+}
+
+const HTMLEntityTableEntry* HTMLEntitySearch::findLast(UChar nextCharacter) const // Returns the upper end of the candidate range after appending nextCharacter; mirror image of findFirst().
+{
+    const HTMLEntityTableEntry* left = m_first; // advance() has already stored findFirst()'s result in m_first when this runs.
+    const HTMLEntityTableEntry* right = m_last;
+    if (left == right)
+        return right; // Single candidate: advance() checks whether it still matches.
+    CompareResult result = compare(right, nextCharacter);
+    if (result == Prefix)
+        return right;
+    if (result == Before)
+        return left;
+    while (left + 1 < right) { // From here on |right| always compares After.
+        const HTMLEntityTableEntry* probe = halfway(left, right);
+        result = compare(probe, nextCharacter);
+        if (result == After)
+            right = probe;
+        else {
+            ASSERT(result == Before || result == Prefix);
+            left = probe;
+        }
+    }
+    ASSERT(left + 1 == right);
+    return left; // The entry right before the first one known to compare After.
+}
+
+void HTMLEntitySearch::advance(UChar nextCharacter) // Consumes one more character, narrowing [m_first, m_last]; calls fail() when no entry can match any more.
+{
+    ASSERT(isEntityPrefix());
+    if (!m_currentLength) { // First character: take the range from the table's per-character lookup.
+        m_first = HTMLEntityTable::firstEntryStartingWith(nextCharacter);
+        m_last = HTMLEntityTable::lastEntryStartingWith(nextCharacter);
+        if (!m_first || !m_last)
+            return fail();
+    } else {
+        m_first = findFirst(nextCharacter);
+        m_last = findLast(nextCharacter); // Order matters: findLast() reads the m_first that was just updated.
+        if (m_first == m_last && compare(m_first, nextCharacter) != Prefix)
+            return fail();
+    }
+    ++m_currentLength;
+    if (m_first->length != m_currentLength) { // Still a proper prefix of m_first: no complete entry at this length.
+        m_currentValue = 0;
+        return;
+    }
+    m_mostRecentMatch = m_first; // The consumed characters spell out m_first exactly.
+    m_currentValue = m_mostRecentMatch->value;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLEntitySearch.h b/Source/WebCore/html/parser/HTMLEntitySearch.h
new file mode 100644
index 0000000..0c66318
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntitySearch.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLEntitySearch_h
+#define HTMLEntitySearch_h
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+struct HTMLEntityTableEntry;
+
+class HTMLEntitySearch { // Incremental lookup in HTMLEntityTable: feed the name one character at a time through advance().
+public:
+    HTMLEntitySearch();
+
+    void advance(UChar); // Must only be called while isEntityPrefix() is true.
+
+    bool isEntityPrefix() const { return !!m_first; } // False once fail() has cleared the candidate range.
+    UChar32 currentValue() const { return m_currentValue; } // 0 unless the characters consumed so far are exactly an entry.
+    int currentLength() const { return m_currentLength; }
+
+    const HTMLEntityTableEntry* mostRecentMatch() const { return m_mostRecentMatch; } // Last entry that was matched exactly, or 0 if there has been none.
+
+private:
+    enum CompareResult {
+        Before,
+        Prefix,
+        After,
+    };
+
+    CompareResult compare(const HTMLEntityTableEntry*, UChar) const;
+    const HTMLEntityTableEntry* findFirst(UChar) const;
+    const HTMLEntityTableEntry* findLast(UChar) const;
+
+    void fail() // Ends the search; m_mostRecentMatch and m_currentLength are left untouched.
+    {
+        m_currentValue = 0;
+        m_first = 0;
+        m_last = 0;
+    }
+
+    int m_currentLength; // Number of successful advance() calls so far.
+    UChar32 m_currentValue;
+
+    const HTMLEntityTableEntry* m_mostRecentMatch;
+    const HTMLEntityTableEntry* m_first; // Inclusive bounds of the entries still compatible with the consumed characters.
+    const HTMLEntityTableEntry* m_last;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLEntityTable.h b/Source/WebCore/html/parser/HTMLEntityTable.h
new file mode 100644
index 0000000..3b9ab4e
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntityTable.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLEntityTable_h
+#define HTMLEntityTable_h
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+struct HTMLEntityTableEntry { // One entity: the characters of its name, the name's length, and the code point it stands for.
+    UChar lastCharacter() const { return entity[length - 1]; } // Requires length > 0.
+
+    const UChar* entity; // Name characters; indexed from 0 to length - 1.
+    int length;
+    UChar32 value;
+};
+
+class HTMLEntityTable { // Static accessors for the entity table; the definitions are not in this header.
+public:
+    static const HTMLEntityTableEntry* firstEntry();
+    static const HTMLEntityTableEntry* lastEntry();
+
+    static const HTMLEntityTableEntry* firstEntryStartingWith(UChar); // NOTE(review): HTMLEntitySearch::advance() null-checks both results, so presumably 0 means no entry starts with the character - confirm.
+    static const HTMLEntityTableEntry* lastEntryStartingWith(UChar);
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLFormattingElementList.cpp b/Source/WebCore/html/parser/HTMLFormattingElementList.cpp
new file mode 100644
index 0000000..22bf03e
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLFormattingElementList.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "HTMLFormattingElementList.h"
+
+#include "Element.h"
+#include "NotImplemented.h"
+
+namespace WebCore {
+
+HTMLFormattingElementList::HTMLFormattingElementList() // Nothing to set up: m_entries is default-constructed.
+{
+}
+
+HTMLFormattingElementList::~HTMLFormattingElementList() // No explicit cleanup; m_entries is destroyed with the object.
+{
+}
+
+// Searches from the most recently appended entry backwards and returns the
+// first element whose local name is |targetName|.  Returns 0 if a marker is
+// reached first or the list is exhausted.
+Element* HTMLFormattingElementList::closestElementInScopeWithName(const AtomicString& targetName)
+{
+    for (size_t remaining = m_entries.size(); remaining > 0; --remaining) {
+        const Entry& candidate = m_entries[remaining - 1];
+        if (candidate.isMarker())
+            break;
+        if (candidate.element()->hasLocalName(targetName))
+            return candidate.element();
+    }
+    return 0;
+}
+
+// True when find() locates an entry for |element|.
+bool HTMLFormattingElementList::contains(Element* element)
+{
+    Entry* entry = find(element);
+    return entry != 0;
+}
+
+// Returns the entry reverseFind() locates for |element|, or 0 if there is
+// none.
+HTMLFormattingElementList::Entry* HTMLFormattingElementList::find(Element* element)
+{
+    const size_t position = m_entries.reverseFind(element);
+    if (position == notFound)
+        return 0;
+    // This is somewhat of a hack, and is why this method can't be const.
+    return &m_entries[position];
+}
+
+HTMLFormattingElementList::Bookmark HTMLFormattingElementList::bookmarkFor(Element* element) // Returns a Bookmark pointing at |element|'s entry; |element| must be in the list.
+{
+    size_t index = m_entries.reverseFind(element);
+    ASSERT(index != notFound); // Checked only by the assertion; there is no fallback for a missing element.
+    return Bookmark(&at(index));
+}
+
+void HTMLFormattingElementList::swapTo(Element* oldElement, Element* newElement, const Bookmark& bookmark) // Replaces oldElement with newElement, in place if the bookmark was never moved, otherwise just after the bookmarked entry.
+{
+    ASSERT(contains(oldElement));
+    ASSERT(!contains(newElement));
+    if (!bookmark.hasBeenMoved()) {
+        ASSERT(bookmark.mark()->element() == oldElement);
+        bookmark.mark()->replaceElement(newElement); // The bookmark still points at oldElement's entry: overwrite it.
+        return;
+    }
+    size_t index = bookmark.mark() - first(); // Turn the bookmarked Entry* into an index into m_entries.
+    ASSERT(index < size());
+    m_entries.insert(index + 1, newElement); // Insert after the bookmarked entry...
+    remove(oldElement); // ...then drop oldElement's entry.
+}
+
+void HTMLFormattingElementList::append(Element* element) // Adds an entry for |element| at the end of the list.
+{
+    m_entries.append(element);
+}
+
+// Removes the entry reverseFind() locates for |element|; does nothing if
+// there is none.
+void HTMLFormattingElementList::remove(Element* element)
+{
+    const size_t position = m_entries.reverseFind(element);
+    if (position == notFound)
+        return;
+    m_entries.remove(position);
+}
+
+void HTMLFormattingElementList::appendMarker() // Adds a marker entry (one with no element) at the end of the list.
+{
+    m_entries.append(Entry::MarkerEntry);
+}
+
+// Pops entries off the end of the list until a marker has been popped or the
+// list is empty.
+void HTMLFormattingElementList::clearToLastMarker()
+{
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
+    while (!m_entries.isEmpty()) {
+        const bool poppedMarker = m_entries.last().isMarker();
+        m_entries.removeLast();
+        if (poppedMarker)
+            return;
+    }
+}
+
+#ifndef NDEBUG
+
+// Debug helper: dumps the list from the newest entry to the oldest, printing
+// "marker" to stderr for markers and calling showNode() on elements.
+void HTMLFormattingElementList::show()
+{
+    for (size_t remaining = m_entries.size(); remaining > 0; --remaining) {
+        const Entry& current = m_entries[remaining - 1];
+        if (!current.isMarker()) {
+            current.element()->showNode();
+            continue;
+        }
+        fprintf(stderr, "marker\n");
+    }
+}
+
+#endif
+
+}
diff --git a/Source/WebCore/html/parser/HTMLFormattingElementList.h b/Source/WebCore/html/parser/HTMLFormattingElementList.h
new file mode 100644
index 0000000..aca05bb
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLFormattingElementList.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLFormattingElementList_h
+#define HTMLFormattingElementList_h
+
+#include <wtf/Forward.h>
+#include <wtf/RefPtr.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+class Element;
+
+// This may end up merged into HTMLElementStack.
+class HTMLFormattingElementList : public Noncopyable { // Ordered list of element entries and markers, stored in m_entries with the newest entry last.
+public:
+    HTMLFormattingElementList();
+    ~HTMLFormattingElementList();
+
+    // Ideally Entry would be private, but HTMLTreeBuilder has to coordinate
+    // between the HTMLFormattingElementList and HTMLElementStack and needs
+    // access to Entry::isMarker() and Entry::replaceElement() to do so.
+    class Entry {
+    public:
+        // Inline because they're hot and Vector<T> uses them.
+        explicit Entry(Element* element)
+            : m_element(element)
+        {
+            ASSERT(element);
+        }
+        enum MarkerEntryType { MarkerEntry };
+        Entry(MarkerEntryType)
+            : m_element(0)
+        {
+        }
+        ~Entry() {}
+
+        bool isMarker() const { return !m_element; }
+
+        Element* element() const
+        {
+            // The fact that !m_element == isMarker() is an implementation
+            // detail; callers should check isMarker() before calling element().
+            ASSERT(m_element);
+            return m_element.get();
+        }
+        void replaceElement(PassRefPtr<Element> element) { m_element = element; }
+
+        // Needed for use with Vector.  These are super-hot and must be inline.
+        bool operator==(Element* element) const { return m_element == element; }
+        bool operator!=(Element* element) const { return m_element != element; }
+
+    private:
+        RefPtr<Element> m_element; // Null for a marker entry.
+    };
+
+    class Bookmark { // A pointer to an Entry plus a flag recording whether moveToAfter() has been called.
+    public:
+        Bookmark(Entry* entry)
+            : m_hasBeenMoved(false)
+            , m_mark(entry)
+        {
+        }
+
+        void moveToAfter(Entry* before)
+        {
+            m_hasBeenMoved = true;
+            m_mark = before;
+        }
+
+        bool hasBeenMoved() const { return m_hasBeenMoved; }
+        Entry* mark() const { return m_mark; }
+
+    private:
+        bool m_hasBeenMoved;
+        Entry* m_mark; // Raw pointer into the list's storage.
+    };
+
+    bool isEmpty() const { return !size(); }
+    size_t size() const { return m_entries.size(); }
+
+    Element* closestElementInScopeWithName(const AtomicString&); // Searches newest-first and stops at the first marker.
+
+    Entry* find(Element*); // Returns 0 when the element is not in the list.
+    bool contains(Element*);
+    void append(Element*);
+    void remove(Element*); // No-op when the element is not in the list.
+
+    Bookmark bookmarkFor(Element*); // The element must be in the list.
+    void swapTo(Element* oldElement, Element* newElement, const Bookmark&);
+
+    void appendMarker();
+    // clearToLastMarker also clears the marker (per the HTML5 spec).
+    void clearToLastMarker();
+
+    const Entry& at(size_t i) const { return m_entries[i]; }
+    Entry& at(size_t i) { return m_entries[i]; }
+
+#ifndef NDEBUG
+    void show();
+#endif
+
+private:
+    Entry* first() { return &at(0); } // Used by swapTo() to turn a bookmarked Entry* into an index.
+
+    Vector<Entry> m_entries;
+};
+
+}
+
+#endif // HTMLFormattingElementList_h
diff --git a/Source/WebCore/html/parser/HTMLInputStream.h b/Source/WebCore/html/parser/HTMLInputStream.h
new file mode 100644
index 0000000..1bfbaf9
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLInputStream.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLInputStream_h
+#define HTMLInputStream_h
+
+#include "SegmentedString.h"
+
+namespace WebCore {
+
+// The InputStream is made up of a sequence of SegmentedStrings:
+//
+// [--current--][--next--][--next--] ... [--next--]
+//            /\                         (also called m_last)
+//            L_ current insertion point
+//
+// The current segmented string is stored in InputStream.  Each of the
+// afterInsertionPoint buffers is stored in an InsertionPointRecord on the
+// stack.
+//
+// We remove characters from the "current" string in the InputStream.
+// document.write() will add characters at the current insertion point,
+// which appends them to the "current" string.
+//
+// m_last is a pointer to the last of the afterInsertionPoint strings.
+// The network adds data at the end of the InputStream, which appends
+// them to the "last" string.
+class HTMLInputStream : public Noncopyable {
+public:
+    HTMLInputStream()
+        : m_last(&m_first) // Initially the current string is also the last one.
+    {
+    }
+
+    void appendToEnd(const SegmentedString& string) // Appends |string| to the string m_last points at.
+    {
+        m_last->append(string);
+    }
+
+    void insertAtCurrentInsertionPoint(const SegmentedString& string) // Appends |string| to the current string, m_first.
+    {
+        m_first.append(string);
+    }
+
+    bool hasInsertionPoint() const
+    {
+        if (&m_first != m_last)
+            return true;
+        if (!haveSeenEndOfFile()) {
+            // FIXME: Somehow we need to understand the difference between
+            // input streams that are coming off the network and streams that
+            // were created with document.open(). In the latter case, we always
+            // have an insertion point at the end of the stream until someone
+            // calls document.close().
+            return true;
+        }
+        return false;
+    }
+
+    void markEndOfFile() // Appends a single NUL character as the end-of-file marker, then closes the last string.
+    {
+        // FIXME: This should use InputStreamPreprocessor::endOfFileMarker
+        // once InputStreamPreprocessor is split off into its own header.
+        static const UChar endOfFileMarker = 0;
+        m_last->append(SegmentedString(String(&endOfFileMarker, 1)));
+        m_last->close();
+    }
+
+    bool haveSeenEndOfFile() const // True when the string m_last points at has been closed.
+    {
+        return m_last->isClosed();
+    }
+
+    SegmentedString& current() { return m_first; }
+    const SegmentedString& current() const { return m_first; }
+
+    void splitInto(SegmentedString& next) // Hands the current contents to |next| and leaves m_first empty.
+    {
+        next = m_first;
+        m_first = SegmentedString();
+        if (m_last == &m_first) {
+            // We used to only have one SegmentedString in the InputStream
+            // but now we have two.  That means m_first is no longer also
+            // the m_last string, |next| is now the last one.
+            m_last = &next; // NOTE: the stream now holds the address of the caller's |next|.
+        }
+    }
+
+    void mergeFrom(SegmentedString& next) // Counterpart of splitInto(): appends |next| back onto m_first.
+    {
+        m_first.append(next);
+        if (m_last == &next) {
+            // The string |next| used to be the last SegmentedString in
+            // the InputStream.  Now that it's been merged into m_first,
+            // that makes m_first the last one.
+            m_last = &m_first;
+        }
+        if (next.isClosed()) {
+            // We also need to merge the "closed" state from next to
+            // m_first.  Arguably, this work could be done in append().
+            m_first.close();
+        }
+    }
+
+private:
+    SegmentedString m_first; // The "current" string returned by current().
+    SegmentedString* m_last; // Either &m_first or the address of a string passed to splitInto().
+};
+
+class InsertionPointRecord : public Noncopyable { // Splits the input stream on construction and merges it back on destruction.
+public:
+    explicit InsertionPointRecord(HTMLInputStream& inputStream)
+        : m_inputStream(&inputStream)
+    {
+        m_line = m_inputStream->current().currentLine();
+        m_column = m_inputStream->current().currentColumn();
+        m_inputStream->splitInto(m_next);
+        // We 'fork' the current position and use it for the generated script part.
+        // This is a bit weird, because the generated part does not have positions within an HTML document.
+        m_inputStream->current().setCurrentPosition(m_line, m_column, 0);
+    }
+
+    ~InsertionPointRecord()
+    {
+        // Some inserted text may have remained in the input stream. E.g. if a script has written "&amp" or "<table",
+        // it stays in the buffer because it cannot be properly tokenized before we see the next part.
+        int unparsedRemainderLength = m_inputStream->current().length();
+        m_inputStream->mergeFrom(m_next);
+        // We restore the position for the character that goes right after the unparsed remainder.
+        m_inputStream->current().setCurrentPosition(m_line, m_column, unparsedRemainderLength);
+    }
+
+private:
+    HTMLInputStream* m_inputStream;
+    SegmentedString m_next; // Holds what splitInto() moved out of the stream's current string.
+    WTF::ZeroBasedNumber m_line; // Line of the current string, captured before the split.
+    WTF::ZeroBasedNumber m_column; // Column of the current string, captured before the split.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp b/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
new file mode 100644
index 0000000..eac7d28
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLMetaCharsetParser.h"
+
+#include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
+#include "HTMLTokenizer.h"
+#include "PlatformString.h"
+#include "TextCodec.h"
+#include "TextEncodingRegistry.h"
+
+using namespace WTF;
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+HTMLMetaCharsetParser::HTMLMetaCharsetParser()
+    : m_tokenizer(HTMLTokenizer::create(false)) // No pre-HTML5 parser quirks.
+    , m_assumedCodec(newTextCodec(Latin1Encoding())) // Raw bytes are decoded with a Latin-1 codec before being tokenized.
+    , m_inHeadSection(true) // Cleared by checkForMetaCharset() when a tag outside its permitted <head> set is seen.
+    , m_doneChecking(false) // Set by checkForMetaCharset() once it has reached a final answer.
+{
+}
+
+HTMLMetaCharsetParser::~HTMLMetaCharsetParser() // NOTE(review): presumably out of line because the header only forward-declares HTMLTokenizer and TextCodec -- confirm.
+{
+}
+
+static const char charsetString[] = "charset"; // Keyword searched for inside the attribute value.
+static const size_t charsetLength = sizeof("charset") - 1; // Length of "charset" without the terminating NUL.
+
+String HTMLMetaCharsetParser::extractCharset(const String& value) // Returns the text following "charset=" in |value|, or "" when none is found.
+{
+    size_t pos = 0;
+    unsigned length = value.length();
+
+    while (pos < length) {
+        pos = value.find(charsetString, pos, false); // NOTE(review): the |false| presumably requests a case-insensitive search -- confirm against String::find.
+        if (pos == notFound)
+            break;
+
+        pos += charsetLength;
+
+        // Skip whitespace.
+        while (pos < length && value[pos] <= ' ')
+            ++pos;
+
+        if (value[pos] != '=') // NOTE(review): pos may equal length here; this relies on String::operator[] tolerating that -- confirm.
+            continue; // Not followed by '=': look for the next occurrence of "charset".
+
+        ++pos;
+
+        while (pos < length && value[pos] <= ' ') // Skip whitespace after the '='.
+            ++pos;
+
+        char quoteMark = 0; // Stays 0 when the charset value is unquoted.
+        if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {
+            quoteMark = static_cast<char>(value[pos++]);
+            ASSERT(!(quoteMark & 0x80));
+        }
+        // Nothing follows the '=' (or the opening quote): give up.
+        if (pos == length)
+            break;
+        // A quoted value runs up to the matching quote; an unquoted one stops at a character <= ' ', a quote or ';'.
+        unsigned end = pos;
+        while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';')))
+            ++end;
+
+        if (quoteMark && (end == length))
+            break; // Close quote not found.
+
+        return value.substring(pos, end - pos);
+    }
+
+    return "";
+}
+
+bool HTMLMetaCharsetParser::processMeta() // Inspects the attributes of m_token; returns true when they yield a valid m_encoding.
+{
+    bool gotPragma = false; // Set when an http-equiv attribute equals "content-type", ignoring case.
+    Mode mode = None; // Records which attribute supplied |charset|.
+    String charset;
+
+    const HTMLToken::AttributeList& attributes = m_token.attributes();
+    for (HTMLToken::AttributeList::const_iterator iter = attributes.begin();
+         iter != attributes.end(); ++iter) {
+        AtomicString attributeName(iter->m_name.data(), iter->m_name.size());
+        String attributeValue(iter->m_value.data(), iter->m_value.size());
+
+        if (attributeName == http_equivAttr) {
+            if (equalIgnoringCase(attributeValue, "content-type"))
+                gotPragma = true;
+        } else if (charset.isEmpty()) { // Once |charset| is non-empty, later charset/content attributes are ignored.
+            if (attributeName == charsetAttr) {
+                charset = attributeValue;
+                mode = Charset;
+            } else if (attributeName == contentAttr) {
+                charset = extractCharset(attributeValue);
+                if (charset.length())
+                    mode = Pragma;
+            }
+        }
+    }
+
+    if (mode == Charset || (mode == Pragma && gotPragma)) { // A charset taken from content counts only together with the content-type pragma.
+        m_encoding = TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));
+        if (m_encoding.isValid())
+            return true;
+    }
+
+    return false;
+}
+
+static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.
+
+bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length) // Returns true once checking is finished; false means more input is needed.
+{
+    if (m_doneChecking)
+        return true;
+
+    ASSERT(!m_encoding.isValid());
+
+    // We still don't have an encoding, and are in the head.
+    // The following tags are allowed in <head>:
+    // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
+
+    // We stop scanning when a tag that is not permitted in <head>
+    // is seen, rather than when </head> is seen, because that more closely
+    // matches behavior in other browsers; more details in
+    // <http://bugs.webkit.org/show_bug.cgi?id=3590>.
+
+    // Additionally, we ignore things that look like tags in <title>, <script>
+    // and <noscript>; see <http://bugs.webkit.org/show_bug.cgi?id=4560>,
+    // <http://bugs.webkit.org/show_bug.cgi?id=12165> and
+    // <http://bugs.webkit.org/show_bug.cgi?id=12389>.
+
+    // Since many sites have charset declarations after <body> or other tags
+    // that are disallowed in <head>, we don't bail out until we've checked at
+    // least bytesToCheckUnconditionally bytes of input.
+
+    m_input.append(SegmentedString(m_assumedCodec->decode(data, length)));
+
+    while (m_tokenizer->nextToken(m_input, m_token)) {
+        bool end = m_token.type() == HTMLToken::EndTag;
+        if (end || m_token.type() == HTMLToken::StartTag) {
+            AtomicString tagName(m_token.name().data(), m_token.name().size());
+            if (!end) {
+                m_tokenizer->updateStateFor(tagName, 0); // NOTE(review): presumably switches the tokenizer state for tags such as <script> or <title> -- confirm.
+                if (tagName == metaTag && processMeta()) {
+                    m_doneChecking = true;
+                    return true;
+                }
+            }
+
+            if (tagName != scriptTag && tagName != noscriptTag
+                && tagName != styleTag && tagName != linkTag
+                && tagName != metaTag && tagName != objectTag
+                && tagName != titleTag && tagName != baseTag
+                && (end || tagName != htmlTag) && (end || tagName != headTag)) { // <html> and <head> are tolerated only as start tags.
+                m_inHeadSection = false;
+            }
+        }
+
+        if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToCheckUnconditionally) {
+            m_doneChecking = true;
+            return true;
+        }
+
+        m_token.clear();
+    }
+
+    return false;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLMetaCharsetParser.h b/Source/WebCore/html/parser/HTMLMetaCharsetParser.h
new file mode 100644
index 0000000..c3136f5
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLMetaCharsetParser.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLMetaCharsetParser_h
+#define HTMLMetaCharsetParser_h
+
+#include "HTMLToken.h"
+#include "SegmentedString.h"
+#include "TextEncoding.h"
+#include <wtf/Noncopyable.h>
+
+namespace WebCore {
+
+class HTMLTokenizer;
+class TextCodec;
+
+class HTMLMetaCharsetParser : public Noncopyable {
+public:
+    static PassOwnPtr<HTMLMetaCharsetParser> create() { return adoptPtr(new HTMLMetaCharsetParser()); }
+
+    ~HTMLMetaCharsetParser();
+
+    // Returns true if done checking, regardless of whether an encoding is found.
+    bool checkForMetaCharset(const char*, size_t);
+
+    const TextEncoding& encoding() { return m_encoding; } // Encoding taken from a <meta> tag; check isValid(), since none may have been found.
+
+private:
+    HTMLMetaCharsetParser();
+
+    bool processMeta(); // Examines the attributes of m_token; true when they give a valid encoding.
+    String extractCharset(const String&); // Pulls the charset value out of a content attribute value.
+
+    enum Mode {
+        None, // No charset found.
+        Charset, // Taken from a charset attribute.
+        Pragma, // Taken from a content attribute.
+    };
+
+    OwnPtr<HTMLTokenizer> m_tokenizer;
+    OwnPtr<TextCodec> m_assumedCodec; // Codec used to decode the raw bytes before tokenizing.
+    SegmentedString m_input; // Decoded input appended by each checkForMetaCharset() call.
+    HTMLToken m_token;
+    bool m_inHeadSection;
+
+    bool m_doneChecking;
+    TextEncoding m_encoding;
+};
+
+}
+#endif
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.cpp b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
new file mode 100644
index 0000000..91ff8d3
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.  Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2.  Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLParserIdioms.h"
+
+#include <wtf/MathExtras.h>
+#include <wtf/dtoa.h>
+#include <wtf/text/AtomicString.h>
+
+namespace WebCore {
+
+String stripLeadingAndTrailingHTMLSpaces(const String& string)
+{
+    // Returns |string| without the HTML space characters at either end.
+    const UChar* data = string.characters();
+    unsigned size = string.length();
+
+    // Index of the first character that is not an HTML space.
+    unsigned start = 0;
+    while (start < size && isHTMLSpace(data[start]))
+        ++start;
+
+    // Only spaces (or nothing at all): a null string stays null, anything else becomes empty.
+    if (start == size)
+        return string.isNull() ? string : emptyAtom.string();
+
+    // One past the last character that is not an HTML space.
+    unsigned stop = size;
+    while (stop && isHTMLSpace(data[stop - 1]))
+        --stop;
+
+    ASSERT(start < stop);
+
+    return string.substring(start, stop - start);
+}
+
+String serializeForNumberType(double number)
+{
+    // HTML5 defines "the best representation of the number n as a floating
+    // point number" as the string produced by applying ToString() to n.
+    NumberToStringBuffer digits;
+    const unsigned digitCount = numberToString(number, digits);
+    return String(digits, digitCount);
+}
+
+bool parseToDoubleForNumberType(const String& string, double* result)
+{
+    // See HTML5 2.4.4.3 `Real numbers.'
+
+    // Only '-' or an ASCII digit may come first; String::toDouble() would also accept a leading + or whitespace.
+    const UChar leadingCharacter = string[0];
+    if (!isASCIIDigit(leadingCharacter) && leadingCharacter != '-')
+        return false;
+
+    bool conversionSucceeded = false;
+    const double parsed = string.toDouble(&conversionSucceeded);
+    if (!conversionSucceeded)
+        return false;
+
+    // String::toDouble() lets NaN and infinity through; neither is acceptable here.
+    if (!isfinite(parsed))
+        return false;
+
+    // The value has to lie within the finite range of an IEEE 754 single-precision float.
+    // See HTML5 2.4.4.3 `Real numbers.'
+    if (parsed < -FLT_MAX || parsed > FLT_MAX)
+        return false;
+
+    if (result) {
+        // Store +0 in place of -0: a zero of either sign is false, so the literal 0 is chosen.
+        *result = parsed ? parsed : 0;
+    }
+
+    return true;
+}
+
+bool parseToDoubleForNumberTypeWithDecimalPlaces(const String& string, double *result, unsigned *decimalPlaces) // As parseToDoubleForNumberType(), and also reports the decimal places of |string| through |decimalPlaces| (may be 0).
+{
+    if (decimalPlaces)
+        *decimalPlaces = 0;
+
+    if (!parseToDoubleForNumberType(string, result))
+        return false;
+
+    if (!decimalPlaces)
+        return true;
+    // Locate the decimal point and the exponent marker ('e' or 'E').
+    size_t dotIndex = string.find('.');
+    size_t eIndex = string.find('e');
+    if (eIndex == notFound) // No lowercase 'e': try the uppercase form.
+        eIndex = string.find('E');
+    // Count the characters after the '.', up to the exponent marker or the end of the string.
+    unsigned baseDecimalPlaces = 0;
+    if (dotIndex != notFound) {
+        if (eIndex == notFound)
+            baseDecimalPlaces = string.length() - dotIndex - 1;
+        else
+            baseDecimalPlaces = eIndex - dotIndex - 1;
+    }
+
+    int exponent = 0;
+    if (eIndex != notFound) {
+        unsigned cursor = eIndex + 1, cursorSaved;
+        int digit, exponentSign;
+        int32_t exponent32;
+        size_t length = string.length();
+
+        // Not using String.toInt() in order to perform the same computation as dtoa() does.
+        exponentSign = 0;
+        switch (digit = string[cursor]) {
+        case '-':
+            exponentSign = 1; // Deliberate fall-through: the sign character is skipped below in either case.
+        case '+':
+            digit = string[++cursor];
+        }
+        if (digit >= '0' && digit <= '9') {
+            while (cursor < length && digit == '0') // Skip leading zeros of the exponent.
+                digit = string[++cursor];
+            if (digit > '0' && digit <= '9') {
+                exponent32 = digit - '0';
+                cursorSaved = cursor;
+                while (cursor < length && (digit = string[++cursor]) >= '0' && digit <= '9')
+                    exponent32 = (10 * exponent32) + digit - '0';
+                if (cursor - cursorSaved > 8 || exponent32 > 19999)
+                    /* Avoid confusion from exponents
+                     * so large that e might overflow.
+                     */
+                    exponent = 19999; /* safe for 16 bit ints */
+                else
+                    exponent = static_cast<int>(exponent32);
+                if (exponentSign)
+                    exponent = -exponent;
+            } else
+                exponent = 0;
+        }
+    }
+    // The exponent is subtracted from the counted places; the result is clamped to [0, 19999].
+    int intDecimalPlaces = baseDecimalPlaces - exponent;
+    if (intDecimalPlaces < 0)
+        *decimalPlaces = 0;
+    else if (intDecimalPlaces > 19999)
+        *decimalPlaces = 19999;
+    else
+        *decimalPlaces = static_cast<unsigned>(intDecimalPlaces);
+
+    return true;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
+bool parseHTMLInteger(const String& input, int& value)
+{
+    // Step 1
+    // Step 2
+    const UChar* position = input.characters();
+    const UChar* end = position + input.length();
+
+    // Step 3: the sign is kept as a leading '-' in |digits|, so that INT_MIN can be parsed.
+    Vector<UChar, 16> digits;
+
+    // Step 4
+    while (position < end) {
+        if (!isHTMLSpace(*position))
+            break;
+        ++position;
+    }
+
+    // Step 5
+    if (position == end)
+        return false;
+    ASSERT(position < end);
+
+    // Step 6
+    if (*position == '-') {
+        digits.append(static_cast<UChar>('-'));
+        ++position;
+    } else if (*position == '+')
+        ++position;
+    if (position == end)
+        return false;
+    ASSERT(position < end);
+
+    // Step 7
+    if (!isASCIIDigit(*position))
+        return false;
+
+    // Step 8
+    while (position < end) {
+        if (!isASCIIDigit(*position))
+            break;
+        digits.append(*position++);
+    }
+
+    // Step 9: fail, leaving |value| at 0, when the number does not fit in an int.
+    bool ok = false;
+    value = charactersToIntStrict(digits.data(), digits.size(), &ok);
+    return ok;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.h b/Source/WebCore/html/parser/HTMLParserIdioms.h
new file mode 100644
index 0000000..4e8e58f
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLParserIdioms.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.  Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 2.  Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLParserIdioms_h
+#define HTMLParserIdioms_h
+
+#include <wtf/Forward.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Space characters as defined by the HTML specification.
+bool isHTMLSpace(UChar);
+bool isNotHTMLSpace(UChar);
+
+// Strip leading and trailing whitespace as defined by the HTML specification. 
+String stripLeadingAndTrailingHTMLSpaces(const String&);
+
+// An implementation of the HTML specification's algorithm to convert a number to a string for number and range types.
+String serializeForNumberType(double);
+
+// Convert the specified string to a double. If the conversion fails, the return value is false.
+// Leading or trailing illegal characters cause failure, as does passing an empty string.
+// The double* parameter may be 0 to check if the string can be parsed without getting the result.
+bool parseToDoubleForNumberType(const String&, double*);
+bool parseToDoubleForNumberTypeWithDecimalPlaces(const String&, double*, unsigned*);
+
+// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
+bool parseHTMLInteger(const String&, int&);
+
+// Inline implementations of some of the functions declared above.
+
+inline bool isHTMLSpace(UChar character)
+{
+    // Histogram from Apple's page load test combined with some ad hoc browsing some other test suites.
+    //
+    //     82%: 216330 non-space characters, all > U+0020
+    //     11%:  30017 plain space characters, U+0020
+    //      5%:  12099 newline characters, U+000A
+    //      2%:   5346 tab characters, U+0009
+    //
+    // No other characters seen. No U+000C or U+000D, and no other control characters.
+    // Accordingly, we check for non-spaces first, then space, then newline, then tab, then the other characters.
+    // The characters accepted are U+0020, U+000A, U+0009, U+000D and U+000C.
+    return character <= ' ' && (character == ' ' || character == '\n' || character == '\t' || character == '\r' || character == '\f');
+}
+
+inline bool isNotHTMLSpace(UChar character) // Plain negation of isHTMLSpace().
+{
+    return !isHTMLSpace(character);
+}
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLParserScheduler.cpp b/Source/WebCore/html/parser/HTMLParserScheduler.cpp
new file mode 100644
index 0000000..56db1aa
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLParserScheduler.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLParserScheduler.h"
+
+#include "FrameView.h" // Only for isLayoutTimerActive
+#include "HTMLDocumentParser.h"
+#include "Document.h"
+
+// defaultParserChunkSize is used to define how many tokens the parser will
+// process before checking against parserTimeLimit and possibly yielding.
+// This is a performance optimization to prevent checking after every token.
+static const int defaultParserChunkSize = 4096;
+
+// defaultParserTimeLimit is the seconds the parser will run in one write() call
+// before yielding.  Inline <script> execution can cause it to exceed the limit.
+// FIXME: We would like this value to be 0.2.
+static const double defaultParserTimeLimit = 0.500;
+
+namespace WebCore {
+
+static double parserTimeLimit(Page* page)
+{
+    // A page may override the limit through the poorly named
+    // customHTMLTokenizerTimeDelay setting; otherwise the default applies.
+    const bool pageOverridesLimit = page && page->hasCustomHTMLTokenizerTimeDelay();
+    return pageOverridesLimit ? page->customHTMLTokenizerTimeDelay() : defaultParserTimeLimit;
+}
+
+static int parserChunkSize(Page* page)
+{
+    // FIXME: We may need to divide the value from customHTMLTokenizerChunkSize
+    // by some constant to translate from the "character" based behavior of the
+    // old LegacyHTMLDocumentParser to the token-based behavior of this parser.
+    // Without a page-level override the default chunk size is used.
+    const bool pageOverridesSize = page && page->hasCustomHTMLTokenizerChunkSize();
+    return pageOverridesSize ? page->customHTMLTokenizerChunkSize() : defaultParserChunkSize;
+}
+
+HTMLParserScheduler::HTMLParserScheduler(HTMLDocumentParser* parser)
+    : m_parser(parser)
+    , m_parserTimeLimit(parserTimeLimit(m_parser->document()->page())) // Page override or defaultParserTimeLimit. NOTE(review): reads m_parser, so it must be declared before this member -- confirm.
+    , m_parserChunkSize(parserChunkSize(m_parser->document()->page())) // Page override or defaultParserChunkSize.
+    , m_continueNextChunkTimer(this, &HTMLParserScheduler::continueNextChunkTimerFired)
+    , m_isSuspendedWithActiveTimer(false)
+{
+}
+
+HTMLParserScheduler::~HTMLParserScheduler()
+{
+    m_continueNextChunkTimer.stop(); // Cancels the continue-next-chunk timer before the scheduler goes away.
+}
+
+// FIXME: This belongs on Document.
+static bool isLayoutTimerActive(Document* doc)
+{
+    ASSERT(doc);
+    return doc->view() && doc->view()->layoutPending() && !doc->minimumLayoutDelay(); // The view has a layout pending and minimumLayoutDelay() is zero.
+}
+
+void HTMLParserScheduler::continueNextChunkTimerFired(Timer<HTMLParserScheduler>* timer) // Timer callback: resumes parsing unless a layout should run first.
+{
+    ASSERT_UNUSED(timer, timer == &m_continueNextChunkTimer);
+    // FIXME: The timer class should handle timer priorities instead of this code.
+    // If a layout is scheduled, wait again to let the layout timer run first.
+    if (isLayoutTimerActive(m_parser->document())) {
+        m_continueNextChunkTimer.startOneShot(0); // Re-arm with a zero delay and check again on the next firing.
+        return;
+    }
+    m_parser->resumeParsingAfterYield();
+}
+
+void HTMLParserScheduler::suspend() // Stops a pending chunk timer and remembers that resume() has to restart it.
+{
+    ASSERT(!m_isSuspendedWithActiveTimer);
+    if (m_continueNextChunkTimer.isActive()) {
+        m_isSuspendedWithActiveTimer = true;
+        m_continueNextChunkTimer.stop();
+    }
+}
+
+void HTMLParserScheduler::resume() // Restarts the chunk timer when suspend() had stopped an active one.
+{
+    ASSERT(!m_continueNextChunkTimer.isActive());
+    if (m_isSuspendedWithActiveTimer) {
+        m_isSuspendedWithActiveTimer = false;
+        m_continueNextChunkTimer.startOneShot(0);
+    }
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLParserScheduler.h b/Source/WebCore/html/parser/HTMLParserScheduler.h
new file mode 100644
index 0000000..3a20b2b
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLParserScheduler.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLParserScheduler_h
+#define HTMLParserScheduler_h
+
+#include "Timer.h"
+#include <wtf/CurrentTime.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+class HTMLDocumentParser;
+
+class HTMLParserScheduler :  public Noncopyable {
+public:
+    static PassOwnPtr<HTMLParserScheduler> create(HTMLDocumentParser* parser)
+    {
+        return adoptPtr(new HTMLParserScheduler(parser));
+    }
+    ~HTMLParserScheduler();
+
+    struct PumpSession { // Bookkeeping passed to shouldContinueParsing() by the caller.
+        PumpSession()
+            : processedTokens(0)
+            , startTime(currentTime())
+        {
+        }
+
+        int processedTokens; // Tokens counted since the last time-limit check.
+        double startTime; // currentTime() captured when the session was constructed.
+    };
+
+    // Inline as this is called after every token in the parser. Returns false when the parser should yield.
+    bool shouldContinueParsing(PumpSession& session)
+    {
+        if (session.processedTokens > m_parserChunkSize) { // Consult the clock only once per chunk of tokens.
+            session.processedTokens = 0;
+            double elapsedTime = currentTime() - session.startTime;
+            if (elapsedTime > m_parserTimeLimit) {
+                // Schedule the parser to continue and yield from the parser.
+                m_continueNextChunkTimer.startOneShot(0);
+                return false;
+            }
+        }
+
+        ++session.processedTokens;
+        return true;
+    }
+
+    bool isScheduledForResume() const { return m_isSuspendedWithActiveTimer || m_continueNextChunkTimer.isActive(); }
+
+    void suspend(); // Stops an active continue timer and remembers that it was active.
+    void resume(); // Re-arms the continue timer if suspend() had stopped it.
+
+private:
+    HTMLParserScheduler(HTMLDocumentParser*);
+
+    void continueNextChunkTimerFired(Timer<HTMLParserScheduler>*);
+
+    HTMLDocumentParser* m_parser; // Parser to resume when the timer fires; declared first because later initializers read it.
+
+    double m_parserTimeLimit; // Compared against currentTime() differences in shouldContinueParsing().
+    int m_parserChunkSize; // Number of tokens processed between time-limit checks.
+    Timer<HTMLParserScheduler> m_continueNextChunkTimer;
+    bool m_isSuspendedWithActiveTimer; // True between suspend() and resume() when the timer was active at suspend time.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLPreloadScanner.cpp b/Source/WebCore/html/parser/HTMLPreloadScanner.cpp
new file mode 100644
index 0000000..d23542f
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLPreloadScanner.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "HTMLPreloadScanner.h"
+
+#include "CachedResourceLoader.h"
+#include "Document.h"
+#include "HTMLDocumentParser.h"
+#include "HTMLTokenizer.h"
+#include "HTMLLinkElement.h"
+#include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
+#include "MediaList.h"
+#include "MediaQueryEvaluator.h"
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+class PreloadTask { // Collects, from one start-tag token, what is needed to preload a script, image or stylesheet.
+public:
+    explicit PreloadTask(const HTMLToken& token) // explicit: a token must never convert to a task implicitly.
+        : m_tagName(token.name().data(), token.name().size())
+        , m_linkIsStyleSheet(false)
+        , m_linkMediaAttributeIsScreen(true)
+    {
+        processAttributes(token.attributes());
+    }
+
+    void processAttributes(const HTMLToken::AttributeList& attributes)
+    {
+        if (m_tagName != scriptTag && m_tagName != imgTag && m_tagName != linkTag)
+            return; // Only <script>, <img> and <link> tokens can produce a preload.
+
+        for (HTMLToken::AttributeList::const_iterator iter = attributes.begin();
+             iter != attributes.end(); ++iter) {
+            AtomicString attributeName(iter->m_name.data(), iter->m_name.size());
+            String attributeValue(iter->m_value.data(), iter->m_value.size());
+
+            if (attributeName == charsetAttr) // Recorded for every handled tag; preload() passes it on for scripts and stylesheets only.
+                m_charset = attributeValue;
+
+            if (m_tagName == scriptTag || m_tagName == imgTag) {
+                if (attributeName == srcAttr)
+                    setUrlToLoad(attributeValue);
+            } else if (m_tagName == linkTag) {
+                if (attributeName == hrefAttr)
+                    setUrlToLoad(attributeValue);
+                else if (attributeName == relAttr)
+                    m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
+                else if (attributeName == mediaAttr)
+                    m_linkMediaAttributeIsScreen = linkMediaAttributeIsScreen(attributeValue);
+            }
+        }
+    }
+
+    static bool relAttributeIsStyleSheet(const String& attributeValue)
+    {
+        HTMLLinkElement::RelAttribute rel;
+        HTMLLinkElement::tokenizeRelAttribute(attributeValue, rel);
+        return rel.m_isStyleSheet && !rel.m_isAlternate && !rel.m_isIcon && !rel.m_isDNSPrefetch;
+    }
+
+    static bool linkMediaAttributeIsScreen(const String& attributeValue)
+    {
+        if (attributeValue.isEmpty())
+            return true; // An empty media attribute never blocks the preload.
+        RefPtr<MediaList> mediaList = MediaList::createAllowingDescriptionSyntax(attributeValue);
+
+        // Only preload screen media stylesheets. Used this way, the evaluator evaluates to true for any
+        // rules containing complex queries (full evaluation is possible, but it requires a frame and a
+        // style selector, which may be problematic here).
+        MediaQueryEvaluator mediaQueryEvaluator("screen");
+        return mediaQueryEvaluator.eval(mediaList.get());
+    }
+
+    void setUrlToLoad(const String& attributeValue)
+    {
+        // We only respect the first src/href, per HTML5:
+        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
+        if (!m_urlToLoad.isEmpty())
+            return;
+        m_urlToLoad = stripLeadingAndTrailingHTMLSpaces(attributeValue);
+    }
+
+    void preload(Document* document, bool scanningBody)
+    {
+        if (m_urlToLoad.isEmpty())
+            return; // No src/href was seen, or the tag is not one that is preloaded.
+
+        CachedResourceLoader* cachedResourceLoader = document->cachedResourceLoader();
+        if (m_tagName == scriptTag)
+            cachedResourceLoader->preload(CachedResource::Script, m_urlToLoad, m_charset, scanningBody);
+        else if (m_tagName == imgTag)
+            cachedResourceLoader->preload(CachedResource::ImageResource, m_urlToLoad, String(), scanningBody);
+        else if (m_tagName == linkTag && m_linkIsStyleSheet && m_linkMediaAttributeIsScreen)
+            cachedResourceLoader->preload(CachedResource::CSSStyleSheet, m_urlToLoad, m_charset, scanningBody);
+    }
+
+    const AtomicString& tagName() const { return m_tagName; }
+
+private:
+    AtomicString m_tagName;
+    String m_urlToLoad; // First src/href value, with HTML spaces stripped; empty when nothing is to be loaded.
+    String m_charset;
+    bool m_linkIsStyleSheet; // Set from rel; stays false when the token has no rel attribute.
+    bool m_linkMediaAttributeIsScreen; // Set from media; stays true when the token has no media attribute.
+};
+
+} // namespace
+
+HTMLPreloadScanner::HTMLPreloadScanner(Document* document)
+    : m_document(document)
+    , m_cssScanner(document)
+    , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document))) // The scanner owns its own tokenizer instance.
+    , m_bodySeen(false)
+    , m_inStyle(false)
+{
+}
+
+void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
+{
+    m_source.append(source); // Only queues the input; tokens are consumed by scan().
+}
+
+void HTMLPreloadScanner::scan()
+{
+    // FIXME: We should save and re-use these tokens in HTMLDocumentParser if
+    // the pending script doesn't end up calling document.write.
+    while (m_tokenizer->nextToken(m_source, m_token)) { // Loop until the tokenizer reports no further token from m_source.
+        processToken();
+        m_token.clear(); // m_token is reused for every iteration.
+    }
+}
+
+void HTMLPreloadScanner::processToken()
+{
+    if (m_inStyle) { // After a <style> start tag: character tokens go to the CSS scanner.
+        if (m_token.type() == HTMLToken::Character)
+            m_cssScanner.scan(m_token, scanningBody());
+        else if (m_token.type() == HTMLToken::EndTag) { // Any end tag leaves style mode.
+            m_inStyle = false;
+            m_cssScanner.reset();
+        }
+    }
+
+    if (m_token.type() != HTMLToken::StartTag)
+        return; // Only start tags can update tokenizer state or trigger a preload.
+
+    PreloadTask task(m_token);
+    m_tokenizer->updateStateFor(task.tagName(), m_document->frame());
+
+    if (task.tagName() == bodyTag)
+        m_bodySeen = true;
+
+    if (task.tagName() == styleTag)
+        m_inStyle = true;
+
+    task.preload(m_document, scanningBody()); // No-op unless the task collected a URL to load.
+}
+
+bool HTMLPreloadScanner::scanningBody() const
+{
+    return m_document->body() || m_bodySeen; // True if the document already has a body or this scanner has seen a <body> start tag.
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLPreloadScanner.h b/Source/WebCore/html/parser/HTMLPreloadScanner.h
new file mode 100644
index 0000000..94a90e6
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLPreloadScanner.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLPreloadScanner_h
+#define HTMLPreloadScanner_h
+
+#include "CSSPreloadScanner.h"
+#include "HTMLToken.h"
+#include "SegmentedString.h"
+#include <wtf/Noncopyable.h>
+
+namespace WebCore {
+
+class Document;
+class HTMLToken;
+class HTMLTokenizer;
+class SegmentedString;
+
+class HTMLPreloadScanner : public Noncopyable {
+public:
+    HTMLPreloadScanner(Document*);
+
+    void appendToEnd(const SegmentedString&); // Queues more input; does not scan it.
+    void scan(); // Tokenizes the queued input and issues preloads for the start tags found.
+
+private:
+    void processToken(); // Handles the token currently held in m_token.
+    bool scanningBody() const;
+
+    Document* m_document;
+    SegmentedString m_source; // Input appended via appendToEnd() and consumed by scan().
+    CSSPreloadScanner m_cssScanner; // Fed the character tokens seen while m_inStyle is set.
+    OwnPtr<HTMLTokenizer> m_tokenizer; // NOTE(review): OwnPtr.h is not included directly here; presumably pulled in transitively - confirm.
+    HTMLToken m_token; // Reused for every token produced by m_tokenizer.
+    bool m_bodySeen; // Set once a <body> start tag has been tokenized.
+    bool m_inStyle; // Set by a <style> start tag, cleared by the next end tag.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLScriptRunner.cpp b/Source/WebCore/html/parser/HTMLScriptRunner.cpp
new file mode 100644
index 0000000..2fe1d30
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLScriptRunner.cpp
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "HTMLScriptRunner.h"
+
+#include "Attribute.h"
+#include "CachedScript.h"
+#include "CachedResourceLoader.h"
+#include "Element.h"
+#include "Event.h"
+#include "Frame.h"
+#include "HTMLInputStream.h"
+#include "HTMLNames.h"
+#include "HTMLScriptRunnerHost.h"
+#include "IgnoreDestructiveWriteCountIncrementer.h"
+#include "NestingLevelIncrementer.h"
+#include "NotImplemented.h"
+#include "ScriptElement.h"
+#include "ScriptSourceCode.h"
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+HTMLScriptRunner::HTMLScriptRunner(Document* document, HTMLScriptRunnerHost* host)
+    : m_document(document)
+    , m_host(host)
+    , m_scriptNestingLevel(0)
+    , m_hasScriptsWaitingForStylesheets(false)
+{
+    ASSERT(m_host); // m_host is dereferenced without null checks throughout this class.
+}
+
+HTMLScriptRunner::~HTMLScriptRunner()
+{
+    // FIXME: Should we be passed a "done loading/parsing" callback sooner than destruction?
+    if (m_parsingBlockingScript.cachedScript() && m_parsingBlockingScript.watchingForLoad())
+        stopWatchingForLoad(m_parsingBlockingScript); // Unregister the host's load watch on the blocking script.
+
+    while (!m_scriptsToExecuteAfterParsing.isEmpty()) { // Drain the deferred queue, unregistering any load watches.
+        PendingScript pendingScript = m_scriptsToExecuteAfterParsing.takeFirst();
+        if (pendingScript.cachedScript() && pendingScript.watchingForLoad())
+            stopWatchingForLoad(pendingScript);
+    }
+}
+
+void HTMLScriptRunner::detach()
+{
+    m_document = 0; // Drops the document pointer; executeScriptsWaitingForParsing() checks for this after each script.
+}
+
+static KURL documentURLForScriptExecution(Document* document)
+{
+    if (!document || !document->frame())
+        return KURL(); // No document or no frame: fall back to a default-constructed KURL.
+
+    // Use the URL of the currently active document for this frame.
+    return document->frame()->document()->url();
+}
+
+static inline PassRefPtr<Event> createScriptLoadEvent() // File-local (static) so it cannot clash with a same-named inline in another translation unit.
+{
+    return Event::create(eventNames().loadEvent, false, false); // Flags presumably canBubble / cancelable - confirm against Event.h.
+}
+
+static inline PassRefPtr<Event> createScriptErrorEvent() // File-local (static) so it cannot clash with a same-named inline in another translation unit.
+{
+    return Event::create(eventNames().errorEvent, true, false); // Flags presumably canBubble / cancelable - confirm against Event.h.
+}
+
+ScriptSourceCode HTMLScriptRunner::sourceFromPendingScript(const PendingScript& script, bool& errorOccurred) const
+{
+    if (script.cachedScript()) { // Has a cached resource: source and error state both come from it.
+        errorOccurred = script.cachedScript()->errorOccurred();
+        ASSERT(script.cachedScript()->isLoaded());
+        return ScriptSourceCode(script.cachedScript());
+    }
+    errorOccurred = false; // No cached resource: the source is the element's own text content.
+    return ScriptSourceCode(script.element()->textContent(), documentURLForScriptExecution(m_document), script.startingPosition());
+}
+
+bool HTMLScriptRunner::isPendingScriptReady(const PendingScript& script)
+{
+    m_hasScriptsWaitingForStylesheets = !m_document->haveStylesheetsLoaded(); // Side effect: the flag is refreshed on every call.
+    if (m_hasScriptsWaitingForStylesheets)
+        return false; // Pending stylesheets block script execution.
+    if (script.cachedScript() && !script.cachedScript()->isLoaded())
+        return false; // The script's cached resource has not finished loading.
+    return true;
+}
+
+void HTMLScriptRunner::executeParsingBlockingScript()
+{
+    ASSERT(m_document);
+    ASSERT(!m_scriptNestingLevel);
+    ASSERT(m_document->haveStylesheetsLoaded());
+    ASSERT(isPendingScriptReady(m_parsingBlockingScript)); // Note: this call also updates m_hasScriptsWaitingForStylesheets.
+
+    InsertionPointRecord insertionPointRecord(m_host->inputStream()); // Scoped to this function, i.e. to the script's execution.
+    executePendingScriptAndDispatchEvent(m_parsingBlockingScript);
+}
+
+void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendingScript)
+{
+    bool errorOccurred = false;
+    ScriptSourceCode sourceCode = sourceFromPendingScript(pendingScript, errorOccurred);
+
+    // Stop watching loads before executeScript to prevent recursion if the script reloads itself.
+    if (pendingScript.cachedScript() && pendingScript.watchingForLoad())
+        stopWatchingForLoad(pendingScript);
+
+    // Clear the pending script before possible reentrancy from executeScript().
+    RefPtr<Element> element = pendingScript.releaseElementAndClear();
+    if (ScriptElement* scriptElement = toScriptElement(element.get())) {
+        NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel);
+        IgnoreDestructiveWriteCountIncrementer ignoreDestructiveWriteCountIncrementer(m_document);
+        if (errorOccurred)
+            element->dispatchEvent(createScriptErrorEvent()); // A failed load dispatches an error event; the script is not executed.
+        else {
+            ASSERT(isExecutingScript());
+            scriptElement->executeScript(sourceCode);
+            element->dispatchEvent(createScriptLoadEvent()); // The load event is dispatched after the script has run.
+        }
+    }
+    ASSERT(!m_scriptNestingLevel);
+}
+
+void HTMLScriptRunner::watchForLoad(PendingScript& pendingScript)
+{
+    ASSERT(!pendingScript.watchingForLoad());
+    m_host->watchForLoad(pendingScript.cachedScript()); // The host does the actual registration on the cached resource.
+    pendingScript.setWatchingForLoad(true); // Mirror the registration so it can be undone later.
+}
+
+void HTMLScriptRunner::stopWatchingForLoad(PendingScript& pendingScript)
+{
+    ASSERT(pendingScript.watchingForLoad());
+    m_host->stopWatchingForLoad(pendingScript.cachedScript()); // Inverse of watchForLoad(): the host unregisters.
+    pendingScript.setWatchingForLoad(false);
+}
+
+// This function should match 10.2.5.11 "An end tag whose tag name is 'script'"
+// Script handling lives outside the tree builder to keep each class simple.
+bool HTMLScriptRunner::execute(PassRefPtr<Element> scriptElement, const TextPosition1& scriptStartPosition)
+{
+    ASSERT(scriptElement);
+    // FIXME: If scripting is disabled, always just return true;
+
+    // Try to execute the script given to us.
+    runScript(scriptElement.get(), scriptStartPosition);
+
+    if (haveParsingBlockingScript()) { // runScript() may have left a script that must run before parsing continues.
+        if (m_scriptNestingLevel)
+            return false; // Block the parser.  Unwind to the outermost HTMLScriptRunner::execute before continuing parsing.
+        if (!executeParsingBlockingScripts())
+            return false; // We still have a parsing blocking script, block the parser.
+    }
+    return true; // Scripts executed as expected, continue parsing.
+}
+
+bool HTMLScriptRunner::haveParsingBlockingScript() const
+{
+    return !!m_parsingBlockingScript.element(); // A script is pending exactly while m_parsingBlockingScript holds an element.
+}
+
+bool HTMLScriptRunner::executeParsingBlockingScripts()
+{
+    while (haveParsingBlockingScript()) { // Loops because running a script can leave another parsing-blocking script behind.
+        // We only really need to check once.
+        if (!isPendingScriptReady(m_parsingBlockingScript))
+            return false; // Still blocked on stylesheets or on the script's load.
+        executeParsingBlockingScript();
+    }
+    return true; // No parsing-blocking script remains.
+}
+
+bool HTMLScriptRunner::executeScriptsWaitingForLoad(CachedResource* cachedScript)
+{
+    ASSERT(!m_scriptNestingLevel);
+    ASSERT(haveParsingBlockingScript());
+    ASSERT_UNUSED(cachedScript, m_parsingBlockingScript.cachedScript() == cachedScript); // cachedScript is only used for this check.
+    ASSERT(m_parsingBlockingScript.cachedScript()->isLoaded());
+    return executeParsingBlockingScripts(); // False if a parsing-blocking script is still pending afterwards.
+}
+
+bool HTMLScriptRunner::executeScriptsWaitingForStylesheets()
+{
+    ASSERT(m_document);
+    // Callers should check hasScriptsWaitingForStylesheets() before calling
+    // to prevent parser or script re-entry during </style> parsing.
+    ASSERT(hasScriptsWaitingForStylesheets());
+    ASSERT(!m_scriptNestingLevel);
+    ASSERT(m_document->haveStylesheetsLoaded());
+    return executeParsingBlockingScripts(); // False if a parsing-blocking script is still pending afterwards.
+}
+
+bool HTMLScriptRunner::executeScriptsWaitingForParsing()
+{
+    while (!m_scriptsToExecuteAfterParsing.isEmpty()) { // Deferred scripts run in queue order.
+        ASSERT(!m_scriptNestingLevel);
+        ASSERT(!haveParsingBlockingScript());
+        ASSERT(m_scriptsToExecuteAfterParsing.first().cachedScript());
+        if (!m_scriptsToExecuteAfterParsing.first().cachedScript()->isLoaded()) {
+            watchForLoad(m_scriptsToExecuteAfterParsing.first()); // Head of the queue is not loaded yet: watch it and stop here.
+            return false;
+        }
+        PendingScript first = m_scriptsToExecuteAfterParsing.takeFirst();
+        executePendingScriptAndDispatchEvent(first);
+        if (!m_document)
+            return false; // detach() was called while the script ran; stop processing the queue.
+    }
+    return true; // The queue is empty.
+}
+
+void HTMLScriptRunner::requestParsingBlockingScript(Element* element)
+{
+    if (!requestPendingScript(m_parsingBlockingScript, element))
+        return; // The request was vetoed or produced no cached script.
+
+    ASSERT(m_parsingBlockingScript.cachedScript());
+
+    // We only care about a load callback if cachedScript is not already
+    // in the cache.  Callers will attempt to run the m_parsingBlockingScript
+    // if possible before returning control to the parser.
+    if (!m_parsingBlockingScript.cachedScript()->isLoaded())
+        watchForLoad(m_parsingBlockingScript);
+}
+
+void HTMLScriptRunner::requestDeferredScript(Element* element)
+{
+    PendingScript pendingScript;
+    if (!requestPendingScript(pendingScript, element))
+        return; // The request was vetoed or produced no cached script; nothing is queued.
+
+    ASSERT(pendingScript.cachedScript());
+    m_scriptsToExecuteAfterParsing.append(pendingScript); // Runs later, from executeScriptsWaitingForParsing().
+}
+
+bool HTMLScriptRunner::requestPendingScript(PendingScript& pendingScript, Element* script) const
+{
+    ASSERT(!pendingScript.element());
+    const AtomicString& srcValue = script->getAttribute(srcAttr);
+    // Allow the host to disallow script loads (using the XSSAuditor, etc.)
+    if (!m_host->shouldLoadExternalScriptFromSrc(srcValue))
+        return false;
+    // FIXME: We need to resolve the url relative to the element.
+    if (!script->dispatchBeforeLoadEvent(srcValue))
+        return false;
+    pendingScript.setElement(script);
+    // This should correctly return 0 for empty or invalid srcValues.
+    CachedScript* cachedScript = m_document->cachedResourceLoader()->requestScript(srcValue, toScriptElement(script)->scriptCharset());
+    if (!cachedScript) {
+        notImplemented(); // Dispatch error event.
+        return false; // NOTE(review): pendingScript already holds the element on this path - confirm callers expect that.
+    }
+    pendingScript.setCachedScript(cachedScript);
+    return true;
+}
+
+// This method is meant to match the HTML5 definition of "running a script"
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#running-a-script
+void HTMLScriptRunner::runScript(Element* script, const TextPosition1& scriptStartPosition)
+{
+    ASSERT(m_document);
+    ASSERT(!haveParsingBlockingScript());
+    {
+        InsertionPointRecord insertionPointRecord(m_host->inputStream());
+        NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel);
+
+        ScriptElement* scriptElement = toScriptElement(script);
+        ASSERT(scriptElement);
+        if (!scriptElement->shouldExecuteAsJavaScript())
+            return;
+        
+        if (script->hasAttribute(srcAttr)) {
+            if (script->hasAttribute(asyncAttr)) // Async takes precedence over defer.
+                return; // Asynchronous scripts handle themselves.
+
+            if (script->hasAttribute(deferAttr))
+                requestDeferredScript(script);
+            else
+                requestParsingBlockingScript(script);
+        } else if (!m_document->haveStylesheetsLoaded() && m_scriptNestingLevel == 1) {
+            // Block inline script execution on stylesheet load, unless we are in document.write().
+            // The latter case can only happen if a script both triggers a stylesheet load
+            // and writes an inline script. Since write is blocking we have to execute the
+            // written script immediately, ignoring the pending sheets.
+            m_parsingBlockingScript.setElement(script);
+            m_parsingBlockingScript.setStartingPosition(scriptStartPosition);
+        } else {
+            ASSERT(isExecutingScript());
+            ScriptSourceCode sourceCode(script->textContent(), documentURLForScriptExecution(m_document), scriptStartPosition);
+            scriptElement->executeScript(sourceCode); // Inline script with nothing to wait for: run it immediately.
+        }
+    }
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLScriptRunner.h b/Source/WebCore/html/parser/HTMLScriptRunner.h
new file mode 100644
index 0000000..6cf74d8
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLScriptRunner.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLScriptRunner_h
+#define HTMLScriptRunner_h
+
+#include "PendingScript.h"
+#include <wtf/Deque.h>
+#include <wtf/text/TextPosition.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/PassRefPtr.h>
+
+namespace WebCore {
+
+class CachedResource;
+class CachedScript;
+class Document;
+class Element;
+class Frame;
+class HTMLScriptRunnerHost;
+class ScriptSourceCode;
+
+class HTMLScriptRunner : public Noncopyable {
+public:
+    static PassOwnPtr<HTMLScriptRunner> create(Document* document, HTMLScriptRunnerHost* host)
+    {
+        return adoptPtr(new HTMLScriptRunner(document, host));
+    }
+    ~HTMLScriptRunner();
+
+    void detach(); // Clears the document pointer.
+
+    // Processes the passed in script and any pending scripts if possible. Returns false when the parser must block.
+    bool execute(PassRefPtr<Element> scriptToProcess, const TextPosition1& scriptStartPosition);
+
+    bool executeScriptsWaitingForLoad(CachedResource*);
+    bool hasScriptsWaitingForStylesheets() const { return m_hasScriptsWaitingForStylesheets; }
+    bool executeScriptsWaitingForStylesheets();
+    bool executeScriptsWaitingForParsing();
+
+    bool isExecutingScript() const { return !!m_scriptNestingLevel; }
+
+private:
+    HTMLScriptRunner(Document*, HTMLScriptRunnerHost*);
+
+    Frame* frame() const; // NOTE(review): declared here but not defined in HTMLScriptRunner.cpp; appears to be unused.
+
+    void executeParsingBlockingScript();
+    void executePendingScriptAndDispatchEvent(PendingScript&);
+    bool haveParsingBlockingScript() const;
+    bool executeParsingBlockingScripts();
+
+    void requestParsingBlockingScript(Element*);
+    void requestDeferredScript(Element*);
+    bool requestPendingScript(PendingScript&, Element*) const;
+
+    void runScript(Element*, const TextPosition1& scriptStartPosition);
+
+    // Helpers for dealing with HTMLScriptRunnerHost
+    void watchForLoad(PendingScript&);
+    void stopWatchingForLoad(PendingScript&);
+    bool isPendingScriptReady(const PendingScript&); // Non-const: also refreshes m_hasScriptsWaitingForStylesheets.
+    ScriptSourceCode sourceFromPendingScript(const PendingScript&, bool& errorOccurred) const;
+
+    Document* m_document; // Set to 0 by detach().
+    HTMLScriptRunnerHost* m_host; // Asserted non-null in the constructor.
+    PendingScript m_parsingBlockingScript; // Holds an element while a script blocks the parser.
+    Deque<PendingScript> m_scriptsToExecuteAfterParsing; // http://www.whatwg.org/specs/web-apps/current-work/#list-of-scripts-that-will-execute-when-the-document-has-finished-parsing
+    unsigned m_scriptNestingLevel; // Non-zero while a script is executing (see isExecutingScript()).
+
+    // We only want stylesheet loads to trigger script execution if script
+    // execution is currently stopped due to stylesheet loads, otherwise we'd
+    // cause nested script execution when parsing <style> tags since </style>
+    // tags can cause Document to call executeScriptsWaitingForStylesheets.
+    bool m_hasScriptsWaitingForStylesheets;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLScriptRunnerHost.h b/Source/WebCore/html/parser/HTMLScriptRunnerHost.h
new file mode 100644
index 0000000..5b40a931
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLScriptRunnerHost.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLScriptRunnerHost_h
+#define HTMLScriptRunnerHost_h
+
+#include <wtf/Forward.h>
+
+namespace WebCore {
+
+class CachedResource;
+class Element;
+class HTMLInputStream;
+class ScriptSourceCode;
+
+class HTMLScriptRunnerHost { // Abstract interface (all operations pure virtual) that HTMLScriptRunner calls through its host pointer.
+public:
+    virtual ~HTMLScriptRunnerHost() { }
+
+    // Implementors should call cachedResource->addClient() here or soon after.
+    virtual void watchForLoad(CachedResource*) = 0;
+    // Implementors must call cachedResource->removeClient() immediately.
+    virtual void stopWatchingForLoad(CachedResource*) = 0;
+
+    // Implementors can block certain script loads (for XSSAuditor, etc.) by returning false.
+    virtual bool shouldLoadExternalScriptFromSrc(const AtomicString&) = 0;
+    virtual HTMLInputStream& inputStream() = 0; // Stream passed to InsertionPointRecord around script execution.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLToken.h b/Source/WebCore/html/parser/HTMLToken.h
new file mode 100644
index 0000000..42cddb8
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLToken.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLToken_h
+#define HTMLToken_h
+
+#include "NamedNodeMap.h"
+#include <wtf/Noncopyable.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+// Mutable token that is filled in piece by piece.
+//
+// A token starts out Uninitialized, is given a concrete type by one of the
+// begin*/ensureIsCharacterToken/makeEndOfFile methods, and then accumulates
+// data through the appendTo* methods.  Accessors ASSERT that the token
+// currently has a type for which the requested field is meaningful.
+class HTMLToken : public Noncopyable {
+public:
+    enum Type {
+        Uninitialized,
+        DOCTYPE,
+        StartTag,
+        EndTag,
+        Comment,
+        Character,
+        EndOfFile,
+    };
+
+    // A start/end pair of integer indices.
+    class Range {
+    public:
+        int m_start;
+        int m_end;
+    };
+
+    // One attribute of a tag: the name and value text plus the index range
+    // recorded for each of them.
+    class Attribute {
+    public:
+        Range m_nameRange;
+        Range m_valueRange;
+        WTF::Vector<UChar, 32> m_name;
+        WTF::Vector<UChar, 32> m_value;
+    };
+
+    typedef WTF::Vector<Attribute, 10> AttributeList;
+    typedef WTF::Vector<UChar, 1024> DataVector;
+
+    // m_selfClosing and m_currentAttribute are otherwise assigned only by
+    // beginStartTag()/beginEndTag(); initialize them here so that a freshly
+    // constructed token never holds indeterminate members.
+    HTMLToken()
+        : m_selfClosing(false)
+        , m_currentAttribute(0)
+    {
+        clear();
+    }
+
+    // Returns the token to the Uninitialized state with an empty range at
+    // startIndex and no accumulated data.  Attribute and DOCTYPE state is not
+    // touched here; the begin* methods reset the parts they use.
+    void clear(int startIndex = 0)
+    {
+        m_type = Uninitialized;
+        m_range.m_start = startIndex;
+        m_range.m_end = startIndex;
+        m_data.clear();
+    }
+
+    int startIndex() const { return m_range.m_start; }
+    int endIndex() const { return m_range.m_end; }
+
+    // Records the end of the token's range.
+    void end(int endIndex)
+    {
+        m_range.m_end = endIndex;
+    }
+
+    void makeEndOfFile()
+    {
+        ASSERT(m_type == Uninitialized);
+        m_type = EndOfFile;
+    }
+
+    // Turns an Uninitialized token into a StartTag whose name begins with
+    // |character|, discarding any attributes left over from a previous use.
+    void beginStartTag(UChar character)
+    {
+        ASSERT(character);
+        ASSERT(m_type == Uninitialized);
+        m_type = StartTag;
+        m_selfClosing = false;
+        m_currentAttribute = 0;
+        m_attributes.clear();
+
+        m_data.append(character);
+    }
+
+    // Turns an Uninitialized token into an EndTag.  |characters| is whatever
+    // DataVector::append accepts (a single UChar or a buffer of them).
+    template<typename T>
+    void beginEndTag(T characters)
+    {
+        ASSERT(m_type == Uninitialized);
+        m_type = EndTag;
+        m_selfClosing = false;
+        m_currentAttribute = 0;
+        m_attributes.clear();
+
+        m_data.append(characters);
+    }
+
+    // Starting a character token works slightly differently than starting
+    // other types of tokens because we want to save a per-character branch.
+    void ensureIsCharacterToken()
+    {
+        ASSERT(m_type == Uninitialized || m_type == Character);
+        m_type = Character;
+    }
+
+    void beginComment()
+    {
+        ASSERT(m_type == Uninitialized);
+        m_type = Comment;
+    }
+
+    // Turns an Uninitialized token into a DOCTYPE and allocates fresh
+    // DoctypeData (no identifiers, force-quirks off).
+    void beginDOCTYPE()
+    {
+        ASSERT(m_type == Uninitialized);
+        m_type = DOCTYPE;
+        m_doctypeData = adoptPtr(new DoctypeData());
+    }
+
+    // As above, and additionally seeds the DOCTYPE name with |character|.
+    void beginDOCTYPE(UChar character)
+    {
+        ASSERT(character);
+        beginDOCTYPE();
+        m_data.append(character);
+    }
+
+    void appendToName(UChar character)
+    {
+        ASSERT(character);
+        ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
+        m_data.append(character);
+    }
+
+    template<typename T>
+    void appendToCharacter(T characters)
+    {
+        ASSERT(m_type == Character);
+        m_data.append(characters);
+    }
+
+    void appendToComment(UChar character)
+    {
+        ASSERT(character);
+        ASSERT(m_type == Comment);
+        m_data.append(character);
+    }
+
+    // Appends an empty attribute and makes it the target of the subsequent
+    // begin/end/appendToAttribute* calls.
+    void addNewAttribute()
+    {
+        ASSERT(m_type == StartTag || m_type == EndTag);
+        m_attributes.grow(m_attributes.size() + 1);
+        // Growing may move the storage, so re-point at the new last element.
+        m_currentAttribute = &m_attributes.last();
+#ifndef NDEBUG
+        // Zeroed in debug builds only, so the ASSERTs on these ranges can
+        // tell whether the begin/end calls actually happened.
+        m_currentAttribute->m_nameRange.m_start = 0;
+        m_currentAttribute->m_nameRange.m_end = 0;
+        m_currentAttribute->m_valueRange.m_start = 0;
+        m_currentAttribute->m_valueRange.m_end = 0;
+#endif
+    }
+
+    void beginAttributeName(int index)
+    {
+        m_currentAttribute->m_nameRange.m_start = index;
+    }
+
+    // Closes the name range and gives the value an empty range at the same
+    // index, which stays in place if no value is ever begun.
+    void endAttributeName(int index)
+    {
+        m_currentAttribute->m_nameRange.m_end = index;
+        m_currentAttribute->m_valueRange.m_start = index;
+        m_currentAttribute->m_valueRange.m_end = index;
+    }
+
+    void beginAttributeValue(int index)
+    {
+        m_currentAttribute->m_valueRange.m_start = index;
+#ifndef NDEBUG
+        m_currentAttribute->m_valueRange.m_end = 0;
+#endif
+    }
+
+    void endAttributeValue(int index)
+    {
+        m_currentAttribute->m_valueRange.m_end = index;
+    }
+
+    void appendToAttributeName(UChar character)
+    {
+        ASSERT(character);
+        ASSERT(m_type == StartTag || m_type == EndTag);
+        ASSERT(m_currentAttribute->m_nameRange.m_start);
+        m_currentAttribute->m_name.append(character);
+    }
+
+    void appendToAttributeValue(UChar character)
+    {
+        ASSERT(character);
+        ASSERT(m_type == StartTag || m_type == EndTag);
+        ASSERT(m_currentAttribute->m_valueRange.m_start);
+        m_currentAttribute->m_value.append(character);
+    }
+
+    Type type() const { return m_type; }
+
+    bool selfClosing() const
+    {
+        ASSERT(m_type == StartTag || m_type == EndTag);
+        return m_selfClosing;
+    }
+
+    void setSelfClosing()
+    {
+        ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag);
+        m_selfClosing = true;
+    }
+
+    const AttributeList& attributes() const
+    {
+        ASSERT(m_type == StartTag || m_type == EndTag);
+        return m_attributes;
+    }
+
+    // name(), characters() and comment() all expose the same m_data buffer;
+    // which one is valid depends on the token type.
+    const DataVector& name() const
+    {
+        ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
+        return m_data;
+    }
+
+    const DataVector& characters() const
+    {
+        ASSERT(m_type == Character);
+        return m_data;
+    }
+
+    const DataVector& comment() const
+    {
+        ASSERT(m_type == Comment);
+        return m_data;
+    }
+
+    // FIXME: Distinguish between a missing public identifier and an empty one.
+    const WTF::Vector<UChar>& publicIdentifier() const
+    {
+        ASSERT(m_type == DOCTYPE);
+        return m_doctypeData->m_publicIdentifier;
+    }
+
+    // FIXME: Distinguish between a missing system identifier and an empty one.
+    const WTF::Vector<UChar>& systemIdentifier() const
+    {
+        ASSERT(m_type == DOCTYPE);
+        return m_doctypeData->m_systemIdentifier;
+    }
+
+    // Marks the public identifier as present (but empty); required before
+    // appendToPublicIdentifier() may be used.
+    void setPublicIdentifierToEmptyString()
+    {
+        ASSERT(m_type == DOCTYPE);
+        m_doctypeData->m_hasPublicIdentifier = true;
+        m_doctypeData->m_publicIdentifier.clear();
+    }
+
+    // Marks the system identifier as present (but empty); required before
+    // appendToSystemIdentifier() may be used.
+    void setSystemIdentifierToEmptyString()
+    {
+        ASSERT(m_type == DOCTYPE);
+        m_doctypeData->m_hasSystemIdentifier = true;
+        m_doctypeData->m_systemIdentifier.clear();
+    }
+
+    bool forceQuirks() const
+    {
+        ASSERT(m_type == DOCTYPE);
+        return m_doctypeData->m_forceQuirks;
+    }
+
+    void setForceQuirks()
+    {
+        ASSERT(m_type == DOCTYPE);
+        m_doctypeData->m_forceQuirks = true;
+    }
+
+    void appendToPublicIdentifier(UChar character)
+    {
+        ASSERT(character);
+        ASSERT(m_type == DOCTYPE);
+        ASSERT(m_doctypeData->m_hasPublicIdentifier);
+        m_doctypeData->m_publicIdentifier.append(character);
+    }
+
+    void appendToSystemIdentifier(UChar character)
+    {
+        ASSERT(character);
+        ASSERT(m_type == DOCTYPE);
+        ASSERT(m_doctypeData->m_hasSystemIdentifier);
+        m_doctypeData->m_systemIdentifier.append(character);
+    }
+
+private:
+    // FIXME: I'm not sure what the final relationship between HTMLToken and
+    // AtomicHTMLToken will be.  I'm marking this a friend for now, but we'll
+    // want to end up with a cleaner interface between the two classes.
+    friend class AtomicHTMLToken;
+
+    // Extra state that only DOCTYPE tokens carry; allocated by beginDOCTYPE().
+    class DoctypeData : public Noncopyable {
+    public:
+        DoctypeData()
+            : m_hasPublicIdentifier(false)
+            , m_hasSystemIdentifier(false)
+            , m_forceQuirks(false)
+        {
+        }
+
+        bool m_hasPublicIdentifier;
+        bool m_hasSystemIdentifier;
+        bool m_forceQuirks;
+        WTF::Vector<UChar> m_publicIdentifier;
+        WTF::Vector<UChar> m_systemIdentifier;
+    };
+
+    Type m_type;
+
+    // Which characters from the input stream are represented by this token.
+    Range m_range;
+
+    // "name" for DOCTYPE, StartTag, and EndTag
+    // "characters" for Character
+    // "data" for Comment
+    DataVector m_data;
+
+    // For DOCTYPE
+    OwnPtr<DoctypeData> m_doctypeData;
+
+    // For StartTag and EndTag
+    bool m_selfClosing;
+    AttributeList m_attributes;
+
+    // A pointer into m_attributes used during lexing.
+    Attribute* m_currentAttribute;
+};
+
+// Token form built from an HTMLToken: tag and DOCTYPE names become
+// AtomicStrings, comment text becomes a String, and tag attributes are
+// collected into a NamedNodeMap.
+//
+// FIXME: This class should eventually be named HTMLToken once we move the
+// existing HTMLToken to be internal to the HTMLTokenizer.
+class AtomicHTMLToken : public Noncopyable {
+public:
+    // Converts |token|.  Note the side effects on |token|: for a DOCTYPE its
+    // DoctypeData is taken over by this object, and for a Character token
+    // this object keeps a pointer to |token|'s character buffer, so |token|
+    // must outlive it.
+    AtomicHTMLToken(HTMLToken& token)
+        : m_type(token.type())
+        , m_externalCharacters(0)
+        , m_selfClosing(false)
+    {
+        switch (m_type) {
+        case HTMLToken::Uninitialized:
+            ASSERT_NOT_REACHED();
+            break;
+        case HTMLToken::DOCTYPE:
+            m_name = AtomicString(token.name().data(), token.name().size());
+            m_doctypeData = token.m_doctypeData.release();
+            break;
+        case HTMLToken::EndOfFile:
+            break;
+        case HTMLToken::StartTag:
+        case HTMLToken::EndTag: {
+            m_selfClosing = token.selfClosing();
+            m_name = AtomicString(token.name().data(), token.name().size());
+            const HTMLToken::AttributeList& attributes = token.attributes();
+            for (HTMLToken::AttributeList::const_iterator iter = attributes.begin();
+                 iter != attributes.end(); ++iter) {
+                // Attributes with an empty name are skipped.
+                if (!iter->m_name.isEmpty()) {
+                    String name(iter->m_name.data(), iter->m_name.size());
+                    String value(iter->m_value.data(), iter->m_value.size());
+                    ASSERT(iter->m_nameRange.m_start);
+                    ASSERT(iter->m_nameRange.m_end);
+                    ASSERT(iter->m_valueRange.m_start);
+                    ASSERT(iter->m_valueRange.m_end);
+                    RefPtr<Attribute> mappedAttribute = Attribute::createMapped(name, value);
+                    // The map is created lazily, so a tag without usable
+                    // attributes leaves m_attributes null.
+                    if (!m_attributes) {
+                        m_attributes = NamedNodeMap::create();
+                        // Reserving capacity here improves the parser
+                        // benchmark.  It might be worth experimenting with
+                        // the constant to see where the optimal point is.
+                        m_attributes->reserveInitialCapacity(10);
+                    }
+                    m_attributes->insertAttribute(mappedAttribute.release(), false);
+                }
+            }
+            break;
+        }
+        case HTMLToken::Comment:
+            m_data = String(token.comment().data(), token.comment().size());
+            break;
+        case HTMLToken::Character:
+            m_externalCharacters = &token.characters();
+            break;
+        }
+    }
+
+    // Builds a token directly from a name and optional attributes, without
+    // an HTMLToken.  |type| must be one that uses a name (StartTag, EndTag or
+    // DOCTYPE).  The token is not self-closing and has no character buffer;
+    // both members are initialized explicitly so that selfClosing() never
+    // reads an indeterminate value.
+    AtomicHTMLToken(HTMLToken::Type type, AtomicString name, PassRefPtr<NamedNodeMap> attributes = 0)
+        : m_type(type)
+        , m_name(name)
+        , m_externalCharacters(0)
+        , m_selfClosing(false)
+        , m_attributes(attributes)
+    {
+        ASSERT(usesName());
+    }
+
+    HTMLToken::Type type() const { return m_type; }
+
+    const AtomicString& name() const
+    {
+        ASSERT(usesName());
+        return m_name;
+    }
+
+    void setName(const AtomicString& name)
+    {
+        ASSERT(usesName());
+        m_name = name;
+    }
+
+    bool selfClosing() const
+    {
+        ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag);
+        return m_selfClosing;
+    }
+
+    // Returns 0 when the token has no attribute map at all.
+    Attribute* getAttributeItem(const QualifiedName& attributeName)
+    {
+        ASSERT(usesAttributes());
+        if (!m_attributes)
+            return 0;
+        return m_attributes->getAttributeItem(attributeName);
+    }
+
+    // May return 0; see the lazy creation in the converting constructor.
+    NamedNodeMap* attributes() const
+    {
+        ASSERT(usesAttributes());
+        return m_attributes.get();
+    }
+
+    // Transfers the attribute map to the caller, leaving this token with
+    // none.  (The misspelled name is part of the existing interface.)
+    PassRefPtr<NamedNodeMap> takeAtributes()
+    {
+        ASSERT(usesAttributes());
+        return m_attributes.release();
+    }
+
+    const HTMLToken::DataVector& characters() const
+    {
+        ASSERT(m_type == HTMLToken::Character);
+        return *m_externalCharacters;
+    }
+
+    const String& comment() const
+    {
+        ASSERT(m_type == HTMLToken::Comment);
+        return m_data;
+    }
+
+    // FIXME: Distinguish between a missing public identifier and an empty one.
+    WTF::Vector<UChar>& publicIdentifier() const
+    {
+        ASSERT(m_type == HTMLToken::DOCTYPE);
+        return m_doctypeData->m_publicIdentifier;
+    }
+
+    // FIXME: Distinguish between a missing system identifier and an empty one.
+    WTF::Vector<UChar>& systemIdentifier() const
+    {
+        ASSERT(m_type == HTMLToken::DOCTYPE);
+        return m_doctypeData->m_systemIdentifier;
+    }
+
+    bool forceQuirks() const
+    {
+        ASSERT(m_type == HTMLToken::DOCTYPE);
+        return m_doctypeData->m_forceQuirks;
+    }
+
+private:
+    HTMLToken::Type m_type;
+
+    bool usesName() const
+    {
+        return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE;
+    }
+
+    bool usesAttributes() const
+    {
+        return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
+    }
+
+    // "name" for DOCTYPE, StartTag, and EndTag
+    AtomicString m_name;
+
+    // "data" for Comment
+    String m_data;
+
+    // "characters" for Character
+    //
+    // We don't want to copy the characters out of the HTMLToken, so we
+    // keep a pointer to its buffer instead.  This buffer is owned by the
+    // HTMLToken and causes a lifetime dependence between these objects.
+    //
+    // FIXME: Add a mechanism for "internalizing" the characters when the
+    //        HTMLToken is destructed.
+    const HTMLToken::DataVector* m_externalCharacters;
+
+    // For DOCTYPE
+    OwnPtr<HTMLToken::DoctypeData> m_doctypeData;
+
+    // For StartTag and EndTag
+    bool m_selfClosing;
+
+    RefPtr<NamedNodeMap> m_attributes;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLTokenizer.cpp b/Source/WebCore/html/parser/HTMLTokenizer.cpp
new file mode 100644
index 0000000..305fca2
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTokenizer.cpp
@@ -0,0 +1,1698 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "HTMLTokenizer.h"
+
+#include "HTMLEntityParser.h"
+#include "HTMLToken.h"
+#include "HTMLTreeBuilder.h"
+#include "HTMLNames.h"
+#include "NotImplemented.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/CurrentTime.h>
+#include <wtf/UnusedParam.h>
+#include <wtf/text/AtomicString.h>
+#include <wtf/text/CString.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF;
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+const UChar HTMLTokenizer::InputStreamPreprocessor::endOfFileMarker = 0; // The state machine compares cc against this value to detect end of input.
+
+namespace {
+
+// Converts an upper-case ASCII letter to lower case by adding the distance
+// between the two ASCII letter ranges.  Callers must only pass upper-case
+// ASCII letters (checked by ASSERT).
+inline UChar toLowerCase(UChar cc)
+{
+    const int upperToLowerDelta = 'a' - 'A';
+    ASSERT(isASCIIUpper(cc));
+    return cc + upperToLowerDelta;
+}
+
+// True for the four characters treated as whitespace here: tab (0x09),
+// line feed (0x0A), form feed (0x0C) and space.
+inline bool isTokenizerWhitespace(UChar cc)
+{
+    switch (cc) {
+    case '\x09':
+    case '\x0A':
+    case '\x0C':
+    case ' ':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Feeds each character of the NUL-terminated |expectedCharacters|, in order,
+// to source.advanceAndASSERTIgnoringCase().
+inline void advanceStringAndASSERTIgnoringCase(SegmentedString& source, const char* expectedCharacters)
+{
+    for (const char* expected = expectedCharacters; *expected; ++expected)
+        source.advanceAndASSERTIgnoringCase(*expected);
+}
+
+// Feeds each character of the NUL-terminated |expectedCharacters|, in order,
+// to source.advanceAndASSERT().
+inline void advanceStringAndASSERT(SegmentedString& source, const char* expectedCharacters)
+{
+    for (const char* expected = expectedCharacters; *expected; ++expected)
+        source.advanceAndASSERT(*expected);
+}
+
+// Returns true when |vector| and |string| hold the same UChar sequence:
+// equal lengths and byte-identical contents.
+inline bool vectorEqualsString(const Vector<UChar, 32>& vector, const String& string)
+{
+    const size_t length = vector.size();
+    if (length != string.length())
+        return false;
+    // FIXME: Is there a higher-level function we should be calling here?
+    return !memcmp(string.characters(), vector.data(), length * sizeof(UChar));
+}
+
+// True for the eight "end tag open" / "end tag name" states of RCDATA,
+// RAWTEXT, script data and escaped script data.
+inline bool isEndTagBufferingState(HTMLTokenizer::State state)
+{
+    return state == HTMLTokenizer::RCDATAEndTagOpenState
+        || state == HTMLTokenizer::RCDATAEndTagNameState
+        || state == HTMLTokenizer::RAWTEXTEndTagOpenState
+        || state == HTMLTokenizer::RAWTEXTEndTagNameState
+        || state == HTMLTokenizer::ScriptDataEndTagOpenState
+        || state == HTMLTokenizer::ScriptDataEndTagNameState
+        || state == HTMLTokenizer::ScriptDataEscapedEndTagOpenState
+        || state == HTMLTokenizer::ScriptDataEscapedEndTagNameState;
+}
+
+}
+
+HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks)
+    : m_inputStreamPreprocessor(this)
+    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks) // Consulted in TagNameState: with quirks on, a '<' inside a tag name emits the tag.
+{
+    reset(); // Puts the state machine into its initial state (DataState, no current token).
+}
+
+HTMLTokenizer::~HTMLTokenizer() // Defined out of line with an empty body.
+{
+}
+
+// Restores the tokenizer's initial configuration.  Also called from the
+// constructor.
+void HTMLTokenizer::reset()
+{
+    // No token is being built and lexing starts in the data state.
+    m_token = 0;
+    m_state = DataState;
+    m_lineNumber = 0;
+
+    // Every optional behaviour starts switched off.
+    m_shouldAllowCDATA = false;
+    m_forceNullCharacterReplacement = false;
+    m_skipLeadingNewLineForListing = false;
+    m_additionalAllowedCharacter = '\0';
+}
+
+// Tries to consume a character reference from |source| and buffers the
+// outcome.  Returns false only when consumeHTMLEntity reports that there is
+// not enough input yet; otherwise buffers either the decoded characters or,
+// on failure, a literal '&', and returns true.
+inline bool HTMLTokenizer::processEntity(SegmentedString& source)
+{
+    Vector<UChar, 16> decodedEntity;
+    bool notEnoughCharacters = false;
+    const bool decoded = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
+    if (notEnoughCharacters)
+        return false;
+
+    if (decoded) {
+        for (size_t i = 0; i < decodedEntity.size(); ++i)
+            bufferCharacter(decodedEntity[i]);
+        return true;
+    }
+
+    ASSERT(decodedEntity.isEmpty());
+    bufferCharacter('&');
+    return true;
+}
+
+#if COMPILER(MSVC)
+// We need to disable the "unreachable code" warning because we want to assert
+// that some code points aren't reached in the state machine.
+#pragma warning(disable: 4702)
+#endif
+
+#define BEGIN_STATE(stateName) case stateName: stateName: // Both a switch case and a goto label, so a state can be entered either way.
+#define END_STATE() ASSERT_NOT_REACHED(); break; // Every path through a state body is expected to return or jump before this point.
+
+// We use this macro when the HTML5 spec says "reconsume the current input
+// character in the <mumble> state."  It records the new state and jumps to that state's label; cc and source are untouched.
+#define RECONSUME_IN(stateName)                                            \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        goto stateName;                                                    \
+    } while (false)
+
+// We use this macro when the HTML5 spec says "consume the next input
+// character ... and switch to the <mumble> state."  If advancing fails, the enclosing function returns haveBufferedCharacterToken() with m_state already updated; otherwise cc is refreshed.
+#define ADVANCE_TO(stateName)                                              \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        if (!m_inputStreamPreprocessor.advance(source, m_lineNumber))      \
+            return haveBufferedCharacterToken();                           \
+        cc = m_inputStreamPreprocessor.nextInputCharacter();               \
+        goto stateName;                                                    \
+    } while (false)
+
+// Sometimes there's more complicated logic in the spec that separates when
+// we consume the next input character and when we switch to a particular
+// state. We handle those cases by advancing the source directly and using
+// this macro to switch to the indicated state. Unlike ADVANCE_TO, it only peeks at source (after checking it is not empty) instead of advancing it.
+#define SWITCH_TO(stateName)                                               \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
+            return haveBufferedCharacterToken();                           \
+        cc = m_inputStreamPreprocessor.nextInputCharacter();               \
+        goto stateName;                                                    \
+    } while (false)
+
+
+// When the current token is a start tag, copies its name into
+// m_appropriateEndTagName.  Tokens of any other type are left alone.
+inline void HTMLTokenizer::saveEndTagNameIfNeeded()
+{
+    const HTMLToken::Type type = m_token->type();
+    ASSERT(type != HTMLToken::Uninitialized);
+    if (type != HTMLToken::StartTag)
+        return;
+    m_appropriateEndTagName = m_token->name();
+}
+
+// For the places where the HTML5 spec says "Emit the current <mumble> token.
+// Switch to the <mumble> state."  The name says "resume" rather than "switch"
+// because this function returns to the caller: |state| is only recorded, and
+// lexing continues there on the next call.  Consumes the current character,
+// then does exactly what emitAndReconsumeIn does.
+bool HTMLTokenizer::emitAndResumeIn(SegmentedString& source, State state)
+{
+    source.advance(m_lineNumber);
+    return emitAndReconsumeIn(source, state);
+}
+
+// Same as emitAndResumeIn, but leaves the current character in the source
+// so that it is seen again in |state|.  Always returns true.
+bool HTMLTokenizer::emitAndReconsumeIn(SegmentedString&, State state)
+{
+    saveEndTagNameIfNeeded();
+    m_state = state;
+    return true;
+}
+
+// Emits the EndOfFile token, unless buffered characters are still waiting:
+// in that case the source is left untouched so those characters go out
+// first.  Returns true in both cases.
+bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
+{
+    if (!haveBufferedCharacterToken()) {
+        m_state = DataState;
+        source.advance(m_lineNumber);
+        m_token->clear();
+        m_token->makeEndOfFile();
+    }
+    return true;
+}
+
+// Consumes the current character and, when no character token is pending,
+// starts an end tag token from m_bufferedEndTagName and empties that buffer.
+// Returns true when the current token is a Character token, in which case
+// the buffered end tag name is kept as is.
+bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
+{
+    ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
+    source.advance(m_lineNumber);
+    const bool haveCharacterToken = m_token->type() == HTMLToken::Character;
+    if (!haveCharacterToken) {
+        m_token->beginEndTag(m_bufferedEndTagName);
+        m_bufferedEndTagName.clear();
+    }
+    return haveCharacterToken;
+}
+
+#define FLUSH_AND_ADVANCE_TO(stateName) /* Flush the buffered end tag, then continue lexing in stateName. */ \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        if (flushBufferedEndTag(source)) /* true: a Character token is pending, return it first */ \
+            return true;                                                   \
+        if (source.isEmpty()                                               \
+            || !m_inputStreamPreprocessor.peek(source, m_lineNumber))      \
+            return haveBufferedCharacterToken();                           \
+        cc = m_inputStreamPreprocessor.nextInputCharacter();               \
+        goto stateName;                                                    \
+    } while (false)
+
+// Flushes the buffered end tag (the result of the flush is deliberately not
+// inspected), records |state| as the state to continue in, and returns true.
+bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, State state)
+{
+    flushBufferedEndTag(source);
+    m_state = state;
+    return true;
+}
+
+bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
+{
+    // If we have a token in progress, then we're supposed to be called back
+    // with the same token so we can finish it.
+    ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
+    m_token = &token;
+
+    if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
+        // FIXME: This should call flushBufferedEndTag().
+        // We started an end tag during our last iteration.
+        m_token->beginEndTag(m_bufferedEndTagName);
+        m_bufferedEndTagName.clear();
+        if (m_state == DataState) {
+            // We're back in the data state, so we must be done with the tag.
+            return true;
+        }
+    }
+
+    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber))
+        return haveBufferedCharacterToken();
+    UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
+
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+    // Note that this logic is different than the generic \r\n collapsing
+    // handled in the input stream preprocessor. This logic is here as an
+    // "authoring convenience" so folks can write:
+    //
+    // <pre>
+    // lorem ipsum
+    // lorem ipsum
+    // </pre>
+    //
+    // without getting an extra newline at the start of their <pre> element.
+    if (m_skipLeadingNewLineForListing) {
+        m_skipLeadingNewLineForListing = false;
+        if (cc == '\n') {
+            if (m_state == DataState)
+                ADVANCE_TO(DataState);
+            if (m_state == RCDATAState)
+                ADVANCE_TO(RCDATAState);
+            // When parsing text/plain documents, we run the tokenizer in the
+            // PLAINTEXTState and ignore m_skipLeadingNewLineForListing.
+            ASSERT(m_state == PLAINTEXTState);
+        }
+    }
+
+    // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
+    switch (m_state) {
+    BEGIN_STATE(DataState) {
+        if (cc == '&')
+            ADVANCE_TO(CharacterReferenceInDataState);
+        else if (cc == '<') {
+            if (m_token->type() == HTMLToken::Character) {
+                // We have a bunch of character tokens queued up that we
+                // are emitting lazily here.
+                return true;
+            }
+            ADVANCE_TO(TagOpenState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker)
+            return emitEndOfFile(source);
+        else {
+            bufferCharacter(cc);
+            ADVANCE_TO(DataState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(CharacterReferenceInDataState) {
+        if (!processEntity(source))
+            return haveBufferedCharacterToken();
+        SWITCH_TO(DataState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(RCDATAState) {
+        if (cc == '&')
+            ADVANCE_TO(CharacterReferenceInRCDATAState);
+        else if (cc == '<')
+            ADVANCE_TO(RCDATALessThanSignState);
+        else if (cc == InputStreamPreprocessor::endOfFileMarker)
+            return emitEndOfFile(source);
+        else {
+            bufferCharacter(cc);
+            ADVANCE_TO(RCDATAState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(CharacterReferenceInRCDATAState) {
+        if (!processEntity(source))
+            return haveBufferedCharacterToken();
+        SWITCH_TO(RCDATAState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(RAWTEXTState) {
+        if (cc == '<')
+            ADVANCE_TO(RAWTEXTLessThanSignState);
+        else if (cc == InputStreamPreprocessor::endOfFileMarker)
+            return emitEndOfFile(source);
+        else {
+            bufferCharacter(cc);
+            ADVANCE_TO(RAWTEXTState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataState) {
+        if (cc == '<')
+            ADVANCE_TO(ScriptDataLessThanSignState);
+        else if (cc == InputStreamPreprocessor::endOfFileMarker)
+            return emitEndOfFile(source);
+        else {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(PLAINTEXTState) {
+        if (cc == InputStreamPreprocessor::endOfFileMarker)
+            return emitEndOfFile(source);
+        else
+            bufferCharacter(cc);
+        ADVANCE_TO(PLAINTEXTState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(TagOpenState) {
+        if (cc == '!')
+            ADVANCE_TO(MarkupDeclarationOpenState);
+        else if (cc == '/')
+            ADVANCE_TO(EndTagOpenState);
+        else if (isASCIIUpper(cc)) {
+            m_token->beginStartTag(toLowerCase(cc));
+            ADVANCE_TO(TagNameState);
+        } else if (isASCIILower(cc)) {
+            m_token->beginStartTag(cc);
+            ADVANCE_TO(TagNameState);
+        } else if (cc == '?') {
+            parseError();
+            // The spec consumes the current character before switching
+            // to the bogus comment state, but it's easier to implement
+            // if we reconsume the current character.
+            RECONSUME_IN(BogusCommentState);
+        } else {
+            parseError();
+            bufferCharacter('<');
+            RECONSUME_IN(DataState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(EndTagOpenState) {
+        if (isASCIIUpper(cc)) {
+            m_token->beginEndTag(toLowerCase(cc));
+            ADVANCE_TO(TagNameState);
+        } else if (isASCIILower(cc)) {
+            m_token->beginEndTag(cc);
+            ADVANCE_TO(TagNameState);
+        } else if (cc == '>') {
+            parseError();
+            ADVANCE_TO(DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            bufferCharacter('<');
+            bufferCharacter('/');
+            RECONSUME_IN(DataState);
+        } else {
+            parseError();
+            RECONSUME_IN(BogusCommentState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(TagNameState) {
+        // Accumulates the tag name one character at a time. ASCII upper-case
+        // letters are appended lower-cased; whitespace, '/', '>' (and '<' when
+        // m_usePreHTML5ParserQuirks is set) end the name.
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeAttributeNameState);
+        else if (cc == '/')
+            ADVANCE_TO(SelfClosingStartTagState);
+        else if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
+        else if (isASCIIUpper(cc)) {
+            m_token->appendToName(toLowerCase(cc));
+            ADVANCE_TO(TagNameState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            // This test belongs to the same else-if chain as the branches
+            // above, so the branches stay mutually exclusive on their own
+            // rather than depending on ADVANCE_TO never falling through.
+            parseError();
+            RECONSUME_IN(DataState);
+        } else {
+            m_token->appendToName(cc);
+            ADVANCE_TO(TagNameState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(RCDATALessThanSignState) {
+        // Anything other than '/' means the pending '<' is plain text:
+        // buffer it and reprocess the current character in RCDATAState.
+        if (cc != '/') {
+            bufferCharacter('<');
+            RECONSUME_IN(RCDATAState);
+        }
+        // "</" seen: start collecting a possible end tag from a clean buffer.
+        m_temporaryBuffer.clear();
+        ASSERT(m_bufferedEndTagName.isEmpty());
+        ADVANCE_TO(RCDATAEndTagOpenState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(RCDATAEndTagOpenState) {
+        // Only an ASCII letter can start an end tag name; otherwise the "</"
+        // is buffered as text and the character is reprocessed in RCDATAState.
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper && !isASCIILower(cc)) {
+            bufferCharacter('<');
+            bufferCharacter('/');
+            RECONSUME_IN(RCDATAState);
+        }
+        // The raw character is kept in m_temporaryBuffer; the possible end
+        // tag name receives the lower-cased form.
+        m_temporaryBuffer.append(cc);
+        addToPossibleEndTag(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(RCDATAEndTagNameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(RCDATAEndTagNameState) {
+        // Letters extend the candidate name: raw character into
+        // m_temporaryBuffer, lower-cased form into the possible end tag.
+        const bool isUpper = isASCIIUpper(cc);
+        if (isUpper || isASCIILower(cc)) {
+            m_temporaryBuffer.append(cc);
+            addToPossibleEndTag(isUpper ? toLowerCase(cc) : cc);
+            ADVANCE_TO(RCDATAEndTagNameState);
+        }
+        // Whitespace, '/' and '>' finish the name, but only count as an end
+        // tag when isAppropriateEndTag() accepts it.
+        if (isTokenizerWhitespace(cc)) {
+            if (isAppropriateEndTag())
+                FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+        } else if (cc == '/') {
+            if (isAppropriateEndTag())
+                FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+        } else if (cc == '>') {
+            if (isAppropriateEndTag())
+                return flushEmitAndResumeIn(source, DataState);
+        }
+        // Not an end tag: buffer "</" plus the collected characters as text
+        // and drop the candidate name.
+        bufferCharacter('<');
+        bufferCharacter('/');
+        m_token->appendToCharacter(m_temporaryBuffer);
+        m_bufferedEndTagName.clear();
+        RECONSUME_IN(RCDATAState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(RAWTEXTLessThanSignState) {
+        // Anything other than '/' means the pending '<' is plain text:
+        // buffer it and reprocess the current character in RAWTEXTState.
+        if (cc != '/') {
+            bufferCharacter('<');
+            RECONSUME_IN(RAWTEXTState);
+        }
+        // "</" seen: start collecting a possible end tag from a clean buffer.
+        m_temporaryBuffer.clear();
+        ASSERT(m_bufferedEndTagName.isEmpty());
+        ADVANCE_TO(RAWTEXTEndTagOpenState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(RAWTEXTEndTagOpenState) {
+        // Only an ASCII letter can start an end tag name; otherwise the "</"
+        // is buffered as text and the character is reprocessed in RAWTEXTState.
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper && !isASCIILower(cc)) {
+            bufferCharacter('<');
+            bufferCharacter('/');
+            RECONSUME_IN(RAWTEXTState);
+        }
+        // The raw character is kept in m_temporaryBuffer; the possible end
+        // tag name receives the lower-cased form.
+        m_temporaryBuffer.append(cc);
+        addToPossibleEndTag(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(RAWTEXTEndTagNameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(RAWTEXTEndTagNameState) {
+        // Letters extend the candidate name: raw character into
+        // m_temporaryBuffer, lower-cased form into the possible end tag.
+        const bool isUpper = isASCIIUpper(cc);
+        if (isUpper || isASCIILower(cc)) {
+            m_temporaryBuffer.append(cc);
+            addToPossibleEndTag(isUpper ? toLowerCase(cc) : cc);
+            ADVANCE_TO(RAWTEXTEndTagNameState);
+        }
+        // Whitespace, '/' and '>' finish the name, but only count as an end
+        // tag when isAppropriateEndTag() accepts it.
+        if (isTokenizerWhitespace(cc)) {
+            if (isAppropriateEndTag())
+                FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+        } else if (cc == '/') {
+            if (isAppropriateEndTag())
+                FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+        } else if (cc == '>') {
+            if (isAppropriateEndTag())
+                return flushEmitAndResumeIn(source, DataState);
+        }
+        // Not an end tag: buffer "</" plus the collected characters as text
+        // and drop the candidate name.
+        bufferCharacter('<');
+        bufferCharacter('/');
+        m_token->appendToCharacter(m_temporaryBuffer);
+        m_bufferedEndTagName.clear();
+        RECONSUME_IN(RAWTEXTState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataLessThanSignState) {
+        // "</" may begin an end tag: reset the temporary buffer and look at
+        // the next character in ScriptDataEndTagOpenState.
+        if (cc == '/') {
+            m_temporaryBuffer.clear();
+            ASSERT(m_bufferedEndTagName.isEmpty());
+            ADVANCE_TO(ScriptDataEndTagOpenState);
+        }
+        // In every other case the pending '<' is script text.
+        bufferCharacter('<');
+        if (cc == '!') {
+            // "<!" may open an escaped section.
+            bufferCharacter('!');
+            ADVANCE_TO(ScriptDataEscapeStartState);
+        }
+        RECONSUME_IN(ScriptDataState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEndTagOpenState) {
+        // Only an ASCII letter can start an end tag name; otherwise the "</"
+        // is buffered as text and the character is reprocessed in ScriptDataState.
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper && !isASCIILower(cc)) {
+            bufferCharacter('<');
+            bufferCharacter('/');
+            RECONSUME_IN(ScriptDataState);
+        }
+        // The raw character is kept in m_temporaryBuffer; the possible end
+        // tag name receives the lower-cased form.
+        m_temporaryBuffer.append(cc);
+        addToPossibleEndTag(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(ScriptDataEndTagNameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEndTagNameState) {
+        // Letters extend the candidate name: raw character into
+        // m_temporaryBuffer, lower-cased form into the possible end tag.
+        const bool isUpper = isASCIIUpper(cc);
+        if (isUpper || isASCIILower(cc)) {
+            m_temporaryBuffer.append(cc);
+            addToPossibleEndTag(isUpper ? toLowerCase(cc) : cc);
+            ADVANCE_TO(ScriptDataEndTagNameState);
+        }
+        // Whitespace, '/' and '>' finish the name, but only count as an end
+        // tag when isAppropriateEndTag() accepts it.
+        if (isTokenizerWhitespace(cc)) {
+            if (isAppropriateEndTag())
+                FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+        } else if (cc == '/') {
+            if (isAppropriateEndTag())
+                FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+        } else if (cc == '>') {
+            if (isAppropriateEndTag())
+                return flushEmitAndResumeIn(source, DataState);
+        }
+        // Not an end tag: buffer "</" plus the collected characters as text
+        // and drop the candidate name.
+        bufferCharacter('<');
+        bufferCharacter('/');
+        m_token->appendToCharacter(m_temporaryBuffer);
+        m_bufferedEndTagName.clear();
+        RECONSUME_IN(ScriptDataState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEscapeStartState) {
+        // Without a '-' here there is no escape sequence: reprocess the
+        // character as ordinary script data.
+        if (cc != '-')
+            RECONSUME_IN(ScriptDataState);
+        bufferCharacter(cc);
+        ADVANCE_TO(ScriptDataEscapeStartDashState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEscapeStartDashState) {
+        // A second '-' is required to enter the escaped states; anything
+        // else is reprocessed as ordinary script data.
+        if (cc != '-')
+            RECONSUME_IN(ScriptDataState);
+        bufferCharacter(cc);
+        ADVANCE_TO(ScriptDataEscapedDashDashState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEscapedState) {
+        // Escaped script text: '-' and '<' are tracked through dedicated
+        // states, the end-of-file marker is a parse error, and everything
+        // else is buffered as character data.
+        if (cc == '-') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataEscapedDashState);
+        }
+        if (cc == '<')
+            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            RECONSUME_IN(DataState);
+        }
+        bufferCharacter(cc);
+        ADVANCE_TO(ScriptDataEscapedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEscapedDashState) {
+        // One '-' has been buffered. A second one moves to the dash-dash
+        // state; any ordinary character returns to ScriptDataEscapedState.
+        if (cc == '-') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataEscapedDashDashState);
+        }
+        if (cc == '<')
+            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            RECONSUME_IN(DataState);
+        }
+        bufferCharacter(cc);
+        ADVANCE_TO(ScriptDataEscapedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEscapedDashDashState) {
+        // At least two consecutive '-' have been buffered. Further dashes
+        // stay here, '>' completes "-->" and returns to ScriptDataState, and
+        // any ordinary character goes back to ScriptDataEscapedState.
+        if (cc == '-') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataEscapedDashDashState);
+        } else if (cc == '<')
+            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+        else if (cc == '>') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            // This test belongs to the same else-if chain as the branches
+            // above, so the branches stay mutually exclusive on their own
+            // rather than depending on ADVANCE_TO never falling through.
+            parseError();
+            RECONSUME_IN(DataState);
+        } else {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataEscapedState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
+        // "</" may begin an end tag: reset the temporary buffer and look at
+        // the next character in ScriptDataEscapedEndTagOpenState.
+        if (cc == '/') {
+            m_temporaryBuffer.clear();
+            ASSERT(m_bufferedEndTagName.isEmpty());
+            ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
+        }
+        // In every other case the pending '<' is text.
+        bufferCharacter('<');
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper && !isASCIILower(cc))
+            RECONSUME_IN(ScriptDataEscapedState);
+        // A letter might start a nested "<script": buffer it as text and
+        // restart m_temporaryBuffer with its lower-cased form.
+        bufferCharacter(cc);
+        m_temporaryBuffer.clear();
+        m_temporaryBuffer.append(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
+        // Only an ASCII letter can start an end tag name; otherwise the "</"
+        // is buffered as text and the character is reprocessed in
+        // ScriptDataEscapedState.
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper && !isASCIILower(cc)) {
+            bufferCharacter('<');
+            bufferCharacter('/');
+            RECONSUME_IN(ScriptDataEscapedState);
+        }
+        // The raw character is kept in m_temporaryBuffer; the possible end
+        // tag name receives the lower-cased form.
+        m_temporaryBuffer.append(cc);
+        addToPossibleEndTag(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
+        // Letters extend the candidate name: raw character into
+        // m_temporaryBuffer, lower-cased form into the possible end tag.
+        const bool isUpper = isASCIIUpper(cc);
+        if (isUpper || isASCIILower(cc)) {
+            m_temporaryBuffer.append(cc);
+            addToPossibleEndTag(isUpper ? toLowerCase(cc) : cc);
+            ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+        }
+        // Whitespace, '/' and '>' finish the name, but only count as an end
+        // tag when isAppropriateEndTag() accepts it.
+        if (isTokenizerWhitespace(cc)) {
+            if (isAppropriateEndTag())
+                FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+        } else if (cc == '/') {
+            if (isAppropriateEndTag())
+                FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+        } else if (cc == '>') {
+            if (isAppropriateEndTag())
+                return flushEmitAndResumeIn(source, DataState);
+        }
+        // Not an end tag: buffer "</" plus the collected characters as text
+        // and drop the candidate name.
+        bufferCharacter('<');
+        bufferCharacter('/');
+        m_token->appendToCharacter(m_temporaryBuffer);
+        m_bufferedEndTagName.clear();
+        RECONSUME_IN(ScriptDataEscapedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
+        // A delimiter ends the candidate tag name held in m_temporaryBuffer.
+        // Only the name "script" enters the double-escaped state.
+        if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
+            bufferCharacter(cc);
+            if (temporaryBufferIs(scriptTag.localName()))
+                ADVANCE_TO(ScriptDataDoubleEscapedState);
+            ADVANCE_TO(ScriptDataEscapedState);
+        }
+        // Anything that is neither a delimiter nor a letter abandons the
+        // candidate without consuming the character.
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper && !isASCIILower(cc))
+            RECONSUME_IN(ScriptDataEscapedState);
+        // Letters are buffered verbatim as text and recorded lower-cased in
+        // m_temporaryBuffer for the later comparison.
+        bufferCharacter(cc);
+        m_temporaryBuffer.append(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataDoubleEscapedState) {
+        // Double-escaped script text: every character except the end-of-file
+        // marker is buffered; '-' and '<' additionally switch to their
+        // tracking states.
+        if (cc == '-') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataDoubleEscapedDashState);
+        }
+        if (cc == '<') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+        }
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            RECONSUME_IN(DataState);
+        }
+        bufferCharacter(cc);
+        ADVANCE_TO(ScriptDataDoubleEscapedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
+        // One '-' has been buffered while double-escaped. A second one moves
+        // to the dash-dash state; ordinary characters return to
+        // ScriptDataDoubleEscapedState.
+        if (cc == '-') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
+        }
+        if (cc == '<') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+        }
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            RECONSUME_IN(DataState);
+        }
+        bufferCharacter(cc);
+        ADVANCE_TO(ScriptDataDoubleEscapedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
+        // At least two consecutive '-' have been buffered while
+        // double-escaped. Further dashes stay here and '>' returns to
+        // ScriptDataState.
+        if (cc == '-') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
+        }
+        if (cc == '<') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+        }
+        if (cc == '>') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataState);
+        }
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            RECONSUME_IN(DataState);
+        }
+        bufferCharacter(cc);
+        ADVANCE_TO(ScriptDataDoubleEscapedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
+        // Only "</" can start the tag that leaves the double-escaped state;
+        // any other character is reprocessed as double-escaped text.
+        if (cc != '/')
+            RECONSUME_IN(ScriptDataDoubleEscapedState);
+        bufferCharacter(cc);
+        m_temporaryBuffer.clear();
+        ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
+        // A delimiter ends the candidate tag name held in m_temporaryBuffer.
+        // The name "script" drops back to the (single) escaped state;
+        // any other name stays double-escaped.
+        if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
+            bufferCharacter(cc);
+            if (temporaryBufferIs(scriptTag.localName()))
+                ADVANCE_TO(ScriptDataEscapedState);
+            ADVANCE_TO(ScriptDataDoubleEscapedState);
+        }
+        // Anything that is neither a delimiter nor a letter abandons the
+        // candidate without consuming the character.
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper && !isASCIILower(cc))
+            RECONSUME_IN(ScriptDataDoubleEscapedState);
+        // Letters are buffered verbatim as text and recorded lower-cased in
+        // m_temporaryBuffer for the later comparison.
+        bufferCharacter(cc);
+        m_temporaryBuffer.append(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(BeforeAttributeNameState) {
+        // Skip whitespace and handle the characters that end the tag.
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeAttributeNameState);
+        if (cc == '/')
+            ADVANCE_TO(SelfClosingStartTagState);
+        if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper) {
+            if (cc == InputStreamPreprocessor::endOfFileMarker) {
+                parseError();
+                RECONSUME_IN(DataState);
+            }
+            // These characters still start an attribute name, but are flagged.
+            if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
+                parseError();
+        }
+        // Everything else starts a new attribute; upper-case letters are
+        // stored lower-cased.
+        m_token->addNewAttribute();
+        m_token->beginAttributeName(source.numberOfCharactersConsumed());
+        m_token->appendToAttributeName(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(AttributeNameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(AttributeNameState) {
+        // Accumulates the attribute name (upper-case letters are stored
+        // lower-cased). Every branch that leaves the name records where it
+        // ended via endAttributeName() before switching state.
+        if (isTokenizerWhitespace(cc)) {
+            m_token->endAttributeName(source.numberOfCharactersConsumed());
+            ADVANCE_TO(AfterAttributeNameState);
+        } else if (cc == '/') {
+            m_token->endAttributeName(source.numberOfCharactersConsumed());
+            ADVANCE_TO(SelfClosingStartTagState);
+        } else if (cc == '=') {
+            m_token->endAttributeName(source.numberOfCharactersConsumed());
+            ADVANCE_TO(BeforeAttributeValueState);
+        } else if (cc == '>') {
+            m_token->endAttributeName(source.numberOfCharactersConsumed());
+            return emitAndResumeIn(source, DataState);
+        } else if (m_usePreHTML5ParserQuirks && cc == '<') {
+            m_token->endAttributeName(source.numberOfCharactersConsumed());
+            return emitAndReconsumeIn(source, DataState);
+        } else if (isASCIIUpper(cc)) {
+            m_token->appendToAttributeName(toLowerCase(cc));
+            ADVANCE_TO(AttributeNameState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->endAttributeName(source.numberOfCharactersConsumed());
+            RECONSUME_IN(DataState);
+        } else {
+            // '=' never reaches this branch (it is handled above), so only
+            // quotes and '<' need to be flagged before being appended.
+            if (cc == '"' || cc == '\'' || cc == '<')
+                parseError();
+            m_token->appendToAttributeName(cc);
+            ADVANCE_TO(AttributeNameState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(AfterAttributeNameState) {
+        // Skip whitespace after a name, then look for '=', the end of the
+        // tag, or the start of another attribute.
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(AfterAttributeNameState);
+        if (cc == '/')
+            ADVANCE_TO(SelfClosingStartTagState);
+        if (cc == '=')
+            ADVANCE_TO(BeforeAttributeValueState);
+        if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper) {
+            if (cc == InputStreamPreprocessor::endOfFileMarker) {
+                parseError();
+                RECONSUME_IN(DataState);
+            }
+            // These characters still start an attribute name, but are flagged.
+            if (cc == '"' || cc == '\'' || cc == '<')
+                parseError();
+        }
+        // Start the next attribute; upper-case letters are stored lower-cased.
+        m_token->addNewAttribute();
+        m_token->beginAttributeName(source.numberOfCharactersConsumed());
+        m_token->appendToAttributeName(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(AttributeNameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(BeforeAttributeValueState) {
+        // Skip whitespace, then pick the quoted or unquoted value state.
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeAttributeValueState);
+        if (cc == '"') {
+            // The value is recorded as starting one past the opening quote.
+            m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
+            ADVANCE_TO(AttributeValueDoubleQuotedState);
+        }
+        if (cc == '&') {
+            // Let the unquoted state process the '&' itself.
+            m_token->beginAttributeValue(source.numberOfCharactersConsumed());
+            RECONSUME_IN(AttributeValueUnquotedState);
+        }
+        if (cc == '\'') {
+            m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
+            ADVANCE_TO(AttributeValueSingleQuotedState);
+        }
+        if (cc == '>') {
+            parseError();
+            return emitAndResumeIn(source, DataState);
+        }
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            RECONSUME_IN(DataState);
+        }
+        // Any other character is the first character of an unquoted value;
+        // '<', '=' and '`' are flagged but still appended.
+        if (cc == '<' || cc == '=' || cc == '`')
+            parseError();
+        m_token->beginAttributeValue(source.numberOfCharactersConsumed());
+        m_token->appendToAttributeValue(cc);
+        ADVANCE_TO(AttributeValueUnquotedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(AttributeValueDoubleQuotedState) {
+        // Collects a value delimited by '"'.
+        if (cc == '"') {
+            m_token->endAttributeValue(source.numberOfCharactersConsumed());
+            ADVANCE_TO(AfterAttributeValueQuotedState);
+        }
+        if (cc == '&') {
+            // The character-reference state uses this to find its way back here.
+            m_additionalAllowedCharacter = '"';
+            ADVANCE_TO(CharacterReferenceInAttributeValueState);
+        }
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->endAttributeValue(source.numberOfCharactersConsumed());
+            RECONSUME_IN(DataState);
+        }
+        m_token->appendToAttributeValue(cc);
+        ADVANCE_TO(AttributeValueDoubleQuotedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(AttributeValueSingleQuotedState) {
+        // Collects a value delimited by '\''.
+        if (cc == '\'') {
+            m_token->endAttributeValue(source.numberOfCharactersConsumed());
+            ADVANCE_TO(AfterAttributeValueQuotedState);
+        }
+        if (cc == '&') {
+            // The character-reference state uses this to find its way back here.
+            m_additionalAllowedCharacter = '\'';
+            ADVANCE_TO(CharacterReferenceInAttributeValueState);
+        }
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->endAttributeValue(source.numberOfCharactersConsumed());
+            RECONSUME_IN(DataState);
+        }
+        m_token->appendToAttributeValue(cc);
+        ADVANCE_TO(AttributeValueSingleQuotedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(AttributeValueUnquotedState) {
+        // Collects an unquoted value, which ends at whitespace or '>'.
+        if (isTokenizerWhitespace(cc)) {
+            m_token->endAttributeValue(source.numberOfCharactersConsumed());
+            ADVANCE_TO(BeforeAttributeNameState);
+        }
+        if (cc == '&') {
+            // The character-reference state uses this to find its way back here.
+            m_additionalAllowedCharacter = '>';
+            ADVANCE_TO(CharacterReferenceInAttributeValueState);
+        }
+        if (cc == '>') {
+            m_token->endAttributeValue(source.numberOfCharactersConsumed());
+            return emitAndResumeIn(source, DataState);
+        }
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->endAttributeValue(source.numberOfCharactersConsumed());
+            RECONSUME_IN(DataState);
+        }
+        // Quotes, '<', '=' and '`' are flagged but still become part of the value.
+        if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
+            parseError();
+        m_token->appendToAttributeValue(cc);
+        ADVANCE_TO(AttributeValueUnquotedState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(CharacterReferenceInAttributeValueState) {
+        // Tries to decode a character reference inside an attribute value.
+        // If consumeHTMLEntity() reports that it ran out of input, the
+        // tokenizer yields without appending anything.
+        bool notEnoughCharacters = false;
+        Vector<UChar, 16> decodedEntity;
+        bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
+        if (notEnoughCharacters)
+            return haveBufferedCharacterToken();
+        if (!success) {
+            // Not a character reference: the '&' is literal text.
+            ASSERT(decodedEntity.isEmpty());
+            m_token->appendToAttributeValue('&');
+        } else {
+            // The iterator type names the same Vector instantiation as
+            // decodedEntity (inline capacity 16).
+            Vector<UChar, 16>::const_iterator iter = decodedEntity.begin();
+            for (; iter != decodedEntity.end(); ++iter)
+                m_token->appendToAttributeValue(*iter);
+        }
+        // We're supposed to switch back to the attribute value state that
+        // we were in when we were switched into this state. Rather than
+        // keeping track of this explicitly, we observe that the previous
+        // state can be determined by m_additionalAllowedCharacter.
+        if (m_additionalAllowedCharacter == '"')
+            SWITCH_TO(AttributeValueDoubleQuotedState);
+        else if (m_additionalAllowedCharacter == '\'')
+            SWITCH_TO(AttributeValueSingleQuotedState);
+        else if (m_additionalAllowedCharacter == '>')
+            SWITCH_TO(AttributeValueUnquotedState);
+        else
+            ASSERT_NOT_REACHED();
+    }
+    END_STATE()
+
+    BEGIN_STATE(AfterAttributeValueQuotedState) {
+        // After a closing quote only whitespace, '/', '>' (or '<' when
+        // m_usePreHTML5ParserQuirks is set) are accepted without error.
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeAttributeNameState);
+        if (cc == '/')
+            ADVANCE_TO(SelfClosingStartTagState);
+        if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
+        // Everything else is a parse error; only the recovery state differs.
+        parseError();
+        if (cc == InputStreamPreprocessor::endOfFileMarker)
+            RECONSUME_IN(DataState);
+        RECONSUME_IN(BeforeAttributeNameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(SelfClosingStartTagState) {
+        // "/>" marks the token self-closing and emits it.
+        if (cc == '>') {
+            m_token->setSelfClosing();
+            return emitAndResumeIn(source, DataState);
+        }
+        // A '/' followed by anything else is a parse error; only the
+        // recovery state differs.
+        parseError();
+        if (cc == InputStreamPreprocessor::endOfFileMarker)
+            RECONSUME_IN(DataState);
+        RECONSUME_IN(BeforeAttributeNameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(BogusCommentState) { // Entry point for bogus comments: starts the comment token exactly once.
+        m_token->beginComment();
+        RECONSUME_IN(ContinueBogusCommentState); // The current character is re-examined there, not consumed here.
+    }
+    END_STATE()
+
+    BEGIN_STATE(ContinueBogusCommentState) {
+        // Accumulates bogus comment text until '>' or the end of input,
+        // either of which emits the comment token.
+        if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        if (cc == InputStreamPreprocessor::endOfFileMarker)
+            return emitAndReconsumeIn(source, DataState);
+        m_token->appendToComment(cc);
+        ADVANCE_TO(ContinueBogusCommentState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(MarkupDeclarationOpenState) {
+        // Looks ahead for "--", "doctype" (any case) or "[CDATA[". When the
+        // look-ahead cannot decide yet, the tokenizer yields and retries
+        // with more input. Anything unrecognised becomes a bogus comment.
+        DEFINE_STATIC_LOCAL(String, dashDashString, ("--"));
+        DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype"));
+        DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA["));
+        if (cc == '-') {
+            SegmentedString::LookAheadResult dashes = source.lookAhead(dashDashString);
+            if (dashes == SegmentedString::DidMatch) {
+                source.advanceAndASSERT('-');
+                source.advanceAndASSERT('-');
+                m_token->beginComment();
+                SWITCH_TO(CommentStartState);
+            }
+            if (dashes == SegmentedString::NotEnoughCharacters)
+                return haveBufferedCharacterToken();
+        } else if (cc == 'D' || cc == 'd') {
+            SegmentedString::LookAheadResult doctype = source.lookAheadIgnoringCase(doctypeString);
+            if (doctype == SegmentedString::DidMatch) {
+                advanceStringAndASSERTIgnoringCase(source, "doctype");
+                SWITCH_TO(DOCTYPEState);
+            }
+            if (doctype == SegmentedString::NotEnoughCharacters)
+                return haveBufferedCharacterToken();
+        } else if (cc == '[' && shouldAllowCDATA()) {
+            SegmentedString::LookAheadResult cdata = source.lookAhead(cdataString);
+            if (cdata == SegmentedString::DidMatch) {
+                advanceStringAndASSERT(source, "[CDATA[");
+                SWITCH_TO(CDATASectionState);
+            }
+            if (cdata == SegmentedString::NotEnoughCharacters)
+                return haveBufferedCharacterToken();
+        }
+        // Reached when no keyword matched.
+        parseError();
+        RECONSUME_IN(BogusCommentState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(CommentStartState) {
+        // First character after "<!--".
+        if (cc == '-')
+            ADVANCE_TO(CommentStartDashState);
+        if (cc == '>' || cc == InputStreamPreprocessor::endOfFileMarker) {
+            // Both endings are parse errors that emit the comment; only '>'
+            // is consumed.
+            parseError();
+            if (cc == '>')
+                return emitAndResumeIn(source, DataState);
+            return emitAndReconsumeIn(source, DataState);
+        }
+        m_token->appendToComment(cc);
+        ADVANCE_TO(CommentState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(CommentStartDashState) {
+        // Reached from CommentStartState after a '-' that has not been
+        // appended to the comment yet.
+        if (cc == '-')
+            ADVANCE_TO(CommentEndState);
+        if (cc == '>' || cc == InputStreamPreprocessor::endOfFileMarker) {
+            // Both endings are parse errors that emit the comment; only '>'
+            // is consumed.
+            parseError();
+            if (cc == '>')
+                return emitAndResumeIn(source, DataState);
+            return emitAndReconsumeIn(source, DataState);
+        }
+        // The earlier '-' turned out to be comment text.
+        m_token->appendToComment('-');
+        m_token->appendToComment(cc);
+        ADVANCE_TO(CommentState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(CommentState) {
+        // Comment body: a '-' may start the terminator, the end of input
+        // emits what has been collected, everything else is comment text.
+        if (cc == '-')
+            ADVANCE_TO(CommentEndDashState);
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            return emitAndReconsumeIn(source, DataState);
+        }
+        m_token->appendToComment(cc);
+        ADVANCE_TO(CommentState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(CommentEndDashState) {
+        // One '-' is pending (not yet appended to the comment).
+        if (cc == '-')
+            ADVANCE_TO(CommentEndState);
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            return emitAndReconsumeIn(source, DataState);
+        }
+        // The pending '-' was ordinary comment text.
+        m_token->appendToComment('-');
+        m_token->appendToComment(cc);
+        ADVANCE_TO(CommentState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(CommentEndState) {
+        // Two '-' are pending; '>' completes "-->" and emits the comment.
+        if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        // Every other continuation is a parse error.
+        parseError();
+        if (cc == '!')
+            ADVANCE_TO(CommentEndBangState);
+        if (cc == '-') {
+            // A third dash: one '-' becomes text, two remain pending.
+            m_token->appendToComment('-');
+            ADVANCE_TO(CommentEndState);
+        }
+        if (cc == InputStreamPreprocessor::endOfFileMarker)
+            return emitAndReconsumeIn(source, DataState);
+        // The pending "--" was comment text after all.
+        m_token->appendToComment('-');
+        m_token->appendToComment('-');
+        m_token->appendToComment(cc);
+        ADVANCE_TO(CommentState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(CommentEndBangState) {
+        // "--!" is pending (not yet appended to the comment).
+        if (cc == '-') {
+            // Append the pending "--!" as text; the new '-' becomes the
+            // pending dash of CommentEndDashState.
+            m_token->appendToComment('-');
+            m_token->appendToComment('-');
+            m_token->appendToComment('!');
+            ADVANCE_TO(CommentEndDashState);
+        }
+        if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            return emitAndReconsumeIn(source, DataState);
+        }
+        // Anything else: "--!" and the character are all comment text.
+        m_token->appendToComment('-');
+        m_token->appendToComment('-');
+        m_token->appendToComment('!');
+        m_token->appendToComment(cc);
+        ADVANCE_TO(CommentState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(DOCTYPEState) {
+        // Whitespace after the "doctype" keyword is the only input that is
+        // not a parse error.
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeDOCTYPENameState);
+        parseError();
+        if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            // Emit a nameless DOCTYPE token with force-quirks set.
+            m_token->beginDOCTYPE();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        }
+        RECONSUME_IN(BeforeDOCTYPENameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(BeforeDOCTYPENameState) {
+        // Skip whitespace before the DOCTYPE name.
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeDOCTYPENameState);
+        const bool isUpper = isASCIIUpper(cc);
+        if (!isUpper && (cc == '>' || cc == InputStreamPreprocessor::endOfFileMarker)) {
+            // No name at all: emit a nameless DOCTYPE token with
+            // force-quirks set. Only '>' is consumed.
+            parseError();
+            m_token->beginDOCTYPE();
+            m_token->setForceQuirks();
+            if (cc == '>')
+                return emitAndResumeIn(source, DataState);
+            return emitAndReconsumeIn(source, DataState);
+        }
+        // Any other character starts the name; upper-case letters are
+        // stored lower-cased.
+        m_token->beginDOCTYPE(isUpper ? toLowerCase(cc) : cc);
+        ADVANCE_TO(DOCTYPENameState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(DOCTYPENameState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(AfterDOCTYPENameState);
+        else if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        else if (isASCIIUpper(cc)) {
+            m_token->appendToName(toLowerCase(cc));
+            ADVANCE_TO(DOCTYPENameState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            m_token->appendToName(cc);
+            ADVANCE_TO(DOCTYPENameState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(AfterDOCTYPENameState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(AfterDOCTYPENameState);
+        if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            DEFINE_STATIC_LOCAL(String, publicString, ("public"));
+            DEFINE_STATIC_LOCAL(String, systemString, ("system"));
+            if (cc == 'P' || cc == 'p') {
+                SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
+                if (result == SegmentedString::DidMatch) {
+                    advanceStringAndASSERTIgnoringCase(source, "public");
+                    SWITCH_TO(AfterDOCTYPEPublicKeywordState);
+                } else if (result == SegmentedString::NotEnoughCharacters)
+                    return haveBufferedCharacterToken();
+            } else if (cc == 'S' || cc == 's') {
+                SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
+                if (result == SegmentedString::DidMatch) {
+                    advanceStringAndASSERTIgnoringCase(source, "system");
+                    SWITCH_TO(AfterDOCTYPESystemKeywordState);
+                } else if (result == SegmentedString::NotEnoughCharacters)
+                    return haveBufferedCharacterToken();
+            }
+            parseError();
+            m_token->setForceQuirks();
+            ADVANCE_TO(BogusDOCTYPEState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+        else if (cc == '"') {
+            parseError();
+            m_token->setPublicIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            parseError();
+            m_token->setPublicIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+        } else if (cc == '>') {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndResumeIn(source, DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            parseError();
+            m_token->setForceQuirks();
+            ADVANCE_TO(BogusDOCTYPEState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+        else if (cc == '"') {
+            m_token->setPublicIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            m_token->setPublicIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+        } else if (cc == '>') {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndResumeIn(source, DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            parseError();
+            m_token->setForceQuirks();
+            ADVANCE_TO(BogusDOCTYPEState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
+        if (cc == '"')
+            ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+        else if (cc == '>') {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndResumeIn(source, DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            m_token->appendToPublicIdentifier(cc);
+            ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
+        if (cc == '\'')
+            ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+        else if (cc == '>') {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndResumeIn(source, DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            m_token->appendToPublicIdentifier(cc);
+            ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+        else if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        else if (cc == '"') {
+            parseError();
+            m_token->setSystemIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            parseError();
+            m_token->setSystemIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            parseError();
+            m_token->setForceQuirks();
+            ADVANCE_TO(BogusDOCTYPEState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+        else if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        else if (cc == '"') {
+            m_token->setSystemIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            m_token->setSystemIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            parseError();
+            m_token->setForceQuirks();
+            ADVANCE_TO(BogusDOCTYPEState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+        else if (cc == '"') {
+            parseError();
+            m_token->setSystemIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            parseError();
+            m_token->setSystemIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        } else if (cc == '>') {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndResumeIn(source, DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            parseError();
+            m_token->setForceQuirks();
+            ADVANCE_TO(BogusDOCTYPEState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+        if (cc == '"') {
+            m_token->setSystemIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        } else if (cc == '\'') {
+            m_token->setSystemIdentifierToEmptyString();
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        } else if (cc == '>') {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndResumeIn(source, DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            parseError();
+            m_token->setForceQuirks();
+            ADVANCE_TO(BogusDOCTYPEState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
+        if (cc == '"')
+            ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+        else if (cc == '>') {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndResumeIn(source, DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            m_token->appendToSystemIdentifier(cc);
+            ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
+        if (cc == '\'')
+            ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+        else if (cc == '>') {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndResumeIn(source, DataState);
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            m_token->appendToSystemIdentifier(cc);
+            ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+        else if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            m_token->setForceQuirks();
+            return emitAndReconsumeIn(source, DataState);
+        } else {
+            parseError();
+            ADVANCE_TO(BogusDOCTYPEState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(BogusDOCTYPEState) {
+        if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        else if (cc == InputStreamPreprocessor::endOfFileMarker)
+            return emitAndReconsumeIn(source, DataState);
+        ADVANCE_TO(BogusDOCTYPEState);
+    }
+    END_STATE()
+
+    BEGIN_STATE(CDATASectionState) {
+        if (cc == ']')
+            ADVANCE_TO(CDATASectionRightSquareBracketState);
+        else if (cc == InputStreamPreprocessor::endOfFileMarker)
+            RECONSUME_IN(DataState);
+        else {
+            bufferCharacter(cc);
+            ADVANCE_TO(CDATASectionState);
+        }
+    }
+    END_STATE()
+
+    BEGIN_STATE(CDATASectionRightSquareBracketState) {
+        if (cc == ']')
+            ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
+        else {
+            bufferCharacter(']');
+            RECONSUME_IN(CDATASectionState);
+        }
+    }
+
+    BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
+        if (cc == '>')
+            ADVANCE_TO(DataState);
+        else {
+            bufferCharacter(']');
+            bufferCharacter(']');
+            RECONSUME_IN(CDATASectionState);
+        }
+    }
+    END_STATE()
+
+    }
+
+    ASSERT_NOT_REACHED();
+    return false;
+}
+
+void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame) // Picks the tokenizer state for the content following the given tag; unlisted tag names leave the state unchanged.
+{
+    if (tagName == textareaTag || tagName == titleTag)
+        setState(RCDATAState);
+    else if (tagName == plaintextTag)
+        setState(PLAINTEXTState);
+    else if (tagName == scriptTag)
+        setState(ScriptDataState);
+    else if (tagName == styleTag
+        || tagName == iframeTag
+        || tagName == xmpTag
+        || (tagName == noembedTag && HTMLTreeBuilder::pluginsEnabled(frame)) // <noembed> switches to RAWTEXT only when plugins are enabled for the frame.
+        || tagName == noframesTag
+        || (tagName == noscriptTag && HTMLTreeBuilder::scriptEnabled(frame))) // <noscript> switches to RAWTEXT only when script is enabled for the frame.
+        setState(RAWTEXTState);
+}
+
+inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
+{
+    return vectorEqualsString(m_temporaryBuffer, expectedString); // Result of comparing m_temporaryBuffer with expectedString, as judged by vectorEqualsString().
+}
+
+inline void HTMLTokenizer::addToPossibleEndTag(UChar cc)
+{
+    ASSERT(isEndTagBufferingState(m_state)); // Callers must be in a state accepted by isEndTagBufferingState().
+    m_bufferedEndTagName.append(cc); // Accumulates the candidate end tag name that isAppropriateEndTag() later compares.
+}
+
+inline bool HTMLTokenizer::isAppropriateEndTag()
+{
+    return m_bufferedEndTagName == m_appropriateEndTagName; // True only if the name buffered via addToPossibleEndTag() equals m_appropriateEndTagName.
+}
+
+inline void HTMLTokenizer::bufferCharacter(UChar character)
+{
+    ASSERT(character != InputStreamPreprocessor::endOfFileMarker); // The end-of-file marker must never be appended as character data.
+    m_token->ensureIsCharacterToken(); // NOTE(review): presumably starts a Character token if none is in progress; confirm in HTMLToken.h.
+    m_token->appendToCharacter(character);
+}
+
+inline void HTMLTokenizer::parseError()
+{
+    notImplemented(); // Parse error reporting is not implemented; this hook only invokes notImplemented().
+}
+
+inline bool HTMLTokenizer::haveBufferedCharacterToken()
+{
+    return m_token->type() == HTMLToken::Character; // True when the in-progress token is a Character token; nextToken() returns this when a lookahead runs out of input.
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLTokenizer.h b/Source/WebCore/html/parser/HTMLTokenizer.h
new file mode 100644
index 0000000..f16b049
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTokenizer.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLTokenizer_h
+#define HTMLTokenizer_h
+
+#include "SegmentedString.h"
+#include <wtf/Noncopyable.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/Vector.h>
+#include <wtf/text/AtomicString.h>
+
+namespace WebCore {
+
+class Element;
+class Frame;
+class HTMLToken;
+
+class HTMLTokenizer : public Noncopyable {
+public:
+    enum State {
+        DataState,
+        CharacterReferenceInDataState,
+        RCDATAState,
+        CharacterReferenceInRCDATAState,
+        RAWTEXTState,
+        ScriptDataState,
+        PLAINTEXTState,
+        TagOpenState,
+        EndTagOpenState,
+        TagNameState,
+        RCDATALessThanSignState,
+        RCDATAEndTagOpenState,
+        RCDATAEndTagNameState,
+        RAWTEXTLessThanSignState,
+        RAWTEXTEndTagOpenState,
+        RAWTEXTEndTagNameState,
+        ScriptDataLessThanSignState,
+        ScriptDataEndTagOpenState,
+        ScriptDataEndTagNameState,
+        ScriptDataEscapeStartState,
+        ScriptDataEscapeStartDashState,
+        ScriptDataEscapedState,
+        ScriptDataEscapedDashState,
+        ScriptDataEscapedDashDashState,
+        ScriptDataEscapedLessThanSignState,
+        ScriptDataEscapedEndTagOpenState,
+        ScriptDataEscapedEndTagNameState,
+        ScriptDataDoubleEscapeStartState,
+        ScriptDataDoubleEscapedState,
+        ScriptDataDoubleEscapedDashState,
+        ScriptDataDoubleEscapedDashDashState,
+        ScriptDataDoubleEscapedLessThanSignState,
+        ScriptDataDoubleEscapeEndState,
+        BeforeAttributeNameState,
+        AttributeNameState,
+        AfterAttributeNameState,
+        BeforeAttributeValueState,
+        AttributeValueDoubleQuotedState,
+        AttributeValueSingleQuotedState,
+        AttributeValueUnquotedState,
+        CharacterReferenceInAttributeValueState,
+        AfterAttributeValueQuotedState,
+        SelfClosingStartTagState,
+        BogusCommentState,
+        // The ContinueBogusCommentState is not in the HTML5 spec, but we use
+        // it internally to keep track of whether we've started the bogus
+        // comment token yet.
+        ContinueBogusCommentState,
+        MarkupDeclarationOpenState,
+        CommentStartState,
+        CommentStartDashState,
+        CommentState,
+        CommentEndDashState,
+        CommentEndState,
+        CommentEndBangState,
+        DOCTYPEState,
+        BeforeDOCTYPENameState,
+        DOCTYPENameState,
+        AfterDOCTYPENameState,
+        AfterDOCTYPEPublicKeywordState,
+        BeforeDOCTYPEPublicIdentifierState,
+        DOCTYPEPublicIdentifierDoubleQuotedState,
+        DOCTYPEPublicIdentifierSingleQuotedState,
+        AfterDOCTYPEPublicIdentifierState,
+        BetweenDOCTYPEPublicAndSystemIdentifiersState,
+        AfterDOCTYPESystemKeywordState,
+        BeforeDOCTYPESystemIdentifierState,
+        DOCTYPESystemIdentifierDoubleQuotedState,
+        DOCTYPESystemIdentifierSingleQuotedState,
+        AfterDOCTYPESystemIdentifierState,
+        BogusDOCTYPEState,
+        CDATASectionState,
+        // These CDATA states are not in the HTML5 spec, but we use them internally.
+        CDATASectionRightSquareBracketState,
+        CDATASectionDoubleRightSquareBracketState,
+    };
+
+    static PassOwnPtr<HTMLTokenizer> create(bool usePreHTML5ParserQuirks) { return adoptPtr(new HTMLTokenizer(usePreHTML5ParserQuirks)); }
+    ~HTMLTokenizer();
+
+    void reset();
+
+    // This function returns true if it emits a token. Otherwise, callers
+    // must provide the same (in progress) token on the next call (unless
+    // they call reset() first).
+    bool nextToken(SegmentedString&, HTMLToken&);
+
+    int lineNumber() const { return m_lineNumber; }
+    int columnNumber() const { return 1; } // Matches LegacyHTMLDocumentParser.h behavior.
+
+    State state() const { return m_state; }
+    void setState(State state) { m_state = state; }
+
+    // Updates the tokenizer's state according to the given tag name. This is
+    // an approximation of how the tree builder would update the tokenizer's
+    // state. This method is useful for approximating HTML tokenization. To
+    // get exactly the correct tokenization, you need the real tree builder.
+    //
+    // The main failures in the approximation are as follows:
+    //
+    //  * The first set of character tokens emitted for a <pre> element might
+    //    contain an extra leading newline.
+    //  * The replacement of U+0000 with U+FFFD will not be sensitive to the
+    //    tree builder's insertion mode.
+    //  * CDATA sections in foreign content will be tokenized as bogus comments
+    //    instead of as character tokens.
+    //
+    void updateStateFor(const AtomicString& tagName, Frame*);
+
+    // Hack to skip leading newline in <pre>/<listing> for authoring ease.
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+    void setSkipLeadingNewLineForListing(bool value) { m_skipLeadingNewLineForListing = value; }
+
+    bool forceNullCharacterReplacement() const { return m_forceNullCharacterReplacement; }
+    void setForceNullCharacterReplacement(bool value) { m_forceNullCharacterReplacement = value; }
+
+    bool shouldAllowCDATA() const { return m_shouldAllowCDATA; }
+    void setShouldAllowCDATA(bool value) { m_shouldAllowCDATA = value; }
+
+    bool shouldSkipNullCharacters() const
+    {
+        return !m_forceNullCharacterReplacement
+            && (m_state == DataState
+                || m_state == RCDATAState
+                || m_state == RAWTEXTState
+                || m_state == PLAINTEXTState);
+    }
+
+private:
+    // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
+    class InputStreamPreprocessor : public Noncopyable {
+    public:
+        InputStreamPreprocessor(HTMLTokenizer* tokenizer)
+            : m_tokenizer(tokenizer)
+            , m_nextInputCharacter('\0')
+            , m_skipNextNewLine(false)
+        {
+        }
+
+        UChar nextInputCharacter() const { return m_nextInputCharacter; }
+
+        // Returns whether we succeeded in peeking at the next character.
+        // The only way we can fail to peek is if there are no more
+        // characters in |source| (after collapsing \r\n, etc).
+        ALWAYS_INLINE bool peek(SegmentedString& source, int& lineNumber)
+        {
+        PeekAgain:
+            m_nextInputCharacter = *source;
+
+            // Every branch in this function is expensive, so we have a
+            // fast-reject branch for characters that don't require special
+            // handling. Please run the parser benchmark whenever you touch
+            // this function. It's very hot.
+            static const UChar specialCharacterMask = '\n' | '\r' | '\0'; // Equals 0x0F: a character with any bit set outside this mask cannot be '\n', '\r' or '\0'.
+            if (m_nextInputCharacter & ~specialCharacterMask) {
+                m_skipNextNewLine = false;
+                return true;
+            }
+
+            if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
+                m_skipNextNewLine = false;
+                source.advancePastNewline(lineNumber);
+                if (source.isEmpty())
+                    return false;
+                m_nextInputCharacter = *source;
+            }
+            if (m_nextInputCharacter == '\r') {
+                m_nextInputCharacter = '\n'; // Report '\r' as '\n'; a '\n' seen by the very next peek is then skipped, collapsing "\r\n".
+                m_skipNextNewLine = true;
+            } else {
+                m_skipNextNewLine = false;
+                // FIXME: The spec indicates that the surrogate pair range as well as
+                // a number of specific character values are parse errors and should be replaced
+                // by the replacement character. We suspect this is a problem with the spec as doing
+                // that filtering breaks surrogate pair handling and causes us not to match Minefield.
+                if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
+                    if (m_tokenizer->shouldSkipNullCharacters()) {
+                        source.advancePastNonNewline();
+                        if (source.isEmpty())
+                            return false;
+                        goto PeekAgain;
+                    }
+                    m_nextInputCharacter = 0xFFFD; // U+FFFD stands in for a U+0000 that is neither skipped nor the end-of-file marker.
+                }
+            }
+            return true;
+        }
+
+        // Returns whether there are more characters in |source| after advancing.
+        bool advance(SegmentedString& source, int& lineNumber)
+        {
+            source.advance(lineNumber);
+            if (source.isEmpty())
+                return false;
+            return peek(source, lineNumber);
+        }
+
+        static const UChar endOfFileMarker;
+
+    private:
+        bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const
+        {
+            return source.isClosed() && source.length() == 1; // Only the final remaining character of a closed source qualifies.
+        }
+
+        HTMLTokenizer* m_tokenizer;
+
+        // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
+        UChar m_nextInputCharacter;
+        bool m_skipNextNewLine;
+    };
+
+    HTMLTokenizer(bool usePreHTML5ParserQuirks);
+
+    inline bool processEntity(SegmentedString&);
+
+    inline void parseError();
+    inline void bufferCharacter(UChar);
+    inline void bufferCodePoint(unsigned);
+
+    inline bool emitAndResumeIn(SegmentedString&, State);
+    inline bool emitAndReconsumeIn(SegmentedString&, State);
+    inline bool emitEndOfFile(SegmentedString&);
+    inline bool flushEmitAndResumeIn(SegmentedString&, State);
+
+    // Return whether we need to emit a character token before dealing with
+    // the buffered end tag.
+    inline bool flushBufferedEndTag(SegmentedString&);
+    inline bool temporaryBufferIs(const String&);
+
+    // Sometimes we speculatively consume input characters and we don't
+    // know whether they represent end tags or RCDATA, etc. These
+    // functions help manage this state.
+    inline void addToPossibleEndTag(UChar cc);
+    inline void saveEndTagNameIfNeeded();
+    inline bool isAppropriateEndTag();
+
+    inline bool haveBufferedCharacterToken();
+
+    State m_state;
+
+    Vector<UChar, 32> m_appropriateEndTagName;
+
+    // m_token is owned by the caller. If nextToken is not on the stack,
+    // this member might be pointing to unallocated memory.
+    HTMLToken* m_token;
+    int m_lineNumber;
+
+    bool m_skipLeadingNewLineForListing;
+    bool m_forceNullCharacterReplacement;
+    bool m_shouldAllowCDATA;
+
+    // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
+    Vector<UChar, 32> m_temporaryBuffer;
+
+    // We occasionally want to emit both a character token and an end tag
+    // token (e.g., when lexing script). We buffer the name of the end tag
+    // token here so we remember it next time we re-enter the tokenizer.
+    Vector<UChar, 32> m_bufferedEndTagName;
+
+    // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character
+    UChar m_additionalAllowedCharacter;
+
+    // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
+    InputStreamPreprocessor m_inputStreamPreprocessor;
+    
+    bool m_usePreHTML5ParserQuirks;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
new file mode 100644
index 0000000..02713e5
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -0,0 +1,2822 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLTreeBuilder.h"
+
+#include "CharacterNames.h"
+#include "Comment.h"
+#include "DocumentFragment.h"
+#include "DocumentType.h"
+#include "Frame.h"
+#include "HTMLDocument.h"
+#include "HTMLDocumentParser.h"
+#include "HTMLElementFactory.h"
+#include "HTMLFormElement.h"
+#include "HTMLHtmlElement.h"
+#include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
+#include "HTMLScriptElement.h"
+#include "HTMLToken.h"
+#include "HTMLTokenizer.h"
+#include "LocalizedStrings.h"
+#include "MathMLNames.h"
+#include "NotImplemented.h"
+#include "SVGNames.h"
+#include "ScriptController.h"
+#include "Text.h"
+#include "XLinkNames.h"
+#include "XMLNSNames.h"
+#include "XMLNames.h"
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+static const int uninitializedLineNumberValue = -1;
+
+static TextPosition1 uninitializedPositionValue1() // Value m_scriptToProcessStartPosition starts with and is reset to in takeScriptToProcess().
+{
+    return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
+}
+
+namespace {
+
+inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
+{
+    return character == replacementCharacter || isHTMLSpace(character); // The replacement character is accepted alongside HTML whitespace.
+}
+
+inline bool isAllWhitespace(const String& string)
+{
+    return string.isAllSpecialCharacters<isHTMLSpace>(); // Delegates the per-character isHTMLSpace test to String.
+}
+
+inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
+{
+    return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>(); // Same test as isAllWhitespace, but with the replacement character accepted too.
+}
+
+// Reports whether tagName is one of the numbered heading tags h1 through h6.
+bool isNumberedHeaderTag(const AtomicString& tagName)
+{
+    if (tagName == h1Tag || tagName == h2Tag || tagName == h3Tag)
+        return true;
+    if (tagName == h4Tag || tagName == h5Tag || tagName == h6Tag)
+        return true;
+    return false;
+}
+
+// Matches the caption, col and colgroup tag names.
+bool isCaptionColOrColgroupTag(const AtomicString& tagName)
+{
+    const bool isColumnTag = tagName == colTag || tagName == colgroupTag;
+    return tagName == captionTag || isColumnTag;
+}
+
+bool isTableCellContextTag(const AtomicString& tagName)
+{
+    return tagName == tdTag || tagName == thTag; // Either of the two table cell tag names.
+}
+
+// Matches the tbody, thead and tfoot tag names.
+bool isTableBodyContextTag(const AtomicString& tagName)
+{
+    const bool isHeadOrFoot = tagName == theadTag || tagName == tfootTag;
+    return tagName == tbodyTag || isHeadOrFoot;
+}
+
+// Membership test for the "special" element category: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
+bool isSpecialNode(Node* node)
+{
+    if (node->hasTagName(MathMLNames::miTag)
+        || node->hasTagName(MathMLNames::moTag)
+        || node->hasTagName(MathMLNames::mnTag)
+        || node->hasTagName(MathMLNames::msTag)
+        || node->hasTagName(MathMLNames::mtextTag)
+        || node->hasTagName(MathMLNames::annotation_xmlTag)
+        || node->hasTagName(SVGNames::foreignObjectTag)
+        || node->hasTagName(SVGNames::descTag)
+        || node->hasTagName(SVGNames::titleTag))
+        return true; // The listed MathML and SVG elements are special regardless of the checks below.
+    if (node->namespaceURI() != xhtmlNamespaceURI)
+        return false; // Beyond those, only nodes in the XHTML namespace are considered.
+    const AtomicString& tagName = node->localName();
+    return tagName == addressTag
+        || tagName == appletTag
+        || tagName == areaTag
+        || tagName == articleTag
+        || tagName == asideTag
+        || tagName == baseTag
+        || tagName == basefontTag
+        || tagName == bgsoundTag
+        || tagName == blockquoteTag
+        || tagName == bodyTag
+        || tagName == brTag
+        || tagName == buttonTag
+        || tagName == captionTag
+        || tagName == centerTag
+        || tagName == colTag
+        || tagName == colgroupTag
+        || tagName == commandTag
+        || tagName == ddTag
+        || tagName == detailsTag
+        || tagName == dirTag
+        || tagName == divTag
+        || tagName == dlTag
+        || tagName == dtTag
+        || tagName == embedTag
+        || tagName == fieldsetTag
+        || tagName == figcaptionTag
+        || tagName == figureTag
+        || tagName == footerTag
+        || tagName == formTag
+        || tagName == frameTag
+        || tagName == framesetTag
+        || isNumberedHeaderTag(tagName)
+        || tagName == headTag
+        || tagName == headerTag
+        || tagName == hgroupTag
+        || tagName == hrTag
+        || tagName == htmlTag
+        || tagName == iframeTag
+        || tagName == imgTag
+        || tagName == inputTag
+        || tagName == isindexTag
+        || tagName == liTag
+        || tagName == linkTag
+        || tagName == listingTag
+        || tagName == marqueeTag
+        || tagName == menuTag
+        || tagName == metaTag
+        || tagName == navTag
+        || tagName == noembedTag
+        || tagName == noframesTag
+        || tagName == noscriptTag
+        || tagName == objectTag
+        || tagName == olTag
+        || tagName == pTag
+        || tagName == paramTag
+        || tagName == plaintextTag
+        || tagName == preTag
+        || tagName == scriptTag
+        || tagName == sectionTag
+        || tagName == selectTag
+        || tagName == styleTag
+        || tagName == summaryTag
+        || tagName == tableTag
+        || isTableBodyContextTag(tagName)
+        || tagName == tdTag
+        || tagName == textareaTag
+        || tagName == thTag
+        || tagName == titleTag
+        || tagName == trTag
+        || tagName == ulTag
+        || tagName == wbrTag
+        || tagName == xmpTag;
+}
+
+// Matches the formatting tag names other than <a> and <nobr>, i.e.
+// b, big, code, em, font, i, s, small, strike, strong, tt and u.
+// isNonAnchorFormattingTag and isFormattingTag build on this.
+bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == bTag || tagName == bigTag || tagName == codeTag)
+        return true;
+    if (tagName == emTag || tagName == fontTag || tagName == iTag)
+        return true;
+    if (tagName == sTag || tagName == smallTag || tagName == strikeTag)
+        return true;
+    if (tagName == strongTag || tagName == ttTag || tagName == uTag)
+        return true;
+    return false;
+}
+
+// Any formatting tag name except <a>: <nobr> plus the isNonAnchorNonNobrFormattingTag set.
+bool isNonAnchorFormattingTag(const AtomicString& tagName)
+{
+    return isNonAnchorNonNobrFormattingTag(tagName) || tagName == nobrTag;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
+bool isFormattingTag(const AtomicString& tagName)
+{
+    return isNonAnchorFormattingTag(tagName) || tagName == aTag; // <a> plus every other formatting tag name.
+}
+
+// Walks from element up through its element ancestors and returns the first
+// form element found (possibly element itself), or 0 if the chain ends first.
+HTMLFormElement* closestFormAncestor(Element* element)
+{
+    for (Element* candidate = element; candidate; ) {
+        if (candidate->hasTagName(formTag))
+            return static_cast<HTMLFormElement*>(candidate);
+        ContainerNode* container = candidate->parentNode();
+        candidate = container && container->isElementNode() ? static_cast<Element*>(container) : 0;
+    }
+    return 0;
+}
+
+} // namespace
+
+// Cursor over the UChar data of a character token (or of a String). The
+// destructor asserts that every character was consumed before going away.
+class HTMLTreeBuilder::ExternalCharacterTokenBuffer : public Noncopyable {
+public:
+    explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
+        : m_current(token.characters().data())
+        , m_end(m_current + token.characters().size())
+    {
+        ASSERT(!isEmpty());
+    }
+
+    explicit ExternalCharacterTokenBuffer(const String& string)
+        : m_current(string.characters())
+        , m_end(m_current + string.length())
+    {
+        ASSERT(!isEmpty());
+    }
+
+    ~ExternalCharacterTokenBuffer() { ASSERT(isEmpty()); }
+
+    bool isEmpty() const { return m_end == m_current; }
+
+    // Advances past any run of HTML whitespace at the front of the buffer.
+    void skipLeadingWhitespace() { skipLeading<isHTMLSpace>(); }
+
+    // Consume the leading run of whitespace (or of non-whitespace) and
+    // return it; the null String comes back when that run is empty.
+    String takeLeadingWhitespace() { return takeLeading<isHTMLSpace>(); }
+    String takeLeadingNonWhitespace() { return takeLeading<isNotHTMLSpace>(); }
+
+    // Consumes everything that is left and returns it as one String.
+    String takeRemaining()
+    {
+        ASSERT(!isEmpty());
+        const UChar* const remainder = m_current;
+        m_current = m_end;
+        return String(remainder, m_end - remainder);
+    }
+
+    // Appends everything that is left to recipient and empties this buffer.
+    void giveRemainingTo(Vector<UChar>& recipient)
+    {
+        recipient.append(m_current, m_end - m_current);
+        m_current = m_end;
+    }
+
+    // Consumes the rest of the buffer but returns only the whitespace
+    // characters found in it; everything else is dropped.
+    String takeRemainingWhitespace()
+    {
+        ASSERT(!isEmpty());
+        Vector<UChar> whitespace;
+        do {
+            const UChar character = *m_current;
+            ++m_current;
+            if (isHTMLSpace(character))
+                whitespace.append(character);
+        } while (m_end > m_current);
+        // Hand back the null string (not an empty one) when nothing was
+        // whitespace: the intent is to insert no text node rather than an empty one.
+        if (whitespace.isEmpty())
+            return String();
+        return String::adopt(whitespace);
+    }
+
+private:
+    // Moves m_current forward while characterPredicate accepts the character
+    // under it, stopping at m_end. Must not be called on an empty buffer.
+    template<bool characterPredicate(UChar)>
+    void skipLeading()
+    {
+        ASSERT(!isEmpty());
+        do {
+            if (!characterPredicate(*m_current))
+                return;
+        } while (++m_current != m_end);
+    }
+
+    // Consumes the leading run accepted by characterPredicate and returns it,
+    // or the null String when the first character is already rejected.
+    template<bool characterPredicate(UChar)>
+    String takeLeading()
+    {
+        ASSERT(!isEmpty());
+        const UChar* const runStart = m_current;
+        skipLeading<characterPredicate>();
+        if (m_current == runStart)
+            return String();
+        return String(runStart, m_current - runStart);
+    }
+
+    const UChar* m_current; // Next character to consume.
+    const UChar* m_end; // One past the last character.
+};
+
+
+HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks) // Whole-document constructor; m_fragmentContext is left default-constructed.
+    : m_framesetOk(true)
+    , m_document(document)
+    , m_tree(document, FragmentScriptingAllowed, false)
+    , m_reportErrors(reportErrors)
+    , m_isPaused(false)
+    , m_insertionMode(InitialMode)
+    , m_originalInsertionMode(InitialMode)
+    , m_parser(parser)
+    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
+    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
+    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
+    , m_hasPendingForeignInsertionModeSteps(false)
+{
+}
+
+// FIXME: Member variables should be grouped into self-initializing structs to
+// minimize code duplication between these constructors.
+HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
+    : m_framesetOk(true)
+    , m_fragmentContext(fragment, contextElement, scriptingPermission)
+    , m_document(m_fragmentContext.document())
+    , m_tree(m_document, scriptingPermission, true)
+    , m_reportErrors(false) // FIXME: Why not report errors in fragments?
+    , m_isPaused(false)
+    , m_insertionMode(InitialMode)
+    , m_originalInsertionMode(InitialMode)
+    , m_parser(parser)
+    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
+    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
+    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
+    , m_hasPendingForeignInsertionModeSteps(false)
+{
+    if (!contextElement)
+        return;
+    // With a context element, perform steps 4.2-4.6 of the HTML5 fragment case:
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
+    m_document->setCompatibilityMode(contextElement->document()->compatibilityMode());
+    processFakeStartTag(htmlTag);
+    resetInsertionModeAppropriately();
+    m_tree.setForm(closestFormAncestor(contextElement));
+}
+
+HTMLTreeBuilder::~HTMLTreeBuilder() // No explicit teardown; the body is intentionally empty.
+{
+}
+
+void HTMLTreeBuilder::detach()
+{
+    // This call makes little sense in fragment mode, but for consistency
+    // DocumentParser expects detach() to always be called before it's destroyed.
+    m_document = 0; // Drop our pointer to the document.
+    // HTMLConstructionSite might be on the callstack when detach() is called;
+    // otherwise we'd just call m_tree.clear() here instead.
+    m_tree.detach();
+}
+
+HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext() // Empty context: null fragment and null context element.
+    : m_fragment(0)
+    , m_contextElement(0)
+    , m_scriptingPermission(FragmentScriptingAllowed)
+{
+}
+
+HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
+    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI())) // Separate document the fragment is parsed into.
+    , m_fragment(fragment)
+    , m_contextElement(contextElement)
+    , m_scriptingPermission(scriptingPermission)
+{
+    m_dummyDocumentForFragmentParsing->setCompatibilityMode(fragment->document()->compatibilityMode()); // Mirror the compatibility mode of the fragment's document.
+}
+
+Document* HTMLTreeBuilder::FragmentParsingContext::document() const
+{
+    ASSERT(m_fragment); // Only meaningful for a context that was given a fragment.
+    return m_dummyDocumentForFragmentParsing.get(); // The dummy parsing document, not the fragment's own document.
+}
+
+void HTMLTreeBuilder::FragmentParsingContext::finished()
+{
+    // Parsing is done: move the parsed children into the DocumentFragment, from the dummy document's root element if there is a context element, else from the dummy document itself.
+    if (m_contextElement)
+        m_fragment->takeAllChildrenFrom(m_dummyDocumentForFragmentParsing->documentElement());
+    else
+        m_fragment->takeAllChildrenFrom(m_dummyDocumentForFragmentParsing.get());
+}
+
+HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext() // No explicit teardown; the body is intentionally empty.
+{
+}
+
+PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
+{
+    // Hand the recorded start position to the caller and reset our copy to
+    // the "uninitialized" sentinel.
+    scriptStartPosition = m_scriptToProcessStartPosition;
+    m_scriptToProcessStartPosition = uninitializedPositionValue1();
+    // The HTML5 spec runs scripts inside the tree builder; we instead pause to
+    // leave the builder, so unpause here (callers may pause us again).
+    m_isPaused = false;
+    return m_scriptToProcess.release();
+}
+
+void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
+{
+    AtomicHTMLToken atomicToken(rawToken); // Wrap the raw token in the atomic form the builder works on.
+    constructTreeFromAtomicToken(atomicToken);
+}
+
+void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
+{
+    processToken(token);
+
+    // Swallowing U+0000 characters isn't in the HTML5 spec, but replacing all of them has compatibility
+    // problems, so the tokenizer flags are refreshed from the insertion mode after each token.
+    const bool inForeignContent = m_insertionMode == InForeignContentMode;
+    m_parser->tokenizer()->setForceNullCharacterReplacement(inForeignContent || m_insertionMode == TextMode);
+    m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
+}
+
+void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
+{
+    switch (token.type()) {
+    case HTMLToken::StartTag:
+        processStartTag(token);
+        return;
+    case HTMLToken::EndTag:
+        processEndTag(token);
+        return;
+    case HTMLToken::Character:
+        processCharacter(token);
+        return;
+    case HTMLToken::Comment:
+        processComment(token);
+        return;
+    case HTMLToken::DOCTYPE:
+        processDoctypeToken(token);
+        return;
+    case HTMLToken::EndOfFile:
+        processEndOfFile(token);
+        return;
+    case HTMLToken::Uninitialized:
+        ASSERT_NOT_REACHED(); // A token must have been given a type before it is dispatched.
+        return;
+    }
+}
+
+// Handles a DOCTYPE token: it is inserted only in the initial insertion mode;
+// otherwise it ends up reported through parseError().
+void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::DOCTYPE);
+    if (m_insertionMode == InitialMode) {
+        m_tree.insertDoctype(token);
+        setInsertionMode(BeforeHTMLMode);
+    } else if (m_insertionMode == InTableTextMode) {
+        // Run the in-table-text default handling, then process the token again.
+        defaultForInTableText();
+        processDoctypeToken(token);
+    } else
+        parseError(token);
+}
+
+void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
+{
+    // FIXME: Using only the local name will not be enough for SVG/MathML tags; a fancier conversion is needed.
+    AtomicHTMLToken syntheticStartTag(HTMLToken::StartTag, tagName.localName(), attributes);
+    processStartTag(syntheticStartTag);
+}
+
+void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
+{
+    // FIXME: Using only the local name will not be enough for SVG/MathML tags; a fancier conversion is needed.
+    AtomicHTMLToken syntheticEndTag(HTMLToken::EndTag, tagName.localName());
+    processEndTag(syntheticEndTag);
+}
+
+void HTMLTreeBuilder::processFakeCharacters(const String& characters)
+{
+    ASSERT(!characters.isEmpty()); // The character buffer below asserts on empty input as well.
+    ExternalCharacterTokenBuffer syntheticCharacters(characters);
+    processCharacterBuffer(syntheticCharacters);
+}
+
+// Feeds the builder a synthetic </p> end tag when a p element is in button
+// scope on the stack of open elements; does nothing otherwise.
+void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
+{
+    if (m_tree.openElements()->inButtonScope(pTag.localName()))
+        processFakeEndTag(pTag);
+}
+
+// Takes the token's attributes, drops name/action/prompt, and inserts a mapped name attribute whose value is the isindex local name.
+PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
+{
+    RefPtr<NamedNodeMap> attributes = token.takeAtributes();
+    if (attributes) {
+        attributes->removeAttribute(nameAttr);
+        attributes->removeAttribute(actionAttr);
+        attributes->removeAttribute(promptAttr);
+    } else
+        attributes = NamedNodeMap::create();
+
+    RefPtr<Attribute> isindexNameAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
+    attributes->insertAttribute(isindexNameAttribute.release(), false);
+    return attributes.release();
+}
+
+void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token) // Expands <isindex> into a synthetic form / hr / label / input / hr sequence.
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(token.name() == isindexTag);
+    parseError(token);
+    if (m_tree.form())
+        return; // A form is already set on the tree: ignore the isindex tag.
+    notImplemented(); // Acknowledge self-closing flag
+    processFakeStartTag(formTag);
+    RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
+    if (actionAttribute) {
+        ASSERT(m_tree.currentElement()->hasTagName(formTag));
+        m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value()); // Carry isindex's action over to the synthetic form.
+    }
+    processFakeStartTag(hrTag);
+    processFakeStartTag(labelTag);
+    RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
+    if (promptAttribute)
+        processFakeCharacters(promptAttribute->value());
+    else
+        processFakeCharacters(searchableIndexIntroduction()); // No prompt attribute: use the searchableIndexIntroduction() text.
+    processFakeStartTag(inputTag, attributesForIsindexInput(token));
+    notImplemented(); // This second set of characters may be needed by non-english locales.
+    processFakeEndTag(labelTag);
+    processFakeStartTag(hrTag);
+    processFakeEndTag(formTag);
+}
+
+namespace {
+
+bool isLi(const Element* element) // Passed as the shouldClose predicate of processCloseWhenNestedTag for li start tags.
+{
+    return element->hasTagName(liTag);
+}
+
+// Passed as the shouldClose predicate of processCloseWhenNestedTag for dd and dt start tags.
+bool isDdOrDt(const Element* element)
+{
+    return element->hasTagName(dtTag) || element->hasTagName(ddTag);
+}
+
+}
+
+// Shared start-tag handling for li/dd/dt: walks the open elements from the top, fake-closing the first one shouldClose
+// accepts, and stops early at a special element other than address, div or p. The new element is then inserted.
+template <bool shouldClose(const Element*)>
+void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
+{
+    m_framesetOk = false;
+    for (HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); ; record = record->next()) {
+        Element* openElement = record->element();
+        if (shouldClose(openElement)) {
+            processFakeEndTag(openElement->tagQName());
+            break;
+        }
+        if (isSpecialNode(openElement) && !(openElement->hasTagName(addressTag) || openElement->hasTagName(divTag) || openElement->hasTagName(pTag)))
+            break;
+    }
+    processFakePEndTagIfPInButtonScope();
+    m_tree.insertHTMLElement(token);
+}
+
+namespace {
+
+typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
+
+// Adds an entry, keyed by the lowered local name, for every name whose local name differs from its lowered form.
+void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
+{
+    for (size_t index = 0; index < length; ++index) {
+        const QualifiedName& casedName = *names[index];
+        const AtomicString loweredLocalName = casedName.localName().lower();
+        if (loweredLocalName != casedName.localName())
+            map->add(loweredLocalName, casedName);
+    }
+}
+
+// Replaces the token's name with the cased SVG tag name it is the lowered form of, when there is one.
+void adjustSVGTagNameCase(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* loweredToCasedTags = 0;
+    if (!loweredToCasedTags) {
+        loweredToCasedTags = new PrefixedNameToQualifiedNameMap;
+        size_t tagCount = 0;
+        QualifiedName** svgTags = SVGNames::getSVGTags(&tagCount);
+        mapLoweredLocalNameToName(loweredToCasedTags, svgTags, tagCount);
+    }
+
+    const QualifiedName& casedName = loweredToCasedTags->get(token.name());
+    if (!casedName.localName().isNull())
+        token.setName(casedName.localName());
+}
+
+// Renames each token attribute whose local name is the lowered form of a name returned by getAttrs to that cased name.
+template<QualifiedName** getAttrs(size_t* length)>
+void adjustAttributes(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* loweredToCasedAttrs = 0;
+    if (!loweredToCasedAttrs) {
+        loweredToCasedAttrs = new PrefixedNameToQualifiedNameMap;
+        size_t attrCount = 0;
+        QualifiedName** casedAttrs = getAttrs(&attrCount);
+        mapLoweredLocalNameToName(loweredToCasedAttrs, casedAttrs, attrCount);
+    }
+
+    NamedNodeMap* tokenAttributes = token.attributes();
+    if (!tokenAttributes)
+        return;
+    for (unsigned index = 0; index < tokenAttributes->length(); ++index) {
+        Attribute* current = tokenAttributes->attributeItem(index);
+        const QualifiedName& casedName = loweredToCasedAttrs->get(current->localName());
+        if (!casedName.localName().isNull())
+            current->parserSetName(casedName);
+    }
+}
+
+void adjustSVGAttributes(AtomicHTMLToken& token)
+{
+    adjustAttributes<SVGNames::getSVGAttrs>(token); // Case-adjust against the SVG attribute name table.
+}
+
+void adjustMathMLAttributes(AtomicHTMLToken& token)
+{
+    adjustAttributes<MathMLNames::getMathMLAttrs>(token); // Case-adjust against the MathML attribute name table.
+}
+
+// For each entry of names, maps the string "prefix:localName" to a QualifiedName carrying that prefix.
+void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
+{
+    for (size_t index = 0; index < length; ++index) {
+        const QualifiedName& unprefixed = *names[index];
+        AtomicString lookupKey(prefix + ":" + unprefixed.localName());
+        QualifiedName prefixedName(prefix, unprefixed.localName(), unprefixed.namespaceURI());
+        map->add(lookupKey, prefixedName);
+    }
+}
+
+// Renames token attributes spelled xlink:*, xml:*, xmlns or xmlns:xlink to the matching namespaced QualifiedName.
+void adjustForeignAttributes(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* foreignNames = 0;
+    if (!foreignNames) {
+        foreignNames = new PrefixedNameToQualifiedNameMap;
+        size_t attrCount = 0;
+        QualifiedName** prefixedAttrs = XLinkNames::getXLinkAttrs(&attrCount);
+        addNamesWithPrefix(foreignNames, "xlink", prefixedAttrs, attrCount);
+
+        prefixedAttrs = XMLNames::getXMLAttrs(&attrCount);
+        addNamesWithPrefix(foreignNames, "xml", prefixedAttrs, attrCount);
+
+        foreignNames->add("xmlns", XMLNSNames::xmlnsAttr);
+        foreignNames->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
+    }
+
+    NamedNodeMap* tokenAttributes = token.attributes();
+    if (!tokenAttributes)
+        return;
+    for (unsigned index = 0; index < tokenAttributes->length(); ++index) {
+        Attribute* current = tokenAttributes->attributeItem(index);
+        const QualifiedName& foreignName = foreignNames->get(current->localName());
+        if (!foreignName.localName().isNull())
+            current->parserSetName(foreignName);
+    }
+}
+
+}
+
+void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    if (token.name() == htmlTag) {
+        m_tree.insertHTMLHtmlStartTagInBody(token);
+        return;
+    }
+    if (token.name() == baseTag
+        || token.name() == basefontTag
+        || token.name() == bgsoundTag
+        || token.name() == commandTag
+        || token.name() == linkTag
+        || token.name() == metaTag
+        || token.name() == noframesTag
+        || token.name() == scriptTag
+        || token.name() == styleTag
+        || token.name() == titleTag) {
+        bool didProcess = processStartTagForInHead(token);
+        ASSERT_UNUSED(didProcess, didProcess);
+        return;
+    }
+    if (token.name() == bodyTag) {
+        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        m_tree.insertHTMLBodyStartTagInBody(token);
+        return;
+    }
+    if (token.name() == framesetTag) {
+        parseError(token);
+        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        if (!m_framesetOk)
+            return;
+        ExceptionCode ec = 0;
+        m_tree.openElements()->bodyElement()->remove(ec);
+        ASSERT(!ec);
+        m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
+        m_tree.openElements()->popHTMLBodyElement();
+        ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InFramesetMode);
+        return;
+    }
+    if (token.name() == addressTag
+        || token.name() == articleTag
+        || token.name() == asideTag
+        || token.name() == blockquoteTag
+        || token.name() == centerTag
+        || token.name() == detailsTag
+        || token.name() == dirTag
+        || token.name() == divTag
+        || token.name() == dlTag
+        || token.name() == fieldsetTag
+        || token.name() == figcaptionTag
+        || token.name() == figureTag
+        || token.name() == footerTag
+        || token.name() == headerTag
+        || token.name() == hgroupTag
+        || token.name() == menuTag
+        || token.name() == navTag
+        || token.name() == olTag
+        || token.name() == pTag
+        || token.name() == sectionTag
+        || token.name() == summaryTag
+        || token.name() == ulTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertHTMLElement(token);
+        return;
+    }
+    if (isNumberedHeaderTag(token.name())) {
+        processFakePEndTagIfPInButtonScope();
+        if (isNumberedHeaderTag(m_tree.currentElement()->localName())) {
+            parseError(token);
+            m_tree.openElements()->pop();
+        }
+        m_tree.insertHTMLElement(token);
+        return;
+    }
+    if (token.name() == preTag || token.name() == listingTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertHTMLElement(token);
+        m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == formTag) {
+        if (m_tree.form()) {
+            parseError(token);
+            return;
+        }
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertHTMLFormElement(token);
+        return;
+    }
+    if (token.name() == liTag) {
+        processCloseWhenNestedTag<isLi>(token);
+        return;
+    }
+    if (token.name() == ddTag || token.name() == dtTag) {
+        processCloseWhenNestedTag<isDdOrDt>(token);
+        return;
+    }
+    if (token.name() == plaintextTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertHTMLElement(token);
+        m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
+        return;
+    }
+    if (token.name() == buttonTag) {
+        if (m_tree.openElements()->inScope(buttonTag)) {
+            parseError(token);
+            processFakeEndTag(buttonTag);
+            reprocessStartTag(token); // FIXME: Could we just fall through here?
+            return;
+        }
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertHTMLElement(token);
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == aTag) {
+        Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
+        if (activeATag) {
+            parseError(token);
+            processFakeEndTag(aTag);
+            m_tree.activeFormattingElements()->remove(activeATag);
+            if (m_tree.openElements()->contains(activeATag))
+                m_tree.openElements()->remove(activeATag);
+        }
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertFormattingElement(token);
+        return;
+    }
+    if (isNonAnchorNonNobrFormattingTag(token.name())) {
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertFormattingElement(token);
+        return;
+    }
+    if (token.name() == nobrTag) {
+        m_tree.reconstructTheActiveFormattingElements();
+        if (m_tree.openElements()->inScope(nobrTag)) {
+            parseError(token);
+            processFakeEndTag(nobrTag);
+            m_tree.reconstructTheActiveFormattingElements();
+        }
+        m_tree.insertFormattingElement(token);
+        return;
+    }
+    if (token.name() == appletTag
+        || token.name() == marqueeTag
+        || token.name() == objectTag) {
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertHTMLElement(token);
+        m_tree.activeFormattingElements()->appendMarker();
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == tableTag) {
+        if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
+            processFakeEndTag(pTag);
+        m_tree.insertHTMLElement(token);
+        m_framesetOk = false;
+        setInsertionMode(InTableMode);
+        return;
+    }
+    if (token.name() == imageTag) {
+        parseError(token);
+        // Apparently we're not supposed to ask.
+        token.setName(imgTag.localName());
+        prepareToReprocessToken();
+        // Note the fall through to the imgTag handling below!
+    }
+    if (token.name() == areaTag
+        || token.name() == brTag
+        || token.name() == embedTag
+        || token.name() == imgTag
+        || token.name() == keygenTag
+        || token.name() == wbrTag) {
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertSelfClosingHTMLElement(token);
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == inputTag) {
+        RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertSelfClosingHTMLElement(token);
+        if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
+            m_framesetOk = false;
+        return;
+    }
+    if (token.name() == paramTag
+        || token.name() == sourceTag
+        || token.name() == trackTag) {
+        m_tree.insertSelfClosingHTMLElement(token);
+        return;
+    }
+    if (token.name() == hrTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertSelfClosingHTMLElement(token);
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == isindexTag) {
+        processIsindexStartTagForInBody(token);
+        return;
+    }
+    if (token.name() == textareaTag) {
+        m_tree.insertHTMLElement(token);
+        m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
+        m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
+        m_originalInsertionMode = m_insertionMode;
+        m_framesetOk = false;
+        setInsertionMode(TextMode);
+        return;
+    }
+    if (token.name() == xmpTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.reconstructTheActiveFormattingElements();
+        m_framesetOk = false;
+        processGenericRawTextStartTag(token);
+        return;
+    }
+    if (token.name() == iframeTag) {
+        m_framesetOk = false;
+        processGenericRawTextStartTag(token);
+        return;
+    }
+    if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
+        processGenericRawTextStartTag(token);
+        return;
+    }
+    if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
+        processGenericRawTextStartTag(token);
+        return;
+    }
+    if (token.name() == selectTag) {
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertHTMLElement(token);
+        m_framesetOk = false;
+        if (m_insertionMode == InTableMode
+             || m_insertionMode == InCaptionMode
+             || m_insertionMode == InColumnGroupMode
+             || m_insertionMode == InTableBodyMode
+             || m_insertionMode == InRowMode
+             || m_insertionMode == InCellMode)
+            setInsertionMode(InSelectInTableMode);
+        else
+            setInsertionMode(InSelectMode);
+        return;
+    }
+    if (token.name() == optgroupTag || token.name() == optionTag) {
+        if (m_tree.openElements()->inScope(optionTag.localName())) {
+            AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
+            processEndTag(endOption);
+        }
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertHTMLElement(token);
+        return;
+    }
+    if (token.name() == rpTag || token.name() == rtTag) {
+        if (m_tree.openElements()->inScope(rubyTag.localName())) {
+            m_tree.generateImpliedEndTags();
+            if (!m_tree.currentElement()->hasTagName(rubyTag)) {
+                parseError(token);
+                m_tree.openElements()->popUntil(rubyTag.localName());
+            }
+        }
+        m_tree.insertHTMLElement(token);
+        return;
+    }
+    if (token.name() == MathMLNames::mathTag.localName()) {
+        m_tree.reconstructTheActiveFormattingElements();
+        adjustMathMLAttributes(token);
+        adjustForeignAttributes(token);
+        m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
+        if (m_insertionMode != InForeignContentMode)
+            setInsertionMode(InForeignContentMode);
+        return;
+    }
+    if (token.name() == SVGNames::svgTag.localName()) {
+        m_tree.reconstructTheActiveFormattingElements();
+        adjustSVGAttributes(token);
+        adjustForeignAttributes(token);
+        m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
+        if (m_insertionMode != InForeignContentMode)
+            setInsertionMode(InForeignContentMode);
+        return;
+    }
+    if (isCaptionColOrColgroupTag(token.name())
+        || token.name() == frameTag
+        || token.name() == headTag
+        || isTableBodyContextTag(token.name())
+        || isTableCellContextTag(token.name())
+        || token.name() == trTag) {
+        parseError(token);
+        return;
+    }
+    m_tree.reconstructTheActiveFormattingElements();
+    m_tree.insertHTMLElement(token);
+}
+
+// Applies the </colgroup> handling of the "in column group" insertion mode.
+// Returns true after popping the top of the stack of open elements and
+// switching to InTableMode. Returns false, leaving everything untouched, when
+// the current element is the stack's html element.
+bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
+{
+    const bool currentIsHtmlElement = m_tree.currentElement() == m_tree.openElements()->htmlElement();
+    if (!currentIsHtmlElement) {
+        m_tree.openElements()->pop();
+        setInsertionMode(InTableMode);
+        return true;
+    }
+    // Having the html element as the current element here is only expected
+    // while parsing a fragment.
+    ASSERT(isParsingFragment());
+    // FIXME: parse error
+    return false;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
+// Closes the open table cell by processing a fake </td> or </th> end tag,
+// depending on which cell element is in table scope. Must be called while in
+// InCellMode, and the builder is expected to be in InRowMode afterwards.
+void HTMLTreeBuilder::closeTheCell()
+{
+    ASSERT(insertionMode() == InCellMode);
+    if (m_tree.openElements()->inTableScope(tdTag)) {
+        // A td and a th are not expected to be in table scope together.
+        ASSERT(!m_tree.openElements()->inTableScope(thTag));
+        processFakeEndTag(tdTag);
+    } else {
+        ASSERT(m_tree.openElements()->inTableScope(thTag));
+        processFakeEndTag(thTag);
+    }
+    // Verify the post-condition for both cell kinds; previously the td path
+    // returned early and skipped this check.
+    ASSERT(insertionMode() == InRowMode);
+}
+
+// Handles a start tag token using the "in table" insertion mode rules.
+// Table-structure tags adjust the stack of open elements and switch insertion
+// mode; style/script are delegated to the in-head rules; hidden inputs and
+// forms get special handling; everything else is a parse error that is
+// processed with the in-body rules while foster parenting is enabled.
+void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    if (token.name() == captionTag) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.activeFormattingElements()->appendMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InCaptionMode);
+        return;
+    }
+    if (token.name() == colgroupTag) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InColumnGroupMode);
+        return;
+    }
+    if (token.name() == colTag) {
+        // A bare <col> implies a <colgroup>: synthesize one, then handle the
+        // original token again in the resulting mode.
+        processFakeStartTag(colgroupTag);
+        // Check the actual insertion mode; asserting the bare enum constant
+        // (as this code used to) never tested the builder's state.
+        ASSERT(insertionMode() == InColumnGroupMode);
+        reprocessStartTag(token);
+        return;
+    }
+    if (isTableBodyContextTag(token.name())) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InTableBodyMode);
+        return;
+    }
+    if (isTableCellContextTag(token.name())
+        || token.name() == trTag) {
+        // Cells and rows imply a <tbody>: synthesize one, then handle the
+        // original token again in the resulting mode.
+        processFakeStartTag(tbodyTag);
+        ASSERT(insertionMode() == InTableBodyMode);
+        reprocessStartTag(token);
+        return;
+    }
+    if (token.name() == tableTag) {
+        // A nested <table> start tag closes the current table first.
+        parseError(token);
+        if (!processTableEndTagForInTable()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        reprocessStartTag(token);
+        return;
+    }
+    if (token.name() == styleTag || token.name() == scriptTag) {
+        processStartTagForInHead(token);
+        return;
+    }
+    if (token.name() == inputTag) {
+        Attribute* typeAttribute = token.getAttributeItem(typeAttr);
+        if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
+            parseError(token);
+            m_tree.insertSelfClosingHTMLElement(token);
+            return;
+        }
+        // Fall through to "anything else" case.
+    }
+    if (token.name() == formTag) {
+        parseError(token);
+        // Ignore the token when a form is already recorded on the tree.
+        if (m_tree.form())
+            return;
+        m_tree.insertHTMLFormElement(token, true);
+        // The form is popped again immediately after being inserted.
+        m_tree.openElements()->pop();
+        return;
+    }
+    // Anything else: parse error, then process with the in-body rules while
+    // the guard below is alive.
+    parseError(token);
+    HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
+    processStartTagForInBody(token);
+}
+
+namespace {
+
+// Decides, for a start tag seen while the current element is |currentElement|,
+// whether the token should be handled with the in-body rules instead of being
+// inserted as a foreign element.
+//  - Inside MathML mi/mo/mn/ms/mtext: yes, unless the tag is mglyph or
+//    malignmark.
+//  - Inside MathML annotation-xml: only for an svg start tag.
+//  - Inside SVG foreignObject/desc/title, or any element in the XHTML
+//    namespace: always.
+bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, Element* currentElement)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+
+    const bool isMathMLTextElement = currentElement->hasTagName(MathMLNames::miTag)
+        || currentElement->hasTagName(MathMLNames::moTag)
+        || currentElement->hasTagName(MathMLNames::mnTag)
+        || currentElement->hasTagName(MathMLNames::msTag)
+        || currentElement->hasTagName(MathMLNames::mtextTag);
+    if (isMathMLTextElement) {
+        const bool isGlyphOrAlignMark = token.name() == MathMLNames::mglyphTag
+            || token.name() == MathMLNames::malignmarkTag;
+        return !isGlyphOrAlignMark;
+    }
+
+    if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
+        return token.name() == SVGNames::svgTag;
+
+    const bool isForeignObjectDescOrTitle = currentElement->hasTagName(SVGNames::foreignObjectTag)
+        || currentElement->hasTagName(SVGNames::descTag)
+        || currentElement->hasTagName(SVGNames::titleTag);
+    return isForeignObjectDescOrTitle
+        || currentElement->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
+}
+
+}
+
+// Processes a start tag token according to the current insertion mode.
+// The early modes (Initial through AfterHead) deliberately fall through into
+// the next case after running the matching defaultFor*() helper, so a token
+// that a mode does not handle is handed on to the following mode. Most cases
+// begin by asserting the mode they expect to be in.
+void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    switch (insertionMode()) {
+    case InitialMode:
+        ASSERT(insertionMode() == InitialMode);
+        defaultForInitial();
+        // Fall through.
+    case BeforeHTMLMode:
+        ASSERT(insertionMode() == BeforeHTMLMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
+            setInsertionMode(BeforeHeadMode);
+            return;
+        }
+        defaultForBeforeHTML();
+        // Fall through.
+    case BeforeHeadMode:
+        ASSERT(insertionMode() == BeforeHeadMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == headTag) {
+            m_tree.insertHTMLHeadElement(token);
+            setInsertionMode(InHeadMode);
+            return;
+        }
+        defaultForBeforeHead();
+        // Fall through.
+    case InHeadMode:
+        ASSERT(insertionMode() == InHeadMode);
+        if (processStartTagForInHead(token))
+            return;
+        defaultForInHead();
+        // Fall through.
+    case AfterHeadMode:
+        ASSERT(insertionMode() == AfterHeadMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == bodyTag) {
+            m_framesetOk = false;
+            m_tree.insertHTMLBodyElement(token);
+            setInsertionMode(InBodyMode);
+            return;
+        }
+        if (token.name() == framesetTag) {
+            m_tree.insertHTMLElement(token);
+            setInsertionMode(InFramesetMode);
+            return;
+        }
+        if (token.name() == baseTag
+            || token.name() == basefontTag
+            || token.name() == bgsoundTag
+            || token.name() == linkTag
+            || token.name() == metaTag
+            || token.name() == noframesTag
+            || token.name() == scriptTag
+            || token.name() == styleTag
+            || token.name() == titleTag) {
+            parseError(token);
+            ASSERT(m_tree.head());
+            // Temporarily put the head element back on the stack so the
+            // in-head rules can run, then take it off again.
+            m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
+            processStartTagForInHead(token);
+            m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
+            return;
+        }
+        if (token.name() == headTag) {
+            parseError(token);
+            return;
+        }
+        defaultForAfterHead();
+        // Fall through
+    case InBodyMode:
+        ASSERT(insertionMode() == InBodyMode);
+        processStartTagForInBody(token);
+        break;
+    case InTableMode:
+        ASSERT(insertionMode() == InTableMode);
+        processStartTagForInTable(token);
+        break;
+    case InCaptionMode:
+        ASSERT(insertionMode() == InCaptionMode);
+        if (isCaptionColOrColgroupTag(token.name())
+            || isTableBodyContextTag(token.name())
+            || isTableCellContextTag(token.name())
+            || token.name() == trTag) {
+            parseError(token);
+            if (!processCaptionEndTagForInCaption()) {
+                ASSERT(isParsingFragment());
+                return;
+            }
+            reprocessStartTag(token);
+            return;
+        }
+        processStartTagForInBody(token);
+        break;
+    case InColumnGroupMode:
+        ASSERT(insertionMode() == InColumnGroupMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == colTag) {
+            m_tree.insertSelfClosingHTMLElement(token);
+            return;
+        }
+        if (!processColgroupEndTagForInColumnGroup()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        reprocessStartTag(token);
+        break;
+    case InTableBodyMode:
+        ASSERT(insertionMode() == InTableBodyMode);
+        if (token.name() == trTag) {
+            m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
+            m_tree.insertHTMLElement(token);
+            setInsertionMode(InRowMode);
+            return;
+        }
+        if (isTableCellContextTag(token.name())) {
+            parseError(token);
+            processFakeStartTag(trTag);
+            ASSERT(insertionMode() == InRowMode);
+            reprocessStartTag(token);
+            return;
+        }
+        if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
+            // FIXME: This is slow.
+            if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
+                ASSERT(isParsingFragment());
+                parseError(token);
+                return;
+            }
+            m_tree.openElements()->popUntilTableBodyScopeMarker();
+            ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
+            processFakeEndTag(m_tree.currentElement()->tagQName());
+            reprocessStartTag(token);
+            return;
+        }
+        processStartTagForInTable(token);
+        break;
+    case InRowMode:
+        ASSERT(insertionMode() == InRowMode);
+        if (isTableCellContextTag(token.name())) {
+            m_tree.openElements()->popUntilTableRowScopeMarker();
+            m_tree.insertHTMLElement(token);
+            setInsertionMode(InCellMode);
+            m_tree.activeFormattingElements()->appendMarker();
+            return;
+        }
+        if (token.name() == trTag
+            || isCaptionColOrColgroupTag(token.name())
+            || isTableBodyContextTag(token.name())) {
+            if (!processTrEndTagForInRow()) {
+                ASSERT(isParsingFragment());
+                return;
+            }
+            ASSERT(insertionMode() == InTableBodyMode);
+            reprocessStartTag(token);
+            return;
+        }
+        processStartTagForInTable(token);
+        break;
+    case InCellMode:
+        ASSERT(insertionMode() == InCellMode);
+        if (isCaptionColOrColgroupTag(token.name())
+            || isTableCellContextTag(token.name())
+            || token.name() == trTag
+            || isTableBodyContextTag(token.name())) {
+            // FIXME: This could be more efficient.
+            if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
+                ASSERT(isParsingFragment());
+                parseError(token);
+                return;
+            }
+            closeTheCell();
+            reprocessStartTag(token);
+            return;
+        }
+        processStartTagForInBody(token);
+        break;
+    case AfterBodyMode:
+    case AfterAfterBodyMode:
+        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        setInsertionMode(InBodyMode);
+        reprocessStartTag(token);
+        break;
+    case InHeadNoscriptMode:
+        ASSERT(insertionMode() == InHeadNoscriptMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == basefontTag
+            || token.name() == bgsoundTag
+            || token.name() == linkTag
+            || token.name() == metaTag
+            || token.name() == noframesTag
+            || token.name() == styleTag) {
+            bool didProcess = processStartTagForInHead(token);
+            ASSERT_UNUSED(didProcess, didProcess);
+            return;
+        }
+        // "head" and "noscript" start tags are parse errors and are ignored
+        // in this mode. (This used to test htmlTag, which was dead code
+        // because htmlTag already returned at the top of this case.)
+        if (token.name() == headTag || token.name() == noscriptTag) {
+            parseError(token);
+            return;
+        }
+        defaultForInHeadNoscript();
+        processToken(token);
+        break;
+    case InFramesetMode:
+        ASSERT(insertionMode() == InFramesetMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == framesetTag) {
+            m_tree.insertHTMLElement(token);
+            return;
+        }
+        if (token.name() == frameTag) {
+            m_tree.insertSelfClosingHTMLElement(token);
+            return;
+        }
+        if (token.name() == noframesTag) {
+            processStartTagForInHead(token);
+            return;
+        }
+        parseError(token);
+        break;
+    case AfterFramesetMode:
+    case AfterAfterFramesetMode:
+        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == noframesTag) {
+            processStartTagForInHead(token);
+            return;
+        }
+        parseError(token);
+        break;
+    case InSelectInTableMode:
+        ASSERT(insertionMode() == InSelectInTableMode);
+        if (token.name() == captionTag
+            || token.name() == tableTag
+            || isTableBodyContextTag(token.name())
+            || token.name() == trTag
+            || isTableCellContextTag(token.name())) {
+            // Table-structure tags close the select (via a synthesized
+            // </select>) and are then processed again.
+            parseError(token);
+            AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
+            processEndTag(endSelect);
+            reprocessStartTag(token);
+            return;
+        }
+        // Fall through
+    case InSelectMode:
+        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == optionTag) {
+            if (m_tree.currentElement()->hasTagName(optionTag)) {
+                AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
+                processEndTag(endOption);
+            }
+            m_tree.insertHTMLElement(token);
+            return;
+        }
+        if (token.name() == optgroupTag) {
+            if (m_tree.currentElement()->hasTagName(optionTag)) {
+                AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
+                processEndTag(endOption);
+            }
+            if (m_tree.currentElement()->hasTagName(optgroupTag)) {
+                AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
+                processEndTag(endOptgroup);
+            }
+            m_tree.insertHTMLElement(token);
+            return;
+        }
+        if (token.name() == selectTag) {
+            // A nested <select> start tag is treated like </select>.
+            parseError(token);
+            AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
+            processEndTag(endSelect);
+            return;
+        }
+        if (token.name() == inputTag
+            || token.name() == keygenTag
+            || token.name() == textareaTag) {
+            parseError(token);
+            if (!m_tree.openElements()->inSelectScope(selectTag)) {
+                ASSERT(isParsingFragment());
+                return;
+            }
+            AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
+            processEndTag(endSelect);
+            reprocessStartTag(token);
+            return;
+        }
+        if (token.name() == scriptTag) {
+            bool didProcess = processStartTagForInHead(token);
+            ASSERT_UNUSED(didProcess, didProcess);
+            return;
+        }
+        break;
+    case InTableTextMode:
+        defaultForInTableText();
+        processStartTag(token);
+        break;
+    case InForeignContentMode: {
+        if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentElement())) {
+            processForeignContentUsingInBodyModeAndResetMode(token);
+            return;
+        }
+        // These HTML tags (and <font> carrying color/face/size) break out of
+        // foreign content: pop back to the foreign content scope marker,
+        // reset the insertion mode and process the token again.
+        if (token.name() == bTag
+            || token.name() == bigTag
+            || token.name() == blockquoteTag
+            || token.name() == bodyTag
+            || token.name() == brTag
+            || token.name() == centerTag
+            || token.name() == codeTag
+            || token.name() == ddTag
+            || token.name() == divTag
+            || token.name() == dlTag
+            || token.name() == dtTag
+            || token.name() == emTag
+            || token.name() == embedTag
+            || isNumberedHeaderTag(token.name())
+            || token.name() == headTag
+            || token.name() == hrTag
+            || token.name() == iTag
+            || token.name() == imgTag
+            || token.name() == liTag
+            || token.name() == listingTag
+            || token.name() == menuTag
+            || token.name() == metaTag
+            || token.name() == nobrTag
+            || token.name() == olTag
+            || token.name() == pTag
+            || token.name() == preTag
+            || token.name() == rubyTag
+            || token.name() == sTag
+            || token.name() == smallTag
+            || token.name() == spanTag
+            || token.name() == strongTag
+            || token.name() == strikeTag
+            || token.name() == subTag
+            || token.name() == supTag
+            || token.name() == tableTag
+            || token.name() == ttTag
+            || token.name() == uTag
+            || token.name() == ulTag
+            || token.name() == varTag
+            || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
+            parseError(token);
+            m_tree.openElements()->popUntilForeignContentScopeMarker();
+            resetInsertionModeAppropriately();
+            reprocessStartTag(token);
+            return;
+        }
+        // Otherwise insert the element in the namespace of the current
+        // element, after applying the namespace-specific adjustments.
+        const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
+        if (currentNamespace == MathMLNames::mathmlNamespaceURI)
+            adjustMathMLAttributes(token);
+        if (currentNamespace == SVGNames::svgNamespaceURI) {
+            adjustSVGTagNameCase(token);
+            adjustSVGAttributes(token);
+        }
+        adjustForeignAttributes(token);
+        m_tree.insertForeignElement(token, currentNamespace);
+        break;
+    }
+    case TextMode:
+        ASSERT_NOT_REACHED();
+        break;
+    }
+}
+
+// Handles a </body> end tag under the in-body rules. When a body element is
+// in scope, switches to AfterBodyMode and returns true; otherwise reports a
+// parse error and returns false without changing the insertion mode.
+bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndTag);
+    ASSERT(token.name() == bodyTag);
+    const bool bodyIsInScope = m_tree.openElements()->inScope(bodyTag.localName());
+    if (bodyIsInScope) {
+        notImplemented(); // Emit a more specific parse error based on stack contents.
+        setInsertionMode(AfterBodyMode);
+        return true;
+    }
+    parseError(token);
+    return false;
+}
+
+// Handles an end tag that has no dedicated in-body rule. Walks the stack of
+// open elements from topRecord() via next(), looking for an element whose
+// local name matches the token. On a match, implied end tags are generated
+// and the stack is popped through that element. If a special node is met
+// first, the token is a parse error and is ignored.
+void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndTag);
+    for (HTMLElementStack::ElementRecord* current = m_tree.openElements()->topRecord(); ; current = current->next()) {
+        Element* element = current->element();
+        if (!element->hasLocalName(token.name())) {
+            if (isSpecialNode(element)) {
+                parseError(token);
+                return;
+            }
+            continue;
+        }
+
+        m_tree.generateImpliedEndTags();
+        if (!m_tree.currentElement()->hasLocalName(token.name())) {
+            parseError(token);
+            // FIXME: This is either a bug in the spec, or a bug in our
+            // implementation.  Filed a bug with HTML5:
+            // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
+            // We might have already popped the node for the token in
+            // generateImpliedEndTags, just abort.
+            if (!m_tree.openElements()->contains(element))
+                return;
+        }
+        m_tree.openElements()->popUntilPopped(element);
+        return;
+    }
+}
+
+// FIXME: This probably belongs on HTMLElementStack.
+// Walks the stack of open elements from topRecord() via next() until it
+// reaches |formattingElement|. Returns the record of the last special node
+// seen before reaching it, or 0 if there was none. |formattingElement| is
+// expected to be on the stack.
+HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
+{
+    HTMLElementStack::ElementRecord* candidate = 0;
+    HTMLElementStack::ElementRecord* current = m_tree.openElements()->topRecord();
+    while (current) {
+        Element* element = current->element();
+        if (element == formattingElement)
+            return candidate;
+        if (isSpecialNode(element))
+            candidate = current;
+        current = current->next();
+    }
+    // Running off the end of the stack means the precondition was violated.
+    ASSERT_NOT_REACHED();
+    return 0;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+// Runs the adoption agency algorithm for the end tag |token|. Each outer
+// iteration looks up the formatting element named by the token, finds its
+// furthest block on the stack of open elements, and re-parents nodes so the
+// formatting element is closed. The numbered comments below refer to the
+// steps of the algorithm. Both the outer and the inner loop are capped at
+// adoptionAgencyIterationLimit iterations.
+void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
+{
+    // The adoption agency algorithm is N^2.  We limit the number of iterations
+    // to stop from hanging the whole browser.  This limit is copied from the
+    // legacy tree builder and might need to be tweaked in the future.
+    static const int adoptionAgencyIterationLimit = 10;
+
+    for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
+        // 1.
+        Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
+        // Bail out if there is no such formatting element, or if it is on the
+        // stack of open elements but not in scope.
+        if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
+            parseError(token);
+            notImplemented(); // Check the stack of open elements for a more specific parse error.
+            return;
+        }
+        HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
+        // The element is in the formatting list but no longer on the stack:
+        // drop it from the list and stop.
+        if (!formattingElementRecord) {
+            parseError(token);
+            m_tree.activeFormattingElements()->remove(formattingElement);
+            return;
+        }
+        if (formattingElement != m_tree.currentElement())
+            parseError(token);
+        // 2.
+        HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
+        // 3.
+        // No furthest block: pop through the formatting element, remove it
+        // from the active formatting elements, and finish.
+        if (!furthestBlock) {
+            m_tree.openElements()->popUntilPopped(formattingElement);
+            m_tree.activeFormattingElements()->remove(formattingElement);
+            return;
+        }
+        // 4.
+        ASSERT(furthestBlock->isAbove(formattingElementRecord));
+        Element* commonAncestor = formattingElementRecord->next()->element();
+        // 5.
+        HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
+        // 6.
+        HTMLElementStack::ElementRecord* node = furthestBlock;
+        HTMLElementStack::ElementRecord* nextNode = node->next();
+        HTMLElementStack::ElementRecord* lastNode = furthestBlock;
+        // NOTE: this inner loop counter shadows the outer loop's |i|; the two
+        // counters are independent.
+        for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
+            // 6.1
+            node = nextNode;
+            ASSERT(node);
+            nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
+            // 6.2
+            // Nodes that are not active formatting elements are removed from
+            // the stack and skipped.
+            if (!m_tree.activeFormattingElements()->contains(node->element())) {
+                m_tree.openElements()->remove(node->element());
+                node = 0;
+                continue;
+            }
+            // 6.3
+            if (node == formattingElementRecord)
+                break;
+            // 6.5
+            // Create a new element from node's record and substitute it in
+            // both the formatting list entry and the stack record.
+            RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
+            HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
+            nodeEntry->replaceElement(newElement.get());
+            node->replaceElement(newElement.release());
+            // 6.4 -- Intentionally out of order to handle the case where node
+            // was replaced in 6.5.
+            // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
+            if (lastNode == furthestBlock)
+                bookmark.moveToAfter(nodeEntry);
+            // 6.6
+            // Move lastNode's element under node's element, detaching it from
+            // its previous parent first.
+            if (Element* parent = lastNode->element()->parentElement())
+                parent->parserRemoveChild(lastNode->element());
+            node->element()->parserAddChild(lastNode->element());
+            if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
+                lastNode->element()->lazyAttach();
+            // 6.7
+            lastNode = node;
+        }
+        // 7
+        // Re-parent lastNode's element under the common ancestor, or foster
+        // parent it when the common ancestor is a table, tr, or table body
+        // context element.
+        const AtomicString& commonAncestorTag = commonAncestor->localName();
+        if (Element* parent = lastNode->element()->parentElement())
+            parent->parserRemoveChild(lastNode->element());
+        // FIXME: If this moves to HTMLConstructionSite, this check should use
+        // causesFosterParenting(tagName) instead.
+        if (commonAncestorTag == tableTag
+            || commonAncestorTag == trTag
+            || isTableBodyContextTag(commonAncestorTag))
+            m_tree.fosterParent(lastNode->element());
+        else {
+            commonAncestor->parserAddChild(lastNode->element());
+            if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
+                lastNode->element()->lazyAttach();
+        }
+        // 8
+        RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
+        // 9
+        newElement->takeAllChildrenFrom(furthestBlock->element());
+        // 10
+        Element* furthestBlockElement = furthestBlock->element();
+        // FIXME: All this creation / parserAddChild / attach business should
+        //        be in HTMLConstructionSite.  My guess is that steps 8--12
+        //        should all be in some HTMLConstructionSite function.
+        furthestBlockElement->parserAddChild(newElement);
+        if (furthestBlockElement->attached() && !newElement->attached()) {
+            // Notice that newElement might already be attached if, for example, one of the reparented
+            // children is a style element, which attaches itself automatically.
+            newElement->attach();
+        }
+        // 11
+        m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
+        // 12
+        // Replace the formatting element on the stack with the new element,
+        // inserted above the furthest block.
+        m_tree.openElements()->remove(formattingElement);
+        m_tree.openElements()->insertAbove(newElement, furthestBlock);
+    }
+}
+
+// Picks the insertion mode that matches the current stack of open elements.
+// Walks the stack from topRecord() via next() and switches mode based on the
+// first element with a recognized tag name or foreign namespace. When the
+// walk reaches the bottom of the stack, the fragment context element is
+// examined in its place, and InBodyMode is the final fallback.
+void HTMLTreeBuilder::resetInsertionModeAppropriately()
+{
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
+    bool reachedBottom = false;
+    for (HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); ; record = record->next()) {
+        Element* element = record->element();
+        if (element == m_tree.openElements()->bottom()) {
+            // The bottom of the stack is only expected to be reached while
+            // parsing a fragment; inspect the context element instead.
+            ASSERT(isParsingFragment());
+            reachedBottom = true;
+            element = m_fragmentContext.contextElement();
+        }
+        if (element->hasTagName(selectTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InSelectMode);
+            return;
+        }
+        if (element->hasTagName(tdTag) || element->hasTagName(thTag)) {
+            setInsertionMode(InCellMode);
+            return;
+        }
+        if (element->hasTagName(trTag)) {
+            setInsertionMode(InRowMode);
+            return;
+        }
+        if (element->hasTagName(tbodyTag) || element->hasTagName(theadTag) || element->hasTagName(tfootTag)) {
+            setInsertionMode(InTableBodyMode);
+            return;
+        }
+        if (element->hasTagName(captionTag)) {
+            setInsertionMode(InCaptionMode);
+            return;
+        }
+        if (element->hasTagName(colgroupTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InColumnGroupMode);
+            return;
+        }
+        if (element->hasTagName(tableTag)) {
+            setInsertionMode(InTableMode);
+            return;
+        }
+        if (element->hasTagName(headTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InBodyMode);
+            return;
+        }
+        if (element->hasTagName(bodyTag)) {
+            setInsertionMode(InBodyMode);
+            return;
+        }
+        if (element->hasTagName(framesetTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InFramesetMode);
+            return;
+        }
+        if (element->hasTagName(htmlTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(BeforeHeadMode);
+            return;
+        }
+        const bool isForeignElement = element->namespaceURI() == SVGNames::svgNamespaceURI
+            || element->namespaceURI() == MathMLNames::mathmlNamespaceURI;
+        if (isForeignElement) {
+            setInsertionMode(InForeignContentMode);
+            return;
+        }
+        if (reachedBottom) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InBodyMode);
+            return;
+        }
+    }
+}
+
+// Handles an end tag token for the InTableBodyMode case of processEndTag().
+void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndTag);
+    if (isTableBodyContextTag(token.name())) {
+        // Close the matching table section if one is in table scope;
+        // otherwise the token is only reported as a parse error.
+        if (m_tree.openElements()->inTableScope(token.name())) {
+            m_tree.openElements()->popUntilTableBodyScopeMarker();
+            m_tree.openElements()->pop();
+            setInsertionMode(InTableMode);
+        } else {
+            parseError(token);
+        }
+        return;
+    }
+    if (token.name() == tableTag) {
+        // FIXME: This is slow.
+        bool hasTableSectionInScope = m_tree.openElements()->inTableScope(tbodyTag.localName())
+            || m_tree.openElements()->inTableScope(theadTag.localName())
+            || m_tree.openElements()->inTableScope(tfootTag.localName());
+        if (!hasTableSectionInScope) {
+            ASSERT(isParsingFragment());
+            parseError(token);
+            return;
+        }
+        // Close the current table section with a synthesized end tag, then
+        // run the </table> token again under the resulting insertion mode.
+        m_tree.openElements()->popUntilTableBodyScopeMarker();
+        ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
+        processFakeEndTag(m_tree.currentElement()->tagQName());
+        reprocessEndTag(token);
+        return;
+    }
+    bool isIgnoredEndTag = token.name() == bodyTag
+        || isCaptionColOrColgroupTag(token.name())
+        || token.name() == htmlTag
+        || isTableCellContextTag(token.name())
+        || token.name() == trTag;
+    if (!isIgnoredEndTag) {
+        processEndTagForInTable(token);
+        return;
+    }
+    parseError(token);
+}
+
+// Handles an end tag token for the InRowMode case of processEndTag().
+void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndTag);
+    if (token.name() == trTag) {
+        processTrEndTagForInRow();
+        return;
+    }
+    if (token.name() == tableTag) {
+        // Close the row first; only when that succeeds is </table> handled
+        // again, now in the table body mode.
+        bool closedRow = processTrEndTagForInRow();
+        if (!closedRow) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        ASSERT(insertionMode() == InTableBodyMode);
+        reprocessEndTag(token);
+        return;
+    }
+    if (isTableBodyContextTag(token.name())) {
+        if (m_tree.openElements()->inTableScope(token.name())) {
+            // Close the row with a synthesized </tr>, then let the table body
+            // mode handle the section end tag.
+            processFakeEndTag(trTag);
+            ASSERT(insertionMode() == InTableBodyMode);
+            reprocessEndTag(token);
+        } else {
+            parseError(token);
+        }
+        return;
+    }
+    bool isIgnoredEndTag = token.name() == bodyTag
+        || isCaptionColOrColgroupTag(token.name())
+        || token.name() == htmlTag
+        || isTableCellContextTag(token.name());
+    if (!isIgnoredEndTag) {
+        processEndTagForInTable(token);
+        return;
+    }
+    parseError(token);
+}
+
+// Handles an end tag token for the InCellMode case of processEndTag().
+void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndTag);
+    if (isTableCellContextTag(token.name())) {
+        if (m_tree.openElements()->inTableScope(token.name())) {
+            // Close the cell: pop through the matching element, clear the
+            // active formatting elements back to the last marker, and return
+            // to the row mode.
+            m_tree.generateImpliedEndTags();
+            if (!m_tree.currentElement()->hasLocalName(token.name()))
+                parseError(token);
+            m_tree.openElements()->popUntilPopped(token.name());
+            m_tree.activeFormattingElements()->clearToLastMarker();
+            setInsertionMode(InRowMode);
+        } else {
+            parseError(token);
+        }
+        return;
+    }
+    bool isIgnoredEndTag = token.name() == bodyTag
+        || isCaptionColOrColgroupTag(token.name())
+        || token.name() == htmlTag;
+    if (isIgnoredEndTag) {
+        parseError(token);
+        return;
+    }
+    bool closesEnclosingTablePart = token.name() == tableTag
+        || token.name() == trTag
+        || isTableBodyContextTag(token.name());
+    if (!closesEnclosingTablePart) {
+        processEndTagForInBody(token);
+        return;
+    }
+    if (!m_tree.openElements()->inTableScope(token.name())) {
+        ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
+        parseError(token);
+        return;
+    }
+    // The tag closes something outside the cell: close the cell first and
+    // then handle the same token again.
+    closeTheCell();
+    reprocessEndTag(token);
+}
+
+// Handles an end tag token using the "in body" rules. It is called directly for
+// InBodyMode and as the fallback from the table, caption and cell handlers.
+//
+// Each branch below deals with one family of tag names and returns once the
+// token is consumed. A tag name that matches none of the branches is handed to
+// processAnyOtherEndTagForInBody().
+void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndTag);
+    if (token.name() == bodyTag) {
+        processBodyEndTagForInBody(token);
+        return;
+    }
+    if (token.name() == htmlTag) {
+        // Act as if </body> had been seen; only when that is accepted is the
+        // </html> token itself processed again.
+        AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
+        if (processBodyEndTagForInBody(endBody))
+            reprocessEndTag(token);
+        return;
+    }
+    if (token.name() == addressTag
+        || token.name() == articleTag
+        || token.name() == asideTag
+        || token.name() == blockquoteTag
+        || token.name() == buttonTag
+        || token.name() == centerTag
+        || token.name() == detailsTag
+        || token.name() == dirTag
+        || token.name() == divTag
+        || token.name() == dlTag
+        || token.name() == fieldsetTag
+        || token.name() == figcaptionTag
+        || token.name() == figureTag
+        || token.name() == footerTag
+        || token.name() == headerTag
+        || token.name() == hgroupTag
+        || token.name() == listingTag
+        || token.name() == menuTag
+        || token.name() == navTag
+        || token.name() == olTag
+        || token.name() == preTag
+        || token.name() == sectionTag
+        || token.name() == summaryTag
+        || token.name() == ulTag) {
+        if (!m_tree.openElements()->inScope(token.name())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTags();
+        if (!m_tree.currentElement()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        return;
+    }
+    if (token.name() == formTag) {
+        // takeForm() hands over the tree's form element; it is removed from
+        // the stack directly rather than by popping everything above it.
+        RefPtr<Element> node = m_tree.takeForm();
+        if (!node || !m_tree.openElements()->inScope(node.get())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTags();
+        if (m_tree.currentElement() != node.get())
+            parseError(token);
+        m_tree.openElements()->remove(node.get());
+        // The </form> token is fully handled here. Without this return it
+        // would fall through to processAnyOtherEndTagForInBody() and be
+        // processed a second time.
+        return;
+    }
+    if (token.name() == pTag) {
+        if (!m_tree.openElements()->inButtonScope(token.name())) {
+            // No <p> to close: synthesize one, then handle </p> again so it
+            // closes the element that was just opened.
+            parseError(token);
+            processFakeStartTag(pTag);
+            ASSERT(m_tree.openElements()->inScope(token.name()));
+            reprocessEndTag(token);
+            return;
+        }
+        m_tree.generateImpliedEndTagsWithExclusion(token.name());
+        if (!m_tree.currentElement()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        return;
+    }
+    if (token.name() == liTag) {
+        if (!m_tree.openElements()->inListItemScope(token.name())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTagsWithExclusion(token.name());
+        if (!m_tree.currentElement()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        return;
+    }
+    if (token.name() == ddTag
+        || token.name() == dtTag) {
+        if (!m_tree.openElements()->inScope(token.name())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTagsWithExclusion(token.name());
+        if (!m_tree.currentElement()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        return;
+    }
+    if (isNumberedHeaderTag(token.name())) {
+        // Any numbered header element in scope is closed, not only one whose
+        // name equals the token's; a mismatch is reported as a parse error.
+        if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTags();
+        if (!m_tree.currentElement()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilNumberedHeaderElementPopped();
+        return;
+    }
+    if (isFormattingTag(token.name())) {
+        callTheAdoptionAgency(token);
+        return;
+    }
+    if (token.name() == appletTag
+        || token.name() == marqueeTag
+        || token.name() == objectTag) {
+        if (!m_tree.openElements()->inScope(token.name())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTags();
+        if (!m_tree.currentElement()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        m_tree.activeFormattingElements()->clearToLastMarker();
+        return;
+    }
+    if (token.name() == brTag) {
+        // </br> is a parse error and is treated as a <br> start tag.
+        parseError(token);
+        processFakeStartTag(brTag);
+        return;
+    }
+    processAnyOtherEndTagForInBody(token);
+}
+
+// Closes the <caption> element that is in table scope and switches to
+// InTableMode. Returns false, changing nothing, when no caption is in table
+// scope.
+bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
+{
+    if (m_tree.openElements()->inTableScope(captionTag.localName())) {
+        m_tree.generateImpliedEndTags();
+        // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
+        m_tree.openElements()->popUntilPopped(captionTag.localName());
+        m_tree.activeFormattingElements()->clearToLastMarker();
+        setInsertionMode(InTableMode);
+        return true;
+    }
+    ASSERT(isParsingFragment());
+    // FIXME: parse error
+    return false;
+}
+
+// Closes the <tr> element that is in table scope and switches to
+// InTableBodyMode. Returns false, changing nothing, when no row is in table
+// scope.
+bool HTMLTreeBuilder::processTrEndTagForInRow()
+{
+    if (m_tree.openElements()->inTableScope(trTag.localName())) {
+        m_tree.openElements()->popUntilTableRowScopeMarker();
+        ASSERT(m_tree.currentElement()->hasTagName(trTag));
+        m_tree.openElements()->pop();
+        setInsertionMode(InTableBodyMode);
+        return true;
+    }
+    ASSERT(isParsingFragment());
+    // FIXME: parse error
+    return false;
+}
+
+// Pops the stack through the <table> element that is in table scope and then
+// resets the insertion mode. Returns false, changing nothing, when no table
+// is in table scope.
+bool HTMLTreeBuilder::processTableEndTagForInTable()
+{
+    if (m_tree.openElements()->inTableScope(tableTag)) {
+        m_tree.openElements()->popUntilPopped(tableTag.localName());
+        resetInsertionModeAppropriately();
+        return true;
+    }
+    ASSERT(isParsingFragment());
+    // FIXME: parse error.
+    return false;
+}
+
+// Handles an end tag token for the InTableMode case of processEndTag(); also
+// the fallback for the table body and row handlers.
+void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndTag);
+    if (token.name() == tableTag) {
+        processTableEndTagForInTable();
+        return;
+    }
+    bool isIgnoredEndTag = token.name() == bodyTag
+        || isCaptionColOrColgroupTag(token.name())
+        || token.name() == htmlTag
+        || isTableBodyContextTag(token.name())
+        || isTableCellContextTag(token.name())
+        || token.name() == trTag;
+    if (isIgnoredEndTag) {
+        parseError(token);
+        return;
+    }
+    // Everything else uses the "in body" rules while the foster-parent guard
+    // is alive.
+    // Is this redirection necessary here?
+    HTMLConstructionSite::RedirectToFosterParentGuard fosterParentGuard(m_tree);
+    processEndTagForInBody(token);
+}
+
+// Dispatches an end tag token to the handling for the current insertion mode.
+//
+// The early modes (Initial through AfterHead) form a chain of deliberate
+// fall-throughs: each case either consumes the token and returns, or runs its
+// defaultFor*() handler and continues into the next case. Most cases begin
+// with an ASSERT stating the insertion mode expected at that point, which also
+// documents the mode the preceding handler is expected to have left behind.
+void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndTag);
+    switch (insertionMode()) {
+    case InitialMode:
+        ASSERT(insertionMode() == InitialMode);
+        defaultForInitial();
+        // Fall through.
+    case BeforeHTMLMode:
+        ASSERT(insertionMode() == BeforeHTMLMode);
+        // Only </head>, </body>, </html> and </br> continue; any other end
+        // tag is reported as a parse error and dropped.
+        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+            parseError(token);
+            return;
+        }
+        defaultForBeforeHTML();
+        // Fall through.
+    case BeforeHeadMode:
+        ASSERT(insertionMode() == BeforeHeadMode);
+        // Same filter as in BeforeHTMLMode above.
+        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+            parseError(token);
+            return;
+        }
+        defaultForBeforeHead();
+        // Fall through.
+    case InHeadMode:
+        ASSERT(insertionMode() == InHeadMode);
+        if (token.name() == headTag) {
+            m_tree.openElements()->popHTMLHeadElement();
+            setInsertionMode(AfterHeadMode);
+            return;
+        }
+        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+            parseError(token);
+            return;
+        }
+        defaultForInHead();
+        // Fall through.
+    case AfterHeadMode:
+        ASSERT(insertionMode() == AfterHeadMode);
+        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+            parseError(token);
+            return;
+        }
+        defaultForAfterHead();
+        // Fall through
+    case InBodyMode:
+        ASSERT(insertionMode() == InBodyMode);
+        processEndTagForInBody(token);
+        break;
+    case InTableMode:
+        ASSERT(insertionMode() == InTableMode);
+        processEndTagForInTable(token);
+        break;
+    case InCaptionMode:
+        ASSERT(insertionMode() == InCaptionMode);
+        if (token.name() == captionTag) {
+            processCaptionEndTagForInCaption();
+            return;
+        }
+        if (token.name() == tableTag) {
+            // Close the caption first; only when that succeeds is </table>
+            // processed again under the new mode.
+            parseError(token);
+            if (!processCaptionEndTagForInCaption()) {
+                ASSERT(isParsingFragment());
+                return;
+            }
+            reprocessEndTag(token);
+            return;
+        }
+        if (token.name() == bodyTag
+            || token.name() == colTag
+            || token.name() == colgroupTag
+            || token.name() == htmlTag
+            || isTableBodyContextTag(token.name())
+            || isTableCellContextTag(token.name())
+            || token.name() == trTag) {
+            parseError(token);
+            return;
+        }
+        processEndTagForInBody(token);
+        break;
+    case InColumnGroupMode:
+        ASSERT(insertionMode() == InColumnGroupMode);
+        if (token.name() == colgroupTag) {
+            processColgroupEndTagForInColumnGroup();
+            return;
+        }
+        if (token.name() == colTag) {
+            parseError(token);
+            return;
+        }
+        // Any other end tag: close the column group, then process the token
+        // again if closing succeeded.
+        if (!processColgroupEndTagForInColumnGroup()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        reprocessEndTag(token);
+        break;
+    case InRowMode:
+        ASSERT(insertionMode() == InRowMode);
+        processEndTagForInRow(token);
+        break;
+    case InCellMode:
+        ASSERT(insertionMode() == InCellMode);
+        processEndTagForInCell(token);
+        break;
+    case InTableBodyMode:
+        ASSERT(insertionMode() == InTableBodyMode);
+        processEndTagForInTableBody(token);
+        break;
+    case AfterBodyMode:
+        ASSERT(insertionMode() == AfterBodyMode);
+        if (token.name() == htmlTag) {
+            if (isParsingFragment()) {
+                parseError(token);
+                return;
+            }
+            setInsertionMode(AfterAfterBodyMode);
+            return;
+        }
+        prepareToReprocessToken();
+        // Fall through.
+    case AfterAfterBodyMode:
+        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+        // Any end tag here is a parse error; switch back to "in body" and
+        // process the token again.
+        parseError(token);
+        setInsertionMode(InBodyMode);
+        reprocessEndTag(token);
+        break;
+    case InHeadNoscriptMode:
+        ASSERT(insertionMode() == InHeadNoscriptMode);
+        if (token.name() == noscriptTag) {
+            ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
+            m_tree.openElements()->pop();
+            ASSERT(m_tree.currentElement()->hasTagName(headTag));
+            setInsertionMode(InHeadMode);
+            return;
+        }
+        if (token.name() != brTag) {
+            parseError(token);
+            return;
+        }
+        // Only </br> reaches this point: leave the noscript mode via its
+        // default handler and dispatch the token again.
+        defaultForInHeadNoscript();
+        processToken(token);
+        break;
+    case TextMode:
+        if (token.name() == scriptTag) {
+            // Pause ourselves so that parsing stops until the script can be processed by the caller.
+            m_isPaused = true;
+            ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
+            m_scriptToProcess = m_tree.currentElement();
+            m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
+            m_tree.openElements()->pop();
+            // When scripting is not allowed for this fragment, the script
+            // element's children are removed.
+            if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
+                m_scriptToProcess->removeAllChildren();
+            setInsertionMode(m_originalInsertionMode);
+
+            // This token will not have been created by the tokenizer if a
+            // self-closing script tag was encountered and pre-HTML5 parser
+            // quirks are enabled. We must set the tokenizer's state to
+            // DataState explicitly if the tokenizer didn't have a chance to.
+            ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
+            m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
+            return;
+        }
+        // Any other end tag: pop the current element and return to the mode
+        // saved in m_originalInsertionMode.
+        m_tree.openElements()->pop();
+        setInsertionMode(m_originalInsertionMode);
+        break;
+    case InFramesetMode:
+        ASSERT(insertionMode() == InFramesetMode);
+        if (token.name() == framesetTag) {
+            if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
+                parseError(token);
+                return;
+            }
+            m_tree.openElements()->pop();
+            if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
+                setInsertionMode(AfterFramesetMode);
+            return;
+        }
+        // Other end tags are dropped here without calling parseError().
+        break;
+    case AfterFramesetMode:
+        ASSERT(insertionMode() == AfterFramesetMode);
+        if (token.name() == htmlTag) {
+            setInsertionMode(AfterAfterFramesetMode);
+            return;
+        }
+        // Fall through.
+    case AfterAfterFramesetMode:
+        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+        parseError(token);
+        break;
+    case InSelectInTableMode:
+        ASSERT(insertionMode() == InSelectInTableMode);
+        if (token.name() == captionTag
+            || token.name() == tableTag
+            || isTableBodyContextTag(token.name())
+            || token.name() == trTag
+            || isTableCellContextTag(token.name())) {
+            parseError(token);
+            // A table-related end tag closes the select (via a synthesized
+            // </select>) only when a matching element is in table scope; the
+            // token is then processed again.
+            if (m_tree.openElements()->inTableScope(token.name())) {
+                AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
+                processEndTag(endSelect);
+                reprocessEndTag(token);
+            }
+            return;
+        }
+        // Fall through.
+    case InSelectMode:
+        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+        if (token.name() == optgroupTag) {
+            // An open <option> directly inside an <optgroup> is closed first.
+            if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
+                processFakeEndTag(optionTag);
+            if (m_tree.currentElement()->hasTagName(optgroupTag)) {
+                m_tree.openElements()->pop();
+                return;
+            }
+            parseError(token);
+            return;
+        }
+        if (token.name() == optionTag) {
+            if (m_tree.currentElement()->hasTagName(optionTag)) {
+                m_tree.openElements()->pop();
+                return;
+            }
+            parseError(token);
+            return;
+        }
+        if (token.name() == selectTag) {
+            if (!m_tree.openElements()->inSelectScope(token.name())) {
+                ASSERT(isParsingFragment());
+                parseError(token);
+                return;
+            }
+            m_tree.openElements()->popUntilPopped(selectTag.localName());
+            resetInsertionModeAppropriately();
+            return;
+        }
+        // Other end tags are dropped here without calling parseError().
+        break;
+    case InTableTextMode:
+        // Run the table-text default handler, then dispatch the token again.
+        // NOTE(review): this relies on defaultForInTableText() leaving
+        // InTableTextMode, otherwise the recursion would not end - confirm.
+        defaultForInTableText();
+        processEndTag(token);
+        break;
+    case InForeignContentMode:
+        if (token.name() == SVGNames::scriptTag && m_tree.currentElement()->hasTagName(SVGNames::scriptTag)) {
+            notImplemented();
+            return;
+        }
+        if (m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI) {
+            // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
+            HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
+            if (!nodeRecord->element()->hasLocalName(token.name()))
+                parseError(token);
+            // Walk down the stack: pop through the first element whose local
+            // name matches the token, or stop at the first element in the
+            // XHTML namespace and use the "in body" handling below.
+            while (1) {
+                if (nodeRecord->element()->hasLocalName(token.name())) {
+                    m_tree.openElements()->popUntilPopped(nodeRecord->element());
+                    resetForeignInsertionMode();
+                    return;
+                }
+                nodeRecord = nodeRecord->next();
+                if (nodeRecord->element()->namespaceURI() == xhtmlNamespaceURI)
+                    break;
+            }
+        }
+        // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
+        processForeignContentUsingInBodyModeAndResetMode(token);
+        break;
+    }
+}
+
+// Runs the deferred foreign-insertion-mode reset, if one is pending, and
+// clears the pending flag. Does nothing when no reset is pending.
+void HTMLTreeBuilder::prepareToReprocessToken()
+{
+    if (!m_hasPendingForeignInsertionModeSteps)
+        return;
+    resetForeignInsertionMode();
+    m_hasPendingForeignInsertionModeSteps = false;
+}
+
+// Processes a start tag token again after the insertion mode has changed.
+// prepareToReprocessToken() runs first so that a pending foreign-insertion-mode
+// reset is applied before the token is dispatched.
+void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
+{
+    prepareToReprocessToken();
+    processStartTag(token);
+}
+
+// Processes an end tag token again after the insertion mode has changed.
+// prepareToReprocessToken() runs first so that a pending foreign-insertion-mode
+// reset is applied before the token is dispatched.
+void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
+{
+    prepareToReprocessToken();
+    processEndTag(token);
+}
+
+// Scoped helper that puts a tree builder into a fake insertion mode for the
+// lifetime of the object. The constructor records the builder's current
+// insertion mode before installing the fake one. The destructor restores the
+// recorded mode, but only if the builder still reports a fake insertion mode.
+class HTMLTreeBuilder::FakeInsertionMode : public Noncopyable {
+public:
+    FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
+        : m_builder(treeBuilder)
+        , m_savedMode(treeBuilder->insertionMode())
+    {
+        m_builder->setFakeInsertionMode(mode);
+    }
+
+    ~FakeInsertionMode()
+    {
+        // NOTE(review): presumably a real setInsertionMode() call made while
+        // this object was alive clears the fake flag, in which case that mode
+        // is kept - confirm against setFakeInsertionMode().
+        if (!m_builder->isFakeInsertionMode())
+            return;
+        m_builder->setInsertionMode(m_savedMode);
+    }
+
+private:
+    HTMLTreeBuilder* m_builder;
+    InsertionMode m_savedMode;
+};
+
+// Processes |token| under a temporary fake InBodyMode and afterwards runs the
+// foreign-insertion-mode reset, unless that reset already ran during
+// processing (prepareToReprocessToken() clears the pending flag when it does).
+void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
+{
+    m_hasPendingForeignInsertionModeSteps = true;
+    {
+        // The fake mode must end before the reset below, hence the scope.
+        FakeInsertionMode inBodyScope(this, InBodyMode);
+        processToken(token);
+    }
+    if (!m_hasPendingForeignInsertionModeSteps)
+        return;
+    resetForeignInsertionMode();
+}
+
+// Resets the insertion mode, but only while the builder is still in
+// InForeignContentMode; in any other mode this is a no-op.
+void HTMLTreeBuilder::resetForeignInsertionMode()
+{
+    if (insertionMode() != InForeignContentMode)
+        return;
+    resetInsertionModeAppropriately();
+}
+
+// Inserts a comment token at the place selected by the insertion mode: on the
+// document, on the <html> element, or at the default insertion point.
+// Note that this reads m_insertionMode directly rather than insertionMode().
+void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::Comment);
+    switch (m_insertionMode) {
+    case InitialMode:
+    case BeforeHTMLMode:
+    case AfterAfterBodyMode:
+    case AfterAfterFramesetMode:
+        m_tree.insertCommentOnDocument(token);
+        return;
+    case AfterBodyMode:
+        m_tree.insertCommentOnHTMLHtmlElement(token);
+        return;
+    case InTableTextMode:
+        // Run the table-text default handler, then handle the comment again.
+        defaultForInTableText();
+        processComment(token);
+        return;
+    default:
+        break;
+    }
+    m_tree.insertComment(token);
+}
+
+// Handles a character token by wrapping it in an ExternalCharacterTokenBuffer
+// and passing that buffer to processCharacterBuffer().
+void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::Character);
+    ExternalCharacterTokenBuffer buffer(token);
+    processCharacterBuffer(buffer);
+}
+
+// Consumes the characters in |buffer| according to the current insertion mode.
+//
+// A mode that cannot take the characters itself changes the insertion mode and
+// then either falls through into the next case or jumps back to the
+// ReprocessBuffer label, so that whatever is left in the buffer is handled
+// under the new mode.
+void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
+{
+ReprocessBuffer:
+    switch (insertionMode()) {
+    case InitialMode: {
+        ASSERT(insertionMode() == InitialMode);
+        // Leading whitespace is skipped (not inserted) in the first three modes.
+        buffer.skipLeadingWhitespace();
+        if (buffer.isEmpty())
+            return;
+        defaultForInitial();
+        // Fall through.
+    }
+    case BeforeHTMLMode: {
+        ASSERT(insertionMode() == BeforeHTMLMode);
+        buffer.skipLeadingWhitespace();
+        if (buffer.isEmpty())
+            return;
+        defaultForBeforeHTML();
+        // Fall through.
+    }
+    case BeforeHeadMode: {
+        ASSERT(insertionMode() == BeforeHeadMode);
+        buffer.skipLeadingWhitespace();
+        if (buffer.isEmpty())
+            return;
+        defaultForBeforeHead();
+        // Fall through.
+    }
+    case InHeadMode: {
+        ASSERT(insertionMode() == InHeadMode);
+        // From here on leading whitespace is inserted as a text node; only
+        // remaining characters trigger the default handler.
+        String leadingWhitespace = buffer.takeLeadingWhitespace();
+        if (!leadingWhitespace.isEmpty())
+            m_tree.insertTextNode(leadingWhitespace);
+        if (buffer.isEmpty())
+            return;
+        defaultForInHead();
+        // Fall through.
+    }
+    case AfterHeadMode: {
+        ASSERT(insertionMode() == AfterHeadMode);
+        String leadingWhitespace = buffer.takeLeadingWhitespace();
+        if (!leadingWhitespace.isEmpty())
+            m_tree.insertTextNode(leadingWhitespace);
+        if (buffer.isEmpty())
+            return;
+        defaultForAfterHead();
+        // Fall through.
+    }
+    case InBodyMode:
+    case InCaptionMode:
+    case InCellMode: {
+        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
+        m_tree.reconstructTheActiveFormattingElements();
+        String characters = buffer.takeRemaining();
+        m_tree.insertTextNode(characters);
+        // Text other than whitespace/replacement characters clears m_framesetOk.
+        if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
+            m_framesetOk = false;
+        break;
+    }
+    case InTableMode:
+    case InTableBodyMode:
+    case InRowMode: {
+        ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
+        ASSERT(m_pendingTableCharacters.isEmpty());
+        // Remember the mode to come back to, then start collecting the
+        // characters in the InTableTextMode case below.
+        m_originalInsertionMode = m_insertionMode;
+        setInsertionMode(InTableTextMode);
+        prepareToReprocessToken();
+        // Fall through.
+    }
+    case InTableTextMode: {
+        // Characters are only buffered in m_pendingTableCharacters here.
+        buffer.giveRemainingTo(m_pendingTableCharacters);
+        break;
+    }
+    case InColumnGroupMode: {
+        ASSERT(insertionMode() == InColumnGroupMode);
+        String leadingWhitespace = buffer.takeLeadingWhitespace();
+        if (!leadingWhitespace.isEmpty())
+            m_tree.insertTextNode(leadingWhitespace);
+        if (buffer.isEmpty())
+            return;
+        if (!processColgroupEndTagForInColumnGroup()) {
+            ASSERT(isParsingFragment());
+            // The spec tells us to drop these characters on the floor.
+            buffer.takeLeadingNonWhitespace();
+            if (buffer.isEmpty())
+                return;
+        }
+        prepareToReprocessToken();
+        goto ReprocessBuffer;
+    }
+    case AfterBodyMode:
+    case AfterAfterBodyMode: {
+        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+        // FIXME: parse error
+        setInsertionMode(InBodyMode);
+        prepareToReprocessToken();
+        goto ReprocessBuffer;
+        // Not reached: the goto above always jumps away.
+        break;
+    }
+    case TextMode: {
+        ASSERT(insertionMode() == TextMode);
+        m_tree.insertTextNode(buffer.takeRemaining());
+        break;
+    }
+    case InHeadNoscriptMode: {
+        ASSERT(insertionMode() == InHeadNoscriptMode);
+        String leadingWhitespace = buffer.takeLeadingWhitespace();
+        if (!leadingWhitespace.isEmpty())
+            m_tree.insertTextNode(leadingWhitespace);
+        if (buffer.isEmpty())
+            return;
+        defaultForInHeadNoscript();
+        goto ReprocessBuffer;
+        // Not reached: the goto above always jumps away.
+        break;
+    }
+    case InFramesetMode:
+    case AfterFramesetMode: {
+        ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+        // Only the whitespace returned by takeRemainingWhitespace() is
+        // inserted; per the FIXME below, non-whitespace is skipped over.
+        String leadingWhitespace = buffer.takeRemainingWhitespace();
+        if (!leadingWhitespace.isEmpty())
+            m_tree.insertTextNode(leadingWhitespace);
+        // FIXME: We should generate a parse error if we skipped over any
+        // non-whitespace characters.
+        break;
+    }
+    case InSelectInTableMode:
+    case InSelectMode: {
+        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+        m_tree.insertTextNode(buffer.takeRemaining());
+        break;
+    }
+    case InForeignContentMode: {
+        ASSERT(insertionMode() == InForeignContentMode);
+        String characters = buffer.takeRemaining();
+        m_tree.insertTextNode(characters);
+        // Unlike the InBodyMode case, this check uses isAllWhitespace().
+        if (m_framesetOk && !isAllWhitespace(characters))
+            m_framesetOk = false;
+        break;
+    }
+    case AfterAfterFramesetMode: {
+        // Same whitespace-only handling as InFramesetMode, except that the
+        // active formatting elements are reconstructed before inserting.
+        String leadingWhitespace = buffer.takeRemainingWhitespace();
+        if (!leadingWhitespace.isEmpty()) {
+            m_tree.reconstructTheActiveFormattingElements();
+            m_tree.insertTextNode(leadingWhitespace);
+        }
+        // FIXME: We should generate a parse error if we skipped over any
+        // non-whitespace characters.
+        break;
+    }
+    }
+}
+
+// Handles the end-of-file token for the current insertion mode.
+//
+// Cases that `break` leave the switch and reach the popAll() call at the
+// bottom, which empties the stack of open elements. Cases that `return`
+// either change the mode and call processEndOfFile() again recursively, or
+// bail out without popping (the InColumnGroupMode paths).
+void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::EndOfFile);
+    switch (insertionMode()) {
+    case InitialMode:
+        ASSERT(insertionMode() == InitialMode);
+        defaultForInitial();
+        // Fall through.
+    case BeforeHTMLMode:
+        ASSERT(insertionMode() == BeforeHTMLMode);
+        defaultForBeforeHTML();
+        // Fall through.
+    case BeforeHeadMode:
+        ASSERT(insertionMode() == BeforeHeadMode);
+        defaultForBeforeHead();
+        // Fall through.
+    case InHeadMode:
+        ASSERT(insertionMode() == InHeadMode);
+        defaultForInHead();
+        // Fall through.
+    case AfterHeadMode:
+        ASSERT(insertionMode() == AfterHeadMode);
+        defaultForAfterHead();
+        // Fall through
+    case InBodyMode:
+    case InCellMode:
+    case InCaptionMode:
+    case InRowMode:
+        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
+        notImplemented(); // Emit parse error based on what elements are still open.
+        break;
+    case AfterBodyMode:
+    case AfterAfterBodyMode:
+        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+        break;
+    case InHeadNoscriptMode:
+        ASSERT(insertionMode() == InHeadNoscriptMode);
+        // Leave the noscript mode via its default handler, then handle the
+        // end-of-file token again.
+        defaultForInHeadNoscript();
+        processEndOfFile(token);
+        return;
+    case AfterFramesetMode:
+    case AfterAfterFramesetMode:
+        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+        break;
+    case InFramesetMode:
+    case InTableMode:
+    case InTableBodyMode:
+    case InSelectInTableMode:
+    case InSelectMode:
+        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
+        // Parse error unless the current element is the <html> element.
+        if (m_tree.currentElement() != m_tree.openElements()->htmlElement())
+            parseError(token);
+        break;
+    case InColumnGroupMode:
+        // Both early returns below skip the popAll() at the end of the
+        // function; the FIXMEs question whether that is intended.
+        if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
+            ASSERT(isParsingFragment());
+            return; // FIXME: Should we break here instead of returning?
+        }
+        if (!processColgroupEndTagForInColumnGroup()) {
+            ASSERT(isParsingFragment());
+            return; // FIXME: Should we break here instead of returning?
+        }
+        prepareToReprocessToken();
+        processEndOfFile(token);
+        return;
+    case InForeignContentMode:
+        setInsertionMode(InBodyMode);
+        processEndOfFile(token);
+        return;
+    case InTableTextMode:
+        // NOTE(review): the recursion relies on defaultForInTableText()
+        // leaving InTableTextMode - confirm.
+        defaultForInTableText();
+        processEndOfFile(token);
+        return;
+    case TextMode:
+        parseError(token);
+        if (m_tree.currentElement()->hasTagName(scriptTag))
+            notImplemented(); // mark the script element as "already started".
+        // Pop the current element, return to the saved mode and handle the
+        // end-of-file token again there.
+        m_tree.openElements()->pop();
+        setInsertionMode(m_originalInsertionMode);
+        prepareToReprocessToken();
+        processEndOfFile(token);
+        return;
+    }
+    ASSERT(m_tree.openElements()->top());
+    m_tree.openElements()->popAll();
+}
+
+void HTMLTreeBuilder::defaultForInitial() // Default processing for InitialMode: force quirks mode, then move to BeforeHTMLMode.
+{
+    notImplemented();
+    if (!m_fragmentContext.fragment()) // Skipped when parsing a fragment (same test as isParsingFragment()).
+        m_document->setCompatibilityMode(Document::QuirksMode);
+    // FIXME: parse error
+    setInsertionMode(BeforeHTMLMode);
+    prepareToReprocessToken();
+}
+
+void HTMLTreeBuilder::defaultForBeforeHTML() // Default processing for BeforeHTMLMode: insert an <html> element, then move to BeforeHeadMode.
+{
+    AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName()); // Synthetic start tag built here, not taken from the input.
+    m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
+    setInsertionMode(BeforeHeadMode);
+    prepareToReprocessToken();
+}
+
+void HTMLTreeBuilder::defaultForBeforeHead() // Default processing for BeforeHeadMode: act as if a <head> start tag had been seen.
+{
+    AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName()); // Synthetic <head> start tag.
+    processStartTag(startHead); // No explicit setInsertionMode() call here, unlike defaultForBeforeHTML().
+    prepareToReprocessToken();
+}
+
+void HTMLTreeBuilder::defaultForInHead() // Default processing for InHeadMode: act as if a </head> end tag had been seen.
+{
+    AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName()); // Synthetic </head> end tag.
+    processEndTag(endHead);
+    prepareToReprocessToken();
+}
+
+void HTMLTreeBuilder::defaultForInHeadNoscript() // Default processing for InHeadNoscriptMode: act as if a </noscript> end tag had been seen.
+{
+    AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName()); // Synthetic </noscript> end tag.
+    processEndTag(endNoscript);
+    prepareToReprocessToken();
+}
+
+void HTMLTreeBuilder::defaultForAfterHead() // Default processing for AfterHeadMode: act as if a <body> start tag had been seen.
+{
+    AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName()); // Synthetic <body> start tag.
+    processStartTag(startBody);
+    m_framesetOk = true; // Assigned after the synthetic <body> has been processed, so this value is the one that sticks.
+    prepareToReprocessToken();
+}
+
+void HTMLTreeBuilder::defaultForInTableText() // Default processing for InTableTextMode: inserts the text held in m_pendingTableCharacters.
+{
+    String characters = String::adopt(m_pendingTableCharacters); // NOTE(review): adopt() presumably leaves the vector empty; confirm.
+    if (!isAllWhitespace(characters)) {
+        // FIXME: parse error
+        HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree); // Guard lives only for this branch.
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertTextNode(characters);
+        m_framesetOk = false;
+        setInsertionMode(m_originalInsertionMode);
+        prepareToReprocessToken();
+        return;
+    }
+    m_tree.insertTextNode(characters); // Whitespace-only text: inserted without the foster-parent guard and without touching m_framesetOk.
+    setInsertionMode(m_originalInsertionMode);
+    prepareToReprocessToken();
+}
+
+bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token) // Returns true if the start tag was handled here, false if the caller must handle it.
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    if (token.name() == htmlTag) {
+        m_tree.insertHTMLHtmlStartTagInBody(token);
+        return true;
+    }
+    if (token.name() == baseTag
+        || token.name() == basefontTag
+        || token.name() == bgsoundTag
+        || token.name() == commandTag
+        || token.name() == linkTag
+        || token.name() == metaTag) {
+        m_tree.insertSelfClosingHTMLElement(token);
+        // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
+        return true;
+    }
+    if (token.name() == titleTag) {
+        processGenericRCDATAStartTag(token);
+        return true;
+    }
+    if (token.name() == noscriptTag) {
+        if (scriptEnabled(m_document->frame())) { // Scripting on: <noscript> goes through the raw-text path instead of InHeadNoscriptMode.
+            processGenericRawTextStartTag(token);
+            return true;
+        }
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InHeadNoscriptMode);
+        return true;
+    }
+    if (token.name() == noframesTag || token.name() == styleTag) {
+        processGenericRawTextStartTag(token);
+        return true;
+    }
+    if (token.name() == scriptTag) {
+        processScriptStartTag(token);
+        if (m_usePreHTML5ParserQuirks && token.selfClosing()) // Quirk: a self-closing <script/> is followed at once by a fake end tag.
+            processFakeEndTag(scriptTag);
+        return true;
+    }
+    if (token.name() == headTag) {
+        parseError(token); // A <head> start tag here is only reported; nothing is inserted.
+        return true;
+    }
+    return false; // Not one of the tags handled above.
+}
+
+void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token) // Inserts the element, puts the tokenizer in RCDATA state and the builder in TextMode.
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    m_tree.insertHTMLElement(token);
+    m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
+    m_originalInsertionMode = m_insertionMode; // Saved so the mode can be restored later (processEndOfFile() does so for TextMode).
+    setInsertionMode(TextMode);
+}
+
+void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token) // Inserts the element, puts the tokenizer in RAWTEXT state and the builder in TextMode.
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    m_tree.insertHTMLElement(token);
+    m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
+    m_originalInsertionMode = m_insertionMode; // Saved so the mode can be restored later (processEndOfFile() does so for TextMode).
+    setInsertionMode(TextMode);
+}
+
+void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token) // Inserts a script element, records where it started, and enters TextMode.
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    m_tree.insertScriptElement(token);
+    m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
+    m_originalInsertionMode = m_insertionMode; // Saved so the mode can be restored later (processEndOfFile() does so for TextMode).
+
+    TextPosition0 position = m_parser->textPosition();
+
+    ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber()); // Parser and tokenizer must agree on the current line.
+
+    m_lastScriptElementStartPosition = position;
+
+    setInsertionMode(TextMode);
+}
+
+void HTMLTreeBuilder::finished() // Reports the end of parsing to the fragment context or, for a full document, to the document.
+{
+    ASSERT(m_document);
+    if (isParsingFragment()) {
+        m_fragmentContext.finished(); // Fragment parse: m_document->finishedParsing() is not called.
+        return;
+    }
+
+    // Warning, this may detach the parser. Do not do anything else after this.
+    m_document->finishedParsing();
+}
+
+bool HTMLTreeBuilder::scriptEnabled(Frame* frame) // Whether the frame's script controller reports that scripts can execute.
+{
+    if (!frame) // No frame: treated as scripting disabled.
+        return false;
+    return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
+}
+
+bool HTMLTreeBuilder::pluginsEnabled(Frame* frame) // Whether the frame's subframe loader allows plugins.
+{
+    if (!frame) // No frame: treated as plugins disabled.
+        return false;
+    return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.h b/Source/WebCore/html/parser/HTMLTreeBuilder.h
new file mode 100644
index 0000000..17b77b7
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HTMLTreeBuilder_h
+#define HTMLTreeBuilder_h
+
+#include "Element.h"
+#include "FragmentScriptingPermission.h"
+#include "HTMLConstructionSite.h"
+#include "HTMLElementStack.h"
+#include "HTMLFormattingElementList.h"
+#include "HTMLTokenizer.h"
+#include <wtf/text/TextPosition.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/OwnPtr.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/PassRefPtr.h>
+#include <wtf/RefPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+class AtomicHTMLToken;
+class Document;
+class DocumentFragment;
+class Frame;
+class HTMLToken;
+class HTMLDocument;
+class Node;
+class HTMLDocumentParser;
+
+class HTMLTreeBuilder : public Noncopyable { // Processes tokens according to the current insertion mode and builds the tree through m_tree.
+public:
+    static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks) // Factory for parsing into an HTMLDocument.
+    {
+        return adoptPtr(new HTMLTreeBuilder(parser, document, reportErrors, usePreHTML5ParserQuirks));
+    }
+    static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks) // Factory for parsing into a DocumentFragment.
+    {
+        return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks));
+    }
+    ~HTMLTreeBuilder();
+
+    bool isParsingFragment() const { return !!m_fragmentContext.fragment(); } // True when m_fragmentContext holds a fragment.
+
+    void detach();
+
+    void setPaused(bool paused) { m_isPaused = paused; }
+    bool isPaused() const { return m_isPaused; }
+
+    // The token really should be passed as a const& since it's never modified.
+    void constructTreeFromToken(HTMLToken&);
+    void constructTreeFromAtomicToken(AtomicHTMLToken&);
+
+    // Must be called when parser is paused before calling the parser again.
+    PassRefPtr<Element> takeScriptToProcess(TextPosition1& scriptStartPosition);
+
+    // Done, close any open tags, etc.
+    void finished();
+
+    static bool scriptEnabled(Frame*); // Returns false for a null frame.
+    static bool pluginsEnabled(Frame*); // Returns false for a null frame.
+
+private:
+    class FakeInsertionMode;
+    class ExternalCharacterTokenBuffer;
+    // Represents HTML5 "insertion mode"
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
+    enum InsertionMode {
+        InitialMode,
+        BeforeHTMLMode,
+        BeforeHeadMode,
+        InHeadMode,
+        InHeadNoscriptMode,
+        AfterHeadMode,
+        InBodyMode,
+        TextMode,
+        InTableMode,
+        InTableTextMode,
+        InCaptionMode,
+        InColumnGroupMode,
+        InTableBodyMode,
+        InRowMode,
+        InCellMode,
+        InSelectMode,
+        InSelectInTableMode,
+        InForeignContentMode,
+        AfterBodyMode,
+        InFramesetMode,
+        AfterFramesetMode,
+        AfterAfterBodyMode,
+        AfterAfterFramesetMode,
+    };
+
+    HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument*, bool reportErrors, bool usePreHTML5ParserQuirks);
+    HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment*, Element* contextElement, FragmentScriptingPermission, bool usePreHTML5ParserQuirks);
+
+    void processToken(AtomicHTMLToken&);
+
+    void processDoctypeToken(AtomicHTMLToken&);
+    void processStartTag(AtomicHTMLToken&);
+    void processEndTag(AtomicHTMLToken&);
+    void processComment(AtomicHTMLToken&);
+    void processCharacter(AtomicHTMLToken&);
+    void processEndOfFile(AtomicHTMLToken&);
+
+    bool processStartTagForInHead(AtomicHTMLToken&); // Returns true if the token was handled.
+    void processStartTagForInBody(AtomicHTMLToken&);
+    void processStartTagForInTable(AtomicHTMLToken&);
+    void processEndTagForInBody(AtomicHTMLToken&);
+    void processEndTagForInTable(AtomicHTMLToken&);
+    void processEndTagForInTableBody(AtomicHTMLToken&);
+    void processEndTagForInRow(AtomicHTMLToken&);
+    void processEndTagForInCell(AtomicHTMLToken&);
+
+    void processIsindexStartTagForInBody(AtomicHTMLToken&);
+    bool processBodyEndTagForInBody(AtomicHTMLToken&);
+    bool processTableEndTagForInTable();
+    bool processCaptionEndTagForInCaption();
+    bool processColgroupEndTagForInColumnGroup();
+    bool processTrEndTagForInRow();
+    // FIXME: This function should be inlined into its one call site or it
+    // needs to assert which tokens it can be called with.
+    void processAnyOtherEndTagForInBody(AtomicHTMLToken&);
+
+    void processCharacterBuffer(ExternalCharacterTokenBuffer&);
+
+    void processFakeStartTag(const QualifiedName&, PassRefPtr<NamedNodeMap> attributes = 0);
+    void processFakeEndTag(const QualifiedName&);
+    void processFakeCharacters(const String&);
+    void processFakePEndTagIfPInButtonScope();
+
+    void processGenericRCDATAStartTag(AtomicHTMLToken&); // Tokenizer to RCDATAState, builder to TextMode.
+    void processGenericRawTextStartTag(AtomicHTMLToken&); // Tokenizer to RAWTEXTState, builder to TextMode.
+    void processScriptStartTag(AtomicHTMLToken&); // Tokenizer to ScriptDataState, builder to TextMode.
+
+    // Default processing for the different insertion modes.
+    void defaultForInitial();
+    void defaultForBeforeHTML();
+    void defaultForBeforeHead();
+    void defaultForInHead();
+    void defaultForInHeadNoscript();
+    void defaultForAfterHead();
+    void defaultForInTableText();
+
+    void prepareToReprocessToken();
+
+    void reprocessStartTag(AtomicHTMLToken&);
+    void reprocessEndTag(AtomicHTMLToken&);
+
+    PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&);
+
+    HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
+    void callTheAdoptionAgency(AtomicHTMLToken&);
+
+    void closeTheCell();
+
+    template <bool shouldClose(const Element*)>
+    void processCloseWhenNestedTag(AtomicHTMLToken&);
+
+    bool m_framesetOk;
+
+    // FIXME: Implement error reporting.
+    void parseError(AtomicHTMLToken&) { } // Currently a no-op.
+
+    InsertionMode insertionMode() const { return m_insertionMode; }
+    void setInsertionMode(InsertionMode mode) // Sets a real mode and clears the fake-mode flag.
+    {
+        m_insertionMode = mode;
+        m_isFakeInsertionMode = false;
+    }
+
+    bool isFakeInsertionMode() { return m_isFakeInsertionMode; }
+    void setFakeInsertionMode(InsertionMode mode) // Sets the mode and marks it as fake.
+    {
+        m_insertionMode = mode;
+        m_isFakeInsertionMode = true;
+    }
+
+    void resetInsertionModeAppropriately();
+
+    void processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token);
+    void resetForeignInsertionMode();
+
+    class FragmentParsingContext : public Noncopyable { // State that exists only when parsing into a DocumentFragment.
+    public:
+        FragmentParsingContext();
+        FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
+        ~FragmentParsingContext();
+
+        Document* document() const;
+        DocumentFragment* fragment() const { return m_fragment; } // May be null; the other accessors assert that it is set.
+        Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; }
+        FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; }
+
+        void finished();
+
+    private:
+        RefPtr<Document> m_dummyDocumentForFragmentParsing;
+        DocumentFragment* m_fragment;
+        Element* m_contextElement;
+
+        // FragmentScriptingNotAllowed causes the Parser to remove children
+        // from <script> tags (so javascript doesn't show up in pastes).
+        FragmentScriptingPermission m_scriptingPermission;
+    };
+
+    FragmentParsingContext m_fragmentContext;
+
+    Document* m_document;
+    HTMLConstructionSite m_tree;
+
+    bool m_reportErrors;
+    bool m_isPaused;
+    bool m_isFakeInsertionMode; // Set by setFakeInsertionMode(), cleared by setInsertionMode().
+
+    // FIXME: InsertionModes should be a separate object to prevent direct
+    // manipulation of these variables.  For now, be careful to always use
+    // setInsertionMode and never set m_insertionMode directly.
+    InsertionMode m_insertionMode;
+    InsertionMode m_originalInsertionMode; // Mode saved before entering TextMode, restored afterwards.
+
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
+    Vector<UChar> m_pendingTableCharacters;
+
+    // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
+    // from within parser actions. We also need it to track the current position.
+    HTMLDocumentParser* m_parser;
+
+    RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
+    TextPosition1 m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
+
+    // FIXME: We probably want to remove this member.  Originally, it was
+    // created to service the legacy tree builder, but it seems to be used for
+    // some other things now.
+    TextPosition0 m_lastScriptElementStartPosition;
+
+    bool m_usePreHTML5ParserQuirks;
+
+    bool m_hasPendingForeignInsertionModeSteps;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.cpp b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
new file mode 100644
index 0000000..ace8590
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLViewSourceParser.h"
+
+#include "HTMLDocumentParser.h"
+#include "HTMLNames.h"
+#include "HTMLViewSourceDocument.h"
+
+namespace WebCore {
+
+HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument* document) // Creates the tokenizer; the remaining members are default-constructed.
+    : DecodedDataDocumentParser(document)
+    , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document))) // Quirks setting is derived from the document.
+{
+}
+
+HTMLViewSourceParser::~HTMLViewSourceParser() // Empty body; members clean up through their own destructors.
+{
+}
+
+void HTMLViewSourceParser::insert(const SegmentedString&) // Not supported by this parser; the argument is ignored.
+{
+    ASSERT_NOT_REACHED(); // Calling insert() on a view-source parser is treated as a programming error.
+}
+
+void HTMLViewSourceParser::pumpTokenizer() // Tokenizes m_input and hands each token, with its source text, to the document.
+{
+    while (m_tokenizer->nextToken(m_input.current(), m_token)) {
+        m_token.end(m_input.current().numberOfCharactersConsumed()); // NOTE(review): presumably sets the end index that sourceForToken() reads; confirm in HTMLToken.
+        document()->addSource(sourceForToken(), m_token);
+        updateTokenizerState();
+        m_token.clear(m_input.current().numberOfCharactersConsumed()); // Reset the reused token, passing the current input offset.
+    }
+}
+
+void HTMLViewSourceParser::append(const SegmentedString& input) // Queues new input and tokenizes as much as possible.
+{
+    m_input.appendToEnd(input);
+    m_source.append(input); // Second copy of the input, consumed by sourceForToken().
+    pumpTokenizer();
+}
+
+String HTMLViewSourceParser::sourceForToken() // Returns the source text covered by m_token, consuming it from m_source.
+{
+    if (m_token.type() == HTMLToken::EndOfFile)
+        return String(); // An end-of-file token has no source text.
+
+    ASSERT(m_source.numberOfCharactersConsumed() == m_token.startIndex()); // m_source must be positioned at the token's start.
+    UChar* data = 0;
+    int length = m_token.endIndex() - m_token.startIndex();
+    String source = String::createUninitialized(length, data); // NOTE(review): presumably points `data` at the new string's buffer; confirm.
+    for (int i = 0; i < length; ++i) {
+        data[i] = *m_source;
+        m_source.advance();
+    }
+    return source;
+}
+
+void HTMLViewSourceParser::updateTokenizerState() // Lets the tokenizer adjust its state after a start tag.
+{
+    // FIXME: The tokenizer should do this work for us.
+    if (m_token.type() != HTMLToken::StartTag)
+        return; // Only start tags are passed to updateStateFor().
+
+    AtomicString tagName(m_token.name().data(), m_token.name().size());
+    m_tokenizer->updateStateFor(tagName, document()->frame());
+}
+
+void HTMLViewSourceParser::finish() // Marks end of input, drains the tokenizer, then notifies the document.
+{
+    if (!m_input.haveSeenEndOfFile()) // Avoid marking end of file twice.
+        m_input.markEndOfFile();
+    pumpTokenizer();
+    document()->finishedParsing();
+}
+
+bool HTMLViewSourceParser::finishWasCalled() // Reported via the end-of-file mark that finish() places on m_input.
+{
+    return m_input.haveSeenEndOfFile();
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.h b/Source/WebCore/html/parser/HTMLViewSourceParser.h
new file mode 100644
index 0000000..abe55b4
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLViewSourceParser.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLViewSourceParser_h
+#define HTMLViewSourceParser_h
+
+#include "DecodedDataDocumentParser.h"
+#include "HTMLInputStream.h"
+#include "HTMLToken.h"
+#include "HTMLTokenizer.h"
+#include "HTMLViewSourceDocument.h"
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+class HTMLTokenizer;
+class HTMLScriptRunner;
+class HTMLTreeBuilder;
+class HTMLPreloadScanner;
+class ScriptController;
+class ScriptSourceCode;
+
+class HTMLViewSourceParser :  public DecodedDataDocumentParser { // Tokenizes input and passes each token's source text to an HTMLViewSourceDocument.
+public:
+    static PassRefPtr<HTMLViewSourceParser> create(HTMLViewSourceDocument* document)
+    {
+        return adoptRef(new HTMLViewSourceParser(document));
+    }
+    virtual ~HTMLViewSourceParser();
+
+protected:
+    explicit HTMLViewSourceParser(HTMLViewSourceDocument*);
+
+    HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); } // Protected so subclasses can reach the tokenizer.
+
+private:
+    // DocumentParser
+    virtual void insert(const SegmentedString&); // Asserts if called.
+    virtual void append(const SegmentedString&);
+    virtual void finish();
+    virtual bool finishWasCalled();
+
+    HTMLViewSourceDocument* document() const { return static_cast<HTMLViewSourceDocument*>(DecodedDataDocumentParser::document()); }
+
+    void pumpTokenizer();
+    String sourceForToken();
+    void updateTokenizerState();
+
+    HTMLInputStream m_input; // Stream read by the tokenizer.
+    SegmentedString m_source; // Second copy of the appended input; consumed by sourceForToken().
+    HTMLToken m_token; // Reused for every token; cleared in pumpTokenizer().
+    OwnPtr<HTMLTokenizer> m_tokenizer;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/NestingLevelIncrementer.h b/Source/WebCore/html/parser/NestingLevelIncrementer.h
new file mode 100644
index 0000000..c597876
--- /dev/null
+++ b/Source/WebCore/html/parser/NestingLevelIncrementer.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NestingLevelIncrementer_h
+#define NestingLevelIncrementer_h
+
+#include <wtf/Noncopyable.h>
+
+namespace WebCore {
+
+// Scoped guard: increments the referenced counter on construction and decrements it on destruction.
+class NestingLevelIncrementer : public Noncopyable {
+public:
+    explicit NestingLevelIncrementer(unsigned& nestingLevel)
+        : m_nestingLevel(&nestingLevel)
+    {
+        ++(*m_nestingLevel);
+    }
+
+    ~NestingLevelIncrementer() { --(*m_nestingLevel); }
+
+private:
+    unsigned* m_nestingLevel; // Caller-owned counter; dereferenced in the destructor, so it must outlive this guard.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/TextDocumentParser.cpp b/Source/WebCore/html/parser/TextDocumentParser.cpp
new file mode 100644
index 0000000..d03b744
--- /dev/null
+++ b/Source/WebCore/html/parser/TextDocumentParser.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextDocumentParser.h"
+
+#include "HTMLDocument.h"
+#include "HTMLNames.h"
+#include "HTMLTokenizer.h"
+#include "HTMLTreeBuilder.h"
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+TextDocumentParser::TextDocumentParser(HTMLDocument* document) // Sets up an HTML parser whose tokenizer starts in the PLAINTEXT state.
+    : HTMLDocumentParser(document, false) // NOTE(review): the meaning of the `false` argument is not visible here; confirm in HTMLDocumentParser.
+    , m_haveInsertedFakePreElement(false)
+{
+    tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
+}
+
+TextDocumentParser::~TextDocumentParser() // Empty body; nothing is released explicitly here.
+{
+}
+
+void TextDocumentParser::append(const SegmentedString& text) // Ensures the fake <pre> exists, then forwards the text to the HTML parser.
+{
+    if (!m_haveInsertedFakePreElement) // Insert the fake <pre> once, before the first text is parsed.
+        insertFakePreElement();
+    HTMLDocumentParser::append(text);
+}
+
+void TextDocumentParser::insertFakePreElement() // Feeds a synthetic styled <pre> start tag to the tree builder.
+{
+    // In principle, we should create a specialized tree builder for
+    // TextDocuments, but instead we re-use the existing HTMLTreeBuilder.
+    // We create a fake token and give it to the tree builder rather than
+    // sending fake bytes through the front-end of the parser to avoid
+    // disturbing the line/column number calculations.
+
+    RefPtr<Attribute> styleAttribute = Attribute::createMapped("style", "word-wrap: break-word; white-space: pre-wrap;");
+    RefPtr<NamedNodeMap> attributes = NamedNodeMap::create();
+    attributes->insertAttribute(styleAttribute.release(), false);
+    AtomicHTMLToken fakePre(HTMLToken::StartTag, preTag.localName(), attributes.release()); // <pre> start tag carrying only the style attribute.
+
+    treeBuilder()->constructTreeFromAtomicToken(fakePre);
+    m_haveInsertedFakePreElement = true; // Keeps append() from inserting the element again.
+}
+
+}
diff --git a/Source/WebCore/html/parser/TextDocumentParser.h b/Source/WebCore/html/parser/TextDocumentParser.h
new file mode 100644
index 0000000..1cccc5b
--- /dev/null
+++ b/Source/WebCore/html/parser/TextDocumentParser.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2010 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef TextDocumentParser_h
+#define TextDocumentParser_h
+
+#include "HTMLDocumentParser.h"
+
+namespace WebCore {
+
+class TextDocumentParser : public HTMLDocumentParser { // HTML parser variant that wraps appended text in a synthetic <pre> element.
+public:
+    static PassRefPtr<TextDocumentParser> create(HTMLDocument* document)
+    {
+        return adoptRef(new TextDocumentParser(document));
+    }
+    virtual ~TextDocumentParser();
+
+private:
+    explicit TextDocumentParser(HTMLDocument*);
+
+    virtual void append(const SegmentedString&); // Inserts the fake <pre> on first use, then defers to the base class.
+    void insertFakePreElement();
+
+    bool m_haveInsertedFakePreElement; // False until insertFakePreElement() has run.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/TextViewSourceParser.cpp b/Source/WebCore/html/parser/TextViewSourceParser.cpp
new file mode 100644
index 0000000..d7e6e3d
--- /dev/null
+++ b/Source/WebCore/html/parser/TextViewSourceParser.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextViewSourceParser.h"
+
+#include "HTMLTokenizer.h"
+
+namespace WebCore {
+
+TextViewSourceParser::TextViewSourceParser(HTMLViewSourceDocument* document) // View-source parser whose tokenizer starts in the PLAINTEXT state.
+    : HTMLViewSourceParser(document)
+{
+    tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
+}
+
+TextViewSourceParser::~TextViewSourceParser() // Empty body; nothing is released explicitly here.
+{
+}
+
+}
diff --git a/Source/WebCore/html/parser/TextViewSourceParser.h b/Source/WebCore/html/parser/TextViewSourceParser.h
new file mode 100644
index 0000000..e4170ed
--- /dev/null
+++ b/Source/WebCore/html/parser/TextViewSourceParser.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextViewSourceParser_h
+#define TextViewSourceParser_h
+
+#include "HTMLViewSourceParser.h"
+
+namespace WebCore {
+
+// View-source parser built on HTMLViewSourceParser. Instances are obtained
+// through create(); the constructor is private.
+class TextViewSourceParser :  public HTMLViewSourceParser {
+public:
+    // Allocates a parser for |document| and passes the new object to
+    // adoptRef(), returning the resulting PassRefPtr.
+    static PassRefPtr<TextViewSourceParser> create(HTMLViewSourceDocument* document)
+    {
+        return adoptRef(new TextViewSourceParser(document));
+    }
+    virtual ~TextViewSourceParser();
+
+private:
+    // Private so that callers go through create().
+    explicit TextViewSourceParser(HTMLViewSourceDocument*);
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/create-html-entity-table b/Source/WebCore/html/parser/create-html-entity-table
new file mode 100755
index 0000000..e6132bc
--- /dev/null
+++ b/Source/WebCore/html/parser/create-html-entity-table
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+# Copyright (c) 2010 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# 
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import csv
+import os.path
+import string
+import sys
+
+# Column positions within each row read from the input CSV: the entity name
+# comes first, followed by its code point written as "U+XXXX".
+ENTITY = 0
+VALUE = 1
+
+def convert_entity_to_cpp_name(entity):
+    postfix = "EntityName"
+    if entity[-1] == ";":
+        return "%sSemicolon%s" % (entity[:-1], postfix)
+    return "%s%s" % (entity, postfix)
+
+
+def convert_entity_to_uchar_array(entity):
+    return "{'%s'}" % "', '".join(entity)
+
+
+def convert_value_to_int(value):
+    assert(value[0] == "U")
+    assert(value[1] == "+")
+    return "0x" + value[2:]
+
+
+def offset_table_entry(offset):
+    return "    &staticEntityTable[%s]," % offset
+
+
+program_name = os.path.basename(__file__)
+if len(sys.argv) < 4 or sys.argv[1] != "-o":
+    print >> sys.stderr, "Usage: %s -o OUTPUT_FILE INPUT_FILE" % program_name
+    exit(1)
+
+output_path = sys.argv[2]
+input_path = sys.argv[3]
+
+html_entity_names_file = open(input_path)
+entries = list(csv.reader(html_entity_names_file))
+html_entity_names_file.close()
+
+entries.sort(lambda a, b: cmp(a[ENTITY], b[ENTITY]))
+entity_count = len(entries)
+
+output_file = open(output_path, "w")
+
+print >> output_file, """/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+// THIS FILE IS GENERATED BY WebCore/html/parser/create-html-entity-table
+// DO NOT EDIT (unless you are a ninja)!
+
+#include "config.h"
+#include "HTMLEntityTable.h"
+
+namespace WebCore {
+
+namespace {
+"""
+
+for entry in entries:
+    print >> output_file, "const UChar %sEntityName[] = %s;" % (
+        convert_entity_to_cpp_name(entry[ENTITY]),
+        convert_entity_to_uchar_array(entry[ENTITY]))
+
+print >> output_file, """
+HTMLEntityTableEntry staticEntityTable[%s] = {""" % entity_count
+
+index = {}
+offset = 0
+for entry in entries:
+    letter = entry[ENTITY][0]
+    if not index.get(letter):
+        index[letter] = offset
+    print >> output_file, '    { %sEntityName, %s, %s },' % (
+        convert_entity_to_cpp_name(entry[ENTITY]),
+        len(entry[ENTITY]),
+        convert_value_to_int(entry[VALUE]))
+    offset += 1
+
+print >> output_file, """};
+"""
+
+print >> output_file, "const HTMLEntityTableEntry* uppercaseOffset[] = {"
+for letter in string.uppercase:
+    print >> output_file, offset_table_entry(index[letter])
+print >> output_file, offset_table_entry(index['a'])
+print >> output_file, """};
+
+const HTMLEntityTableEntry* lowercaseOffset[] = {"""
+for letter in string.lowercase:
+    print >> output_file, offset_table_entry(index[letter])
+print >> output_file, offset_table_entry(entity_count)
+print >> output_file, """};
+
+}
+
+const HTMLEntityTableEntry* HTMLEntityTable::firstEntryStartingWith(UChar c)
+{
+    if (c >= 'A' && c <= 'Z')
+        return uppercaseOffset[c - 'A'];
+    if (c >= 'a' && c <= 'z')
+        return lowercaseOffset[c - 'a'];
+    return 0;
+}
+
+const HTMLEntityTableEntry* HTMLEntityTable::lastEntryStartingWith(UChar c)
+{
+    if (c >= 'A' && c <= 'Z')
+        return uppercaseOffset[c - 'A' + 1] - 1;
+    if (c >= 'a' && c <= 'z')
+        return lowercaseOffset[c - 'a' + 1] - 1;
+    return 0;
+}
+
+const HTMLEntityTableEntry* HTMLEntityTable::firstEntry()
+{
+    return &staticEntityTable[0];
+}
+
+const HTMLEntityTableEntry* HTMLEntityTable::lastEntry()
+{
+    return &staticEntityTable[%s - 1];
+}
+
+}
+""" % entity_count
author	Steve Block <steveblock@google.com>	2011-05-06 11:45:16 +0100
committer	Steve Block <steveblock@google.com>	2011-05-12 13:44:10 +0100
commit	cad810f21b803229eb11403f9209855525a25d57 (patch)
tree	29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /Source/WebCore/html/parser
parent	121b0cf4517156d0ac5111caf9830c51b69bae8f (diff)
download	external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2