summary refs log tree commit diff stats
path: root/Source/WebCore/html/parser
diff options
context:
space:
mode:
author Steve Block <steveblock@google.com> 2011-05-06 11:45:16 +0100
committer Steve Block <steveblock@google.com> 2011-05-12 13:44:10 +0100
commit cad810f21b803229eb11403f9209855525a25d57 (patch)
tree 29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /Source/WebCore/html/parser
parent 121b0cf4517156d0ac5111caf9830c51b69bae8f (diff)
download external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip
external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz
external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2
Merge WebKit at r75315: Initial merge by git.
Change-Id: I570314b346ce101c935ed22a626b48c2af266b84
Diffstat (limited to 'Source/WebCore/html/parser')
-rw-r--r--Source/WebCore/html/parser/CSSPreloadScanner.cpp195
-rw-r--r--Source/WebCore/html/parser/CSSPreloadScanner.h71
-rw-r--r--Source/WebCore/html/parser/HTMLConstructionSite.cpp464
-rw-r--r--Source/WebCore/html/parser/HTMLConstructionSite.h148
-rw-r--r--Source/WebCore/html/parser/HTMLDocumentParser.cpp549
-rw-r--r--Source/WebCore/html/parser/HTMLDocumentParser.h150
-rw-r--r--Source/WebCore/html/parser/HTMLElementStack.cpp569
-rw-r--r--Source/WebCore/html/parser/HTMLElementStack.h156
-rw-r--r--Source/WebCore/html/parser/HTMLEntityNames.in2138
-rw-r--r--Source/WebCore/html/parser/HTMLEntityParser.cpp272
-rw-r--r--Source/WebCore/html/parser/HTMLEntityParser.h41
-rw-r--r--Source/WebCore/html/parser/HTMLEntitySearch.cpp134
-rw-r--r--Source/WebCore/html/parser/HTMLEntitySearch.h75
-rw-r--r--Source/WebCore/html/parser/HTMLEntityTable.h52
-rw-r--r--Source/WebCore/html/parser/HTMLFormattingElementList.cpp134
-rw-r--r--Source/WebCore/html/parser/HTMLFormattingElementList.h134
-rw-r--r--Source/WebCore/html/parser/HTMLInputStream.h164
-rw-r--r--Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp200
-rw-r--r--Source/WebCore/html/parser/HTMLMetaCharsetParser.h73
-rw-r--r--Source/WebCore/html/parser/HTMLParserIdioms.cpp221
-rw-r--r--Source/WebCore/html/parser/HTMLParserIdioms.h76
-rw-r--r--Source/WebCore/html/parser/HTMLParserScheduler.cpp114
-rw-r--r--Source/WebCore/html/parser/HTMLParserScheduler.h94
-rw-r--r--Source/WebCore/html/parser/HTMLPreloadScanner.cpp194
-rw-r--r--Source/WebCore/html/parser/HTMLPreloadScanner.h64
-rw-r--r--Source/WebCore/html/parser/HTMLScriptRunner.cpp321
-rw-r--r--Source/WebCore/html/parser/HTMLScriptRunner.h102
-rw-r--r--Source/WebCore/html/parser/HTMLScriptRunnerHost.h54
-rw-r--r--Source/WebCore/html/parser/HTMLToken.h526
-rw-r--r--Source/WebCore/html/parser/HTMLTokenizer.cpp1698
-rw-r--r--Source/WebCore/html/parser/HTMLTokenizer.h316
-rw-r--r--Source/WebCore/html/parser/HTMLTreeBuilder.cpp2822
-rw-r--r--Source/WebCore/html/parser/HTMLTreeBuilder.h267
-rw-r--r--Source/WebCore/html/parser/HTMLViewSourceParser.cpp106
-rw-r--r--Source/WebCore/html/parser/HTMLViewSourceParser.h79
-rw-r--r--Source/WebCore/html/parser/NestingLevelIncrementer.h50
-rw-r--r--Source/WebCore/html/parser/TextDocumentParser.cpp72
-rw-r--r--Source/WebCore/html/parser/TextDocumentParser.h52
-rw-r--r--Source/WebCore/html/parser/TextViewSourceParser.cpp43
-rw-r--r--Source/WebCore/html/parser/TextViewSourceParser.h47
-rwxr-xr-xSource/WebCore/html/parser/create-html-entity-table178
41 files changed, 13215 insertions, 0 deletions
diff --git a/Source/WebCore/html/parser/CSSPreloadScanner.cpp b/Source/WebCore/html/parser/CSSPreloadScanner.cpp
new file mode 100644
index 0000000..23364f9
--- /dev/null
+++ b/Source/WebCore/html/parser/CSSPreloadScanner.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2008, 2010 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "CSSPreloadScanner.h"
+
+#include "CachedCSSStyleSheet.h"
+#include "CachedResourceLoader.h"
+#include "Document.h"
+#include "HTMLParserIdioms.h"
+#include "HTMLToken.h"
+
+namespace WebCore {
+
+CSSPreloadScanner::CSSPreloadScanner(Document* document) // Starts in the Initial state and remembers |document| for later preload requests.
+ : m_state(Initial)
+ , m_document(document) // NOTE(review): m_scanningBody is not initialized here; scan() assigns it before it tokenizes anything.
+{
+}
+
+void CSSPreloadScanner::reset() // Returns the state machine to Initial and discards any partially collected rule.
+{
+ m_state = Initial;
+ m_rule.clear();
+ m_ruleValue.clear();
+}
+
+void CSSPreloadScanner::scan(const HTMLToken& token, bool scanningBody) // Feeds every character of |token| through tokenize().
+{
+ m_scanningBody = scanningBody; // Kept so emitRule() can forward it to preload().
+
+ const HTMLToken::DataVector& characters = token.characters();
+ for (HTMLToken::DataVector::const_iterator iter = characters.begin(); iter != characters.end(); ++iter)
+ tokenize(*iter);
+}
+
+inline void CSSPreloadScanner::tokenize(UChar c) // Advances the at-rule detecting state machine by one character.
+{
+ // We are just interested in @import rules, no need for real tokenization here
+ // Searching for other types of resources is probably low payoff.
+ switch (m_state) {
+ case Initial: // Outside any rule or comment: watch for an at-sign or a slash.
+ if (c == '@')
+ m_state = RuleStart;
+ else if (c == '/')
+ m_state = MaybeComment;
+ break;
+ case MaybeComment: // Saw a slash; only a following asterisk opens a comment.
+ if (c == '*')
+ m_state = Comment;
+ else
+ m_state = Initial;
+ break;
+ case Comment: // Inside a comment; everything is skipped until an asterisk.
+ if (c == '*')
+ m_state = MaybeCommentEnd;
+ break;
+ case MaybeCommentEnd: // Saw an asterisk inside a comment; a slash closes the comment.
+ if (c == '/')
+ m_state = Initial;
+ else if (c == '*')
+ ; // A run of asterisks keeps waiting for the closing slash.
+ else
+ m_state = Comment;
+ break;
+ case RuleStart: // Saw an at-sign; the rule name must begin with an ASCII letter.
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
+ m_rule.clear();
+ m_ruleValue.clear();
+ m_rule.append(c);
+ m_state = Rule;
+ } else
+ m_state = Initial;
+ break;
+ case Rule: // Collecting the rule name until whitespace or a semicolon.
+ if (isHTMLSpace(c))
+ m_state = AfterRule;
+ else if (c == ';')
+ m_state = Initial; // A rule with no value is dropped without being emitted.
+ else
+ m_rule.append(c);
+ break;
+ case AfterRule: // Skipping whitespace between the rule name and its value.
+ if (isHTMLSpace(c))
+ ;
+ else if (c == ';')
+ m_state = Initial;
+ else {
+ m_state = RuleValue;
+ m_ruleValue.append(c);
+ }
+ break;
+ case RuleValue: // Collecting the value until whitespace or a semicolon.
+ if (isHTMLSpace(c))
+ m_state = AfterRuleValue;
+ else if (c == ';') {
+ emitRule();
+ m_state = Initial;
+ } else
+ m_ruleValue.append(c);
+ break;
+ case AfterRuleValue: // Value ended with whitespace; only a semicolon emits the rule.
+ if (isHTMLSpace(c))
+ ;
+ else if (c == ';') {
+ emitRule();
+ m_state = Initial;
+ } else {
+ // FIXME: media rules
+ m_state = Initial; // Anything else after the value abandons the rule without emitting it.
+ }
+ break;
+ }
+}
+
+static String parseCSSStringOrURL(const UChar* characters, size_t length) // Extracts the quoted text from a value that is either a quoted string or url() wrapping one.
+{
+ size_t offset = 0; // Index of the first character still under consideration.
+ size_t reducedLength = length; // Number of characters still under consideration.
+
+ while (reducedLength && isHTMLSpace(characters[offset])) { // Trim leading whitespace.
+ ++offset;
+ --reducedLength;
+ }
+ while (reducedLength && isHTMLSpace(characters[offset + reducedLength - 1])) // Trim trailing whitespace.
+ --reducedLength;
+
+ if (reducedLength >= 5 // Shortest possible wrapper is the five characters of an empty url().
+ && (characters[offset] == 'u' || characters[offset] == 'U')
+ && (characters[offset + 1] == 'r' || characters[offset + 1] == 'R')
+ && (characters[offset + 2] == 'l' || characters[offset + 2] == 'L')
+ && characters[offset + 3] == '('
+ && characters[offset + reducedLength - 1] == ')') {
+ offset += 4; // Skip the four-character prefix.
+ reducedLength -= 5; // Drop the prefix and the closing parenthesis.
+ }
+
+ while (reducedLength && isHTMLSpace(characters[offset])) { // Trim whitespace inside the parentheses.
+ ++offset;
+ --reducedLength;
+ }
+ while (reducedLength && isHTMLSpace(characters[offset + reducedLength - 1]))
+ --reducedLength;
+
+ if (reducedLength < 2 || characters[offset] != characters[offset + reducedLength - 1] || !(characters[offset] == '\'' || characters[offset] == '"')) // Must start and end with the same quote character.
+ return String(); // Unquoted values are rejected with a default-constructed String.
+ offset++;
+ reducedLength -= 2; // Drop both quote characters.
+
+ while (reducedLength && isHTMLSpace(characters[offset])) { // Trim whitespace inside the quotes.
+ ++offset;
+ --reducedLength;
+ }
+ while (reducedLength && isHTMLSpace(characters[offset + reducedLength - 1]))
+ --reducedLength;
+
+ return String(characters + offset, reducedLength);
+}
+
+void CSSPreloadScanner::emitRule() // Requests a stylesheet preload if the collected rule is an import, then clears the buffers.
+{
+ if (equalIgnoringCase("import", m_rule.data(), m_rule.size())) { // The rule name is compared without regard to case.
+ String value = parseCSSStringOrURL(m_ruleValue.data(), m_ruleValue.size());
+ if (!value.isEmpty()) // Values that could not be parsed are skipped.
+ m_document->cachedResourceLoader()->preload(CachedResource::CSSStyleSheet, value, String(), m_scanningBody);
+ }
+ m_rule.clear();
+ m_ruleValue.clear();
+}
+
+}
diff --git a/Source/WebCore/html/parser/CSSPreloadScanner.h b/Source/WebCore/html/parser/CSSPreloadScanner.h
new file mode 100644
index 0000000..7ac282f
--- /dev/null
+++ b/Source/WebCore/html/parser/CSSPreloadScanner.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CSSPreloadScanner_h
+#define CSSPreloadScanner_h
+
+#include "PlatformString.h"
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+class Document;
+class HTMLToken;
+
+class CSSPreloadScanner : public Noncopyable { // Scans CSS text for import rules and asks the document's resource loader to preload them.
+public:
+ CSSPreloadScanner(Document*);
+
+ void reset(); // Returns to the Initial state and clears the rule buffers.
+ void scan(const HTMLToken&, bool scanningBody); // Runs each character of the token through the state machine.
+
+private:
+ enum State { // States of the character-level scanner driven by tokenize().
+ Initial,
+ MaybeComment,
+ Comment,
+ MaybeCommentEnd,
+ RuleStart,
+ Rule,
+ AfterRule,
+ RuleValue,
+ AfterRuleValue
+ };
+
+ inline void tokenize(UChar c); // Consumes one character and updates m_state.
+ void emitRule(); // Acts on the collected rule name and value, then clears them.
+
+ State m_state;
+ Vector<UChar, 16> m_rule; // Characters of the at-rule name collected so far.
+ Vector<UChar> m_ruleValue; // Characters of the rule value collected so far.
+
+ bool m_scanningBody; // Last value passed to scan(); forwarded to preload().
+ Document* m_document;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.cpp b/Source/WebCore/html/parser/HTMLConstructionSite.cpp
new file mode 100644
index 0000000..c46b9b9
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLConstructionSite.cpp
@@ -0,0 +1,464 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLTreeBuilder.h"
+
+#include "Comment.h"
+#include "DocumentFragment.h"
+#include "DocumentType.h"
+#include "Element.h"
+#include "Frame.h"
+#include "HTMLDocument.h"
+#include "HTMLElementFactory.h"
+#include "HTMLFormElement.h"
+#include "HTMLHtmlElement.h"
+#include "HTMLNames.h"
+#include "HTMLScriptElement.h"
+#include "HTMLToken.h"
+#include "HTMLTokenizer.h"
+#include "LocalizedStrings.h"
+#if ENABLE(MATHML)
+#include "MathMLNames.h"
+#endif
+#include "NotImplemented.h"
+#if ENABLE(SVG)
+#include "SVGNames.h"
+#endif
+#include "ScriptController.h"
+#include "Settings.h"
+#include "Text.h"
+#include <wtf/UnusedParam.h>
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+bool hasImpliedEndTag(Element* element) // True for dd, dt, li, option, optgroup, p, rp and rt elements.
+{
+ return element->hasTagName(ddTag)
+ || element->hasTagName(dtTag)
+ || element->hasTagName(liTag)
+ || element->hasTagName(optionTag)
+ || element->hasTagName(optgroupTag)
+ || element->hasTagName(pTag)
+ || element->hasTagName(rpTag)
+ || element->hasTagName(rtTag);
+}
+
+bool causesFosterParenting(const QualifiedName& tagName) // True when |tagName| is table, tbody, tfoot, thead or tr.
+{
+ return tagName == tableTag
+ || tagName == tbodyTag
+ || tagName == tfootTag
+ || tagName == theadTag
+ || tagName == trTag;
+}
+
+} // namespace
+
+template<typename ChildType>
+PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRefPtr<ChildType> prpChild) // Adds the child under |parent|, or at the foster site when foster parenting applies, and returns it.
+{
+ RefPtr<ChildType> child = prpChild;
+
+ // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
+ // redirection to the foster parent but HTMLConstructionSite::attachAtSite
+ // doesn't. It feels like we're missing a concept somehow.
+ if (shouldFosterParent()) { // In this case |parent| is ignored entirely.
+ fosterParent(child.get());
+ ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
+ return child.release();
+ }
+
+ parent->parserAddChild(child);
+
+ // An event handler (DOM Mutation, beforeload, et al.) could have removed
+ // the child, in which case we shouldn't try attaching it.
+ if (!child->parentNode())
+ return child.release();
+
+ // It's slightly unfortunate that we need to hold a reference to child
+ // here to call attach(). We should investigate whether we can rely on
+ // |parent| to hold a ref at this point. In the common case (at least
+ // for elements), however, we'll get to use this ref in the stack of
+ // open elements.
+ if (parent->attached() && !child->attached()) // Only attach under an attached parent, and only once.
+ child->attach();
+ return child.release();
+}
+
+void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild) // Inserts the child at |site| and attaches it if it is still in the tree.
+{
+ // FIXME: It's unfortunate that we need to hold a reference to child
+ // here to call attach(). We should investigate whether we can rely on
+ // |site.parent| to hold a ref at this point.
+ RefPtr<Node> child = prpChild;
+
+ if (site.nextChild) // A non-null nextChild means insert before it; otherwise append.
+ site.parent->parserInsertBefore(child, site.nextChild);
+ else
+ site.parent->parserAddChild(child);
+
+ // JavaScript run from beforeload (or DOM Mutation or event handlers)
+ // might have removed the child, in which case we should not attach it.
+ if (child->parentNode() && site.parent->attached() && !child->attached())
+ child->attach();
+}
+
+HTMLConstructionSite::HTMLConstructionSite(Document* document, FragmentScriptingPermission scriptingPermission, bool isParsingFragment) // Stores the construction parameters.
+ : m_document(document)
+ , m_fragmentScriptingPermission(scriptingPermission)
+ , m_isParsingFragment(isParsingFragment)
+ , m_redirectAttachToFosterParent(false) // Foster-parent redirection starts disabled.
+{
+}
+
+HTMLConstructionSite::~HTMLConstructionSite() // Empty body; no explicit cleanup is performed here.
+{
+}
+
+void HTMLConstructionSite::detach() // Drops the Document pointer.
+{
+ m_document = 0;
+}
+
+void HTMLConstructionSite::setForm(HTMLFormElement* form) // Records |form| as the current form element.
+{
+ // This method should only be needed for HTMLTreeBuilder in the fragment case.
+ ASSERT(!m_form); // A form must not already be set.
+ m_form = form;
+}
+
+PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm() // Transfers m_form to the caller via RefPtr::release().
+{
+ return m_form.release();
+}
+
+void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded() // Tells the frame's loader the document element is available.
+{
+ ASSERT(m_document);
+ if (m_document->frame() && !m_isParsingFragment) // Skipped for fragments and for documents without a frame.
+ m_document->frame()->loader()->dispatchDocumentElementAvailable();
+}
+
+void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token) // Creates the html element from |token| and makes it the root of the open-element stack.
+{
+ RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
+ element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
+ m_openElements.pushHTMLHtmlElement(attach<Element>(m_document, element.get())); // The element is attached directly under the document.
+#if ENABLE(OFFLINE_WEB_APPLICATIONS)
+ element->insertedByParser();
+#endif
+ dispatchDocumentElementAvailableIfNeeded();
+}
+
+void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element) // Copies onto |element| each token attribute it does not already have.
+{
+ if (!token.attributes()) // Nothing to merge.
+ return;
+
+ NamedNodeMap* attributes = element->attributes(false);
+ for (unsigned i = 0; i < token.attributes()->length(); ++i) {
+ Attribute* attribute = token.attributes()->attributeItem(i);
+ if (!attributes->getAttributeItem(attribute->name())) // Existing attributes on the element are left untouched.
+ element->setAttribute(attribute->name(), attribute->value());
+ }
+}
+
+void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token) // Merges the token's new attributes into the existing html element.
+{
+ // FIXME: parse error
+ mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
+}
+
+void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token) // Merges the token's new attributes into the existing body element.
+{
+ // FIXME: parse error
+ mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
+}
+
+void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token) // Attaches a DocumentType node to the document and picks the compatibility mode.
+{
+ ASSERT(token.type() == HTMLToken::DOCTYPE);
+ attach(m_document, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
+
+ if (token.forceQuirks()) // The token can force quirks mode regardless of the doctype contents.
+ m_document->setCompatibilityMode(Document::QuirksMode);
+ else
+ m_document->setCompatibilityModeFromDoctype();
+}
+
+void HTMLConstructionSite::insertComment(AtomicHTMLToken& token) // Attaches a Comment node to the current element.
+{
+ ASSERT(token.type() == HTMLToken::Comment);
+ attach(currentElement(), Comment::create(currentElement()->document(), token.comment()));
+}
+
+void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token) // Attaches a Comment node directly to the document.
+{
+ ASSERT(token.type() == HTMLToken::Comment);
+ attach(m_document, Comment::create(m_document, token.comment()));
+}
+
+void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token) // Attaches a Comment node to the html element of the open-element stack.
+{
+ ASSERT(token.type() == HTMLToken::Comment);
+ Element* parent = m_openElements.htmlElement();
+ attach(parent, Comment::create(parent->document(), token.comment()));
+}
+
+PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child) // Shorthand for attach() with the current element as parent.
+{
+ return attach(currentElement(), child);
+}
+
+void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token) // Creates the head element, remembers it in m_head and pushes it on the stack.
+{
+ ASSERT(!shouldFosterParent());
+ m_head = attachToCurrent(createHTMLElement(token));
+ m_openElements.pushHTMLHeadElement(m_head);
+}
+
+void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token) // Creates the body element, attaches it and pushes it on the stack.
+{
+ ASSERT(!shouldFosterParent());
+ m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
+}
+
+void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted) // Inserts a form element and records it as the current form.
+{
+ RefPtr<Element> element = createHTMLElement(token);
+ ASSERT(element->hasTagName(formTag));
+ RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
+ form->setDemoted(isDemoted); // The flag is set before the element is attached.
+ m_openElements.push(attachToCurrent(form.release()));
+ ASSERT(currentElement()->isHTMLElement());
+ ASSERT(currentElement()->hasTagName(formTag));
+ m_form = static_cast<HTMLFormElement*>(currentElement()); // After the push the form is the current element.
+}
+
+void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token) // Creates an HTML element from |token|, attaches it and pushes it on the stack.
+{
+ m_openElements.push(attachToCurrent(createHTMLElement(token)));
+}
+
+void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token) // Creates and attaches an element without pushing it on the open-element stack.
+{
+ ASSERT(token.type() == HTMLToken::StartTag);
+ RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
+ // Normally HTMLElementStack is responsible for calling finishParsingChildren,
+ // but self-closing elements are never in the element stack so the stack
+ // doesn't get a chance to tell them that we're done parsing their children.
+ element->finishParsingChildren();
+ // FIXME: Do we want to acknowledge the token's self-closing flag?
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
+}
+
+void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token) // Inserts the element and appends it to the active formatting elements list.
+{
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
+ // Possible active formatting elements include:
+ // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
+ insertHTMLElement(token);
+ m_activeFormattingElements.append(currentElement()); // The element just pushed is now the current element.
+}
+
+void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token) // Creates a script element, attaches it and pushes it on the stack.
+{
+ RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentElement()->document(), true);
+ if (m_fragmentScriptingPermission == FragmentScriptingAllowed) // Token attributes are only applied when fragment scripting is allowed.
+ element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
+ m_openElements.push(attachToCurrent(element.release()));
+}
+
+void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI) // Creates and attaches an element in |namespaceURI|.
+{
+ ASSERT(token.type() == HTMLToken::StartTag);
+ notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
+
+ RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
+ if (!token.selfClosing()) // Self-closing foreign elements are not pushed on the stack.
+ m_openElements.push(element);
+}
+
+void HTMLConstructionSite::insertTextNode(const String& characters) // Appends |characters| to an adjacent text node, or inserts a new Text node.
+{
+ AttachmentSite site;
+ site.parent = currentElement(); // Default site: append to the current element.
+ site.nextChild = 0;
+ if (shouldFosterParent())
+ findFosterSite(site);
+
+ Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild(); // Node that would precede the new text.
+ if (previousChild && previousChild->isTextNode()) {
+ // FIXME: We're only supposed to append to this text node if it
+ // was the last text node inserted by the parser.
+ CharacterData* textNode = static_cast<CharacterData*>(previousChild);
+ textNode->parserAppendData(characters);
+ return;
+ }
+
+ attachAtSite(site, Text::create(site.parent->document(), characters));
+}
+
+PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI) // Builds an unattached element in |namespaceURI| carrying the token's attributes.
+{
+ QualifiedName tagName(nullAtom, token.name(), namespaceURI);
+ RefPtr<Element> element = currentElement()->document()->createElement(tagName, true);
+ element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
+ return element.release();
+}
+
+PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token) // Builds an unattached XHTML-namespace element carrying the token's attributes.
+{
+ QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
+ // FIXME: This can't use HTMLConstructionSite::createElement because we
+ // have to pass the current form element. We should rework form association
+ // to occur after construction to allow better code sharing here.
+ RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentElement()->document(), form(), true);
+ element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
+ ASSERT(element->isHTMLElement());
+ return element.release();
+}
+
+PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record) // Forwards the record's element to createHTMLElementFromSavedElement().
+{
+ return createHTMLElementFromSavedElement(record->element());
+}
+
+namespace {
+
+PassRefPtr<NamedNodeMap> cloneAttributes(Element* element) // Returns a new NamedNodeMap holding a copy of each attribute of |element|.
+{
+ NamedNodeMap* attributes = element->attributes(true);
+ if (!attributes) // No attribute map: return a null pointer.
+ return 0;
+
+ RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
+ for (size_t i = 0; i < attributes->length(); ++i) {
+ Attribute* attribute = attributes->attributeItem(i);
+ RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value()); // Each clone copies the name and value only.
+ newAttributes->addAttribute(clone);
+ }
+ return newAttributes.release();
+}
+
+}
+
+PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element) // Builds a new element with the same local name and cloned attributes as |element|.
+{
+ // FIXME: This method is wrong. We should be using the original token.
+ // Using an Element* causes us to fail examples like this:
+ // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
+ // When reconstructTheActiveFormattingElements calls this method to open
+ // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
+ // spec implies it should be "1". Minefield matches the HTML5 spec here.
+
+ ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
+ AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element)); // Synthesizes a start-tag token from the element's current state.
+ return createHTMLElement(fakeToken);
+}
+
+bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const // Finds where the trailing run of entries that are neither markers nor open elements begins.
+{
+ if (m_activeFormattingElements.isEmpty())
+ return false;
+ unsigned index = m_activeFormattingElements.size();
+ do { // Walks the list from the last entry towards the first.
+ --index;
+ const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
+ if (entry.isMarker() || m_openElements.contains(entry.element())) {
+ firstUnopenElementIndex = index + 1; // The run starts just after this marker or open element.
+ return firstUnopenElementIndex < m_activeFormattingElements.size(); // False when the last entry itself stopped the walk.
+ }
+ } while (index);
+ firstUnopenElementIndex = index; // Reached the front: every entry is unopen, so the run starts at zero.
+ return true;
+}
+
+void HTMLConstructionSite::reconstructTheActiveFormattingElements() // Re-creates and reopens each trailing formatting element that is no longer on the stack.
+{
+ unsigned firstUnopenElementIndex;
+ if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex)) // Nothing to reconstruct.
+ return;
+
+ unsigned unopenEntryIndex = firstUnopenElementIndex;
+ ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
+ for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
+ HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
+ RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
+ m_openElements.push(attachToCurrent(reconstructed.release()));
+ unopenedEntry.replaceElement(currentElement()); // The list entry now refers to the newly pushed element.
+ }
+}
+
+void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName) // Pops implied-end-tag elements until one has local name |tagName| or none remain on top.
+{
+ while (hasImpliedEndTag(currentElement()) && !currentElement()->hasLocalName(tagName))
+ m_openElements.pop();
+}
+
+void HTMLConstructionSite::generateImpliedEndTags() // Pops elements while the current element satisfies hasImpliedEndTag().
+{
+ while (hasImpliedEndTag(currentElement()))
+ m_openElements.pop();
+}
+
+void HTMLConstructionSite::findFosterSite(AttachmentSite& site) // Fills |site| with the place where a foster-parented node should be inserted.
+{
+ HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
+ if (lastTableElementRecord) {
+ Element* lastTableElement = lastTableElementRecord->element();
+ if (ContainerNode* parent = lastTableElement->parentNode()) { // Table is in the tree: insert just before it in its parent.
+ site.parent = parent;
+ site.nextChild = lastTableElement;
+ return;
+ }
+ site.parent = lastTableElementRecord->next()->element(); // Table has no parent: append to the element of the next stack record.
+ site.nextChild = 0;
+ return;
+ }
+ // Fragment case
+ site.parent = m_openElements.bottom(); // <html> element
+ site.nextChild = 0;
+}
+
+bool HTMLConstructionSite::shouldFosterParent() const // True when redirection is enabled and the current element is table, tbody, tfoot, thead or tr.
+{
+ return m_redirectAttachToFosterParent
+ && causesFosterParenting(currentElement()->tagQName());
+}
+
+void HTMLConstructionSite::fosterParent(Node* node) // Inserts |node| at the site chosen by findFosterSite().
+{
+ AttachmentSite site;
+ findFosterSite(site);
+ attachAtSite(site, node);
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.h b/Source/WebCore/html/parser/HTMLConstructionSite.h
new file mode 100644
index 0000000..8b09bf5
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLConstructionSite.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLConstructionSite_h
+#define HTMLConstructionSite_h
+
+#include "FragmentScriptingPermission.h"
+#include "HTMLElementStack.h"
+#include "HTMLFormattingElementList.h"
+#include "NotImplemented.h"
+#include <wtf/Noncopyable.h>
+#include <wtf/PassRefPtr.h>
+#include <wtf/RefPtr.h>
+
+namespace WebCore {
+
+class AtomicHTMLToken;
+class Document;
+class Element;
+
+class HTMLConstructionSite : public Noncopyable { // Creates DOM nodes from parser tokens and tracks the open-element stack and active formatting list.
+public:
+ HTMLConstructionSite(Document*, FragmentScriptingPermission, bool isParsingFragment);
+ ~HTMLConstructionSite();
+
+ void detach(); // Clears the Document pointer.
+
+ void insertDoctype(AtomicHTMLToken&);
+ void insertComment(AtomicHTMLToken&); // Comment goes under the current element.
+ void insertCommentOnDocument(AtomicHTMLToken&);
+ void insertCommentOnHTMLHtmlElement(AtomicHTMLToken&);
+ void insertHTMLElement(AtomicHTMLToken&);
+ void insertSelfClosingHTMLElement(AtomicHTMLToken&); // Element is attached but not pushed on the stack.
+ void insertFormattingElement(AtomicHTMLToken&);
+ void insertHTMLHeadElement(AtomicHTMLToken&);
+ void insertHTMLBodyElement(AtomicHTMLToken&);
+ void insertHTMLFormElement(AtomicHTMLToken&, bool isDemoted = false);
+ void insertScriptElement(AtomicHTMLToken&);
+ void insertTextNode(const String&);
+ void insertForeignElement(AtomicHTMLToken&, const AtomicString& namespaceURI);
+
+ void insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken&); // Creates the html element.
+ void insertHTMLHtmlStartTagInBody(AtomicHTMLToken&); // Merges attributes into the existing html element.
+ void insertHTMLBodyStartTagInBody(AtomicHTMLToken&); // Merges attributes into the existing body element.
+
+ PassRefPtr<Element> createHTMLElement(AtomicHTMLToken&);
+ PassRefPtr<Element> createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord*);
+
+ bool shouldFosterParent() const;
+ void fosterParent(Node*);
+
+ bool indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const;
+ void reconstructTheActiveFormattingElements();
+
+ void generateImpliedEndTags();
+ void generateImpliedEndTagsWithExclusion(const AtomicString& tagName);
+
+ Element* currentElement() const { return m_openElements.top(); } // Top of the open-element stack.
+ Element* oneBelowTop() const { return m_openElements.oneBelowTop(); }
+
+ HTMLElementStack* openElements() const { return &m_openElements; }
+ HTMLFormattingElementList* activeFormattingElements() const { return &m_activeFormattingElements; }
+
+ Element* head() const { return m_head.get(); }
+
+ void setForm(HTMLFormElement*);
+ HTMLFormElement* form() const { return m_form.get(); }
+ PassRefPtr<HTMLFormElement> takeForm();
+
+ class RedirectToFosterParentGuard : public Noncopyable { // Enables foster-parent redirection for its lifetime and restores the previous setting afterwards.
+ public:
+ RedirectToFosterParentGuard(HTMLConstructionSite& tree)
+ : m_tree(tree)
+ , m_wasRedirectingBefore(tree.m_redirectAttachToFosterParent)
+ {
+ m_tree.m_redirectAttachToFosterParent = true;
+ }
+
+ ~RedirectToFosterParentGuard()
+ {
+ m_tree.m_redirectAttachToFosterParent = m_wasRedirectingBefore;
+ }
+
+ private:
+ HTMLConstructionSite& m_tree;
+ bool m_wasRedirectingBefore; // Value of the flag when the guard was constructed.
+ };
+
+private:
+ struct AttachmentSite { // Insertion point: a parent plus the child to insert before.
+ ContainerNode* parent;
+ Node* nextChild; // Null means append at the end of |parent|.
+ };
+
+ template<typename ChildType>
+ PassRefPtr<ChildType> attach(ContainerNode* parent, PassRefPtr<ChildType> child);
+ PassRefPtr<Element> attachToCurrent(PassRefPtr<Element>);
+
+ void attachAtSite(const AttachmentSite&, PassRefPtr<Node> child);
+ void findFosterSite(AttachmentSite&);
+
+ PassRefPtr<Element> createHTMLElementFromSavedElement(Element*);
+ PassRefPtr<Element> createElement(AtomicHTMLToken&, const AtomicString& namespaceURI);
+
+ void mergeAttributesFromTokenIntoElement(AtomicHTMLToken&, Element*);
+ void dispatchDocumentElementAvailableIfNeeded();
+
+ Document* m_document; // Set to null by detach().
+ RefPtr<Element> m_head;
+ RefPtr<HTMLFormElement> m_form;
+ mutable HTMLElementStack m_openElements; // mutable so the const accessor can hand out a non-const pointer.
+ mutable HTMLFormattingElementList m_activeFormattingElements; // mutable for the same reason.
+
+ FragmentScriptingPermission m_fragmentScriptingPermission;
+ bool m_isParsingFragment;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-intable
+ // In the "in table" insertion mode, we sometimes get into a state where
+ // "whenever a node would be inserted into the current node, it must instead
+ // be foster parented." This flag tracks whether we're in that state.
+ bool m_redirectAttachToFosterParent;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.cpp b/Source/WebCore/html/parser/HTMLDocumentParser.cpp
new file mode 100644
index 0000000..93e1309
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLDocumentParser.cpp
@@ -0,0 +1,549 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLDocumentParser.h"
+
+#include "DocumentFragment.h"
+#include "Element.h"
+#include "Frame.h"
+#include "HTMLNames.h"
+#include "HTMLParserScheduler.h"
+#include "HTMLTokenizer.h"
+#include "HTMLPreloadScanner.h"
+#include "HTMLScriptRunner.h"
+#include "HTMLTreeBuilder.h"
+#include "HTMLDocument.h"
+#include "InspectorInstrumentation.h"
+#include "NestingLevelIncrementer.h"
+#include "Settings.h"
+#include "XSSAuditor.h"
+#include <wtf/CurrentTime.h>
+
+#ifdef ANDROID_INSTRUMENT
+#include "TimeCounter.h"
+#endif
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+// Chooses the tokenizer state a fragment parser starts in, based on the tag of
+// the context element. This is a direct transcription of step 4 from:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
+// When |reportErrors| is false, the raw-text and script-data contexts use
+// PLAINTEXTState instead.
+HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors)
+{
+    // Without a context element we tokenize ordinary markup.
+    if (!contextElement)
+        return HTMLTokenizer::DataState;
+
+    const QualifiedName& tag = contextElement->tagQName();
+
+    const bool isRCDATAContext = tag.matches(titleTag) || tag.matches(textareaTag);
+    if (isRCDATAContext)
+        return HTMLTokenizer::RCDATAState;
+
+    const HTMLTokenizer::State rawTextState = reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState;
+
+    if (tag.matches(styleTag) || tag.matches(xmpTag) || tag.matches(iframeTag))
+        return rawTextState;
+    // <noembed> and <noscript> only count when plugins / scripting are enabled for the frame.
+    if (tag.matches(noembedTag) && HTMLTreeBuilder::pluginsEnabled(contextElement->document()->frame()))
+        return rawTextState;
+    if (tag.matches(noscriptTag) && HTMLTreeBuilder::scriptEnabled(contextElement->document()->frame()))
+        return rawTextState;
+    if (tag.matches(noframesTag))
+        return rawTextState;
+
+    if (tag.matches(scriptTag))
+        return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState;
+    if (tag.matches(plaintextTag))
+        return HTMLTokenizer::PLAINTEXTState;
+    return HTMLTokenizer::DataState;
+}
+
+} // namespace
+
+HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
+ : ScriptableDocumentParser(document)
+ , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(document)))
+ , m_scriptRunner(HTMLScriptRunner::create(document, this)) // Only this (full-document) constructor creates a script runner.
+ , m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, usePreHTML5ParserQuirks(document)))
+ , m_parserScheduler(HTMLParserScheduler::create(this)) // Likewise, only full-document parsers get a scheduler.
+ , m_endWasDelayed(false)
+ , m_writeNestingLevel(0)
+{
+}
+
+// FIXME: Member variables should be grouped into self-initializing structs to
+// minimize code duplication between these constructors.
+//
+// Fragment-parsing constructor. Neither a script runner nor a parser scheduler
+// is created here, so both of those members stay null for fragment parsers.
+HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
+    : ScriptableDocumentParser(fragment->document())
+    , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document())))
+    , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks(fragment->document())))
+    , m_endWasDelayed(false)
+    , m_writeNestingLevel(0)
+{
+    // For now document fragment parsing never reports errors, so the initial
+    // tokenizer state is always picked with error reporting switched off.
+    const bool fragmentReportsErrors = false;
+    m_tokenizer->setState(tokenizerStateForContextElement(contextElement, fragmentReportsErrors));
+}
+
+// Only sanity checks: by now the scheduler and the preload scanner (both
+// cleared by detach()) must be gone and no write may still be in progress.
+HTMLDocumentParser::~HTMLDocumentParser()
+{
+    ASSERT(!m_writeNestingLevel);
+    ASSERT(!m_preloadScanner);
+    ASSERT(!m_parserScheduler);
+}
+
+void HTMLDocumentParser::detach()
+{
+ DocumentParser::detach();
+ if (m_scriptRunner) // Null for fragment parsers, which never create a script runner.
+ m_scriptRunner->detach();
+ m_treeBuilder->detach();
+ // FIXME: It seems wrong that we would have a preload scanner here.
+ // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do.
+ m_preloadScanner.clear();
+ m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
+}
+
+void HTMLDocumentParser::stopParsing() // Stops parsing and drops the scheduler, so isScheduledForResume() reports false afterwards.
+{
+ DocumentParser::stopParsing();
+ m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
+}
+
+// This kicks off "Once the user agent stops parsing" as described by:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
+void HTMLDocumentParser::prepareToStopParsing()
+{
+ ASSERT(!hasInsertionPoint());
+
+ // pumpTokenizer can cause this parser to be detached from the Document,
+ // but we need to ensure it isn't deleted yet.
+ RefPtr<HTMLDocumentParser> protect(this);
+
+ // NOTE: This pump should only ever emit buffered character tokens,
+ // so ForceSynchronous vs. AllowYield should be meaningless.
+ pumpTokenizerIfPossible(ForceSynchronous);
+
+ if (isStopped()) // The pump above may have stopped or detached the parser.
+ return;
+
+ DocumentParser::prepareToStopParsing();
+
+ // We will not have a scriptRunner when parsing a DocumentFragment.
+ if (m_scriptRunner)
+ document()->setReadyState(Document::Interactive);
+
+ attemptToRunDeferredScriptsAndEnd(); // Calls end() unless the script runner reports scripts still pending.
+}
+
+// The parser counts as processing data while a resume is scheduled or while
+// it is inside a write (insert()/append()).
+bool HTMLDocumentParser::processingData() const
+{
+    if (isScheduledForResume())
+        return true;
+    return inWrite();
+}
+
+// Guarded entry point to pumpTokenizer(): does nothing when the parser is
+// stopped, the tree builder is paused, or a resume is already scheduled.
+void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode)
+{
+    if (isStopped())
+        return;
+    if (m_treeBuilder->isPaused())
+        return;
+
+    if (!isScheduledForResume()) {
+        pumpTokenizer(mode);
+        return;
+    }
+
+    // Once a resume is scheduled, HTMLParserScheduler controls when we next pump.
+    ASSERT(mode == AllowYield);
+}
+
+// False whenever there is no scheduler (fragment parsing, or after
+// stopParsing()/detach()); otherwise defers to the scheduler.
+bool HTMLDocumentParser::isScheduledForResume() const
+{
+    if (!m_parserScheduler)
+        return false;
+    return m_parserScheduler->isScheduledForResume();
+}
+
+// Used by HTMLParserScheduler
+void HTMLDocumentParser::resumeParsingAfterYield()
+{
+ // pumpTokenizer can cause this parser to be detached from the Document,
+ // but we need to ensure it isn't deleted yet.
+ RefPtr<HTMLDocumentParser> protect(this);
+
+ // We should never be here unless we can pump immediately. Call pumpTokenizer()
+ // directly so that ASSERTs will fire if we're wrong.
+ pumpTokenizer(AllowYield);
+ endIfDelayed(); // Finishes now if attemptToEnd() had to defer the end earlier.
+}
+
+// Takes the pending script from the paused tree builder and hands it to the
+// script runner. Returns whether parsing should continue; always true when
+// there is no script runner.
+bool HTMLDocumentParser::runScriptsForPausedTreeBuilder()
+{
+    ASSERT(m_treeBuilder->isPaused());
+
+    // The script is taken from the tree builder even if nothing will run it.
+    TextPosition1 startPosition = TextPosition1::belowRangePosition();
+    RefPtr<Element> pendingScript = m_treeBuilder->takeScriptToProcess(startPosition);
+
+    if (m_scriptRunner)
+        return m_scriptRunner->execute(pendingScript.release(), startPosition);
+
+    // We will not have a scriptRunner when parsing a DocumentFragment.
+    return true;
+}
+
+void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
+{
+ ASSERT(!isStopped());
+ ASSERT(!m_treeBuilder->isPaused());
+ ASSERT(!isScheduledForResume());
+ // ASSERT that this object is both attached to the Document and protected.
+ ASSERT(refCount() >= 2);
+
+ // We tell the InspectorInstrumentation about every pump, even if we
+ // end up pumping nothing. It can filter out empty pumps itself.
+ // FIXME: m_input.current().length() is only accurate if we
+ // end up parsing the whole buffer in this pump. We should pass how
+ // much we parsed as part of didWriteHTML instead of willWriteHTML.
+ InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().length(), m_tokenizer->lineNumber());
+
+ HTMLParserScheduler::PumpSession session;
+ // FIXME: This loop body is now too long and needs cleanup.
+ while (mode == ForceSynchronous || m_parserScheduler->shouldContinueParsing(session)) { // The scheduler is only consulted in AllowYield mode.
+ // FIXME: It's wrong for the HTMLDocumentParser to reach back to the
+ // Frame, but this approach is how the old parser handled
+ // stopping when the page assigns window.location. What really
+ // should happen is that assigning window.location causes the
+ // parser to stop parsing cleanly. The problem is we're not
+ // prepared to do that at every point where we run JavaScript.
+ if (!m_treeBuilder->isParsingFragment()
+ && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending())
+ break;
+ if (!m_tokenizer->nextToken(m_input.current(), m_token))
+ break;
+
+ m_treeBuilder->constructTreeFromToken(m_token);
+ m_token.clear();
+
+ // JavaScript may have stopped or detached the parser.
+ if (isStopped())
+ return; // NOTE(review): this early return skips didWriteHTML() below - confirm that is intended.
+
+ // The parser will pause itself when waiting on a script to load or run.
+ if (!m_treeBuilder->isPaused())
+ continue;
+
+ // If we're paused waiting for a script, we try to execute scripts before continuing.
+ bool shouldContinueParsing = runScriptsForPausedTreeBuilder();
+ m_treeBuilder->setPaused(!shouldContinueParsing);
+
+ // JavaScript may have stopped or detached the parser.
+ if (isStopped())
+ return;
+
+ if (!shouldContinueParsing)
+ break;
+ }
+
+ // Ensure we haven't been totally deref'ed after pumping. Any caller of this
+ // function should be holding a RefPtr to this to ensure we weren't deleted.
+ ASSERT(refCount() >= 1);
+
+ if (isWaitingForScripts()) {
+ ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
+ if (!m_preloadScanner) { // Created lazily and seeded with the input that is still unparsed.
+ m_preloadScanner.set(new HTMLPreloadScanner(document()));
+ m_preloadScanner->appendToEnd(m_input.current());
+ }
+ m_preloadScanner->scan();
+ }
+
+ InspectorInstrumentation::didWriteHTML(cookie, m_tokenizer->lineNumber());
+}
+
+// DocumentParser interface: forwards the question to the input stream.
+bool HTMLDocumentParser::hasInsertionPoint()
+{
+    const bool inputHasInsertionPoint = m_input.hasInsertionPoint();
+    return inputHasInsertionPoint;
+}
+
+void HTMLDocumentParser::insert(const SegmentedString& source)
+{
+ if (isStopped())
+ return;
+
+#ifdef ANDROID_INSTRUMENT
+ android::TimeCounter::start(android::TimeCounter::ParsingTimeCounter); // NOTE(review): no matching TimeCounter::record() in this function - confirm intended.
+#endif
+
+ // pumpTokenizer can cause this parser to be detached from the Document,
+ // but we need to ensure it isn't deleted yet.
+ RefPtr<HTMLDocumentParser> protect(this);
+
+ {
+ NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel); // Scoped so the nesting level is back down before endIfDelayed().
+
+ SegmentedString excludedLineNumberSource(source);
+ excludedLineNumberSource.setExcludeLineNumbers();
+ m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
+ pumpTokenizerIfPossible(ForceSynchronous); // Inserted text is always parsed without yielding.
+ }
+
+ endIfDelayed();
+}
+
+// Appends |source| to the end of the input stream (and to the preload scanner,
+// if one exists). Unless this is a nested write, the tokenizer is then pumped
+// in AllowYield mode and a previously delayed end is attempted.
+void HTMLDocumentParser::append(const SegmentedString& source)
+{
+    if (isStopped())
+        return;
+
+#ifdef ANDROID_INSTRUMENT
+    // Start the parsing timer here so that the record() calls below measure
+    // this call rather than whichever start() happened to run last.
+    android::TimeCounter::start(android::TimeCounter::ParsingTimeCounter);
+#endif
+
+    // pumpTokenizer can cause this parser to be detached from the Document,
+    // but we need to ensure it isn't deleted yet.
+    RefPtr<HTMLDocumentParser> protect(this);
+
+    {
+        // Scoped so the nesting level is back down before endIfDelayed() runs.
+        NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel);
+
+        m_input.appendToEnd(source);
+        if (m_preloadScanner)
+            m_preloadScanner->appendToEnd(source);
+
+        if (m_writeNestingLevel > 1) {
+            // We've gotten data off the network in a nested write.
+            // We don't want to consume any more of the input stream now. Do
+            // not worry. We'll consume this data in a less-nested write().
+#ifdef ANDROID_INSTRUMENT
+            android::TimeCounter::record(android::TimeCounter::ParsingTimeCounter, __FUNCTION__);
+#endif
+            return;
+        }
+
+        pumpTokenizerIfPossible(AllowYield);
+    }
+
+    endIfDelayed();
+#ifdef ANDROID_INSTRUMENT
+    android::TimeCounter::record(android::TimeCounter::ParsingTimeCounter, __FUNCTION__);
+#endif
+}
+
+void HTMLDocumentParser::end()
+{
+ ASSERT(!isDetached());
+ ASSERT(!isScheduledForResume());
+
+ // Informs the rest of WebCore that parsing is really finished (and deletes this).
+ m_treeBuilder->finished();
+}
+
+// Runs the scripts that were waiting for parsing to finish and then ends the
+// parse. If the script runner reports that it could not run them all yet, we
+// return without ending; notifyFinished() calls back in here while stopping.
+void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd()
+{
+    ASSERT(isStopping());
+    ASSERT(!hasInsertionPoint());
+
+    if (m_scriptRunner) {
+        const bool ranAllDeferredScripts = m_scriptRunner->executeScriptsWaitingForParsing();
+        if (!ranAllDeferredScripts)
+            return;
+    }
+    end();
+}
+
+// finish() indicates we will not receive any more data. If we are waiting on
+// an external script to load (or anything else shouldDelayEnd() checks), we
+// can't finish parsing quite yet; remember that so endIfDelayed() can retry.
+void HTMLDocumentParser::attemptToEnd()
+{
+    if (!shouldDelayEnd()) {
+        prepareToStopParsing();
+        return;
+    }
+    m_endWasDelayed = true;
+}
+
+// Completes an end that attemptToEnd() had to postpone, once nothing delays
+// it any longer.
+void HTMLDocumentParser::endIfDelayed()
+{
+    // If we've already been detached, don't bother ending.
+    if (isDetached())
+        return;
+
+    // Nothing was postponed.
+    if (!m_endWasDelayed)
+        return;
+
+    // Still blocked; a later call will try again.
+    if (shouldDelayEnd())
+        return;
+
+    m_endWasDelayed = false;
+    prepareToStopParsing();
+}
+
+void HTMLDocumentParser::finish()
+{
+ // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
+ // make sense to call any methods on DocumentParser once it's been stopped.
+ // However, FrameLoader::stop calls Document::finishParsing unconditionally
+ // which in turn calls m_parser->finish().
+
+ // We're not going to get any more data off the network, so we tell the
+ // input stream we've reached the end of file. finish() can be called more
+ // than once, if the first time does not call end().
+ if (!m_input.haveSeenEndOfFile())
+ m_input.markEndOfFile();
+ attemptToEnd();
+}
+
+// finish() marks end of file on the input stream, so that mark doubles as the
+// "finish was called" indicator.
+bool HTMLDocumentParser::finishWasCalled()
+{
+    const bool sawEndOfFile = m_input.haveSeenEndOfFile();
+    return sawEndOfFile;
+}
+
+// Virtual entry point that exists only for the DocumentParser interface; the
+// implementation itself uses the non-virtual inScriptExecution().
+bool HTMLDocumentParser::isExecutingScript() const
+{
+    const bool executing = inScriptExecution();
+    return executing;
+}
+
+// Non-virtual and used throughout the implementation. Without a script runner
+// (fragment parsing) no script can be executing.
+bool HTMLDocumentParser::inScriptExecution() const
+{
+    if (m_scriptRunner)
+        return m_scriptRunner->isExecutingScript();
+    return false;
+}
+
+// The parser's line number is whatever the tokenizer reports.
+int HTMLDocumentParser::lineNumber() const
+{
+    const int tokenizerLine = m_tokenizer->lineNumber();
+    return tokenizerLine;
+}
+
+// Builds a zero-based line/column position from the current input string and
+// asserts that the tokenizer agrees about the line.
+TextPosition0 HTMLDocumentParser::textPosition() const
+{
+    const SegmentedString& input = m_input.current();
+    WTF::ZeroBasedNumber currentLine = input.currentLine();
+    WTF::ZeroBasedNumber currentColumn = input.currentColumn();
+    ASSERT(m_tokenizer->lineNumber() == currentLine.zeroBasedInt());
+
+    return TextPosition0(currentLine, currentColumn);
+}
+
+// The parser is waiting for scripts exactly when the tree builder is paused.
+bool HTMLDocumentParser::isWaitingForScripts() const
+{
+    const bool treeBuilderPaused = m_treeBuilder->isPaused();
+    return treeBuilderPaused;
+}
+
+void HTMLDocumentParser::resumeParsingAfterScriptExecution()
+{
+ ASSERT(!inScriptExecution());
+ ASSERT(!m_treeBuilder->isPaused());
+
+ m_preloadScanner.clear(); // The preload scanner is only created while waiting for scripts (see pumpTokenizer()).
+ pumpTokenizerIfPossible(AllowYield);
+ endIfDelayed(); // Finishes now if attemptToEnd() had to defer the end earlier.
+}
+
+void HTMLDocumentParser::watchForLoad(CachedResource* cachedScript)
+{
+ ASSERT(!cachedScript->isLoaded());
+ // addClient would call notifyFinished if the load were complete.
+ // Callers do not expect to be re-entered from this call, so they should
+ // not pass an already-loaded CachedResource.
+ cachedScript->addClient(this);
+}
+
+// Counterpart of watchForLoad(): unregisters this parser as a client of
+// |cachedScript|.
+void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript)
+{
+    cachedScript->removeClient(this);
+}
+
+// Loading is always allowed when there is no XSS auditor; otherwise the
+// auditor decides based on |srcValue|.
+bool HTMLDocumentParser::shouldLoadExternalScriptFromSrc(const AtomicString& srcValue)
+{
+    if (xssAuditor())
+        return xssAuditor()->canLoadExternalScriptFromSrc(srcValue);
+    return true;
+}
+
+void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource)
+{
+ // pumpTokenizer can cause this parser to be detached from the Document,
+ // but we need to ensure it isn't deleted yet.
+ RefPtr<HTMLDocumentParser> protect(this);
+
+ ASSERT(m_scriptRunner);
+ ASSERT(!inScriptExecution());
+ if (isStopping()) { // Already stopping: only deferred scripts can be pending, so retry ending.
+ attemptToRunDeferredScriptsAndEnd();
+ return;
+ }
+
+ ASSERT(m_treeBuilder->isPaused());
+ // Note: We only ever wait on one script at a time, so we always know this
+ // is the one we were waiting on and can un-pause the tree builder.
+ m_treeBuilder->setPaused(false);
+ bool shouldContinueParsing = m_scriptRunner->executeScriptsWaitingForLoad(cachedResource);
+ m_treeBuilder->setPaused(!shouldContinueParsing); // Pause again if the script runner says parsing cannot continue yet.
+ if (shouldContinueParsing)
+ resumeParsingAfterScriptExecution();
+}
+
+void HTMLDocumentParser::executeScriptsWaitingForStylesheets()
+{
+ // Document only calls this when the Document owns the DocumentParser
+ // so this will not be called in the DocumentFragment case.
+ ASSERT(m_scriptRunner);
+ // Ignore calls unless we have a script blocking the parser waiting on a
+ // stylesheet load. Otherwise we are currently parsing and this
+ // is a re-entrant call from encountering a </ style> tag.
+ if (!m_scriptRunner->hasScriptsWaitingForStylesheets())
+ return;
+
+ // pumpTokenizer can cause this parser to be detached from the Document,
+ // but we need to ensure it isn't deleted yet.
+ RefPtr<HTMLDocumentParser> protect(this);
+
+ ASSERT(!m_scriptRunner->isExecutingScript());
+ ASSERT(m_treeBuilder->isPaused());
+ // Note: We only ever wait on one script at a time, so we always know this
+ // is the one we were waiting on and can un-pause the tree builder.
+ m_treeBuilder->setPaused(false);
+ bool shouldContinueParsing = m_scriptRunner->executeScriptsWaitingForStylesheets();
+ m_treeBuilder->setPaused(!shouldContinueParsing); // Pause again if the script runner says parsing cannot continue yet.
+ if (shouldContinueParsing)
+ resumeParsingAfterScriptExecution();
+}
+
+// Returns the ScriptController of the document's frame, or 0 when the
+// document has no frame.
+ScriptController* HTMLDocumentParser::script() const
+{
+    if (!document()->frame())
+        return 0;
+    return document()->frame()->script();
+}
+
+void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
+{
+ RefPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, scriptingPermission);
+ parser->insert(source); // Use insert() so that the parser will not yield.
+ parser->finish(); // Marks end of file on the input and attempts to end parsing.
+ ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151>
+ parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
+}
+
+// Reads the usePreHTML5ParserQuirks setting of |document|; false when the
+// document has no settings object.
+bool HTMLDocumentParser::usePreHTML5ParserQuirks(Document* document)
+{
+    ASSERT(document);
+    if (!document->settings())
+        return false;
+    return document->settings()->usePreHTML5ParserQuirks();
+}
+
+// Suspends the parser scheduler, if there is one.
+void HTMLDocumentParser::suspendScheduledTasks()
+{
+    if (!m_parserScheduler)
+        return;
+    m_parserScheduler->suspend();
+}
+
+// Resumes the parser scheduler, if there is one.
+void HTMLDocumentParser::resumeScheduledTasks()
+{
+    if (!m_parserScheduler)
+        return;
+    m_parserScheduler->resume();
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.h b/Source/WebCore/html/parser/HTMLDocumentParser.h
new file mode 100644
index 0000000..80ca727
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLDocumentParser.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLDocumentParser_h
+#define HTMLDocumentParser_h
+
+#include "CachedResourceClient.h"
+#include "FragmentScriptingPermission.h"
+#include "HTMLInputStream.h"
+#include "HTMLScriptRunnerHost.h"
+#include "HTMLToken.h"
+#include "ScriptableDocumentParser.h"
+#include "SegmentedString.h"
+#include "Timer.h"
+#include <wtf/OwnPtr.h>
+
+namespace WebCore {
+
+class Document;
+class DocumentFragment;
+class HTMLDocument;
+class HTMLParserScheduler;
+class HTMLTokenizer;
+class HTMLScriptRunner;
+class HTMLTreeBuilder;
+class HTMLPreloadScanner;
+class ScriptController;
+class ScriptSourceCode;
+
+// Drives HTML parsing for either a whole HTMLDocument or a DocumentFragment:
+// it owns the input stream, the tokenizer and the tree builder, and (for
+// documents only) a script runner and a parser scheduler.
+// HTMLScriptRunnerHost and CachedResourceClient are implementation details and
+// are therefore inherited privately.
+class HTMLDocumentParser : public ScriptableDocumentParser, private HTMLScriptRunnerHost, private CachedResourceClient {
+public:
+    // Creates a parser for a full document.
+    static PassRefPtr<HTMLDocumentParser> create(HTMLDocument* document, bool reportErrors)
+    {
+        return adoptRef(new HTMLDocumentParser(document, reportErrors));
+    }
+    // Creates a parser for a fragment, tokenized in the context of |contextElement|.
+    static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission permission)
+    {
+        return adoptRef(new HTMLDocumentParser(fragment, contextElement, permission));
+    }
+
+    virtual ~HTMLDocumentParser();
+
+    // Exposed for HTMLParserScheduler
+    void resumeParsingAfterYield();
+
+    // Synchronously parses |source| into |fragment| with a temporary parser.
+    static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, FragmentScriptingPermission = FragmentScriptingAllowed);
+
+    static bool usePreHTML5ParserQuirks(Document*);
+
+    HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
+
+    virtual TextPosition0 textPosition() const;
+    virtual void suspendScheduledTasks();
+    virtual void resumeScheduledTasks();
+
+protected:
+    virtual void insert(const SegmentedString&);
+    virtual void append(const SegmentedString&);
+    virtual void finish();
+
+    HTMLDocumentParser(HTMLDocument*, bool reportErrors);
+    HTMLDocumentParser(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
+
+    HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
+
+private:
+    // DocumentParser
+    virtual void detach();
+    virtual bool hasInsertionPoint();
+    virtual bool finishWasCalled();
+    virtual bool processingData() const;
+    virtual void prepareToStopParsing();
+    virtual void stopParsing();
+    virtual bool isWaitingForScripts() const;
+    virtual bool isExecutingScript() const;
+    virtual void executeScriptsWaitingForStylesheets();
+    virtual int lineNumber() const;
+
+    // HTMLScriptRunnerHost
+    virtual void watchForLoad(CachedResource*);
+    virtual void stopWatchingForLoad(CachedResource*);
+    virtual bool shouldLoadExternalScriptFromSrc(const AtomicString&);
+    virtual HTMLInputStream& inputStream() { return m_input; }
+
+    // CachedResourceClient
+    virtual void notifyFinished(CachedResource*);
+
+    // Whether a tokenizer pump may stop early when the scheduler asks it to.
+    enum SynchronousMode {
+        AllowYield,
+        ForceSynchronous
+    };
+    void pumpTokenizer(SynchronousMode);
+    void pumpTokenizerIfPossible(SynchronousMode);
+
+    bool runScriptsForPausedTreeBuilder();
+    void resumeParsingAfterScriptExecution();
+
+    void attemptToEnd();
+    void endIfDelayed();
+    void attemptToRunDeferredScriptsAndEnd();
+    void end();
+
+    bool isScheduledForResume() const;
+    bool inScriptExecution() const;
+    bool inWrite() const { return m_writeNestingLevel > 0; }
+    // Ending must wait while any of these activities is still in flight.
+    bool shouldDelayEnd() const { return inWrite() || isWaitingForScripts() || inScriptExecution() || isScheduledForResume(); }
+
+    ScriptController* script() const;
+
+    HTMLInputStream m_input;
+
+    // We hold m_token here because it might be partially complete.
+    HTMLToken m_token;
+
+    OwnPtr<HTMLTokenizer> m_tokenizer;
+    OwnPtr<HTMLScriptRunner> m_scriptRunner; // Null when parsing a DocumentFragment.
+    OwnPtr<HTMLTreeBuilder> m_treeBuilder;
+    OwnPtr<HTMLPreloadScanner> m_preloadScanner; // Created lazily while waiting for scripts.
+    OwnPtr<HTMLParserScheduler> m_parserScheduler; // Null for fragments and after stopParsing()/detach().
+
+    bool m_endWasDelayed; // Set by attemptToEnd(), consumed by endIfDelayed().
+    unsigned m_writeNestingLevel; // Depth of insert()/append() calls currently on the stack.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLElementStack.cpp b/Source/WebCore/html/parser/HTMLElementStack.cpp
new file mode 100644
index 0000000..6aab0f7
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLElementStack.cpp
@@ -0,0 +1,569 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLElementStack.h"
+
+#include "Element.h"
+#include "HTMLNames.h"
+#include "MathMLNames.h"
+#include "SVGNames.h"
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+// True for <h1> through <h6>.
+inline bool isNumberedHeaderElement(Element* element)
+{
+    if (element->hasTagName(h1Tag) || element->hasTagName(h2Tag) || element->hasTagName(h3Tag))
+        return true;
+    return element->hasTagName(h4Tag) || element->hasTagName(h5Tag) || element->hasTagName(h6Tag);
+}
+
+// True if |element| is one of the HTML, MathML or SVG tags that this file
+// treats as a general scope marker.
+inline bool isScopeMarker(Element* element)
+{
+    // HTML markers.
+    if (element->hasTagName(appletTag)
+        || element->hasTagName(captionTag)
+        || element->hasTagName(htmlTag)
+        || element->hasTagName(marqueeTag)
+        || element->hasTagName(objectTag)
+        || element->hasTagName(tableTag)
+        || element->hasTagName(tdTag)
+        || element->hasTagName(thTag))
+        return true;
+
+    // MathML markers.
+    if (element->hasTagName(MathMLNames::miTag)
+        || element->hasTagName(MathMLNames::moTag)
+        || element->hasTagName(MathMLNames::mnTag)
+        || element->hasTagName(MathMLNames::msTag)
+        || element->hasTagName(MathMLNames::mtextTag)
+        || element->hasTagName(MathMLNames::annotation_xmlTag))
+        return true;
+
+    // SVG markers.
+    return element->hasTagName(SVGNames::foreignObjectTag)
+        || element->hasTagName(SVGNames::descTag)
+        || element->hasTagName(SVGNames::titleTag);
+}
+
+// The general scope markers plus <ol> and <ul>.
+inline bool isListItemScopeMarker(Element* element)
+{
+    if (isScopeMarker(element))
+        return true;
+    return element->hasTagName(olTag) || element->hasTagName(ulTag);
+}
+
+// True for <table> and <html>.
+inline bool isTableScopeMarker(Element* element)
+{
+    if (element->hasTagName(tableTag))
+        return true;
+    return element->hasTagName(htmlTag);
+}
+
+// True for <tbody>, <tfoot>, <thead> and <html>.
+inline bool isTableBodyScopeMarker(Element* element)
+{
+    const bool isTableSection = element->hasTagName(tbodyTag)
+        || element->hasTagName(tfootTag)
+        || element->hasTagName(theadTag);
+    return isTableSection || element->hasTagName(htmlTag);
+}
+
+// True for <tr> and <html>.
+inline bool isTableRowScopeMarker(Element* element)
+{
+    if (element->hasTagName(trTag))
+        return true;
+    return element->hasTagName(htmlTag);
+}
+
+// True for the listed MathML and SVG tags, and for any element in the XHTML
+// namespace.
+inline bool isForeignContentScopeMarker(Element* element)
+{
+    // MathML markers.
+    if (element->hasTagName(MathMLNames::miTag)
+        || element->hasTagName(MathMLNames::moTag)
+        || element->hasTagName(MathMLNames::mnTag)
+        || element->hasTagName(MathMLNames::msTag)
+        || element->hasTagName(MathMLNames::mtextTag))
+        return true;
+
+    // SVG markers.
+    if (element->hasTagName(SVGNames::foreignObjectTag)
+        || element->hasTagName(SVGNames::descTag)
+        || element->hasTagName(SVGNames::titleTag))
+        return true;
+
+    return element->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
+}
+
+// The general scope markers plus <button>.
+inline bool isButtonScopeMarker(Element* element)
+{
+    if (isScopeMarker(element))
+        return true;
+    return element->hasTagName(buttonTag);
+}
+
+// Everything except <optgroup> and <option> is a marker here.
+inline bool isSelectScopeMarker(Element* element)
+{
+    return !(element->hasTagName(optgroupTag) || element->hasTagName(optionTag));
+}
+
+}
+
+HTMLElementStack::ElementRecord::ElementRecord(PassRefPtr<Element> element, PassOwnPtr<ElementRecord> next)
+ : m_element(element)
+ , m_next(next) // Each record owns the record below it.
+{
+ ASSERT(m_element); // A record must always wrap a non-null element.
+}
+
+// Nothing to do by hand; the members release what they hold.
+HTMLElementStack::ElementRecord::~ElementRecord()
+{
+}
+
+void HTMLElementStack::ElementRecord::replaceElement(PassRefPtr<Element> element)
+{
+ ASSERT(element); // Records never hold a null element.
+ // FIXME: Should this call finishParsingChildren?
+ m_element = element; // Swaps the element in place; the record keeps its stack position.
+}
+
+// Walks down the chain of next() records, starting below this one, and
+// reports whether |other| is found there. A record is not above itself.
+bool HTMLElementStack::ElementRecord::isAbove(ElementRecord* other) const
+{
+    ElementRecord* current = next();
+    while (current) {
+        if (current == other)
+            return true;
+        current = current->next();
+    }
+    return false;
+}
+
+HTMLElementStack::HTMLElementStack()
+ : m_htmlElement(0) // The three cached element pointers start out null; the pushHTML*Element() functions set them.
+ , m_headElement(0)
+ , m_bodyElement(0)
+{
+}
+
+// Nothing to do by hand; the members release what they hold.
+HTMLElementStack::~HTMLElementStack()
+{
+}
+
+// The stack holds exactly one element when the top record has nothing below
+// it. topRecord() asserts that the stack is not empty.
+bool HTMLElementStack::hasOnlyOneElement() const
+{
+    ElementRecord* belowTop = topRecord()->next();
+    return !belowTop;
+}
+
+// This is used in the fragment case of <body> and <frameset> in the "in body"
+// insertion mode.
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+bool HTMLElementStack::secondElementIsHTMLBodyElement() const
+{
+    ASSERT(m_htmlElement);
+    // If we have a body element, it must always be the second element on the
+    // stack, as we always start with an html element, and any other element
+    // would cause the implicit creation of a body element. Checking the cached
+    // body pointer is therefore enough.
+    return m_bodyElement != 0;
+}
+
+// Pops the <head> element, which must be on top, and forgets the cached
+// head pointer before doing so.
+void HTMLElementStack::popHTMLHeadElement()
+{
+    ASSERT(top() == m_headElement);
+
+    m_headElement = 0;
+    popCommon();
+}
+
+// Pops the <body> element, which must be on top, and forgets the cached
+// body pointer before doing so.
+void HTMLElementStack::popHTMLBodyElement()
+{
+    ASSERT(top() == m_bodyElement);
+
+    m_bodyElement = 0;
+    popCommon();
+}
+
+// Empties the stack. The cached html/head/body pointers are cleared first,
+// then every element gets finishParsingChildren() as its record is released,
+// from the top of the stack downwards.
+void HTMLElementStack::popAll()
+{
+    m_bodyElement = 0;
+    m_headElement = 0;
+    m_htmlElement = 0;
+
+    for (; m_top; m_top = m_top->releaseNext())
+        top()->finishParsingChildren();
+}
+
+// Generic pop. <head> must go through popHTMLHeadElement() instead, so that
+// the cached head pointer is cleared; that is what the assertion guards.
+void HTMLElementStack::pop()
+{
+    ASSERT(!top()->hasTagName(HTMLNames::headTag));
+
+    popCommon();
+}
+
+// Pops until the top element has local name |tagName|; that element itself
+// stays on the stack.
+void HTMLElementStack::popUntil(const AtomicString& tagName)
+{
+    for (;;) {
+        if (top()->hasLocalName(tagName))
+            return;
+        // pop() will ASSERT at <body> if callers fail to check that there is an
+        // element with localName |tagName| on the stack of open elements.
+        pop();
+    }
+}
+
+// Like popUntil(tagName), but the matching element is popped as well.
+void HTMLElementStack::popUntilPopped(const AtomicString& tagName)
+{
+    popUntil(tagName); // Leaves the matching element on top...
+    pop(); // ...which is then removed too.
+}
+
+// Pops up to and including the nearest <h1>..<h6> element.
+void HTMLElementStack::popUntilNumberedHeaderElementPopped()
+{
+    for (;;) {
+        const bool reachedHeader = isNumberedHeaderElement(top());
+        pop();
+        if (reachedHeader)
+            return;
+    }
+}
+
+// Pops until |element| is on top; |element| itself stays on the stack.
+void HTMLElementStack::popUntil(Element* element)
+{
+    for (;;) {
+        if (top() == element)
+            return;
+        pop();
+    }
+}
+
+// Like popUntil(element), but |element| is popped as well.
+void HTMLElementStack::popUntilPopped(Element* element)
+{
+    popUntil(element); // Leaves |element| on top...
+    pop(); // ...which is then removed too.
+}
+
+// Pops until a <table> or <html> element is on top. See:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-context
+void HTMLElementStack::popUntilTableScopeMarker()
+{
+    for (;;) {
+        if (isTableScopeMarker(top()))
+            return;
+        pop();
+    }
+}
+
+// Pops until a <tbody>, <tfoot>, <thead> or <html> element is on top. See:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-body-context
+void HTMLElementStack::popUntilTableBodyScopeMarker()
+{
+    for (;;) {
+        if (isTableBodyScopeMarker(top()))
+            return;
+        pop();
+    }
+}
+
+// Pops until a <tr> or <html> element is on top. See:
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-row-context
+void HTMLElementStack::popUntilTableRowScopeMarker()
+{
+    for (;;) {
+        if (isTableRowScopeMarker(top()))
+            return;
+        pop();
+    }
+}
+
+// Pops until the top element satisfies isForeignContentScopeMarker().
+void HTMLElementStack::popUntilForeignContentScopeMarker()
+{
+    for (;;) {
+        if (isForeignContentScopeMarker(top()))
+            return;
+        pop();
+    }
+}
+
+// Pushes the root <html> element onto the (still empty) stack and caches a
+// raw pointer to it.
+void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr<Element> element)
+{
+    ASSERT(!m_top); // <html> should always be the bottom of the stack.
+    ASSERT(element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!m_htmlElement);
+
+    // Grab the raw pointer before the PassRefPtr is handed to pushCommon().
+    m_htmlElement = element.get();
+    pushCommon(element);
+}
+
+// Pushes the <head> element and caches a raw pointer to it. Only one head
+// element may be cached at a time.
+void HTMLElementStack::pushHTMLHeadElement(PassRefPtr<Element> element)
+{
+    ASSERT(element->hasTagName(HTMLNames::headTag));
+    ASSERT(!m_headElement);
+
+    // Grab the raw pointer before the PassRefPtr is handed to pushCommon().
+    m_headElement = element.get();
+    pushCommon(element);
+}
+
+// Pushes the <body> element and remembers it in m_bodyElement. Asserts that
+// no <body> is currently remembered.
+void HTMLElementStack::pushHTMLBodyElement(PassRefPtr<Element> element)
+{
+    ASSERT(element->hasTagName(HTMLNames::bodyTag));
+    ASSERT(!m_bodyElement);
+    // Take the raw pointer first; |element| is handed on to pushCommon() next.
+    m_bodyElement = element.get();
+    pushCommon(element);
+}
+
+// Pushes an ordinary element. <html>, <head> and <body> have dedicated
+// pushHTML*Element() entry points and are rejected here by assertion.
+void HTMLElementStack::push(PassRefPtr<Element> element)
+{
+    ASSERT(!element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!element->hasTagName(HTMLNames::headTag));
+    ASSERT(!element->hasTagName(HTMLNames::bodyTag));
+    // The <html> element must already have been pushed.
+    ASSERT(m_htmlElement);
+    pushCommon(element);
+}
+
+// Inserts |element| into the stack immediately above |recordBelow|. When
+// |recordBelow| is the current top this is an ordinary push(); otherwise a
+// new record is spliced in between |recordBelow| and the record above it.
+void HTMLElementStack::insertAbove(PassRefPtr<Element> element, ElementRecord* recordBelow)
+{
+    ASSERT(element);
+    ASSERT(recordBelow);
+    ASSERT(m_top);
+    ASSERT(!element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!element->hasTagName(HTMLNames::headTag));
+    ASSERT(!element->hasTagName(HTMLNames::bodyTag));
+    ASSERT(m_htmlElement);
+    if (m_top.get() == recordBelow) {
+        push(element);
+        return;
+    }
+
+    // Walk down from the top looking for the record that sits directly on
+    // top of |recordBelow|.
+    ElementRecord* recordAbove = m_top.get();
+    while (recordAbove) {
+        if (recordAbove->next() == recordBelow) {
+            // The new record takes over the old tail (starting at
+            // |recordBelow|) and becomes the new next of |recordAbove|.
+            recordAbove->setNext(adoptPtr(new ElementRecord(element, recordAbove->releaseNext())));
+            recordAbove->next()->element()->beginParsingChildren();
+            return;
+        }
+        recordAbove = recordAbove->next();
+    }
+    ASSERT_NOT_REACHED();
+}
+
+// Returns the record at the top of the stack; asserts the stack is not empty.
+HTMLElementStack::ElementRecord* HTMLElementStack::topRecord() const
+{
+    ASSERT(m_top);
+    return m_top.get();
+}
+
+// Returns the element directly beneath the top of the stack.
+Element* HTMLElementStack::oneBelowTop() const
+{
+    // We should never be calling this if it could be 0.
+    ASSERT(m_top);
+    ASSERT(m_top->next());
+    ElementRecord* second = m_top->next();
+    return second->element();
+}
+
+// The bottom of the stack is the remembered <html> element; htmlElement()
+// asserts that it has been set.
+Element* HTMLElementStack::bottom() const
+{
+    return htmlElement();
+}
+
+// Removes the remembered <head> element from wherever it sits on the stack.
+void HTMLElementStack::removeHTMLHeadElement(Element* element)
+{
+    ASSERT(m_headElement == element);
+    if (m_top->element() != element) {
+        // <head> is buried: forget it here, then unlink its record.
+        m_headElement = 0;
+        removeNonTopCommon(element);
+        return;
+    }
+    // <head> is on top, so the dedicated pop handles it.
+    popHTMLHeadElement();
+}
+
+// Removes |element| from wherever it sits on the stack. <head> must go
+// through removeHTMLHeadElement() instead (asserted).
+void HTMLElementStack::remove(Element* element)
+{
+    ASSERT(!element->hasTagName(HTMLNames::headTag));
+    if (m_top->element() != element) {
+        // Not on top: unlink the record from the middle of the list.
+        removeNonTopCommon(element);
+        return;
+    }
+    pop();
+}
+
+// Returns the record holding |element|, searching from the top of the stack
+// downwards, or 0 when |element| is not on the stack.
+HTMLElementStack::ElementRecord* HTMLElementStack::find(Element* element) const
+{
+    ElementRecord* record = m_top.get();
+    while (record && record->element() != element)
+        record = record->next();
+    // Either the matching record, or 0 after running off the bottom.
+    return record;
+}
+
+// Returns the first record, counting from the top, whose element has local
+// name |tagName|, or 0 when there is none.
+HTMLElementStack::ElementRecord* HTMLElementStack::topmost(const AtomicString& tagName) const
+{
+    ElementRecord* record = m_top.get();
+    while (record && !record->element()->hasLocalName(tagName))
+        record = record->next();
+    // Either the matching record, or 0 after running off the bottom.
+    return record;
+}
+
+// True when |element| is anywhere on the stack.
+bool HTMLElementStack::contains(Element* element) const
+{
+    ElementRecord* match = find(element);
+    return match != 0;
+}
+
+// True when some element on the stack has local name |tagName|.
+bool HTMLElementStack::contains(const AtomicString& tagName) const
+{
+    ElementRecord* match = topmost(tagName);
+    return match != 0;
+}
+
+// Shared walk behind the in*Scope(const AtomicString&) queries. Starting at
+// |top| and moving down, returns true at the first element whose local name
+// is |targetTag|, and false at the first element accepted by |isMarker|.
+// The name test runs before the marker test for each element.
+template <bool isMarker(Element*)>
+bool inScopeCommon(HTMLElementStack::ElementRecord* top, const AtomicString& targetTag)
+{
+    HTMLElementStack::ElementRecord* record = top;
+    while (record) {
+        Element* current = record->element();
+        if (current->hasLocalName(targetTag))
+            return true;
+        if (isMarker(current))
+            return false;
+        record = record->next();
+    }
+    ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker.
+    return false;
+}
+
+// Walks down from the top of the stack. Returns false at the first element
+// whose namespace is not xhtmlNamespaceURI, and true once a scope marker is
+// reached. The marker itself is namespace-checked first.
+bool HTMLElementStack::hasOnlyHTMLElementsInScope() const
+{
+    ElementRecord* record = m_top.get();
+    while (record) {
+        Element* current = record->element();
+        if (current->namespaceURI() != xhtmlNamespaceURI)
+            return false;
+        if (isScopeMarker(current))
+            return true;
+        record = record->next();
+    }
+    ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker.
+    return true;
+}
+
+// Walks down from the top of the stack. Returns true at the first element
+// accepted by isNumberedHeaderElement(), and false if a scope marker is
+// reached first.
+bool HTMLElementStack::hasNumberedHeaderElementInScope() const
+{
+    ElementRecord* record = m_top.get();
+    while (record) {
+        Element* current = record->element();
+        if (isNumberedHeaderElement(current))
+            return true;
+        if (isScopeMarker(current))
+            return false;
+        record = record->next();
+    }
+    ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker.
+    return false;
+}
+
+// Walks down from the top of the stack. Returns true if |targetElement| is
+// met before, or at, the first scope marker, and false otherwise.
+bool HTMLElementStack::inScope(Element* targetElement) const
+{
+    ElementRecord* record = m_top.get();
+    while (record) {
+        Element* current = record->element();
+        if (current == targetElement)
+            return true;
+        if (isScopeMarker(current))
+            return false;
+        record = record->next();
+    }
+    ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker.
+    return false;
+}
+
+// True when an element with local name |targetTag| is found, walking down
+// from the top, no later than the first isScopeMarker() element.
+bool HTMLElementStack::inScope(const AtomicString& targetTag) const
+{
+    ElementRecord* start = m_top.get();
+    return inScopeCommon<isScopeMarker>(start, targetTag);
+}
+
+// QualifiedName convenience overload: only the local name takes part in the
+// lookup.
+bool HTMLElementStack::inScope(const QualifiedName& tagName) const
+{
+    // FIXME: Is localName() right for non-html elements?
+    const AtomicString& localName = tagName.localName();
+    return inScope(localName);
+}
+
+// True when an element with local name |targetTag| is found, walking down
+// from the top, no later than the first isListItemScopeMarker() element.
+bool HTMLElementStack::inListItemScope(const AtomicString& targetTag) const
+{
+    ElementRecord* start = m_top.get();
+    return inScopeCommon<isListItemScopeMarker>(start, targetTag);
+}
+
+// QualifiedName convenience overload: only the local name takes part in the
+// lookup.
+bool HTMLElementStack::inListItemScope(const QualifiedName& tagName) const
+{
+    // FIXME: Is localName() right for non-html elements?
+    const AtomicString& localName = tagName.localName();
+    return inListItemScope(localName);
+}
+
+// True when an element with local name |targetTag| is found, walking down
+// from the top, no later than the first isTableScopeMarker() element.
+bool HTMLElementStack::inTableScope(const AtomicString& targetTag) const
+{
+    ElementRecord* start = m_top.get();
+    return inScopeCommon<isTableScopeMarker>(start, targetTag);
+}
+
+// QualifiedName convenience overload: only the local name takes part in the
+// lookup.
+bool HTMLElementStack::inTableScope(const QualifiedName& tagName) const
+{
+    // FIXME: Is localName() right for non-html elements?
+    const AtomicString& localName = tagName.localName();
+    return inTableScope(localName);
+}
+
+// True when an element with local name |targetTag| is found, walking down
+// from the top, no later than the first isButtonScopeMarker() element.
+bool HTMLElementStack::inButtonScope(const AtomicString& targetTag) const
+{
+    ElementRecord* start = m_top.get();
+    return inScopeCommon<isButtonScopeMarker>(start, targetTag);
+}
+
+// QualifiedName convenience overload: only the local name takes part in the
+// lookup.
+bool HTMLElementStack::inButtonScope(const QualifiedName& tagName) const
+{
+    // FIXME: Is localName() right for non-html elements?
+    const AtomicString& localName = tagName.localName();
+    return inButtonScope(localName);
+}
+
+// True when an element with local name |targetTag| is found, walking down
+// from the top, no later than the first isSelectScopeMarker() element.
+bool HTMLElementStack::inSelectScope(const AtomicString& targetTag) const
+{
+    ElementRecord* start = m_top.get();
+    return inScopeCommon<isSelectScopeMarker>(start, targetTag);
+}
+
+// QualifiedName convenience overload: only the local name takes part in the
+// lookup.
+bool HTMLElementStack::inSelectScope(const QualifiedName& tagName) const
+{
+    // FIXME: Is localName() right for non-html elements?
+    const AtomicString& localName = tagName.localName();
+    return inSelectScope(localName);
+}
+
+// Returns the <html> element recorded by pushHTMLHtmlElement(); asserts that
+// one has been recorded.
+Element* HTMLElementStack::htmlElement() const
+{
+    ASSERT(m_htmlElement);
+    return m_htmlElement;
+}
+
+// Returns the <head> element recorded by pushHTMLHeadElement(); asserts that
+// one is currently recorded.
+Element* HTMLElementStack::headElement() const
+{
+    ASSERT(m_headElement);
+    return m_headElement;
+}
+
+// Returns the <body> element recorded by pushHTMLBodyElement(); asserts that
+// one is currently recorded.
+Element* HTMLElementStack::bodyElement() const
+{
+    ASSERT(m_bodyElement);
+    return m_bodyElement;
+}
+
+// Shared tail of every push: wraps |element| in a new record placed on top
+// of the current stack, then notifies the element via beginParsingChildren().
+void HTMLElementStack::pushCommon(PassRefPtr<Element> element)
+{
+    ASSERT(m_htmlElement);
+    // The previous top is released into the new record, which becomes m_top.
+    m_top = adoptPtr(new ElementRecord(element, m_top.release()));
+    top()->beginParsingChildren();
+}
+
+// Shared tail of every pop: notifies the top element via
+// finishParsingChildren(), then drops its record so the one below becomes
+// the new top.
+void HTMLElementStack::popCommon()
+{
+    // <html> is never popped. <head> and <body> may only pass through here
+    // once the matching remembered pointer has been cleared.
+    ASSERT(!top()->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!top()->hasTagName(HTMLNames::headTag) || !m_headElement);
+    ASSERT(!top()->hasTagName(HTMLNames::bodyTag) || !m_bodyElement);
+    top()->finishParsingChildren();
+    // Replacing m_top with its own released tail discards the old top record.
+    m_top = m_top->releaseNext();
+}
+
+// Unlinks the record holding |element| from somewhere below the top of the
+// stack. Callers deal with the top-of-stack case themselves (see remove()
+// and removeHTMLHeadElement()). <html> and <body> may not be removed this
+// way (asserted).
+void HTMLElementStack::removeNonTopCommon(Element* element)
+{
+    ASSERT(!element->hasTagName(HTMLNames::htmlTag));
+    ASSERT(!element->hasTagName(HTMLNames::bodyTag));
+    ASSERT(top() != element);
+    // Stop at the bottom record: it has nothing below it, so inspecting
+    // pos->next()->element() there would dereference null when |element| is
+    // not on the stack. Falling out of the loop reaches ASSERT_NOT_REACHED().
+    for (ElementRecord* pos = m_top.get(); pos && pos->next(); pos = pos->next()) {
+        if (pos->next()->element() != element)
+            continue;
+        // FIXME: Is it OK to call finishParsingChildren()
+        // when the children aren't actually finished?
+        element->finishParsingChildren();
+        // Splice the record out by adopting its tail as the new next of |pos|.
+        pos->setNext(pos->next()->releaseNext());
+        return;
+    }
+    ASSERT_NOT_REACHED();
+}
+
+#ifndef NDEBUG
+
+// Debug helper: calls showNode() on every element, from the top of the stack
+// down to the bottom.
+void HTMLElementStack::show()
+{
+    ElementRecord* record = m_top.get();
+    while (record) {
+        record->element()->showNode();
+        record = record->next();
+    }
+}
+
+#endif
+
+}
diff --git a/Source/WebCore/html/parser/HTMLElementStack.h b/Source/WebCore/html/parser/HTMLElementStack.h
new file mode 100644
index 0000000..8a8e160
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLElementStack.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLElementStack_h
+#define HTMLElementStack_h
+
+#include <wtf/Forward.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/OwnPtr.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/RefPtr.h>
+
+namespace WebCore {
+
+class Element;
+class QualifiedName;
+
+// NOTE: The HTML5 spec uses a backwards (grows downward) stack. We're using
+// more standard (grows upwards) stack terminology here.
+class HTMLElementStack : public Noncopyable { // Stack of open elements, kept as a singly linked list of ElementRecords headed by m_top.
+public:
+ HTMLElementStack();
+ ~HTMLElementStack();
+
+ class ElementRecord : public Noncopyable { // One stack entry: holds a ref to an Element and owns the record below it (m_next).
+ public:
+ ~ElementRecord(); // Public for ~PassOwnPtr()
+
+ Element* element() const { return m_element.get(); } // Raw pointer; the record itself holds the reference.
+ void replaceElement(PassRefPtr<Element>);
+
+ bool isAbove(ElementRecord*) const;
+
+ ElementRecord* next() const { return m_next.get(); } // The record one step closer to the bottom, or null at the bottom.
+
+ private:
+ friend class HTMLElementStack; // The stack creates records and relinks them through releaseNext()/setNext().
+
+ ElementRecord(PassRefPtr<Element>, PassOwnPtr<ElementRecord>); // Callers pass (element, record to sit below it); presumably stored as m_element/m_next - constructor body is not in this header.
+
+ PassOwnPtr<ElementRecord> releaseNext() { return m_next.release(); } // Hands ownership of the rest of the list to the caller.
+ void setNext(PassOwnPtr<ElementRecord> next) { m_next = next; } // Takes ownership of |next| as the new record below this one.
+
+ RefPtr<Element> m_element;
+ OwnPtr<ElementRecord> m_next;
+ };
+
+ // Inlining this function is a (small) performance win on the parsing
+ // benchmark.
+ Element* top() const
+ {
+ ASSERT(m_top->element()); // NOTE(review): m_top itself is not checked, so top() must not be called on an empty stack.
+ return m_top->element();
+ }
+
+ Element* oneBelowTop() const; // Element directly beneath the top; asserts that both records exist.
+ ElementRecord* topRecord() const; // Asserts the stack is non-empty.
+ Element* bottom() const; // Same as htmlElement().
+ ElementRecord* find(Element*) const; // Record holding the given element, or 0 if it is not on the stack.
+ ElementRecord* topmost(const AtomicString& tagName) const; // First record from the top whose element has that local name, or 0.
+
+ void insertAbove(PassRefPtr<Element>, ElementRecord*); // Inserts a new record immediately above the given one.
+
+ void push(PassRefPtr<Element>); // For everything except <html>, <head> and <body> (asserted).
+ void pushHTMLHtmlElement(PassRefPtr<Element>); // Requires an empty stack; records the element in m_htmlElement.
+ void pushHTMLHeadElement(PassRefPtr<Element>); // Records the element in m_headElement.
+ void pushHTMLBodyElement(PassRefPtr<Element>); // Records the element in m_bodyElement.
+
+ void pop(); // Asserts the top is not <head>.
+ void popUntil(const AtomicString& tagName); // Leaves the matching element on top.
+ void popUntil(Element*); // Leaves the given element on top.
+ void popUntilPopped(const AtomicString& tagName); // Also pops the matching element.
+ void popUntilPopped(Element*); // Also pops the given element.
+ void popUntilNumberedHeaderElementPopped(); // Pops through the nearest numbered header element, inclusive.
+ void popUntilTableScopeMarker(); // "clear the stack back to a table context" in the spec.
+ void popUntilTableBodyScopeMarker(); // "clear the stack back to a table body context" in the spec.
+ void popUntilTableRowScopeMarker(); // "clear the stack back to a table row context" in the spec.
+ void popUntilForeignContentScopeMarker(); // Leaves the marker element on top.
+ void popHTMLHeadElement();
+ void popHTMLBodyElement();
+ void popAll();
+
+ void remove(Element*); // Removes the element wherever it sits on the stack; asserts it is not <head>.
+ void removeHTMLHeadElement(Element*); // Counterpart of remove() for <head>; asserts the argument is m_headElement.
+
+ bool contains(Element*) const; // True if find() locates the element.
+ bool contains(const AtomicString& tagName) const; // True if topmost() locates a match.
+
+ bool inScope(Element*) const; // The in*Scope() queries walk down from the top and stop at the first marker element of that scope.
+ bool inScope(const AtomicString& tagName) const;
+ bool inScope(const QualifiedName&) const; // The QualifiedName overloads match on localName() only.
+ bool inListItemScope(const AtomicString& tagName) const;
+ bool inListItemScope(const QualifiedName&) const;
+ bool inTableScope(const AtomicString& tagName) const;
+ bool inTableScope(const QualifiedName&) const;
+ bool inButtonScope(const AtomicString& tagName) const;
+ bool inButtonScope(const QualifiedName&) const;
+ bool inSelectScope(const AtomicString& tagName) const;
+ bool inSelectScope(const QualifiedName&) const;
+
+ bool hasOnlyHTMLElementsInScope() const; // True if every element from the top down to the nearest scope marker (inclusive) is in the XHTML namespace.
+ bool hasNumberedHeaderElementInScope() const; // True if a numbered header element is met before a scope marker.
+
+ bool hasOnlyOneElement() const;
+ bool secondElementIsHTMLBodyElement() const;
+
+ Element* htmlElement() const; // Asserts m_htmlElement is set.
+ Element* headElement() const; // Asserts m_headElement is set.
+ Element* bodyElement() const; // Asserts m_bodyElement is set.
+
+#ifndef NDEBUG
+ void show(); // Calls showNode() on each element from the top down.
+#endif
+
+private:
+ void pushCommon(PassRefPtr<Element>); // Links a new top record and calls beginParsingChildren() on the element.
+ void popCommon(); // Calls finishParsingChildren() on the top element and drops its record; asserts the top is not <html>.
+ void removeNonTopCommon(Element*); // Unlinks a record that is not the top; asserts the element is not <html> or <body>.
+
+ OwnPtr<ElementRecord> m_top; // Topmost record; the rest of the list is owned through each record's m_next.
+
+ // We remember <html>, <head> and <body> as they are pushed. Their
+ // ElementRecords keep them alive. <html> is never popped.
+ // FIXME: We don't currently require type-specific information about
+ // these elements so we haven't yet bothered to plumb the types all the
+ // way down through createElement, etc.
+ Element* m_htmlElement;
+ Element* m_headElement;
+ Element* m_bodyElement;
+};
+
+} // namespace WebCore
+
+#endif // HTMLElementStack_h
diff --git a/Source/WebCore/html/parser/HTMLEntityNames.in b/Source/WebCore/html/parser/HTMLEntityNames.in
new file mode 100644
index 0000000..2d42ab2
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntityNames.in
@@ -0,0 +1,2138 @@
+"AElig;","U+000C6"
+"AElig","U+000C6"
+"AMP;","U+00026"
+"AMP","U+00026"
+"Aacute;","U+000C1"
+"Aacute","U+000C1"
+"Abreve;","U+00102"
+"Acirc;","U+000C2"
+"Acirc","U+000C2"
+"Acy;","U+00410"
+"Afr;","U+1D504"
+"Agrave;","U+000C0"
+"Agrave","U+000C0"
+"Alpha;","U+00391"
+"Amacr;","U+00100"
+"And;","U+02A53"
+"Aogon;","U+00104"
+"Aopf;","U+1D538"
+"ApplyFunction;","U+02061"
+"Aring;","U+000C5"
+"Aring","U+000C5"
+"Ascr;","U+1D49C"
+"Assign;","U+02254"
+"Atilde;","U+000C3"
+"Atilde","U+000C3"
+"Auml;","U+000C4"
+"Auml","U+000C4"
+"Backslash;","U+02216"
+"Barv;","U+02AE7"
+"Barwed;","U+02306"
+"Bcy;","U+00411"
+"Because;","U+02235"
+"Bernoullis;","U+0212C"
+"Beta;","U+00392"
+"Bfr;","U+1D505"
+"Bopf;","U+1D539"
+"Breve;","U+002D8"
+"Bscr;","U+0212C"
+"Bumpeq;","U+0224E"
+"CHcy;","U+00427"
+"COPY;","U+000A9"
+"COPY","U+000A9"
+"Cacute;","U+00106"
+"Cap;","U+022D2"
+"CapitalDifferentialD;","U+02145"
+"Cayleys;","U+0212D"
+"Ccaron;","U+0010C"
+"Ccedil;","U+000C7"
+"Ccedil","U+000C7"
+"Ccirc;","U+00108"
+"Cconint;","U+02230"
+"Cdot;","U+0010A"
+"Cedilla;","U+000B8"
+"CenterDot;","U+000B7"
+"Cfr;","U+0212D"
+"Chi;","U+003A7"
+"CircleDot;","U+02299"
+"CircleMinus;","U+02296"
+"CirclePlus;","U+02295"
+"CircleTimes;","U+02297"
+"ClockwiseContourIntegral;","U+02232"
+"CloseCurlyDoubleQuote;","U+0201D"
+"CloseCurlyQuote;","U+02019"
+"Colon;","U+02237"
+"Colone;","U+02A74"
+"Congruent;","U+02261"
+"Conint;","U+0222F"
+"ContourIntegral;","U+0222E"
+"Copf;","U+02102"
+"Coproduct;","U+02210"
+"CounterClockwiseContourIntegral;","U+02233"
+"Cross;","U+02A2F"
+"Cscr;","U+1D49E"
+"Cup;","U+022D3"
+"CupCap;","U+0224D"
+"DD;","U+02145"
+"DDotrahd;","U+02911"
+"DJcy;","U+00402"
+"DScy;","U+00405"
+"DZcy;","U+0040F"
+"Dagger;","U+02021"
+"Darr;","U+021A1"
+"Dashv;","U+02AE4"
+"Dcaron;","U+0010E"
+"Dcy;","U+00414"
+"Del;","U+02207"
+"Delta;","U+00394"
+"Dfr;","U+1D507"
+"DiacriticalAcute;","U+000B4"
+"DiacriticalDot;","U+002D9"
+"DiacriticalDoubleAcute;","U+002DD"
+"DiacriticalGrave;","U+00060"
+"DiacriticalTilde;","U+002DC"
+"Diamond;","U+022C4"
+"DifferentialD;","U+02146"
+"Dopf;","U+1D53B"
+"Dot;","U+000A8"
+"DotDot;","U+020DC"
+"DotEqual;","U+02250"
+"DoubleContourIntegral;","U+0222F"
+"DoubleDot;","U+000A8"
+"DoubleDownArrow;","U+021D3"
+"DoubleLeftArrow;","U+021D0"
+"DoubleLeftRightArrow;","U+021D4"
+"DoubleLeftTee;","U+02AE4"
+"DoubleLongLeftArrow;","U+027F8"
+"DoubleLongLeftRightArrow;","U+027FA"
+"DoubleLongRightArrow;","U+027F9"
+"DoubleRightArrow;","U+021D2"
+"DoubleRightTee;","U+022A8"
+"DoubleUpArrow;","U+021D1"
+"DoubleUpDownArrow;","U+021D5"
+"DoubleVerticalBar;","U+02225"
+"DownArrow;","U+02193"
+"DownArrowBar;","U+02913"
+"DownArrowUpArrow;","U+021F5"
+"DownBreve;","U+00311"
+"DownLeftRightVector;","U+02950"
+"DownLeftTeeVector;","U+0295E"
+"DownLeftVector;","U+021BD"
+"DownLeftVectorBar;","U+02956"
+"DownRightTeeVector;","U+0295F"
+"DownRightVector;","U+021C1"
+"DownRightVectorBar;","U+02957"
+"DownTee;","U+022A4"
+"DownTeeArrow;","U+021A7"
+"Downarrow;","U+021D3"
+"Dscr;","U+1D49F"
+"Dstrok;","U+00110"
+"ENG;","U+0014A"
+"ETH;","U+000D0"
+"ETH","U+000D0"
+"Eacute;","U+000C9"
+"Eacute","U+000C9"
+"Ecaron;","U+0011A"
+"Ecirc;","U+000CA"
+"Ecirc","U+000CA"
+"Ecy;","U+0042D"
+"Edot;","U+00116"
+"Efr;","U+1D508"
+"Egrave;","U+000C8"
+"Egrave","U+000C8"
+"Element;","U+02208"
+"Emacr;","U+00112"
+"EmptySmallSquare;","U+025FB"
+"EmptyVerySmallSquare;","U+025AB"
+"Eogon;","U+00118"
+"Eopf;","U+1D53C"
+"Epsilon;","U+00395"
+"Equal;","U+02A75"
+"EqualTilde;","U+02242"
+"Equilibrium;","U+021CC"
+"Escr;","U+02130"
+"Esim;","U+02A73"
+"Eta;","U+00397"
+"Euml;","U+000CB"
+"Euml","U+000CB"
+"Exists;","U+02203"
+"ExponentialE;","U+02147"
+"Fcy;","U+00424"
+"Ffr;","U+1D509"
+"FilledSmallSquare;","U+025FC"
+"FilledVerySmallSquare;","U+025AA"
+"Fopf;","U+1D53D"
+"ForAll;","U+02200"
+"Fouriertrf;","U+02131"
+"Fscr;","U+02131"
+"GJcy;","U+00403"
+"GT;","U+0003E"
+"GT","U+0003E"
+"Gamma;","U+00393"
+"Gammad;","U+003DC"
+"Gbreve;","U+0011E"
+"Gcedil;","U+00122"
+"Gcirc;","U+0011C"
+"Gcy;","U+00413"
+"Gdot;","U+00120"
+"Gfr;","U+1D50A"
+"Gg;","U+022D9"
+"Gopf;","U+1D53E"
+"GreaterEqual;","U+02265"
+"GreaterEqualLess;","U+022DB"
+"GreaterFullEqual;","U+02267"
+"GreaterGreater;","U+02AA2"
+"GreaterLess;","U+02277"
+"GreaterSlantEqual;","U+02A7E"
+"GreaterTilde;","U+02273"
+"Gscr;","U+1D4A2"
+"Gt;","U+0226B"
+"HARDcy;","U+0042A"
+"Hacek;","U+002C7"
+"Hat;","U+0005E"
+"Hcirc;","U+00124"
+"Hfr;","U+0210C"
+"HilbertSpace;","U+0210B"
+"Hopf;","U+0210D"
+"HorizontalLine;","U+02500"
+"Hscr;","U+0210B"
+"Hstrok;","U+00126"
+"HumpDownHump;","U+0224E"
+"HumpEqual;","U+0224F"
+"IEcy;","U+00415"
+"IJlig;","U+00132"
+"IOcy;","U+00401"
+"Iacute;","U+000CD"
+"Iacute","U+000CD"
+"Icirc;","U+000CE"
+"Icirc","U+000CE"
+"Icy;","U+00418"
+"Idot;","U+00130"
+"Ifr;","U+02111"
+"Igrave;","U+000CC"
+"Igrave","U+000CC"
+"Im;","U+02111"
+"Imacr;","U+0012A"
+"ImaginaryI;","U+02148"
+"Implies;","U+021D2"
+"Int;","U+0222C"
+"Integral;","U+0222B"
+"Intersection;","U+022C2"
+"InvisibleComma;","U+02063"
+"InvisibleTimes;","U+02062"
+"Iogon;","U+0012E"
+"Iopf;","U+1D540"
+"Iota;","U+00399"
+"Iscr;","U+02110"
+"Itilde;","U+00128"
+"Iukcy;","U+00406"
+"Iuml;","U+000CF"
+"Iuml","U+000CF"
+"Jcirc;","U+00134"
+"Jcy;","U+00419"
+"Jfr;","U+1D50D"
+"Jopf;","U+1D541"
+"Jscr;","U+1D4A5"
+"Jsercy;","U+00408"
+"Jukcy;","U+00404"
+"KHcy;","U+00425"
+"KJcy;","U+0040C"
+"Kappa;","U+0039A"
+"Kcedil;","U+00136"
+"Kcy;","U+0041A"
+"Kfr;","U+1D50E"
+"Kopf;","U+1D542"
+"Kscr;","U+1D4A6"
+"LJcy;","U+00409"
+"LT;","U+0003C"
+"LT","U+0003C"
+"Lacute;","U+00139"
+"Lambda;","U+0039B"
+"Lang;","U+027EA"
+"Laplacetrf;","U+02112"
+"Larr;","U+0219E"
+"Lcaron;","U+0013D"
+"Lcedil;","U+0013B"
+"Lcy;","U+0041B"
+"LeftAngleBracket;","U+027E8"
+"LeftArrow;","U+02190"
+"LeftArrowBar;","U+021E4"
+"LeftArrowRightArrow;","U+021C6"
+"LeftCeiling;","U+02308"
+"LeftDoubleBracket;","U+027E6"
+"LeftDownTeeVector;","U+02961"
+"LeftDownVector;","U+021C3"
+"LeftDownVectorBar;","U+02959"
+"LeftFloor;","U+0230A"
+"LeftRightArrow;","U+02194"
+"LeftRightVector;","U+0294E"
+"LeftTee;","U+022A3"
+"LeftTeeArrow;","U+021A4"
+"LeftTeeVector;","U+0295A"
+"LeftTriangle;","U+022B2"
+"LeftTriangleBar;","U+029CF"
+"LeftTriangleEqual;","U+022B4"
+"LeftUpDownVector;","U+02951"
+"LeftUpTeeVector;","U+02960"
+"LeftUpVector;","U+021BF"
+"LeftUpVectorBar;","U+02958"
+"LeftVector;","U+021BC"
+"LeftVectorBar;","U+02952"
+"Leftarrow;","U+021D0"
+"Leftrightarrow;","U+021D4"
+"LessEqualGreater;","U+022DA"
+"LessFullEqual;","U+02266"
+"LessGreater;","U+02276"
+"LessLess;","U+02AA1"
+"LessSlantEqual;","U+02A7D"
+"LessTilde;","U+02272"
+"Lfr;","U+1D50F"
+"Ll;","U+022D8"
+"Lleftarrow;","U+021DA"
+"Lmidot;","U+0013F"
+"LongLeftArrow;","U+027F5"
+"LongLeftRightArrow;","U+027F7"
+"LongRightArrow;","U+027F6"
+"Longleftarrow;","U+027F8"
+"Longleftrightarrow;","U+027FA"
+"Longrightarrow;","U+027F9"
+"Lopf;","U+1D543"
+"LowerLeftArrow;","U+02199"
+"LowerRightArrow;","U+02198"
+"Lscr;","U+02112"
+"Lsh;","U+021B0"
+"Lstrok;","U+00141"
+"Lt;","U+0226A"
+"Map;","U+02905"
+"Mcy;","U+0041C"
+"MediumSpace;","U+0205F"
+"Mellintrf;","U+02133"
+"Mfr;","U+1D510"
+"MinusPlus;","U+02213"
+"Mopf;","U+1D544"
+"Mscr;","U+02133"
+"Mu;","U+0039C"
+"NJcy;","U+0040A"
+"Nacute;","U+00143"
+"Ncaron;","U+00147"
+"Ncedil;","U+00145"
+"Ncy;","U+0041D"
+"NegativeMediumSpace;","U+0200B"
+"NegativeThickSpace;","U+0200B"
+"NegativeThinSpace;","U+0200B"
+"NegativeVeryThinSpace;","U+0200B"
+"NestedGreaterGreater;","U+0226B"
+"NestedLessLess;","U+0226A"
+"NewLine;","U+0000A"
+"Nfr;","U+1D511"
+"NoBreak;","U+02060"
+"NonBreakingSpace;","U+000A0"
+"Nopf;","U+02115"
+"Not;","U+02AEC"
+"NotCongruent;","U+02262"
+"NotCupCap;","U+0226D"
+"NotDoubleVerticalBar;","U+02226"
+"NotElement;","U+02209"
+"NotEqual;","U+02260"
+"NotExists;","U+02204"
+"NotGreater;","U+0226F"
+"NotGreaterEqual;","U+02271"
+"NotGreaterLess;","U+02279"
+"NotGreaterTilde;","U+02275"
+"NotLeftTriangle;","U+022EA"
+"NotLeftTriangleEqual;","U+022EC"
+"NotLess;","U+0226E"
+"NotLessEqual;","U+02270"
+"NotLessGreater;","U+02278"
+"NotLessTilde;","U+02274"
+"NotPrecedes;","U+02280"
+"NotPrecedesSlantEqual;","U+022E0"
+"NotReverseElement;","U+0220C"
+"NotRightTriangle;","U+022EB"
+"NotRightTriangleEqual;","U+022ED"
+"NotSquareSubsetEqual;","U+022E2"
+"NotSquareSupersetEqual;","U+022E3"
+"NotSubsetEqual;","U+02288"
+"NotSucceeds;","U+02281"
+"NotSucceedsSlantEqual;","U+022E1"
+"NotSupersetEqual;","U+02289"
+"NotTilde;","U+02241"
+"NotTildeEqual;","U+02244"
+"NotTildeFullEqual;","U+02247"
+"NotTildeTilde;","U+02249"
+"NotVerticalBar;","U+02224"
+"Nscr;","U+1D4A9"
+"Ntilde;","U+000D1"
+"Ntilde","U+000D1"
+"Nu;","U+0039D"
+"OElig;","U+00152"
+"Oacute;","U+000D3"
+"Oacute","U+000D3"
+"Ocirc;","U+000D4"
+"Ocirc","U+000D4"
+"Ocy;","U+0041E"
+"Odblac;","U+00150"
+"Ofr;","U+1D512"
+"Ograve;","U+000D2"
+"Ograve","U+000D2"
+"Omacr;","U+0014C"
+"Omega;","U+003A9"
+"Omicron;","U+0039F"
+"Oopf;","U+1D546"
+"OpenCurlyDoubleQuote;","U+0201C"
+"OpenCurlyQuote;","U+02018"
+"Or;","U+02A54"
+"Oscr;","U+1D4AA"
+"Oslash;","U+000D8"
+"Oslash","U+000D8"
+"Otilde;","U+000D5"
+"Otilde","U+000D5"
+"Otimes;","U+02A37"
+"Ouml;","U+000D6"
+"Ouml","U+000D6"
+"OverBar;","U+0203E"
+"OverBrace;","U+023DE"
+"OverBracket;","U+023B4"
+"OverParenthesis;","U+023DC"
+"PartialD;","U+02202"
+"Pcy;","U+0041F"
+"Pfr;","U+1D513"
+"Phi;","U+003A6"
+"Pi;","U+003A0"
+"PlusMinus;","U+000B1"
+"Poincareplane;","U+0210C"
+"Popf;","U+02119"
+"Pr;","U+02ABB"
+"Precedes;","U+0227A"
+"PrecedesEqual;","U+02AAF"
+"PrecedesSlantEqual;","U+0227C"
+"PrecedesTilde;","U+0227E"
+"Prime;","U+02033"
+"Product;","U+0220F"
+"Proportion;","U+02237"
+"Proportional;","U+0221D"
+"Pscr;","U+1D4AB"
+"Psi;","U+003A8"
+"QUOT;","U+00022"
+"QUOT","U+00022"
+"Qfr;","U+1D514"
+"Qopf;","U+0211A"
+"Qscr;","U+1D4AC"
+"RBarr;","U+02910"
+"REG;","U+000AE"
+"REG","U+000AE"
+"Racute;","U+00154"
+"Rang;","U+027EB"
+"Rarr;","U+021A0"
+"Rarrtl;","U+02916"
+"Rcaron;","U+00158"
+"Rcedil;","U+00156"
+"Rcy;","U+00420"
+"Re;","U+0211C"
+"ReverseElement;","U+0220B"
+"ReverseEquilibrium;","U+021CB"
+"ReverseUpEquilibrium;","U+0296F"
+"Rfr;","U+0211C"
+"Rho;","U+003A1"
+"RightAngleBracket;","U+027E9"
+"RightArrow;","U+02192"
+"RightArrowBar;","U+021E5"
+"RightArrowLeftArrow;","U+021C4"
+"RightCeiling;","U+02309"
+"RightDoubleBracket;","U+027E7"
+"RightDownTeeVector;","U+0295D"
+"RightDownVector;","U+021C2"
+"RightDownVectorBar;","U+02955"
+"RightFloor;","U+0230B"
+"RightTee;","U+022A2"
+"RightTeeArrow;","U+021A6"
+"RightTeeVector;","U+0295B"
+"RightTriangle;","U+022B3"
+"RightTriangleBar;","U+029D0"
+"RightTriangleEqual;","U+022B5"
+"RightUpDownVector;","U+0294F"
+"RightUpTeeVector;","U+0295C"
+"RightUpVector;","U+021BE"
+"RightUpVectorBar;","U+02954"
+"RightVector;","U+021C0"
+"RightVectorBar;","U+02953"
+"Rightarrow;","U+021D2"
+"Ropf;","U+0211D"
+"RoundImplies;","U+02970"
+"Rrightarrow;","U+021DB"
+"Rscr;","U+0211B"
+"Rsh;","U+021B1"
+"RuleDelayed;","U+029F4"
+"SHCHcy;","U+00429"
+"SHcy;","U+00428"
+"SOFTcy;","U+0042C"
+"Sacute;","U+0015A"
+"Sc;","U+02ABC"
+"Scaron;","U+00160"
+"Scedil;","U+0015E"
+"Scirc;","U+0015C"
+"Scy;","U+00421"
+"Sfr;","U+1D516"
+"ShortDownArrow;","U+02193"
+"ShortLeftArrow;","U+02190"
+"ShortRightArrow;","U+02192"
+"ShortUpArrow;","U+02191"
+"Sigma;","U+003A3"
+"SmallCircle;","U+02218"
+"Sopf;","U+1D54A"
+"Sqrt;","U+0221A"
+"Square;","U+025A1"
+"SquareIntersection;","U+02293"
+"SquareSubset;","U+0228F"
+"SquareSubsetEqual;","U+02291"
+"SquareSuperset;","U+02290"
+"SquareSupersetEqual;","U+02292"
+"SquareUnion;","U+02294"
+"Sscr;","U+1D4AE"
+"Star;","U+022C6"
+"Sub;","U+022D0"
+"Subset;","U+022D0"
+"SubsetEqual;","U+02286"
+"Succeeds;","U+0227B"
+"SucceedsEqual;","U+02AB0"
+"SucceedsSlantEqual;","U+0227D"
+"SucceedsTilde;","U+0227F"
+"SuchThat;","U+0220B"
+"Sum;","U+02211"
+"Sup;","U+022D1"
+"Superset;","U+02283"
+"SupersetEqual;","U+02287"
+"Supset;","U+022D1"
+"THORN;","U+000DE"
+"THORN","U+000DE"
+"TRADE;","U+02122"
+"TSHcy;","U+0040B"
+"TScy;","U+00426"
+"Tab;","U+00009"
+"Tau;","U+003A4"
+"Tcaron;","U+00164"
+"Tcedil;","U+00162"
+"Tcy;","U+00422"
+"Tfr;","U+1D517"
+"Therefore;","U+02234"
+"Theta;","U+00398"
+"ThinSpace;","U+02009"
+"Tilde;","U+0223C"
+"TildeEqual;","U+02243"
+"TildeFullEqual;","U+02245"
+"TildeTilde;","U+02248"
+"Topf;","U+1D54B"
+"TripleDot;","U+020DB"
+"Tscr;","U+1D4AF"
+"Tstrok;","U+00166"
+"Uacute;","U+000DA"
+"Uacute","U+000DA"
+"Uarr;","U+0219F"
+"Uarrocir;","U+02949"
+"Ubrcy;","U+0040E"
+"Ubreve;","U+0016C"
+"Ucirc;","U+000DB"
+"Ucirc","U+000DB"
+"Ucy;","U+00423"
+"Udblac;","U+00170"
+"Ufr;","U+1D518"
+"Ugrave;","U+000D9"
+"Ugrave","U+000D9"
+"Umacr;","U+0016A"
+"UnderBar;","U+0005F"
+"UnderBrace;","U+023DF"
+"UnderBracket;","U+023B5"
+"UnderParenthesis;","U+023DD"
+"Union;","U+022C3"
+"UnionPlus;","U+0228E"
+"Uogon;","U+00172"
+"Uopf;","U+1D54C"
+"UpArrow;","U+02191"
+"UpArrowBar;","U+02912"
+"UpArrowDownArrow;","U+021C5"
+"UpDownArrow;","U+02195"
+"UpEquilibrium;","U+0296E"
+"UpTee;","U+022A5"
+"UpTeeArrow;","U+021A5"
+"Uparrow;","U+021D1"
+"Updownarrow;","U+021D5"
+"UpperLeftArrow;","U+02196"
+"UpperRightArrow;","U+02197"
+"Upsi;","U+003D2"
+"Upsilon;","U+003A5"
+"Uring;","U+0016E"
+"Uscr;","U+1D4B0"
+"Utilde;","U+00168"
+"Uuml;","U+000DC"
+"Uuml","U+000DC"
+"VDash;","U+022AB"
+"Vbar;","U+02AEB"
+"Vcy;","U+00412"
+"Vdash;","U+022A9"
+"Vdashl;","U+02AE6"
+"Vee;","U+022C1"
+"Verbar;","U+02016"
+"Vert;","U+02016"
+"VerticalBar;","U+02223"
+"VerticalLine;","U+0007C"
+"VerticalSeparator;","U+02758"
+"VerticalTilde;","U+02240"
+"VeryThinSpace;","U+0200A"
+"Vfr;","U+1D519"
+"Vopf;","U+1D54D"
+"Vscr;","U+1D4B1"
+"Vvdash;","U+022AA"
+"Wcirc;","U+00174"
+"Wedge;","U+022C0"
+"Wfr;","U+1D51A"
+"Wopf;","U+1D54E"
+"Wscr;","U+1D4B2"
+"Xfr;","U+1D51B"
+"Xi;","U+0039E"
+"Xopf;","U+1D54F"
+"Xscr;","U+1D4B3"
+"YAcy;","U+0042F"
+"YIcy;","U+00407"
+"YUcy;","U+0042E"
+"Yacute;","U+000DD"
+"Yacute","U+000DD"
+"Ycirc;","U+00176"
+"Ycy;","U+0042B"
+"Yfr;","U+1D51C"
+"Yopf;","U+1D550"
+"Yscr;","U+1D4B4"
+"Yuml;","U+00178"
+"ZHcy;","U+00416"
+"Zacute;","U+00179"
+"Zcaron;","U+0017D"
+"Zcy;","U+00417"
+"Zdot;","U+0017B"
+"ZeroWidthSpace;","U+0200B"
+"Zeta;","U+00396"
+"Zfr;","U+02128"
+"Zopf;","U+02124"
+"Zscr;","U+1D4B5"
+"aacute;","U+000E1"
+"aacute","U+000E1"
+"abreve;","U+00103"
+"ac;","U+0223E"
+"acd;","U+0223F"
+"acirc;","U+000E2"
+"acirc","U+000E2"
+"acute;","U+000B4"
+"acute","U+000B4"
+"acy;","U+00430"
+"aelig;","U+000E6"
+"aelig","U+000E6"
+"af;","U+02061"
+"afr;","U+1D51E"
+"agrave;","U+000E0"
+"agrave","U+000E0"
+"alefsym;","U+02135"
+"aleph;","U+02135"
+"alpha;","U+003B1"
+"amacr;","U+00101"
+"amalg;","U+02A3F"
+"amp;","U+00026"
+"amp","U+00026"
+"and;","U+02227"
+"andand;","U+02A55"
+"andd;","U+02A5C"
+"andslope;","U+02A58"
+"andv;","U+02A5A"
+"ang;","U+02220"
+"ange;","U+029A4"
+"angle;","U+02220"
+"angmsd;","U+02221"
+"angmsdaa;","U+029A8"
+"angmsdab;","U+029A9"
+"angmsdac;","U+029AA"
+"angmsdad;","U+029AB"
+"angmsdae;","U+029AC"
+"angmsdaf;","U+029AD"
+"angmsdag;","U+029AE"
+"angmsdah;","U+029AF"
+"angrt;","U+0221F"
+"angrtvb;","U+022BE"
+"angrtvbd;","U+0299D"
+"angsph;","U+02222"
+"angst;","U+000C5"
+"angzarr;","U+0237C"
+"aogon;","U+00105"
+"aopf;","U+1D552"
+"ap;","U+02248"
+"apE;","U+02A70"
+"apacir;","U+02A6F"
+"ape;","U+0224A"
+"apid;","U+0224B"
+"apos;","U+00027"
+"approx;","U+02248"
+"approxeq;","U+0224A"
+"aring;","U+000E5"
+"aring","U+000E5"
+"ascr;","U+1D4B6"
+"ast;","U+0002A"
+"asymp;","U+02248"
+"asympeq;","U+0224D"
+"atilde;","U+000E3"
+"atilde","U+000E3"
+"auml;","U+000E4"
+"auml","U+000E4"
+"awconint;","U+02233"
+"awint;","U+02A11"
+"bNot;","U+02AED"
+"backcong;","U+0224C"
+"backepsilon;","U+003F6"
+"backprime;","U+02035"
+"backsim;","U+0223D"
+"backsimeq;","U+022CD"
+"barvee;","U+022BD"
+"barwed;","U+02305"
+"barwedge;","U+02305"
+"bbrk;","U+023B5"
+"bbrktbrk;","U+023B6"
+"bcong;","U+0224C"
+"bcy;","U+00431"
+"bdquo;","U+0201E"
+"becaus;","U+02235"
+"because;","U+02235"
+"bemptyv;","U+029B0"
+"bepsi;","U+003F6"
+"bernou;","U+0212C"
+"beta;","U+003B2"
+"beth;","U+02136"
+"between;","U+0226C"
+"bfr;","U+1D51F"
+"bigcap;","U+022C2"
+"bigcirc;","U+025EF"
+"bigcup;","U+022C3"
+"bigodot;","U+02A00"
+"bigoplus;","U+02A01"
+"bigotimes;","U+02A02"
+"bigsqcup;","U+02A06"
+"bigstar;","U+02605"
+"bigtriangledown;","U+025BD"
+"bigtriangleup;","U+025B3"
+"biguplus;","U+02A04"
+"bigvee;","U+022C1"
+"bigwedge;","U+022C0"
+"bkarow;","U+0290D"
+"blacklozenge;","U+029EB"
+"blacksquare;","U+025AA"
+"blacktriangle;","U+025B4"
+"blacktriangledown;","U+025BE"
+"blacktriangleleft;","U+025C2"
+"blacktriangleright;","U+025B8"
+"blank;","U+02423"
+"blk12;","U+02592"
+"blk14;","U+02591"
+"blk34;","U+02593"
+"block;","U+02588"
+"bnot;","U+02310"
+"bopf;","U+1D553"
+"bot;","U+022A5"
+"bottom;","U+022A5"
+"bowtie;","U+022C8"
+"boxDL;","U+02557"
+"boxDR;","U+02554"
+"boxDl;","U+02556"
+"boxDr;","U+02553"
+"boxH;","U+02550"
+"boxHD;","U+02566"
+"boxHU;","U+02569"
+"boxHd;","U+02564"
+"boxHu;","U+02567"
+"boxUL;","U+0255D"
+"boxUR;","U+0255A"
+"boxUl;","U+0255C"
+"boxUr;","U+02559"
+"boxV;","U+02551"
+"boxVH;","U+0256C"
+"boxVL;","U+02563"
+"boxVR;","U+02560"
+"boxVh;","U+0256B"
+"boxVl;","U+02562"
+"boxVr;","U+0255F"
+"boxbox;","U+029C9"
+"boxdL;","U+02555"
+"boxdR;","U+02552"
+"boxdl;","U+02510"
+"boxdr;","U+0250C"
+"boxh;","U+02500"
+"boxhD;","U+02565"
+"boxhU;","U+02568"
+"boxhd;","U+0252C"
+"boxhu;","U+02534"
+"boxminus;","U+0229F"
+"boxplus;","U+0229E"
+"boxtimes;","U+022A0"
+"boxuL;","U+0255B"
+"boxuR;","U+02558"
+"boxul;","U+02518"
+"boxur;","U+02514"
+"boxv;","U+02502"
+"boxvH;","U+0256A"
+"boxvL;","U+02561"
+"boxvR;","U+0255E"
+"boxvh;","U+0253C"
+"boxvl;","U+02524"
+"boxvr;","U+0251C"
+"bprime;","U+02035"
+"breve;","U+002D8"
+"brvbar;","U+000A6"
+"brvbar","U+000A6"
+"bscr;","U+1D4B7"
+"bsemi;","U+0204F"
+"bsim;","U+0223D"
+"bsime;","U+022CD"
+"bsol;","U+0005C"
+"bsolb;","U+029C5"
+"bsolhsub;","U+027C8"
+"bull;","U+02022"
+"bullet;","U+02022"
+"bump;","U+0224E"
+"bumpE;","U+02AAE"
+"bumpe;","U+0224F"
+"bumpeq;","U+0224F"
+"cacute;","U+00107"
+"cap;","U+02229"
+"capand;","U+02A44"
+"capbrcup;","U+02A49"
+"capcap;","U+02A4B"
+"capcup;","U+02A47"
+"capdot;","U+02A40"
+"caret;","U+02041"
+"caron;","U+002C7"
+"ccaps;","U+02A4D"
+"ccaron;","U+0010D"
+"ccedil;","U+000E7"
+"ccedil","U+000E7"
+"ccirc;","U+00109"
+"ccups;","U+02A4C"
+"ccupssm;","U+02A50"
+"cdot;","U+0010B"
+"cedil;","U+000B8"
+"cedil","U+000B8"
+"cemptyv;","U+029B2"
+"cent;","U+000A2"
+"cent","U+000A2"
+"centerdot;","U+000B7"
+"cfr;","U+1D520"
+"chcy;","U+00447"
+"check;","U+02713"
+"checkmark;","U+02713"
+"chi;","U+003C7"
+"cir;","U+025CB"
+"cirE;","U+029C3"
+"circ;","U+002C6"
+"circeq;","U+02257"
+"circlearrowleft;","U+021BA"
+"circlearrowright;","U+021BB"
+"circledR;","U+000AE"
+"circledS;","U+024C8"
+"circledast;","U+0229B"
+"circledcirc;","U+0229A"
+"circleddash;","U+0229D"
+"cire;","U+02257"
+"cirfnint;","U+02A10"
+"cirmid;","U+02AEF"
+"cirscir;","U+029C2"
+"clubs;","U+02663"
+"clubsuit;","U+02663"
+"colon;","U+0003A"
+"colone;","U+02254"
+"coloneq;","U+02254"
+"comma;","U+0002C"
+"commat;","U+00040"
+"comp;","U+02201"
+"compfn;","U+02218"
+"complement;","U+02201"
+"complexes;","U+02102"
+"cong;","U+02245"
+"congdot;","U+02A6D"
+"conint;","U+0222E"
+"copf;","U+1D554"
+"coprod;","U+02210"
+"copy;","U+000A9"
+"copy","U+000A9"
+"copysr;","U+02117"
+"crarr;","U+021B5"
+"cross;","U+02717"
+"cscr;","U+1D4B8"
+"csub;","U+02ACF"
+"csube;","U+02AD1"
+"csup;","U+02AD0"
+"csupe;","U+02AD2"
+"ctdot;","U+022EF"
+"cudarrl;","U+02938"
+"cudarrr;","U+02935"
+"cuepr;","U+022DE"
+"cuesc;","U+022DF"
+"cularr;","U+021B6"
+"cularrp;","U+0293D"
+"cup;","U+0222A"
+"cupbrcap;","U+02A48"
+"cupcap;","U+02A46"
+"cupcup;","U+02A4A"
+"cupdot;","U+0228D"
+"cupor;","U+02A45"
+"curarr;","U+021B7"
+"curarrm;","U+0293C"
+"curlyeqprec;","U+022DE"
+"curlyeqsucc;","U+022DF"
+"curlyvee;","U+022CE"
+"curlywedge;","U+022CF"
+"curren;","U+000A4"
+"curren","U+000A4"
+"curvearrowleft;","U+021B6"
+"curvearrowright;","U+021B7"
+"cuvee;","U+022CE"
+"cuwed;","U+022CF"
+"cwconint;","U+02232"
+"cwint;","U+02231"
+"cylcty;","U+0232D"
+"dArr;","U+021D3"
+"dHar;","U+02965"
+"dagger;","U+02020"
+"daleth;","U+02138"
+"darr;","U+02193"
+"dash;","U+02010"
+"dashv;","U+022A3"
+"dbkarow;","U+0290F"
+"dblac;","U+002DD"
+"dcaron;","U+0010F"
+"dcy;","U+00434"
+"dd;","U+02146"
+"ddagger;","U+02021"
+"ddarr;","U+021CA"
+"ddotseq;","U+02A77"
+"deg;","U+000B0"
+"deg","U+000B0"
+"delta;","U+003B4"
+"demptyv;","U+029B1"
+"dfisht;","U+0297F"
+"dfr;","U+1D521"
+"dharl;","U+021C3"
+"dharr;","U+021C2"
+"diam;","U+022C4"
+"diamond;","U+022C4"
+"diamondsuit;","U+02666"
+"diams;","U+02666"
+"die;","U+000A8"
+"digamma;","U+003DD"
+"disin;","U+022F2"
+"div;","U+000F7"
+"divide;","U+000F7"
+"divide","U+000F7"
+"divideontimes;","U+022C7"
+"divonx;","U+022C7"
+"djcy;","U+00452"
+"dlcorn;","U+0231E"
+"dlcrop;","U+0230D"
+"dollar;","U+00024"
+"dopf;","U+1D555"
+"dot;","U+002D9"
+"doteq;","U+02250"
+"doteqdot;","U+02251"
+"dotminus;","U+02238"
+"dotplus;","U+02214"
+"dotsquare;","U+022A1"
+"doublebarwedge;","U+02306"
+"downarrow;","U+02193"
+"downdownarrows;","U+021CA"
+"downharpoonleft;","U+021C3"
+"downharpoonright;","U+021C2"
+"drbkarow;","U+02910"
+"drcorn;","U+0231F"
+"drcrop;","U+0230C"
+"dscr;","U+1D4B9"
+"dscy;","U+00455"
+"dsol;","U+029F6"
+"dstrok;","U+00111"
+"dtdot;","U+022F1"
+"dtri;","U+025BF"
+"dtrif;","U+025BE"
+"duarr;","U+021F5"
+"duhar;","U+0296F"
+"dwangle;","U+029A6"
+"dzcy;","U+0045F"
+"dzigrarr;","U+027FF"
+"eDDot;","U+02A77"
+"eDot;","U+02251"
+"eacute;","U+000E9"
+"eacute","U+000E9"
+"easter;","U+02A6E"
+"ecaron;","U+0011B"
+"ecir;","U+02256"
+"ecirc;","U+000EA"
+"ecirc","U+000EA"
+"ecolon;","U+02255"
+"ecy;","U+0044D"
+"edot;","U+00117"
+"ee;","U+02147"
+"efDot;","U+02252"
+"efr;","U+1D522"
+"eg;","U+02A9A"
+"egrave;","U+000E8"
+"egrave","U+000E8"
+"egs;","U+02A96"
+"egsdot;","U+02A98"
+"el;","U+02A99"
+"elinters;","U+023E7"
+"ell;","U+02113"
+"els;","U+02A95"
+"elsdot;","U+02A97"
+"emacr;","U+00113"
+"empty;","U+02205"
+"emptyset;","U+02205"
+"emptyv;","U+02205"
+"emsp13;","U+02004"
+"emsp14;","U+02005"
+"emsp;","U+02003"
+"eng;","U+0014B"
+"ensp;","U+02002"
+"eogon;","U+00119"
+"eopf;","U+1D556"
+"epar;","U+022D5"
+"eparsl;","U+029E3"
+"eplus;","U+02A71"
+"epsi;","U+003B5"
+"epsilon;","U+003B5"
+"epsiv;","U+003F5"
+"eqcirc;","U+02256"
+"eqcolon;","U+02255"
+"eqsim;","U+02242"
+"eqslantgtr;","U+02A96"
+"eqslantless;","U+02A95"
+"equals;","U+0003D"
+"equest;","U+0225F"
+"equiv;","U+02261"
+"equivDD;","U+02A78"
+"eqvparsl;","U+029E5"
+"erDot;","U+02253"
+"erarr;","U+02971"
+"escr;","U+0212F"
+"esdot;","U+02250"
+"esim;","U+02242"
+"eta;","U+003B7"
+"eth;","U+000F0"
+"eth","U+000F0"
+"euml;","U+000EB"
+"euml","U+000EB"
+"euro;","U+020AC"
+"excl;","U+00021"
+"exist;","U+02203"
+"expectation;","U+02130"
+"exponentiale;","U+02147"
+"fallingdotseq;","U+02252"
+"fcy;","U+00444"
+"female;","U+02640"
+"ffilig;","U+0FB03"
+"fflig;","U+0FB00"
+"ffllig;","U+0FB04"
+"ffr;","U+1D523"
+"filig;","U+0FB01"
+"flat;","U+0266D"
+"fllig;","U+0FB02"
+"fltns;","U+025B1"
+"fnof;","U+00192"
+"fopf;","U+1D557"
+"forall;","U+02200"
+"fork;","U+022D4"
+"forkv;","U+02AD9"
+"fpartint;","U+02A0D"
+"frac12;","U+000BD"
+"frac12","U+000BD"
+"frac13;","U+02153"
+"frac14;","U+000BC"
+"frac14","U+000BC"
+"frac15;","U+02155"
+"frac16;","U+02159"
+"frac18;","U+0215B"
+"frac23;","U+02154"
+"frac25;","U+02156"
+"frac34;","U+000BE"
+"frac34","U+000BE"
+"frac35;","U+02157"
+"frac38;","U+0215C"
+"frac45;","U+02158"
+"frac56;","U+0215A"
+"frac58;","U+0215D"
+"frac78;","U+0215E"
+"frasl;","U+02044"
+"frown;","U+02322"
+"fscr;","U+1D4BB"
+"gE;","U+02267"
+"gEl;","U+02A8C"
+"gacute;","U+001F5"
+"gamma;","U+003B3"
+"gammad;","U+003DD"
+"gap;","U+02A86"
+"gbreve;","U+0011F"
+"gcirc;","U+0011D"
+"gcy;","U+00433"
+"gdot;","U+00121"
+"ge;","U+02265"
+"gel;","U+022DB"
+"geq;","U+02265"
+"geqq;","U+02267"
+"geqslant;","U+02A7E"
+"ges;","U+02A7E"
+"gescc;","U+02AA9"
+"gesdot;","U+02A80"
+"gesdoto;","U+02A82"
+"gesdotol;","U+02A84"
+"gesles;","U+02A94"
+"gfr;","U+1D524"
+"gg;","U+0226B"
+"ggg;","U+022D9"
+"gimel;","U+02137"
+"gjcy;","U+00453"
+"gl;","U+02277"
+"glE;","U+02A92"
+"gla;","U+02AA5"
+"glj;","U+02AA4"
+"gnE;","U+02269"
+"gnap;","U+02A8A"
+"gnapprox;","U+02A8A"
+"gne;","U+02A88"
+"gneq;","U+02A88"
+"gneqq;","U+02269"
+"gnsim;","U+022E7"
+"gopf;","U+1D558"
+"grave;","U+00060"
+"gscr;","U+0210A"
+"gsim;","U+02273"
+"gsime;","U+02A8E"
+"gsiml;","U+02A90"
+"gt;","U+0003E"
+"gt","U+0003E"
+"gtcc;","U+02AA7"
+"gtcir;","U+02A7A"
+"gtdot;","U+022D7"
+"gtlPar;","U+02995"
+"gtquest;","U+02A7C"
+"gtrapprox;","U+02A86"
+"gtrarr;","U+02978"
+"gtrdot;","U+022D7"
+"gtreqless;","U+022DB"
+"gtreqqless;","U+02A8C"
+"gtrless;","U+02277"
+"gtrsim;","U+02273"
+"hArr;","U+021D4"
+"hairsp;","U+0200A"
+"half;","U+000BD"
+"hamilt;","U+0210B"
+"hardcy;","U+0044A"
+"harr;","U+02194"
+"harrcir;","U+02948"
+"harrw;","U+021AD"
+"hbar;","U+0210F"
+"hcirc;","U+00125"
+"hearts;","U+02665"
+"heartsuit;","U+02665"
+"hellip;","U+02026"
+"hercon;","U+022B9"
+"hfr;","U+1D525"
+"hksearow;","U+02925"
+"hkswarow;","U+02926"
+"hoarr;","U+021FF"
+"homtht;","U+0223B"
+"hookleftarrow;","U+021A9"
+"hookrightarrow;","U+021AA"
+"hopf;","U+1D559"
+"horbar;","U+02015"
+"hscr;","U+1D4BD"
+"hslash;","U+0210F"
+"hstrok;","U+00127"
+"hybull;","U+02043"
+"hyphen;","U+02010"
+"iacute;","U+000ED"
+"iacute","U+000ED"
+"ic;","U+02063"
+"icirc;","U+000EE"
+"icirc","U+000EE"
+"icy;","U+00438"
+"iecy;","U+00435"
+"iexcl;","U+000A1"
+"iexcl","U+000A1"
+"iff;","U+021D4"
+"ifr;","U+1D526"
+"igrave;","U+000EC"
+"igrave","U+000EC"
+"ii;","U+02148"
+"iiiint;","U+02A0C"
+"iiint;","U+0222D"
+"iinfin;","U+029DC"
+"iiota;","U+02129"
+"ijlig;","U+00133"
+"imacr;","U+0012B"
+"image;","U+02111"
+"imagline;","U+02110"
+"imagpart;","U+02111"
+"imath;","U+00131"
+"imof;","U+022B7"
+"imped;","U+001B5"
+"in;","U+02208"
+"incare;","U+02105"
+"infin;","U+0221E"
+"infintie;","U+029DD"
+"inodot;","U+00131"
+"int;","U+0222B"
+"intcal;","U+022BA"
+"integers;","U+02124"
+"intercal;","U+022BA"
+"intlarhk;","U+02A17"
+"intprod;","U+02A3C"
+"iocy;","U+00451"
+"iogon;","U+0012F"
+"iopf;","U+1D55A"
+"iota;","U+003B9"
+"iprod;","U+02A3C"
+"iquest;","U+000BF"
+"iquest","U+000BF"
+"iscr;","U+1D4BE"
+"isin;","U+02208"
+"isinE;","U+022F9"
+"isindot;","U+022F5"
+"isins;","U+022F4"
+"isinsv;","U+022F3"
+"isinv;","U+02208"
+"it;","U+02062"
+"itilde;","U+00129"
+"iukcy;","U+00456"
+"iuml;","U+000EF"
+"iuml","U+000EF"
+"jcirc;","U+00135"
+"jcy;","U+00439"
+"jfr;","U+1D527"
+"jmath;","U+00237"
+"jopf;","U+1D55B"
+"jscr;","U+1D4BF"
+"jsercy;","U+00458"
+"jukcy;","U+00454"
+"kappa;","U+003BA"
+"kappav;","U+003F0"
+"kcedil;","U+00137"
+"kcy;","U+0043A"
+"kfr;","U+1D528"
+"kgreen;","U+00138"
+"khcy;","U+00445"
+"kjcy;","U+0045C"
+"kopf;","U+1D55C"
+"kscr;","U+1D4C0"
+"lAarr;","U+021DA"
+"lArr;","U+021D0"
+"lAtail;","U+0291B"
+"lBarr;","U+0290E"
+"lE;","U+02266"
+"lEg;","U+02A8B"
+"lHar;","U+02962"
+"lacute;","U+0013A"
+"laemptyv;","U+029B4"
+"lagran;","U+02112"
+"lambda;","U+003BB"
+"lang;","U+027E8"
+"langd;","U+02991"
+"langle;","U+027E8"
+"lap;","U+02A85"
+"laquo;","U+000AB"
+"laquo","U+000AB"
+"larr;","U+02190"
+"larrb;","U+021E4"
+"larrbfs;","U+0291F"
+"larrfs;","U+0291D"
+"larrhk;","U+021A9"
+"larrlp;","U+021AB"
+"larrpl;","U+02939"
+"larrsim;","U+02973"
+"larrtl;","U+021A2"
+"lat;","U+02AAB"
+"latail;","U+02919"
+"late;","U+02AAD"
+"lbarr;","U+0290C"
+"lbbrk;","U+02772"
+"lbrace;","U+0007B"
+"lbrack;","U+0005B"
+"lbrke;","U+0298B"
+"lbrksld;","U+0298F"
+"lbrkslu;","U+0298D"
+"lcaron;","U+0013E"
+"lcedil;","U+0013C"
+"lceil;","U+02308"
+"lcub;","U+0007B"
+"lcy;","U+0043B"
+"ldca;","U+02936"
+"ldquo;","U+0201C"
+"ldquor;","U+0201E"
+"ldrdhar;","U+02967"
+"ldrushar;","U+0294B"
+"ldsh;","U+021B2"
+"le;","U+02264"
+"leftarrow;","U+02190"
+"leftarrowtail;","U+021A2"
+"leftharpoondown;","U+021BD"
+"leftharpoonup;","U+021BC"
+"leftleftarrows;","U+021C7"
+"leftrightarrow;","U+02194"
+"leftrightarrows;","U+021C6"
+"leftrightharpoons;","U+021CB"
+"leftrightsquigarrow;","U+021AD"
+"leftthreetimes;","U+022CB"
+"leg;","U+022DA"
+"leq;","U+02264"
+"leqq;","U+02266"
+"leqslant;","U+02A7D"
+"les;","U+02A7D"
+"lescc;","U+02AA8"
+"lesdot;","U+02A7F"
+"lesdoto;","U+02A81"
+"lesdotor;","U+02A83"
+"lesges;","U+02A93"
+"lessapprox;","U+02A85"
+"lessdot;","U+022D6"
+"lesseqgtr;","U+022DA"
+"lesseqqgtr;","U+02A8B"
+"lessgtr;","U+02276"
+"lesssim;","U+02272"
+"lfisht;","U+0297C"
+"lfloor;","U+0230A"
+"lfr;","U+1D529"
+"lg;","U+02276"
+"lgE;","U+02A91"
+"lhard;","U+021BD"
+"lharu;","U+021BC"
+"lharul;","U+0296A"
+"lhblk;","U+02584"
+"ljcy;","U+00459"
+"ll;","U+0226A"
+"llarr;","U+021C7"
+"llcorner;","U+0231E"
+"llhard;","U+0296B"
+"lltri;","U+025FA"
+"lmidot;","U+00140"
+"lmoust;","U+023B0"
+"lmoustache;","U+023B0"
+"lnE;","U+02268"
+"lnap;","U+02A89"
+"lnapprox;","U+02A89"
+"lne;","U+02A87"
+"lneq;","U+02A87"
+"lneqq;","U+02268"
+"lnsim;","U+022E6"
+"loang;","U+027EC"
+"loarr;","U+021FD"
+"lobrk;","U+027E6"
+"longleftarrow;","U+027F5"
+"longleftrightarrow;","U+027F7"
+"longmapsto;","U+027FC"
+"longrightarrow;","U+027F6"
+"looparrowleft;","U+021AB"
+"looparrowright;","U+021AC"
+"lopar;","U+02985"
+"lopf;","U+1D55D"
+"loplus;","U+02A2D"
+"lotimes;","U+02A34"
+"lowast;","U+02217"
+"lowbar;","U+0005F"
+"loz;","U+025CA"
+"lozenge;","U+025CA"
+"lozf;","U+029EB"
+"lpar;","U+00028"
+"lparlt;","U+02993"
+"lrarr;","U+021C6"
+"lrcorner;","U+0231F"
+"lrhar;","U+021CB"
+"lrhard;","U+0296D"
+"lrm;","U+0200E"
+"lrtri;","U+022BF"
+"lsaquo;","U+02039"
+"lscr;","U+1D4C1"
+"lsh;","U+021B0"
+"lsim;","U+02272"
+"lsime;","U+02A8D"
+"lsimg;","U+02A8F"
+"lsqb;","U+0005B"
+"lsquo;","U+02018"
+"lsquor;","U+0201A"
+"lstrok;","U+00142"
+"lt;","U+0003C"
+"lt","U+0003C"
+"ltcc;","U+02AA6"
+"ltcir;","U+02A79"
+"ltdot;","U+022D6"
+"lthree;","U+022CB"
+"ltimes;","U+022C9"
+"ltlarr;","U+02976"
+"ltquest;","U+02A7B"
+"ltrPar;","U+02996"
+"ltri;","U+025C3"
+"ltrie;","U+022B4"
+"ltrif;","U+025C2"
+"lurdshar;","U+0294A"
+"luruhar;","U+02966"
+"mDDot;","U+0223A"
+"macr;","U+000AF"
+"macr","U+000AF"
+"male;","U+02642"
+"malt;","U+02720"
+"maltese;","U+02720"
+"map;","U+021A6"
+"mapsto;","U+021A6"
+"mapstodown;","U+021A7"
+"mapstoleft;","U+021A4"
+"mapstoup;","U+021A5"
+"marker;","U+025AE"
+"mcomma;","U+02A29"
+"mcy;","U+0043C"
+"mdash;","U+02014"
+"measuredangle;","U+02221"
+"mfr;","U+1D52A"
+"mho;","U+02127"
+"micro;","U+000B5"
+"micro","U+000B5"
+"mid;","U+02223"
+"midast;","U+0002A"
+"midcir;","U+02AF0"
+"middot;","U+000B7"
+"middot","U+000B7"
+"minus;","U+02212"
+"minusb;","U+0229F"
+"minusd;","U+02238"
+"minusdu;","U+02A2A"
+"mlcp;","U+02ADB"
+"mldr;","U+02026"
+"mnplus;","U+02213"
+"models;","U+022A7"
+"mopf;","U+1D55E"
+"mp;","U+02213"
+"mscr;","U+1D4C2"
+"mstpos;","U+0223E"
+"mu;","U+003BC"
+"multimap;","U+022B8"
+"mumap;","U+022B8"
+"nLeftarrow;","U+021CD"
+"nLeftrightarrow;","U+021CE"
+"nRightarrow;","U+021CF"
+"nVDash;","U+022AF"
+"nVdash;","U+022AE"
+"nabla;","U+02207"
+"nacute;","U+00144"
+"nap;","U+02249"
+"napos;","U+00149"
+"napprox;","U+02249"
+"natur;","U+0266E"
+"natural;","U+0266E"
+"naturals;","U+02115"
+"nbsp;","U+000A0"
+"nbsp","U+000A0"
+"ncap;","U+02A43"
+"ncaron;","U+00148"
+"ncedil;","U+00146"
+"ncong;","U+02247"
+"ncup;","U+02A42"
+"ncy;","U+0043D"
+"ndash;","U+02013"
+"ne;","U+02260"
+"neArr;","U+021D7"
+"nearhk;","U+02924"
+"nearr;","U+02197"
+"nearrow;","U+02197"
+"nequiv;","U+02262"
+"nesear;","U+02928"
+"nexist;","U+02204"
+"nexists;","U+02204"
+"nfr;","U+1D52B"
+"nge;","U+02271"
+"ngeq;","U+02271"
+"ngsim;","U+02275"
+"ngt;","U+0226F"
+"ngtr;","U+0226F"
+"nhArr;","U+021CE"
+"nharr;","U+021AE"
+"nhpar;","U+02AF2"
+"ni;","U+0220B"
+"nis;","U+022FC"
+"nisd;","U+022FA"
+"niv;","U+0220B"
+"njcy;","U+0045A"
+"nlArr;","U+021CD"
+"nlarr;","U+0219A"
+"nldr;","U+02025"
+"nle;","U+02270"
+"nleftarrow;","U+0219A"
+"nleftrightarrow;","U+021AE"
+"nleq;","U+02270"
+"nless;","U+0226E"
+"nlsim;","U+02274"
+"nlt;","U+0226E"
+"nltri;","U+022EA"
+"nltrie;","U+022EC"
+"nmid;","U+02224"
+"nopf;","U+1D55F"
+"not;","U+000AC"
+"not","U+000AC"
+"notin;","U+02209"
+"notinva;","U+02209"
+"notinvb;","U+022F7"
+"notinvc;","U+022F6"
+"notni;","U+0220C"
+"notniva;","U+0220C"
+"notnivb;","U+022FE"
+"notnivc;","U+022FD"
+"npar;","U+02226"
+"nparallel;","U+02226"
+"npolint;","U+02A14"
+"npr;","U+02280"
+"nprcue;","U+022E0"
+"nprec;","U+02280"
+"nrArr;","U+021CF"
+"nrarr;","U+0219B"
+"nrightarrow;","U+0219B"
+"nrtri;","U+022EB"
+"nrtrie;","U+022ED"
+"nsc;","U+02281"
+"nsccue;","U+022E1"
+"nscr;","U+1D4C3"
+"nshortmid;","U+02224"
+"nshortparallel;","U+02226"
+"nsim;","U+02241"
+"nsime;","U+02244"
+"nsimeq;","U+02244"
+"nsmid;","U+02224"
+"nspar;","U+02226"
+"nsqsube;","U+022E2"
+"nsqsupe;","U+022E3"
+"nsub;","U+02284"
+"nsube;","U+02288"
+"nsubseteq;","U+02288"
+"nsucc;","U+02281"
+"nsup;","U+02285"
+"nsupe;","U+02289"
+"nsupseteq;","U+02289"
+"ntgl;","U+02279"
+"ntilde;","U+000F1"
+"ntilde","U+000F1"
+"ntlg;","U+02278"
+"ntriangleleft;","U+022EA"
+"ntrianglelefteq;","U+022EC"
+"ntriangleright;","U+022EB"
+"ntrianglerighteq;","U+022ED"
+"nu;","U+003BD"
+"num;","U+00023"
+"numero;","U+02116"
+"numsp;","U+02007"
+"nvDash;","U+022AD"
+"nvHarr;","U+02904"
+"nvdash;","U+022AC"
+"nvinfin;","U+029DE"
+"nvlArr;","U+02902"
+"nvrArr;","U+02903"
+"nwArr;","U+021D6"
+"nwarhk;","U+02923"
+"nwarr;","U+02196"
+"nwarrow;","U+02196"
+"nwnear;","U+02927"
+"oS;","U+024C8"
+"oacute;","U+000F3"
+"oacute","U+000F3"
+"oast;","U+0229B"
+"ocir;","U+0229A"
+"ocirc;","U+000F4"
+"ocirc","U+000F4"
+"ocy;","U+0043E"
+"odash;","U+0229D"
+"odblac;","U+00151"
+"odiv;","U+02A38"
+"odot;","U+02299"
+"odsold;","U+029BC"
+"oelig;","U+00153"
+"ofcir;","U+029BF"
+"ofr;","U+1D52C"
+"ogon;","U+002DB"
+"ograve;","U+000F2"
+"ograve","U+000F2"
+"ogt;","U+029C1"
+"ohbar;","U+029B5"
+"ohm;","U+003A9"
+"oint;","U+0222E"
+"olarr;","U+021BA"
+"olcir;","U+029BE"
+"olcross;","U+029BB"
+"oline;","U+0203E"
+"olt;","U+029C0"
+"omacr;","U+0014D"
+"omega;","U+003C9"
+"omicron;","U+003BF"
+"omid;","U+029B6"
+"ominus;","U+02296"
+"oopf;","U+1D560"
+"opar;","U+029B7"
+"operp;","U+029B9"
+"oplus;","U+02295"
+"or;","U+02228"
+"orarr;","U+021BB"
+"ord;","U+02A5D"
+"order;","U+02134"
+"orderof;","U+02134"
+"ordf;","U+000AA"
+"ordf","U+000AA"
+"ordm;","U+000BA"
+"ordm","U+000BA"
+"origof;","U+022B6"
+"oror;","U+02A56"
+"orslope;","U+02A57"
+"orv;","U+02A5B"
+"oscr;","U+02134"
+"oslash;","U+000F8"
+"oslash","U+000F8"
+"osol;","U+02298"
+"otilde;","U+000F5"
+"otilde","U+000F5"
+"otimes;","U+02297"
+"otimesas;","U+02A36"
+"ouml;","U+000F6"
+"ouml","U+000F6"
+"ovbar;","U+0233D"
+"par;","U+02225"
+"para;","U+000B6"
+"para","U+000B6"
+"parallel;","U+02225"
+"parsim;","U+02AF3"
+"parsl;","U+02AFD"
+"part;","U+02202"
+"pcy;","U+0043F"
+"percnt;","U+00025"
+"period;","U+0002E"
+"permil;","U+02030"
+"perp;","U+022A5"
+"pertenk;","U+02031"
+"pfr;","U+1D52D"
+"phi;","U+003C6"
+"phiv;","U+003D5"
+"phmmat;","U+02133"
+"phone;","U+0260E"
+"pi;","U+003C0"
+"pitchfork;","U+022D4"
+"piv;","U+003D6"
+"planck;","U+0210F"
+"planckh;","U+0210E"
+"plankv;","U+0210F"
+"plus;","U+0002B"
+"plusacir;","U+02A23"
+"plusb;","U+0229E"
+"pluscir;","U+02A22"
+"plusdo;","U+02214"
+"plusdu;","U+02A25"
+"pluse;","U+02A72"
+"plusmn;","U+000B1"
+"plusmn","U+000B1"
+"plussim;","U+02A26"
+"plustwo;","U+02A27"
+"pm;","U+000B1"
+"pointint;","U+02A15"
+"popf;","U+1D561"
+"pound;","U+000A3"
+"pound","U+000A3"
+"pr;","U+0227A"
+"prE;","U+02AB3"
+"prap;","U+02AB7"
+"prcue;","U+0227C"
+"pre;","U+02AAF"
+"prec;","U+0227A"
+"precapprox;","U+02AB7"
+"preccurlyeq;","U+0227C"
+"preceq;","U+02AAF"
+"precnapprox;","U+02AB9"
+"precneqq;","U+02AB5"
+"precnsim;","U+022E8"
+"precsim;","U+0227E"
+"prime;","U+02032"
+"primes;","U+02119"
+"prnE;","U+02AB5"
+"prnap;","U+02AB9"
+"prnsim;","U+022E8"
+"prod;","U+0220F"
+"profalar;","U+0232E"
+"profline;","U+02312"
+"profsurf;","U+02313"
+"prop;","U+0221D"
+"propto;","U+0221D"
+"prsim;","U+0227E"
+"prurel;","U+022B0"
+"pscr;","U+1D4C5"
+"psi;","U+003C8"
+"puncsp;","U+02008"
+"qfr;","U+1D52E"
+"qint;","U+02A0C"
+"qopf;","U+1D562"
+"qprime;","U+02057"
+"qscr;","U+1D4C6"
+"quaternions;","U+0210D"
+"quatint;","U+02A16"
+"quest;","U+0003F"
+"questeq;","U+0225F"
+"quot;","U+00022"
+"quot","U+00022"
+"rAarr;","U+021DB"
+"rArr;","U+021D2"
+"rAtail;","U+0291C"
+"rBarr;","U+0290F"
+"rHar;","U+02964"
+"racute;","U+00155"
+"radic;","U+0221A"
+"raemptyv;","U+029B3"
+"rang;","U+027E9"
+"rangd;","U+02992"
+"range;","U+029A5"
+"rangle;","U+027E9"
+"raquo;","U+000BB"
+"raquo","U+000BB"
+"rarr;","U+02192"
+"rarrap;","U+02975"
+"rarrb;","U+021E5"
+"rarrbfs;","U+02920"
+"rarrc;","U+02933"
+"rarrfs;","U+0291E"
+"rarrhk;","U+021AA"
+"rarrlp;","U+021AC"
+"rarrpl;","U+02945"
+"rarrsim;","U+02974"
+"rarrtl;","U+021A3"
+"rarrw;","U+0219D"
+"ratail;","U+0291A"
+"ratio;","U+02236"
+"rationals;","U+0211A"
+"rbarr;","U+0290D"
+"rbbrk;","U+02773"
+"rbrace;","U+0007D"
+"rbrack;","U+0005D"
+"rbrke;","U+0298C"
+"rbrksld;","U+0298E"
+"rbrkslu;","U+02990"
+"rcaron;","U+00159"
+"rcedil;","U+00157"
+"rceil;","U+02309"
+"rcub;","U+0007D"
+"rcy;","U+00440"
+"rdca;","U+02937"
+"rdldhar;","U+02969"
+"rdquo;","U+0201D"
+"rdquor;","U+0201D"
+"rdsh;","U+021B3"
+"real;","U+0211C"
+"realine;","U+0211B"
+"realpart;","U+0211C"
+"reals;","U+0211D"
+"rect;","U+025AD"
+"reg;","U+000AE"
+"reg","U+000AE"
+"rfisht;","U+0297D"
+"rfloor;","U+0230B"
+"rfr;","U+1D52F"
+"rhard;","U+021C1"
+"rharu;","U+021C0"
+"rharul;","U+0296C"
+"rho;","U+003C1"
+"rhov;","U+003F1"
+"rightarrow;","U+02192"
+"rightarrowtail;","U+021A3"
+"rightharpoondown;","U+021C1"
+"rightharpoonup;","U+021C0"
+"rightleftarrows;","U+021C4"
+"rightleftharpoons;","U+021CC"
+"rightrightarrows;","U+021C9"
+"rightsquigarrow;","U+0219D"
+"rightthreetimes;","U+022CC"
+"ring;","U+002DA"
+"risingdotseq;","U+02253"
+"rlarr;","U+021C4"
+"rlhar;","U+021CC"
+"rlm;","U+0200F"
+"rmoust;","U+023B1"
+"rmoustache;","U+023B1"
+"rnmid;","U+02AEE"
+"roang;","U+027ED"
+"roarr;","U+021FE"
+"robrk;","U+027E7"
+"ropar;","U+02986"
+"ropf;","U+1D563"
+"roplus;","U+02A2E"
+"rotimes;","U+02A35"
+"rpar;","U+00029"
+"rpargt;","U+02994"
+"rppolint;","U+02A12"
+"rrarr;","U+021C9"
+"rsaquo;","U+0203A"
+"rscr;","U+1D4C7"
+"rsh;","U+021B1"
+"rsqb;","U+0005D"
+"rsquo;","U+02019"
+"rsquor;","U+02019"
+"rthree;","U+022CC"
+"rtimes;","U+022CA"
+"rtri;","U+025B9"
+"rtrie;","U+022B5"
+"rtrif;","U+025B8"
+"rtriltri;","U+029CE"
+"ruluhar;","U+02968"
+"rx;","U+0211E"
+"sacute;","U+0015B"
+"sbquo;","U+0201A"
+"sc;","U+0227B"
+"scE;","U+02AB4"
+"scap;","U+02AB8"
+"scaron;","U+00161"
+"sccue;","U+0227D"
+"sce;","U+02AB0"
+"scedil;","U+0015F"
+"scirc;","U+0015D"
+"scnE;","U+02AB6"
+"scnap;","U+02ABA"
+"scnsim;","U+022E9"
+"scpolint;","U+02A13"
+"scsim;","U+0227F"
+"scy;","U+00441"
+"sdot;","U+022C5"
+"sdotb;","U+022A1"
+"sdote;","U+02A66"
+"seArr;","U+021D8"
+"searhk;","U+02925"
+"searr;","U+02198"
+"searrow;","U+02198"
+"sect;","U+000A7"
+"sect","U+000A7"
+"semi;","U+0003B"
+"seswar;","U+02929"
+"setminus;","U+02216"
+"setmn;","U+02216"
+"sext;","U+02736"
+"sfr;","U+1D530"
+"sfrown;","U+02322"
+"sharp;","U+0266F"
+"shchcy;","U+00449"
+"shcy;","U+00448"
+"shortmid;","U+02223"
+"shortparallel;","U+02225"
+"shy;","U+000AD "
+"shy","U+000AD "
+"sigma;","U+003C3"
+"sigmaf;","U+003C2"
+"sigmav;","U+003C2"
+"sim;","U+0223C"
+"simdot;","U+02A6A"
+"sime;","U+02243"
+"simeq;","U+02243"
+"simg;","U+02A9E"
+"simgE;","U+02AA0"
+"siml;","U+02A9D"
+"simlE;","U+02A9F"
+"simne;","U+02246"
+"simplus;","U+02A24"
+"simrarr;","U+02972"
+"slarr;","U+02190"
+"smallsetminus;","U+02216"
+"smashp;","U+02A33"
+"smeparsl;","U+029E4"
+"smid;","U+02223"
+"smile;","U+02323"
+"smt;","U+02AAA"
+"smte;","U+02AAC"
+"softcy;","U+0044C"
+"sol;","U+0002F"
+"solb;","U+029C4"
+"solbar;","U+0233F"
+"sopf;","U+1D564"
+"spades;","U+02660"
+"spadesuit;","U+02660"
+"spar;","U+02225"
+"sqcap;","U+02293"
+"sqcup;","U+02294"
+"sqsub;","U+0228F"
+"sqsube;","U+02291"
+"sqsubset;","U+0228F"
+"sqsubseteq;","U+02291"
+"sqsup;","U+02290"
+"sqsupe;","U+02292"
+"sqsupset;","U+02290"
+"sqsupseteq;","U+02292"
+"squ;","U+025A1"
+"square;","U+025A1"
+"squarf;","U+025AA"
+"squf;","U+025AA"
+"srarr;","U+02192"
+"sscr;","U+1D4C8"
+"ssetmn;","U+02216"
+"ssmile;","U+02323"
+"sstarf;","U+022C6"
+"star;","U+02606"
+"starf;","U+02605"
+"straightepsilon;","U+003F5"
+"straightphi;","U+003D5"
+"strns;","U+000AF"
+"sub;","U+02282"
+"subE;","U+02AC5"
+"subdot;","U+02ABD"
+"sube;","U+02286"
+"subedot;","U+02AC3"
+"submult;","U+02AC1"
+"subnE;","U+02ACB"
+"subne;","U+0228A"
+"subplus;","U+02ABF"
+"subrarr;","U+02979"
+"subset;","U+02282"
+"subseteq;","U+02286"
+"subseteqq;","U+02AC5"
+"subsetneq;","U+0228A"
+"subsetneqq;","U+02ACB"
+"subsim;","U+02AC7"
+"subsub;","U+02AD5"
+"subsup;","U+02AD3"
+"succ;","U+0227B"
+"succapprox;","U+02AB8"
+"succcurlyeq;","U+0227D"
+"succeq;","U+02AB0"
+"succnapprox;","U+02ABA"
+"succneqq;","U+02AB6"
+"succnsim;","U+022E9"
+"succsim;","U+0227F"
+"sum;","U+02211"
+"sung;","U+0266A"
+"sup1;","U+000B9"
+"sup1","U+000B9"
+"sup2;","U+000B2"
+"sup2","U+000B2"
+"sup3;","U+000B3"
+"sup3","U+000B3"
+"sup;","U+02283"
+"supE;","U+02AC6"
+"supdot;","U+02ABE"
+"supdsub;","U+02AD8"
+"supe;","U+02287"
+"supedot;","U+02AC4"
+"suphsol;","U+027C9"
+"suphsub;","U+02AD7"
+"suplarr;","U+0297B"
+"supmult;","U+02AC2"
+"supnE;","U+02ACC"
+"supne;","U+0228B"
+"supplus;","U+02AC0"
+"supset;","U+02283"
+"supseteq;","U+02287"
+"supseteqq;","U+02AC6"
+"supsetneq;","U+0228B"
+"supsetneqq;","U+02ACC"
+"supsim;","U+02AC8"
+"supsub;","U+02AD4"
+"supsup;","U+02AD6"
+"swArr;","U+021D9"
+"swarhk;","U+02926"
+"swarr;","U+02199"
+"swarrow;","U+02199"
+"swnwar;","U+0292A"
+"szlig;","U+000DF"
+"szlig","U+000DF"
+"target;","U+02316"
+"tau;","U+003C4"
+"tbrk;","U+023B4"
+"tcaron;","U+00165"
+"tcedil;","U+00163"
+"tcy;","U+00442"
+"tdot;","U+020DB"
+"telrec;","U+02315"
+"tfr;","U+1D531"
+"there4;","U+02234"
+"therefore;","U+02234"
+"theta;","U+003B8"
+"thetasym;","U+003D1"
+"thetav;","U+003D1"
+"thickapprox;","U+02248"
+"thicksim;","U+0223C"
+"thinsp;","U+02009"
+"thkap;","U+02248"
+"thksim;","U+0223C"
+"thorn;","U+000FE"
+"thorn","U+000FE"
+"tilde;","U+002DC"
+"times;","U+000D7"
+"times","U+000D7"
+"timesb;","U+022A0"
+"timesbar;","U+02A31"
+"timesd;","U+02A30"
+"tint;","U+0222D"
+"toea;","U+02928"
+"top;","U+022A4"
+"topbot;","U+02336"
+"topcir;","U+02AF1"
+"topf;","U+1D565"
+"topfork;","U+02ADA"
+"tosa;","U+02929"
+"tprime;","U+02034"
+"trade;","U+02122"
+"triangle;","U+025B5"
+"triangledown;","U+025BF"
+"triangleleft;","U+025C3"
+"trianglelefteq;","U+022B4"
+"triangleq;","U+0225C"
+"triangleright;","U+025B9"
+"trianglerighteq;","U+022B5"
+"tridot;","U+025EC"
+"trie;","U+0225C"
+"triminus;","U+02A3A"
+"triplus;","U+02A39"
+"trisb;","U+029CD"
+"tritime;","U+02A3B"
+"trpezium;","U+023E2"
+"tscr;","U+1D4C9"
+"tscy;","U+00446"
+"tshcy;","U+0045B"
+"tstrok;","U+00167"
+"twixt;","U+0226C"
+"twoheadleftarrow;","U+0219E"
+"twoheadrightarrow;","U+021A0"
+"uArr;","U+021D1"
+"uHar;","U+02963"
+"uacute;","U+000FA"
+"uacute","U+000FA"
+"uarr;","U+02191"
+"ubrcy;","U+0045E"
+"ubreve;","U+0016D"
+"ucirc;","U+000FB"
+"ucirc","U+000FB"
+"ucy;","U+00443"
+"udarr;","U+021C5"
+"udblac;","U+00171"
+"udhar;","U+0296E"
+"ufisht;","U+0297E"
+"ufr;","U+1D532"
+"ugrave;","U+000F9"
+"ugrave","U+000F9"
+"uharl;","U+021BF"
+"uharr;","U+021BE"
+"uhblk;","U+02580"
+"ulcorn;","U+0231C"
+"ulcorner;","U+0231C"
+"ulcrop;","U+0230F"
+"ultri;","U+025F8"
+"umacr;","U+0016B"
+"uml;","U+000A8"
+"uml","U+000A8"
+"uogon;","U+00173"
+"uopf;","U+1D566"
+"uparrow;","U+02191"
+"updownarrow;","U+02195"
+"upharpoonleft;","U+021BF"
+"upharpoonright;","U+021BE"
+"uplus;","U+0228E"
+"upsi;","U+003C5"
+"upsih;","U+003D2"
+"upsilon;","U+003C5"
+"upuparrows;","U+021C8"
+"urcorn;","U+0231D"
+"urcorner;","U+0231D"
+"urcrop;","U+0230E"
+"uring;","U+0016F"
+"urtri;","U+025F9"
+"uscr;","U+1D4CA"
+"utdot;","U+022F0"
+"utilde;","U+00169"
+"utri;","U+025B5"
+"utrif;","U+025B4"
+"uuarr;","U+021C8"
+"uuml;","U+000FC"
+"uuml","U+000FC"
+"uwangle;","U+029A7"
+"vArr;","U+021D5"
+"vBar;","U+02AE8"
+"vBarv;","U+02AE9"
+"vDash;","U+022A8"
+"vangrt;","U+0299C"
+"varepsilon;","U+003F5"
+"varkappa;","U+003F0"
+"varnothing;","U+02205"
+"varphi;","U+003D5"
+"varpi;","U+003D6"
+"varpropto;","U+0221D"
+"varr;","U+02195"
+"varrho;","U+003F1"
+"varsigma;","U+003C2"
+"vartheta;","U+003D1"
+"vartriangleleft;","U+022B2"
+"vartriangleright;","U+022B3"
+"vcy;","U+00432"
+"vdash;","U+022A2"
+"vee;","U+02228"
+"veebar;","U+022BB"
+"veeeq;","U+0225A"
+"vellip;","U+022EE"
+"verbar;","U+0007C"
+"vert;","U+0007C"
+"vfr;","U+1D533"
+"vltri;","U+022B2"
+"vopf;","U+1D567"
+"vprop;","U+0221D"
+"vrtri;","U+022B3"
+"vscr;","U+1D4CB"
+"vzigzag;","U+0299A"
+"wcirc;","U+00175"
+"wedbar;","U+02A5F"
+"wedge;","U+02227"
+"wedgeq;","U+02259"
+"weierp;","U+02118"
+"wfr;","U+1D534"
+"wopf;","U+1D568"
+"wp;","U+02118"
+"wr;","U+02240"
+"wreath;","U+02240"
+"wscr;","U+1D4CC"
+"xcap;","U+022C2"
+"xcirc;","U+025EF"
+"xcup;","U+022C3"
+"xdtri;","U+025BD"
+"xfr;","U+1D535"
+"xhArr;","U+027FA"
+"xharr;","U+027F7"
+"xi;","U+003BE"
+"xlArr;","U+027F8"
+"xlarr;","U+027F5"
+"xmap;","U+027FC"
+"xnis;","U+022FB"
+"xodot;","U+02A00"
+"xopf;","U+1D569"
+"xoplus;","U+02A01"
+"xotime;","U+02A02"
+"xrArr;","U+027F9"
+"xrarr;","U+027F6"
+"xscr;","U+1D4CD"
+"xsqcup;","U+02A06"
+"xuplus;","U+02A04"
+"xutri;","U+025B3"
+"xvee;","U+022C1"
+"xwedge;","U+022C0"
+"yacute;","U+000FD"
+"yacute","U+000FD"
+"yacy;","U+0044F"
+"ycirc;","U+00177"
+"ycy;","U+0044B"
+"yen;","U+000A5"
+"yen","U+000A5"
+"yfr;","U+1D536"
+"yicy;","U+00457"
+"yopf;","U+1D56A"
+"yscr;","U+1D4CE"
+"yucy;","U+0044E"
+"yuml;","U+000FF"
+"yuml","U+000FF"
+"zacute;","U+0017A"
+"zcaron;","U+0017E"
+"zcy;","U+00437"
+"zdot;","U+0017C"
+"zeetrf;","U+02128"
+"zeta;","U+003B6"
+"zfr;","U+1D537"
+"zhcy;","U+00436"
+"zigrarr;","U+021DD"
+"zopf;","U+1D56B"
+"zscr;","U+1D4CF"
+"zwj;","U+0200D"
+"zwnj;","U+0200C"
diff --git a/Source/WebCore/html/parser/HTMLEntityParser.cpp b/Source/WebCore/html/parser/HTMLEntityParser.cpp
new file mode 100644
index 0000000..6a422b8
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntityParser.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLEntityParser.h"
+
+#include "HTMLEntitySearch.h"
+#include "HTMLEntityTable.h"
+#include <wtf/Vector.h>
+
+using namespace WTF;
+
+namespace WebCore {
+
+namespace {
+
+// Replacement code points for numeric character references whose value lies in
+// the range 0x80-0x9F, one entry per input value. adjustEntity() indexes this
+// table with (value - 0x80); the trailing comment on each row gives the input
+// range that row covers.
+static const UChar windowsLatin1ExtensionArray[32] = {
+ 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
+ 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
+};
+
+// Maps a code point to the value that should actually be emitted. Values in
+// 0x80-0x9F are replaced through windowsLatin1ExtensionArray; every other
+// value is returned as-is (narrowed to UChar).
+inline UChar adjustEntity(UChar32 value)
+{
+    if (value >= 0x0080 && value <= 0x009F)
+        return windowsLatin1ExtensionArray[value - 0x80];
+    return value;
+}
+
+// Sanitizes the numeric value of a character reference. Zero, values above
+// 0x10FFFF and surrogates (0xD800-0xDFFF) become U+FFFD; remaining BMP values
+// are passed through adjustEntity(); anything else is returned untouched.
+inline UChar32 legalEntityFor(UChar32 value)
+{
+    // FIXME: A number of specific entity values generate parse errors.
+    const bool isNull = !value;
+    const bool isBeyondUnicode = value > 0x10FFFF;
+    const bool isSurrogate = value >= 0xD800 && value <= 0xDFFF;
+    if (isNull || isBeyondUnicode || isSurrogate)
+        return 0xFFFD;
+    return U_IS_BMP(value) ? adjustEntity(value) : value;
+}
+
+// Appends |value| to |decodedEntity| as UTF-16: a single code unit for BMP
+// values, otherwise a lead/trail surrogate pair. Always returns true.
+inline bool convertToUTF16(UChar32 value, Vector<UChar, 16>& decodedEntity)
+{
+    if (!U_IS_BMP(value)) {
+        decodedEntity.append(U16_LEAD(value));
+        decodedEntity.append(U16_TRAIL(value));
+        return true;
+    }
+    UChar codeUnit = static_cast<UChar>(value);
+    ASSERT(codeUnit == value);
+    decodedEntity.append(codeUnit);
+    return true;
+}
+
+// True for ASCII '0'-'9', 'a'-'f' and 'A'-'F'.
+inline bool isHexDigit(UChar cc)
+{
+    if (cc >= '0' && cc <= '9')
+        return true;
+    if (cc >= 'a' && cc <= 'f')
+        return true;
+    return cc >= 'A' && cc <= 'F';
+}
+
+// True for ASCII '0'-'9', 'a'-'z' and 'A'-'Z'.
+inline bool isAlphaNumeric(UChar cc)
+{
+    if (cc >= '0' && cc <= '9')
+        return true;
+    if (cc >= 'a' && cc <= 'z')
+        return true;
+    return cc >= 'A' && cc <= 'Z';
+}
+
+// Puts the characters in |consumedCharacters| back at the front of |source|.
+// One or two characters are returned with SegmentedString::push(); any other
+// count is prepended as a freshly built SegmentedString.
+void unconsumeCharacters(SegmentedString& source, const Vector<UChar, 10>& consumedCharacters)
+{
+    switch (consumedCharacters.size()) {
+    case 1:
+        source.push(consumedCharacters[0]);
+        break;
+    case 2:
+        source.push(consumedCharacters[0]);
+        source.push(consumedCharacters[1]);
+        break;
+    default:
+        source.prepend(SegmentedString(String(consumedCharacters.data(), consumedCharacters.size())));
+        break;
+    }
+}
+
+}
+
+// Attempts to consume one character reference from the front of |source|.
+// Parsing starts at the character after the '&' (presumably the caller has
+// already consumed the ampersand -- confirm against the tokenizer).
+//
+// Returns true and appends the decoded UTF-16 code unit(s) to |decodedEntity|
+// when a reference was recognized. Returns false otherwise, after pushing the
+// characters consumed so far back onto |source|.
+//
+// |notEnoughCharacters| must be false on entry. It is set to true when
+// |source| ran out before a decision could be made, i.e. more input could
+// still complete (or lengthen) the reference.
+//
+// |additionalAllowedCharacter| is either 0, '"', '\'' or '>'. When non-zero:
+//   - a reference starting with that character is rejected immediately, and
+//   - a named reference that does not end in ';' is rejected if the next
+//     character is alphanumeric or '='.
+bool consumeHTMLEntity(SegmentedString& source, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
+{
+    ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
+    ASSERT(!notEnoughCharacters);
+    ASSERT(decodedEntity.isEmpty());
+
+    enum EntityState {
+        Initial,
+        Number,
+        MaybeHexLowerCaseX,
+        MaybeHexUpperCaseX,
+        Hex,
+        Decimal,
+        Named
+    };
+    // Numeric accumulators saturate at this value. legalEntityFor() maps every
+    // value above 0x10FFFF to U+FFFD, so clamping here does not change the
+    // result for in-range input but keeps arbitrarily long digit runs from
+    // overflowing the signed 32-bit accumulator.
+    const UChar32 saturatedValue = 0x110000;
+    EntityState entityState = Initial;
+    UChar32 result = 0;
+    Vector<UChar, 10> consumedCharacters;
+
+    while (!source.isEmpty()) {
+        UChar cc = *source;
+        switch (entityState) {
+        case Initial: {
+            if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&')
+                return false;
+            if (additionalAllowedCharacter && cc == additionalAllowedCharacter)
+                return false;
+            if (cc == '#') {
+                entityState = Number;
+                break;
+            }
+            if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
+                // Re-dispatch on the same character without consuming it.
+                entityState = Named;
+                continue;
+            }
+            return false;
+        }
+        case Number: {
+            if (cc == 'x') {
+                entityState = MaybeHexLowerCaseX;
+                break;
+            }
+            if (cc == 'X') {
+                entityState = MaybeHexUpperCaseX;
+                break;
+            }
+            if (cc >= '0' && cc <= '9') {
+                entityState = Decimal;
+                continue;
+            }
+            source.push('#');
+            return false;
+        }
+        case MaybeHexLowerCaseX: {
+            if (isHexDigit(cc)) {
+                entityState = Hex;
+                continue;
+            }
+            source.push('#');
+            source.push('x');
+            return false;
+        }
+        case MaybeHexUpperCaseX: {
+            if (isHexDigit(cc)) {
+                entityState = Hex;
+                continue;
+            }
+            source.push('#');
+            source.push('X');
+            return false;
+        }
+        case Hex: {
+            if (cc >= '0' && cc <= '9')
+                result = result * 16 + cc - '0';
+            else if (cc >= 'a' && cc <= 'f')
+                result = result * 16 + 10 + cc - 'a';
+            else if (cc >= 'A' && cc <= 'F')
+                result = result * 16 + 10 + cc - 'A';
+            else {
+                // A trailing ';' belongs to the reference; anything else is left in |source|.
+                if (cc == ';')
+                    source.advanceAndASSERT(cc);
+                return convertToUTF16(legalEntityFor(result), decodedEntity);
+            }
+            if (result > 0x10FFFF)
+                result = saturatedValue;
+            break;
+        }
+        case Decimal: {
+            if (cc >= '0' && cc <= '9')
+                result = result * 10 + cc - '0';
+            else {
+                // A trailing ';' belongs to the reference; anything else is left in |source|.
+                if (cc == ';')
+                    source.advanceAndASSERT(cc);
+                return convertToUTF16(legalEntityFor(result), decodedEntity);
+            }
+            if (result > 0x10FFFF)
+                result = saturatedValue;
+            break;
+        }
+        case Named: {
+            HTMLEntitySearch entitySearch;
+            while (!source.isEmpty()) {
+                cc = *source;
+                entitySearch.advance(cc);
+                if (!entitySearch.isEntityPrefix())
+                    break;
+                consumedCharacters.append(cc);
+                source.advanceAndASSERT(cc);
+            }
+            notEnoughCharacters = source.isEmpty();
+            if (notEnoughCharacters) {
+                // We can't decode an entity yet because there might be a
+                // longer entity that we could match if we had more data.
+                unconsumeCharacters(source, consumedCharacters);
+                return false;
+            }
+            if (!entitySearch.mostRecentMatch()) {
+                ASSERT(!entitySearch.currentValue());
+                unconsumeCharacters(source, consumedCharacters);
+                return false;
+            }
+            if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) {
+                // We've consumed too many characters. We need to walk the
+                // source back to the point at which we had consumed an
+                // actual entity.
+                unconsumeCharacters(source, consumedCharacters);
+                consumedCharacters.clear();
+                const int length = entitySearch.mostRecentMatch()->length;
+                const UChar* reference = entitySearch.mostRecentMatch()->entity;
+                for (int i = 0; i < length; ++i) {
+                    cc = *source;
+                    ASSERT_UNUSED(reference, cc == *reference++);
+                    consumedCharacters.append(cc);
+                    source.advanceAndASSERT(cc);
+                    ASSERT(!source.isEmpty());
+                }
+                cc = *source;
+            }
+            // |cc| is now the character following the matched entity name.
+            if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
+                || !additionalAllowedCharacter
+                || !(isAlphaNumeric(cc) || cc == '=')) {
+                return convertToUTF16(entitySearch.mostRecentMatch()->value, decodedEntity);
+            }
+            unconsumeCharacters(source, consumedCharacters);
+            return false;
+        }
+        }
+        consumedCharacters.append(cc);
+        source.advanceAndASSERT(cc);
+    }
+    // Ran out of input in the middle of a reference: restore |source| and ask
+    // the caller to retry once more data is available.
+    ASSERT(source.isEmpty());
+    notEnoughCharacters = true;
+    unconsumeCharacters(source, consumedCharacters);
+    return false;
+}
+
+// Looks up the named entity |name| (given without the trailing ';') and
+// returns its value as a single UTF-16 code unit. Returns 0 when |name| is
+// not a prefix of any known entity, or when the value needs more than one
+// code unit.
+UChar decodeNamedEntity(const char* name)
+{
+    HTMLEntitySearch search;
+    for (const char* cursor = name; *cursor; ++cursor) {
+        search.advance(*cursor);
+        if (!search.isEntityPrefix())
+            return 0;
+    }
+    search.advance(';');
+    UChar32 entityValue = search.currentValue();
+    if (U16_LENGTH(entityValue) == 1)
+        return static_cast<UChar>(entityValue);
+
+    // Callers need to move off this API if the entity table has values
+    // which do not fit in a 16 bit UChar!
+    ASSERT_NOT_REACHED();
+    return 0;
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/html/parser/HTMLEntityParser.h b/Source/WebCore/html/parser/HTMLEntityParser.h
new file mode 100644
index 0000000..f02e849
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntityParser.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLEntityParser_h
+#define HTMLEntityParser_h
+
+#include "SegmentedString.h"
+
+namespace WebCore {
+
+bool consumeHTMLEntity(SegmentedString&, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter = '\0');
+
+// Used by the XML parser. Not suitable for use in HTML parsing. Use consumeHTMLEntity instead.
+UChar decodeNamedEntity(const char*); // Returns 0 when the name (with ';' appended) is not an entity or its value does not fit in one UChar.
+
+} // namespace WebCore
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLEntitySearch.cpp b/Source/WebCore/html/parser/HTMLEntitySearch.cpp
new file mode 100644
index 0000000..56fb91a
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntitySearch.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLEntitySearch.h"
+
+#include "HTMLEntityTable.h"
+
+namespace WebCore {
+
+namespace {
+
+const HTMLEntityTableEntry* halfway(const HTMLEntityTableEntry* left, const HTMLEntityTableEntry* right)
+{
+    // Entry midway between |left| and |right|; the offset is halved with
+    // integer division.
+    return left + (right - left) / 2;
+}
+
+}
+
+HTMLEntitySearch::HTMLEntitySearch()
+ : m_currentLength(0) // No characters consumed yet.
+ , m_currentValue(0)
+ , m_mostRecentMatch(0) // No exact match seen yet.
+ , m_first(HTMLEntityTable::firstEntry()) // The candidate range starts out as firstEntry()..lastEntry().
+ , m_last(HTMLEntityTable::lastEntry())
+{
+}
+
+HTMLEntitySearch::CompareResult HTMLEntitySearch::compare(const HTMLEntityTableEntry* entry, UChar nextCharacter) const
+{
+    // An entry with no character at index m_currentLength is classified as
+    // Before; otherwise the entry's character at that index decides.
+    if (entry->length <= m_currentLength)
+        return Before;
+    const UChar candidate = entry->entity[m_currentLength];
+    if (candidate < nextCharacter)
+        return Before;
+    if (candidate > nextCharacter)
+        return After;
+    return Prefix;
+}
+
+const HTMLEntityTableEntry* HTMLEntitySearch::findFirst(UChar nextCharacter) const
+{
+    const HTMLEntityTableEntry* lower = m_first;
+    const HTMLEntityTableEntry* upper = m_last;
+    // A one-entry range cannot be narrowed any further.
+    if (lower == upper)
+        return lower;
+    const CompareResult atLower = compare(lower, nextCharacter);
+    if (atLower == Prefix)
+        return lower;
+    if (atLower == After)
+        return upper;
+    // From here on |lower| always compares Before; bisect until the two
+    // ends are adjacent.
+    while (lower + 1 < upper) {
+        const HTMLEntityTableEntry* middle = halfway(lower, upper);
+        const CompareResult atMiddle = compare(middle, nextCharacter);
+        if (atMiddle != Before) {
+            ASSERT(atMiddle == After || atMiddle == Prefix);
+            upper = middle;
+            continue;
+        }
+        lower = middle;
+    }
+    ASSERT(lower + 1 == upper);
+    return upper;
+}
+
+const HTMLEntityTableEntry* HTMLEntitySearch::findLast(UChar nextCharacter) const
+{
+    const HTMLEntityTableEntry* lower = m_first;
+    const HTMLEntityTableEntry* upper = m_last;
+    // A one-entry range cannot be narrowed any further.
+    if (lower == upper)
+        return upper;
+    const CompareResult atUpper = compare(upper, nextCharacter);
+    if (atUpper == Prefix)
+        return upper;
+    if (atUpper == Before)
+        return lower;
+    // From here on |upper| always compares After; bisect until the two
+    // ends are adjacent.
+    while (lower + 1 < upper) {
+        const HTMLEntityTableEntry* middle = halfway(lower, upper);
+        const CompareResult atMiddle = compare(middle, nextCharacter);
+        if (atMiddle != After) {
+            ASSERT(atMiddle == Before || atMiddle == Prefix);
+            lower = middle;
+            continue;
+        }
+        upper = middle;
+    }
+    ASSERT(lower + 1 == upper);
+    return lower;
+}
+
+void HTMLEntitySearch::advance(UChar nextCharacter)
+{
+    ASSERT(isEntityPrefix());
+    if (m_currentLength) {
+        // Narrow the existing candidate range. m_first must be updated
+        // before findLast() runs, since findLast() reads it.
+        m_first = findFirst(nextCharacter);
+        m_last = findLast(nextCharacter);
+        if (m_first == m_last && compare(m_first, nextCharacter) != Prefix)
+            return fail();
+    } else {
+        // First character: take the initial range from the table's
+        // per-character lookup.
+        m_first = HTMLEntityTable::firstEntryStartingWith(nextCharacter);
+        m_last = HTMLEntityTable::lastEntryStartingWith(nextCharacter);
+        if (!m_first || !m_last)
+            return fail();
+    }
+    ++m_currentLength;
+    if (m_first->length == m_currentLength) {
+        // The first candidate is exactly as long as what has been consumed:
+        // remember it as the most recent match.
+        m_mostRecentMatch = m_first;
+        m_currentValue = m_mostRecentMatch->value;
+        return;
+    }
+    m_currentValue = 0;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLEntitySearch.h b/Source/WebCore/html/parser/HTMLEntitySearch.h
new file mode 100644
index 0000000..0c66318
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntitySearch.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLEntitySearch_h
+#define HTMLEntitySearch_h
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+struct HTMLEntityTableEntry;
+
+class HTMLEntitySearch {
+public:
+ HTMLEntitySearch();
+
+ void advance(UChar); // Narrows the candidate range using one more character; must only be called while isEntityPrefix() is true.
+
+ bool isEntityPrefix() const { return !!m_first; } // False after fail() has zeroed m_first.
+ UChar32 currentValue() const { return m_currentValue; } // 0 unless the first candidate's length equals currentLength().
+ int currentLength() const { return m_currentLength; } // Number of characters accepted by advance() so far.
+
+ const HTMLEntityTableEntry* mostRecentMatch() const { return m_mostRecentMatch; } // Most recent entry whose length equalled currentLength(); 0 if none yet.
+
+private:
+ enum CompareResult {
+ Before, // Entry is too short, or its character at the current index is less than the next character.
+ Prefix, // Entry's character at the current index equals the next character.
+ After, // Entry's character at the current index is greater than the next character.
+ };
+
+ CompareResult compare(const HTMLEntityTableEntry*, UChar) const;
+ const HTMLEntityTableEntry* findFirst(UChar) const;
+ const HTMLEntityTableEntry* findLast(UChar) const;
+
+ void fail() // Marks the search as failed; m_mostRecentMatch and m_currentLength are left untouched.
+ {
+ m_currentValue = 0;
+ m_first = 0;
+ m_last = 0;
+ }
+
+ int m_currentLength;
+ UChar32 m_currentValue;
+
+ const HTMLEntityTableEntry* m_mostRecentMatch;
+ const HTMLEntityTableEntry* m_first; // Ends of the current candidate range in the entity table.
+ const HTMLEntityTableEntry* m_last;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLEntityTable.h b/Source/WebCore/html/parser/HTMLEntityTable.h
new file mode 100644
index 0000000..3b9ab4e
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLEntityTable.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLEntityTable_h
+#define HTMLEntityTable_h
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+struct HTMLEntityTableEntry {
+ UChar lastCharacter() const { return entity[length - 1]; } // Reads entity[length - 1], so length must be at least 1.
+
+ const UChar* entity; // Characters of the entity name; indexed up to |length|.
+ int length;
+ UChar32 value; // Code point the entity decodes to.
+};
+
+class HTMLEntityTable {
+public:
+ static const HTMLEntityTableEntry* firstEntry(); // Used with lastEntry() as the initial range of an HTMLEntitySearch.
+ static const HTMLEntityTableEntry* lastEntry();
+
+ static const HTMLEntityTableEntry* firstEntryStartingWith(UChar); // May return 0; HTMLEntitySearch::advance() treats that as a failed search.
+ static const HTMLEntityTableEntry* lastEntryStartingWith(UChar); // May return 0, handled the same way.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLFormattingElementList.cpp b/Source/WebCore/html/parser/HTMLFormattingElementList.cpp
new file mode 100644
index 0000000..22bf03e
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLFormattingElementList.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLFormattingElementList.h"
+
+#include "Element.h"
+#include "NotImplemented.h"
+
+namespace WebCore {
+
+HTMLFormattingElementList::HTMLFormattingElementList()
+{ // Nothing to set up beyond the default-constructed m_entries.
+}
+
+HTMLFormattingElementList::~HTMLFormattingElementList()
+{ // No explicit cleanup is performed here.
+}
+
+Element* HTMLFormattingElementList::closestElementInScopeWithName(const AtomicString& targetName)
+{
+    // Walk from the last entry back towards the first; reaching a marker
+    // ends the search without a result.
+    for (size_t remaining = m_entries.size(); remaining > 0; --remaining) {
+        const Entry& candidate = m_entries[remaining - 1];
+        if (candidate.isMarker())
+            break;
+        Element* element = candidate.element();
+        if (element->hasLocalName(targetName))
+            return element;
+    }
+    return 0;
+}
+
+bool HTMLFormattingElementList::contains(Element* element)
+{
+    // True when find() locates an entry holding |element|.
+    return find(element) != 0;
+}
+
+HTMLFormattingElementList::Entry* HTMLFormattingElementList::find(Element* element)
+{
+    // Search from the end of the list; returns 0 when |element| is absent.
+    const size_t position = m_entries.reverseFind(element);
+    if (position == notFound)
+        return 0;
+    // Returning a mutable pointer into m_entries is somewhat of a hack, and
+    // is why this method can't be const.
+    return &m_entries[position];
+}
+
+HTMLFormattingElementList::Bookmark HTMLFormattingElementList::bookmarkFor(Element* element)
+{
+    // |element| is expected to be in the list; the bookmark points at its
+    // entry as found searching from the end.
+    const size_t position = m_entries.reverseFind(element);
+    ASSERT(position != notFound);
+    Entry& bookmarked = at(position);
+    return Bookmark(&bookmarked);
+}
+
+void HTMLFormattingElementList::swapTo(Element* oldElement, Element* newElement, const Bookmark& bookmark)
+{
+    ASSERT(contains(oldElement));
+    ASSERT(!contains(newElement));
+    if (bookmark.hasBeenMoved()) {
+        // The bookmark now names a different entry: put the new element
+        // right after it, then drop the old element.
+        const size_t markIndex = bookmark.mark() - first();
+        ASSERT(markIndex < size());
+        m_entries.insert(markIndex + 1, newElement);
+        remove(oldElement);
+        return;
+    }
+    // The bookmark still points at the old element's entry, so the element
+    // can be replaced in place.
+    ASSERT(bookmark.mark()->element() == oldElement);
+    bookmark.mark()->replaceElement(newElement);
+}
+
+void HTMLFormattingElementList::append(Element* element)
+{
+ m_entries.append(element); // Becomes the last entry of the list.
+}
+
+void HTMLFormattingElementList::remove(Element* element)
+{
+    // Removes the entry found searching from the end; does nothing when
+    // |element| is not in the list.
+    const size_t position = m_entries.reverseFind(element);
+    if (position == notFound)
+        return;
+    m_entries.remove(position);
+}
+
+void HTMLFormattingElementList::appendMarker()
+{
+ m_entries.append(Entry::MarkerEntry); // A marker is an Entry constructed with a null element.
+}
+
+void HTMLFormattingElementList::clearToLastMarker()
+{
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
+    // Pop entries from the end; the marker itself is popped too, and
+    // popping it ends the loop.
+    while (m_entries.size() > 0) {
+        const bool removedMarker = m_entries.last().isMarker();
+        m_entries.removeLast();
+        if (removedMarker)
+            return;
+    }
+}
+
+#ifndef NDEBUG
+
+void HTMLFormattingElementList::show()
+{
+    // Debug dump of the list, last entry first.
+    for (size_t remaining = m_entries.size(); remaining > 0; --remaining) {
+        const Entry& entry = m_entries[remaining - 1];
+        if (!entry.isMarker()) {
+            entry.element()->showNode();
+            continue;
+        }
+        fprintf(stderr, "marker\n");
+    }
+}
+
+#endif
+
+}
diff --git a/Source/WebCore/html/parser/HTMLFormattingElementList.h b/Source/WebCore/html/parser/HTMLFormattingElementList.h
new file mode 100644
index 0000000..aca05bb
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLFormattingElementList.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLFormattingElementList_h
+#define HTMLFormattingElementList_h
+
+#include <wtf/Forward.h>
+#include <wtf/RefPtr.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+class Element;
+
+// This may end up merged into HTMLElementStack.
+class HTMLFormattingElementList : public Noncopyable {
+public:
+ HTMLFormattingElementList();
+ ~HTMLFormattingElementList();
+
+ // Ideally Entry would be private, but HTMLTreeBuilder has to coordinate
+ // between the HTMLFormattingElementList and HTMLElementStack and needs
+ // access to Entry::isMarker() and Entry::replaceElement() to do so.
+ class Entry {
+ public:
+ // Inline because they're hot and Vector<T> uses them.
+ explicit Entry(Element* element)
+ : m_element(element)
+ {
+ ASSERT(element);
+ }
+ enum MarkerEntryType { MarkerEntry };
+ Entry(MarkerEntryType) // Builds a marker: an entry whose element is null.
+ : m_element(0)
+ {
+ }
+ ~Entry() {}
+
+ bool isMarker() const { return !m_element; }
+
+ Element* element() const
+ {
+ // The fact that !m_element == isMarker() is an implementation detail;
+ // callers should check isMarker() before calling element().
+ ASSERT(m_element);
+ return m_element.get();
+ }
+ void replaceElement(PassRefPtr<Element> element) { m_element = element; }
+
+ // Needed for use with Vector. These are super-hot and must be inline.
+ bool operator==(Element* element) const { return m_element == element; }
+ bool operator!=(Element* element) const { return m_element != element; }
+
+ private:
+ RefPtr<Element> m_element; // Null for marker entries.
+ };
+
+ class Bookmark {
+ public:
+ Bookmark(Entry* entry)
+ : m_hasBeenMoved(false)
+ , m_mark(entry)
+ {
+ }
+
+ void moveToAfter(Entry* before) // Re-targets the bookmark; swapTo() then inserts right after |before|.
+ {
+ m_hasBeenMoved = true;
+ m_mark = before;
+ }
+
+ bool hasBeenMoved() const { return m_hasBeenMoved; } // True once moveToAfter() has been called.
+ Entry* mark() const { return m_mark; }
+
+ private:
+ bool m_hasBeenMoved;
+ Entry* m_mark; // Raw pointer to an Entry; not owned by the bookmark.
+ };
+
+ bool isEmpty() const { return !size(); }
+ size_t size() const { return m_entries.size(); }
+
+ Element* closestElementInScopeWithName(const AtomicString&); // Searches back from the last entry, stopping at a marker; 0 if not found.
+
+ Entry* find(Element*); // 0 when the element is not in the list.
+ bool contains(Element*);
+ void append(Element*);
+ void remove(Element*); // No-op when the element is not in the list.
+
+ Bookmark bookmarkFor(Element*);
+ void swapTo(Element* oldElement, Element* newElement, const Bookmark&);
+
+ void appendMarker();
+ // clearToLastMarker also clears the marker (per the HTML5 spec).
+ void clearToLastMarker();
+
+ const Entry& at(size_t i) const { return m_entries[i]; }
+ Entry& at(size_t i) { return m_entries[i]; }
+
+#ifndef NDEBUG
+ void show(); // Debug-only dump of the list to stderr.
+#endif
+
+private:
+ Entry* first() { return &at(0); } // Used by swapTo() to turn a bookmark pointer into an index.
+
+ Vector<Entry> m_entries;
+};
+
+}
+
+#endif // HTMLFormattingElementList_h
diff --git a/Source/WebCore/html/parser/HTMLInputStream.h b/Source/WebCore/html/parser/HTMLInputStream.h
new file mode 100644
index 0000000..1bfbaf9
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLInputStream.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLInputStream_h
+#define HTMLInputStream_h
+
+#include "SegmentedString.h"
+
+namespace WebCore {
+
+// The InputStream is made up of a sequence of SegmentedStrings:
+//
+// [--current--][--next--][--next--] ... [--next--]
+// /\ (also called m_last)
+// L_ current insertion point
+//
+// The current segmented string is stored in InputStream. Each of the
+// afterInsertionPoint buffers are stored in InsertionPointRecords on the
+// stack.
+//
+// We remove characters from the "current" string in the InputStream.
+// document.write() will add characters at the current insertion point,
+// which appends them to the "current" string.
+//
+// m_last is a pointer to the last of the afterInsertionPoint strings.
+// The network adds data at the end of the InputStream, which appends
+// them to the "last" string.
+class HTMLInputStream : public Noncopyable {
+public:
+ HTMLInputStream()
+ : m_last(&m_first) // With a single string, m_first is also the last one.
+ {
+ }
+
+ void appendToEnd(const SegmentedString& string)
+ {
+ m_last->append(string);
+ }
+
+ void insertAtCurrentInsertionPoint(const SegmentedString& string)
+ {
+ m_first.append(string);
+ }
+
+ bool hasInsertionPoint() const
+ {
+ if (&m_first != m_last)
+ return true;
+ if (!haveSeenEndOfFile()) {
+ // FIXME: Somehow we need to understand the difference between
+ // input streams that are coming off the network and streams that
+ // were created with document.open(). In the latter case, we always
+ // have an insertion point at the end of the stream until someone
+ // calls document.close().
+ return true;
+ }
+ return false;
+ }
+
+ void markEndOfFile()
+ {
+ // FIXME: This should use InputStreamPreprocessor::endOfFileMarker
+ // once InputStreamPreprocessor is split off into its own header.
+ static const UChar endOfFileMarker = 0;
+ m_last->append(SegmentedString(String(&endOfFileMarker, 1))); // Appends a single 0 character as the end-of-file marker.
+ m_last->close();
+ }
+
+ bool haveSeenEndOfFile() const
+ {
+ return m_last->isClosed();
+ }
+
+ SegmentedString& current() { return m_first; }
+ const SegmentedString& current() const { return m_first; }
+
+ void splitInto(SegmentedString& next)
+ {
+ next = m_first; // |next| takes over the current contents; m_first restarts empty.
+ m_first = SegmentedString();
+ if (m_last == &m_first) {
+ // We used to only have one SegmentedString in the InputStream
+ // but now we have two. That means m_first is no longer also
+ // the m_last string, |next| is now the last one.
+ m_last = &next;
+ }
+ }
+
+ void mergeFrom(SegmentedString& next)
+ {
+ m_first.append(next);
+ if (m_last == &next) {
+ // The string |next| used to be the last SegmentedString in
+ // the InputStream. Now that it's been merged into m_first,
+ // that makes m_first the last one.
+ m_last = &m_first;
+ }
+ if (next.isClosed()) {
+ // We also need to merge the "closed" state from next to
+ // m_first. Arguably, this work could be done in append().
+ m_first.close();
+ }
+ }
+
+private:
+ SegmentedString m_first;
+ SegmentedString* m_last; // Points at m_first or at a |next| string handed to splitInto(); not owned.
+};
+
+class InsertionPointRecord : public Noncopyable {
+public:
+ explicit InsertionPointRecord(HTMLInputStream& inputStream)
+ : m_inputStream(&inputStream)
+ {
+ m_line = m_inputStream->current().currentLine(); // Capture the position before splitInto() replaces the current string.
+ m_column = m_inputStream->current().currentColumn();
+ m_inputStream->splitInto(m_next);
+ // We 'fork' the current position and use it for the generated script part.
+ // This is a bit weird, because the generated part does not have positions within an HTML document.
+ m_inputStream->current().setCurrentPosition(m_line, m_column, 0);
+ }
+
+ ~InsertionPointRecord()
+ {
+ // Some inserted text may have remained in the input stream. E.g. if a script has written "&amp" or "<table",
+ // it stays in the buffer because it cannot be properly tokenized before we see the next part.
+ int unparsedRemainderLength = m_inputStream->current().length(); // Must be read before mergeFrom() appends m_next.
+ m_inputStream->mergeFrom(m_next);
+ // We restore the position for the character that goes right after the unparsed remainder.
+ m_inputStream->current().setCurrentPosition(m_line, m_column, unparsedRemainderLength);
+ }
+
+private:
+ HTMLInputStream* m_inputStream;
+ SegmentedString m_next; // Holds what was in the stream's current string when this record was created.
+ WTF::ZeroBasedNumber m_line;
+ WTF::ZeroBasedNumber m_column;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp b/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
new file mode 100644
index 0000000..eac7d28
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLMetaCharsetParser.h"
+
+#include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
+#include "HTMLTokenizer.h"
+#include "PlatformString.h"
+#include "TextCodec.h"
+#include "TextEncodingRegistry.h"
+
+using namespace WTF;
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+HTMLMetaCharsetParser::HTMLMetaCharsetParser()
+ : m_tokenizer(HTMLTokenizer::create(false)) // No pre-HTML5 parser quirks.
+ , m_assumedCodec(newTextCodec(Latin1Encoding())) // checkForMetaCharset() decodes incoming bytes with this Latin-1 codec.
+ , m_inHeadSection(true)
+ , m_doneChecking(false)
+{
+}
+
+HTMLMetaCharsetParser::~HTMLMetaCharsetParser()
+{ // No explicit cleanup is performed here.
+}
+
+static const char charsetString[] = "charset"; // Keyword extractCharset() searches for.
+static const size_t charsetLength = sizeof("charset") - 1; // Length of that keyword, excluding the terminating NUL.
+
+String HTMLMetaCharsetParser::extractCharset(const String& value)
+{ // Returns the text following "charset" and '=' in |value|, or "" when none can be extracted.
+ size_t pos = 0;
+ unsigned length = value.length();
+
+ while (pos < length) {
+ pos = value.find(charsetString, pos, false); // NOTE(review): the false argument presumably requests a case-insensitive search; confirm against String::find.
+ if (pos == notFound)
+ break;
+
+ pos += charsetLength;
+
+ // Skip whitespace (any character <= ' ').
+ while (pos < length && value[pos] <= ' ')
+ ++pos;
+
+ if (value[pos] != '=') // NOTE(review): pos may equal length here; this relies on String::operator[] tolerating that index. Confirm.
+ continue; // Not followed by '=': look for a later occurrence of the keyword.
+
+ ++pos;
+
+ while (pos < length && value[pos] <= ' ')
+ ++pos;
+
+ char quoteMark = 0; // 0 means the value is unquoted.
+ if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {
+ quoteMark = static_cast<char>(value[pos++]);
+ ASSERT(!(quoteMark & 0x80));
+ }
+
+ if (pos == length)
+ break;
+
+ unsigned end = pos; // A quoted value runs to the matching quote; an unquoted one stops at a character <= ' ', a quote or ';'.
+ while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';')))
+ ++end;
+
+ if (quoteMark && (end == length))
+ break; // Close quote not found.
+
+ return value.substring(pos, end - pos);
+ }
+
+ return "";
+}
+
+bool HTMLMetaCharsetParser::processMeta()
+{
+    // Scans the attributes of the current token for a charset declaration,
+    // either a charset attribute or a content attribute accompanied by
+    // http-equiv="content-type". Returns true when m_encoding ends up valid.
+    bool sawContentTypePragma = false;
+    Mode declarationMode = None;
+    String charsetName;
+
+    const HTMLToken::AttributeList& attributeList = m_token.attributes();
+    HTMLToken::AttributeList::const_iterator attribute = attributeList.begin();
+    for (; attribute != attributeList.end(); ++attribute) {
+        AtomicString name(attribute->m_name.data(), attribute->m_name.size());
+        String value(attribute->m_value.data(), attribute->m_value.size());
+
+        if (name == http_equivAttr) {
+            if (equalIgnoringCase(value, "content-type"))
+                sawContentTypePragma = true;
+            continue;
+        }
+        // Once a non-empty charset has been picked up, later charset and
+        // content attributes are ignored.
+        if (!charsetName.isEmpty())
+            continue;
+        if (name == charsetAttr) {
+            charsetName = value;
+            declarationMode = Charset;
+            continue;
+        }
+        if (name == contentAttr) {
+            charsetName = extractCharset(value);
+            if (charsetName.length())
+                declarationMode = Pragma;
+        }
+    }
+
+    const bool haveDeclaration = declarationMode == Charset || (declarationMode == Pragma && sawContentTypePragma);
+    if (!haveDeclaration)
+        return false;
+
+    m_encoding = TextEncoding(stripLeadingAndTrailingHTMLSpaces(charsetName));
+    if (m_encoding.isValid())
+        return true;
+    return false;
+}
+
+static const int bytesToCheckUnconditionally = 1024; // At least this many input bytes are checked for a meta charset even if the <head> section is over.
+
+bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
+{
+ if (m_doneChecking)
+ return true;
+
+ ASSERT(!m_encoding.isValid());
+
+ // We still don't have an encoding, and are in the head.
+ // The following tags are allowed in <head>:
+ // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
+
+ // We stop scanning when a tag that is not permitted in <head>
+ // is seen, rather than when </head> is seen, because that more closely
+ // matches behavior in other browsers; more details in
+ // <http://bugs.webkit.org/show_bug.cgi?id=3590>.
+
+ // Additionally, we ignore things that look like tags in <title>, <script>
+ // and <noscript>; see <http://bugs.webkit.org/show_bug.cgi?id=4560>,
+ // <http://bugs.webkit.org/show_bug.cgi?id=12165> and
+ // <http://bugs.webkit.org/show_bug.cgi?id=12389>.
+
+ // Since many sites have charset declarations after <body> or other tags
+ // that are disallowed in <head>, we don't bail out until we've checked at
+ // least bytesToCheckUnconditionally bytes of input.
+
+ m_input.append(SegmentedString(m_assumedCodec->decode(data, length))); // Decode the new bytes with the assumed codec and queue them for tokenizing.
+
+ while (m_tokenizer->nextToken(m_input, m_token)) {
+ bool end = m_token.type() == HTMLToken::EndTag;
+ if (end || m_token.type() == HTMLToken::StartTag) {
+ AtomicString tagName(m_token.name().data(), m_token.name().size());
+ if (!end) {
+ m_tokenizer->updateStateFor(tagName, 0);
+ if (tagName == metaTag && processMeta()) { // A <meta> start tag yielded a valid encoding.
+ m_doneChecking = true;
+ return true;
+ }
+ }
+
+ if (tagName != scriptTag && tagName != noscriptTag
+ && tagName != styleTag && tagName != linkTag
+ && tagName != metaTag && tagName != objectTag
+ && tagName != titleTag && tagName != baseTag
+ && (end || tagName != htmlTag) && (end || tagName != headTag)) { // <html> and <head> are exempt only as start tags.
+ m_inHeadSection = false;
+ }
+ }
+
+ if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToCheckUnconditionally) { // Past the head and past the unconditional window: stop looking.
+ m_doneChecking = true;
+ return true;
+ }
+
+ m_token.clear();
+ }
+
+ return false; // Input exhausted without a decision; more data is needed.
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLMetaCharsetParser.h b/Source/WebCore/html/parser/HTMLMetaCharsetParser.h
new file mode 100644
index 0000000..c3136f5
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLMetaCharsetParser.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLMetaCharsetParser_h
+#define HTMLMetaCharsetParser_h
+
+#include "HTMLToken.h"
+#include "SegmentedString.h"
+#include "TextEncoding.h"
+#include <wtf/Noncopyable.h>
+
+namespace WebCore {
+
+class HTMLTokenizer;
+class TextCodec;
+
+// Looks for a <meta> charset declaration by decoding incoming bytes with an
+// assumed codec and running the result through an HTMLTokenizer.
+class HTMLMetaCharsetParser : public Noncopyable {
+public:
+ // Factory function; the constructor itself is private.
+ static PassOwnPtr<HTMLMetaCharsetParser> create() { return adoptPtr(new HTMLMetaCharsetParser()); }
+
+ ~HTMLMetaCharsetParser();
+
+ // Returns true if done checking, regardless whether an encoding is found.
+ // Takes a pointer to raw input bytes and their length; the decoded text is
+ // appended to the input gathered by earlier calls.
+ bool checkForMetaCharset(const char*, size_t);
+
+ // Returns m_encoding.
+ // NOTE(review): presumably only meaningful after checkForMetaCharset() has
+ // returned true -- confirm against the .cpp.
+ const TextEncoding& encoding() { return m_encoding; }
+
+private:
+ HTMLMetaCharsetParser();
+
+ // Called for each <meta> start tag; a true result ends the scan.
+ bool processMeta();
+ String extractCharset(const String&);
+
+ // NOTE(review): not referenced in this header; presumably used by the
+ // <meta> processing code in the .cpp -- confirm.
+ enum Mode {
+ None,
+ Charset,
+ Pragma,
+ };
+
+ OwnPtr<HTMLTokenizer> m_tokenizer;
+ // Codec used to decode the raw bytes handed to checkForMetaCharset().
+ OwnPtr<TextCodec> m_assumedCodec;
+ // Decoded input accumulated across checkForMetaCharset() calls.
+ SegmentedString m_input;
+ // Token object reused (and cleared) for every token pulled from m_input.
+ HTMLToken m_token;
+ // Cleared once a tag that is not allowed in <head> has been seen.
+ bool m_inHeadSection;
+
+ // Set when checkForMetaCharset() returns true.
+ bool m_doneChecking;
+ TextEncoding m_encoding;
+};
+
+}
+#endif
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.cpp b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
new file mode 100644
index 0000000..91ff8d3
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLParserIdioms.h"
+
+#include <wtf/MathExtras.h>
+#include <wtf/dtoa.h>
+#include <wtf/text/AtomicString.h>
+
+namespace WebCore {
+
+// Returns |string| with HTML space characters removed from both ends.
+// A string with nothing to keep comes back unchanged if it is null, and as
+// the shared empty string otherwise.
+String stripLeadingAndTrailingHTMLSpaces(const String& string)
+{
+    const UChar* const data = string.characters();
+    const unsigned length = string.length();
+
+    // Index of the first character that has to be kept.
+    unsigned first = 0;
+    while (first < length && isHTMLSpace(data[first]))
+        ++first;
+
+    // Nothing but spaces (or nothing at all).
+    if (first == length) {
+        if (string.isNull())
+            return string;
+        return emptyAtom.string();
+    }
+
+    // One past the last character that has to be kept. data[first] is not a
+    // space, so the backwards scan stops before passing |first|.
+    unsigned last = length;
+    while (last > 0 && isHTMLSpace(data[last - 1]))
+        --last;
+
+    ASSERT(first < last);
+
+    return string.substring(first, last - first);
+}
+
+// Serializes |number| for number and range inputs. HTML5 defines "the best
+// representation of the number n as a floating point number" as the result of
+// applying ToString() to n, which is what numberToString() is used for here.
+String serializeForNumberType(double number)
+{
+    NumberToStringBuffer characters;
+    const unsigned written = numberToString(number, characters);
+    return String(characters, written);
+}
+
+// Parses |string| as an HTML5 real number (see HTML5 2.4.4.3 `Real numbers.').
+// Returns false if the text is not acceptable; otherwise returns true and, when
+// |result| is non-null, stores the parsed value there (with -0 turned into +0).
+bool parseToDoubleForNumberType(const String& string, double* result)
+{
+    // String::toDouble() accepts leading + and whitespace characters, which are
+    // not valid here, so require the text to open with '-' or a digit.
+    const UChar lead = string[0];
+    const bool opensLikeNumber = lead == '-' || isASCIIDigit(lead);
+    if (!opensLikeNumber)
+        return false;
+
+    bool converted = false;
+    const double parsed = string.toDouble(&converted);
+
+    // String::toDouble() considers NaN and infinity valid; they are not valid here.
+    if (!converted || !isfinite(parsed))
+        return false;
+
+    // Numbers are considered finite IEEE 754 single-precision floating point values.
+    if (parsed < -FLT_MAX || parsed > FLT_MAX)
+        return false;
+
+    // Callers may pass a null |result| just to validate the text.
+    if (!result)
+        return true;
+
+    // The following expression converts -0 to +0.
+    *result = parsed ? parsed : 0;
+    return true;
+}
+
+// Parses |string| exactly like parseToDoubleForNumberType() and additionally
+// reports, through |decimalPlaces|, how many digits follow the decimal point
+// once the exponent has been applied (e.g. "1.25" -> 2, "1.5e1" -> 0,
+// "1e-3" -> 3). The reported count is clamped to the range [0, 19999].
+//
+// |result| and |decimalPlaces| may each be null. When |decimalPlaces| is
+// non-null it is reset to 0 up front, so it is also 0 when parsing fails.
+// Returns false if |string| is not a valid number.
+bool parseToDoubleForNumberTypeWithDecimalPlaces(const String& string, double *result, unsigned *decimalPlaces)
+{
+    // Largest exponent magnitude / decimal place count that is tracked; the
+    // same bound dtoa() uses ("safe for 16 bit ints").
+    static const int maxMagnitude = 19999;
+
+    if (decimalPlaces)
+        *decimalPlaces = 0;
+
+    if (!parseToDoubleForNumberType(string, result))
+        return false;
+
+    if (!decimalPlaces)
+        return true;
+
+    size_t dotIndex = string.find('.');
+    size_t eIndex = string.find('e');
+    if (eIndex == notFound)
+        eIndex = string.find('E');
+
+    // Digits between the '.' and the exponent marker (or the end of the string).
+    unsigned baseDecimalPlaces = 0;
+    if (dotIndex != notFound) {
+        if (eIndex == notFound)
+            baseDecimalPlaces = string.length() - dotIndex - 1;
+        else
+            baseDecimalPlaces = eIndex - dotIndex - 1;
+    }
+
+    int exponent = 0;
+    if (eIndex != notFound) {
+        unsigned cursor = eIndex + 1, cursorSaved;
+        int digit, exponentSign;
+        int32_t exponent32;
+        size_t length = string.length();
+
+        // Not using String.toInt() in order to perform the same computation as dtoa() does.
+        exponentSign = 0;
+        switch (digit = string[cursor]) {
+        case '-':
+            exponentSign = 1;
+            // Fall through: a sign character of either kind is skipped.
+        case '+':
+            digit = string[++cursor];
+        }
+        if (digit >= '0' && digit <= '9') {
+            // Skip leading zeros of the exponent.
+            while (cursor < length && digit == '0')
+                digit = string[++cursor];
+            if (digit > '0' && digit <= '9') {
+                exponent32 = digit - '0';
+                cursorSaved = cursor;
+                while (cursor < length && (digit = string[++cursor]) >= '0' && digit <= '9') {
+                    // Stop accumulating once the bound is exceeded: the value is
+                    // clamped below anyway, and continuing could overflow the
+                    // signed accumulator on very long digit runs.
+                    if (exponent32 <= maxMagnitude)
+                        exponent32 = (10 * exponent32) + digit - '0';
+                }
+                if (cursor - cursorSaved > 8 || exponent32 > maxMagnitude)
+                    /* Avoid confusion from exponents
+                     * so large that e might overflow.
+                     */
+                    exponent = maxMagnitude; /* safe for 16 bit ints */
+                else
+                    exponent = static_cast<int>(exponent32);
+                if (exponentSign)
+                    exponent = -exponent;
+            } else
+                exponent = 0;
+        }
+    }
+
+    // A positive exponent consumes decimal places, a negative one adds them.
+    int intDecimalPlaces = baseDecimalPlaces - exponent;
+    if (intDecimalPlaces < 0)
+        *decimalPlaces = 0;
+    else if (intDecimalPlaces > maxMagnitude)
+        *decimalPlaces = maxMagnitude;
+    else
+        *decimalPlaces = static_cast<unsigned>(intDecimalPlaces);
+
+    return true;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
+//
+// Parses |input| as an HTML integer: optional leading HTML whitespace, an
+// optional '-' or '+' sign, then one or more ASCII digits. Anything after the
+// digit run is ignored.
+//
+// Returns false, leaving |value| untouched, when no digit follows the optional
+// whitespace and sign. Also returns false when the digit run cannot be
+// converted by charactersToIntStrict() (for example because it does not fit in
+// an int); |value| is still assigned in that case but must not be relied upon.
+// Returns true with the parsed number in |value| otherwise.
+bool parseHTMLInteger(const String& input, int& value)
+{
+    // Step 1
+    // Step 2
+    const UChar* position = input.characters();
+    const UChar* end = position + input.length();
+
+    // Step 3
+    int sign = 1;
+
+    // Step 4
+    while (position < end) {
+        if (!isHTMLSpace(*position))
+            break;
+        ++position;
+    }
+
+    // Step 5
+    if (position == end)
+        return false;
+    ASSERT(position < end);
+
+    // Step 6
+    if (*position == '-') {
+        sign = -1;
+        ++position;
+    } else if (*position == '+')
+        ++position;
+    if (position == end)
+        return false;
+    ASSERT(position < end);
+
+    // Step 7
+    if (!isASCIIDigit(*position))
+        return false;
+
+    // Step 8
+    Vector<UChar, 16> digits;
+    while (position < end) {
+        if (!isASCIIDigit(*position))
+            break;
+        digits.append(*position++);
+    }
+
+    // Step 9
+    // Propagate the conversion status instead of discarding it, so a digit run
+    // that cannot be converted is reported as a failure rather than as success.
+    bool ok = false;
+    value = sign * charactersToIntStrict(digits.data(), digits.size(), &ok);
+    return ok;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.h b/Source/WebCore/html/parser/HTMLParserIdioms.h
new file mode 100644
index 0000000..4e8e58f
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLParserIdioms.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLParserIdioms_h
+#define HTMLParserIdioms_h
+
+#include <wtf/Forward.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Space characters as defined by the HTML specification.
+// isHTMLSpace() is true for U+0020, U+000A, U+0009, U+000D and U+000C;
+// isNotHTMLSpace() is its negation.
+bool isHTMLSpace(UChar);
+bool isNotHTMLSpace(UChar);
+
+// Strip leading and trailing whitespace as defined by the HTML specification.
+String stripLeadingAndTrailingHTMLSpaces(const String&);
+
+// An implementation of the HTML specification's algorithm to convert a number to a string for number and range types.
+String serializeForNumberType(double);
+
+// Convert the specified string to a double. If the conversion fails, the return value is false.
+// Leading or trailing illegal characters cause failure, as does passing an empty string.
+// The double* parameter may be 0 to check if the string can be parsed without getting the result.
+bool parseToDoubleForNumberType(const String&, double*);
+// Same as parseToDoubleForNumberType(), and additionally stores the number of decimal places
+// of the parsed text through the unsigned* parameter, which may also be 0.
+bool parseToDoubleForNumberTypeWithDecimalPlaces(const String&, double*, unsigned*);
+
+// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
+// Returns false when the string holds no digits after optional whitespace and an optional sign;
+// on success the parsed number is stored in the int& parameter.
+bool parseHTMLInteger(const String&, int&);
+
+// Inline implementations of some of the functions declared above.
+
+// Returns true for the HTML space characters: U+0020 SPACE, U+000A LF,
+// U+0009 TAB, U+000D CR and U+000C FF.
+inline bool isHTMLSpace(UChar character)
+{
+    // The order of the tests follows a histogram taken from Apple's page load
+    // test, combined with some ad hoc browsing and some other test suites:
+    //
+    //     82%: 216330 non-space characters, all > U+0020
+    //     11%:  30017 plain space characters, U+0020
+    //      5%:  12099 newline characters, U+000A
+    //      2%:   5346 tab characters, U+0009
+    //
+    // No other characters were seen: no U+000C or U+000D, and no other control
+    // characters. So non-spaces are rejected first, then space, newline and
+    // tab are tested, and the remaining characters come last.
+
+    if (character > ' ')
+        return false;
+    if (character == ' ' || character == '\n' || character == '\t')
+        return true;
+    return character == '\r' || character == '\f';
+}
+
+// Logical negation of isHTMLSpace().
+inline bool isNotHTMLSpace(UChar character)
+{
+    if (isHTMLSpace(character))
+        return false;
+    return true;
+}
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLParserScheduler.cpp b/Source/WebCore/html/parser/HTMLParserScheduler.cpp
new file mode 100644
index 0000000..56db1aa
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLParserScheduler.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLParserScheduler.h"
+
+#include "FrameView.h" // Only for isLayoutTimerActive
+#include "HTMLDocumentParser.h"
+#include "Document.h"
+
+// defaultParserChunkSize is used to define how many tokens the parser will
+// process before checking against parserTimeLimit and possibly yielding.
+// This is a performance optimization to prevent checking after every token.
+static const int defaultParserChunkSize = 4096;
+
+// defaultParserTimeLimit is the number of seconds the parser will run in one
+// write() call before yielding. Inline <script> execution can cause it to
+// exceed the limit.
+// FIXME: We would like this value to be 0.2.
+static const double defaultParserTimeLimit = 0.500;
+
+namespace WebCore {
+
+// Returns the parser time limit in seconds: the page's custom value when the
+// page exists and has one, defaultParserTimeLimit otherwise.
+static double parserTimeLimit(Page* page)
+{
+    if (!page || !page->hasCustomHTMLTokenizerTimeDelay())
+        return defaultParserTimeLimit;
+
+    // We're using the poorly named customHTMLTokenizerTimeDelay setting.
+    return page->customHTMLTokenizerTimeDelay();
+}
+
+// Returns the parser chunk size: the page's custom value when the page exists
+// and has one, defaultParserChunkSize otherwise.
+static int parserChunkSize(Page* page)
+{
+    if (!page || !page->hasCustomHTMLTokenizerChunkSize())
+        return defaultParserChunkSize;
+
+    // FIXME: We may need to divide the value from customHTMLTokenizerChunkSize
+    // by some constant to translate from the "character" based behavior of the
+    // old LegacyHTMLDocumentParser to the token-based behavior of this parser.
+    return page->customHTMLTokenizerChunkSize();
+}
+
+// Binds the scheduler to |parser| and captures the time limit and chunk size
+// from the page of the parser's document at construction time.
+// The limit and chunk-size initializers read m_parser, so they rely on
+// m_parser being initialized before them.
+HTMLParserScheduler::HTMLParserScheduler(HTMLDocumentParser* parser)
+ : m_parser(parser)
+ , m_parserTimeLimit(parserTimeLimit(m_parser->document()->page()))
+ , m_parserChunkSize(parserChunkSize(m_parser->document()->page()))
+ , m_continueNextChunkTimer(this, &HTMLParserScheduler::continueNextChunkTimerFired)
+ , m_isSuspendedWithActiveTimer(false)
+{
+}
+
+// Stops the continue-next-chunk timer on destruction.
+// NOTE(review): presumably so a pending resume cannot fire for a destroyed
+// scheduler -- confirm against Timer's own destructor behavior.
+HTMLParserScheduler::~HTMLParserScheduler()
+{
+ m_continueNextChunkTimer.stop();
+}
+
+// Returns true when |doc| has a view with a layout pending and the document
+// reports no minimum layout delay.
+// FIXME: This belongs on Document.
+static bool isLayoutTimerActive(Document* doc)
+{
+    ASSERT(doc);
+    if (!doc->view())
+        return false;
+    if (!doc->view()->layoutPending())
+        return false;
+    return !doc->minimumLayoutDelay();
+}
+
+// Timer callback: resumes the parser after a yield, unless a layout is
+// scheduled, in which case the timer is re-armed instead.
+void HTMLParserScheduler::continueNextChunkTimerFired(Timer<HTMLParserScheduler>* timer)
+{
+    ASSERT_UNUSED(timer, timer == &m_continueNextChunkTimer);
+
+    const bool layoutGoesFirst = isLayoutTimerActive(m_parser->document());
+    if (!layoutGoesFirst) {
+        m_parser->resumeParsingAfterYield();
+        return;
+    }
+
+    // FIXME: The timer class should handle timer priorities instead of this code.
+    // A layout is scheduled: wait again to let the layout timer run first.
+    m_continueNextChunkTimer.startOneShot(0);
+}
+
+// Stops a pending continue-next-chunk timer and records that it was active,
+// so that resume() can start it again. Does nothing when no timer is pending.
+void HTMLParserScheduler::suspend()
+{
+    ASSERT(!m_isSuspendedWithActiveTimer);
+    if (m_continueNextChunkTimer.isActive()) {
+        m_isSuspendedWithActiveTimer = true;
+        m_continueNextChunkTimer.stop();
+    }
+}
+
+// Restarts the continue-next-chunk timer if suspend() stopped it while it was
+// active; otherwise does nothing.
+void HTMLParserScheduler::resume()
+{
+    ASSERT(!m_continueNextChunkTimer.isActive());
+    if (m_isSuspendedWithActiveTimer) {
+        m_isSuspendedWithActiveTimer = false;
+        m_continueNextChunkTimer.startOneShot(0);
+    }
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLParserScheduler.h b/Source/WebCore/html/parser/HTMLParserScheduler.h
new file mode 100644
index 0000000..3a20b2b
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLParserScheduler.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLParserScheduler_h
+#define HTMLParserScheduler_h
+
+#include "Timer.h"
+#include <wtf/CurrentTime.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+class HTMLDocumentParser;
+
+// Decides when an HTMLDocumentParser should yield, and schedules its
+// resumption with a zero-delay one-shot timer.
+class HTMLParserScheduler : public Noncopyable {
+public:
+ // Factory function; the constructor itself is private.
+ static PassOwnPtr<HTMLParserScheduler> create(HTMLDocumentParser* parser)
+ {
+ return adoptPtr(new HTMLParserScheduler(parser));
+ }
+ ~HTMLParserScheduler();
+
+ // Bookkeeping passed to shouldContinueParsing(): a token counter and the
+ // time (from currentTime()) at which the session object was created.
+ struct PumpSession {
+ PumpSession()
+ : processedTokens(0)
+ , startTime(currentTime())
+ {
+ }
+
+ int processedTokens;
+ double startTime;
+ };
+
+ // Inline as this is called after every token in the parser.
+ // Returns true to keep parsing. The clock is only consulted once the token
+ // counter exceeds m_parserChunkSize (the counter is then reset); if more
+ // than m_parserTimeLimit has elapsed since session.startTime, the resume
+ // timer is started and false is returned.
+ bool shouldContinueParsing(PumpSession& session)
+ {
+ if (session.processedTokens > m_parserChunkSize) {
+ session.processedTokens = 0;
+ double elapsedTime = currentTime() - session.startTime;
+ if (elapsedTime > m_parserTimeLimit) {
+ // Schedule the parser to continue and yield from the parser.
+ m_continueNextChunkTimer.startOneShot(0);
+ return false;
+ }
+ }
+
+ ++session.processedTokens;
+ return true;
+ }
+
+ // True when the resume timer is active, or was active when suspend() was called.
+ bool isScheduledForResume() const { return m_isSuspendedWithActiveTimer || m_continueNextChunkTimer.isActive(); }
+
+ // suspend() stops an active resume timer; resume() starts it again.
+ void suspend();
+ void resume();
+
+private:
+ HTMLParserScheduler(HTMLDocumentParser*);
+
+ void continueNextChunkTimerFired(Timer<HTMLParserScheduler>*);
+
+ HTMLDocumentParser* m_parser;
+
+ // Time limit (compared against currentTime() differences) and token count
+ // used by shouldContinueParsing().
+ double m_parserTimeLimit;
+ int m_parserChunkSize;
+ Timer<HTMLParserScheduler> m_continueNextChunkTimer;
+ // Set by suspend() when it stopped an active timer; cleared by resume().
+ bool m_isSuspendedWithActiveTimer;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLPreloadScanner.cpp b/Source/WebCore/html/parser/HTMLPreloadScanner.cpp
new file mode 100644
index 0000000..d23542f
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLPreloadScanner.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLPreloadScanner.h"
+
+#include "CachedResourceLoader.h"
+#include "Document.h"
+#include "HTMLDocumentParser.h"
+#include "HTMLTokenizer.h"
+#include "HTMLLinkElement.h"
+#include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
+#include "MediaList.h"
+#include "MediaQueryEvaluator.h"
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+// Gathers, from one start-tag token, what is needed to preload the resource
+// referenced by a <script>, <img> or <link> tag. Tokens for other tags produce
+// a task with no URL, whose preload() does nothing.
+class PreloadTask {
+public:
+    PreloadTask(const HTMLToken& token)
+        : m_tagName(token.name().data(), token.name().size())
+        , m_linkIsStyleSheet(false)
+        , m_linkMediaAttributeIsScreen(true)
+    {
+        processAttributes(token.attributes());
+    }
+
+    // Records the URL, the charset and (for <link>) the rel/media verdicts
+    // found in |attributes|.
+    void processAttributes(const HTMLToken::AttributeList& attributes)
+    {
+        const bool loadsFromSrc = m_tagName == scriptTag || m_tagName == imgTag;
+        const bool isLink = m_tagName == linkTag;
+        if (!loadsFromSrc && !isLink)
+            return;
+
+        const HTMLToken::AttributeList::const_iterator last = attributes.end();
+        for (HTMLToken::AttributeList::const_iterator it = attributes.begin(); it != last; ++it) {
+            const AtomicString name(it->m_name.data(), it->m_name.size());
+            const String value(it->m_value.data(), it->m_value.size());
+
+            if (name == charsetAttr)
+                m_charset = value;
+
+            // <script> and <img> take their URL from src.
+            if (loadsFromSrc) {
+                if (name == srcAttr)
+                    setUrlToLoad(value);
+                continue;
+            }
+
+            // Only <link> gets here.
+            if (name == hrefAttr)
+                setUrlToLoad(value);
+            else if (name == relAttr)
+                m_linkIsStyleSheet = relAttributeIsStyleSheet(value);
+            else if (name == mediaAttr)
+                m_linkMediaAttributeIsScreen = linkMediaAttributeIsScreen(value);
+        }
+    }
+
+    // True when the rel value names a style sheet that is not also an
+    // alternate, an icon or a DNS prefetch.
+    static bool relAttributeIsStyleSheet(const String& attributeValue)
+    {
+        HTMLLinkElement::RelAttribute rel;
+        HTMLLinkElement::tokenizeRelAttribute(attributeValue, rel);
+        if (!rel.m_isStyleSheet)
+            return false;
+        return !(rel.m_isAlternate || rel.m_isIcon || rel.m_isDNSPrefetch);
+    }
+
+    // True when the media value is empty or evaluates to true for "screen".
+    static bool linkMediaAttributeIsScreen(const String& attributeValue)
+    {
+        if (attributeValue.isEmpty())
+            return true;
+
+        RefPtr<MediaList> mediaList = MediaList::createAllowingDescriptionSyntax(attributeValue);
+
+        // Only preload screen media stylesheets. Used this way, the evaluator evaluates to true for any
+        // rules containing complex queries (full evaluation is possible but it requires a frame and a
+        // style selector which may be problematic here).
+        MediaQueryEvaluator screenEvaluator("screen");
+        return screenEvaluator.eval(mediaList.get());
+    }
+
+    // Stores the whitespace-stripped URL unless one has already been stored.
+    void setUrlToLoad(const String& attributeValue)
+    {
+        // We only respect the first src/href, per HTML5:
+        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
+        if (m_urlToLoad.isEmpty())
+            m_urlToLoad = stripLeadingAndTrailingHTMLSpaces(attributeValue);
+    }
+
+    // Asks the document's CachedResourceLoader to preload the recorded URL
+    // with the resource type matching the tag. Does nothing without a URL.
+    void preload(Document* document, bool scanningBody)
+    {
+        if (m_urlToLoad.isEmpty())
+            return;
+
+        CachedResourceLoader* loader = document->cachedResourceLoader();
+        if (m_tagName == scriptTag) {
+            loader->preload(CachedResource::Script, m_urlToLoad, m_charset, scanningBody);
+            return;
+        }
+        if (m_tagName == imgTag) {
+            loader->preload(CachedResource::ImageResource, m_urlToLoad, String(), scanningBody);
+            return;
+        }
+        if (m_tagName == linkTag && m_linkIsStyleSheet && m_linkMediaAttributeIsScreen)
+            loader->preload(CachedResource::CSSStyleSheet, m_urlToLoad, m_charset, scanningBody);
+    }
+
+    const AtomicString& tagName() const { return m_tagName; }
+
+private:
+    AtomicString m_tagName;
+    String m_urlToLoad;
+    String m_charset;
+    bool m_linkIsStyleSheet;
+    bool m_linkMediaAttributeIsScreen;
+};
+
+} // namespace
+
+// Creates a scanner for |document| with its own CSS scanner and its own
+// tokenizer. The tokenizer is created with the same pre-HTML5 quirks setting
+// that HTMLDocumentParser reports for the document. Scanning starts with no
+// <body> seen and outside of any <style> element.
+HTMLPreloadScanner::HTMLPreloadScanner(Document* document)
+ : m_document(document)
+ , m_cssScanner(document)
+ , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document)))
+ , m_bodySeen(false)
+ , m_inStyle(false)
+{
+}
+
+// Appends |source| to the input that scan() tokenizes.
+void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
+{
+ m_source.append(source);
+}
+
+// Tokenizes the buffered source, handing each token to processToken(), until
+// the tokenizer reports that it has no further token.
+void HTMLPreloadScanner::scan()
+{
+    // FIXME: We should save and re-use these tokens in HTMLDocumentParser if
+    // the pending script doesn't end up calling document.write.
+    for (;;) {
+        if (!m_tokenizer->nextToken(m_source, m_token))
+            break;
+        processToken();
+        // The token object is reused, so reset it before the next round.
+        m_token.clear();
+    }
+}
+
+// Handles the current token. While inside <style>, character tokens are fed to
+// the CSS scanner and an end tag leaves style mode. Start tags update the
+// tokenizer state and the body/style flags, then trigger the preload.
+void HTMLPreloadScanner::processToken()
+{
+    if (m_inStyle && m_token.type() == HTMLToken::Character)
+        m_cssScanner.scan(m_token, scanningBody());
+    else if (m_inStyle && m_token.type() == HTMLToken::EndTag) {
+        m_inStyle = false;
+        m_cssScanner.reset();
+    }
+
+    // Everything below applies to start tags only.
+    if (m_token.type() != HTMLToken::StartTag)
+        return;
+
+    PreloadTask task(m_token);
+    const AtomicString& tag = task.tagName();
+    m_tokenizer->updateStateFor(tag, m_document->frame());
+
+    if (tag == bodyTag)
+        m_bodySeen = true;
+    else if (tag == styleTag)
+        m_inStyle = true;
+
+    task.preload(m_document, scanningBody());
+}
+
+// True when the document already has a body, or when this scanner has seen a
+// <body> start tag.
+bool HTMLPreloadScanner::scanningBody() const
+{
+    if (m_document->body())
+        return true;
+    return m_bodySeen;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLPreloadScanner.h b/Source/WebCore/html/parser/HTMLPreloadScanner.h
new file mode 100644
index 0000000..94a90e6
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLPreloadScanner.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLPreloadScanner_h
+#define HTMLPreloadScanner_h
+
+#include "CSSPreloadScanner.h"
+#include "HTMLToken.h"
+#include "SegmentedString.h"
+#include <wtf/Noncopyable.h>
+
+namespace WebCore {
+
+class Document;
+class HTMLToken;
+class HTMLTokenizer;
+class SegmentedString;
+
+// Tokenizes buffered HTML source and preloads the resources referenced by the
+// <script>, <img> and <link> start tags it finds; the text of <style> elements
+// is passed to a CSSPreloadScanner.
+class HTMLPreloadScanner : public Noncopyable {
+public:
+ HTMLPreloadScanner(Document*);
+
+ // Adds more source text to the end of the buffered input.
+ void appendToEnd(const SegmentedString&);
+ // Processes tokens from the buffered input until the tokenizer yields no more.
+ void scan();
+
+private:
+ // Handles the token currently held in m_token.
+ void processToken();
+ // True if the document has a body or a <body> start tag has been scanned.
+ bool scanningBody() const;
+
+ Document* m_document;
+ // Input appended through appendToEnd() and consumed by the tokenizer.
+ SegmentedString m_source;
+ CSSPreloadScanner m_cssScanner;
+ OwnPtr<HTMLTokenizer> m_tokenizer;
+ // Token object reused for every token; cleared by scan() after each one.
+ HTMLToken m_token;
+ // Set once a <body> start tag has been scanned.
+ bool m_bodySeen;
+ // Set on a <style> start tag; cleared by the next end tag seen while set.
+ bool m_inStyle;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLScriptRunner.cpp b/Source/WebCore/html/parser/HTMLScriptRunner.cpp
new file mode 100644
index 0000000..2fe1d30
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLScriptRunner.cpp
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLScriptRunner.h"
+
+#include "Attribute.h"
+#include "CachedScript.h"
+#include "CachedResourceLoader.h"
+#include "Element.h"
+#include "Event.h"
+#include "Frame.h"
+#include "HTMLInputStream.h"
+#include "HTMLNames.h"
+#include "HTMLScriptRunnerHost.h"
+#include "IgnoreDestructiveWriteCountIncrementer.h"
+#include "NestingLevelIncrementer.h"
+#include "NotImplemented.h"
+#include "ScriptElement.h"
+#include "ScriptSourceCode.h"
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+HTMLScriptRunner::HTMLScriptRunner(Document* document, HTMLScriptRunnerHost* host)
+ : m_document(document)
+ , m_host(host)
+ , m_scriptNestingLevel(0)
+ , m_hasScriptsWaitingForStylesheets(false)
+{ // Both pointers are stored as given; no script is executing and no stylesheet wait is recorded yet.
+ ASSERT(m_host); // A host is mandatory; the document pointer is not checked here.
+}
+
+HTMLScriptRunner::~HTMLScriptRunner()
+{ // Cancels every load watch still registered with the host: the blocking script first, then the deferred queue.
+ // FIXME: Should we be passed a "done loading/parsing" callback sooner than destruction?
+ if (m_parsingBlockingScript.cachedScript() && m_parsingBlockingScript.watchingForLoad())
+ stopWatchingForLoad(m_parsingBlockingScript);
+
+ while (!m_scriptsToExecuteAfterParsing.isEmpty()) { // Drain the deferred queue; entries are discarded without being executed.
+ PendingScript pendingScript = m_scriptsToExecuteAfterParsing.takeFirst();
+ if (pendingScript.cachedScript() && pendingScript.watchingForLoad())
+ stopWatchingForLoad(pendingScript);
+ }
+}
+
+void HTMLScriptRunner::detach()
+{
+ m_document = 0; // Only the document pointer is dropped; m_host and pending scripts are untouched. executeScriptsWaitingForParsing() checks for this.
+}
+
+static KURL documentURLForScriptExecution(Document* document) // File-local helper: URL to attribute inline script source to.
+{
+ if (!document || !document->frame())
+ return KURL(); // No document or no frame: fall back to a default-constructed KURL.
+
+ // Use the URL of the currently active document for this frame.
+ return document->frame()->document()->url();
+}
+
+inline PassRefPtr<Event> createScriptLoadEvent() // Builds the "load" event dispatched after a pending script has run.
+{
+ return Event::create(eventNames().loadEvent, false, false); // NOTE(review): the two flags are presumably canBubble/cancelable — confirm against Event::create.
+}
+
+inline PassRefPtr<Event> createScriptErrorEvent() // Builds the "error" event dispatched when a pending script's load reported an error.
+{
+ return Event::create(eventNames().errorEvent, true, false); // NOTE(review): first flag differs from the load event (true here); presumably canBubble — confirm against Event::create.
+}
+
+ScriptSourceCode HTMLScriptRunner::sourceFromPendingScript(const PendingScript& script, bool& errorOccurred) const
+{ // Produces the source to execute for a pending script; errorOccurred is always written.
+ if (script.cachedScript()) { // External script: source comes from the cached resource.
+ errorOccurred = script.cachedScript()->errorOccurred();
+ ASSERT(script.cachedScript()->isLoaded()); // Callers must only ask once the load has finished.
+ return ScriptSourceCode(script.cachedScript());
+ }
+ errorOccurred = false; // No cached resource: the source is the element's own text, so no load error is reported.
+ return ScriptSourceCode(script.element()->textContent(), documentURLForScriptExecution(m_document), script.startingPosition());
+}
+
+bool HTMLScriptRunner::isPendingScriptReady(const PendingScript& script)
+{ // Not a pure query: every call also refreshes m_hasScriptsWaitingForStylesheets. Dereferences m_document unconditionally.
+ m_hasScriptsWaitingForStylesheets = !m_document->haveStylesheetsLoaded();
+ if (m_hasScriptsWaitingForStylesheets)
+ return false; // Stylesheets are still pending; nothing may run yet.
+ if (script.cachedScript() && !script.cachedScript()->isLoaded()) // External script whose load has not finished.
+ return false;
+ return true;
+}
+
+void HTMLScriptRunner::executeParsingBlockingScript()
+{ // Runs m_parsingBlockingScript; preconditions are only asserted, not enforced in release builds.
+ ASSERT(m_document);
+ ASSERT(!m_scriptNestingLevel);
+ ASSERT(m_document->haveStylesheetsLoaded());
+ ASSERT(isPendingScriptReady(m_parsingBlockingScript)); // In assert-enabled builds this call also refreshes m_hasScriptsWaitingForStylesheets.
+
+ InsertionPointRecord insertionPointRecord(m_host->inputStream()); // Local object constructed on the host's input stream; destroyed when this function returns.
+ executePendingScriptAndDispatchEvent(m_parsingBlockingScript);
+}
+
+void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendingScript)
+{ // Executes one pending script (or reports its load error) and leaves pendingScript cleared.
+ bool errorOccurred = false;
+ ScriptSourceCode sourceCode = sourceFromPendingScript(pendingScript, errorOccurred); // Must be fetched before pendingScript is cleared below.
+
+ // Stop watching loads before executeScript to prevent recursion if the script reloads itself.
+ if (pendingScript.cachedScript() && pendingScript.watchingForLoad())
+ stopWatchingForLoad(pendingScript);
+
+ // Clear the pending script before possible reentrancy from executeScript()
+ RefPtr<Element> element = pendingScript.releaseElementAndClear(); // The local RefPtr holds the element for the rest of this function.
+ if (ScriptElement* scriptElement = toScriptElement(element.get())) {
+ NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel);
+ IgnoreDestructiveWriteCountIncrementer ignoreDestructiveWriteCountIncrementer(m_document);
+ if (errorOccurred) // Load failed: dispatch "error" and do not execute anything.
+ element->dispatchEvent(createScriptErrorEvent());
+ else {
+ ASSERT(isExecutingScript());
+ scriptElement->executeScript(sourceCode);
+ element->dispatchEvent(createScriptLoadEvent()); // "load" is dispatched after the script has run.
+ }
+ }
+ ASSERT(!m_scriptNestingLevel); // Expected to be back at zero once the incrementer above has gone out of scope.
+}
+
+void HTMLScriptRunner::watchForLoad(PendingScript& pendingScript)
+{ // Asks the host to watch the script's cached resource and records that fact on the pending script.
+ ASSERT(!pendingScript.watchingForLoad()); // Must not already be watching.
+ m_host->watchForLoad(pendingScript.cachedScript());
+ pendingScript.setWatchingForLoad(true);
+}
+
+void HTMLScriptRunner::stopWatchingForLoad(PendingScript& pendingScript)
+{ // Inverse of watchForLoad(): tells the host to stop watching and clears the flag.
+ ASSERT(pendingScript.watchingForLoad()); // Callers in this file check watchingForLoad() before calling.
+ m_host->stopWatchingForLoad(pendingScript.cachedScript());
+ pendingScript.setWatchingForLoad(false);
+}
+
+// This function should match 10.2.5.11 "An end tag whose tag name is 'script'"
+// Script handling lives outside the tree builder to keep each class simple.
+bool HTMLScriptRunner::execute(PassRefPtr<Element> scriptElement, const TextPosition1& scriptStartPosition)
+{ // Returns true when parsing may continue, false when the parser must stay blocked.
+ ASSERT(scriptElement);
+ // FIXME: If scripting is disabled, always just return true;
+
+ // Try to execute the script given to us.
+ runScript(scriptElement.get(), scriptStartPosition);
+
+ if (haveParsingBlockingScript()) { // runScript() may have recorded a blocking script instead of running it.
+ if (m_scriptNestingLevel)
+ return false; // Block the parser. Unwind to the outermost HTMLScriptRunner::execute before continuing parsing.
+ if (!executeParsingBlockingScripts())
+ return false; // We still have a parsing blocking script, block the parser.
+ }
+ return true; // Scripts executed as expected, continue parsing.
+}
+
+bool HTMLScriptRunner::haveParsingBlockingScript() const
+{
+ return !!m_parsingBlockingScript.element(); // Keyed on the element only; true with or without a cached script attached.
+}
+
+bool HTMLScriptRunner::executeParsingBlockingScripts()
+{ // Returns true once no blocking script remains, false if one is still not ready.
+ while (haveParsingBlockingScript()) { // Loops because running one script may leave a new blocking script recorded.
+ // We only really need to check once.
+ if (!isPendingScriptReady(m_parsingBlockingScript))
+ return false;
+ executeParsingBlockingScript();
+ }
+ return true;
+}
+
+bool HTMLScriptRunner::executeScriptsWaitingForLoad(CachedResource* cachedScript)
+{ // Entry point for "the watched resource finished loading"; the argument is used only for the assertion below.
+ ASSERT(!m_scriptNestingLevel);
+ ASSERT(haveParsingBlockingScript());
+ ASSERT_UNUSED(cachedScript, m_parsingBlockingScript.cachedScript() == cachedScript);
+ ASSERT(m_parsingBlockingScript.cachedScript()->isLoaded());
+ return executeParsingBlockingScripts(); // Same return contract: true when nothing is blocking any more.
+}
+
+bool HTMLScriptRunner::executeScriptsWaitingForStylesheets()
+{ // Entry point for "stylesheets finished loading"; all preconditions are assert-only.
+ ASSERT(m_document);
+ // Callers should check hasScriptsWaitingForStylesheets() before calling
+ // to prevent parser or script re-entry during </style> parsing.
+ ASSERT(hasScriptsWaitingForStylesheets());
+ ASSERT(!m_scriptNestingLevel);
+ ASSERT(m_document->haveStylesheetsLoaded());
+ return executeParsingBlockingScripts(); // true when no blocking script remains afterwards.
+}
+
+bool HTMLScriptRunner::executeScriptsWaitingForParsing()
+{ // Runs the deferred queue in order; returns false if it had to stop early, true when the queue is empty.
+ while (!m_scriptsToExecuteAfterParsing.isEmpty()) {
+ ASSERT(!m_scriptNestingLevel);
+ ASSERT(!haveParsingBlockingScript());
+ ASSERT(m_scriptsToExecuteAfterParsing.first().cachedScript()); // Queue entries are always external scripts (see requestDeferredScript()).
+ if (!m_scriptsToExecuteAfterParsing.first().cachedScript()->isLoaded()) {
+ watchForLoad(m_scriptsToExecuteAfterParsing.first()); // Head of the queue is still loading: leave it queued and start watching it.
+ return false;
+ }
+ PendingScript first = m_scriptsToExecuteAfterParsing.takeFirst(); // Removed from the queue before it runs.
+ executePendingScriptAndDispatchEvent(first);
+ if (!m_document) // detach() may have been called while the script ran.
+ return false;
+ }
+ return true;
+}
+
+void HTMLScriptRunner::requestParsingBlockingScript(Element* element)
+{ // Requests the external script into m_parsingBlockingScript and starts watching it if it is not loaded yet.
+ if (!requestPendingScript(m_parsingBlockingScript, element))
+ return; // Request was refused or failed; see requestPendingScript() for what state that leaves behind.
+
+ ASSERT(m_parsingBlockingScript.cachedScript());
+
+ // We only care about a load callback if cachedScript is not already
+ // in the cache. Callers will attempt to run the m_parsingBlockingScript
+ // if possible before returning control to the parser.
+ if (!m_parsingBlockingScript.cachedScript()->isLoaded())
+ watchForLoad(m_parsingBlockingScript);
+}
+
+void HTMLScriptRunner::requestDeferredScript(Element* element)
+{ // Requests the external script and, on success, appends it to the run-after-parsing queue.
+ PendingScript pendingScript;
+ if (!requestPendingScript(pendingScript, element))
+ return; // Nothing is queued when the request is refused or fails.
+
+ ASSERT(pendingScript.cachedScript());
+ m_scriptsToExecuteAfterParsing.append(pendingScript); // No load watch is started here; executeScriptsWaitingForParsing() starts one if needed.
+}
+
+bool HTMLScriptRunner::requestPendingScript(PendingScript& pendingScript, Element* script) const
+{ // Fills pendingScript for an external script; returns true only when a CachedScript was obtained.
+ ASSERT(!pendingScript.element());
+ const AtomicString& srcValue = script->getAttribute(srcAttr);
+ // Allow the host to disallow script loads (using the XSSAuditor, etc.)
+ if (!m_host->shouldLoadExternalScriptFromSrc(srcValue))
+ return false;
+ // FIXME: We need to resolve the url relative to the element.
+ if (!script->dispatchBeforeLoadEvent(srcValue))
+ return false;
+ pendingScript.setElement(script); // Set before the request below, so it stays set on the failure path too.
+ // This should correctly return 0 for empty or invalid srcValues.
+ CachedScript* cachedScript = m_document->cachedResourceLoader()->requestScript(srcValue, toScriptElement(script)->scriptCharset());
+ if (!cachedScript) {
+ notImplemented(); // Dispatch error event.
+ return false; // NOTE(review): pendingScript still has its element here, so haveParsingBlockingScript() can be true after a failed request — confirm this is intended.
+ }
+ pendingScript.setCachedScript(cachedScript);
+ return true;
+}
+
+// This method is meant to match the HTML5 definition of "running a script"
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#running-a-script
+void HTMLScriptRunner::runScript(Element* script, const TextPosition1& scriptStartPosition)
+{ // Either executes the script now, records it as parser-blocking, queues it as deferred, or leaves it alone.
+ ASSERT(m_document);
+ ASSERT(!haveParsingBlockingScript());
+ { // Inner scope bounds the lifetime of the two helper objects declared next.
+ InsertionPointRecord insertionPointRecord(m_host->inputStream());
+ NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel);
+
+ ScriptElement* scriptElement = toScriptElement(script);
+ ASSERT(scriptElement);
+ if (!scriptElement->shouldExecuteAsJavaScript())
+ return; // Nothing to do for elements that should not run as JavaScript.
+
+ if (script->hasAttribute(srcAttr)) { // External script.
+ if (script->hasAttribute(asyncAttr)) // Async takes precedence over defer.
+ return; // Asynchronous scripts handle themselves.
+
+ if (script->hasAttribute(deferAttr))
+ requestDeferredScript(script);
+ else
+ requestParsingBlockingScript(script);
+ } else if (!m_document->haveStylesheetsLoaded() && m_scriptNestingLevel == 1) { // Level 1 means this is the outermost runScript() call.
+ // Block inline script execution on stylesheet load, unless we are in document.write().
+ // The latter case can only happen if a script both triggers a stylesheet load
+ // and writes an inline script. Since write is blocking we have to execute the
+ // written script immediately, ignoring the pending sheets.
+ m_parsingBlockingScript.setElement(script); // Inline script recorded as blocking: element and position only, no cached script.
+ m_parsingBlockingScript.setStartingPosition(scriptStartPosition);
+ } else { // Inline script that can run right away.
+ ASSERT(isExecutingScript());
+ ScriptSourceCode sourceCode(script->textContent(), documentURLForScriptExecution(m_document), scriptStartPosition);
+ scriptElement->executeScript(sourceCode);
+ }
+ }
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLScriptRunner.h b/Source/WebCore/html/parser/HTMLScriptRunner.h
new file mode 100644
index 0000000..6cf74d8
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLScriptRunner.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLScriptRunner_h
+#define HTMLScriptRunner_h
+
+#include "PendingScript.h"
+#include <wtf/Deque.h>
+#include <wtf/text/TextPosition.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/PassRefPtr.h>
+
+namespace WebCore {
+
+class CachedResource;
+class CachedScript;
+class Document;
+class Element;
+class Frame;
+class HTMLScriptRunnerHost;
+class ScriptSourceCode;
+
+class HTMLScriptRunner : public Noncopyable { // Decides when <script> elements handed over by the parser run; talks to the parser through HTMLScriptRunnerHost.
+public:
+ static PassOwnPtr<HTMLScriptRunner> create(Document* document, HTMLScriptRunnerHost* host)
+ { // Only way to construct a runner: the constructor below is private.
+ return adoptPtr(new HTMLScriptRunner(document, host));
+ }
+ ~HTMLScriptRunner();
+
+ void detach(); // Drops the Document pointer.
+
+ // Processes the passed in script and any pending scripts if possible.
+ bool execute(PassRefPtr<Element> scriptToProcess, const TextPosition1& scriptStartPosition); // false means the parser must stay blocked.
+
+ bool executeScriptsWaitingForLoad(CachedResource*);
+ bool hasScriptsWaitingForStylesheets() const { return m_hasScriptsWaitingForStylesheets; }
+ bool executeScriptsWaitingForStylesheets();
+ bool executeScriptsWaitingForParsing();
+
+ bool isExecutingScript() const { return !!m_scriptNestingLevel; } // True while the nesting level is non-zero.
+
+private:
+ HTMLScriptRunner(Document*, HTMLScriptRunnerHost*);
+
+ Frame* frame() const; // NOTE(review): no definition of frame() appears in HTMLScriptRunner.cpp in this patch — confirm whether it is defined or used anywhere.
+
+ void executeParsingBlockingScript();
+ void executePendingScriptAndDispatchEvent(PendingScript&);
+ bool haveParsingBlockingScript() const;
+ bool executeParsingBlockingScripts();
+
+ void requestParsingBlockingScript(Element*);
+ void requestDeferredScript(Element*);
+ bool requestPendingScript(PendingScript&, Element*) const;
+
+ void runScript(Element*, const TextPosition1& scriptStartPosition);
+
+ // Helpers for dealing with HTMLScriptRunnerHost
+ void watchForLoad(PendingScript&);
+ void stopWatchingForLoad(PendingScript&);
+ bool isPendingScriptReady(const PendingScript&); // Non-const: the .cpp updates m_hasScriptsWaitingForStylesheets inside it.
+ ScriptSourceCode sourceFromPendingScript(const PendingScript&, bool& errorOccurred) const;
+
+ Document* m_document; // Raw pointer; set to 0 by detach().
+ HTMLScriptRunnerHost* m_host; // Raw pointer; asserted non-null in the constructor.
+ PendingScript m_parsingBlockingScript; // At most one script blocks the parser at a time.
+ Deque<PendingScript> m_scriptsToExecuteAfterParsing; // http://www.whatwg.org/specs/web-apps/current-work/#list-of-scripts-that-will-execute-when-the-document-has-finished-parsing
+ unsigned m_scriptNestingLevel;
+
+ // We only want stylesheet loads to trigger script execution if script
+ // execution is currently stopped due to stylesheet loads, otherwise we'd
+ // cause nested script execution when parsing <style> tags since </style>
+ // tags can cause Document to call executeScriptsWaitingForStylesheets.
+ bool m_hasScriptsWaitingForStylesheets;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLScriptRunnerHost.h b/Source/WebCore/html/parser/HTMLScriptRunnerHost.h
new file mode 100644
index 0000000..5b40a931
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLScriptRunnerHost.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLScriptRunnerHost_h
+#define HTMLScriptRunnerHost_h
+
+#include <wtf/Forward.h>
+
+namespace WebCore {
+
+class CachedResource;
+class Element;
+class HTMLInputStream;
+class ScriptSourceCode;
+
+class HTMLScriptRunnerHost { // Pure-virtual interface through which HTMLScriptRunner reaches its owner.
+public:
+ virtual ~HTMLScriptRunnerHost() { }
+
+ // Implementors should call cachedResource->addClient() here or soon after.
+ virtual void watchForLoad(CachedResource*) = 0;
+ // Implementors must call cachedResource->removeClient() immediately.
+ virtual void stopWatchingForLoad(CachedResource*) = 0;
+
+ // Implementors can block certain script loads (for XSSAuditor, etc.)
+ virtual bool shouldLoadExternalScriptFromSrc(const AtomicString&) = 0; // Argument is the script element's src attribute value.
+ virtual HTMLInputStream& inputStream() = 0; // Returned by reference; the runner builds InsertionPointRecord objects on it.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLToken.h b/Source/WebCore/html/parser/HTMLToken.h
new file mode 100644
index 0000000..42cddb8
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLToken.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLToken_h
+#define HTMLToken_h
+
+#include "NamedNodeMap.h"
+#include <wtf/Noncopyable.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+class HTMLToken : public Noncopyable { // Mutable token buffer: a type tag plus the character data, attributes and DOCTYPE data built up for it.
+public:
+ enum Type {
+ Uninitialized,
+ DOCTYPE,
+ StartTag,
+ EndTag,
+ Comment,
+ Character,
+ EndOfFile,
+ };
+
+ class Range { // Pair of integer indices; both members are assigned from caller-supplied index arguments.
+ public:
+ int m_start;
+ int m_end;
+ };
+
+ class Attribute {
+ public:
+ Range m_nameRange;
+ Range m_valueRange;
+ WTF::Vector<UChar, 32> m_name;
+ WTF::Vector<UChar, 32> m_value;
+ };
+
+ typedef WTF::Vector<Attribute, 10> AttributeList;
+ typedef WTF::Vector<UChar, 1024> DataVector;
+
+ HTMLToken() { clear(); } // clear() does not touch m_selfClosing, m_currentAttribute or m_attributes; the begin*Tag() methods set those.
+
+ void clear(int startIndex = 0)
+ { // Resets type, range and character data only; attributes and DOCTYPE data are left as they were.
+ m_type = Uninitialized;
+ m_range.m_start = startIndex;
+ m_range.m_end = startIndex;
+ m_data.clear();
+ }
+
+ int startIndex() const { return m_range.m_start; }
+ int endIndex() const { return m_range.m_end; }
+
+ void end(int endIndex)
+ {
+ m_range.m_end = endIndex;
+ }
+
+ void makeEndOfFile()
+ {
+ ASSERT(m_type == Uninitialized);
+ m_type = EndOfFile;
+ }
+
+ void beginStartTag(UChar character)
+ { // Starts a StartTag token whose name begins with the given character.
+ ASSERT(character);
+ ASSERT(m_type == Uninitialized);
+ m_type = StartTag;
+ m_selfClosing = false;
+ m_currentAttribute = 0;
+ m_attributes.clear();
+
+ m_data.append(character);
+ }
+
+ template<typename T>
+ void beginEndTag(T characters)
+ { // Same reset as beginStartTag(), but accepts anything m_data.append() accepts.
+ ASSERT(m_type == Uninitialized);
+ m_type = EndTag;
+ m_selfClosing = false;
+ m_currentAttribute = 0;
+ m_attributes.clear();
+
+ m_data.append(characters);
+ }
+
+ // Starting a character token works slightly differently than starting
+ // other types of tokens because we want to save a per-character branch.
+ void ensureIsCharacterToken()
+ {
+ ASSERT(m_type == Uninitialized || m_type == Character);
+ m_type = Character;
+ }
+
+ void beginComment()
+ {
+ ASSERT(m_type == Uninitialized);
+ m_type = Comment;
+ }
+
+ void beginDOCTYPE()
+ {
+ ASSERT(m_type == Uninitialized);
+ m_type = DOCTYPE;
+ m_doctypeData = adoptPtr(new DoctypeData()); // Fresh DoctypeData for every DOCTYPE token.
+ }
+
+ void beginDOCTYPE(UChar character)
+ {
+ ASSERT(character);
+ beginDOCTYPE();
+ m_data.append(character);
+ }
+
+ void appendToName(UChar character)
+ {
+ ASSERT(character);
+ ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
+ m_data.append(character);
+ }
+
+ template<typename T>
+ void appendToCharacter(T characters)
+ {
+ ASSERT(m_type == Character);
+ m_data.append(characters);
+ }
+
+ void appendToComment(UChar character)
+ {
+ ASSERT(character);
+ ASSERT(m_type == Comment);
+ m_data.append(character);
+ }
+
+ void addNewAttribute()
+ { // Appends an empty attribute and makes it the current one.
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ m_attributes.grow(m_attributes.size() + 1);
+ m_currentAttribute = &m_attributes.last(); // Pointer into m_attributes; reassigned on every call.
+#ifndef NDEBUG
+ m_currentAttribute->m_nameRange.m_start = 0; // Debug-only zeroing of all four indices; the ASSERTs in appendToAttribute*() expect a non-zero start.
+ m_currentAttribute->m_nameRange.m_end = 0;
+ m_currentAttribute->m_valueRange.m_start = 0;
+ m_currentAttribute->m_valueRange.m_end = 0;
+#endif
+ }
+
+ void beginAttributeName(int index)
+ {
+ m_currentAttribute->m_nameRange.m_start = index;
+ }
+
+ void endAttributeName(int index)
+ { // Also collapses the value range to the same index, so the value range is initially empty.
+ m_currentAttribute->m_nameRange.m_end = index;
+ m_currentAttribute->m_valueRange.m_start = index;
+ m_currentAttribute->m_valueRange.m_end = index;
+ }
+
+ void beginAttributeValue(int index)
+ {
+ m_currentAttribute->m_valueRange.m_start = index;
+#ifndef NDEBUG
+ m_currentAttribute->m_valueRange.m_end = 0;
+#endif
+ }
+
+ void endAttributeValue(int index)
+ {
+ m_currentAttribute->m_valueRange.m_end = index;
+ }
+
+ void appendToAttributeName(UChar character)
+ {
+ ASSERT(character);
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ ASSERT(m_currentAttribute->m_nameRange.m_start); // beginAttributeName() must have been called with a non-zero index.
+ m_currentAttribute->m_name.append(character);
+ }
+
+ void appendToAttributeValue(UChar character)
+ {
+ ASSERT(character);
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ ASSERT(m_currentAttribute->m_valueRange.m_start); // A non-zero value start must have been recorded first.
+ m_currentAttribute->m_value.append(character);
+ }
+
+ Type type() const { return m_type; }
+
+ bool selfClosing() const
+ {
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ return m_selfClosing;
+ }
+
+ void setSelfClosing()
+ {
+ ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag);
+ m_selfClosing = true;
+ }
+
+ const AttributeList& attributes() const
+ {
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ return m_attributes;
+ }
+
+ const DataVector& name() const
+ { // name(), characters() and comment() all expose the same m_data buffer; only the asserted type differs.
+ ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
+ return m_data;
+ }
+
+ const DataVector& characters() const
+ {
+ ASSERT(m_type == Character);
+ return m_data;
+ }
+
+ const DataVector& comment() const
+ {
+ ASSERT(m_type == Comment);
+ return m_data;
+ }
+
+ // FIXME: Distinguish between a missing public identifier and an empty one.
+ const WTF::Vector<UChar>& publicIdentifier() const
+ {
+ ASSERT(m_type == DOCTYPE);
+ return m_doctypeData->m_publicIdentifier;
+ }
+
+ // FIXME: Distinguish between a missing system identifier and an empty one.
+ const WTF::Vector<UChar>& systemIdentifier() const
+ {
+ ASSERT(m_type == DOCTYPE);
+ return m_doctypeData->m_systemIdentifier;
+ }
+
+ void setPublicIdentifierToEmptyString()
+ { // Marks the identifier as present and empties it; required before appendToPublicIdentifier().
+ ASSERT(m_type == DOCTYPE);
+ m_doctypeData->m_hasPublicIdentifier = true;
+ m_doctypeData->m_publicIdentifier.clear();
+ }
+
+ void setSystemIdentifierToEmptyString()
+ { // Marks the identifier as present and empties it; required before appendToSystemIdentifier().
+ ASSERT(m_type == DOCTYPE);
+ m_doctypeData->m_hasSystemIdentifier = true;
+ m_doctypeData->m_systemIdentifier.clear();
+ }
+
+ bool forceQuirks() const
+ {
+ ASSERT(m_type == DOCTYPE);
+ return m_doctypeData->m_forceQuirks;
+ }
+
+ void setForceQuirks()
+ {
+ ASSERT(m_type == DOCTYPE);
+ m_doctypeData->m_forceQuirks = true;
+ }
+
+ void appendToPublicIdentifier(UChar character)
+ {
+ ASSERT(character);
+ ASSERT(m_type == DOCTYPE);
+ ASSERT(m_doctypeData->m_hasPublicIdentifier);
+ m_doctypeData->m_publicIdentifier.append(character);
+ }
+
+ void appendToSystemIdentifier(UChar character)
+ {
+ ASSERT(character);
+ ASSERT(m_type == DOCTYPE);
+ ASSERT(m_doctypeData->m_hasSystemIdentifier);
+ m_doctypeData->m_systemIdentifier.append(character);
+ }
+
+private:
+ // FIXME: I'm not sure what the final relationship between HTMLToken and
+ // AtomicHTMLToken will be. I'm marking this a friend for now, but we'll
+ // want to end up with a cleaner interface between the two classes.
+ friend class AtomicHTMLToken;
+
+ class DoctypeData : public Noncopyable {
+ public:
+ DoctypeData()
+ : m_hasPublicIdentifier(false)
+ , m_hasSystemIdentifier(false)
+ , m_forceQuirks(false)
+ {
+ }
+
+ bool m_hasPublicIdentifier;
+ bool m_hasSystemIdentifier;
+ bool m_forceQuirks;
+ WTF::Vector<UChar> m_publicIdentifier;
+ WTF::Vector<UChar> m_systemIdentifier;
+ };
+
+ Type m_type;
+
+ // Which characters from the input stream are represented by this token.
+ Range m_range;
+
+ // "name" for DOCTYPE, StartTag, and EndTag
+ // "characters" for Character
+ // "data" for Comment
+ DataVector m_data;
+
+ // For DOCTYPE
+ OwnPtr<DoctypeData> m_doctypeData;
+
+ // For StartTag and EndTag
+ bool m_selfClosing;
+ AttributeList m_attributes;
+
+ // A pointer into m_attributes used during lexing.
+ Attribute* m_currentAttribute;
+};
+
+// FIXME: This class should eventually be named HTMLToken once we move the
+// existing HTMLToken to be internal to the HTMLTokenizer.
+class AtomicHTMLToken : public Noncopyable {
+public:
+ AtomicHTMLToken(HTMLToken& token) // Takes a non-const reference: the DOCTYPE case below moves data out of the source token.
+ : m_type(token.type())
+ {
+ switch (m_type) {
+ case HTMLToken::Uninitialized:
+ ASSERT_NOT_REACHED();
+ break;
+ case HTMLToken::DOCTYPE:
+ m_name = AtomicString(token.name().data(), token.name().size());
+ m_doctypeData = token.m_doctypeData.release(); // Ownership of the DOCTYPE data moves from the HTMLToken to this object.
+ break;
+ case HTMLToken::EndOfFile:
+ break;
+ case HTMLToken::StartTag:
+ case HTMLToken::EndTag: {
+ m_selfClosing = token.selfClosing();
+ m_name = AtomicString(token.name().data(), token.name().size());
+ const HTMLToken::AttributeList& attributes = token.attributes();
+ for (HTMLToken::AttributeList::const_iterator iter = attributes.begin();
+ iter != attributes.end(); ++iter) {
+ if (!iter->m_name.isEmpty()) { // Attributes with an empty name are skipped.
+ String name(iter->m_name.data(), iter->m_name.size());
+ String value(iter->m_value.data(), iter->m_value.size());
+ ASSERT(iter->m_nameRange.m_start);
+ ASSERT(iter->m_nameRange.m_end);
+ ASSERT(iter->m_valueRange.m_start);
+ ASSERT(iter->m_valueRange.m_end);
+ RefPtr<Attribute> mappedAttribute = Attribute::createMapped(name, value);
+ if (!m_attributes) { // The map is created lazily, so tags without attributes leave m_attributes null.
+ m_attributes = NamedNodeMap::create();
+ // Reserving capacity here improves the parser
+ // benchmark. It might be worth experimenting with
+ // the constant to see where the optimal point is.
+ m_attributes->reserveInitialCapacity(10);
+ }
+ m_attributes->insertAttribute(mappedAttribute.release(), false);
+ }
+ }
+ break;
+ }
+ case HTMLToken::Comment:
+ m_data = String(token.comment().data(), token.comment().size()); // Comment text is copied into an owned String.
+ break;
+ case HTMLToken::Character:
+ m_externalCharacters = &token.characters(); // Borrowed pointer into the source token's buffer; nothing is copied.
+ break;
+ }
+ }
+
+ AtomicHTMLToken(HTMLToken::Type type, AtomicString name, PassRefPtr<NamedNodeMap> attributes = 0)
+ : m_type(type)
+ , m_name(name)
+ , m_attributes(attributes)
+ {
+ ASSERT(usesName()); // NOTE(review): m_selfClosing and m_externalCharacters are not initialized by this constructor.
+ }
+
+ HTMLToken::Type type() const { return m_type; }
+
+ const AtomicString& name() const
+ {
+ ASSERT(usesName());
+ return m_name;
+ }
+
+ void setName(const AtomicString& name)
+ {
+ ASSERT(usesName());
+ m_name = name;
+ }
+
+ bool selfClosing() const
+ {
+ ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag);
+ return m_selfClosing;
+ }
+
+ Attribute* getAttributeItem(const QualifiedName& attributeName)
+ {
+ ASSERT(usesAttributes());
+ if (!m_attributes)
+ return 0; // No attribute map was ever created for this token.
+ return m_attributes->getAttributeItem(attributeName);
+ }
+
+ NamedNodeMap* attributes() const
+ {
+ ASSERT(usesAttributes());
+ return m_attributes.get(); // May be null when the token has no attributes.
+ }
+
+ PassRefPtr<NamedNodeMap> takeAtributes()
+ {
+ ASSERT(usesAttributes());
+ return m_attributes.release(); // Hands the map to the caller and leaves m_attributes empty.
+ }
+
+ const HTMLToken::DataVector& characters() const
+ {
+ ASSERT(m_type == HTMLToken::Character);
+ return *m_externalCharacters; // Dereferences the borrowed buffer; only valid while the source HTMLToken is.
+ }
+
+ const String& comment() const
+ {
+ ASSERT(m_type == HTMLToken::Comment);
+ return m_data;
+ }
+
+ // FIXME: Distinguish between a missing public identifier and an empty one.
+ WTF::Vector<UChar>& publicIdentifier() const // Note: const member returning a mutable reference, unlike HTMLToken::publicIdentifier().
+ {
+ ASSERT(m_type == HTMLToken::DOCTYPE);
+ return m_doctypeData->m_publicIdentifier;
+ }
+
+ // FIXME: Distinguish between a missing system identifier and an empty one.
+ WTF::Vector<UChar>& systemIdentifier() const // Note: const member returning a mutable reference, unlike HTMLToken::systemIdentifier().
+ {
+ ASSERT(m_type == HTMLToken::DOCTYPE);
+ return m_doctypeData->m_systemIdentifier;
+ }
+
+ bool forceQuirks() const
+ {
+ ASSERT(m_type == HTMLToken::DOCTYPE);
+ return m_doctypeData->m_forceQuirks;
+ }
+
+private:
+ HTMLToken::Type m_type;
+
+ bool usesName() const
+ {
+ return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE;
+ }
+
+ bool usesAttributes() const
+ {
+ return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
+ }
+
+ // "name" for DOCTYPE, StartTag, and EndTag
+ AtomicString m_name;
+
+ // "data" for Comment
+ String m_data;
+
+ // "characters" for Character
+ //
+ // We don't want to copy the characters out of the HTMLToken, so we
+ // keep a pointer to its buffer instead. This buffer is owned by the
+ // HTMLToken and causes a lifetime dependence between these objects.
+ //
+ // FIXME: Add a mechanism for "internalizing" the characters when the
+ // HTMLToken is destructed.
+ const HTMLToken::DataVector* m_externalCharacters;
+
+ // For DOCTYPE
+ OwnPtr<HTMLToken::DoctypeData> m_doctypeData;
+
+ // For StartTag and EndTag
+ bool m_selfClosing;
+
+ RefPtr<NamedNodeMap> m_attributes;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLTokenizer.cpp b/Source/WebCore/html/parser/HTMLTokenizer.cpp
new file mode 100644
index 0000000..305fca2
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTokenizer.cpp
@@ -0,0 +1,1698 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLTokenizer.h"
+
+#include "HTMLEntityParser.h"
+#include "HTMLToken.h"
+#include "HTMLTreeBuilder.h"
+#include "HTMLNames.h"
+#include "NotImplemented.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/CurrentTime.h>
+#include <wtf/UnusedParam.h>
+#include <wtf/text/AtomicString.h>
+#include <wtf/text/CString.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF;
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+// Value the tokenizer states compare the current input character against to
+// detect end of input; it is the NUL code unit (0).
+const UChar HTMLTokenizer::InputStreamPreprocessor::endOfFileMarker = 0;
+
+namespace {
+
+// Converts an ASCII uppercase letter to lowercase by adding 0x20.
+// The input is only checked by assertion, so callers must test
+// isASCIIUpper(cc) first.
+inline UChar toLowerCase(UChar cc)
+{
+    ASSERT(isASCIIUpper(cc));
+    const int asciiCaseDelta = 0x20;
+    const int lowered = cc + asciiCaseDelta;
+    return lowered;
+}
+
+// True for the four characters the tokenizer treats as whitespace:
+// space, line feed (0x0A), tab (0x09) and form feed (0x0C).
+inline bool isTokenizerWhitespace(UChar cc)
+{
+    switch (cc) {
+    case ' ':
+    case '\x0A':
+    case '\x09':
+    case '\x0C':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Advances |source| once per character of the NUL-terminated
+// |expectedCharacters|, passing each character to
+// SegmentedString::advanceAndASSERTIgnoringCase in order.
+inline void advanceStringAndASSERTIgnoringCase(SegmentedString& source, const char* expectedCharacters)
+{
+    for (const char* expected = expectedCharacters; *expected; ++expected)
+        source.advanceAndASSERTIgnoringCase(*expected);
+}
+
+// Advances |source| once per character of the NUL-terminated
+// |expectedCharacters|, passing each character to
+// SegmentedString::advanceAndASSERT in order.
+inline void advanceStringAndASSERT(SegmentedString& source, const char* expectedCharacters)
+{
+    for (const char* expected = expectedCharacters; *expected; ++expected)
+        source.advanceAndASSERT(*expected);
+}
+
+// Returns true when |vector| and |string| hold the same sequence of UChars.
+// Lengths are compared first; equal-length contents are compared bytewise.
+inline bool vectorEqualsString(const Vector<UChar, 32>& vector, const String& string)
+{
+    const size_t length = vector.size();
+    if (length != string.length())
+        return false;
+    // FIXME: Is there a higher-level function we should be calling here?
+    return memcmp(string.characters(), vector.data(), length * sizeof(UChar)) == 0;
+}
+
+// True for the eight "end tag open" / "end tag name" states of the RCDATA,
+// RAWTEXT, script data and script data escaped families; false for every
+// other state.
+inline bool isEndTagBufferingState(HTMLTokenizer::State state)
+{
+    return state == HTMLTokenizer::RCDATAEndTagOpenState
+        || state == HTMLTokenizer::RCDATAEndTagNameState
+        || state == HTMLTokenizer::RAWTEXTEndTagOpenState
+        || state == HTMLTokenizer::RAWTEXTEndTagNameState
+        || state == HTMLTokenizer::ScriptDataEndTagOpenState
+        || state == HTMLTokenizer::ScriptDataEndTagNameState
+        || state == HTMLTokenizer::ScriptDataEscapedEndTagOpenState
+        || state == HTMLTokenizer::ScriptDataEscapedEndTagNameState;
+}
+
+}
+
+// Constructs a tokenizer. The input stream preprocessor is given a pointer
+// to this tokenizer, |usePreHTML5ParserQuirks| is stored as-is, and reset()
+// initializes the remaining state.
+HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks)
+ : m_inputStreamPreprocessor(this)
+ , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
+{
+ reset();
+}
+
+// Empty body: no explicit teardown is performed here.
+HTMLTokenizer::~HTMLTokenizer()
+{
+}
+
+// Puts the tokenizer back into its starting configuration: DataState, no
+// current token, line number 0, every boolean flag false and no additional
+// allowed character.
+void HTMLTokenizer::reset()
+{
+    // State machine position and the token under construction.
+    m_token = 0;
+    m_state = DataState;
+    m_lineNumber = 0;
+
+    // Mode flags.
+    m_shouldAllowCDATA = false;
+    m_forceNullCharacterReplacement = false;
+    m_skipLeadingNewLineForListing = false;
+
+    // Used while parsing character references inside attribute values.
+    m_additionalAllowedCharacter = '\0';
+}
+
+// Consumes a character reference from |source| and buffers the result.
+// Returns false when consumeHTMLEntity reports that it needs more input.
+// Otherwise buffers either the decoded characters or, if decoding failed, a
+// literal '&', and returns true.
+inline bool HTMLTokenizer::processEntity(SegmentedString& source)
+{
+    Vector<UChar, 16> decodedEntity;
+    bool notEnoughCharacters = false;
+    const bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
+    if (notEnoughCharacters)
+        return false;
+
+    if (success) {
+        for (size_t i = 0; i < decodedEntity.size(); ++i)
+            bufferCharacter(decodedEntity[i]);
+        return true;
+    }
+
+    // Not a character reference after all: the '&' is ordinary text.
+    ASSERT(decodedEntity.isEmpty());
+    bufferCharacter('&');
+    return true;
+}
+
+#if COMPILER(MSVC)
+// We need to disable the "unreachable code" warning because we want to assert
+// that some code points aren't reached in the state machine.
+#pragma warning(disable: 4702)
+#endif
+
+// BEGIN_STATE opens the handler for a state. It expands to both a switch
+// case label and a goto label named after the state, so a handler can be
+// entered through the switch or through a goto from RECONSUME_IN,
+// ADVANCE_TO and SWITCH_TO.
+// END_STATE closes a handler; control is not expected to reach it.
+#define BEGIN_STATE(stateName) case stateName: stateName:
+#define END_STATE() ASSERT_NOT_REACHED(); break;
+
+// We use this macro when the HTML5 spec says "reconsume the current input
+// character in the <mumble> state."
+//
+// It records the new state in m_state and jumps straight to that state's
+// label without touching the input, so the handler sees the same |cc|.
+#define RECONSUME_IN(stateName) \
+ do { \
+ m_state = stateName; \
+ goto stateName; \
+ } while (false)
+
+// We use this macro when the HTML5 spec says "consume the next input
+// character ... and switch to the <mumble> state."
+//
+// m_state is updated before the input is advanced. If the preprocessor
+// cannot advance, the enclosing function returns
+// haveBufferedCharacterToken() with the new state already recorded.
+// Otherwise |cc| is reloaded and control jumps to the state's label.
+// Expects |source| and |cc| to be in scope at the point of use.
+#define ADVANCE_TO(stateName) \
+ do { \
+ m_state = stateName; \
+ if (!m_inputStreamPreprocessor.advance(source, m_lineNumber)) \
+ return haveBufferedCharacterToken(); \
+ cc = m_inputStreamPreprocessor.nextInputCharacter(); \
+ goto stateName; \
+ } while (false)
+
+// Sometimes there's more complicated logic in the spec that separates when
+// we consume the next input character and when we switch to a particular
+// state. We handle those cases by advancing the source directly and using
+// this macro to switch to the indicated state.
+//
+// Unlike ADVANCE_TO, this macro does not advance the input: it peeks at the
+// current position. If |source| is empty or the peek fails, the enclosing
+// function returns haveBufferedCharacterToken() with m_state already set.
+// Expects |source| and |cc| to be in scope at the point of use.
+#define SWITCH_TO(stateName) \
+ do { \
+ m_state = stateName; \
+ if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
+ return haveBufferedCharacterToken(); \
+ cc = m_inputStreamPreprocessor.nextInputCharacter(); \
+ goto stateName; \
+ } while (false)
+
+
+// Records the name of the current token in m_appropriateEndTagName when that
+// token is a start tag. Tokens of any other type leave the stored name
+// untouched.
+inline void HTMLTokenizer::saveEndTagNameIfNeeded()
+{
+    ASSERT(m_token->type() != HTMLToken::Uninitialized);
+    if (m_token->type() != HTMLToken::StartTag)
+        return;
+    m_appropriateEndTagName = m_token->name();
+}
+
+// Implements the spec phrase "Emit the current <mumble> token. Switch to the
+// <mumble> state." The word "resume" is used instead of "switch" because
+// this function returns to the caller; the tokenizer only lands in |state|
+// when it is called again.
+//
+// Consumes the current input character, then records |state| and the end tag
+// name exactly as emitAndReconsumeIn does. Always returns true.
+bool HTMLTokenizer::emitAndResumeIn(SegmentedString& source, State state)
+{
+    source.advance(m_lineNumber);
+    return emitAndReconsumeIn(source, state);
+}
+
+// Same as emitAndResumeIn but leaves the input where it is, so the current
+// character is seen again on the next call. Always returns true.
+bool HTMLTokenizer::emitAndReconsumeIn(SegmentedString&, State state)
+{
+    saveEndTagNameIfNeeded();
+    m_state = state;
+    return true;
+}
+
+// Emits the EndOfFile token, unless buffered characters are still pending.
+// In that case nothing is changed, so the pending character token is emitted
+// first. Always returns true.
+bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
+{
+    if (!haveBufferedCharacterToken()) {
+        m_state = DataState;
+        source.advance(m_lineNumber);
+        m_token->clear();
+        m_token->makeEndOfFile();
+    }
+    return true;
+}
+
+// Consumes the current input character and tries to turn the buffered end
+// tag name into an end tag token.
+//
+// Returns true when the current token is a Character token; the buffered
+// name is then left in place. Otherwise begins an end tag token from
+// m_bufferedEndTagName, clears the buffer and returns false.
+bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
+{
+    ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
+    source.advance(m_lineNumber);
+    const bool hasPendingCharacters = m_token->type() == HTMLToken::Character;
+    if (!hasPendingCharacters) {
+        m_token->beginEndTag(m_bufferedEndTagName);
+        m_bufferedEndTagName.clear();
+    }
+    return hasPendingCharacters;
+}
+
+// Switches to |stateName| after flushing the buffered end tag.
+//
+// flushBufferedEndTag() consumes the current character itself, which is why
+// this macro peeks afterwards instead of advancing again. If the flush
+// reports a pending Character token, the enclosing function returns true.
+// If no further input is available, it returns
+// haveBufferedCharacterToken(). In both cases m_state is already updated.
+// Expects |source| and |cc| to be in scope at the point of use.
+#define FLUSH_AND_ADVANCE_TO(stateName) \
+ do { \
+ m_state = stateName; \
+ if (flushBufferedEndTag(source)) \
+ return true; \
+ if (source.isEmpty() \
+ || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
+ return haveBufferedCharacterToken(); \
+ cc = m_inputStreamPreprocessor.nextInputCharacter(); \
+ goto stateName; \
+ } while (false)
+
+// Flushes the buffered end tag and records |state| as the state to resume
+// in. The result of flushBufferedEndTag() is ignored; this always returns
+// true.
+bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, State state)
+{
+    flushBufferedEndTag(source);
+    m_state = state;
+    return true;
+}
+
+bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
+{
+ // If we have a token in progress, then we're supposed to be called back
+ // with the same token so we can finish it.
+ ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
+ m_token = &token;
+
+ if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
+ // FIXME: This should call flushBufferedEndTag().
+ // We started an end tag during our last iteration.
+ m_token->beginEndTag(m_bufferedEndTagName);
+ m_bufferedEndTagName.clear();
+ if (m_state == DataState) {
+ // We're back in the data state, so we must be done with the tag.
+ return true;
+ }
+ }
+
+ if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber))
+ return haveBufferedCharacterToken();
+ UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
+
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+ // Note that this logic is different than the generic \r\n collapsing
+ // handled in the input stream preprocessor. This logic is here as an
+ // "authoring convenience" so folks can write:
+ //
+ // <pre>
+ // lorem ipsum
+ // lorem ipsum
+ // </pre>
+ //
+ // without getting an extra newline at the start of their <pre> element.
+ if (m_skipLeadingNewLineForListing) {
+ m_skipLeadingNewLineForListing = false;
+ if (cc == '\n') {
+ if (m_state == DataState)
+ ADVANCE_TO(DataState);
+ if (m_state == RCDATAState)
+ ADVANCE_TO(RCDATAState);
+ // When parsing text/plain documents, we run the tokenizer in the
+ // PLAINTEXTState and ignore m_skipLeadingNewLineForListing.
+ ASSERT(m_state == PLAINTEXTState);
+ }
+ }
+
+ // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
+ switch (m_state) {
+ BEGIN_STATE(DataState) {
+ if (cc == '&')
+ ADVANCE_TO(CharacterReferenceInDataState);
+ else if (cc == '<') {
+ if (m_token->type() == HTMLToken::Character) {
+ // We have a bunch of character tokens queued up that we
+ // are emitting lazily here.
+ return true;
+ }
+ ADVANCE_TO(TagOpenState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker)
+ return emitEndOfFile(source);
+ else {
+ bufferCharacter(cc);
+ ADVANCE_TO(DataState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CharacterReferenceInDataState) {
+ if (!processEntity(source))
+ return haveBufferedCharacterToken();
+ SWITCH_TO(DataState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(RCDATAState) {
+ if (cc == '&')
+ ADVANCE_TO(CharacterReferenceInRCDATAState);
+ else if (cc == '<')
+ ADVANCE_TO(RCDATALessThanSignState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker)
+ return emitEndOfFile(source);
+ else {
+ bufferCharacter(cc);
+ ADVANCE_TO(RCDATAState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CharacterReferenceInRCDATAState) {
+ if (!processEntity(source))
+ return haveBufferedCharacterToken();
+ SWITCH_TO(RCDATAState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(RAWTEXTState) {
+ if (cc == '<')
+ ADVANCE_TO(RAWTEXTLessThanSignState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker)
+ return emitEndOfFile(source);
+ else {
+ bufferCharacter(cc);
+ ADVANCE_TO(RAWTEXTState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataState) {
+ if (cc == '<')
+ ADVANCE_TO(ScriptDataLessThanSignState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker)
+ return emitEndOfFile(source);
+ else {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(PLAINTEXTState) {
+ if (cc == InputStreamPreprocessor::endOfFileMarker)
+ return emitEndOfFile(source);
+ else
+ bufferCharacter(cc);
+ ADVANCE_TO(PLAINTEXTState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(TagOpenState) {
+ if (cc == '!')
+ ADVANCE_TO(MarkupDeclarationOpenState);
+ else if (cc == '/')
+ ADVANCE_TO(EndTagOpenState);
+ else if (isASCIIUpper(cc)) {
+ m_token->beginStartTag(toLowerCase(cc));
+ ADVANCE_TO(TagNameState);
+ } else if (isASCIILower(cc)) {
+ m_token->beginStartTag(cc);
+ ADVANCE_TO(TagNameState);
+ } else if (cc == '?') {
+ parseError();
+ // The spec consumes the current character before switching
+ // to the bogus comment state, but it's easier to implement
+ // if we reconsume the current character.
+ RECONSUME_IN(BogusCommentState);
+ } else {
+ parseError();
+ bufferCharacter('<');
+ RECONSUME_IN(DataState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(EndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_token->beginEndTag(toLowerCase(cc));
+ ADVANCE_TO(TagNameState);
+ } else if (isASCIILower(cc)) {
+ m_token->beginEndTag(cc);
+ ADVANCE_TO(TagNameState);
+ } else if (cc == '>') {
+ parseError();
+ ADVANCE_TO(DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ bufferCharacter('<');
+ bufferCharacter('/');
+ RECONSUME_IN(DataState);
+ } else {
+ parseError();
+ RECONSUME_IN(BogusCommentState);
+ }
+ }
+ END_STATE()
+
+    // Tag name state: appends characters to the token's name (uppercase
+    // ASCII is lowercased first) until whitespace, '/', '>' or end of input.
+    BEGIN_STATE(TagNameState) {
+        if (isTokenizerWhitespace(cc))
+            ADVANCE_TO(BeforeAttributeNameState);
+        else if (cc == '/')
+            ADVANCE_TO(SelfClosingStartTagState);
+        else if (cc == '>')
+            return emitAndResumeIn(source, DataState);
+        else if (m_usePreHTML5ParserQuirks && cc == '<')
+            return emitAndReconsumeIn(source, DataState);
+        else if (isASCIIUpper(cc)) {
+            m_token->appendToName(toLowerCase(cc));
+            ADVANCE_TO(TagNameState);
+        // This must be "else if": a bare "if" would start a second,
+        // independent statement and only works while every branch above
+        // leaves through goto/return.
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            RECONSUME_IN(DataState);
+        } else {
+            m_token->appendToName(cc);
+            ADVANCE_TO(TagNameState);
+        }
+    }
+    END_STATE()
+
+ BEGIN_STATE(RCDATALessThanSignState) {
+ if (cc == '/') {
+ m_temporaryBuffer.clear();
+ ASSERT(m_bufferedEndTagName.isEmpty());
+ ADVANCE_TO(RCDATAEndTagOpenState);
+ } else {
+ bufferCharacter('<');
+ RECONSUME_IN(RCDATAState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(RCDATAEndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(toLowerCase(cc));
+ ADVANCE_TO(RCDATAEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(cc);
+ ADVANCE_TO(RCDATAEndTagNameState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('/');
+ RECONSUME_IN(RCDATAState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(RCDATAEndTagNameState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(toLowerCase(cc));
+ ADVANCE_TO(RCDATAEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(cc);
+ ADVANCE_TO(RCDATAEndTagNameState);
+ } else {
+ if (isTokenizerWhitespace(cc)) {
+ if (isAppropriateEndTag())
+ FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+ } else if (cc == '/') {
+ if (isAppropriateEndTag())
+ FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+ } else if (cc == '>') {
+ if (isAppropriateEndTag())
+ return flushEmitAndResumeIn(source, DataState);
+ }
+ bufferCharacter('<');
+ bufferCharacter('/');
+ m_token->appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ RECONSUME_IN(RCDATAState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(RAWTEXTLessThanSignState) {
+ if (cc == '/') {
+ m_temporaryBuffer.clear();
+ ASSERT(m_bufferedEndTagName.isEmpty());
+ ADVANCE_TO(RAWTEXTEndTagOpenState);
+ } else {
+ bufferCharacter('<');
+ RECONSUME_IN(RAWTEXTState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(RAWTEXTEndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(toLowerCase(cc));
+ ADVANCE_TO(RAWTEXTEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(cc);
+ ADVANCE_TO(RAWTEXTEndTagNameState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('/');
+ RECONSUME_IN(RAWTEXTState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(RAWTEXTEndTagNameState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(toLowerCase(cc));
+ ADVANCE_TO(RAWTEXTEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(cc);
+ ADVANCE_TO(RAWTEXTEndTagNameState);
+ } else {
+ if (isTokenizerWhitespace(cc)) {
+ if (isAppropriateEndTag())
+ FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+ } else if (cc == '/') {
+ if (isAppropriateEndTag())
+ FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+ } else if (cc == '>') {
+ if (isAppropriateEndTag())
+ return flushEmitAndResumeIn(source, DataState);
+ }
+ bufferCharacter('<');
+ bufferCharacter('/');
+ m_token->appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ RECONSUME_IN(RAWTEXTState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataLessThanSignState) {
+ if (cc == '/') {
+ m_temporaryBuffer.clear();
+ ASSERT(m_bufferedEndTagName.isEmpty());
+ ADVANCE_TO(ScriptDataEndTagOpenState);
+ } else if (cc == '!') {
+ bufferCharacter('<');
+ bufferCharacter('!');
+ ADVANCE_TO(ScriptDataEscapeStartState);
+ } else {
+ bufferCharacter('<');
+ RECONSUME_IN(ScriptDataState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataEndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(toLowerCase(cc));
+ ADVANCE_TO(ScriptDataEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(cc);
+ ADVANCE_TO(ScriptDataEndTagNameState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('/');
+ RECONSUME_IN(ScriptDataState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataEndTagNameState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(toLowerCase(cc));
+ ADVANCE_TO(ScriptDataEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(cc);
+ ADVANCE_TO(ScriptDataEndTagNameState);
+ } else {
+ if (isTokenizerWhitespace(cc)) {
+ if (isAppropriateEndTag())
+ FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+ } else if (cc == '/') {
+ if (isAppropriateEndTag())
+ FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+ } else if (cc == '>') {
+ if (isAppropriateEndTag())
+ return flushEmitAndResumeIn(source, DataState);
+ }
+ bufferCharacter('<');
+ bufferCharacter('/');
+ m_token->appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ RECONSUME_IN(ScriptDataState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataEscapeStartState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataEscapeStartDashState);
+ } else
+ RECONSUME_IN(ScriptDataState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataEscapeStartDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataEscapedDashDashState);
+ } else
+ RECONSUME_IN(ScriptDataState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataEscapedState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataEscapedDashState);
+ } else if (cc == '<')
+ ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataEscapedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataEscapedDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataEscapedDashDashState);
+ } else if (cc == '<')
+ ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataEscapedState);
+ }
+ }
+ END_STATE()
+
+    // Script data escaped dash dash state: reached after "--" inside escaped
+    // script data. '>' returns to plain script data; '<' may begin an end
+    // tag or a double escape; anything else goes back to the escaped state.
+    BEGIN_STATE(ScriptDataEscapedDashDashState) {
+        if (cc == '-') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataEscapedDashDashState);
+        } else if (cc == '<')
+            ADVANCE_TO(ScriptDataEscapedLessThanSignState);
+        else if (cc == '>') {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataState);
+        // This must be "else if", as in ScriptDataDoubleEscapedDashDashState:
+        // a bare "if" would start a second, independent statement and only
+        // works while every branch above leaves through goto/return.
+        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+            parseError();
+            RECONSUME_IN(DataState);
+        } else {
+            bufferCharacter(cc);
+            ADVANCE_TO(ScriptDataEscapedState);
+        }
+    }
+    END_STATE()
+
+ BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
+ if (cc == '/') {
+ m_temporaryBuffer.clear();
+ ASSERT(m_bufferedEndTagName.isEmpty());
+ ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
+ } else if (isASCIIUpper(cc)) {
+ bufferCharacter('<');
+ bufferCharacter(cc);
+ m_temporaryBuffer.clear();
+ m_temporaryBuffer.append(toLowerCase(cc));
+ ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ } else if (isASCIILower(cc)) {
+ bufferCharacter('<');
+ bufferCharacter(cc);
+ m_temporaryBuffer.clear();
+ m_temporaryBuffer.append(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ } else {
+ bufferCharacter('<');
+ RECONSUME_IN(ScriptDataEscapedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(toLowerCase(cc));
+ ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(cc);
+ ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ } else {
+ bufferCharacter('<');
+ bufferCharacter('/');
+ RECONSUME_IN(ScriptDataEscapedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
+ if (isASCIIUpper(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(toLowerCase(cc));
+ ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ } else if (isASCIILower(cc)) {
+ m_temporaryBuffer.append(cc);
+ addToPossibleEndTag(cc);
+ ADVANCE_TO(ScriptDataEscapedEndTagNameState);
+ } else {
+ if (isTokenizerWhitespace(cc)) {
+ if (isAppropriateEndTag())
+ FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
+ } else if (cc == '/') {
+ if (isAppropriateEndTag())
+ FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
+ } else if (cc == '>') {
+ if (isAppropriateEndTag())
+ return flushEmitAndResumeIn(source, DataState);
+ }
+ bufferCharacter('<');
+ bufferCharacter('/');
+ m_token->appendToCharacter(m_temporaryBuffer);
+ m_bufferedEndTagName.clear();
+ RECONSUME_IN(ScriptDataEscapedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
+ if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
+ bufferCharacter(cc);
+ if (temporaryBufferIs(scriptTag.localName()))
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
+ else
+ ADVANCE_TO(ScriptDataEscapedState);
+ } else if (isASCIIUpper(cc)) {
+ bufferCharacter(cc);
+ m_temporaryBuffer.append(toLowerCase(cc));
+ ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ } else if (isASCIILower(cc)) {
+ bufferCharacter(cc);
+ m_temporaryBuffer.append(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapeStartState);
+ } else
+ RECONSUME_IN(ScriptDataEscapedState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataDoubleEscapedState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedDashState);
+ } else if (cc == '<') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
+ } else if (cc == '<') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
+ if (cc == '-') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
+ } else if (cc == '<') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
+ } else if (cc == '>') {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ bufferCharacter(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
+ if (cc == '/') {
+ bufferCharacter(cc);
+ m_temporaryBuffer.clear();
+ ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+ } else
+ RECONSUME_IN(ScriptDataDoubleEscapedState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
+ if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
+ bufferCharacter(cc);
+ if (temporaryBufferIs(scriptTag.localName()))
+ ADVANCE_TO(ScriptDataEscapedState);
+ else
+ ADVANCE_TO(ScriptDataDoubleEscapedState);
+ } else if (isASCIIUpper(cc)) {
+ bufferCharacter(cc);
+ m_temporaryBuffer.append(toLowerCase(cc));
+ ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+ } else if (isASCIILower(cc)) {
+ bufferCharacter(cc);
+ m_temporaryBuffer.append(cc);
+ ADVANCE_TO(ScriptDataDoubleEscapeEndState);
+ } else
+ RECONSUME_IN(ScriptDataDoubleEscapedState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(BeforeAttributeNameState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeAttributeNameState);
+ else if (cc == '/')
+ ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
+ else if (isASCIIUpper(cc)) {
+ m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeName(toLowerCase(cc));
+ ADVANCE_TO(AttributeNameState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
+ parseError();
+ m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeName(cc);
+ ADVANCE_TO(AttributeNameState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AttributeNameState) {
+ if (isTokenizerWhitespace(cc)) {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ ADVANCE_TO(AfterAttributeNameState);
+ } else if (cc == '/') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ ADVANCE_TO(SelfClosingStartTagState);
+ } else if (cc == '=') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ ADVANCE_TO(BeforeAttributeValueState);
+ } else if (cc == '>') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ return emitAndResumeIn(source, DataState);
+ } else if (m_usePreHTML5ParserQuirks && cc == '<') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ return emitAndReconsumeIn(source, DataState);
+ } else if (isASCIIUpper(cc)) {
+ m_token->appendToAttributeName(toLowerCase(cc));
+ ADVANCE_TO(AttributeNameState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
+ parseError();
+ m_token->appendToAttributeName(cc);
+ ADVANCE_TO(AttributeNameState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AfterAttributeNameState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(AfterAttributeNameState);
+ else if (cc == '/')
+ ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '=')
+ ADVANCE_TO(BeforeAttributeValueState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
+ else if (isASCIIUpper(cc)) {
+ m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeName(toLowerCase(cc));
+ ADVANCE_TO(AttributeNameState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<')
+ parseError();
+ m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeName(cc);
+ ADVANCE_TO(AttributeNameState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(BeforeAttributeValueState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeAttributeValueState);
+ else if (cc == '"') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
+ ADVANCE_TO(AttributeValueDoubleQuotedState);
+ } else if (cc == '&') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed());
+ RECONSUME_IN(AttributeValueUnquotedState);
+ } else if (cc == '\'') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
+ ADVANCE_TO(AttributeValueSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ if (cc == '<' || cc == '=' || cc == '`')
+ parseError();
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed());
+ m_token->appendToAttributeValue(cc);
+ ADVANCE_TO(AttributeValueUnquotedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AttributeValueDoubleQuotedState) {
+ if (cc == '"') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ ADVANCE_TO(AfterAttributeValueQuotedState);
+ } else if (cc == '&') {
+ m_additionalAllowedCharacter = '"';
+ ADVANCE_TO(CharacterReferenceInAttributeValueState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ RECONSUME_IN(DataState);
+ } else {
+ m_token->appendToAttributeValue(cc);
+ ADVANCE_TO(AttributeValueDoubleQuotedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AttributeValueSingleQuotedState) {
+ if (cc == '\'') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ ADVANCE_TO(AfterAttributeValueQuotedState);
+ } else if (cc == '&') {
+ m_additionalAllowedCharacter = '\'';
+ ADVANCE_TO(CharacterReferenceInAttributeValueState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ RECONSUME_IN(DataState);
+ } else {
+ m_token->appendToAttributeValue(cc);
+ ADVANCE_TO(AttributeValueSingleQuotedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AttributeValueUnquotedState) {
+ if (isTokenizerWhitespace(cc)) {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ ADVANCE_TO(BeforeAttributeNameState);
+ } else if (cc == '&') {
+ m_additionalAllowedCharacter = '>';
+ ADVANCE_TO(CharacterReferenceInAttributeValueState);
+ } else if (cc == '>') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
+ RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
+ parseError();
+ m_token->appendToAttributeValue(cc);
+ ADVANCE_TO(AttributeValueUnquotedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CharacterReferenceInAttributeValueState) {
+ bool notEnoughCharacters = false;
+ Vector<UChar, 16> decodedEntity;
+ bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
+ if (notEnoughCharacters)
+ return haveBufferedCharacterToken();
+ if (!success) {
+ ASSERT(decodedEntity.isEmpty());
+ m_token->appendToAttributeValue('&');
+ } else {
+ Vector<UChar>::const_iterator iter = decodedEntity.begin();
+ for (; iter != decodedEntity.end(); ++iter)
+ m_token->appendToAttributeValue(*iter);
+ }
+ // We're supposed to switch back to the attribute value state that
+ // we were in when we were switched into this state. Rather than
+ // keeping track of this explicitly, we observe that the previous
+ // state can be determined by m_additionalAllowedCharacter.
+ if (m_additionalAllowedCharacter == '"')
+ SWITCH_TO(AttributeValueDoubleQuotedState);
+ else if (m_additionalAllowedCharacter == '\'')
+ SWITCH_TO(AttributeValueSingleQuotedState);
+ else if (m_additionalAllowedCharacter == '>')
+ SWITCH_TO(AttributeValueUnquotedState);
+ else
+ ASSERT_NOT_REACHED();
+ }
+ END_STATE()
+
+ BEGIN_STATE(AfterAttributeValueQuotedState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeAttributeNameState);
+ else if (cc == '/')
+ ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ parseError();
+ RECONSUME_IN(BeforeAttributeNameState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(SelfClosingStartTagState) {
+ if (cc == '>') {
+ m_token->setSelfClosing();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ RECONSUME_IN(DataState);
+ } else {
+ parseError();
+ RECONSUME_IN(BeforeAttributeNameState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(BogusCommentState) {
+ m_token->beginComment();
+ RECONSUME_IN(ContinueBogusCommentState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(ContinueBogusCommentState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker)
+ return emitAndReconsumeIn(source, DataState);
+ else {
+ m_token->appendToComment(cc);
+ ADVANCE_TO(ContinueBogusCommentState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(MarkupDeclarationOpenState) {
+ DEFINE_STATIC_LOCAL(String, dashDashString, ("--"));
+ DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype"));
+ DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA["));
+ if (cc == '-') {
+ SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
+ if (result == SegmentedString::DidMatch) {
+ source.advanceAndASSERT('-');
+ source.advanceAndASSERT('-');
+ m_token->beginComment();
+ SWITCH_TO(CommentStartState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ } else if (cc == 'D' || cc == 'd') {
+ SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
+ if (result == SegmentedString::DidMatch) {
+ advanceStringAndASSERTIgnoringCase(source, "doctype");
+ SWITCH_TO(DOCTYPEState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ } else if (cc == '[' && shouldAllowCDATA()) {
+ SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
+ if (result == SegmentedString::DidMatch) {
+ advanceStringAndASSERT(source, "[CDATA[");
+ SWITCH_TO(CDATASectionState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ }
+ parseError();
+ RECONSUME_IN(BogusCommentState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(CommentStartState) {
+ if (cc == '-')
+ ADVANCE_TO(CommentStartDashState);
+ else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToComment(cc);
+ ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CommentStartDashState) {
+ if (cc == '-')
+ ADVANCE_TO(CommentEndState);
+ else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToComment('-');
+ m_token->appendToComment(cc);
+ ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CommentState) {
+ if (cc == '-')
+ ADVANCE_TO(CommentEndDashState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToComment(cc);
+ ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CommentEndDashState) {
+ if (cc == '-')
+ ADVANCE_TO(CommentEndState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToComment('-');
+ m_token->appendToComment(cc);
+ ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CommentEndState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (cc == '!') {
+ parseError();
+ ADVANCE_TO(CommentEndBangState);
+ } else if (cc == '-') {
+ parseError();
+ m_token->appendToComment('-');
+ ADVANCE_TO(CommentEndState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ m_token->appendToComment('-');
+ m_token->appendToComment('-');
+ m_token->appendToComment(cc);
+ ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CommentEndBangState) {
+ if (cc == '-') {
+ m_token->appendToComment('-');
+ m_token->appendToComment('-');
+ m_token->appendToComment('!');
+ ADVANCE_TO(CommentEndDashState);
+ } else if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToComment('-');
+ m_token->appendToComment('-');
+ m_token->appendToComment('!');
+ m_token->appendToComment(cc);
+ ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(DOCTYPEState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeDOCTYPENameState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->beginDOCTYPE();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ RECONSUME_IN(BeforeDOCTYPENameState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(BeforeDOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeDOCTYPENameState);
+ else if (isASCIIUpper(cc)) {
+ m_token->beginDOCTYPE(toLowerCase(cc));
+ ADVANCE_TO(DOCTYPENameState);
+ } else if (cc == '>') {
+ parseError();
+ m_token->beginDOCTYPE();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->beginDOCTYPE();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->beginDOCTYPE(cc);
+ ADVANCE_TO(DOCTYPENameState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(DOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(AfterDOCTYPENameState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (isASCIIUpper(cc)) {
+ m_token->appendToName(toLowerCase(cc));
+ ADVANCE_TO(DOCTYPENameState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToName(cc);
+ ADVANCE_TO(DOCTYPENameState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AfterDOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(AfterDOCTYPENameState);
+ if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ DEFINE_STATIC_LOCAL(String, publicString, ("public"));
+ DEFINE_STATIC_LOCAL(String, systemString, ("system"));
+ if (cc == 'P' || cc == 'p') {
+ SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
+ if (result == SegmentedString::DidMatch) {
+ advanceStringAndASSERTIgnoringCase(source, "public");
+ SWITCH_TO(AfterDOCTYPEPublicKeywordState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ } else if (cc == 'S' || cc == 's') {
+ SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
+ if (result == SegmentedString::DidMatch) {
+ advanceStringAndASSERTIgnoringCase(source, "system");
+ SWITCH_TO(AfterDOCTYPESystemKeywordState);
+ } else if (result == SegmentedString::NotEnoughCharacters)
+ return haveBufferedCharacterToken();
+ }
+ parseError();
+ m_token->setForceQuirks();
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+ else if (cc == '"') {
+ parseError();
+ m_token->setPublicIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ parseError();
+ m_token->setPublicIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+ else if (cc == '"') {
+ m_token->setPublicIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ m_token->setPublicIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
+ if (cc == '"')
+ ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+ else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToPublicIdentifier(cc);
+ ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
+ if (cc == '\'')
+ ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+ else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToPublicIdentifier(cc);
+ ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (cc == '"') {
+ parseError();
+ m_token->setSystemIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ parseError();
+ m_token->setSystemIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (cc == '"') {
+ m_token->setSystemIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ m_token->setSystemIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+ else if (cc == '"') {
+ parseError();
+ m_token->setSystemIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ parseError();
+ m_token->setSystemIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+ if (cc == '"') {
+ m_token->setSystemIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ m_token->setSystemIdentifierToEmptyString();
+ ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ m_token->setForceQuirks();
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
+ if (cc == '"')
+ ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToSystemIdentifier(cc);
+ ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
+ if (cc == '\'')
+ ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>') {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndResumeIn(source, DataState);
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ m_token->appendToSystemIdentifier(cc);
+ ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ parseError();
+ m_token->setForceQuirks();
+ return emitAndReconsumeIn(source, DataState);
+ } else {
+ parseError();
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(BogusDOCTYPEState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, DataState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker)
+ return emitAndReconsumeIn(source, DataState);
+ ADVANCE_TO(BogusDOCTYPEState);
+ }
+ END_STATE()
+
+ BEGIN_STATE(CDATASectionState) {
+ if (cc == ']')
+ ADVANCE_TO(CDATASectionRightSquareBracketState);
+ else if (cc == InputStreamPreprocessor::endOfFileMarker)
+ RECONSUME_IN(DataState);
+ else {
+ bufferCharacter(cc);
+ ADVANCE_TO(CDATASectionState);
+ }
+ }
+ END_STATE()
+
+ BEGIN_STATE(CDATASectionRightSquareBracketState) {
+ if (cc == ']')
+ ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
+ else {
+ bufferCharacter(']');
+ RECONSUME_IN(CDATASectionState);
+ }
+ }
+
+ BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
+ if (cc == '>')
+ ADVANCE_TO(DataState);
+ else {
+ bufferCharacter(']');
+ bufferCharacter(']');
+ RECONSUME_IN(CDATASectionState);
+ }
+ }
+ END_STATE()
+
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+}
+
+void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame)
+{ // Switches the tokenizer state according to |tagName|; tag names not listed below leave the state untouched.
+ if (tagName == textareaTag || tagName == titleTag)
+ setState(RCDATAState);
+ else if (tagName == plaintextTag)
+ setState(PLAINTEXTState);
+ else if (tagName == scriptTag)
+ setState(ScriptDataState);
+ else if (tagName == styleTag
+ || tagName == iframeTag
+ || tagName == xmpTag
+ || (tagName == noembedTag && HTMLTreeBuilder::pluginsEnabled(frame)) // noembed counts only when plugins are reported enabled for |frame|.
+ || tagName == noframesTag
+ || (tagName == noscriptTag && HTMLTreeBuilder::scriptEnabled(frame))) // noscript counts only when script is reported enabled for |frame|.
+ setState(RAWTEXTState);
+}
+
+inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
+{ // Returns the result of comparing m_temporaryBuffer against |expectedString| via vectorEqualsString.
+ return vectorEqualsString(m_temporaryBuffer, expectedString);
+}
+
+inline void HTMLTokenizer::addToPossibleEndTag(UChar cc)
+{ // Appends |cc| to m_bufferedEndTagName.
+ ASSERT(isEndTagBufferingState(m_state)); // Callers are expected to be in an end-tag buffering state.
+ m_bufferedEndTagName.append(cc);
+}
+
+inline bool HTMLTokenizer::isAppropriateEndTag()
+{ // True when the end tag name buffered so far equals m_appropriateEndTagName.
+ return m_bufferedEndTagName == m_appropriateEndTagName;
+}
+
+inline void HTMLTokenizer::bufferCharacter(UChar character)
+{ // Appends |character| to the in-progress token, first ensuring that token is a character token.
+ ASSERT(character != InputStreamPreprocessor::endOfFileMarker); // The end-of-file marker must never be buffered as text.
+ m_token->ensureIsCharacterToken();
+ m_token->appendToCharacter(character);
+}
+
+inline void HTMLTokenizer::parseError()
+{ // Called at every parse-error site in the tokenizer; currently it only invokes notImplemented().
+ notImplemented();
+}
+
+inline bool HTMLTokenizer::haveBufferedCharacterToken()
+{ // True when the in-progress token's type is HTMLToken::Character.
+ return m_token->type() == HTMLToken::Character;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLTokenizer.h b/Source/WebCore/html/parser/HTMLTokenizer.h
new file mode 100644
index 0000000..f16b049
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTokenizer.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLTokenizer_h
+#define HTMLTokenizer_h
+
+#include "SegmentedString.h"
+#include <wtf/Noncopyable.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/Vector.h>
+#include <wtf/text/AtomicString.h>
+
+namespace WebCore {
+
+class Element;
+class Frame;
+class HTMLToken;
+
+class HTMLTokenizer : public Noncopyable {
+public:
+ enum State {
+ DataState,
+ CharacterReferenceInDataState,
+ RCDATAState,
+ CharacterReferenceInRCDATAState,
+ RAWTEXTState,
+ ScriptDataState,
+ PLAINTEXTState,
+ TagOpenState,
+ EndTagOpenState,
+ TagNameState,
+ RCDATALessThanSignState,
+ RCDATAEndTagOpenState,
+ RCDATAEndTagNameState,
+ RAWTEXTLessThanSignState,
+ RAWTEXTEndTagOpenState,
+ RAWTEXTEndTagNameState,
+ ScriptDataLessThanSignState,
+ ScriptDataEndTagOpenState,
+ ScriptDataEndTagNameState,
+ ScriptDataEscapeStartState,
+ ScriptDataEscapeStartDashState,
+ ScriptDataEscapedState,
+ ScriptDataEscapedDashState,
+ ScriptDataEscapedDashDashState,
+ ScriptDataEscapedLessThanSignState,
+ ScriptDataEscapedEndTagOpenState,
+ ScriptDataEscapedEndTagNameState,
+ ScriptDataDoubleEscapeStartState,
+ ScriptDataDoubleEscapedState,
+ ScriptDataDoubleEscapedDashState,
+ ScriptDataDoubleEscapedDashDashState,
+ ScriptDataDoubleEscapedLessThanSignState,
+ ScriptDataDoubleEscapeEndState,
+ BeforeAttributeNameState,
+ AttributeNameState,
+ AfterAttributeNameState,
+ BeforeAttributeValueState,
+ AttributeValueDoubleQuotedState,
+ AttributeValueSingleQuotedState,
+ AttributeValueUnquotedState,
+ CharacterReferenceInAttributeValueState,
+ AfterAttributeValueQuotedState,
+ SelfClosingStartTagState,
+ BogusCommentState,
+ // The ContinueBogusCommentState is not in the HTML5 spec, but we use
+ // it internally to keep track of whether we've started the bogus
+ // comment token yet.
+ ContinueBogusCommentState,
+ MarkupDeclarationOpenState,
+ CommentStartState,
+ CommentStartDashState,
+ CommentState,
+ CommentEndDashState,
+ CommentEndState,
+ CommentEndBangState,
+ DOCTYPEState,
+ BeforeDOCTYPENameState,
+ DOCTYPENameState,
+ AfterDOCTYPENameState,
+ AfterDOCTYPEPublicKeywordState,
+ BeforeDOCTYPEPublicIdentifierState,
+ DOCTYPEPublicIdentifierDoubleQuotedState,
+ DOCTYPEPublicIdentifierSingleQuotedState,
+ AfterDOCTYPEPublicIdentifierState,
+ BetweenDOCTYPEPublicAndSystemIdentifiersState,
+ AfterDOCTYPESystemKeywordState,
+ BeforeDOCTYPESystemIdentifierState,
+ DOCTYPESystemIdentifierDoubleQuotedState,
+ DOCTYPESystemIdentifierSingleQuotedState,
+ AfterDOCTYPESystemIdentifierState,
+ BogusDOCTYPEState,
+ CDATASectionState,
+ // These CDATA states are not in the HTML5 spec, but we use them internally.
+ CDATASectionRightSquareBracketState,
+ CDATASectionDoubleRightSquareBracketState,
+ };
+
+ static PassOwnPtr<HTMLTokenizer> create(bool usePreHTML5ParserQuirks) { return adoptPtr(new HTMLTokenizer(usePreHTML5ParserQuirks)); }
+ ~HTMLTokenizer();
+
+ void reset();
+
+ // This function returns true if it emits a token. Otherwise, callers
+ // must provide the same (in progress) token on the next call (unless
+ // they call reset() first).
+ bool nextToken(SegmentedString&, HTMLToken&);
+
+ int lineNumber() const { return m_lineNumber; }
+ int columnNumber() const { return 1; } // Always 1; matches LegacyHTMLDocumentParser.h behavior.
+
+ State state() const { return m_state; }
+ void setState(State state) { m_state = state; }
+
+ // Updates the tokenizer's state according to the given tag name. This is
+ // an approximation of how the tree builder would update the tokenizer's
+ // state. This method is useful for approximating HTML tokenization. To
+ // get exactly the correct tokenization, you need the real tree builder.
+ //
+ // The main failures in the approximation are as follows:
+ //
+ // * The first set of character tokens emitted for a <pre> element might
+ // contain an extra leading newline.
+ // * The replacement of U+0000 with U+FFFD will not be sensitive to the
+ // tree builder's insertion mode.
+ // * CDATA sections in foreign content will be tokenized as bogus comments
+ // instead of as character tokens.
+ //
+ void updateStateFor(const AtomicString& tagName, Frame*);
+
+ // Hack to skip leading newline in <pre>/<listing> for authoring ease.
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+ void setSkipLeadingNewLineForListing(bool value) { m_skipLeadingNewLineForListing = value; }
+
+ bool forceNullCharacterReplacement() const { return m_forceNullCharacterReplacement; }
+ void setForceNullCharacterReplacement(bool value) { m_forceNullCharacterReplacement = value; }
+
+ bool shouldAllowCDATA() const { return m_shouldAllowCDATA; }
+ void setShouldAllowCDATA(bool value) { m_shouldAllowCDATA = value; }
+
+ bool shouldSkipNullCharacters() const
+ { // True only in the Data, RCDATA, RAWTEXT and PLAINTEXT states, and never when null replacement is forced.
+ return !m_forceNullCharacterReplacement
+ && (m_state == DataState
+ || m_state == RCDATAState
+ || m_state == RAWTEXTState
+ || m_state == PLAINTEXTState);
+ }
+
+private:
+ // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
+ class InputStreamPreprocessor : public Noncopyable {
+ public:
+ InputStreamPreprocessor(HTMLTokenizer* tokenizer)
+ : m_tokenizer(tokenizer)
+ , m_nextInputCharacter('\0')
+ , m_skipNextNewLine(false)
+ {
+ }
+
+ UChar nextInputCharacter() const { return m_nextInputCharacter; }
+
+ // Returns whether we succeeded in peeking at the next character.
+ // The only way we can fail to peek is if there are no more
+ // characters in |source| (after collapsing \r\n, etc).
+ ALWAYS_INLINE bool peek(SegmentedString& source, int& lineNumber)
+ {
+ PeekAgain:
+ m_nextInputCharacter = *source;
+
+ // Every branch in this function is expensive, so we have a
+ // fast-reject branch for characters that don't require special
+ // handling. Please run the parser benchmark whenever you touch
+ // this function. It's very hot.
+ static const UChar specialCharacterMask = '\n' | '\r' | '\0'; // == 0x0F: any character with a bit set above the low four bits takes the fast path below.
+ if (m_nextInputCharacter & ~specialCharacterMask) {
+ m_skipNextNewLine = false;
+ return true;
+ }
+
+ if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
+ m_skipNextNewLine = false;
+ source.advancePastNewline(lineNumber);
+ if (source.isEmpty())
+ return false;
+ m_nextInputCharacter = *source;
+ }
+ if (m_nextInputCharacter == '\r') {
+ m_nextInputCharacter = '\n'; // A '\r' is reported to the tokenizer as '\n'.
+ m_skipNextNewLine = true; // A '\n' directly after this '\r' is dropped by the branch above on the next peek.
+ } else {
+ m_skipNextNewLine = false;
+ // FIXME: The spec indicates that the surrogate pair range as well as
+ // a number of specific character values are parse errors and should be replaced
+ // by the replacement character. We suspect this is a problem with the spec as doing
+ // that filtering breaks surrogate pair handling and causes us not to match Minefield.
+ if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
+ if (m_tokenizer->shouldSkipNullCharacters()) {
+ source.advancePastNonNewline();
+ if (source.isEmpty())
+ return false;
+ goto PeekAgain;
+ }
+ m_nextInputCharacter = 0xFFFD; // Nulls that are not skipped are reported as U+FFFD.
+ }
+ }
+ return true;
+ }
+
+ // Returns whether there are more characters in |source| after advancing.
+ bool advance(SegmentedString& source, int& lineNumber)
+ {
+ source.advance(lineNumber);
+ if (source.isEmpty())
+ return false;
+ return peek(source, lineNumber);
+ }
+
+ static const UChar endOfFileMarker; // No in-class initializer; the value comes from the out-of-line definition.
+
+ private:
+ bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const
+ { // True when |source| is closed and its length() is exactly 1.
+ return source.isClosed() && source.length() == 1;
+ }
+
+ HTMLTokenizer* m_tokenizer;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
+ UChar m_nextInputCharacter;
+ bool m_skipNextNewLine;
+ };
+
+ HTMLTokenizer(bool usePreHTML5ParserQuirks);
+
+ inline bool processEntity(SegmentedString&);
+
+ inline void parseError();
+ inline void bufferCharacter(UChar);
+ inline void bufferCodePoint(unsigned);
+
+ inline bool emitAndResumeIn(SegmentedString&, State);
+ inline bool emitAndReconsumeIn(SegmentedString&, State);
+ inline bool emitEndOfFile(SegmentedString&);
+ inline bool flushEmitAndResumeIn(SegmentedString&, State);
+
+ // Return whether we need to emit a character token before dealing with
+ // the buffered end tag.
+ inline bool flushBufferedEndTag(SegmentedString&);
+ inline bool temporaryBufferIs(const String&);
+
+ // Sometimes we speculatively consume input characters and we don't
+ // know whether they represent end tags or RCDATA, etc. These
+ // functions help manage that state.
+ inline void addToPossibleEndTag(UChar cc);
+ inline void saveEndTagNameIfNeeded();
+ inline bool isAppropriateEndTag();
+
+ inline bool haveBufferedCharacterToken();
+
+ State m_state;
+
+ Vector<UChar, 32> m_appropriateEndTagName;
+
+ // m_token is owned by the caller. If nextToken is not on the stack,
+ // this member might be pointing to unallocated memory.
+ HTMLToken* m_token;
+ int m_lineNumber;
+
+ bool m_skipLeadingNewLineForListing;
+ bool m_forceNullCharacterReplacement;
+ bool m_shouldAllowCDATA;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
+ Vector<UChar, 32> m_temporaryBuffer;
+
+ // We occasionally want to emit both a character token and an end tag
+ // token (e.g., when lexing script). We buffer the name of the end tag
+ // token here so we remember it next time we re-enter the tokenizer.
+ Vector<UChar, 32> m_bufferedEndTagName;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character
+ UChar m_additionalAllowedCharacter;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
+ InputStreamPreprocessor m_inputStreamPreprocessor;
+
+ bool m_usePreHTML5ParserQuirks;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
new file mode 100644
index 0000000..02713e5
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -0,0 +1,2822 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLTreeBuilder.h"
+
+#include "CharacterNames.h"
+#include "Comment.h"
+#include "DocumentFragment.h"
+#include "DocumentType.h"
+#include "Frame.h"
+#include "HTMLDocument.h"
+#include "HTMLDocumentParser.h"
+#include "HTMLElementFactory.h"
+#include "HTMLFormElement.h"
+#include "HTMLHtmlElement.h"
+#include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
+#include "HTMLScriptElement.h"
+#include "HTMLToken.h"
+#include "HTMLTokenizer.h"
+#include "LocalizedStrings.h"
+#include "MathMLNames.h"
+#include "NotImplemented.h"
+#include "SVGNames.h"
+#include "ScriptController.h"
+#include "Text.h"
+#include "XLinkNames.h"
+#include "XMLNSNames.h"
+#include "XMLNames.h"
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+// Sentinel integer used for a position that has not been set.
+static const int uninitializedLineNumberValue = -1;
+
+// Returns the TextPosition1 used as the "not set" value for script start
+// positions: its first component is built from the sentinel above, its
+// second from OneBasedNumber::base().
+static TextPosition1 uninitializedPositionValue1()
+{
+    return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(uninitializedLineNumberValue), WTF::OneBasedNumber::base());
+}
+
+namespace {
+
+// Character predicate: true for HTML whitespace and for replacementCharacter.
+inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
+{
+    if (character == replacementCharacter)
+        return true;
+    return isHTMLSpace(character);
+}
+
+// True when every character of |string| satisfies isHTMLSpace.
+inline bool isAllWhitespace(const String& string)
+{
+    const bool onlyHTMLSpaces = string.isAllSpecialCharacters<isHTMLSpace>();
+    return onlyHTMLSpaces;
+}
+
+// True when every character of |string| is either HTML whitespace or the
+// replacement character.
+inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
+{
+    const bool onlySpacesOrReplacements = string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
+    return onlySpacesOrReplacements;
+}
+
+// True when |tagName| is one of h1 through h6.
+bool isNumberedHeaderTag(const AtomicString& tagName)
+{
+    if (tagName == h1Tag || tagName == h2Tag || tagName == h3Tag)
+        return true;
+    return tagName == h4Tag || tagName == h5Tag || tagName == h6Tag;
+}
+
+// True when |tagName| is caption, col or colgroup.
+bool isCaptionColOrColgroupTag(const AtomicString& tagName)
+{
+    if (tagName == captionTag)
+        return true;
+    return tagName == colTag || tagName == colgroupTag;
+}
+
+// True when |tagName| names a table cell element (th or td).
+bool isTableCellContextTag(const AtomicString& tagName)
+{
+    if (tagName == thTag)
+        return true;
+    return tagName == tdTag;
+}
+
+// True when |tagName| is tbody, tfoot or thead.
+bool isTableBodyContextTag(const AtomicString& tagName)
+{
+    if (tagName == tbodyTag)
+        return true;
+    return tagName == tfootTag || tagName == theadTag;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
+//
+// Returns true when |node| belongs to the "special" category: a fixed set of
+// MathML/SVG elements, or an element in the XHTML namespace whose local name
+// is in the list below. The HTML names are grouped alphabetically; the table
+// and heading families are delegated to the helpers defined above.
+bool isSpecialNode(Node* node)
+{
+    const bool isSpecialForeignElement = node->hasTagName(MathMLNames::miTag)
+        || node->hasTagName(MathMLNames::moTag)
+        || node->hasTagName(MathMLNames::mnTag)
+        || node->hasTagName(MathMLNames::msTag)
+        || node->hasTagName(MathMLNames::mtextTag)
+        || node->hasTagName(MathMLNames::annotation_xmlTag)
+        || node->hasTagName(SVGNames::foreignObjectTag)
+        || node->hasTagName(SVGNames::descTag)
+        || node->hasTagName(SVGNames::titleTag);
+    if (isSpecialForeignElement)
+        return true;
+
+    // Everything below only applies to elements in the XHTML namespace.
+    if (node->namespaceURI() != xhtmlNamespaceURI)
+        return false;
+
+    const AtomicString& name = node->localName();
+
+    // h1-h6, caption/col/colgroup, tbody/tfoot/thead and td/th.
+    if (isNumberedHeaderTag(name)
+        || isCaptionColOrColgroupTag(name)
+        || isTableBodyContextTag(name)
+        || isTableCellContextTag(name))
+        return true;
+
+    // a - c
+    if (name == addressTag
+        || name == appletTag
+        || name == areaTag
+        || name == articleTag
+        || name == asideTag
+        || name == baseTag
+        || name == basefontTag
+        || name == bgsoundTag
+        || name == blockquoteTag
+        || name == bodyTag
+        || name == brTag
+        || name == buttonTag
+        || name == centerTag
+        || name == commandTag)
+        return true;
+
+    // d - f
+    if (name == ddTag
+        || name == detailsTag
+        || name == dirTag
+        || name == divTag
+        || name == dlTag
+        || name == dtTag
+        || name == embedTag
+        || name == fieldsetTag
+        || name == figcaptionTag
+        || name == figureTag
+        || name == footerTag
+        || name == formTag
+        || name == frameTag
+        || name == framesetTag)
+        return true;
+
+    // h - m
+    if (name == headTag
+        || name == headerTag
+        || name == hgroupTag
+        || name == hrTag
+        || name == htmlTag
+        || name == iframeTag
+        || name == imgTag
+        || name == inputTag
+        || name == isindexTag
+        || name == liTag
+        || name == linkTag
+        || name == listingTag
+        || name == marqueeTag
+        || name == menuTag
+        || name == metaTag)
+        return true;
+
+    // n - p
+    if (name == navTag
+        || name == noembedTag
+        || name == noframesTag
+        || name == noscriptTag
+        || name == objectTag
+        || name == olTag
+        || name == pTag
+        || name == paramTag
+        || name == plaintextTag
+        || name == preTag)
+        return true;
+
+    // s - x
+    return name == scriptTag
+        || name == sectionTag
+        || name == selectTag
+        || name == styleTag
+        || name == summaryTag
+        || name == tableTag
+        || name == textareaTag
+        || name == titleTag
+        || name == trTag
+        || name == ulTag
+        || name == wbrTag
+        || name == xmpTag;
+}
+
+// True for the formatting tag names other than a and nobr:
+// b, big, code, em, font, i, s, small, strike, strong, tt and u.
+bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == bTag || tagName == bigTag || tagName == codeTag || tagName == emTag)
+        return true;
+    if (tagName == fontTag || tagName == iTag || tagName == sTag || tagName == smallTag)
+        return true;
+    return tagName == strikeTag || tagName == strongTag || tagName == ttTag || tagName == uTag;
+}
+
+// True for nobr and for every tag accepted by isNonAnchorNonNobrFormattingTag.
+bool isNonAnchorFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == nobrTag)
+        return true;
+    return isNonAnchorNonNobrFormattingTag(tagName);
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
+// True for a, nobr and the remaining formatting tag names.
+bool isFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == aTag)
+        return true;
+    return isNonAnchorFormattingTag(tagName);
+}
+
+// Walks from |element| up through its element ancestors and returns the first
+// one that is a <form> (|element| itself included). Returns 0 when |element|
+// is null, or when the walk reaches a missing or non-element parent first.
+HTMLFormElement* closestFormAncestor(Element* element)
+{
+    for (Element* current = element; current; ) {
+        if (current->hasTagName(formTag))
+            return static_cast<HTMLFormElement*>(current);
+        ContainerNode* parentNode = current->parentNode();
+        if (!parentNode || !parentNode->isElementNode())
+            break;
+        current = static_cast<Element*>(parentNode);
+    }
+    return 0;
+}
+
+} // namespace
+
+// A forward-only cursor over a run of UChars that this object does not own
+// (the characters of a token, or of a String). Callers consume the run
+// piecewise with the take*/skip*/give* members.
+//
+// Contract enforced by assertions: the buffer must be non-empty when it is
+// constructed and when a take*/skip* member is called, and it must have been
+// fully consumed by the time it is destroyed.
+//
+// The scanning loops compare against m_end before dereferencing, so a
+// violated precondition in a build without assertions cannot read past the
+// end of the run.
+class HTMLTreeBuilder::ExternalCharacterTokenBuffer : public Noncopyable {
+public:
+    explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
+        : m_current(token.characters().data())
+        , m_end(m_current + token.characters().size())
+    {
+        ASSERT(!isEmpty());
+    }
+
+    explicit ExternalCharacterTokenBuffer(const String& string)
+        : m_current(string.characters())
+        , m_end(m_current + string.length())
+    {
+        ASSERT(!isEmpty());
+    }
+
+    ~ExternalCharacterTokenBuffer()
+    {
+        ASSERT(isEmpty());
+    }
+
+    // True once every character has been consumed.
+    bool isEmpty() const { return m_current == m_end; }
+
+    // Advances past any leading HTML whitespace.
+    void skipLeadingWhitespace()
+    {
+        skipLeading<isHTMLSpace>();
+    }
+
+    // Consumes and returns the leading HTML whitespace; returns the null
+    // string when the next character is not whitespace.
+    String takeLeadingWhitespace()
+    {
+        return takeLeading<isHTMLSpace>();
+    }
+
+    // Consumes and returns the leading run of non-whitespace characters;
+    // returns the null string when the next character is whitespace.
+    String takeLeadingNonWhitespace()
+    {
+        return takeLeading<isNotHTMLSpace>();
+    }
+
+    // Consumes and returns everything that is left.
+    String takeRemaining()
+    {
+        ASSERT(!isEmpty());
+        const UChar* start = m_current;
+        m_current = m_end;
+        return String(start, m_current - start);
+    }
+
+    // Appends everything that is left to |recipient| and empties the buffer.
+    void giveRemainingTo(Vector<UChar>& recipient)
+    {
+        recipient.append(m_current, m_end - m_current);
+        m_current = m_end;
+    }
+
+    // Consumes everything that is left and returns only the HTML whitespace
+    // characters found in it, in order.
+    String takeRemainingWhitespace()
+    {
+        ASSERT(!isEmpty());
+        Vector<UChar> whitespace;
+        // Check the bound before reading so the cursor never passes m_end.
+        while (m_current < m_end) {
+            UChar cc = *m_current++;
+            if (isHTMLSpace(cc))
+                whitespace.append(cc);
+        }
+        // Returning the null string when there aren't any whitespace
+        // characters is slightly cleaner semantically because we don't want
+        // to insert a text node (as opposed to inserting an empty text node).
+        if (whitespace.isEmpty())
+            return String();
+        return String::adopt(whitespace);
+    }
+
+private:
+    // Advances the cursor while the next character satisfies the predicate.
+    template<bool characterPredicate(UChar)>
+    void skipLeading()
+    {
+        ASSERT(!isEmpty());
+        // Bound check first: never dereference m_current at m_end.
+        while (m_current != m_end && characterPredicate(*m_current))
+            ++m_current;
+    }
+
+    // Consumes the leading run matching the predicate and returns it, or the
+    // null string when the run is empty.
+    template<bool characterPredicate(UChar)>
+    String takeLeading()
+    {
+        ASSERT(!isEmpty());
+        const UChar* start = m_current;
+        skipLeading<characterPredicate>();
+        if (start == m_current)
+            return String();
+        return String(start, m_current - start);
+    }
+
+    const UChar* m_current; // Next unconsumed character.
+    const UChar* m_end; // One past the last character.
+};
+
+
+// Constructor used when parsing into |document| directly (no fragment
+// context). The builder starts unpaused in InitialMode with frameset-ok set,
+// and both script position members hold their "not set" values.
+HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
+    : m_framesetOk(true)
+    , m_document(document)
+    , m_tree(document, FragmentScriptingAllowed, false)
+    , m_reportErrors(reportErrors)
+    , m_isPaused(false)
+    , m_insertionMode(InitialMode)
+    , m_originalInsertionMode(InitialMode)
+    , m_parser(parser)
+    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
+    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
+    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
+    , m_hasPendingForeignInsertionModeSteps(false)
+{
+    // Nothing to do beyond member initialization.
+}
+
+// FIXME: Member variables should be grouped into self-initializing structs to
+// minimize code duplication between these constructors.
+//
+// Constructor used when parsing into |fragment|. Nodes are built in the
+// document owned by the fragment parsing context (m_document is taken from
+// m_fragmentContext). When a |contextElement| is supplied, the builder is
+// primed as if it were already positioned inside that element.
+HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
+    : m_framesetOk(true)
+    , m_fragmentContext(fragment, contextElement, scriptingPermission)
+    , m_document(m_fragmentContext.document())
+    , m_tree(m_document, scriptingPermission, true)
+    , m_reportErrors(false) // FIXME: Why not report errors in fragments?
+    , m_isPaused(false)
+    , m_insertionMode(InitialMode)
+    , m_originalInsertionMode(InitialMode)
+    , m_parser(parser)
+    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
+    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
+    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
+    , m_hasPendingForeignInsertionModeSteps(false)
+{
+    // Without a context element there is nothing to prime.
+    if (!contextElement)
+        return;
+
+    // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
+    m_document->setCompatibilityMode(contextElement->document()->compatibilityMode());
+    processFakeStartTag(htmlTag);
+    resetInsertionModeAppropriately();
+    m_tree.setForm(closestFormAncestor(contextElement));
+}
+
+// Out-of-line destructor with no explicit work; members clean up after
+// themselves.
+HTMLTreeBuilder::~HTMLTreeBuilder()
+{
+}
+
+// Drops the builder's document pointer and detaches the construction site.
+void HTMLTreeBuilder::detach()
+{
+    // Fragment parsing gains little from this, but DocumentParser expects
+    // detach() to be called before destruction in every case, so we stay
+    // consistent.
+    m_document = 0;
+
+    // m_tree.clear() would be the simpler choice, but HTMLConstructionSite
+    // may still be on the call stack when we get here, so only detach it.
+    m_tree.detach();
+}
+
+// Default context: no fragment, no context element, scripting allowed.
+HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
+    : m_fragment(0)
+    , m_contextElement(0)
+    , m_scriptingPermission(FragmentScriptingAllowed)
+{
+    // Intentionally empty.
+}
+
+// Context for parsing into |fragment|. A separate HTMLDocument is created to
+// receive the parsed nodes; it is given the base URI and the compatibility
+// mode of the fragment's own document.
+HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
+    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI()))
+    , m_fragment(fragment)
+    , m_contextElement(contextElement)
+    , m_scriptingPermission(scriptingPermission)
+{
+    // m_fragment was just initialized from |fragment|, so this reads the same
+    // owner document as the initializer above.
+    m_dummyDocumentForFragmentParsing->setCompatibilityMode(m_fragment->document()->compatibilityMode());
+}
+
+// Returns the dummy document that parsed nodes are built in. Only meaningful
+// for a context that was created with a fragment (asserted).
+Document* HTMLTreeBuilder::FragmentParsingContext::document() const
+{
+    ASSERT(m_fragment);
+    Document* parsingDocument = m_dummyDocumentForFragmentParsing.get();
+    return parsingDocument;
+}
+
+// Moves the parsed nodes out of the dummy document and into the fragment.
+// With a context element the children of the dummy document's document
+// element are moved; otherwise the dummy document's own children are.
+void HTMLTreeBuilder::FragmentParsingContext::finished()
+{
+    ContainerNode* source;
+    if (m_contextElement)
+        source = m_dummyDocumentForFragmentParsing->documentElement();
+    else
+        source = m_dummyDocumentForFragmentParsing.get();
+    m_fragment->takeAllChildrenFrom(source);
+}
+
+// Out-of-line destructor with no explicit work.
+HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
+{
+}
+
+// Hands the pending script element to the caller, together with its start
+// position (written to |scriptStartPosition|). Afterwards the builder is
+// unpaused and its stored start position is back at the "not set" value.
+PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
+{
+    // Report the stored position, then reset it.
+    scriptStartPosition = m_scriptToProcessStartPosition;
+    m_scriptToProcessStartPosition = uninitializedPositionValue1();
+
+    // The HTML5 spec is written as though scripts run inside the tree
+    // builder. We instead pause the parser to leave the tree builder and
+    // resume before running scripts, so unpause here; callers may pause us
+    // again while processing the script.
+    m_isPaused = false;
+
+    return m_scriptToProcess.release();
+}
+
+// Entry point for raw tokenizer output: wraps |rawToken| in an
+// AtomicHTMLToken and forwards it to constructTreeFromAtomicToken().
+void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
+{
+    AtomicHTMLToken atomicToken(rawToken);
+    constructTreeFromAtomicToken(atomicToken);
+}
+
+// Processes |token| and then updates two tokenizer flags from the insertion
+// mode that results:
+//  - null-character replacement is forced in TextMode and
+//    InForeignContentMode;
+//  - CDATA sections are allowed only in InForeignContentMode while the
+//    current element is outside the XHTML namespace.
+void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
+{
+    processToken(token);
+
+    const bool inForeignContent = m_insertionMode == InForeignContentMode;
+
+    // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
+    // the U+0000 characters into replacement characters has compatibility
+    // problems.
+    const bool forceNullReplacement = m_insertionMode == TextMode || inForeignContent;
+    m_parser->tokenizer()->setForceNullCharacterReplacement(forceNullReplacement);
+
+    // The current element is only inspected when in foreign content, as the
+    // short-circuit evaluation guarantees.
+    const bool allowCDATA = inForeignContent && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI;
+    m_parser->tokenizer()->setShouldAllowCDATA(allowCDATA);
+}
+
+// Dispatches |token| to the handler for its token type. An Uninitialized
+// token is a programming error (asserted).
+void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
+{
+    switch (token.type()) {
+    case HTMLToken::Uninitialized:
+        ASSERT_NOT_REACHED();
+        break;
+    case HTMLToken::DOCTYPE:
+        processDoctypeToken(token);
+        break;
+    case HTMLToken::StartTag:
+        processStartTag(token);
+        break;
+    case HTMLToken::EndTag:
+        processEndTag(token);
+        break;
+    case HTMLToken::Comment:
+        // Nothing follows the switch, so leaving it is the same as returning.
+        processComment(token);
+        break;
+    case HTMLToken::Character:
+        processCharacter(token);
+        break;
+    case HTMLToken::EndOfFile:
+        processEndOfFile(token);
+        break;
+    }
+}
+
+// Handles a DOCTYPE token.
+//  - InitialMode: insert the doctype and move to BeforeHTMLMode.
+//  - InTableTextMode: run the in-table-text default handling, then process
+//    the token again under whatever mode that leaves us in.
+//  - Any other mode: report a parse error and drop the token.
+void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::DOCTYPE);
+    if (m_insertionMode == InitialMode) {
+        m_tree.insertDoctype(token);
+        setInsertionMode(BeforeHTMLMode);
+    } else if (m_insertionMode == InTableTextMode) {
+        defaultForInTableText();
+        processDoctypeToken(token);
+    } else
+        parseError(token);
+}
+
+// Synthesizes a start tag token named after |tagName|'s local name, carrying
+// |attributes|, and processes it as if it had come from the tokenizer.
+void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
+{
+    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
+    AtomicHTMLToken syntheticStartTag(HTMLToken::StartTag, tagName.localName(), attributes);
+    processStartTag(syntheticStartTag);
+}
+
+// Synthesizes an end tag token named after |tagName|'s local name and
+// processes it as if it had come from the tokenizer.
+void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
+{
+    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
+    AtomicHTMLToken syntheticEndTag(HTMLToken::EndTag, tagName.localName());
+    processEndTag(syntheticEndTag);
+}
+
+// Processes |characters| as if they were character data from the tokenizer.
+// |characters| must not be empty (asserted).
+void HTMLTreeBuilder::processFakeCharacters(const String& characters)
+{
+    ASSERT(!characters.isEmpty());
+    ExternalCharacterTokenBuffer characterBuffer(characters);
+    processCharacterBuffer(characterBuffer);
+}
+
+// Processes a synthetic </p> when, and only when, a p element is in button
+// scope on the stack of open elements.
+void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
+{
+    // processFakeEndTag() builds the same EndTag token from pTag.localName()
+    // and passes it to processEndTag().
+    if (m_tree.openElements()->inButtonScope(pTag.localName()))
+        processFakeEndTag(pTag);
+}
+
+// Builds the attribute map for the <input> that replaces an <isindex>.
+// The token's attributes are taken over (a fresh map is created when it has
+// none), any name/action/prompt attributes are removed, and a mapped name
+// attribute whose value is the isindex local name is inserted.
+PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
+{
+    RefPtr<NamedNodeMap> inputAttributes = token.takeAtributes();
+    if (inputAttributes) {
+        inputAttributes->removeAttribute(nameAttr);
+        inputAttributes->removeAttribute(actionAttr);
+        inputAttributes->removeAttribute(promptAttr);
+    } else
+        inputAttributes = NamedNodeMap::create();
+
+    RefPtr<Attribute> nameAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
+    inputAttributes->insertAttribute(nameAttribute.release(), false);
+    return inputAttributes.release();
+}
+
+// Expands an <isindex> start tag, seen while in body, into the equivalent
+// sequence of synthetic tokens:
+//   <form [action]> <hr> <label> prompt-text <input ...> </label> <hr> </form>
+//
+// The tag is always a parse error, and it is ignored entirely when a form
+// element pointer is already set. The prompt text comes from the token's
+// prompt attribute when present, otherwise from searchableIndexIntroduction().
+void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(token.name() == isindexTag);
+    parseError(token);
+    if (m_tree.form())
+        return;
+    notImplemented(); // Acknowledge self-closing flag
+    processFakeStartTag(formTag);
+    RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
+    if (actionAttribute) {
+        ASSERT(m_tree.currentElement()->hasTagName(formTag));
+        m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
+    }
+    processFakeStartTag(hrTag);
+    processFakeStartTag(labelTag);
+    RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
+    String promptText;
+    if (promptAttribute)
+        promptText = promptAttribute->value();
+    else
+        promptText = searchableIndexIntroduction();
+    // processFakeCharacters() requires a non-empty string, and an empty
+    // prompt (e.g. prompt="") contributes no characters anyway, so skip it.
+    if (!promptText.isEmpty())
+        processFakeCharacters(promptText);
+    processFakeStartTag(inputTag, attributesForIsindexInput(token));
+    notImplemented(); // This second set of characters may be needed by non-english locales.
+    processFakeEndTag(labelTag);
+    processFakeStartTag(hrTag);
+    processFakeEndTag(formTag);
+}
+
+namespace {
+
+// Close predicate for processCloseWhenNestedTag: matches <li>.
+bool isLi(const Element* element)
+{
+    const bool isListItem = element->hasTagName(liTag);
+    return isListItem;
+}
+
+// Close predicate for processCloseWhenNestedTag: matches <dd> and <dt>.
+bool isDdOrDt(const Element* element)
+{
+    if (element->hasTagName(ddTag))
+        return true;
+    return element->hasTagName(dtTag);
+}
+
+}
+
+// Shared handling for start tags that implicitly close an open element of
+// the same family (li; dd/dt). Clears frameset-ok, then walks the stack of
+// open elements from the top:
+//  - the first element accepted by shouldClose gets a synthetic end tag and
+//    the walk stops;
+//  - the walk also stops at the first special element other than address,
+//    div or p.
+// Finally closes any p in button scope and inserts the new element.
+template <bool shouldClose(const Element*)>
+void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
+{
+    m_framesetOk = false;
+    for (HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); ; record = record->next()) {
+        Element* element = record->element();
+        if (shouldClose(element)) {
+            processFakeEndTag(element->tagQName());
+            break;
+        }
+        const bool keepsWalking = element->hasTagName(addressTag)
+            || element->hasTagName(divTag)
+            || element->hasTagName(pTag);
+        if (!keepsWalking && isSpecialNode(element))
+            break;
+    }
+    processFakePEndTagIfPInButtonScope();
+    m_tree.insertHTMLElement(token);
+}
+
+namespace {
+
+// Lookup table from the spelling of a name as it appears on a token to the
+// QualifiedName it should be replaced with.
+typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
+
+// For each of the |length| entries in |names| whose local name is not
+// already lower case, adds a mapping from the lowered local name to the
+// original QualifiedName. Names that are already lower case are skipped.
+void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
+{
+    for (size_t index = 0; index < length; ++index) {
+        const QualifiedName& qualifiedName = *names[index];
+        const AtomicString& casedLocalName = qualifiedName.localName();
+        AtomicString lowered = casedLocalName.lower();
+        if (lowered == casedLocalName)
+            continue;
+        map->add(lowered, qualifiedName);
+    }
+}
+
+// Replaces the token's name with the mixed-case SVG tag name it lowers to,
+// when there is one; otherwise the token is left alone. The lookup table is
+// built from SVGNames::getSVGTags() on first use and kept for the lifetime
+// of the process.
+void adjustSVGTagNameCase(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* caseMap = 0;
+    if (!caseMap) {
+        caseMap = new PrefixedNameToQualifiedNameMap;
+        size_t tagCount = 0;
+        QualifiedName** svgTags = SVGNames::getSVGTags(&tagCount);
+        mapLoweredLocalNameToName(caseMap, svgTags, tagCount);
+    }
+
+    // A miss yields a name whose local name is null.
+    const QualifiedName& casedName = caseMap->get(token.name());
+    if (!casedName.localName().isNull())
+        token.setName(casedName.localName());
+}
+
+// Renames each attribute on |token| whose local name is the lowered form of
+// a mixed-case name supplied by getAttrs. The lookup table is built on first
+// use, once per getAttrs instantiation. Tokens without attributes are left
+// untouched.
+template<QualifiedName** getAttrs(size_t* length)>
+void adjustAttributes(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* caseMap = 0;
+    if (!caseMap) {
+        caseMap = new PrefixedNameToQualifiedNameMap;
+        size_t attrCount = 0;
+        QualifiedName** attrs = getAttrs(&attrCount);
+        mapLoweredLocalNameToName(caseMap, attrs, attrCount);
+    }
+
+    NamedNodeMap* attributes = token.attributes();
+    if (!attributes)
+        return;
+
+    for (unsigned index = 0; index < attributes->length(); ++index) {
+        Attribute* attribute = attributes->attributeItem(index);
+        const QualifiedName& casedName = caseMap->get(attribute->localName());
+        if (casedName.localName().isNull())
+            continue;
+        attribute->parserSetName(casedName);
+    }
+}
+
+// Applies adjustAttributes using the SVG attribute name table.
+void adjustSVGAttributes(AtomicHTMLToken& token)
+{
+    adjustAttributes<SVGNames::getSVGAttrs>(token);
+}
+
+// Applies adjustAttributes using the MathML attribute name table.
+void adjustMathMLAttributes(AtomicHTMLToken& token)
+{
+    adjustAttributes<MathMLNames::getMathMLAttrs>(token);
+}
+
+// For each of the |length| entries in |names|, adds a mapping from the
+// string "prefix:localName" to a QualifiedName with that prefix, the same
+// local name and the same namespace URI.
+void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
+{
+    for (size_t index = 0; index < length; ++index) {
+        const QualifiedName& unprefixedName = *names[index];
+        const AtomicString& localName = unprefixedName.localName();
+        AtomicString lookupKey(prefix + ":" + localName);
+        map->add(lookupKey, QualifiedName(prefix, localName, unprefixedName.namespaceURI()));
+    }
+}
+
+// Renames attributes on |token| whose local name is one of the recognised
+// prefixed spellings: "xlink:*", "xml:*", "xmlns" and "xmlns:xlink". Each is
+// replaced by the corresponding namespaced QualifiedName. The lookup table
+// is built on first use; tokens without attributes are left untouched.
+void adjustForeignAttributes(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* map = 0;
+    if (!map) {
+        map = new PrefixedNameToQualifiedNameMap;
+        size_t attrCount = 0;
+
+        QualifiedName** attrs = XLinkNames::getXLinkAttrs(&attrCount);
+        addNamesWithPrefix(map, "xlink", attrs, attrCount);
+
+        attrs = XMLNames::getXMLAttrs(&attrCount);
+        addNamesWithPrefix(map, "xml", attrs, attrCount);
+
+        map->add("xmlns", XMLNSNames::xmlnsAttr);
+        map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
+    }
+
+    NamedNodeMap* attributes = token.attributes();
+    if (!attributes)
+        return;
+
+    for (unsigned index = 0; index < attributes->length(); ++index) {
+        Attribute* attribute = attributes->attributeItem(index);
+        const QualifiedName& adjustedName = map->get(attribute->localName());
+        if (adjustedName.localName().isNull())
+            continue;
+        attribute->parserSetName(adjustedName);
+    }
+}
+
+}
+
+void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token) // Start-tag handler for the in-body rules: one branch per tag family, each returning when done.
+{
+ ASSERT(token.type() == HTMLToken::StartTag);
+ if (token.name() == htmlTag) {
+ m_tree.insertHTMLHtmlStartTagInBody(token);
+ return;
+ }
+ if (token.name() == baseTag
+ || token.name() == basefontTag
+ || token.name() == bgsoundTag
+ || token.name() == commandTag
+ || token.name() == linkTag
+ || token.name() == metaTag
+ || token.name() == noframesTag
+ || token.name() == scriptTag
+ || token.name() == styleTag
+ || token.name() == titleTag) {
+ bool didProcess = processStartTagForInHead(token); // These tags are delegated to the in-head handler, which is expected to accept them.
+ ASSERT_UNUSED(didProcess, didProcess);
+ return;
+ }
+ if (token.name() == bodyTag) {
+ if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) { // No body on the stack: only expected when parsing a fragment.
+ ASSERT(isParsingFragment());
+ return;
+ }
+ m_tree.insertHTMLBodyStartTagInBody(token);
+ return;
+ }
+ if (token.name() == framesetTag) {
+ parseError(token);
+ if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
+ ASSERT(isParsingFragment());
+ return;
+ }
+ if (!m_framesetOk)
+ return;
+ ExceptionCode ec = 0;
+ m_tree.openElements()->bodyElement()->remove(ec); // Remove the body element, then pop the stack back down to the html element.
+ ASSERT(!ec);
+ m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
+ m_tree.openElements()->popHTMLBodyElement();
+ ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
+ m_tree.insertHTMLElement(token);
+ setInsertionMode(InFramesetMode);
+ return;
+ }
+ if (token.name() == addressTag
+ || token.name() == articleTag
+ || token.name() == asideTag
+ || token.name() == blockquoteTag
+ || token.name() == centerTag
+ || token.name() == detailsTag
+ || token.name() == dirTag
+ || token.name() == divTag
+ || token.name() == dlTag
+ || token.name() == fieldsetTag
+ || token.name() == figcaptionTag
+ || token.name() == figureTag
+ || token.name() == footerTag
+ || token.name() == headerTag
+ || token.name() == hgroupTag
+ || token.name() == menuTag
+ || token.name() == navTag
+ || token.name() == olTag
+ || token.name() == pTag
+ || token.name() == sectionTag
+ || token.name() == summaryTag
+ || token.name() == ulTag) {
+ processFakePEndTagIfPInButtonScope(); // Close any p in button scope before inserting the new element.
+ m_tree.insertHTMLElement(token);
+ return;
+ }
+ if (isNumberedHeaderTag(token.name())) {
+ processFakePEndTagIfPInButtonScope();
+ if (isNumberedHeaderTag(m_tree.currentElement()->localName())) { // A heading directly inside a heading: pop the outer one.
+ parseError(token);
+ m_tree.openElements()->pop();
+ }
+ m_tree.insertHTMLElement(token);
+ return;
+ }
+ if (token.name() == preTag || token.name() == listingTag) {
+ processFakePEndTagIfPInButtonScope();
+ m_tree.insertHTMLElement(token);
+ m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
+ m_framesetOk = false;
+ return;
+ }
+ if (token.name() == formTag) {
+ if (m_tree.form()) { // A form is already recorded: the nested form tag is dropped.
+ parseError(token);
+ return;
+ }
+ processFakePEndTagIfPInButtonScope();
+ m_tree.insertHTMLFormElement(token);
+ return;
+ }
+ if (token.name() == liTag) {
+ processCloseWhenNestedTag<isLi>(token);
+ return;
+ }
+ if (token.name() == ddTag || token.name() == dtTag) {
+ processCloseWhenNestedTag<isDdOrDt>(token);
+ return;
+ }
+ if (token.name() == plaintextTag) {
+ processFakePEndTagIfPInButtonScope();
+ m_tree.insertHTMLElement(token);
+ m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
+ return;
+ }
+ if (token.name() == buttonTag) {
+ if (m_tree.openElements()->inScope(buttonTag)) { // A button is already open: close it, then handle this start tag again.
+ parseError(token);
+ processFakeEndTag(buttonTag);
+ reprocessStartTag(token); // FIXME: Could we just fall through here?
+ return;
+ }
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertHTMLElement(token);
+ m_framesetOk = false;
+ return;
+ }
+ if (token.name() == aTag) {
+ Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
+ if (activeATag) { // An a element is still active: end it and remove it from both lists if it is still there.
+ parseError(token);
+ processFakeEndTag(aTag);
+ m_tree.activeFormattingElements()->remove(activeATag);
+ if (m_tree.openElements()->contains(activeATag))
+ m_tree.openElements()->remove(activeATag);
+ }
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertFormattingElement(token);
+ return;
+ }
+ if (isNonAnchorNonNobrFormattingTag(token.name())) {
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertFormattingElement(token);
+ return;
+ }
+ if (token.name() == nobrTag) {
+ m_tree.reconstructTheActiveFormattingElements();
+ if (m_tree.openElements()->inScope(nobrTag)) {
+ parseError(token);
+ processFakeEndTag(nobrTag);
+ m_tree.reconstructTheActiveFormattingElements();
+ }
+ m_tree.insertFormattingElement(token);
+ return;
+ }
+ if (token.name() == appletTag
+ || token.name() == marqueeTag
+ || token.name() == objectTag) {
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertHTMLElement(token);
+ m_tree.activeFormattingElements()->appendMarker();
+ m_framesetOk = false;
+ return;
+ }
+ if (token.name() == tableTag) {
+ if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag)) // In quirks mode an open p is left open.
+ processFakeEndTag(pTag);
+ m_tree.insertHTMLElement(token);
+ m_framesetOk = false;
+ setInsertionMode(InTableMode);
+ return;
+ }
+ if (token.name() == imageTag) {
+ parseError(token);
+ // Apparently we're not supposed to ask.
+ token.setName(imgTag.localName());
+ prepareToReprocessToken();
+ // Note the fall through to the imgTag handling below!
+ }
+ if (token.name() == areaTag
+ || token.name() == brTag
+ || token.name() == embedTag
+ || token.name() == imgTag
+ || token.name() == keygenTag
+ || token.name() == wbrTag) {
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertSelfClosingHTMLElement(token);
+ m_framesetOk = false;
+ return;
+ }
+ if (token.name() == inputTag) {
+ RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertSelfClosingHTMLElement(token);
+ if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden")) // Only type=hidden inputs leave frameset-ok unchanged.
+ m_framesetOk = false;
+ return;
+ }
+ if (token.name() == paramTag
+ || token.name() == sourceTag
+ || token.name() == trackTag) {
+ m_tree.insertSelfClosingHTMLElement(token);
+ return;
+ }
+ if (token.name() == hrTag) {
+ processFakePEndTagIfPInButtonScope();
+ m_tree.insertSelfClosingHTMLElement(token);
+ m_framesetOk = false;
+ return;
+ }
+ if (token.name() == isindexTag) {
+ processIsindexStartTagForInBody(token);
+ return;
+ }
+ if (token.name() == textareaTag) {
+ m_tree.insertHTMLElement(token);
+ m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
+ m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
+ m_originalInsertionMode = m_insertionMode; // Saved before switching to TextMode.
+ m_framesetOk = false;
+ setInsertionMode(TextMode);
+ return;
+ }
+ if (token.name() == xmpTag) {
+ processFakePEndTagIfPInButtonScope();
+ m_tree.reconstructTheActiveFormattingElements();
+ m_framesetOk = false;
+ processGenericRawTextStartTag(token);
+ return;
+ }
+ if (token.name() == iframeTag) {
+ m_framesetOk = false;
+ processGenericRawTextStartTag(token);
+ return;
+ }
+ if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) { // Raw text only when plugins are enabled; otherwise handled by the generic case at the end.
+ processGenericRawTextStartTag(token);
+ return;
+ }
+ if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) { // Raw text only when scripting is enabled; otherwise handled by the generic case at the end.
+ processGenericRawTextStartTag(token);
+ return;
+ }
+ if (token.name() == selectTag) {
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertHTMLElement(token);
+ m_framesetOk = false;
+ if (m_insertionMode == InTableMode // Any table-related mode selects InSelectInTableMode.
+ || m_insertionMode == InCaptionMode
+ || m_insertionMode == InColumnGroupMode
+ || m_insertionMode == InTableBodyMode
+ || m_insertionMode == InRowMode
+ || m_insertionMode == InCellMode)
+ setInsertionMode(InSelectInTableMode);
+ else
+ setInsertionMode(InSelectMode);
+ return;
+ }
+ if (token.name() == optgroupTag || token.name() == optionTag) {
+ if (m_tree.openElements()->inScope(optionTag.localName())) { // Close an open option first.
+ AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
+ processEndTag(endOption);
+ }
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertHTMLElement(token);
+ return;
+ }
+ if (token.name() == rpTag || token.name() == rtTag) {
+ if (m_tree.openElements()->inScope(rubyTag.localName())) {
+ m_tree.generateImpliedEndTags();
+ if (!m_tree.currentElement()->hasTagName(rubyTag)) {
+ parseError(token);
+ m_tree.openElements()->popUntil(rubyTag.localName());
+ }
+ }
+ m_tree.insertHTMLElement(token);
+ return;
+ }
+ if (token.name() == MathMLNames::mathTag.localName()) {
+ m_tree.reconstructTheActiveFormattingElements();
+ adjustMathMLAttributes(token);
+ adjustForeignAttributes(token);
+ m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
+ if (m_insertionMode != InForeignContentMode)
+ setInsertionMode(InForeignContentMode);
+ return;
+ }
+ if (token.name() == SVGNames::svgTag.localName()) {
+ m_tree.reconstructTheActiveFormattingElements();
+ adjustSVGAttributes(token);
+ adjustForeignAttributes(token);
+ m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
+ if (m_insertionMode != InForeignContentMode)
+ setInsertionMode(InForeignContentMode);
+ return;
+ }
+ if (isCaptionColOrColgroupTag(token.name())
+ || token.name() == frameTag
+ || token.name() == headTag
+ || isTableBodyContextTag(token.name())
+ || isTableCellContextTag(token.name())
+ || token.name() == trTag) {
+ parseError(token); // These tags are dropped here with a parse error.
+ return;
+ }
+ m_tree.reconstructTheActiveFormattingElements(); // Any other start tag: insert an ordinary HTML element.
+ m_tree.insertHTMLElement(token);
+}
+
+// Handles a (real or implied) </colgroup> in column-group mode. Normally
+// pops the current element, switches to InTableMode and returns true. When
+// the current element is the root html element there is nothing to close;
+// that is only expected while parsing a fragment, and false is returned.
+bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
+{
+    const bool currentIsRootHTMLElement = m_tree.currentElement() == m_tree.openElements()->htmlElement();
+    if (!currentIsRootHTMLElement) {
+        m_tree.openElements()->pop();
+        setInsertionMode(InTableMode);
+        return true;
+    }
+    ASSERT(isParsingFragment());
+    // FIXME: parse error
+    return false;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
+//
+// Closes the currently open table cell by processing a synthetic </td> or
+// </th>, whichever of the two is in table scope (exactly one is expected to
+// be). Must be called in InCellMode; on return the builder is expected to be
+// in InRowMode, and that post-condition is checked for both cell kinds.
+void HTMLTreeBuilder::closeTheCell()
+{
+    ASSERT(insertionMode() == InCellMode);
+    if (m_tree.openElements()->inTableScope(tdTag)) {
+        ASSERT(!m_tree.openElements()->inTableScope(thTag));
+        processFakeEndTag(tdTag);
+    } else {
+        ASSERT(m_tree.openElements()->inTableScope(thTag));
+        processFakeEndTag(thTag);
+    }
+    ASSERT(insertionMode() == InRowMode);
+}
+
+// Start-tag handler for the in-table rules.
+//
+// Table-structure tags (caption, colgroup, thead/tbody/tfoot) pop the stack
+// to the table scope marker, insert the element and switch to the matching
+// insertion mode. Tags that need a missing wrapper (col; td/th/tr) first get
+// a synthetic colgroup or tbody and are then reprocessed. A nested <table>
+// closes the current table and is reprocessed. style/script go to the
+// in-head handler, hidden inputs are inserted in place, and a form is
+// inserted and immediately popped. Anything else is a parse error and is
+// handled by the in-body rules with foster parenting enabled.
+void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    if (token.name() == captionTag) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.activeFormattingElements()->appendMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InCaptionMode);
+        return;
+    }
+    if (token.name() == colgroupTag) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InColumnGroupMode);
+        return;
+    }
+    if (token.name() == colTag) {
+        // A bare <col> implies an enclosing <colgroup>.
+        processFakeStartTag(colgroupTag);
+        // Compare the actual mode: asserting the bare enumerator
+        // InColumnGroupMode would test a constant and could never fail.
+        ASSERT(insertionMode() == InColumnGroupMode);
+        reprocessStartTag(token);
+        return;
+    }
+    if (isTableBodyContextTag(token.name())) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InTableBodyMode);
+        return;
+    }
+    if (isTableCellContextTag(token.name())
+        || token.name() == trTag) {
+        // A bare cell or row implies an enclosing <tbody>.
+        processFakeStartTag(tbodyTag);
+        ASSERT(insertionMode() == InTableBodyMode);
+        reprocessStartTag(token);
+        return;
+    }
+    if (token.name() == tableTag) {
+        parseError(token);
+        // Close the current table first; failure is only expected when
+        // parsing a fragment.
+        if (!processTableEndTagForInTable()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        reprocessStartTag(token);
+        return;
+    }
+    if (token.name() == styleTag || token.name() == scriptTag) {
+        processStartTagForInHead(token);
+        return;
+    }
+    if (token.name() == inputTag) {
+        Attribute* typeAttribute = token.getAttributeItem(typeAttr);
+        if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
+            parseError(token);
+            m_tree.insertSelfClosingHTMLElement(token);
+            return;
+        }
+        // Fall through to "anything else" case.
+    }
+    if (token.name() == formTag) {
+        parseError(token);
+        if (m_tree.form())
+            return;
+        // Insert the form and pop it straight away so it does not stay on
+        // the stack of open elements.
+        m_tree.insertHTMLFormElement(token, true);
+        m_tree.openElements()->pop();
+        return;
+    }
+    // Anything else: parse error; use the in-body rules while the guard
+    // redirects insertions to the foster parent.
+    parseError(token);
+    HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
+    processStartTagForInBody(token);
+}
+
+ namespace {
+
+ // Returns true when a start tag encountered in foreign content should be
+ // handled by the in-body rules. The decision depends on the current element:
+ //  - MathML mi/mo/mn/ms/mtext: every tag except mglyph and malignmark.
+ //  - MathML annotation-xml: only the svg tag.
+ //  - SVG foreignObject/desc/title: every tag.
+ //  - anything else: only when the current element is in the XHTML namespace.
+ bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, Element* currentElement)
+ {
+     ASSERT(token.type() == HTMLToken::StartTag);
+     const bool isMathMLTextElement = currentElement->hasTagName(MathMLNames::miTag)
+         || currentElement->hasTagName(MathMLNames::moTag)
+         || currentElement->hasTagName(MathMLNames::mnTag)
+         || currentElement->hasTagName(MathMLNames::msTag)
+         || currentElement->hasTagName(MathMLNames::mtextTag);
+     if (isMathMLTextElement) {
+         return token.name() != MathMLNames::mglyphTag
+             && token.name() != MathMLNames::malignmarkTag;
+     }
+     if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
+         return token.name() == SVGNames::svgTag;
+     const bool isSVGHTMLContainer = currentElement->hasTagName(SVGNames::foreignObjectTag)
+         || currentElement->hasTagName(SVGNames::descTag)
+         || currentElement->hasTagName(SVGNames::titleTag);
+     return isSVGHTMLContainer || currentElement->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
+ }
+
+ }
+
+ // Dispatches a start tag token according to the current insertion mode.
+ //
+ // The early modes (Initial, BeforeHTML, BeforeHead, InHead, AfterHead) are
+ // laid out so that each one's default handling falls through into the next
+ // case; the ASSERT at the top of every case documents the mode the preceding
+ // default*() call is expected to have established. Table-related modes either
+ // handle the tag directly, close the current table structure and reprocess the
+ // token, or delegate to processStartTagForInTable()/processStartTagForInBody().
+ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLToken::StartTag);
+     switch (insertionMode()) {
+     case InitialMode:
+         ASSERT(insertionMode() == InitialMode);
+         defaultForInitial();
+         // Fall through.
+     case BeforeHTMLMode:
+         ASSERT(insertionMode() == BeforeHTMLMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
+             setInsertionMode(BeforeHeadMode);
+             return;
+         }
+         defaultForBeforeHTML();
+         // Fall through.
+     case BeforeHeadMode:
+         ASSERT(insertionMode() == BeforeHeadMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagInBody(token);
+             return;
+         }
+         if (token.name() == headTag) {
+             m_tree.insertHTMLHeadElement(token);
+             setInsertionMode(InHeadMode);
+             return;
+         }
+         defaultForBeforeHead();
+         // Fall through.
+     case InHeadMode:
+         ASSERT(insertionMode() == InHeadMode);
+         if (processStartTagForInHead(token))
+             return;
+         defaultForInHead();
+         // Fall through.
+     case AfterHeadMode:
+         ASSERT(insertionMode() == AfterHeadMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagInBody(token);
+             return;
+         }
+         if (token.name() == bodyTag) {
+             m_framesetOk = false;
+             m_tree.insertHTMLBodyElement(token);
+             setInsertionMode(InBodyMode);
+             return;
+         }
+         if (token.name() == framesetTag) {
+             m_tree.insertHTMLElement(token);
+             setInsertionMode(InFramesetMode);
+             return;
+         }
+         if (token.name() == baseTag
+             || token.name() == basefontTag
+             || token.name() == bgsoundTag
+             || token.name() == linkTag
+             || token.name() == metaTag
+             || token.name() == noframesTag
+             || token.name() == scriptTag
+             || token.name() == styleTag
+             || token.name() == titleTag) {
+             // Head content after </head>: temporarily put the head element
+             // back on the stack, process with the in-head rules, then take
+             // it off again.
+             parseError(token);
+             ASSERT(m_tree.head());
+             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
+             processStartTagForInHead(token);
+             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
+             return;
+         }
+         if (token.name() == headTag) {
+             parseError(token);
+             return;
+         }
+         defaultForAfterHead();
+         // Fall through
+     case InBodyMode:
+         ASSERT(insertionMode() == InBodyMode);
+         processStartTagForInBody(token);
+         break;
+     case InTableMode:
+         ASSERT(insertionMode() == InTableMode);
+         processStartTagForInTable(token);
+         break;
+     case InCaptionMode:
+         ASSERT(insertionMode() == InCaptionMode);
+         if (isCaptionColOrColgroupTag(token.name())
+             || isTableBodyContextTag(token.name())
+             || isTableCellContextTag(token.name())
+             || token.name() == trTag) {
+             // Table-structure tags implicitly close the caption first.
+             parseError(token);
+             if (!processCaptionEndTagForInCaption()) {
+                 ASSERT(isParsingFragment());
+                 return;
+             }
+             reprocessStartTag(token);
+             return;
+         }
+         processStartTagForInBody(token);
+         break;
+     case InColumnGroupMode:
+         ASSERT(insertionMode() == InColumnGroupMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagInBody(token);
+             return;
+         }
+         if (token.name() == colTag) {
+             m_tree.insertSelfClosingHTMLElement(token);
+             return;
+         }
+         // Anything else closes the column group and is reprocessed.
+         if (!processColgroupEndTagForInColumnGroup()) {
+             ASSERT(isParsingFragment());
+             return;
+         }
+         reprocessStartTag(token);
+         break;
+     case InTableBodyMode:
+         ASSERT(insertionMode() == InTableBodyMode);
+         if (token.name() == trTag) {
+             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
+             m_tree.insertHTMLElement(token);
+             setInsertionMode(InRowMode);
+             return;
+         }
+         if (isTableCellContextTag(token.name())) {
+             // A cell directly inside a table section implies a <tr>.
+             parseError(token);
+             processFakeStartTag(trTag);
+             ASSERT(insertionMode() == InRowMode);
+             reprocessStartTag(token);
+             return;
+         }
+         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
+             // FIXME: This is slow.
+             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
+                 ASSERT(isParsingFragment());
+                 parseError(token);
+                 return;
+             }
+             // Close the current table section, then reprocess the token.
+             m_tree.openElements()->popUntilTableBodyScopeMarker();
+             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
+             processFakeEndTag(m_tree.currentElement()->tagQName());
+             reprocessStartTag(token);
+             return;
+         }
+         processStartTagForInTable(token);
+         break;
+     case InRowMode:
+         ASSERT(insertionMode() == InRowMode);
+         if (isTableCellContextTag(token.name())) {
+             m_tree.openElements()->popUntilTableRowScopeMarker();
+             m_tree.insertHTMLElement(token);
+             setInsertionMode(InCellMode);
+             m_tree.activeFormattingElements()->appendMarker();
+             return;
+         }
+         if (token.name() == trTag
+             || isCaptionColOrColgroupTag(token.name())
+             || isTableBodyContextTag(token.name())) {
+             // Close the current row, then reprocess the token.
+             if (!processTrEndTagForInRow()) {
+                 ASSERT(isParsingFragment());
+                 return;
+             }
+             ASSERT(insertionMode() == InTableBodyMode);
+             reprocessStartTag(token);
+             return;
+         }
+         processStartTagForInTable(token);
+         break;
+     case InCellMode:
+         ASSERT(insertionMode() == InCellMode);
+         if (isCaptionColOrColgroupTag(token.name())
+             || isTableCellContextTag(token.name())
+             || token.name() == trTag
+             || isTableBodyContextTag(token.name())) {
+             // FIXME: This could be more efficient.
+             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
+                 ASSERT(isParsingFragment());
+                 parseError(token);
+                 return;
+             }
+             closeTheCell();
+             reprocessStartTag(token);
+             return;
+         }
+         processStartTagForInBody(token);
+         break;
+     case AfterBodyMode:
+     case AfterAfterBodyMode:
+         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagInBody(token);
+             return;
+         }
+         // Any other start tag switches back to the in-body mode.
+         setInsertionMode(InBodyMode);
+         reprocessStartTag(token);
+         break;
+     case InHeadNoscriptMode:
+         ASSERT(insertionMode() == InHeadNoscriptMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagInBody(token);
+             return;
+         }
+         if (token.name() == basefontTag
+             || token.name() == bgsoundTag
+             || token.name() == linkTag
+             || token.name() == metaTag
+             || token.name() == noframesTag
+             || token.name() == styleTag) {
+             bool didProcess = processStartTagForInHead(token);
+             ASSERT_UNUSED(didProcess, didProcess);
+             return;
+         }
+         // <head> and nested <noscript> are parse errors and are ignored.
+         // (<html> was already handled at the top of this case, so testing
+         // for it here would be unreachable.)
+         if (token.name() == headTag || token.name() == noscriptTag) {
+             parseError(token);
+             return;
+         }
+         defaultForInHeadNoscript();
+         processToken(token);
+         break;
+     case InFramesetMode:
+         ASSERT(insertionMode() == InFramesetMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagInBody(token);
+             return;
+         }
+         if (token.name() == framesetTag) {
+             m_tree.insertHTMLElement(token);
+             return;
+         }
+         if (token.name() == frameTag) {
+             m_tree.insertSelfClosingHTMLElement(token);
+             return;
+         }
+         if (token.name() == noframesTag) {
+             processStartTagForInHead(token);
+             return;
+         }
+         parseError(token);
+         break;
+     case AfterFramesetMode:
+     case AfterAfterFramesetMode:
+         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagInBody(token);
+             return;
+         }
+         if (token.name() == noframesTag) {
+             processStartTagForInHead(token);
+             return;
+         }
+         parseError(token);
+         break;
+     case InSelectInTableMode:
+         ASSERT(insertionMode() == InSelectInTableMode);
+         if (token.name() == captionTag
+             || token.name() == tableTag
+             || isTableBodyContextTag(token.name())
+             || token.name() == trTag
+             || isTableCellContextTag(token.name())) {
+             // Table-structure tags close the select and are reprocessed.
+             parseError(token);
+             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
+             processEndTag(endSelect);
+             reprocessStartTag(token);
+             return;
+         }
+         // Fall through
+     case InSelectMode:
+         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+         if (token.name() == htmlTag) {
+             m_tree.insertHTMLHtmlStartTagInBody(token);
+             return;
+         }
+         if (token.name() == optionTag) {
+             // A new option implicitly closes an open option.
+             if (m_tree.currentElement()->hasTagName(optionTag)) {
+                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
+                 processEndTag(endOption);
+             }
+             m_tree.insertHTMLElement(token);
+             return;
+         }
+         if (token.name() == optgroupTag) {
+             // A new optgroup implicitly closes an open option and optgroup.
+             if (m_tree.currentElement()->hasTagName(optionTag)) {
+                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
+                 processEndTag(endOption);
+             }
+             if (m_tree.currentElement()->hasTagName(optgroupTag)) {
+                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
+                 processEndTag(endOptgroup);
+             }
+             m_tree.insertHTMLElement(token);
+             return;
+         }
+         if (token.name() == selectTag) {
+             // A nested <select> start tag is treated as </select>.
+             parseError(token);
+             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
+             processEndTag(endSelect);
+             return;
+         }
+         if (token.name() == inputTag
+             || token.name() == keygenTag
+             || token.name() == textareaTag) {
+             parseError(token);
+             if (!m_tree.openElements()->inSelectScope(selectTag)) {
+                 ASSERT(isParsingFragment());
+                 return;
+             }
+             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
+             processEndTag(endSelect);
+             reprocessStartTag(token);
+             return;
+         }
+         if (token.name() == scriptTag) {
+             bool didProcess = processStartTagForInHead(token);
+             ASSERT_UNUSED(didProcess, didProcess);
+             return;
+         }
+         break;
+     case InTableTextMode:
+         defaultForInTableText();
+         processStartTag(token);
+         break;
+     case InForeignContentMode: {
+         if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentElement())) {
+             processForeignContentUsingInBodyModeAndResetMode(token);
+             return;
+         }
+         // These HTML tags break out of foreign content: pop back to the
+         // foreign-content scope marker, reset the mode and reprocess.
+         if (token.name() == bTag
+             || token.name() == bigTag
+             || token.name() == blockquoteTag
+             || token.name() == bodyTag
+             || token.name() == brTag
+             || token.name() == centerTag
+             || token.name() == codeTag
+             || token.name() == ddTag
+             || token.name() == divTag
+             || token.name() == dlTag
+             || token.name() == dtTag
+             || token.name() == emTag
+             || token.name() == embedTag
+             || isNumberedHeaderTag(token.name())
+             || token.name() == headTag
+             || token.name() == hrTag
+             || token.name() == iTag
+             || token.name() == imgTag
+             || token.name() == liTag
+             || token.name() == listingTag
+             || token.name() == menuTag
+             || token.name() == metaTag
+             || token.name() == nobrTag
+             || token.name() == olTag
+             || token.name() == pTag
+             || token.name() == preTag
+             || token.name() == rubyTag
+             || token.name() == sTag
+             || token.name() == smallTag
+             || token.name() == spanTag
+             || token.name() == strongTag
+             || token.name() == strikeTag
+             || token.name() == subTag
+             || token.name() == supTag
+             || token.name() == tableTag
+             || token.name() == ttTag
+             || token.name() == uTag
+             || token.name() == ulTag
+             || token.name() == varTag
+             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
+             parseError(token);
+             m_tree.openElements()->popUntilForeignContentScopeMarker();
+             resetInsertionModeAppropriately();
+             reprocessStartTag(token);
+             return;
+         }
+         // Otherwise insert the element in the namespace of the current
+         // element, after the namespace-specific name adjustments.
+         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
+         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
+             adjustMathMLAttributes(token);
+         if (currentNamespace == SVGNames::svgNamespaceURI) {
+             adjustSVGTagNameCase(token);
+             adjustSVGAttributes(token);
+         }
+         adjustForeignAttributes(token);
+         m_tree.insertForeignElement(token, currentNamespace);
+         break;
+     }
+     case TextMode:
+         ASSERT_NOT_REACHED();
+         break;
+     }
+ }
+
+ // Handles </body> under the in-body rules. Returns true and switches to
+ // AfterBodyMode when a body element is in scope; otherwise reports a parse
+ // error and returns false without changing the insertion mode.
+ bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLToken::EndTag);
+     ASSERT(token.name() == bodyTag);
+     if (m_tree.openElements()->inScope(bodyTag.localName())) {
+         notImplemented(); // Emit a more specific parse error based on stack contents.
+         setInsertionMode(AfterBodyMode);
+         return true;
+     }
+     parseError(token);
+     return false;
+ }
+
+ // Generic end-tag handling for the in-body rules: walks the stack of open
+ // elements from the top until it finds an element whose local name matches
+ // the token (which is then popped, together with everything above it) or a
+ // special node (in which case the token is a parse error and is ignored).
+ void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLToken::EndTag);
+     for (HTMLElementStack::ElementRecord* entry = m_tree.openElements()->topRecord(); ; entry = entry->next()) {
+         Element* candidate = entry->element();
+         if (!candidate->hasLocalName(token.name())) {
+             if (isSpecialNode(candidate)) {
+                 parseError(token);
+                 return;
+             }
+             continue;
+         }
+         m_tree.generateImpliedEndTags();
+         if (!m_tree.currentElement()->hasLocalName(token.name())) {
+             parseError(token);
+             // FIXME: This is either a bug in the spec, or a bug in our
+             // implementation. Filed a bug with HTML5:
+             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
+             // We might have already popped the node for the token in
+             // generateImpliedEndTags, just abort.
+             if (!m_tree.openElements()->contains(candidate))
+                 return;
+         }
+         m_tree.openElements()->popUntilPopped(candidate);
+         return;
+     }
+ }
+
+// FIXME: This probably belongs on HTMLElementStack.
+ // Scans the stack of open elements from the top towards formattingElement
+ // and returns the last special-node record seen before reaching it, i.e. the
+ // special node closest to formattingElement among those above it. Returns 0
+ // when no special node sits above formattingElement. The formatting element
+ // is expected to be on the stack.
+ HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
+ {
+     HTMLElementStack::ElementRecord* lastSpecialSeen = 0;
+     HTMLElementStack::ElementRecord* current = m_tree.openElements()->topRecord();
+     while (current) {
+         if (current->element() == formattingElement)
+             return lastSpecialSeen;
+         if (isSpecialNode(current->element()))
+             lastSpecialSeen = current;
+         current = current->next();
+     }
+     ASSERT_NOT_REACHED();
+     return 0;
+ }
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+ // Runs the adoption agency algorithm for the formatting end tag in |token|.
+ // The numbered comments below follow the step numbers of the algorithm in
+ // the HTML parsing specification. Both the outer loop and the inner loop
+ // (step 6) are capped at adoptionAgencyIterationLimit iterations; the two
+ // loops use distinct counters (i and j) so the inner one does not shadow the
+ // outer one.
+ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
+ {
+     // The adoption agency algorithm is N^2. We limit the number of iterations
+     // to stop from hanging the whole browser. This limit is copied from the
+     // legacy tree builder and might need to be tweaked in the future.
+     static const int adoptionAgencyIterationLimit = 10;
+
+     for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
+         // 1.
+         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
+         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
+             parseError(token);
+             notImplemented(); // Check the stack of open elements for a more specific parse error.
+             return;
+         }
+         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
+         if (!formattingElementRecord) {
+             parseError(token);
+             m_tree.activeFormattingElements()->remove(formattingElement);
+             return;
+         }
+         if (formattingElement != m_tree.currentElement())
+             parseError(token);
+         // 2.
+         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
+         // 3.
+         if (!furthestBlock) {
+             m_tree.openElements()->popUntilPopped(formattingElement);
+             m_tree.activeFormattingElements()->remove(formattingElement);
+             return;
+         }
+         // 4.
+         ASSERT(furthestBlock->isAbove(formattingElementRecord));
+         Element* commonAncestor = formattingElementRecord->next()->element();
+         // 5.
+         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
+         // 6.
+         HTMLElementStack::ElementRecord* node = furthestBlock;
+         HTMLElementStack::ElementRecord* nextNode = node->next();
+         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
+         for (int j = 0; j < adoptionAgencyIterationLimit; ++j) {
+             // 6.1
+             node = nextNode;
+             ASSERT(node);
+             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
+             // 6.2
+             if (!m_tree.activeFormattingElements()->contains(node->element())) {
+                 m_tree.openElements()->remove(node->element());
+                 node = 0;
+                 continue;
+             }
+             // 6.3
+             if (node == formattingElementRecord)
+                 break;
+             // 6.5
+             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
+             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
+             nodeEntry->replaceElement(newElement.get());
+             node->replaceElement(newElement.release());
+             // 6.4 -- Intentionally out of order to handle the case where node
+             // was replaced in 6.5.
+             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
+             if (lastNode == furthestBlock)
+                 bookmark.moveToAfter(nodeEntry);
+             // 6.6
+             if (Element* parent = lastNode->element()->parentElement())
+                 parent->parserRemoveChild(lastNode->element());
+             node->element()->parserAddChild(lastNode->element());
+             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
+                 lastNode->element()->lazyAttach();
+             // 6.7
+             lastNode = node;
+         }
+         // 7
+         const AtomicString& commonAncestorTag = commonAncestor->localName();
+         if (Element* parent = lastNode->element()->parentElement())
+             parent->parserRemoveChild(lastNode->element());
+         // FIXME: If this moves to HTMLConstructionSite, this check should use
+         // causesFosterParenting(tagName) instead.
+         if (commonAncestorTag == tableTag
+             || commonAncestorTag == trTag
+             || isTableBodyContextTag(commonAncestorTag))
+             m_tree.fosterParent(lastNode->element());
+         else {
+             commonAncestor->parserAddChild(lastNode->element());
+             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
+                 lastNode->element()->lazyAttach();
+         }
+         // 8
+         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
+         // 9
+         newElement->takeAllChildrenFrom(furthestBlock->element());
+         // 10
+         Element* furthestBlockElement = furthestBlock->element();
+         // FIXME: All this creation / parserAddChild / attach business should
+         //        be in HTMLConstructionSite. My guess is that steps 8--12
+         //        should all be in some HTMLConstructionSite function.
+         furthestBlockElement->parserAddChild(newElement);
+         if (furthestBlockElement->attached() && !newElement->attached()) {
+             // Notice that newElement might already be attached if, for example, one of the reparented
+             // children is a style element, which attaches itself automatically.
+             newElement->attach();
+         }
+         // 11
+         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
+         // 12
+         m_tree.openElements()->remove(formattingElement);
+         m_tree.openElements()->insertAbove(newElement, furthestBlock);
+     }
+ }
+
+ // Chooses the insertion mode by walking the stack of open elements from the
+ // top and picking the mode that matches the first recognised element. When
+ // the walk reaches the bottom of the stack, the fragment context element is
+ // examined in its place and InBodyMode is the final fallback.
+ void HTMLTreeBuilder::resetInsertionModeAppropriately()
+ {
+     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
+     bool reachedBottom = false;
+     for (HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); ; record = record->next()) {
+         Element* node = record->element();
+         if (node == m_tree.openElements()->bottom()) {
+             ASSERT(isParsingFragment());
+             reachedBottom = true;
+             node = m_fragmentContext.contextElement();
+         }
+         if (node->hasTagName(selectTag)) {
+             ASSERT(isParsingFragment());
+             setInsertionMode(InSelectMode);
+             return;
+         }
+         if (node->hasTagName(tdTag) || node->hasTagName(thTag)) {
+             setInsertionMode(InCellMode);
+             return;
+         }
+         if (node->hasTagName(trTag)) {
+             setInsertionMode(InRowMode);
+             return;
+         }
+         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag)) {
+             setInsertionMode(InTableBodyMode);
+             return;
+         }
+         if (node->hasTagName(captionTag)) {
+             setInsertionMode(InCaptionMode);
+             return;
+         }
+         if (node->hasTagName(colgroupTag)) {
+             ASSERT(isParsingFragment());
+             setInsertionMode(InColumnGroupMode);
+             return;
+         }
+         if (node->hasTagName(tableTag)) {
+             setInsertionMode(InTableMode);
+             return;
+         }
+         if (node->hasTagName(headTag)) {
+             ASSERT(isParsingFragment());
+             setInsertionMode(InBodyMode);
+             return;
+         }
+         if (node->hasTagName(bodyTag)) {
+             setInsertionMode(InBodyMode);
+             return;
+         }
+         if (node->hasTagName(framesetTag)) {
+             ASSERT(isParsingFragment());
+             setInsertionMode(InFramesetMode);
+             return;
+         }
+         if (node->hasTagName(htmlTag)) {
+             ASSERT(isParsingFragment());
+             setInsertionMode(BeforeHeadMode);
+             return;
+         }
+         if (node->namespaceURI() == SVGNames::svgNamespaceURI
+             || node->namespaceURI() == MathMLNames::mathmlNamespaceURI) {
+             setInsertionMode(InForeignContentMode);
+             return;
+         }
+         if (reachedBottom) {
+             ASSERT(isParsingFragment());
+             setInsertionMode(InBodyMode);
+             return;
+         }
+     }
+ }
+
+ // Handles an end tag using the "in table body" rules: closes the current
+ // table section for </tbody>, </thead> and </tfoot>, closes it and reprocesses
+ // the token for </table>, ignores a fixed set of tags as parse errors, and
+ // defers everything else to the in-table rules.
+ void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLToken::EndTag);
+     if (isTableBodyContextTag(token.name())) {
+         if (m_tree.openElements()->inTableScope(token.name())) {
+             m_tree.openElements()->popUntilTableBodyScopeMarker();
+             m_tree.openElements()->pop();
+             setInsertionMode(InTableMode);
+         } else {
+             parseError(token);
+         }
+         return;
+     }
+     if (token.name() == tableTag) {
+         // FIXME: This is slow.
+         const bool hasTableSectionInScope = m_tree.openElements()->inTableScope(tbodyTag.localName())
+             || m_tree.openElements()->inTableScope(theadTag.localName())
+             || m_tree.openElements()->inTableScope(tfootTag.localName());
+         if (!hasTableSectionInScope) {
+             ASSERT(isParsingFragment());
+             parseError(token);
+             return;
+         }
+         // Close whichever section is current, then let </table> be handled again.
+         m_tree.openElements()->popUntilTableBodyScopeMarker();
+         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
+         processFakeEndTag(m_tree.currentElement()->tagQName());
+         reprocessEndTag(token);
+         return;
+     }
+     const bool isIgnoredEndTag = token.name() == bodyTag
+         || isCaptionColOrColgroupTag(token.name())
+         || token.name() == htmlTag
+         || isTableCellContextTag(token.name())
+         || token.name() == trTag;
+     if (isIgnoredEndTag) {
+         parseError(token);
+         return;
+     }
+     processEndTagForInTable(token);
+ }
+
+ // Handles an end tag using the "in row" rules: </tr> closes the row;
+ // </table> and the table-section end tags close the row first and are then
+ // reprocessed; a fixed set of tags is ignored as a parse error; everything
+ // else is deferred to the in-table rules.
+ void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLToken::EndTag);
+     if (token.name() == trTag) {
+         processTrEndTagForInRow();
+         return;
+     }
+     if (token.name() == tableTag) {
+         if (processTrEndTagForInRow()) {
+             ASSERT(insertionMode() == InTableBodyMode);
+             reprocessEndTag(token);
+             return;
+         }
+         ASSERT(isParsingFragment());
+         return;
+     }
+     if (isTableBodyContextTag(token.name())) {
+         if (m_tree.openElements()->inTableScope(token.name())) {
+             processFakeEndTag(trTag);
+             ASSERT(insertionMode() == InTableBodyMode);
+             reprocessEndTag(token);
+             return;
+         }
+         parseError(token);
+         return;
+     }
+     const bool isIgnoredEndTag = token.name() == bodyTag
+         || isCaptionColOrColgroupTag(token.name())
+         || token.name() == htmlTag
+         || isTableCellContextTag(token.name());
+     if (isIgnoredEndTag) {
+         parseError(token);
+         return;
+     }
+     processEndTagForInTable(token);
+ }
+
+ // Handles an end tag using the "in cell" rules: </td> and </th> close the
+ // cell and switch to InRowMode; </table>, </tr> and the table-section end
+ // tags close the cell and are reprocessed; body/caption/col/colgroup/html end
+ // tags are ignored as parse errors; everything else uses the in-body rules.
+ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLToken::EndTag);
+     if (isTableCellContextTag(token.name())) {
+         if (m_tree.openElements()->inTableScope(token.name())) {
+             m_tree.generateImpliedEndTags();
+             if (!m_tree.currentElement()->hasLocalName(token.name()))
+                 parseError(token);
+             m_tree.openElements()->popUntilPopped(token.name());
+             m_tree.activeFormattingElements()->clearToLastMarker();
+             setInsertionMode(InRowMode);
+             return;
+         }
+         parseError(token);
+         return;
+     }
+     const bool isIgnoredEndTag = token.name() == bodyTag
+         || isCaptionColOrColgroupTag(token.name())
+         || token.name() == htmlTag;
+     if (isIgnoredEndTag) {
+         parseError(token);
+         return;
+     }
+     const bool closesEnclosingTableStructure = token.name() == tableTag
+         || token.name() == trTag
+         || isTableBodyContextTag(token.name());
+     if (closesEnclosingTableStructure) {
+         if (m_tree.openElements()->inTableScope(token.name())) {
+             closeTheCell();
+             reprocessEndTag(token);
+             return;
+         }
+         ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
+         parseError(token);
+         return;
+     }
+     processEndTagForInBody(token);
+ }
+
+ // Handles an end tag using the "in body" rules. Each recognised group of tag
+ // names is handled by its own branch, and every branch returns once the token
+ // has been dealt with; only tag names that match no branch reach the generic
+ // processAnyOtherEndTagForInBody() fallback at the bottom.
+ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLToken::EndTag);
+     if (token.name() == bodyTag) {
+         processBodyEndTagForInBody(token);
+         return;
+     }
+     if (token.name() == htmlTag) {
+         // </html> acts like </body> followed by the token itself, provided
+         // the fake </body> was not ignored.
+         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
+         if (processBodyEndTagForInBody(endBody))
+             reprocessEndTag(token);
+         return;
+     }
+     if (token.name() == addressTag
+         || token.name() == articleTag
+         || token.name() == asideTag
+         || token.name() == blockquoteTag
+         || token.name() == buttonTag
+         || token.name() == centerTag
+         || token.name() == detailsTag
+         || token.name() == dirTag
+         || token.name() == divTag
+         || token.name() == dlTag
+         || token.name() == fieldsetTag
+         || token.name() == figcaptionTag
+         || token.name() == figureTag
+         || token.name() == footerTag
+         || token.name() == headerTag
+         || token.name() == hgroupTag
+         || token.name() == listingTag
+         || token.name() == menuTag
+         || token.name() == navTag
+         || token.name() == olTag
+         || token.name() == preTag
+         || token.name() == sectionTag
+         || token.name() == summaryTag
+         || token.name() == ulTag) {
+         if (!m_tree.openElements()->inScope(token.name())) {
+             parseError(token);
+             return;
+         }
+         m_tree.generateImpliedEndTags();
+         if (!m_tree.currentElement()->hasLocalName(token.name()))
+             parseError(token);
+         m_tree.openElements()->popUntilPopped(token.name());
+         return;
+     }
+     if (token.name() == formTag) {
+         RefPtr<Element> node = m_tree.takeForm();
+         if (!node || !m_tree.openElements()->inScope(node.get())) {
+             parseError(token);
+             return;
+         }
+         m_tree.generateImpliedEndTags();
+         if (m_tree.currentElement() != node.get())
+             parseError(token);
+         m_tree.openElements()->remove(node.get());
+         // The </form> token is fully handled at this point; without this
+         // return it would also run through the generic fallback below.
+         return;
+     }
+     if (token.name() == pTag) {
+         if (!m_tree.openElements()->inButtonScope(token.name())) {
+             // </p> without an open <p>: act as if <p> had been seen first.
+             parseError(token);
+             processFakeStartTag(pTag);
+             ASSERT(m_tree.openElements()->inScope(token.name()));
+             reprocessEndTag(token);
+             return;
+         }
+         m_tree.generateImpliedEndTagsWithExclusion(token.name());
+         if (!m_tree.currentElement()->hasLocalName(token.name()))
+             parseError(token);
+         m_tree.openElements()->popUntilPopped(token.name());
+         return;
+     }
+     if (token.name() == liTag) {
+         if (!m_tree.openElements()->inListItemScope(token.name())) {
+             parseError(token);
+             return;
+         }
+         m_tree.generateImpliedEndTagsWithExclusion(token.name());
+         if (!m_tree.currentElement()->hasLocalName(token.name()))
+             parseError(token);
+         m_tree.openElements()->popUntilPopped(token.name());
+         return;
+     }
+     if (token.name() == ddTag
+         || token.name() == dtTag) {
+         if (!m_tree.openElements()->inScope(token.name())) {
+             parseError(token);
+             return;
+         }
+         m_tree.generateImpliedEndTagsWithExclusion(token.name());
+         if (!m_tree.currentElement()->hasLocalName(token.name()))
+             parseError(token);
+         m_tree.openElements()->popUntilPopped(token.name());
+         return;
+     }
+     if (isNumberedHeaderTag(token.name())) {
+         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
+             parseError(token);
+             return;
+         }
+         m_tree.generateImpliedEndTags();
+         if (!m_tree.currentElement()->hasLocalName(token.name()))
+             parseError(token);
+         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
+         return;
+     }
+     if (isFormattingTag(token.name())) {
+         callTheAdoptionAgency(token);
+         return;
+     }
+     if (token.name() == appletTag
+         || token.name() == marqueeTag
+         || token.name() == objectTag) {
+         if (!m_tree.openElements()->inScope(token.name())) {
+             parseError(token);
+             return;
+         }
+         m_tree.generateImpliedEndTags();
+         if (!m_tree.currentElement()->hasLocalName(token.name()))
+             parseError(token);
+         m_tree.openElements()->popUntilPopped(token.name());
+         m_tree.activeFormattingElements()->clearToLastMarker();
+         return;
+     }
+     if (token.name() == brTag) {
+         // </br> is a parse error and is treated as a <br> start tag.
+         parseError(token);
+         processFakeStartTag(brTag);
+         return;
+     }
+     processAnyOtherEndTagForInBody(token);
+ }
+
+ // Closes the open caption. Returns false without touching the stack when no
+ // caption is in table scope; otherwise pops through the caption, clears the
+ // active formatting elements back to the last marker, switches to
+ // InTableMode and returns true.
+ bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
+ {
+     const bool captionIsOpen = m_tree.openElements()->inTableScope(captionTag.localName());
+     if (captionIsOpen) {
+         m_tree.generateImpliedEndTags();
+         // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
+         m_tree.openElements()->popUntilPopped(captionTag.localName());
+         m_tree.activeFormattingElements()->clearToLastMarker();
+         setInsertionMode(InTableMode);
+         return true;
+     }
+     ASSERT(isParsingFragment());
+     // FIXME: parse error
+     return false;
+ }
+
+ // Closes the open table row. Returns false without touching the stack when
+ // no tr is in table scope; otherwise pops back to the row, pops the row
+ // itself, switches to InTableBodyMode and returns true.
+ bool HTMLTreeBuilder::processTrEndTagForInRow()
+ {
+     const bool rowIsOpen = m_tree.openElements()->inTableScope(trTag.localName());
+     if (rowIsOpen) {
+         m_tree.openElements()->popUntilTableRowScopeMarker();
+         ASSERT(m_tree.currentElement()->hasTagName(trTag));
+         m_tree.openElements()->pop();
+         setInsertionMode(InTableBodyMode);
+         return true;
+     }
+     ASSERT(isParsingFragment());
+     // FIXME: parse error
+     return false;
+ }
+
+ // Closes the open table. Returns false without touching the stack when no
+ // table is in table scope; otherwise pops through the table element, resets
+ // the insertion mode from the remaining stack and returns true.
+ bool HTMLTreeBuilder::processTableEndTagForInTable()
+ {
+     const bool tableIsOpen = m_tree.openElements()->inTableScope(tableTag);
+     if (tableIsOpen) {
+         m_tree.openElements()->popUntilPopped(tableTag.localName());
+         resetInsertionModeAppropriately();
+         return true;
+     }
+     ASSERT(isParsingFragment());
+     // FIXME: parse error.
+     return false;
+ }
+
+ // Handles an end tag using the "in table" rules: </table> closes the table,
+ // a fixed set of structural end tags is ignored as a parse error, and any
+ // other end tag is processed with the in-body rules while the foster-parent
+ // redirect guard is alive.
+ void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLToken::EndTag);
+     if (token.name() == tableTag) {
+         processTableEndTagForInTable();
+         return;
+     }
+     const bool isIgnoredEndTag = token.name() == bodyTag
+         || isCaptionColOrColgroupTag(token.name())
+         || token.name() == htmlTag
+         || isTableBodyContextTag(token.name())
+         || isTableCellContextTag(token.name())
+         || token.name() == trTag;
+     if (isIgnoredEndTag) {
+         parseError(token);
+         return;
+     }
+     // Is this redirection necessary here?
+     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
+     processEndTagForInBody(token);
+ }
+
+void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token) // Handles an end-tag token according to the current insertion mode.
+{
+ ASSERT(token.type() == HTMLToken::EndTag);
+ switch (insertionMode()) { // The early modes apply their default action and deliberately fall through to the next case.
+ case InitialMode:
+ ASSERT(insertionMode() == InitialMode);
+ defaultForInitial();
+ // Fall through.
+ case BeforeHTMLMode:
+ ASSERT(insertionMode() == BeforeHTMLMode);
+ if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { // Only </head>, </body>, </html> and </br> continue past this point.
+ parseError(token);
+ return;
+ }
+ defaultForBeforeHTML();
+ // Fall through.
+ case BeforeHeadMode:
+ ASSERT(insertionMode() == BeforeHeadMode);
+ if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { // Same filter as in BeforeHTMLMode.
+ parseError(token);
+ return;
+ }
+ defaultForBeforeHead();
+ // Fall through.
+ case InHeadMode:
+ ASSERT(insertionMode() == InHeadMode);
+ if (token.name() == headTag) { // </head>: pop the head element and move to AfterHeadMode.
+ m_tree.openElements()->popHTMLHeadElement();
+ setInsertionMode(AfterHeadMode);
+ return;
+ }
+ if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+ parseError(token);
+ return;
+ }
+ defaultForInHead();
+ // Fall through.
+ case AfterHeadMode:
+ ASSERT(insertionMode() == AfterHeadMode);
+ if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+ parseError(token);
+ return;
+ }
+ defaultForAfterHead();
+ // Fall through
+ case InBodyMode:
+ ASSERT(insertionMode() == InBodyMode);
+ processEndTagForInBody(token);
+ break;
+ case InTableMode:
+ ASSERT(insertionMode() == InTableMode);
+ processEndTagForInTable(token);
+ break;
+ case InCaptionMode:
+ ASSERT(insertionMode() == InCaptionMode);
+ if (token.name() == captionTag) {
+ processCaptionEndTagForInCaption();
+ return;
+ }
+ if (token.name() == tableTag) { // </table> inside a caption: handle the caption end tag first, then re-dispatch this token.
+ parseError(token);
+ if (!processCaptionEndTagForInCaption()) {
+ ASSERT(isParsingFragment());
+ return;
+ }
+ reprocessEndTag(token);
+ return;
+ }
+ if (token.name() == bodyTag
+ || token.name() == colTag
+ || token.name() == colgroupTag
+ || token.name() == htmlTag
+ || isTableBodyContextTag(token.name())
+ || isTableCellContextTag(token.name())
+ || token.name() == trTag) {
+ parseError(token);
+ return;
+ }
+ processEndTagForInBody(token);
+ break;
+ case InColumnGroupMode:
+ ASSERT(insertionMode() == InColumnGroupMode);
+ if (token.name() == colgroupTag) {
+ processColgroupEndTagForInColumnGroup();
+ return;
+ }
+ if (token.name() == colTag) {
+ parseError(token);
+ return;
+ }
+ if (!processColgroupEndTagForInColumnGroup()) { // Any other end tag: process a colgroup end tag first; on failure the token is dropped.
+ ASSERT(isParsingFragment());
+ return;
+ }
+ reprocessEndTag(token);
+ break;
+ case InRowMode:
+ ASSERT(insertionMode() == InRowMode);
+ processEndTagForInRow(token);
+ break;
+ case InCellMode:
+ ASSERT(insertionMode() == InCellMode);
+ processEndTagForInCell(token);
+ break;
+ case InTableBodyMode:
+ ASSERT(insertionMode() == InTableBodyMode);
+ processEndTagForInTableBody(token);
+ break;
+ case AfterBodyMode:
+ ASSERT(insertionMode() == AfterBodyMode);
+ if (token.name() == htmlTag) {
+ if (isParsingFragment()) {
+ parseError(token);
+ return;
+ }
+ setInsertionMode(AfterAfterBodyMode);
+ return;
+ }
+ prepareToReprocessToken();
+ // Fall through.
+ case AfterAfterBodyMode:
+ ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+ parseError(token);
+ setInsertionMode(InBodyMode); // Switch back to InBodyMode and handle the same token there.
+ reprocessEndTag(token);
+ break;
+ case InHeadNoscriptMode:
+ ASSERT(insertionMode() == InHeadNoscriptMode);
+ if (token.name() == noscriptTag) {
+ ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
+ m_tree.openElements()->pop();
+ ASSERT(m_tree.currentElement()->hasTagName(headTag));
+ setInsertionMode(InHeadMode);
+ return;
+ }
+ if (token.name() != brTag) {
+ parseError(token);
+ return;
+ }
+ defaultForInHeadNoscript();
+ processToken(token); // Only </br> reaches this point; it is dispatched again after the default action.
+ break;
+ case TextMode:
+ if (token.name() == scriptTag) {
+ // Pause ourselves so that parsing stops until the script can be processed by the caller.
+ m_isPaused = true;
+ ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
+ m_scriptToProcess = m_tree.currentElement();
+ m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
+ m_tree.openElements()->pop();
+ if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
+ m_scriptToProcess->removeAllChildren(); // Script content is removed when the fragment context does not allow scripting.
+ setInsertionMode(m_originalInsertionMode);
+
+ // This token will not have been created by the tokenizer if a
+ // self-closing script tag was encountered and pre-HTML5 parser
+ // quirks are enabled. We must set the tokenizer's state to
+ // DataState explicitly if the tokenizer didn't have a chance to.
+ ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
+ m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
+ return;
+ }
+ m_tree.openElements()->pop(); // Any other end tag in text mode: pop the current element and restore the saved mode.
+ setInsertionMode(m_originalInsertionMode);
+ break;
+ case InFramesetMode:
+ ASSERT(insertionMode() == InFramesetMode);
+ if (token.name() == framesetTag) {
+ if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
+ parseError(token);
+ return;
+ }
+ m_tree.openElements()->pop();
+ if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
+ setInsertionMode(AfterFramesetMode);
+ return;
+ }
+ break; // Other end tags are dropped here without a parseError() call.
+ case AfterFramesetMode:
+ ASSERT(insertionMode() == AfterFramesetMode);
+ if (token.name() == htmlTag) {
+ setInsertionMode(AfterAfterFramesetMode);
+ return;
+ }
+ // Fall through.
+ case AfterAfterFramesetMode:
+ ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+ parseError(token);
+ break;
+ case InSelectInTableMode:
+ ASSERT(insertionMode() == InSelectInTableMode);
+ if (token.name() == captionTag
+ || token.name() == tableTag
+ || isTableBodyContextTag(token.name())
+ || token.name() == trTag
+ || isTableCellContextTag(token.name())) {
+ parseError(token);
+ if (m_tree.openElements()->inTableScope(token.name())) { // Synthesize </select> first, then re-dispatch the original token.
+ AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
+ processEndTag(endSelect);
+ reprocessEndTag(token);
+ }
+ return;
+ }
+ // Fall through.
+ case InSelectMode:
+ ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+ if (token.name() == optgroupTag) {
+ if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
+ processFakeEndTag(optionTag); // An option directly inside the optgroup gets a fake end tag first.
+ if (m_tree.currentElement()->hasTagName(optgroupTag)) {
+ m_tree.openElements()->pop();
+ return;
+ }
+ parseError(token);
+ return;
+ }
+ if (token.name() == optionTag) {
+ if (m_tree.currentElement()->hasTagName(optionTag)) {
+ m_tree.openElements()->pop();
+ return;
+ }
+ parseError(token);
+ return;
+ }
+ if (token.name() == selectTag) {
+ if (!m_tree.openElements()->inSelectScope(token.name())) {
+ ASSERT(isParsingFragment());
+ parseError(token);
+ return;
+ }
+ m_tree.openElements()->popUntilPopped(selectTag.localName());
+ resetInsertionModeAppropriately();
+ return;
+ }
+ break; // Any other end tag is dropped in the select modes.
+ case InTableTextMode:
+ defaultForInTableText(); // Handles the pending table characters and restores the saved mode, then the token is dispatched again.
+ processEndTag(token);
+ break;
+ case InForeignContentMode:
+ if (token.name() == SVGNames::scriptTag && m_tree.currentElement()->hasTagName(SVGNames::scriptTag)) {
+ notImplemented();
+ return;
+ }
+ if (m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI) {
+ // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
+ HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
+ if (!nodeRecord->element()->hasLocalName(token.name()))
+ parseError(token);
+ while (1) { // Walk the records via next() until a matching local name or an XHTML-namespace element is found.
+ if (nodeRecord->element()->hasLocalName(token.name())) {
+ m_tree.openElements()->popUntilPopped(nodeRecord->element());
+ resetForeignInsertionMode();
+ return;
+ }
+ nodeRecord = nodeRecord->next();
+ if (nodeRecord->element()->namespaceURI() == xhtmlNamespaceURI)
+ break;
+ }
+ }
+ // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
+ processForeignContentUsingInBodyModeAndResetMode(token);
+ break;
+ }
+}
+
+// Performs the deferred foreign-content insertion-mode reset, if one is
+// pending, and then clears the pending flag. Does nothing otherwise.
+void HTMLTreeBuilder::prepareToReprocessToken()
+{
+    if (!m_hasPendingForeignInsertionModeSteps)
+        return;
+    resetForeignInsertionMode();
+    m_hasPendingForeignInsertionModeSteps = false;
+}
+
+void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token) // Runs prepareToReprocessToken() and then dispatches the start tag through processStartTag() again.
+{
+ prepareToReprocessToken(); // Applies a pending foreign-content mode reset, if any, before dispatching.
+ processStartTag(token);
+}
+
+void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token) // Runs prepareToReprocessToken() and then dispatches the end tag through processEndTag() again.
+{
+ prepareToReprocessToken(); // Applies a pending foreign-content mode reset, if any, before dispatching.
+ processEndTag(token);
+}
+
+// Scoped helper that puts a tree builder into a "fake" insertion mode for the
+// helper's lifetime. The mode in effect at construction is remembered and
+// restored on destruction, but only if the builder still reports that it is
+// in a fake mode at that point.
+class HTMLTreeBuilder::FakeInsertionMode : public Noncopyable {
+public:
+    FakeInsertionMode(HTMLTreeBuilder* builder, InsertionMode temporaryMode)
+        : m_builder(builder)
+        , m_savedMode(builder->insertionMode())
+    {
+        m_builder->setFakeInsertionMode(temporaryMode);
+    }
+
+    ~FakeInsertionMode()
+    {
+        // If the fake flag is no longer set, the current mode is left alone.
+        if (!m_builder->isFakeInsertionMode())
+            return;
+        m_builder->setInsertionMode(m_savedMode);
+    }
+
+private:
+    HTMLTreeBuilder* m_builder;
+    InsertionMode m_savedMode;
+};
+
+// Processes |token| while the insertion mode is temporarily faked to
+// InBodyMode. Afterwards the foreign insertion mode is reset, unless the
+// pending flag was already cleared while the token was being processed.
+void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
+{
+    m_hasPendingForeignInsertionModeSteps = true;
+    {
+        // Scoped so the fake mode ends before the pending flag is examined.
+        FakeInsertionMode inBodyScope(this, InBodyMode);
+        processToken(token);
+    }
+    if (!m_hasPendingForeignInsertionModeSteps)
+        return;
+    resetForeignInsertionMode();
+}
+
+// Recomputes the insertion mode, but only when the builder is currently in
+// InForeignContentMode; any other mode is left unchanged.
+void HTMLTreeBuilder::resetForeignInsertionMode()
+{
+    if (insertionMode() != InForeignContentMode)
+        return;
+    resetInsertionModeAppropriately();
+}
+
+// Inserts a comment token at the location chosen by the current insertion
+// mode: on the document, on the html element, or at the regular insertion
+// point. In InTableTextMode the pending table text is handled first and the
+// comment is then processed again in the restored mode.
+void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::Comment);
+    switch (insertionMode()) {
+    case InitialMode:
+    case BeforeHTMLMode:
+    case AfterAfterBodyMode:
+    case AfterAfterFramesetMode:
+        m_tree.insertCommentOnDocument(token);
+        return;
+    case AfterBodyMode:
+        m_tree.insertCommentOnHTMLHtmlElement(token);
+        return;
+    case InTableTextMode:
+        defaultForInTableText();
+        processComment(token);
+        return;
+    default:
+        m_tree.insertComment(token);
+        return;
+    }
+}
+
+void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token) // Wraps a character token in a buffer and hands it to processCharacterBuffer().
+{
+ ASSERT(token.type() == HTMLToken::Character);
+ ExternalCharacterTokenBuffer buffer(token); // Buffer view over the token's characters, consumed piecewise by processCharacterBuffer().
+ processCharacterBuffer(buffer);
+}
+
+void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer) // Consumes buffered characters according to the current insertion mode.
+{
+ReprocessBuffer: // Jump target used to re-dispatch the remaining characters after a mode change.
+ switch (insertionMode()) {
+ case InitialMode: {
+ ASSERT(insertionMode() == InitialMode);
+ buffer.skipLeadingWhitespace();
+ if (buffer.isEmpty())
+ return;
+ defaultForInitial();
+ // Fall through.
+ }
+ case BeforeHTMLMode: {
+ ASSERT(insertionMode() == BeforeHTMLMode);
+ buffer.skipLeadingWhitespace();
+ if (buffer.isEmpty())
+ return;
+ defaultForBeforeHTML();
+ // Fall through.
+ }
+ case BeforeHeadMode: {
+ ASSERT(insertionMode() == BeforeHeadMode);
+ buffer.skipLeadingWhitespace();
+ if (buffer.isEmpty())
+ return;
+ defaultForBeforeHead();
+ // Fall through.
+ }
+ case InHeadMode: {
+ ASSERT(insertionMode() == InHeadMode);
+ String leadingWhitespace = buffer.takeLeadingWhitespace(); // From here on leading whitespace is inserted as text instead of being skipped.
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace);
+ if (buffer.isEmpty())
+ return;
+ defaultForInHead();
+ // Fall through.
+ }
+ case AfterHeadMode: {
+ ASSERT(insertionMode() == AfterHeadMode);
+ String leadingWhitespace = buffer.takeLeadingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace);
+ if (buffer.isEmpty())
+ return;
+ defaultForAfterHead();
+ // Fall through.
+ }
+ case InBodyMode:
+ case InCaptionMode:
+ case InCellMode: {
+ ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
+ m_tree.reconstructTheActiveFormattingElements();
+ String characters = buffer.takeRemaining();
+ m_tree.insertTextNode(characters);
+ if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
+ m_framesetOk = false; // Text other than whitespace/replacement characters clears the frameset-ok flag.
+ break;
+ }
+ case InTableMode:
+ case InTableBodyMode:
+ case InRowMode: {
+ ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
+ ASSERT(m_pendingTableCharacters.isEmpty());
+ m_originalInsertionMode = m_insertionMode; // Remember the table mode so it can be restored after the table text is handled.
+ setInsertionMode(InTableTextMode);
+ prepareToReprocessToken();
+ // Fall through.
+ }
+ case InTableTextMode: {
+ buffer.giveRemainingTo(m_pendingTableCharacters); // Pending characters are consumed later by defaultForInTableText().
+ break;
+ }
+ case InColumnGroupMode: {
+ ASSERT(insertionMode() == InColumnGroupMode);
+ String leadingWhitespace = buffer.takeLeadingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace);
+ if (buffer.isEmpty())
+ return;
+ if (!processColgroupEndTagForInColumnGroup()) {
+ ASSERT(isParsingFragment());
+ // The spec tells us to drop these characters on the floor.
+ buffer.takeLeadingNonWhitespace();
+ if (buffer.isEmpty())
+ return;
+ }
+ prepareToReprocessToken();
+ goto ReprocessBuffer;
+ }
+ case AfterBodyMode:
+ case AfterAfterBodyMode: {
+ ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+ // FIXME: parse error
+ setInsertionMode(InBodyMode);
+ prepareToReprocessToken();
+ goto ReprocessBuffer;
+ break; // Unreachable: follows an unconditional goto.
+ }
+ case TextMode: {
+ ASSERT(insertionMode() == TextMode);
+ m_tree.insertTextNode(buffer.takeRemaining());
+ break;
+ }
+ case InHeadNoscriptMode: {
+ ASSERT(insertionMode() == InHeadNoscriptMode);
+ String leadingWhitespace = buffer.takeLeadingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace);
+ if (buffer.isEmpty())
+ return;
+ defaultForInHeadNoscript();
+ goto ReprocessBuffer;
+ break; // Unreachable: follows an unconditional goto.
+ }
+ case InFramesetMode:
+ case AfterFramesetMode: {
+ ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode); // AfterAfterFramesetMode has its own case below, so that disjunct cannot hold here.
+ String leadingWhitespace = buffer.takeRemainingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace);
+ // FIXME: We should generate a parse error if we skipped over any
+ // non-whitespace characters.
+ break;
+ }
+ case InSelectInTableMode:
+ case InSelectMode: {
+ ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+ m_tree.insertTextNode(buffer.takeRemaining());
+ break;
+ }
+ case InForeignContentMode: {
+ ASSERT(insertionMode() == InForeignContentMode);
+ String characters = buffer.takeRemaining();
+ m_tree.insertTextNode(characters);
+ if (m_framesetOk && !isAllWhitespace(characters)) // Uses isAllWhitespace(), unlike the in-body case which also tolerates replacement characters.
+ m_framesetOk = false;
+ break;
+ }
+ case AfterAfterFramesetMode: {
+ String leadingWhitespace = buffer.takeRemainingWhitespace();
+ if (!leadingWhitespace.isEmpty()) {
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertTextNode(leadingWhitespace);
+ }
+ // FIXME: We should generate a parse error if we skipped over any
+ // non-whitespace characters.
+ break;
+ }
+ }
+}
+
+void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) // Handles the end-of-file token according to the current insertion mode.
+{
+ ASSERT(token.type() == HTMLToken::EndOfFile);
+ switch (insertionMode()) { // Cases that break reach popAll() below; cases that return either re-dispatch the token or stop early.
+ case InitialMode:
+ ASSERT(insertionMode() == InitialMode);
+ defaultForInitial();
+ // Fall through.
+ case BeforeHTMLMode:
+ ASSERT(insertionMode() == BeforeHTMLMode);
+ defaultForBeforeHTML();
+ // Fall through.
+ case BeforeHeadMode:
+ ASSERT(insertionMode() == BeforeHeadMode);
+ defaultForBeforeHead();
+ // Fall through.
+ case InHeadMode:
+ ASSERT(insertionMode() == InHeadMode);
+ defaultForInHead();
+ // Fall through.
+ case AfterHeadMode:
+ ASSERT(insertionMode() == AfterHeadMode);
+ defaultForAfterHead();
+ // Fall through
+ case InBodyMode:
+ case InCellMode:
+ case InCaptionMode:
+ case InRowMode:
+ ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
+ notImplemented(); // Emit parse error based on what elements are still open.
+ break;
+ case AfterBodyMode:
+ case AfterAfterBodyMode:
+ ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+ break;
+ case InHeadNoscriptMode:
+ ASSERT(insertionMode() == InHeadNoscriptMode);
+ defaultForInHeadNoscript();
+ processEndOfFile(token); // Re-dispatch after the default noscript handling.
+ return;
+ case AfterFramesetMode:
+ case AfterAfterFramesetMode:
+ ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+ break;
+ case InFramesetMode:
+ case InTableMode:
+ case InTableBodyMode:
+ case InSelectInTableMode:
+ case InSelectMode:
+ ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
+ if (m_tree.currentElement() != m_tree.openElements()->htmlElement()) // Parse error when the current element is not the html element.
+ parseError(token);
+ break;
+ case InColumnGroupMode:
+ if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
+ ASSERT(isParsingFragment());
+ return; // FIXME: Should we break here instead of returning?
+ }
+ if (!processColgroupEndTagForInColumnGroup()) {
+ ASSERT(isParsingFragment());
+ return; // FIXME: Should we break here instead of returning?
+ }
+ prepareToReprocessToken();
+ processEndOfFile(token);
+ return;
+ case InForeignContentMode:
+ setInsertionMode(InBodyMode); // Switch to InBodyMode and handle the end of file there.
+ processEndOfFile(token);
+ return;
+ case InTableTextMode:
+ defaultForInTableText(); // Handles pending table characters and restores the saved mode before re-dispatching.
+ processEndOfFile(token);
+ return;
+ case TextMode:
+ parseError(token);
+ if (m_tree.currentElement()->hasTagName(scriptTag))
+ notImplemented(); // mark the script element as "already started".
+ m_tree.openElements()->pop();
+ setInsertionMode(m_originalInsertionMode);
+ prepareToReprocessToken();
+ processEndOfFile(token);
+ return;
+ }
+ ASSERT(m_tree.openElements()->top());
+ m_tree.openElements()->popAll(); // Reached only from cases that break out of the switch.
+}
+
+// Default action for InitialMode: when not parsing a fragment the document is
+// put into quirks mode, then the builder moves on to BeforeHTMLMode.
+void HTMLTreeBuilder::defaultForInitial()
+{
+    notImplemented();
+    const bool parsingFragment = m_fragmentContext.fragment();
+    if (!parsingFragment)
+        m_document->setCompatibilityMode(Document::QuirksMode);
+    // FIXME: parse error
+    setInsertionMode(BeforeHTMLMode);
+    prepareToReprocessToken();
+}
+
+// Default action for BeforeHTMLMode: inserts an html element from a
+// synthesized <html> start tag and moves on to BeforeHeadMode.
+void HTMLTreeBuilder::defaultForBeforeHTML()
+{
+    AtomicHTMLToken syntheticHtmlStart(HTMLToken::StartTag, htmlTag.localName());
+    m_tree.insertHTMLHtmlStartTagBeforeHTML(syntheticHtmlStart);
+    setInsertionMode(BeforeHeadMode);
+    prepareToReprocessToken();
+}
+
+// Default action for BeforeHeadMode: processes a synthesized <head> start
+// tag through the normal start-tag path.
+void HTMLTreeBuilder::defaultForBeforeHead()
+{
+    AtomicHTMLToken syntheticHeadStart(HTMLToken::StartTag, headTag.localName());
+    processStartTag(syntheticHeadStart);
+    prepareToReprocessToken();
+}
+
+// Default action for InHeadMode: processes a synthesized </head> end tag
+// through the normal end-tag path.
+void HTMLTreeBuilder::defaultForInHead()
+{
+    AtomicHTMLToken syntheticHeadEnd(HTMLToken::EndTag, headTag.localName());
+    processEndTag(syntheticHeadEnd);
+    prepareToReprocessToken();
+}
+
+// Default action for InHeadNoscriptMode: processes a synthesized </noscript>
+// end tag through the normal end-tag path.
+void HTMLTreeBuilder::defaultForInHeadNoscript()
+{
+    AtomicHTMLToken syntheticNoscriptEnd(HTMLToken::EndTag, noscriptTag.localName());
+    processEndTag(syntheticNoscriptEnd);
+    prepareToReprocessToken();
+}
+
+// Default action for AfterHeadMode: processes a synthesized <body> start tag
+// and then sets the frameset-ok flag back to true.
+void HTMLTreeBuilder::defaultForAfterHead()
+{
+    AtomicHTMLToken syntheticBodyStart(HTMLToken::StartTag, bodyTag.localName());
+    processStartTag(syntheticBodyStart);
+    // Set after processing the synthesized tag, so it overrides whatever
+    // that processing left in the flag.
+    m_framesetOk = true;
+    prepareToReprocessToken();
+}
+
+// Handles the pending table characters and returns to the insertion mode
+// saved in m_originalInsertionMode. All-whitespace text is inserted directly;
+// anything else is inserted under a RedirectToFosterParentGuard after
+// reconstructing the active formatting elements, and clears the frameset-ok
+// flag.
+void HTMLTreeBuilder::defaultForInTableText()
+{
+    String pendingText = String::adopt(m_pendingTableCharacters);
+    if (isAllWhitespace(pendingText)) {
+        m_tree.insertTextNode(pendingText);
+        setInsertionMode(m_originalInsertionMode);
+        prepareToReprocessToken();
+        return;
+    }
+    // FIXME: parse error
+    HTMLConstructionSite::RedirectToFosterParentGuard fosterParentGuard(m_tree);
+    m_tree.reconstructTheActiveFormattingElements();
+    m_tree.insertTextNode(pendingText);
+    m_framesetOk = false;
+    setInsertionMode(m_originalInsertionMode);
+    prepareToReprocessToken();
+}
+
+// Start-tag handling for the "in head" insertion mode.
+// Returns true when the token was handled here, false when the tag is not
+// one of those listed below and the caller has to deal with it.
+bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    if (token.name() == htmlTag)
+        m_tree.insertHTMLHtmlStartTagInBody(token);
+    else if (token.name() == baseTag
+        || token.name() == basefontTag
+        || token.name() == bgsoundTag
+        || token.name() == commandTag
+        || token.name() == linkTag
+        || token.name() == metaTag) {
+        // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
+        m_tree.insertSelfClosingHTMLElement(token);
+    } else if (token.name() == titleTag)
+        processGenericRCDATAStartTag(token);
+    else if (token.name() == noscriptTag) {
+        // With scripting enabled <noscript> is handled as raw text;
+        // otherwise it is inserted and gets its own insertion mode.
+        if (scriptEnabled(m_document->frame()))
+            processGenericRawTextStartTag(token);
+        else {
+            m_tree.insertHTMLElement(token);
+            setInsertionMode(InHeadNoscriptMode);
+        }
+    } else if (token.name() == noframesTag || token.name() == styleTag)
+        processGenericRawTextStartTag(token);
+    else if (token.name() == scriptTag) {
+        processScriptStartTag(token);
+        // Under pre-HTML5 quirks a self-closing script tag is closed at once.
+        if (m_usePreHTML5ParserQuirks && token.selfClosing())
+            processFakeEndTag(scriptTag);
+    } else if (token.name() == headTag)
+        parseError(token);
+    else
+        return false;
+    return true;
+}
+
+// Inserts the element for |token|, switches the tokenizer to RCDATAState and
+// enters TextMode, saving the current insertion mode so it can be restored.
+void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    m_tree.insertHTMLElement(token);
+    m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
+    // Saved here and restored when text mode ends.
+    m_originalInsertionMode = insertionMode();
+    setInsertionMode(TextMode);
+}
+
+// Inserts the element for |token|, switches the tokenizer to RAWTEXTState and
+// enters TextMode, saving the current insertion mode so it can be restored.
+void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    m_tree.insertHTMLElement(token);
+    m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
+    // Saved here and restored when text mode ends.
+    m_originalInsertionMode = insertionMode();
+    setInsertionMode(TextMode);
+}
+
+// Inserts a script element for |token|, switches the tokenizer to
+// ScriptDataState, records the parser's current text position as the start
+// of the script, and enters TextMode with the current mode saved.
+void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLToken::StartTag);
+    m_tree.insertScriptElement(token);
+    m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
+    m_originalInsertionMode = insertionMode();
+
+    m_lastScriptElementStartPosition = m_parser->textPosition();
+    // The recorded line is expected to agree with the tokenizer's line number.
+    ASSERT(m_lastScriptElementStartPosition.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
+
+    setInsertionMode(TextMode);
+}
+
+// Signals the end of tree building: the document is told that parsing has
+// finished, or, when parsing a fragment, the fragment context is instead.
+void HTMLTreeBuilder::finished()
+{
+    ASSERT(m_document);
+    if (!isParsingFragment()) {
+        // Warning, this may detach the parser. Do not do anything else after this.
+        m_document->finishedParsing();
+        return;
+    }
+    m_fragmentContext.finished();
+}
+
+// Returns whether scripts may execute in |frame|, as reported by the frame's
+// script controller. A null frame yields false.
+bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
+{
+    return frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript);
+}
+
+// Returns whether plugins are allowed in |frame|, as reported by the frame's
+// subframe loader. A null frame yields false.
+bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
+{
+    return frame && frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.h b/Source/WebCore/html/parser/HTMLTreeBuilder.h
new file mode 100644
index 0000000..17b77b7
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLTreeBuilder_h
+#define HTMLTreeBuilder_h
+
+#include "Element.h"
+#include "FragmentScriptingPermission.h"
+#include "HTMLConstructionSite.h"
+#include "HTMLElementStack.h"
+#include "HTMLFormattingElementList.h"
+#include "HTMLTokenizer.h"
+#include <wtf/text/TextPosition.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/OwnPtr.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/PassRefPtr.h>
+#include <wtf/RefPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+class AtomicHTMLToken;
+class Document;
+class DocumentFragment;
+class Frame;
+class HTMLToken;
+class HTMLDocument;
+class Node;
+class HTMLDocumentParser;
+
+class HTMLTreeBuilder : public Noncopyable { // Consumes tokens and drives an HTMLConstructionSite, tracking the HTML5 insertion mode.
+public:
+ static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks) // Creates a builder for a whole document.
+ {
+ return adoptPtr(new HTMLTreeBuilder(parser, document, reportErrors, usePreHTML5ParserQuirks));
+ }
+ static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks) // Creates a builder for a fragment parsed relative to contextElement.
+ {
+ return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks));
+ }
+ ~HTMLTreeBuilder();
+
+ bool isParsingFragment() const { return !!m_fragmentContext.fragment(); } // True when the fragment context holds a fragment.
+
+ void detach();
+
+ void setPaused(bool paused) { m_isPaused = paused; }
+ bool isPaused() const { return m_isPaused; }
+
+ // The token really should be passed as a const& since it's never modified.
+ void constructTreeFromToken(HTMLToken&);
+ void constructTreeFromAtomicToken(AtomicHTMLToken&);
+
+ // Must be called when parser is paused before calling the parser again.
+ PassRefPtr<Element> takeScriptToProcess(TextPosition1& scriptStartPosition);
+
+ // Done, close any open tags, etc.
+ void finished();
+
+ static bool scriptEnabled(Frame*);
+ static bool pluginsEnabled(Frame*);
+
+private:
+ class FakeInsertionMode;
+ class ExternalCharacterTokenBuffer;
+ // Represents HTML5 "insertion mode"
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
+ enum InsertionMode {
+ InitialMode,
+ BeforeHTMLMode,
+ BeforeHeadMode,
+ InHeadMode,
+ InHeadNoscriptMode,
+ AfterHeadMode,
+ InBodyMode,
+ TextMode,
+ InTableMode,
+ InTableTextMode,
+ InCaptionMode,
+ InColumnGroupMode,
+ InTableBodyMode,
+ InRowMode,
+ InCellMode,
+ InSelectMode,
+ InSelectInTableMode,
+ InForeignContentMode,
+ AfterBodyMode,
+ InFramesetMode,
+ AfterFramesetMode,
+ AfterAfterBodyMode,
+ AfterAfterFramesetMode,
+ };
+
+ HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument*, bool reportErrors, bool usePreHTML5ParserQuirks);
+ HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment*, Element* contextElement, FragmentScriptingPermission, bool usePreHTML5ParserQuirks);
+
+ void processToken(AtomicHTMLToken&);
+
+ void processDoctypeToken(AtomicHTMLToken&);
+ void processStartTag(AtomicHTMLToken&);
+ void processEndTag(AtomicHTMLToken&);
+ void processComment(AtomicHTMLToken&);
+ void processCharacter(AtomicHTMLToken&);
+ void processEndOfFile(AtomicHTMLToken&);
+
+ bool processStartTagForInHead(AtomicHTMLToken&);
+ void processStartTagForInBody(AtomicHTMLToken&);
+ void processStartTagForInTable(AtomicHTMLToken&);
+ void processEndTagForInBody(AtomicHTMLToken&);
+ void processEndTagForInTable(AtomicHTMLToken&);
+ void processEndTagForInTableBody(AtomicHTMLToken&);
+ void processEndTagForInRow(AtomicHTMLToken&);
+ void processEndTagForInCell(AtomicHTMLToken&);
+
+ void processIsindexStartTagForInBody(AtomicHTMLToken&);
+ bool processBodyEndTagForInBody(AtomicHTMLToken&);
+ bool processTableEndTagForInTable();
+ bool processCaptionEndTagForInCaption();
+ bool processColgroupEndTagForInColumnGroup();
+ bool processTrEndTagForInRow();
+ // FIXME: This function should be inlined into its one call site or it
+ // needs to assert which tokens it can be called with.
+ void processAnyOtherEndTagForInBody(AtomicHTMLToken&);
+
+ void processCharacterBuffer(ExternalCharacterTokenBuffer&);
+
+ void processFakeStartTag(const QualifiedName&, PassRefPtr<NamedNodeMap> attributes = 0);
+ void processFakeEndTag(const QualifiedName&);
+ void processFakeCharacters(const String&);
+ void processFakePEndTagIfPInButtonScope();
+
+ void processGenericRCDATAStartTag(AtomicHTMLToken&);
+ void processGenericRawTextStartTag(AtomicHTMLToken&);
+ void processScriptStartTag(AtomicHTMLToken&);
+
+ // Default processing for the different insertion modes.
+ void defaultForInitial();
+ void defaultForBeforeHTML();
+ void defaultForBeforeHead();
+ void defaultForInHead();
+ void defaultForInHeadNoscript();
+ void defaultForAfterHead();
+ void defaultForInTableText();
+
+ void prepareToReprocessToken();
+
+ void reprocessStartTag(AtomicHTMLToken&);
+ void reprocessEndTag(AtomicHTMLToken&);
+
+ PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&);
+
+ HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
+ void callTheAdoptionAgency(AtomicHTMLToken&);
+
+ void closeTheCell();
+
+ template <bool shouldClose(const Element*)>
+ void processCloseWhenNestedTag(AtomicHTMLToken&);
+
+ bool m_framesetOk;
+
+ // FIXME: Implement error reporting.
+ void parseError(AtomicHTMLToken&) { }
+
+ InsertionMode insertionMode() const { return m_insertionMode; }
+ void setInsertionMode(InsertionMode mode) // Sets a real insertion mode; this also clears the fake-mode flag.
+ {
+ m_insertionMode = mode;
+ m_isFakeInsertionMode = false;
+ }
+
+ bool isFakeInsertionMode() const { return m_isFakeInsertionMode; } // Read-only query, const like the other accessors of this class.
+ void setFakeInsertionMode(InsertionMode mode) // Sets the mode and marks it as fake.
+ {
+ m_insertionMode = mode;
+ m_isFakeInsertionMode = true;
+ }
+
+ void resetInsertionModeAppropriately();
+
+ void processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token);
+ void resetForeignInsertionMode();
+
+ class FragmentParsingContext : public Noncopyable {
+ public:
+ FragmentParsingContext();
+ FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
+ ~FragmentParsingContext();
+
+ Document* document() const;
+ DocumentFragment* fragment() const { return m_fragment; }
+ Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; } // Asserts that a fragment is set.
+ FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; } // Asserts that a fragment is set.
+
+ void finished();
+
+ private:
+ RefPtr<Document> m_dummyDocumentForFragmentParsing;
+ DocumentFragment* m_fragment;
+ Element* m_contextElement;
+
+ // FragmentScriptingNotAllowed causes the Parser to remove children
+ // from <script> tags (so javascript doesn't show up in pastes).
+ FragmentScriptingPermission m_scriptingPermission;
+ };
+
+ FragmentParsingContext m_fragmentContext;
+
+ Document* m_document;
+ HTMLConstructionSite m_tree;
+
+ bool m_reportErrors;
+ bool m_isPaused;
+ bool m_isFakeInsertionMode; // Set by setFakeInsertionMode(), cleared by setInsertionMode().
+
+ // FIXME: InsertionModes should be a separate object to prevent direct
+ // manipulation of these variables. For now, be careful to always use
+ // setInsertionMode and never set m_insertionMode directly.
+ InsertionMode m_insertionMode;
+ InsertionMode m_originalInsertionMode;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
+ Vector<UChar> m_pendingTableCharacters;
+
+ // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
+ // from within parser actions. We also need it to track the current position.
+ HTMLDocumentParser* m_parser;
+
+ RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
+ TextPosition1 m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
+
+ // FIXME: We probably want to remove this member. Originally, it was
+ // created to service the legacy tree builder, but it seems to be used for
+ // some other things now.
+ TextPosition0 m_lastScriptElementStartPosition;
+
+ bool m_usePreHTML5ParserQuirks;
+
+ bool m_hasPendingForeignInsertionModeSteps; // True while a deferred foreign-content insertion-mode reset is still outstanding.
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.cpp b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
new file mode 100644
index 0000000..ace8590
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLViewSourceParser.h"
+
+#include "HTMLDocumentParser.h"
+#include "HTMLNames.h"
+#include "HTMLViewSourceDocument.h"
+
+namespace WebCore {
+
+// Builds a view-source parser for |document|. The tokenizer is created with
+// the pre-HTML5 quirks setting that HTMLDocumentParser reports for |document|.
+HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument* document)
+ : DecodedDataDocumentParser(document)
+ , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document)))
+{
+}
+
+// Intentionally empty: no manual teardown is performed here.
+HTMLViewSourceParser::~HTMLViewSourceParser()
+{
+}
+
+// insert() is not expected to be called on a view-source parser; reaching it
+// trips ASSERT_NOT_REACHED. The argument is ignored.
+void HTMLViewSourceParser::insert(const SegmentedString&)
+{
+ ASSERT_NOT_REACHED();
+}
+
+// Runs the tokenizer over the buffered input for as long as it keeps producing
+// tokens, handing each token together with its source text to the document.
+void HTMLViewSourceParser::pumpTokenizer()
+{
+    for (;;) {
+        if (!m_tokenizer->nextToken(m_input.current(), m_token))
+            break;
+
+        // Record where the token ends so sourceForToken() can work out how
+        // many characters belong to it.
+        m_token.end(m_input.current().numberOfCharactersConsumed());
+        document()->addSource(sourceForToken(), m_token);
+        updateTokenizerState();
+
+        // Reset the token for the next round, passing the current input offset.
+        m_token.clear(m_input.current().numberOfCharactersConsumed());
+    }
+}
+
+// Queues |input| on the tokenizer's input stream and on m_source (the copy
+// that sourceForToken() reads raw text from), then tokenizes what it can.
+void HTMLViewSourceParser::append(const SegmentedString& input)
+{
+ m_input.appendToEnd(input);
+ m_source.append(input);
+ pumpTokenizer();
+}
+
+// Returns the source text spanned by m_token, consuming exactly that many
+// characters from m_source. An end-of-file token has no text, so a
+// default-constructed String is returned for it.
+String HTMLViewSourceParser::sourceForToken()
+{
+    if (m_token.type() == HTMLToken::EndOfFile)
+        return String();
+
+    // m_source is expected to be positioned at the token's first character.
+    ASSERT(m_source.numberOfCharactersConsumed() == m_token.startIndex());
+
+    int remaining = m_token.endIndex() - m_token.startIndex();
+    UChar* cursor = 0;
+    String text = String::createUninitialized(remaining, cursor);
+    while (remaining-- > 0) {
+        *cursor++ = *m_source;
+        m_source.advance();
+    }
+    return text;
+}
+
+// After a start tag, lets the tokenizer adjust its state for that tag name.
+// Every other token type leaves the tokenizer state untouched.
+void HTMLViewSourceParser::updateTokenizerState()
+{
+    // FIXME: The tokenizer should do this work for us.
+    if (m_token.type() == HTMLToken::StartTag) {
+        AtomicString tagName(m_token.name().data(), m_token.name().size());
+        m_tokenizer->updateStateFor(tagName, document()->frame());
+    }
+}
+
+// Marks end-of-file on the input (unless already marked), drains whatever
+// tokens remain, and notifies the document that parsing has finished.
+void HTMLViewSourceParser::finish()
+{
+ if (!m_input.haveSeenEndOfFile())
+ m_input.markEndOfFile();
+ pumpTokenizer();
+ document()->finishedParsing();
+}
+
+// Reports whether the input stream has seen end-of-file, which is the mark
+// that finish() leaves behind.
+bool HTMLViewSourceParser::finishWasCalled()
+{
+ return m_input.haveSeenEndOfFile();
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.h b/Source/WebCore/html/parser/HTMLViewSourceParser.h
new file mode 100644
index 0000000..abe55b4
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLViewSourceParser.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLViewSourceParser_h
+#define HTMLViewSourceParser_h
+
+#include "DecodedDataDocumentParser.h"
+#include "HTMLInputStream.h"
+#include "HTMLToken.h"
+#include "HTMLTokenizer.h"
+#include "HTMLViewSourceDocument.h"
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+class HTMLTokenizer;
+class HTMLScriptRunner;
+class HTMLTreeBuilder;
+class HTMLPreloadScanner;
+class ScriptController;
+class ScriptSourceCode;
+
+// Parser used for view-source documents: it tokenizes the incoming markup and
+// forwards every token, along with the source text it came from, to the
+// HTMLViewSourceDocument (see pumpTokenizer() in the .cpp).
+class HTMLViewSourceParser : public DecodedDataDocumentParser {
+public:
+ // Factory; the constructor is not public, so instances are created here.
+ static PassRefPtr<HTMLViewSourceParser> create(HTMLViewSourceDocument* document)
+ {
+ return adoptRef(new HTMLViewSourceParser(document));
+ }
+ virtual ~HTMLViewSourceParser();
+
+protected:
+ explicit HTMLViewSourceParser(HTMLViewSourceDocument*);
+
+ // Exposes the tokenizer to subclasses.
+ HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
+
+private:
+ // DocumentParser
+ virtual void insert(const SegmentedString&);
+ virtual void append(const SegmentedString&);
+ virtual void finish();
+ virtual bool finishWasCalled();
+
+ // The base class's document, downcast to the view-source document type.
+ HTMLViewSourceDocument* document() const { return static_cast<HTMLViewSourceDocument*>(DecodedDataDocumentParser::document()); }
+
+ void pumpTokenizer();
+ String sourceForToken();
+ void updateTokenizerState();
+
+ HTMLInputStream m_input; // Input consumed by the tokenizer.
+ SegmentedString m_source; // Second copy of the input, read by sourceForToken().
+ HTMLToken m_token; // Token currently being produced.
+ OwnPtr<HTMLTokenizer> m_tokenizer;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/NestingLevelIncrementer.h b/Source/WebCore/html/parser/NestingLevelIncrementer.h
new file mode 100644
index 0000000..c597876
--- /dev/null
+++ b/Source/WebCore/html/parser/NestingLevelIncrementer.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NestingLevelIncrementer_h
+#define NestingLevelIncrementer_h
+
+namespace WebCore {
+
+// Scoped helper: increments the given nesting counter on construction and
+// decrements it again on destruction.
+class NestingLevelIncrementer : public Noncopyable {
+public:
+    explicit NestingLevelIncrementer(unsigned& nestingLevel)
+        : m_nestingLevel(nestingLevel)
+    {
+        m_nestingLevel += 1;
+    }
+
+    ~NestingLevelIncrementer()
+    {
+        m_nestingLevel -= 1;
+    }
+
+private:
+    // Counter owned by the caller; it has to outlive this object.
+    unsigned& m_nestingLevel;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/TextDocumentParser.cpp b/Source/WebCore/html/parser/TextDocumentParser.cpp
new file mode 100644
index 0000000..d03b744
--- /dev/null
+++ b/Source/WebCore/html/parser/TextDocumentParser.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextDocumentParser.h"
+
+#include "HTMLDocument.h"
+#include "HTMLNames.h"
+#include "HTMLTokenizer.h"
+#include "HTMLTreeBuilder.h"
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+// Sets up an HTML parser whose tokenizer starts in PLAINTEXTState.
+// NOTE(review): the meaning of the `false` passed to HTMLDocumentParser is
+// defined by that constructor, which is not visible here.
+TextDocumentParser::TextDocumentParser(HTMLDocument* document)
+ : HTMLDocumentParser(document, false)
+ , m_haveInsertedFakePreElement(false)
+{
+ tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
+}
+
+// Intentionally empty: no manual teardown is performed here.
+TextDocumentParser::~TextDocumentParser()
+{
+}
+
+// Inserts the fake <pre> element the first time data arrives, then forwards
+// |text| to the regular HTML parser.
+void TextDocumentParser::append(const SegmentedString& text)
+{
+ if (!m_haveInsertedFakePreElement)
+ insertFakePreElement();
+ HTMLDocumentParser::append(text);
+}
+
+// Feeds the tree builder a synthetic <pre> start tag carrying a wrapping
+// style, and records that this has been done.
+void TextDocumentParser::insertFakePreElement()
+{
+    // Ideally TextDocuments would get a tree builder of their own. Instead the
+    // existing HTMLTreeBuilder is reused: a fake token is handed to it
+    // directly, rather than pushing fake bytes through the front-end of the
+    // parser, so that the line/column number calculations are not disturbed.
+
+    RefPtr<Attribute> wrapStyle = Attribute::createMapped("style", "word-wrap: break-word; white-space: pre-wrap;");
+    RefPtr<NamedNodeMap> preAttributes = NamedNodeMap::create();
+    preAttributes->insertAttribute(wrapStyle.release(), false);
+
+    AtomicHTMLToken preStartTag(HTMLToken::StartTag, preTag.localName(), preAttributes.release());
+    treeBuilder()->constructTreeFromAtomicToken(preStartTag);
+
+    m_haveInsertedFakePreElement = true;
+}
+
+}
diff --git a/Source/WebCore/html/parser/TextDocumentParser.h b/Source/WebCore/html/parser/TextDocumentParser.h
new file mode 100644
index 0000000..1cccc5b
--- /dev/null
+++ b/Source/WebCore/html/parser/TextDocumentParser.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2010 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef TextDocumentParser_h
+#define TextDocumentParser_h
+
+#include "HTMLDocumentParser.h"
+
+namespace WebCore {
+
+// HTML parser variant for text documents: the tokenizer runs in
+// PLAINTEXTState and a fake <pre> element is inserted before the first data
+// is parsed (see the .cpp).
+class TextDocumentParser : public HTMLDocumentParser {
+public:
+ // Factory; the constructor is private, so instances are created here.
+ static PassRefPtr<TextDocumentParser> create(HTMLDocument* document)
+ {
+ return adoptRef(new TextDocumentParser(document));
+ }
+ virtual ~TextDocumentParser();
+
+private:
+ explicit TextDocumentParser(HTMLDocument*);
+
+ virtual void append(const SegmentedString&);
+ void insertFakePreElement();
+
+ // Set once insertFakePreElement() has run, so it happens only once.
+ bool m_haveInsertedFakePreElement;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/TextViewSourceParser.cpp b/Source/WebCore/html/parser/TextViewSourceParser.cpp
new file mode 100644
index 0000000..d7e6e3d
--- /dev/null
+++ b/Source/WebCore/html/parser/TextViewSourceParser.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextViewSourceParser.h"
+
+#include "HTMLTokenizer.h"
+
+namespace WebCore {
+
+// Same as HTMLViewSourceParser, except that the tokenizer starts in
+// PLAINTEXTState.
+TextViewSourceParser::TextViewSourceParser(HTMLViewSourceDocument* document)
+ : HTMLViewSourceParser(document)
+{
+ tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
+}
+
+// Intentionally empty: no manual teardown is performed here.
+TextViewSourceParser::~TextViewSourceParser()
+{
+}
+
+}
diff --git a/Source/WebCore/html/parser/TextViewSourceParser.h b/Source/WebCore/html/parser/TextViewSourceParser.h
new file mode 100644
index 0000000..e4170ed
--- /dev/null
+++ b/Source/WebCore/html/parser/TextViewSourceParser.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextViewSourceParser_h
+#define TextViewSourceParser_h
+
+#include "HTMLViewSourceParser.h"
+
+namespace WebCore {
+
+// View-source parser whose tokenizer is switched to PLAINTEXTState by the
+// constructor (see the .cpp).
+class TextViewSourceParser : public HTMLViewSourceParser {
+public:
+ // Factory; the constructor is private, so instances are created here.
+ static PassRefPtr<TextViewSourceParser> create(HTMLViewSourceDocument* document)
+ {
+ return adoptRef(new TextViewSourceParser(document));
+ }
+ virtual ~TextViewSourceParser();
+
+private:
+ explicit TextViewSourceParser(HTMLViewSourceDocument*);
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/create-html-entity-table b/Source/WebCore/html/parser/create-html-entity-table
new file mode 100755
index 0000000..e6132bc
--- /dev/null
+++ b/Source/WebCore/html/parser/create-html-entity-table
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+# Copyright (c) 2010 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import csv
+import os.path
+import string
+import sys
+
+ENTITY = 0
+VALUE = 1
+
+def convert_entity_to_cpp_name(entity):
+ postfix = "EntityName"
+ if entity[-1] == ";":
+ return "%sSemicolon%s" % (entity[:-1], postfix)
+ return "%s%s" % (entity, postfix)
+
+
+def convert_entity_to_uchar_array(entity):
+ return "{'%s'}" % "', '".join(entity)
+
+
+def convert_value_to_int(value):
+ assert(value[0] == "U")
+ assert(value[1] == "+")
+ return "0x" + value[2:]
+
+
+def offset_table_entry(offset):
+ return " &staticEntityTable[%s]," % offset
+
+
+program_name = os.path.basename(__file__)
+if len(sys.argv) < 4 or sys.argv[1] != "-o":
+ print >> sys.stderr, "Usage: %s -o OUTPUT_FILE INPUT_FILE" % program_name
+ exit(1)
+
+output_path = sys.argv[2]
+input_path = sys.argv[3]
+
+html_entity_names_file = open(input_path)
+entries = list(csv.reader(html_entity_names_file))
+html_entity_names_file.close()
+
+entries.sort(lambda a, b: cmp(a[ENTITY], b[ENTITY]))
+entity_count = len(entries)
+
+output_file = open(output_path, "w")
+
+print >> output_file, """/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// THIS FILE IS GENERATED BY WebCore/html/parser/create-html-entity-table
+// DO NOT EDIT (unless you are a ninja)!
+
+#include "config.h"
+#include "HTMLEntityTable.h"
+
+namespace WebCore {
+
+namespace {
+"""
+
+for entry in entries:
+ print >> output_file, "const UChar %sEntityName[] = %s;" % (
+ convert_entity_to_cpp_name(entry[ENTITY]),
+ convert_entity_to_uchar_array(entry[ENTITY]))
+
+print >> output_file, """
+HTMLEntityTableEntry staticEntityTable[%s] = {""" % entity_count
+
+index = {}
+offset = 0
+for entry in entries:
+ letter = entry[ENTITY][0]
+ if not index.get(letter):
+ index[letter] = offset
+ print >> output_file, ' { %sEntityName, %s, %s },' % (
+ convert_entity_to_cpp_name(entry[ENTITY]),
+ len(entry[ENTITY]),
+ convert_value_to_int(entry[VALUE]))
+ offset += 1
+
+# Close the staticEntityTable initializer.
+print >> output_file, """};
+"""
+
+# Per-letter offset tables. Each has 27 slots: one per letter plus a trailing
+# sentinel, so that lastEntryStartingWith() below can read slot [c + 1] - 1.
+# The uppercase sentinel is the first lowercase entry ('a'); the lowercase
+# sentinel is one past the end of the table (entity_count).
+print >> output_file, "const HTMLEntityTableEntry* uppercaseOffset[] = {"
+for letter in string.uppercase:
+ print >> output_file, offset_table_entry(index[letter])
+print >> output_file, offset_table_entry(index['a'])
+print >> output_file, """};
+
+const HTMLEntityTableEntry* lowercaseOffset[] = {"""
+for letter in string.lowercase:
+ print >> output_file, offset_table_entry(index[letter])
+print >> output_file, offset_table_entry(entity_count)
+# Epilogue: close the anonymous namespace and emit the HTMLEntityTable
+# accessors; the single %s placeholder is filled with entity_count.
+print >> output_file, """};
+
+}
+
+const HTMLEntityTableEntry* HTMLEntityTable::firstEntryStartingWith(UChar c)
+{
+ if (c >= 'A' && c <= 'Z')
+ return uppercaseOffset[c - 'A'];
+ if (c >= 'a' && c <= 'z')
+ return lowercaseOffset[c - 'a'];
+ return 0;
+}
+
+const HTMLEntityTableEntry* HTMLEntityTable::lastEntryStartingWith(UChar c)
+{
+ if (c >= 'A' && c <= 'Z')
+ return uppercaseOffset[c - 'A' + 1] - 1;
+ if (c >= 'a' && c <= 'z')
+ return lowercaseOffset[c - 'a' + 1] - 1;
+ return 0;
+}
+
+const HTMLEntityTableEntry* HTMLEntityTable::firstEntry()
+{
+ return &staticEntityTable[0];
+}
+
+const HTMLEntityTableEntry* HTMLEntityTable::lastEntry()
+{
+ return &staticEntityTable[%s - 1];
+}
+
+}
+""" % entity_count