diff options
author | Steve Block <steveblock@google.com> | 2010-07-08 12:51:48 +0100 |
---|---|---|
committer | Steve Block <steveblock@google.com> | 2010-07-09 15:33:40 +0100 |
commit | ca9cb53ed1119a3fd98fafa0972ffeb56dee1c24 (patch) | |
tree | bb45155550ec013adc0ad10f4d7d354c6469b022 /WebCore/html/HTMLTreeBuilder.cpp | |
parent | d4b24d9a829ed7de70381c8b99fb75a07ab40466 (diff) | |
download | external_webkit-ca9cb53ed1119a3fd98fafa0972ffeb56dee1c24.zip external_webkit-ca9cb53ed1119a3fd98fafa0972ffeb56dee1c24.tar.gz external_webkit-ca9cb53ed1119a3fd98fafa0972ffeb56dee1c24.tar.bz2 |
Merge WebKit at r62496: Initial merge by git
Change-Id: Ie3da0770eca22a70a632e3571f31cfabc80facb2
Diffstat (limited to 'WebCore/html/HTMLTreeBuilder.cpp')
-rw-r--r-- | WebCore/html/HTMLTreeBuilder.cpp | 1674 |
1 files changed, 1379 insertions, 295 deletions
diff --git a/WebCore/html/HTMLTreeBuilder.cpp b/WebCore/html/HTMLTreeBuilder.cpp index 6e40fd7..b4e7ce1 100644 --- a/WebCore/html/HTMLTreeBuilder.cpp +++ b/WebCore/html/HTMLTreeBuilder.cpp @@ -26,17 +26,27 @@ #include "config.h" #include "HTMLTreeBuilder.h" +#include "Comment.h" #include "DocumentFragment.h" +#include "DocumentType.h" #include "Element.h" #include "Frame.h" -#include "HTMLTokenizer.h" -#include "HTMLToken.h" #include "HTMLDocument.h" -#include "LegacyHTMLDocumentParser.h" +#include "HTMLElementFactory.h" +#include "HTMLHtmlElement.h" #include "HTMLNames.h" +#include "HTMLScriptElement.h" +#include "HTMLToken.h" +#include "HTMLTokenizer.h" +#include "LegacyHTMLDocumentParser.h" #include "LegacyHTMLTreeBuilder.h" #include "NotImplemented.h" +#if ENABLE(SVG) +#include "SVGNames.h" +#endif #include "ScriptController.h" +#include "Settings.h" +#include "Text.h" #include <wtf/UnusedParam.h> namespace WebCore { @@ -52,6 +62,142 @@ inline bool isTreeBuilderWhiteSpace(UChar cc) return cc == '\t' || cc == '\x0A' || cc == '\x0C' || cc == '\x0D' || cc == ' '; } +bool shouldUseLegacyTreeBuilder(Document* document) +{ + return !document->settings() || !document->settings()->html5TreeBuilderEnabled(); +} + +bool isNumberedHeaderTag(const AtomicString& tagName) +{ + return tagName == h1Tag + || tagName == h2Tag + || tagName == h3Tag + || tagName == h4Tag + || tagName == h5Tag + || tagName == h6Tag; +} + +bool isTableBodyContextTag(const AtomicString& tagName) +{ + return tagName == tbodyTag + || tagName == tfootTag + || tagName == theadTag; +} + +// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special +bool isSpecialTag(const AtomicString& tagName) +{ + return tagName == addressTag + || tagName == articleTag + || tagName == asideTag + || tagName == baseTag + || tagName == basefontTag + || tagName == "bgsound" + || tagName == blockquoteTag + || tagName == bodyTag + || tagName == brTag + || tagName == buttonTag + || tagName == centerTag + || tagName == colTag + || tagName == colgroupTag + || tagName == "command" + || tagName == ddTag + || tagName == "details" + || tagName == dirTag + || tagName == divTag + || tagName == dlTag + || tagName == dtTag + || tagName == embedTag + || tagName == fieldsetTag + || tagName == "figure" + || tagName == footerTag + || tagName == formTag + || tagName == frameTag + || tagName == framesetTag + || isNumberedHeaderTag(tagName) + || tagName == headTag + || tagName == headerTag + || tagName == hgroupTag + || tagName == hrTag + || tagName == iframeTag + || tagName == imgTag + || tagName == inputTag + || tagName == isindexTag + || tagName == liTag + || tagName == linkTag + || tagName == listingTag + || tagName == menuTag + || tagName == metaTag + || tagName == navTag + || tagName == noembedTag + || tagName == noframesTag + || tagName == noscriptTag + || tagName == olTag + || tagName == pTag + || tagName == paramTag + || tagName == plaintextTag + || tagName == preTag + || tagName == scriptTag + || tagName == sectionTag + || tagName == selectTag + || tagName == styleTag + || isTableBodyContextTag(tagName) + || tagName == textareaTag + || tagName == titleTag + || tagName == trTag + || tagName == ulTag + || tagName == wbrTag + || tagName == xmpTag; +} + +// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#scoping +// Same as isScopingTag in LegacyHTMLTreeBuilder.cpp +// and isScopeMarker in HTMLElementStack.cpp +bool isScopingTag(const AtomicString& tagName) +{ + return tagName == appletTag + || tagName == buttonTag + || tagName == captionTag +#if ENABLE(SVG_FOREIGN_OBJECT) + || tagName == SVGNames::foreignObjectTag +#endif + || tagName == htmlTag + || tagName == marqueeTag + || tagName == objectTag + || tagName == tableTag + || tagName == tdTag + || tagName == thTag; +} + +bool isNonAnchorFormattingTag(const AtomicString& tagName) +{ + return tagName == bTag + || tagName == bigTag + || tagName == codeTag + || tagName == emTag + || tagName == fontTag + || tagName == iTag + || tagName == nobrTag + || tagName == sTag + || tagName == smallTag + || tagName == strikeTag + || tagName == strongTag + || tagName == ttTag + || tagName == uTag; +} + +// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting +bool isFormattingTag(const AtomicString& tagName) +{ + return tagName == aTag || isNonAnchorFormattingTag(tagName); +} + +// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#phrasing +bool isPhrasingTag(const AtomicString& tagName) +{ + return !isSpecialTag(tagName) && !isScopingTag(tagName) && !isFormattingTag(tagName); +} + } // namespace HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors) @@ -60,11 +206,13 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* documen , m_reportErrors(reportErrors) , m_isPaused(false) , m_insertionMode(InitialMode) + , m_originalInsertionMode(InitialMode) , m_tokenizer(tokenizer) - , m_legacyTreeBuilder(new LegacyHTMLTreeBuilder(document, reportErrors)) + , m_legacyTreeBuilder(shouldUseLegacyTreeBuilder(document) ? new LegacyHTMLTreeBuilder(document, reportErrors) : 0) , m_lastScriptElementStartLine(uninitializedLineNumberValue) , m_scriptToProcessStartLine(uninitializedLineNumberValue) , m_fragmentScriptingPermission(FragmentScriptingAllowed) + , m_isParsingFragment(false) { } @@ -76,11 +224,13 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, DocumentFragment* fra , m_reportErrors(false) // FIXME: Why not report errors in fragments? , m_isPaused(false) , m_insertionMode(InitialMode) + , m_originalInsertionMode(InitialMode) , m_tokenizer(tokenizer) , m_legacyTreeBuilder(new LegacyHTMLTreeBuilder(fragment, scriptingPermission)) , m_lastScriptElementStartLine(uninitializedLineNumberValue) , m_scriptToProcessStartLine(uninitializedLineNumberValue) , m_fragmentScriptingPermission(scriptingPermission) + , m_isParsingFragment(true) { } @@ -166,7 +316,7 @@ HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State st return state; } -PassRefPtr<Node> HTMLTreeBuilder::passTokenToLegacyParser(HTMLToken& token) +void HTMLTreeBuilder::passTokenToLegacyParser(HTMLToken& token) { if (token.type() == HTMLToken::DOCTYPE) { DoctypeToken doctypeToken; @@ -176,9 +326,12 @@ PassRefPtr<Node> HTMLTreeBuilder::passTokenToLegacyParser(HTMLToken& token) doctypeToken.m_forceQuirks = token.forceQuirks(); m_legacyTreeBuilder->parseDoctypeToken(&doctypeToken); - return 0; + return; } + if (token.type() == HTMLToken::EndOfFile) + return; + // For now, we translate into an old-style token for testing. Token oldStyleToken; AtomicHTMLToken atomicToken(token); @@ -214,371 +367,1302 @@ PassRefPtr<Node> HTMLTreeBuilder::passTokenToLegacyParser(HTMLToken& token) } else if (oldStyleToken.tagName == framesetTag) setInsertionMode(AfterFramesetMode); } - return result.release(); } -PassRefPtr<Node> HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken) +void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken) { - // Make MSVC ignore our unreachable code for now. - if (true) - return passTokenToLegacyParser(rawToken); + if (m_legacyTreeBuilder) { + passTokenToLegacyParser(rawToken); + return; + } AtomicHTMLToken token(rawToken); + processToken(token); +} + +void HTMLTreeBuilder::processToken(AtomicHTMLToken& token) +{ + switch (token.type()) { + case HTMLToken::Uninitialized: + ASSERT_NOT_REACHED(); + break; + case HTMLToken::DOCTYPE: + processDoctypeToken(token); + break; + case HTMLToken::StartTag: + processStartTag(token); + break; + case HTMLToken::EndTag: + processEndTag(token); + break; + case HTMLToken::Comment: + processComment(token); + return; + case HTMLToken::Character: + processCharacter(token); + break; + case HTMLToken::EndOfFile: + processEndOfFile(token); + break; + } +} + +void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token) +{ + if (insertionMode() == InitialMode) { + insertDoctype(token); + return; + } + parseError(token); +} - // HTML5 expects the tokenizer to call the parser every time a character is - // emitted. We instead collect characters and call the parser with a batch. - // In order to make our first-pass parser code simple, processToken matches - // the spec in only handling one character at a time. - if (token.type() == HTMLToken::Character) { - StringImpl* characters = token.characters().impl(); - // FIXME: Calling processToken for each character is probably slow. - for (unsigned i = 0; i < characters->length(); ++i) - processToken(token, (*characters)[i]); - return 0; // FIXME: Should we be returning the Text node? +void HTMLTreeBuilder::insertHTMLStartTagBeforeHTML(AtomicHTMLToken& token) +{ + RefPtr<Element> element = HTMLHtmlElement::create(m_document); + element->setAttributeMap(token.attributes(), m_fragmentScriptingPermission); + m_openElements.pushHTMLHtmlElement(attach(m_document, element.release())); +} + +void HTMLTreeBuilder::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element) +{ + if (!token.attributes()) + return; + + NamedNodeMap* attributes = element->attributes(false); + for (unsigned i = 0; i < token.attributes()->length(); ++i) { + Attribute* attribute = token.attributes()->attributeItem(i); + if (!attributes->getAttributeItem(attribute->name())) + element->setAttribute(attribute->name(), attribute->value()); } - return processToken(token); } -PassRefPtr<Node> HTMLTreeBuilder::processToken(AtomicHTMLToken& token, UChar cc) +void HTMLTreeBuilder::insertHTMLStartTagInBody(AtomicHTMLToken& token) +{ + parseError(token); + mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement()); +} + +void HTMLTreeBuilder::processFakePEndTagIfPInScope() +{ + if (!m_openElements.inScope(pTag.localName())) + return; + AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName()); + processEndTag(endP); +} + +void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token) { -reprocessToken: switch (insertionMode()) { - case InitialMode: { - switch (token.type()) { - case HTMLToken::Uninitialized: - ASSERT_NOT_REACHED(); - break; - case HTMLToken::DOCTYPE: - return insertDoctype(token); - case HTMLToken::Comment: - return insertComment(token); - case HTMLToken::Character: - if (isTreeBuilderWhiteSpace(cc)) - return 0; - break; - case HTMLToken::StartTag: - case HTMLToken::EndTag: - case HTMLToken::EndOfFile: - break; + case InitialMode: + ASSERT(insertionMode() == InitialMode); + processDefaultForInitialMode(token); + // Fall through. + case BeforeHTMLMode: + ASSERT(insertionMode() == BeforeHTMLMode); + if (token.name() == htmlTag) { + insertHTMLStartTagBeforeHTML(token); + setInsertionMode(BeforeHeadMode); + return; } - notImplemented(); - parseError(token); - setInsertionMode(BeforeHTMLMode); - goto reprocessToken; - } - case BeforeHTMLMode: { - switch (token.type()) { - case HTMLToken::Uninitialized: - ASSERT_NOT_REACHED(); - break; - case HTMLToken::DOCTYPE: + processDefaultForBeforeHTMLMode(token); + // Fall through. + case BeforeHeadMode: + ASSERT(insertionMode() == BeforeHeadMode); + if (token.name() == htmlTag) { + insertHTMLStartTagInBody(token); + return; + } + if (token.name() == headTag) { + insertHTMLHeadElement(token); + setInsertionMode(InHeadMode); + return; + } + processDefaultForBeforeHeadMode(token); + // Fall through. + case InHeadMode: + ASSERT(insertionMode() == InHeadMode); + if (processStartTagForInHead(token)) + return; + processDefaultForInHeadMode(token); + // Fall through. + case AfterHeadMode: + ASSERT(insertionMode() == AfterHeadMode); + if (token.name() == htmlTag) { + insertHTMLStartTagInBody(token); + return; + } + if (token.name() == bodyTag) { + m_framesetOk = false; + insertHTMLBodyElement(token); + m_insertionMode = InBodyMode; + return; + } + if (token.name() == framesetTag) { + insertElement(token); + setInsertionMode(InFramesetMode); + return; + } + if (token.name() == baseTag || token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == scriptTag || token.name() == styleTag || token.name() == titleTag) { parseError(token); - return 0; - case HTMLToken::Comment: - return insertComment(token); - case HTMLToken::Character: - if (isTreeBuilderWhiteSpace(cc)) - return 0; - break; - case HTMLToken::StartTag: - if (token.name() == htmlTag) { - notImplemented(); - setInsertionMode(BeforeHeadMode); - return 0; - } - break; - case HTMLToken::EndTag: - if (token.name() == headTag || token.name() == bodyTag || token.name() == htmlTag || token.name() == brTag) - break; + ASSERT(m_headElement); + m_openElements.pushHTMLHeadElement(m_headElement); + processStartTagForInHead(token); + m_openElements.removeHTMLHeadElement(m_headElement.get()); + return; + } + if (token.name() == headTag) { parseError(token); - return 0; - case HTMLToken::EndOfFile: - break; + return; } - notImplemented(); - setInsertionMode(BeforeHeadMode); - goto reprocessToken; - } - case BeforeHeadMode: { - switch (token.type()) { - case HTMLToken::Uninitialized: - ASSERT_NOT_REACHED(); - break; - case HTMLToken::Character: - if (isTreeBuilderWhiteSpace(cc)) - return 0; - break; - case HTMLToken::Comment: - return insertComment(token); - case HTMLToken::DOCTYPE: + processDefaultForAfterHeadMode(token); + // Fall through + case InBodyMode: + ASSERT(insertionMode() == InBodyMode); + if (token.name() == htmlTag) { + insertHTMLStartTagInBody(token); + return; + } + if (token.name() == baseTag || token.name() == "command" || token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == scriptTag || token.name() == styleTag || token.name() == titleTag) { + bool didProcess = processStartTagForInHead(token); + ASSERT_UNUSED(didProcess, didProcess); + return; + } + if (token.name() == bodyTag) { parseError(token); - return 0; - case HTMLToken::StartTag: - if (token.name() == htmlTag) { - notImplemented(); - return 0; - } - if (token.name() == headTag) { - m_headElement = insertElement(token); - setInsertionMode(InHeadMode); - return m_headElement; + notImplemented(); // fragment case + mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement()); + return; + } + if (token.name() == framesetTag) { + parseError(token); + notImplemented(); // fragment case + if (!m_framesetOk) + return; + ExceptionCode ec = 0; + m_openElements.bodyElement()->remove(ec); + ASSERT(!ec); + m_openElements.popUntil(m_openElements.bodyElement()); + m_openElements.popHTMLBodyElement(); + ASSERT(m_openElements.top() == m_openElements.htmlElement()); + insertElement(token); + m_insertionMode = InFramesetMode; + return; + } + if (token.name() == addressTag || token.name() == articleTag || token.name() == asideTag || token.name() == blockquoteTag || token.name() == centerTag || token.name() == "details" || token.name() == dirTag || token.name() == divTag || token.name() == dlTag || token.name() == fieldsetTag || token.name() == "figure" || token.name() == footerTag || token.name() == headerTag || token.name() == hgroupTag || token.name() == menuTag || token.name() == navTag || token.name() == olTag || token.name() == pTag || token.name() == sectionTag || token.name() == ulTag) { + processFakePEndTagIfPInScope(); + insertElement(token); + return; + } + if (isNumberedHeaderTag(token.name())) { + processFakePEndTagIfPInScope(); + notImplemented(); + insertElement(token); + return; + } + if (token.name() == preTag || token.name() == listingTag) { + processFakePEndTagIfPInScope(); + insertElement(token); + m_tokenizer->skipLeadingNewLineForListing(); + m_framesetOk = false; + return; + } + if (token.name() == formTag) { + notImplemented(); + processFakePEndTagIfPInScope(); + insertElement(token); + m_formElement = currentElement(); + return; + } + if (token.name() == liTag) { + notImplemented(); + processFakePEndTagIfPInScope(); + insertElement(token); + return; + } + if (token.name() == ddTag || token.name() == dtTag) { + notImplemented(); + processFakePEndTagIfPInScope(); + insertElement(token); + return; + } + if (token.name() == plaintextTag) { + processFakePEndTagIfPInScope(); + insertElement(token); + m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); + return; + } + if (token.name() == buttonTag) { + notImplemented(); + reconstructTheActiveFormattingElements(); + insertElement(token); + m_framesetOk = false; + return; + } + if (token.name() == aTag) { + notImplemented(); + reconstructTheActiveFormattingElements(); + insertFormattingElement(token); + return; + } + if (isNonAnchorFormattingTag(token.name())) { + reconstructTheActiveFormattingElements(); + insertFormattingElement(token); + return; + } + if (token.name() == nobrTag) { + reconstructTheActiveFormattingElements(); + notImplemented(); + insertFormattingElement(token); + return; + } + if (token.name() == appletTag || token.name() == marqueeTag || token.name() == objectTag) { + reconstructTheActiveFormattingElements(); + insertElement(token); + notImplemented(); + m_framesetOk = false; + return; + } + if (token.name() == tableTag) { + notImplemented(); + insertElement(token); + m_framesetOk = false; + m_insertionMode = InTableMode; + return; + } + if (token.name() == imageTag) { + parseError(token); + // Apparently we're not supposed to ask. + token.setName(imgTag.localName()); + // Note the fall through to the imgTag handling below! + } + if (token.name() == areaTag || token.name() == basefontTag || token.name() == "bgsound" || token.name() == brTag || token.name() == embedTag || token.name() == imgTag || token.name() == inputTag || token.name() == keygenTag || token.name() == wbrTag) { + reconstructTheActiveFormattingElements(); + insertSelfClosingElement(token); + m_framesetOk = false; + return; + } + if (token.name() == paramTag || token.name() == sourceTag || token.name() == "track") { + insertSelfClosingElement(token); + return; + } + if (token.name() == hrTag) { + processFakePEndTagIfPInScope(); + insertSelfClosingElement(token); + m_framesetOk = false; + return; + } + if (token.name() == isindexTag) { + parseError(token); + notImplemented(); + return; + } + if (token.name() == textareaTag) { + insertElement(token); + m_tokenizer->skipLeadingNewLineForListing(); + m_tokenizer->setState(HTMLTokenizer::RCDATAState); + m_originalInsertionMode = m_insertionMode; + m_framesetOk = false; + m_insertionMode = TextMode; + return; + } + if (token.name() == xmpTag) { + processFakePEndTagIfPInScope(); + reconstructTheActiveFormattingElements(); + m_framesetOk = false; + insertGenericRawTextElement(token); + return; + } + if (token.name() == iframeTag) { + m_framesetOk = false; + insertGenericRawTextElement(token); + return; + } + if (token.name() == noembedTag) { + insertGenericRawTextElement(token); + return; + } + if (token.name() == noscriptTag && isScriptingFlagEnabled(m_document->frame())) { + insertGenericRawTextElement(token); + return; + } + if (token.name() == selectTag) { + reconstructTheActiveFormattingElements(); + insertElement(token); + m_framesetOk = false; + if (m_insertionMode == InTableMode || m_insertionMode == InCaptionMode || m_insertionMode == InColumnGroupMode || m_insertionMode == InTableBodyMode || m_insertionMode == InRowMode || m_insertionMode == InCellMode) + m_insertionMode = InSelectInTableMode; + else + m_insertionMode = InSelectMode; + return; + } + if (token.name() == optgroupTag || token.name() == optionTag) { + if (m_openElements.inScope(optionTag.localName())) { + AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName()); + processEndTag(endOption); } - break; - case HTMLToken::EndTag: - if (token.name() == headTag || token.name() == bodyTag || token.name() == brTag) { - AtomicHTMLToken fakeHead(HTMLToken::StartTag, headTag.localName()); - processToken(fakeHead); - goto reprocessToken; + reconstructTheActiveFormattingElements(); + insertElement(token); + return; + } + if (token.name() == rpTag || token.name() == rtTag) { + if (m_openElements.inScope(rubyTag.localName())) { + generateImpliedEndTags(); + if (!currentElement()->hasTagName(rubyTag)) { + parseError(token); + m_openElements.popUntil(rubyTag.localName()); + } } + insertElement(token); + return; + } + if (token.name() == "math") { + // This is the MathML foreign content branch point. + notImplemented(); + } + if (token.name() == "svg") { + // This is the SVG foreign content branch point. + notImplemented(); + } + if (token.name() == captionTag || token.name() == colTag || token.name() == colgroupTag || token.name() == frameTag || token.name() == headTag || token.name() == tbodyTag || token.name() == tdTag || token.name() == tfootTag || token.name() == thTag || token.name() == theadTag || token.name() == trTag) { + parseError(token); + return; + } + reconstructTheActiveFormattingElements(); + insertElement(token); + break; + case InTableMode: + ASSERT(insertionMode() == InTableMode); + if (token.name() == captionTag) { + m_openElements.popUntilTableScopeMarker(); + m_activeFormattingElements.appendMarker(); + insertElement(token); + m_insertionMode = InCaptionMode; + return; + } + if (token.name() == colgroupTag) { + m_openElements.popUntilTableScopeMarker(); + insertElement(token); + m_insertionMode = InColumnGroupMode; + return; + } + if (token.name() == colTag) { + notImplemented(); + return; + } + if (isTableBodyContextTag(token.name())) { + m_openElements.popUntilTableScopeMarker(); + insertElement(token); + m_insertionMode = InTableBodyMode; + return; + } + if (token.name() == tdTag || token.name() == thTag || token.name() == trTag) { + notImplemented(); + return; + } + if (token.name() == tableTag) { + notImplemented(); + return; + } + if (token.name() == styleTag || token.name() == scriptTag) { + processStartTagForInHead(token); + return; + } + if (token.name() == inputTag) { + notImplemented(); + return; + } + if (token.name() == formTag) { + parseError(token); + if (m_formElement) + return; + insertSelfClosingElement(token); + return; + } + parseError(token); + notImplemented(); + break; + case InTableBodyMode: + ASSERT(insertionMode() == InTableBodyMode); + if (token.name() == trTag) { + m_openElements.popUntilTableBodyScopeMarker(); // How is there ever anything to pop? + insertElement(token); + m_insertionMode = InRowMode; + return; + } + if (token.name() == thTag || token.name() == tdTag) { + parseError(token); + AtomicHTMLToken fakeToken(HTMLToken::StartTag, trTag.localName()); + processStartTag(fakeToken); + ASSERT(insertionMode() == InRowMode); + processStartTag(token); + return; + } + if (token.name() == captionTag || token.name() == colTag || token.name() == colgroupTag || isTableBodyContextTag(token.name())) { + // FIXME: The spec is unclear as to what is supposed to happen here. + notImplemented(); + return; + } + notImplemented(); // process using "in table" rules + break; + case AfterBodyMode: + case AfterAfterBodyMode: + ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode); + if (token.name() == htmlTag) { + insertHTMLStartTagInBody(token); + return; + } + m_insertionMode = InBodyMode; + processStartTag(token); + break; + case InHeadNoscriptMode: + ASSERT(insertionMode() == InHeadNoscriptMode); + if (token.name() == htmlTag) { + insertHTMLStartTagInBody(token); + return; + } + if (token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == styleTag) { + bool didProcess = processStartTagForInHead(token); + ASSERT_UNUSED(didProcess, didProcess); + return; + } + if (token.name() == htmlTag || token.name() == noscriptTag) { parseError(token); - return 0; - case HTMLToken::EndOfFile: - break; + return; + } + processDefaultForInHeadNoscriptMode(token); + processToken(token); + break; + case InFramesetMode: + ASSERT(insertionMode() == InFramesetMode); + if (token.name() == htmlTag) { + insertHTMLStartTagInBody(token); + return; + } + if (token.name() == framesetTag) { + insertElement(token); + return; + } + if (token.name() == frameTag) { + insertSelfClosingElement(token); + return; + } + if (token.name() == noframesTag) { + processStartTagForInHead(token); + return; } - AtomicHTMLToken fakeHead(HTMLToken::StartTag, headTag.localName()); - processToken(fakeHead); - goto reprocessToken; + parseError(token); + break; + case AfterFramesetMode: + case AfterAfterFramesetMode: + ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode); + if (token.name() == htmlTag) { + insertHTMLStartTagInBody(token); + return; + } + if (token.name() == noframesTag) { + processStartTagForInHead(token); + return; + } + parseError(token); + break; + default: + notImplemented(); + } +} + +bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token) +{ + if (!m_openElements.inScope(bodyTag.localName())) { + parseError(token); + return false; } - case InHeadMode: { - switch (token.type()) { - case HTMLToken::Uninitialized: - ASSERT_NOT_REACHED(); - break; - case HTMLToken::Character: - insertCharacter(cc); - break; - case HTMLToken::Comment: - return insertComment(token); - case HTMLToken::DOCTYPE: + notImplemented(); + m_insertionMode = AfterBodyMode; + return true; +} + +// FIXME: This probably belongs on HTMLElementStack. +HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement) +{ + HTMLElementStack::ElementRecord* furthestBlock = 0; + HTMLElementStack::ElementRecord* record = m_openElements.topRecord(); + for (; record; record = record->next()) { + if (record->element() == formattingElement) + return furthestBlock; + const AtomicString& tagName = record->element()->localName(); + // !phrasing && !formatting == scoping || special + if (isScopingTag(tagName) || isSpecialTag(tagName)) + furthestBlock = record; + } + ASSERT_NOT_REACHED(); + return 0; +} + +void HTMLTreeBuilder::findFosterParentFor(Element* element) +{ + Element* fosterParentElement = 0; + HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName()); + if (lastTableElementRecord) { + Element* lastTableElement = lastTableElementRecord->element(); + if (lastTableElement->parent()) { + // FIXME: We need an insertElement which does not send mutation events. + ExceptionCode ec = 0; + lastTableElement->parent()->insertBefore(element, lastTableElement, ec); + ASSERT(!ec); + return; + } + fosterParentElement = lastTableElementRecord->next()->element(); + } else { + ASSERT(m_isParsingFragment); + fosterParentElement = m_openElements.bottom(); // <html> element + } + + fosterParentElement->parserAddChild(element); +} + +// FIXME: This should have a whitty name. +// FIXME: This must be implemented in many other places in WebCore. +void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent) +{ + Node* child = oldParent->firstChild(); + while (child) { + Node* nextChild = child->nextSibling(); + ExceptionCode ec; + newParent->appendChild(child, ec); + ASSERT(!ec); + child = nextChild; + } +} + +// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody +void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token) +{ + while (1) { + // 1. + Element* formattingElement = m_activeFormattingElements.closestElementInScopeWithName(token.name()); + if (!formattingElement || !m_openElements.inScope(formattingElement)) { parseError(token); - return 0; - case HTMLToken::StartTag: - if (token.name() == htmlTag) { - notImplemented(); - return 0; - } - // FIXME: Atomize "command". - if (token.name() == baseTag || token.name() == "command" || token.name() == linkTag) { - PassRefPtr<Node> node = insertElement(token); - m_openElements.pop(); - notImplemented(); - return node; - } - if (token.name() == metaTag) { - PassRefPtr<Node> node = insertElement(token); - m_openElements.pop(); - notImplemented(); - return node; - } - if (token.name() == titleTag) - return insertGenericRCDATAElement(token); - if (token.name() == noscriptTag) { - if (isScriptingFlagEnabled(m_document->frame())) - return insertGenericRawTextElement(token); - PassRefPtr<Node> node = insertElement(token); - setInsertionMode(InHeadNoscriptMode); - return node; - } - if (token.name() == noframesTag || token.name() == styleTag) - return insertGenericRawTextElement(token); - if (token.name() == scriptTag) - return insertScriptElement(token); - if (token.name() == headTag) { - notImplemented(); - return 0; - } - break; - case HTMLToken::EndTag: - if (token.name() == headTag) { - ASSERT(m_openElements.top()->tagQName() == headTag); - m_openElements.pop(); - setInsertionMode(AfterHeadMode); - return 0; + notImplemented(); // Check the stack of open elements for a more specific parse error. + return; + } + HTMLElementStack::ElementRecord* formattingElementRecord = m_openElements.find(formattingElement); + if (!formattingElementRecord) { + parseError(token); + m_activeFormattingElements.remove(formattingElement); + return; + } + if (formattingElement != currentElement()) + parseError(token); + // 2. + HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement); + // 3. + if (!furthestBlock) { + m_openElements.popUntil(formattingElement); + m_openElements.pop(); + m_activeFormattingElements.remove(formattingElement); + return; + } + // 4. + ASSERT(furthestBlock->isAbove(formattingElementRecord)); + Element* commonAncestor = formattingElementRecord->next()->element(); + // 5. + notImplemented(); // bookmark? + // 6. + HTMLElementStack::ElementRecord* node = furthestBlock; + HTMLElementStack::ElementRecord* nextNode = node->next(); + HTMLElementStack::ElementRecord* lastNode = furthestBlock; + while (1) { + // 6.1 + node = nextNode; + ASSERT(node); + nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2. + // 6.2 + if (!m_activeFormattingElements.contains(node->element())) { + m_openElements.remove(node->element()); + node = 0; + continue; } - if (token.name() == bodyTag || token.name() == htmlTag || token.name() == brTag) + // 6.3 + if (node == formattingElementRecord) break; - parseError(token); - return 0; - break; - case HTMLToken::EndOfFile: - break; - } - AtomicHTMLToken fakeHead(HTMLToken::EndTag, headTag.localName()); - processToken(fakeHead); - goto reprocessToken; + // 6.4 + if (lastNode == furthestBlock) + notImplemented(); // move bookmark. + // 6.5 + // FIXME: We're supposed to save the original token in the entry. + AtomicHTMLToken fakeToken(HTMLToken::StartTag, node->element()->localName()); + // Is createElement correct? (instead of insertElement) + // Does this code ever leave newElement unattached? + RefPtr<Element> newElement = createElement(fakeToken); + HTMLFormattingElementList::Entry* nodeEntry = m_activeFormattingElements.find(node->element()); + nodeEntry->replaceElement(newElement.get()); + node->replaceElement(newElement.release()); + // 6.6 + // Use appendChild instead of parserAddChild to handle possible reparenting. + ExceptionCode ec; + node->element()->appendChild(lastNode->element(), ec); + ASSERT(!ec); + // 6.7 + lastNode = node; + } + // 7 + const AtomicString& commonAncestorTag = commonAncestor->localName(); + if (commonAncestorTag == tableTag + || commonAncestorTag == trTag + || isTableBodyContextTag(commonAncestorTag)) + findFosterParentFor(lastNode->element()); + else { + ExceptionCode ec; + commonAncestor->appendChild(lastNode->element(), ec); + ASSERT(!ec); + } + // 8 + // FIXME: We're supposed to save the original token in the entry. + AtomicHTMLToken fakeToken(HTMLToken::StartTag, formattingElement->localName()); + RefPtr<Element> newElement = createElement(fakeToken); + // 9 + reparentChildren(furthestBlock->element(), newElement.get()); + // 10 + furthestBlock->element()->parserAddChild(newElement); + // 11 + m_activeFormattingElements.remove(formattingElement); + notImplemented(); // insert new element at bookmark + // 12 + m_openElements.remove(formattingElement); + m_openElements.insertAbove(newElement, furthestBlock); } - case InHeadNoscriptMode: { - switch (token.type()) { - case HTMLToken::Uninitialized: - ASSERT_NOT_REACHED(); - break; - case HTMLToken::DOCTYPE: +} + +void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token) +{ + switch (insertionMode()) { + case InitialMode: + ASSERT(insertionMode() == InitialMode); + processDefaultForInitialMode(token); + // Fall through. + case BeforeHTMLMode: + ASSERT(insertionMode() == BeforeHTMLMode); + if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { parseError(token); - return 0; - case HTMLToken::StartTag: - if (token.name() == htmlTag) { - notImplemented(); - return 0; - } - if (token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == styleTag) { - notImplemented(); - return 0; - } - if (token.name() == htmlTag || token.name() == noscriptTag) { + return; + } + processDefaultForBeforeHTMLMode(token); + // Fall through. + case BeforeHeadMode: + ASSERT(insertionMode() == BeforeHeadMode); + if (token.name() != headTag && token.name() != bodyTag && token.name() != brTag) { + parseError(token); + return; + } + processDefaultForBeforeHeadMode(token); + // Fall through. + case InHeadMode: + ASSERT(insertionMode() == InHeadMode); + if (token.name() == headTag) { + m_openElements.popHTMLHeadElement(); + setInsertionMode(AfterHeadMode); + return; + } + if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { + parseError(token); + return; + } + processDefaultForInHeadMode(token); + // Fall through. + case AfterHeadMode: + ASSERT(insertionMode() == AfterHeadMode); + if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { + parseError(token); + return; + } + processDefaultForAfterHeadMode(token); + // Fall through + case InBodyMode: + ASSERT(insertionMode() == InBodyMode); + if (token.name() == bodyTag) { + processBodyEndTagForInBody(token); + return; + } + if (token.name() == htmlTag) { + if (processBodyEndTagForInBody(token)) + processEndTag(token); + return; + } + if (token.name() == addressTag || token.name() == articleTag || token.name() == asideTag || token.name() == blockquoteTag || token.name() == buttonTag || token.name() == centerTag || token.name() == "details" || token.name() == dirTag || token.name() == divTag || token.name() == dlTag || token.name() == fieldsetTag || token.name() == "figure" || token.name() == footerTag || token.name() == headerTag || token.name() == hgroupTag || token.name() == listingTag || token.name() == menuTag || token.name() == navTag || token.name() == olTag || token.name() == preTag || token.name() == sectionTag || token.name() == ulTag) { + if (!m_openElements.inScope(token.name())) { parseError(token); - return 0; + return; } - break; - case HTMLToken::EndTag: - if (token.name() == noscriptTag) { - ASSERT(m_openElements.top()->tagQName() == noscriptTag); - m_openElements.pop(); - ASSERT(m_openElements.top()->tagQName() == headTag); - setInsertionMode(InHeadMode); - return 0; - } - if (token.name() == brTag) - break; - parseError(token); - return 0; - case HTMLToken::Character: - notImplemented(); - break; - case HTMLToken::Comment: - notImplemented(); - return 0; - case HTMLToken::EndOfFile: - break; + generateImpliedEndTags(); + if (currentElement()->tagQName() != token.name()) + parseError(token); + m_openElements.popUntil(token.name()); + m_openElements.pop(); } - AtomicHTMLToken fakeNoscript(HTMLToken::EndTag, noscriptTag.localName()); - processToken(fakeNoscript); - goto reprocessToken; - } - case AfterHeadMode: { - switch (token.type()) { - case HTMLToken::Uninitialized: - ASSERT_NOT_REACHED(); - break; - case HTMLToken::Character: - if (isTreeBuilderWhiteSpace(cc)) { - insertCharacter(cc); - return 0; + if (token.name() == formTag) { + RefPtr<Element> node = m_formElement.release(); + if (!node || !m_openElements.inScope(node.get())) { + parseError(token); + return; } - break; - case HTMLToken::Comment: - return insertComment(token); - case HTMLToken::DOCTYPE: - parseError(token); - return 0; - case HTMLToken::StartTag: - if (token.name() == htmlTag) { + generateImpliedEndTags(); + if (currentElement() != node.get()) + parseError(token); + m_openElements.remove(node.get()); + } + if (token.name() == pTag) { + if (!m_openElements.inScope(token.name())) { + parseError(token); notImplemented(); - return 0; + return; } - if (token.name() == bodyTag) { - m_framesetOk = false; - return insertElement(token); + generateImpliedEndTagsWithExclusion(token.name()); + if (!currentElement()->hasLocalName(token.name())) + parseError(token); + m_openElements.popUntil(token.name()); + m_openElements.pop(); + return; + } + if (token.name() == liTag) { + if (!m_openElements.inListItemScope(token.name())) { + parseError(token); + return; } - if (token.name() == framesetTag) { - PassRefPtr<Node> node = insertElement(token); - setInsertionMode(InFramesetMode); - return node; + generateImpliedEndTagsWithExclusion(token.name()); + if (!currentElement()->hasLocalName(token.name())) + parseError(token); + m_openElements.popUntil(token.name()); + m_openElements.pop(); + return; + } + if (token.name() == ddTag || token.name() == dtTag) { + if (!m_openElements.inScope(token.name())) { + parseError(token); + return; } - if (token.name() == baseTag || token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == scriptTag || token.name() == styleTag || token.name() == titleTag) { + generateImpliedEndTagsWithExclusion(token.name()); + if (!currentElement()->hasLocalName(token.name())) parseError(token); - ASSERT(m_headElement); - m_openElements.push(m_headElement.get()); - notImplemented(); - m_openElements.remove(m_headElement.get()); + m_openElements.popUntil(token.name()); + m_openElements.pop(); + return; + } + if (isNumberedHeaderTag(token.name())) { + if (!m_openElements.inScope(token.name())) { + parseError(token); + return; } - if (token.name() == headTag) { + generateImpliedEndTags(); + if (!currentElement()->hasLocalName(token.name())) parseError(token); - return 0; + m_openElements.popUntil(token.name()); + m_openElements.pop(); + return; + } + if (token.name() == "sarcasm") { + notImplemented(); // Take a deep breath. + return; + } + if (isFormattingTag(token.name())) { + callTheAdoptionAgency(token); + return; + } + if (token.name() == appletTag || token.name() == marqueeTag || token.name() == objectTag) { + if (!m_openElements.inScope(token.name())) { + parseError(token); + return; } - break; - case HTMLToken::EndTag: - if (token.name() == bodyTag || token.name() == htmlTag || token.name() == brTag) - break; + generateImpliedEndTags(); + if (currentElement()->tagQName() != token.name()) + parseError(token); + m_openElements.popUntil(token.name()); + m_openElements.pop(); + m_activeFormattingElements.clearToLastMarker(); + return; + } + if (token.name() == brTag) { + parseError(token); + reconstructTheActiveFormattingElements(); + // Notice that we lose the attributes. + AtomicHTMLToken startBr(HTMLToken::StartTag, token.name()); + insertSelfClosingElement(startBr); + m_framesetOk = false; + return; + } + // FIXME: We need an iterator over m_openElements to implement this + // correctly. + notImplemented(); + if (!m_openElements.inScope(token.name())) + return; + m_openElements.popUntil(token.name()); + m_openElements.pop(); + break; + case AfterBodyMode: + ASSERT(insertionMode() == AfterBodyMode); + if (token.name() == htmlTag) { + if (m_isParsingFragment) { + parseError(token); + return; + } + m_insertionMode = AfterAfterBodyMode; + return; + } + // Fall through. + case AfterAfterBodyMode: + ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode); + parseError(token); + m_insertionMode = InBodyMode; + processEndTag(token); + break; + case InHeadNoscriptMode: + ASSERT(insertionMode() == InHeadNoscriptMode); + if (token.name() == noscriptTag) { + ASSERT(currentElement()->tagQName() == noscriptTag); + m_openElements.pop(); + ASSERT(currentElement()->tagQName() == headTag); + setInsertionMode(InHeadMode); + return; + } + if (token.name() != brTag) { parseError(token); - return 0; - case HTMLToken::EndOfFile: - break; - } - AtomicHTMLToken fakeBody(HTMLToken::StartTag, bodyTag.localName()); - processToken(fakeBody); - m_framesetOk = true; - goto reprocessToken; + return; + } + processDefaultForInHeadNoscriptMode(token); + processToken(token); + break; + case TextMode: + if (token.name() == scriptTag) { + // Pause ourselves so that parsing stops until the script can be processed by the caller. + m_isPaused = true; + ASSERT(currentElement()->tagQName() == scriptTag); + m_scriptToProcess = currentElement(); + m_openElements.pop(); + m_insertionMode = m_originalInsertionMode; + return; + } + m_openElements.pop(); + m_insertionMode = m_originalInsertionMode; + break; + case InFramesetMode: + ASSERT(insertionMode() == InFramesetMode); + if (token.name() == framesetTag) { + if (currentElement() == m_openElements.htmlElement()) { + parseError(token); + return; + } + m_openElements.pop(); + if (!m_isParsingFragment && !currentElement()->hasTagName(framesetTag)) + m_insertionMode = AfterFramesetMode; + return; + } + break; + case AfterFramesetMode: + ASSERT(insertionMode() == AfterFramesetMode); + if (token.name() == htmlTag) { + m_insertionMode = AfterAfterFramesetMode; + return; + } + // Fall through. + case AfterAfterFramesetMode: + ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode); + parseError(token); + break; + default: + notImplemented(); } +} + +void HTMLTreeBuilder::processComment(AtomicHTMLToken& token) +{ + if (m_insertionMode == InitialMode || m_insertionMode == BeforeHTMLMode || m_insertionMode == AfterAfterBodyMode || m_insertionMode == AfterAfterFramesetMode) { + insertCommentOnDocument(token); + return; + } + if (m_insertionMode == AfterBodyMode) { + insertCommentOnHTMLHtmlElement(token); + return; + } + insertComment(token); +} + +void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token) +{ + // FIXME: We need to figure out how to handle each character individually. + switch (insertionMode()) { + case InitialMode: + ASSERT(insertionMode() == InitialMode); + notImplemented(); + processDefaultForInitialMode(token); + // Fall through. + case BeforeHTMLMode: + ASSERT(insertionMode() == BeforeHTMLMode); + notImplemented(); + processDefaultForBeforeHTMLMode(token); + // Fall through. + case BeforeHeadMode: + ASSERT(insertionMode() == BeforeHeadMode); + notImplemented(); + processDefaultForBeforeHeadMode(token); + // Fall through. + case InHeadMode: + ASSERT(insertionMode() == InHeadMode); + notImplemented(); + processDefaultForInHeadMode(token); + // Fall through. + case AfterHeadMode: + ASSERT(insertionMode() == AfterHeadMode); + notImplemented(); + processDefaultForAfterHeadMode(token); + // Fall through case InBodyMode: - case TextMode: - case InTableMode: - case InTableTextMode: - case InCaptionMode: - case InColumnGroupMode: - case InTableBodyMode: - case InRowMode: - case InCellMode: - case InSelectMode: - case InSelectInTableMode: - case InForeignContentMode: + ASSERT(insertionMode() == InBodyMode); + notImplemented(); + insertTextNode(token); + break; case AfterBodyMode: + case AfterAfterBodyMode: + ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode); + parseError(token); + m_insertionMode = InBodyMode; + processCharacter(token); + break; + case TextMode: + notImplemented(); + insertTextNode(token); + break; + case InHeadNoscriptMode: + ASSERT(insertionMode() == InHeadNoscriptMode); + processDefaultForInHeadNoscriptMode(token); + processToken(token); + break; case InFramesetMode: case AfterFramesetMode: + case AfterAfterFramesetMode: + ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode); + parseError(token); + break; + default: + notImplemented(); + } +} + +void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) +{ + switch (insertionMode()) { + case InitialMode: + ASSERT(insertionMode() == InitialMode); + processDefaultForInitialMode(token); + // Fall through. + case BeforeHTMLMode: + ASSERT(insertionMode() == BeforeHTMLMode); + processDefaultForBeforeHTMLMode(token); + // Fall through. + case BeforeHeadMode: + ASSERT(insertionMode() == BeforeHeadMode); + processDefaultForBeforeHeadMode(token); + // Fall through. + case InHeadMode: + ASSERT(insertionMode() == InHeadMode); + processDefaultForInHeadMode(token); + // Fall through. + case AfterHeadMode: + ASSERT(insertionMode() == AfterHeadMode); + processDefaultForAfterHeadMode(token); + // Fall through + case InBodyMode: + ASSERT(insertionMode() == InBodyMode); + notImplemented(); + break; + case AfterBodyMode: case AfterAfterBodyMode: + ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode); + notImplemented(); + break; + case InHeadNoscriptMode: + ASSERT(insertionMode() == InHeadNoscriptMode); + processDefaultForInHeadNoscriptMode(token); + processToken(token); + break; + case InFramesetMode: + ASSERT(insertionMode() == InFramesetMode); + if (currentElement() != m_openElements.htmlElement()) + parseError(token); + break; + case AfterFramesetMode: case AfterAfterFramesetMode: - ASSERT_NOT_REACHED(); + ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode); + break; + default: + notImplemented(); } +} - // Implementation coming in the next patch. - return 0; +void HTMLTreeBuilder::processDefaultForInitialMode(AtomicHTMLToken& token) +{ + notImplemented(); + parseError(token); + setInsertionMode(BeforeHTMLMode); } -PassRefPtr<Node> HTMLTreeBuilder::insertDoctype(AtomicHTMLToken& token) +void HTMLTreeBuilder::processDefaultForBeforeHTMLMode(AtomicHTMLToken&) { - ASSERT_UNUSED(token, token.type() == HTMLToken::DOCTYPE); - return 0; + AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName()); + insertHTMLStartTagBeforeHTML(startHTML); + setInsertionMode(BeforeHeadMode); } -PassRefPtr<Node> HTMLTreeBuilder::insertComment(AtomicHTMLToken& token) +void HTMLTreeBuilder::processDefaultForBeforeHeadMode(AtomicHTMLToken&) { - ASSERT_UNUSED(token, token.type() == HTMLToken::Comment); - return 0; + AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName()); + processStartTag(startHead); } -PassRefPtr<Element> HTMLTreeBuilder::insertElement(AtomicHTMLToken& token) +void HTMLTreeBuilder::processDefaultForInHeadMode(AtomicHTMLToken&) { - ASSERT_UNUSED(token, token.type() == HTMLToken::StartTag); - return 0; + AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName()); + processEndTag(endHead); } -void HTMLTreeBuilder::insertCharacter(UChar cc) +void HTMLTreeBuilder::processDefaultForInHeadNoscriptMode(AtomicHTMLToken&) { - ASSERT_UNUSED(cc, cc); + AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName()); + processEndTag(endNoscript); } -PassRefPtr<Node> HTMLTreeBuilder::insertGenericRCDATAElement(AtomicHTMLToken& token) +void HTMLTreeBuilder::processDefaultForAfterHeadMode(AtomicHTMLToken&) { - ASSERT_UNUSED(token, token.type() == HTMLToken::StartTag); - return 0; + AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName()); + processStartTag(startBody); + m_framesetOk = true; } -PassRefPtr<Node> HTMLTreeBuilder::insertGenericRawTextElement(AtomicHTMLToken& token) +bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token) { - ASSERT_UNUSED(token, token.type() == HTMLToken::StartTag); - return 0; + if (token.name() == htmlTag) { + insertHTMLStartTagInBody(token); + return true; + } + // FIXME: Atomize "command". + if (token.name() == baseTag || token.name() == "command" || token.name() == linkTag || token.name() == metaTag) { + insertSelfClosingElement(token); + // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process(). + return true; + } + if (token.name() == titleTag) { + insertGenericRCDATAElement(token); + return true; + } + if (token.name() == noscriptTag) { + if (isScriptingFlagEnabled(m_document->frame())) { + insertGenericRawTextElement(token); + return true; + } + insertElement(token); + setInsertionMode(InHeadNoscriptMode); + return true; + } + if (token.name() == noframesTag || token.name() == styleTag) { + insertGenericRawTextElement(token); + return true; + } + if (token.name() == scriptTag) { + insertScriptElement(token); + return true; + } + if (token.name() == headTag) { + parseError(token); + return true; + } + return false; +} + +void HTMLTreeBuilder::insertDoctype(AtomicHTMLToken& token) +{ + ASSERT(token.type() == HTMLToken::DOCTYPE); + attach(m_document, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier()))); + // FIXME: Move quirks mode detection from DocumentType element to here. + notImplemented(); + if (token.forceQuirks()) + m_document->setParseMode(Document::Compat); +} + +void HTMLTreeBuilder::insertComment(AtomicHTMLToken& token) +{ + ASSERT(token.type() == HTMLToken::Comment); + attach(currentElement(), Comment::create(m_document, token.comment())); +} + +void HTMLTreeBuilder::insertCommentOnDocument(AtomicHTMLToken& token) +{ + ASSERT(token.type() == HTMLToken::Comment); + attach(m_document, Comment::create(m_document, token.comment())); +} + +void HTMLTreeBuilder::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token) +{ + ASSERT(token.type() == HTMLToken::Comment); + attach(m_openElements.htmlElement(), Comment::create(m_document, token.comment())); +} + +PassRefPtr<Element> HTMLTreeBuilder::createElementAndAttachToCurrent(AtomicHTMLToken& token) +{ + ASSERT(token.type() == HTMLToken::StartTag); + return attach(currentElement(), createElement(token)); +} + +void HTMLTreeBuilder::insertHTMLHtmlElement(AtomicHTMLToken& token) +{ + m_openElements.pushHTMLHtmlElement(createElementAndAttachToCurrent(token)); } -PassRefPtr<Node> HTMLTreeBuilder::insertScriptElement(AtomicHTMLToken& token) +void HTMLTreeBuilder::insertHTMLHeadElement(AtomicHTMLToken& token) +{ + m_headElement = createElementAndAttachToCurrent(token); + m_openElements.pushHTMLHeadElement(m_headElement); +} + +void HTMLTreeBuilder::insertHTMLBodyElement(AtomicHTMLToken& token) +{ + m_openElements.pushHTMLBodyElement(createElementAndAttachToCurrent(token)); +} + +void HTMLTreeBuilder::insertElement(AtomicHTMLToken& token) +{ + m_openElements.push(createElementAndAttachToCurrent(token)); +} + +void HTMLTreeBuilder::insertSelfClosingElement(AtomicHTMLToken& token) +{ + ASSERT(token.type() == HTMLToken::StartTag); + attach(currentElement(), createElement(token)); + // FIXME: Do we want to acknowledge the token's self-closing flag? + // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag +} + +void HTMLTreeBuilder::insertFormattingElement(AtomicHTMLToken& token) +{ + // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements + // Possible active formatting elements include: + // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u. + insertElement(token); + m_activeFormattingElements.append(currentElement()); +} + +void HTMLTreeBuilder::insertGenericRCDATAElement(AtomicHTMLToken& token) +{ + insertElement(token); + m_tokenizer->setState(HTMLTokenizer::RCDATAState); + m_originalInsertionMode = m_insertionMode; + m_insertionMode = TextMode; +} + +void HTMLTreeBuilder::insertGenericRawTextElement(AtomicHTMLToken& token) +{ + insertElement(token); + m_tokenizer->setState(HTMLTokenizer::RAWTEXTState); + m_originalInsertionMode = m_insertionMode; + m_insertionMode = TextMode; +} + +void HTMLTreeBuilder::insertScriptElement(AtomicHTMLToken& token) { ASSERT_UNUSED(token, token.type() == HTMLToken::StartTag); - return 0; + RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, m_document, true); + element->setAttributeMap(token.attributes(), m_fragmentScriptingPermission); + m_openElements.push(attach(currentElement(), element.release())); + m_tokenizer->setState(HTMLTokenizer::ScriptDataState); + m_originalInsertionMode = m_insertionMode; + m_insertionMode = TextMode; +} + +void HTMLTreeBuilder::insertTextNode(AtomicHTMLToken& token) +{ + attach(currentElement(), Text::create(m_document, token.characters())); +} + +PassRefPtr<Element> HTMLTreeBuilder::createElement(AtomicHTMLToken& token) +{ + RefPtr<Element> element = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, token.name(), xhtmlNamespaceURI), m_document, 0); + element->setAttributeMap(token.attributes(), m_fragmentScriptingPermission); + return element.release(); +} + +bool HTMLTreeBuilder::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const +{ + if (m_activeFormattingElements.isEmpty()) + return false; + unsigned index = m_activeFormattingElements.size(); + do { + --index; + const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements[index]; + if (entry.isMarker() || m_openElements.contains(entry.element())) { + firstUnopenElementIndex = index; + return true; + } + } while (index); + return false; +} + +void HTMLTreeBuilder::reconstructTheActiveFormattingElements() +{ + unsigned firstUnopenElementIndex; + if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex)) + return; + + unsigned unopenEntryIndex = firstUnopenElementIndex; + ASSERT(unopenEntryIndex < m_activeFormattingElements.size()); + for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) { + HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements[unopenEntryIndex]; + // FIXME: We're supposed to save the original token in the entry. + AtomicHTMLToken fakeToken(HTMLToken::StartTag, unopenedEntry.element()->localName()); + insertElement(fakeToken); + unopenedEntry.replaceElement(currentElement()); + } +} + +namespace { + +bool hasImpliedEndTag(Element* element) +{ + return element->hasTagName(ddTag) + || element->hasTagName(dtTag) + || element->hasTagName(liTag) + || element->hasTagName(optionTag) + || element->hasTagName(optgroupTag) + || element->hasTagName(pTag) + || element->hasTagName(rpTag) + || element->hasTagName(rtTag); +} + +} + +void HTMLTreeBuilder::generateImpliedEndTagsWithExclusion(const AtomicString& tagName) +{ + while (hasImpliedEndTag(currentElement()) && !currentElement()->hasLocalName(tagName)) + m_openElements.pop(); +} + +void HTMLTreeBuilder::generateImpliedEndTags() +{ + while (hasImpliedEndTag(currentElement())) + m_openElements.pop(); } void HTMLTreeBuilder::finished() { // We should call m_document->finishedParsing() here, except // m_legacyTreeBuilder->finished() does it for us. - m_legacyTreeBuilder->finished(); + if (m_legacyTreeBuilder) { + m_legacyTreeBuilder->finished(); + return; + } + + AtomicHTMLToken eofToken(HTMLToken::EndOfFile, nullAtom); + processToken(eofToken); + + // Warning, this may delete the parser, so don't try to do anything else after this. + if (!m_isParsingFragment) + m_document->finishedParsing(); } bool HTMLTreeBuilder::isScriptingFlagEnabled(Frame* frame) |