/* * Copyright (C) 2010 Google, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "config.h"
#include "HTMLTreeBuilder.h"

#include "Comment.h"
#include "DocumentFragment.h"
#include "DocumentType.h"
#include "Element.h"
#include "Frame.h"
#include "HTMLDocument.h"
#include "HTMLElementFactory.h"
#include "HTMLFormElement.h"
#include "HTMLHtmlElement.h"
#include "HTMLNames.h"
#include "HTMLScriptElement.h"
#include "HTMLToken.h"
#include "HTMLTokenizer.h"
#include "LocalizedStrings.h"
#include "MathMLNames.h"
#include "NotImplemented.h"
#include "SVGNames.h"
#include "ScriptController.h"
#include "Settings.h"
#include "Text.h"
#include "XLinkNames.h"
#include "XMLNSNames.h"
#include "XMLNames.h"
// FIXME: Remove this include once we find a home for the free functions that
// are using it.
// NOTE(review): the two directives below have no target in this copy of the
// file; they look like angle-bracket includes whose names were lost. Restore
// them from version control before building.
#include
#include

namespace WebCore {

using namespace HTMLNames;

// Sentinel stored in the script line-number members while no line is recorded.
static const int uninitializedLineNumberValue = -1;

namespace {

// True for tab, line feed (0x0A), form feed (0x0C), carriage return (0x0D)
// and space.
inline bool isTreeBuilderWhitepace(UChar c)
{
    // FIXME: Consider branch permutations.
    return c == '\t' || c == '\x0A' || c == '\x0C' || c == '\x0D' || c == ' ';
}

// Negation of isTreeBuilderWhitepace().
inline bool isNotTreeBuilderWhitepace(UChar c)
{
    return !isTreeBuilderWhitepace(c);
}

// True for tree builder whitespace and for U+FFFD.
inline bool isTreeBuilderWhitepaceOrReplacementCharacter(UChar c)
{
    return isTreeBuilderWhitepace(c) || c == 0xFFFD;
}

// Returns true when isSpecialCharacter() accepts every character of |string|
// (and therefore also for an empty string, since the loop never runs).
// NOTE(review): the template parameter list that should declare
// isSpecialCharacter is missing in this copy of the file - confirm against
// version control.
template inline bool isAllSpecialCharacters(const String& string)
{
    const UChar* characters = string.characters();
    const unsigned length = string.length();
    for (unsigned i = 0; i < length; ++i) {
        if (!isSpecialCharacter(characters[i]))
            return false;
    }
    return true;
}

// NOTE(review): presumably instantiates isAllSpecialCharacters with
// isTreeBuilderWhitepace; the template argument is not visible here.
inline bool isAllWhitespace(const String& string)
{
    return isAllSpecialCharacters(string);
}

// NOTE(review): presumably instantiates isAllSpecialCharacters with
// isTreeBuilderWhitepaceOrReplacementCharacter; the template argument is not
// visible here.
inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
{
    return isAllSpecialCharacters(string);
}

// True for h1 through h6.
bool isNumberedHeaderTag(const AtomicString& tagName)
{
    return tagName == h1Tag
        || tagName == h2Tag
        || tagName == h3Tag
        || tagName == h4Tag
        || tagName == h5Tag
        || tagName == h6Tag;
}

// True for caption, col and colgroup.
bool isCaptionColOrColgroupTag(const AtomicString& tagName)
{
    return tagName == captionTag
        || tagName == colTag
        || tagName == colgroupTag;
}

// True for th and td.
bool isTableCellContextTag(const AtomicString& tagName)
{
    return tagName == thTag
        || tagName == tdTag;
}

// True for tbody, tfoot and thead.
bool isTableBodyContextTag(const AtomicString& tagName)
{
    return tagName == tbodyTag
        || tagName == tfootTag
        || tagName == theadTag;
}

// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
// Returns true when |node| is in the XHTML namespace and its local name is in
// the list below; nodes in any other namespace are never special here.
bool isSpecialNode(Node* node)
{
    if (node->namespaceURI() != xhtmlNamespaceURI)
        return false;
    // FIXME: This list is out of sync with the spec.
    const AtomicString& tagName = node->localName();
    return tagName == addressTag
        || tagName == articleTag
        || tagName == asideTag
        || tagName == baseTag
        || tagName == basefontTag
        || tagName == bgsoundTag
        || tagName == blockquoteTag
        || tagName == bodyTag
        || tagName == brTag
        || tagName == buttonTag
        || tagName == centerTag
        || tagName == colTag
        || tagName == colgroupTag
        || tagName == commandTag
        || tagName == ddTag
        || tagName == detailsTag
        || tagName == dirTag
        || tagName == divTag
        || tagName == dlTag
        || tagName == dtTag
        || tagName == embedTag
        || tagName == fieldsetTag
        || tagName == figureTag
        || tagName == footerTag
        || tagName == formTag
        || tagName == frameTag
        || tagName == framesetTag
        || isNumberedHeaderTag(tagName)
        || tagName == headTag
        || tagName == headerTag
        || tagName == hgroupTag
        || tagName == hrTag
        || tagName == htmlTag
        || tagName == iframeTag
        || tagName == imgTag
        || tagName == inputTag
        || tagName == isindexTag
        || tagName == liTag
        || tagName == linkTag
        || tagName == listingTag
        || tagName == menuTag
        || tagName == metaTag
        || tagName == navTag
        || tagName == noembedTag
        || tagName == noframesTag
        || tagName == noscriptTag
        || tagName == olTag
        || tagName == pTag
        || tagName == paramTag
        || tagName == plaintextTag
        || tagName == preTag
        || tagName == scriptTag
        || tagName == sectionTag
        || tagName == selectTag
        || tagName == styleTag
        || isTableBodyContextTag(tagName)
        || tagName == textareaTag
        || tagName == titleTag
        || tagName == trTag
        || tagName == ulTag
        || tagName == wbrTag
        || tagName == xmpTag;
}
// True for b, big, code, em, font, i, s, small, strike, strong, tt and u,
// i.e. the formatting tags other than a and nobr.
bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
{
    return tagName == bTag
        || tagName == bigTag
        || tagName == codeTag
        || tagName == emTag
        || tagName == fontTag
        || tagName == iTag
        || tagName == sTag
        || tagName == smallTag
        || tagName == strikeTag
        || tagName == strongTag
        || tagName == ttTag
        || tagName == uTag;
}

// True for nobr and for everything isNonAnchorNonNobrFormattingTag() accepts.
bool isNonAnchorFormattingTag(const AtomicString& tagName)
{
    return tagName == nobrTag
        || isNonAnchorNonNobrFormattingTag(tagName);
}

// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
bool isFormattingTag(const AtomicString& tagName)
{
    return tagName == aTag || isNonAnchorFormattingTag(tagName);
}

// Starting at |element| itself, walks up the parent chain and returns the
// first element with the form tag. Returns 0 when |element| is null, when the
// chain ends, or when a parent is not an element node.
// NOTE(review): both static_cast expressions below have lost their target
// type in this copy of the file - confirm against version control.
HTMLFormElement* closestFormAncestor(Element* element)
{
    while (element) {
        if (element->hasTagName(formTag))
            return static_cast(element);
        Node* parent = element->parent();
        if (!parent || !parent->isElementNode())
            return 0;
        element = static_cast(parent);
    }
    return 0;
}

} // namespace

// Cursor over a run of UChars that belongs to a character token or a String.
// It only stores a [m_current, m_end) pointer pair into the source's storage.
// Both constructors assert that the run is not empty and the destructor
// asserts that the whole run has been consumed.
class HTMLTreeBuilder::ExternalCharacterTokenBuffer : public Noncopyable {
public:
    explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
        : m_current(token.characters().data())
        , m_end(m_current + token.characters().size())
    {
        ASSERT(!isEmpty());
    }

    explicit ExternalCharacterTokenBuffer(const String& string)
        : m_current(string.characters())
        , m_end(m_current + string.length())
    {
        ASSERT(!isEmpty());
    }

    ~ExternalCharacterTokenBuffer()
    {
        ASSERT(isEmpty());
    }

    bool isEmpty() const { return m_current == m_end; }

    // NOTE(review): the three wrappers below call skipLeading()/takeLeading()
    // without a visible template argument; presumably they select the
    // whitespace / non-whitespace predicates - confirm.
    void skipLeadingWhitespace()
    {
        skipLeading();
    }

    String takeLeadingWhitespace()
    {
        return takeLeading();
    }

    String takeLeadingNonWhitespace()
    {
        return takeLeading();
    }

    // Returns every remaining character as a String and leaves the buffer
    // empty.
    String takeRemaining()
    {
        ASSERT(!isEmpty());
        const UChar* start = m_current;
        m_current = m_end;
        return String(start, m_current - start);
    }

    // Appends every remaining character to |recipient| and leaves the buffer
    // empty.
    void giveRemainingTo(Vector& recipient)
    {
        recipient.append(m_current, m_end - m_current);
        m_current = m_end;
    }

    // Consumes every remaining character and returns only those that are tree
    // builder whitespace, in their original order.
    String takeRemainingWhitespace()
    {
        ASSERT(!isEmpty());
        Vector whitespace;
        do {
            UChar cc = *m_current++;
            if (isTreeBuilderWhitepace(cc))
                whitespace.append(cc);
        } while (m_current < m_end);
        // Returning the null string when there aren't any whitespace
        // characters is slightly cleaner semantically because we don't want
        // to insert a text node (as opposed to inserting an empty text node).
        if (whitespace.isEmpty())
            return String();
        return String::adopt(whitespace);
    }

private:
    // Advances m_current while characterPredicate() accepts the character it
    // points at, stopping as soon as m_end is reached.
    // NOTE(review): the template parameter list that should declare
    // characterPredicate is missing in this copy of the file.
    template void skipLeading()
    {
        ASSERT(!isEmpty());
        while (characterPredicate(*m_current)) {
            if (++m_current == m_end)
                return;
        }
    }

    // Like skipLeading(), but returns the skipped characters. Returns the null
    // String when nothing was skipped.
    template String takeLeading()
    {
        ASSERT(!isEmpty());
        const UChar* start = m_current;
        skipLeading();
        if (start == m_current)
            return String();
        return String(start, m_current - start);
    }

    const UChar* m_current;
    const UChar* m_end;
};

// Constructor used when building a tree directly into |document|. Every
// insertion mode member starts at InitialMode.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors)
    : m_framesetOk(true)
    , m_document(document)
    , m_tree(document, FragmentScriptingAllowed, false)
    , m_reportErrors(reportErrors)
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_secondaryInsertionMode(InitialMode)
    , m_tokenizer(tokenizer)
    , m_scriptToProcessStartLine(uninitializedLineNumberValue)
    , m_lastScriptElementStartLine(uninitializedLineNumberValue)
{
}

// FIXME: Member variables should be grouped into self-initializing structs to
// minimize code duplication between these constructors.
// Constructor used for fragment parsing. The tree is built in the document
// owned by m_fragmentContext rather than in the fragment's own document.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
    : m_framesetOk(true)
    , m_fragmentContext(fragment, contextElement, scriptingPermission)
    , m_document(m_fragmentContext.document())
    , m_tree(m_document, scriptingPermission, true)
    , m_reportErrors(false) // FIXME: Why not report errors in fragments?
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_secondaryInsertionMode(InitialMode)
    , m_tokenizer(tokenizer)
    , m_scriptToProcessStartLine(uninitializedLineNumberValue)
    , m_lastScriptElementStartLine(uninitializedLineNumberValue)
{
    if (contextElement) {
        // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
        // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
        m_document->setCompatibilityMode(contextElement->document()->compatibilityMode());
        processFakeStartTag(htmlTag);
        resetInsertionModeAppropriately();
        m_tree.setForm(closestFormAncestor(contextElement));
    }
}

HTMLTreeBuilder::~HTMLTreeBuilder()
{
}

// Drops the document pointer and detaches the construction site.
void HTMLTreeBuilder::detach()
{
    // This call makes little sense in fragment mode, but for consistency
    // DocumentParser expects detach() to always be called before it's destroyed.
    m_document = 0;
    // HTMLConstructionSite might be on the callstack when detach() is called
    // otherwise we'd just call m_tree.clear() here instead.
    m_tree.detach();
}

// Default context: no fragment and no context element.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
    : m_fragment(0)
    , m_contextElement(0)
    , m_scriptingPermission(FragmentScriptingAllowed)
{
}

// Creates a fresh HTMLDocument to parse into and gives it the compatibility
// mode of |fragment|'s document.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL()))
    , m_fragment(fragment)
    , m_contextElement(contextElement)
    , m_scriptingPermission(scriptingPermission)
{
    m_dummyDocumentForFragmentParsing->setCompatibilityMode(fragment->document()->compatibilityMode());
}

// Returns the dummy document; asserts that this context has a fragment.
Document* HTMLTreeBuilder::FragmentParsingContext::document() const
{
    ASSERT(m_fragment);
    return m_dummyDocumentForFragmentParsing.get();
}

void HTMLTreeBuilder::FragmentParsingContext::finished()
{
    // Populate the DocumentFragment with the parsed content now that we're done.
    // With a context element the children are taken from the dummy document's
    // documentElement(); otherwise from the dummy document itself.
    ContainerNode* root = m_dummyDocumentForFragmentParsing.get();
    if (m_contextElement)
        root = m_dummyDocumentForFragmentParsing->documentElement();
    m_fragment->takeAllChildrenFrom(root);
}

HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
{
}

// Hands the pending script to the caller, reports its start line through
// |scriptStartLine| and resets the stored start line to the sentinel.
// NOTE(review): the PassRefPtr template argument is missing in this copy.
PassRefPtr HTMLTreeBuilder::takeScriptToProcess(int& scriptStartLine)
{
    // Unpause ourselves, callers may pause us again when processing the script.
    // The HTML5 spec is written as though scripts are executed inside the tree
    // builder. We pause the parser to exit the tree builder, and then resume
    // before running scripts.
    m_isPaused = false;
    scriptStartLine = m_scriptToProcessStartLine;
    m_scriptToProcessStartLine = uninitializedLineNumberValue;
    return m_scriptToProcess.release();
}

// Maps |tagName| to the tokenizer state to use for that element's content:
// RCDATA for textarea/title, RAWTEXT for style/iframe/xmp/noframes (and for
// noembed/noscript when plugins/script are enabled for |frame|), PLAINTEXT for
// plaintext. Any other tag name returns |state| unchanged.
HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State state, const AtomicString& tagName, Frame* frame)
{
    if (tagName == textareaTag || tagName == titleTag)
        return HTMLTokenizer::RCDATAState;

    if (tagName == styleTag
        || tagName == iframeTag
        || tagName == xmpTag
        || (tagName == noembedTag && pluginsEnabled(frame))
        || tagName == noframesTag
        || (tagName == noscriptTag && scriptEnabled(frame)))
        return HTMLTokenizer::RAWTEXTState;

    if (tagName == plaintextTag)
        return HTMLTokenizer::PLAINTEXTState;

    return state;
}

// Entry point for one tokenizer token: wraps it in an AtomicHTMLToken,
// processes it, then updates two tokenizer flags from the resulting insertion
// mode.
void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
{
    AtomicHTMLToken token(rawToken);
    processToken(token);

    // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
    // the U+0000 characters into replacement characters has compatibility
    // problems.
    m_tokenizer->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
    // CDATA is only allowed in foreign content whose current element is not
    // in the XHTML namespace.
    m_tokenizer->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
}

// Dispatches |token| to the handler for its type. An Uninitialized token is a
// programming error.
void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
{
    switch (token.type()) {
    case HTMLToken::Uninitialized:
        ASSERT_NOT_REACHED();
        break;
    case HTMLToken::DOCTYPE:
        processDoctypeToken(token);
        break;
    case HTMLToken::StartTag:
        processStartTag(token);
        break;
    case HTMLToken::EndTag:
        processEndTag(token);
        break;
    case HTMLToken::Comment:
        processComment(token);
        return;
    case HTMLToken::Character:
        processCharacter(token);
        break;
    case HTMLToken::EndOfFile:
        processEndOfFile(token);
        break;
    }
}

// A DOCTYPE is inserted only in InitialMode. In InTableTextMode the default
// in-table-text handling runs first and the token is processed again. In every
// other mode it is reported as a parse error.
void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::DOCTYPE);
    if (m_insertionMode == InitialMode) {
        m_tree.insertDoctype(token);
        setInsertionMode(BeforeHTMLMode);
        return;
    }
    if (m_insertionMode == InTableTextMode) {
        defaultForInTableText();
        processDoctypeToken(token);
        return;
    }
    parseError(token);
}

// Builds a start tag token from |tagName|'s local name and |attributes| and
// processes it as if it had come from the tokenizer.
// NOTE(review): the PassRefPtr template argument is missing in this copy.
void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr attributes)
{
    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
    AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
    processStartTag(fakeToken);
}

// Builds an end tag token from |tagName|'s local name and processes it.
void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
{
    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
    AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
    processEndTag(fakeToken);
}

// Processes |characters| as character data; |characters| must not be empty.
void HTMLTreeBuilder::processFakeCharacters(const String& characters)
{
    ASSERT(!characters.isEmpty());
    ExternalCharacterTokenBuffer buffer(characters);
    processCharacterBuffer(buffer);
}

// Processes a fake </p> only when a p element is in button scope.
void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
{
    if (!m_tree.openElements()->inButtonScope(pTag.localName()))
        return;
    AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
    processEndTag(endP);
}

// Takes the attributes from an isindex |token|, removes name, action and
// prompt, and adds a name attribute whose value is the isindex tag's local
// name. A new attribute map is created when the token had none.
// NOTE(review): the PassRefPtr/RefPtr template arguments are missing in this
// copy of the file.
PassRefPtr HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
{
    RefPtr attributes = token.takeAtributes();
    if (!attributes)
        attributes = NamedNodeMap::create();
    else {
        attributes->removeAttribute(nameAttr);
        attributes->removeAttribute(actionAttr);
        attributes->removeAttribute(promptAttr);
    }

    RefPtr mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
    attributes->insertAttribute(mappedAttribute.release(), false);
    return attributes.release();
}

// Expands an isindex start tag into fake tokens: form, hr, label, the prompt
// text, input, end of label, hr, end of form. It is a parse error in every
// case, and nothing is generated when a form element is already set.
void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::StartTag);
    ASSERT(token.name() == isindexTag);
    parseError(token);
    if (m_tree.form())
        return;
    notImplemented(); // Acknowledge self-closing flag
    processFakeStartTag(formTag);
    // Copy the action attribute, if any, onto the form that was just opened.
    Attribute* actionAttribute = token.getAttributeItem(actionAttr);
    if (actionAttribute) {
        ASSERT(m_tree.currentElement()->hasTagName(formTag));
        m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
    }
    processFakeStartTag(hrTag);
    processFakeStartTag(labelTag);
    // The label text is the prompt attribute when present, otherwise the
    // string returned by searchableIndexIntroduction().
    Attribute* promptAttribute = token.getAttributeItem(promptAttr);
    if (promptAttribute)
        processFakeCharacters(promptAttribute->value());
    else
        processFakeCharacters(searchableIndexIntroduction());
    processFakeStartTag(inputTag, attributesForIsindexInput(token));
    notImplemented(); // This second set of characters may be needed by non-english locales.
    processFakeEndTag(labelTag);
    processFakeStartTag(hrTag);
    processFakeEndTag(formTag);
}

namespace {

bool isLi(const Element* element)
{
    return element->hasTagName(liTag);
}

bool isDdOrDt(const Element* element)
{
    return element->hasTagName(ddTag)
        || element->hasTagName(dtTag);
}

}

// Shared start tag handling for li and dd/dt. Walks the open element stack
// from the top: the first node shouldClose() accepts is closed with a fake end
// tag. The walk also stops at the first special node other than address, div
// and p. Afterwards an open p in button scope is closed and the element for
// |token| is inserted.
// NOTE(review): the template parameter list that should declare shouldClose
// is missing in this copy of the file.
template void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
{
    m_framesetOk = false;
    HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
    while (1) {
        Element* node = nodeRecord->element();
        if (shouldClose(node)) {
            processFakeEndTag(node->tagQName());
            break;
        }
        if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
            break;
        nodeRecord = nodeRecord->next();
    }
    processFakePEndTagIfPInButtonScope();
    m_tree.insertHTMLElement(token);
}

namespace {

// NOTE(review): the HashMap template arguments are missing in this copy; the
// uses below look names up by AtomicString and store QualifiedName values.
typedef HashMap PrefixedNameToQualifiedNameMap;

// For every name whose local name is not already lower case, adds an entry
// that maps the lower-cased local name to the name.
void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
{
    for (size_t i = 0; i < length; ++i) {
        const QualifiedName& name = *names[i];
        const AtomicString& localName = name.localName();
        AtomicString loweredLocalName = localName.lower();
        if (loweredLocalName != localName)
            map->add(loweredLocalName, name);
    }
}

// Replaces |token|'s name with the cased local name of the matching SVG tag,
// if there is one. The lookup table is built on the first call and kept for
// later calls.
void adjustSVGTagNameCase(AtomicHTMLToken& token)
{
    static PrefixedNameToQualifiedNameMap* caseMap = 0;
    if (!caseMap) {
        caseMap = new PrefixedNameToQualifiedNameMap;
        size_t length = 0;
        QualifiedName** svgTags = SVGNames::getSVGTags(&length);
        mapLoweredLocalNameToName(caseMap, svgTags, length);
    }

    const QualifiedName& casedName = caseMap->get(token.name());
    if (casedName.localName().isNull())
        return;
    token.setName(casedName.localName());
}

// Renames each attribute of |token| whose local name has an entry in the case
// map built from getAttrs(). Does nothing when the token has no attributes.
// NOTE(review): the template parameter list that should declare getAttrs is
// missing in this copy of the file.
template void adjustAttributes(AtomicHTMLToken& token)
{
    static PrefixedNameToQualifiedNameMap* caseMap = 0;
    if (!caseMap) {
        caseMap = new PrefixedNameToQualifiedNameMap;
        size_t length = 0;
        QualifiedName** attrs = getAttrs(&length);
        mapLoweredLocalNameToName(caseMap, attrs, length);
    }

    NamedNodeMap* attributes = token.attributes();
    if (!attributes)
        return;

    for (unsigned x = 0; x < attributes->length(); ++x) {
        Attribute* attribute = attributes->attributeItem(x);
        const QualifiedName& casedName = caseMap->get(attribute->localName());
        if (!casedName.localName().isNull())
            attribute->parserSetName(casedName);
    }
}

// NOTE(review): presumably instantiates adjustAttributes with the SVG
// attribute table; the template argument is not visible here.
void adjustSVGAttributes(AtomicHTMLToken& token)
{
    adjustAttributes(token);
}

// NOTE(review): presumably instantiates adjustAttributes with the MathML
// attribute table; the template argument is not visible here.
void adjustMathMLAttributes(AtomicHTMLToken& token)
{
    adjustAttributes(token);
}

// For every name, adds an entry that maps "prefix:localName" to a
// QualifiedName made of |prefix|, the local name and the name's namespace URI.
void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
{
    for (size_t i = 0; i < length; ++i) {
        QualifiedName* name = names[i];
        const AtomicString& localName = name->localName();
        AtomicString prefixColonLocalName(prefix + ":" + localName);
        QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
        map->add(prefixColonLocalName, nameWithPrefix);
    }
}

// Renames attributes of |token| that match an entry of the foreign attribute
// table: the "xlink:"- and "xml:"-prefixed names, "xmlns" and "xmlns:xlink".
// The table is built on the first call and kept for later calls.
void adjustForeignAttributes(AtomicHTMLToken& token)
{
    static PrefixedNameToQualifiedNameMap* map = 0;
    if (!map) {
        map = new PrefixedNameToQualifiedNameMap;
        size_t length = 0;
        QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
        addNamesWithPrefix(map, "xlink", attrs, length);

        attrs = XMLNames::getXMLAttrs(&length);
        addNamesWithPrefix(map, "xml", attrs, length);

        map->add("xmlns", XMLNSNames::xmlnsAttr);
        map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
    }

    NamedNodeMap* attributes = token.attributes();
    if (!attributes)
        return;

    for (unsigned x = 0; x < attributes->length(); ++x) {
        Attribute* attribute = attributes->attributeItem(x);
        const QualifiedName& name = map->get(attribute->localName());
        if (!name.localName().isNull())
            attribute->parserSetName(name);
    }
}

}

// Handles a start tag with the in-body rules. Each branch matches on
// token.name() and returns once the token is handled. The one exception is
// the image branch, which renames the token to img and deliberately continues
// into the branch that follows. A token that no branch claims reaches the
// bottom, where the active formatting elements are reconstructed and an
// ordinary HTML element is inserted.
// NOTE(review): the two processCloseWhenNestedTag() calls below carry no
// visible template argument; presumably isLi and isDdOrDt - confirm.
void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::StartTag);
    if (token.name() == htmlTag) {
        m_tree.insertHTMLHtmlStartTagInBody(token);
        return;
    }
    // These tags are handed to the in-head handling, which is expected to
    // accept them.
    if (token.name() == baseTag
        || token.name() == basefontTag
        || token.name() == bgsoundTag
        || token.name() == commandTag
        || token.name() == linkTag
        || token.name() == metaTag
        || token.name() == noframesTag
        || token.name() == scriptTag
        || token.name() == styleTag
        || token.name() == titleTag) {
        bool didProcess = processStartTagForInHead(token);
        ASSERT_UNUSED(didProcess, didProcess);
        return;
    }
    if (token.name() == bodyTag) {
        // The token is ignored when the second open element is not a body, or
        // when there is only one open element; the assert records that this is
        // only expected while parsing a fragment.
        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
            ASSERT(isParsingFragment());
            return;
        }
        m_tree.insertHTMLBodyStartTagInBody(token);
        return;
    }
    if (token.name() == framesetTag) {
        parseError(token);
        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
            ASSERT(isParsingFragment());
            return;
        }
        if (!m_framesetOk)
            return;
        // Remove the body element from its parent and pop the stack down to
        // the html element before inserting the frameset.
        ExceptionCode ec = 0;
        m_tree.openElements()->bodyElement()->remove(ec);
        ASSERT(!ec);
        m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
        m_tree.openElements()->popHTMLBodyElement();
        ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
        m_tree.insertHTMLElement(token);
        setInsertionMode(InFramesetMode);
        return;
    }
    if (token.name() == addressTag
        || token.name() == articleTag
        || token.name() == asideTag
        || token.name() == blockquoteTag
        || token.name() == centerTag
        || token.name() == detailsTag
        || token.name() == dirTag
        || token.name() == divTag
        || token.name() == dlTag
        || token.name() == fieldsetTag
        || token.name() == figcaptionTag
        || token.name() == figureTag
        || token.name() == footerTag
        || token.name() == headerTag
        || token.name() == hgroupTag
        || token.name() == menuTag
        || token.name() == navTag
        || token.name() == olTag
        || token.name() == pTag
        || token.name() == sectionTag
        || token.name() == summaryTag
        || token.name() == ulTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        return;
    }
    if (isNumberedHeaderTag(token.name())) {
        processFakePEndTagIfPInButtonScope();
        // A heading directly inside another heading is a parse error; the
        // outer heading is popped first.
        if (isNumberedHeaderTag(m_tree.currentElement()->localName())) {
            parseError(token);
            m_tree.openElements()->pop();
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    if (token.name() == preTag || token.name() == listingTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        m_tokenizer->setSkipLeadingNewLineForListing(true);
        m_framesetOk = false;
        return;
    }
    if (token.name() == formTag) {
        // A form start tag is a parse error, and ignored, while a form element
        // is already set.
        if (m_tree.form()) {
            parseError(token);
            return;
        }
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLFormElement(token);
        return;
    }
    if (token.name() == liTag) {
        processCloseWhenNestedTag(token);
        return;
    }
    if (token.name() == ddTag || token.name() == dtTag) {
        processCloseWhenNestedTag(token);
        return;
    }
    if (token.name() == plaintextTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
        return;
    }
    if (token.name() == buttonTag) {
        // A button inside a button in scope: close the open one and process
        // this token again.
        if (m_tree.openElements()->inScope(buttonTag)) {
            parseError(token);
            processFakeEndTag(buttonTag);
            processStartTag(token); // FIXME: Could we just fall through here?
            return;
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    if (token.name() == aTag) {
        // An a element still in the active formatting list is closed and
        // removed from both lists before the new one is inserted.
        Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
        if (activeATag) {
            parseError(token);
            processFakeEndTag(aTag);
            m_tree.activeFormattingElements()->remove(activeATag);
            if (m_tree.openElements()->contains(activeATag))
                m_tree.openElements()->remove(activeATag);
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertFormattingElement(token);
        return;
    }
    if (isNonAnchorNonNobrFormattingTag(token.name())) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertFormattingElement(token);
        return;
    }
    if (token.name() == nobrTag) {
        m_tree.reconstructTheActiveFormattingElements();
        if (m_tree.openElements()->inScope(nobrTag)) {
            parseError(token);
            processFakeEndTag(nobrTag);
            m_tree.reconstructTheActiveFormattingElements();
        }
        m_tree.insertFormattingElement(token);
        return;
    }
    if (token.name() == appletTag
        || token.name() == marqueeTag
        || token.name() == objectTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_tree.activeFormattingElements()->appendMarker();
        m_framesetOk = false;
        return;
    }
    if (token.name() == tableTag) {
        // Outside quirks mode, a p in button scope is closed before the table
        // is inserted.
        if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
            processFakeEndTag(pTag);
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        setInsertionMode(InTableMode);
        return;
    }
    if (token.name() == imageTag) {
        parseError(token);
        // Apparently we're not supposed to ask.
        token.setName(imgTag.localName());
        // Note the fall through to the imgTag handling below!
    }
    if (token.name() == areaTag
        || token.name() == brTag
        || token.name() == embedTag
        || token.name() == imgTag
        || token.name() == inputTag
        || token.name() == keygenTag
        || token.name() == wbrTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertSelfClosingHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    if (token.name() == paramTag
        || token.name() == sourceTag
        || token.name() == trackTag) {
        m_tree.insertSelfClosingHTMLElement(token);
        return;
    }
    if (token.name() == hrTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertSelfClosingHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    if (token.name() == isindexTag) {
        processIsindexStartTagForInBody(token);
        return;
    }
    if (token.name() == textareaTag) {
        // Switch the tokenizer to RCDATA and remember the current insertion
        // mode in m_originalInsertionMode before entering TextMode.
        m_tree.insertHTMLElement(token);
        m_tokenizer->setSkipLeadingNewLineForListing(true);
        m_tokenizer->setState(HTMLTokenizer::RCDATAState);
        m_originalInsertionMode = m_insertionMode;
        m_framesetOk = false;
        setInsertionMode(TextMode);
        return;
    }
    if (token.name() == xmpTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.reconstructTheActiveFormattingElements();
        m_framesetOk = false;
        processGenericRawTextStartTag(token);
        return;
    }
    if (token.name() == iframeTag) {
        m_framesetOk = false;
        processGenericRawTextStartTag(token);
        return;
    }
    // noembed and noscript are treated as raw text only when plugins and
    // script, respectively, are enabled for the document's frame; otherwise
    // they continue to the branches below.
    if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
        processGenericRawTextStartTag(token);
        return;
    }
    if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
        processGenericRawTextStartTag(token);
        return;
    }
    if (token.name() == selectTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        // A select opened from one of the table-related modes uses
        // InSelectInTableMode; any other mode uses InSelectMode.
        if (m_insertionMode == InTableMode
             || m_insertionMode == InCaptionMode
             || m_insertionMode == InColumnGroupMode
             || m_insertionMode == InTableBodyMode
             || m_insertionMode == InRowMode
             || m_insertionMode == InCellMode)
            setInsertionMode(InSelectInTableMode);
        else
            setInsertionMode(InSelectMode);
        return;
    }
    if (token.name() == optgroupTag || token.name() == optionTag) {
        // Close an option that is in scope before opening the new element.
        if (m_tree.openElements()->inScope(optionTag.localName())) {
            AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
            processEndTag(endOption);
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        return;
    }
    if (token.name() == rpTag || token.name() == rtTag) {
        if (m_tree.openElements()->inScope(rubyTag.localName())) {
            m_tree.generateImpliedEndTags();
            if (!m_tree.currentElement()->hasTagName(rubyTag)) {
                parseError(token);
                m_tree.openElements()->popUntil(rubyTag.localName());
            }
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    if (token.name() == MathMLNames::mathTag.localName()) {
        m_tree.reconstructTheActiveFormattingElements();
        adjustMathMLAttributes(token);
        adjustForeignAttributes(token);
        m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
        // Remember the current mode as the secondary mode unless already in
        // foreign content.
        if (m_insertionMode != InForeignContentMode) {
            setSecondaryInsertionMode(m_insertionMode);
            setInsertionMode(InForeignContentMode);
        }
        return;
    }
    if (token.name() == SVGNames::svgTag.localName()) {
        m_tree.reconstructTheActiveFormattingElements();
        adjustSVGAttributes(token);
        adjustForeignAttributes(token);
        m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
        if (m_insertionMode != InForeignContentMode) {
            setSecondaryInsertionMode(m_insertionMode);
            setInsertionMode(InForeignContentMode);
        }
        return;
    }
    // Table structure tags, frame and head are parse errors here and are
    // ignored.
    if (isCaptionColOrColgroupTag(token.name())
        || token.name() == frameTag
        || token.name() == headTag
        || isTableBodyContextTag(token.name())
        || isTableCellContextTag(token.name())
        || token.name() == trTag) {
        parseError(token);
        return;
    }
    m_tree.reconstructTheActiveFormattingElements();
    m_tree.insertHTMLElement(token);
}

// Pops the current element and switches to InTableMode, returning true.
// Returns false without changing anything when the current element is the
// html element, which is only expected while parsing a fragment.
bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
{
    if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
        ASSERT(isParsingFragment());
        // FIXME: parse error
        return false;
    }
    m_tree.openElements()->pop();
    setInsertionMode(InTableMode);
    return true;
}

//
http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell void HTMLTreeBuilder::closeTheCell() { ASSERT(insertionMode() == InCellMode); if (m_tree.openElements()->inTableScope(tdTag)) { ASSERT(!m_tree.openElements()->inTableScope(thTag)); processFakeEndTag(tdTag); return; } ASSERT(m_tree.openElements()->inTableScope(thTag)); processFakeEndTag(thTag); ASSERT(insertionMode() == InRowMode); } void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::StartTag); if (token.name() == captionTag) { m_tree.openElements()->popUntilTableScopeMarker(); m_tree.activeFormattingElements()->appendMarker(); m_tree.insertHTMLElement(token); setInsertionMode(InCaptionMode); return; } if (token.name() == colgroupTag) { m_tree.openElements()->popUntilTableScopeMarker(); m_tree.insertHTMLElement(token); setInsertionMode(InColumnGroupMode); return; } if (token.name() == colTag) { processFakeStartTag(colgroupTag); ASSERT(InColumnGroupMode); processStartTag(token); return; } if (isTableBodyContextTag(token.name())) { m_tree.openElements()->popUntilTableScopeMarker(); m_tree.insertHTMLElement(token); setInsertionMode(InTableBodyMode); return; } if (isTableCellContextTag(token.name()) || token.name() == trTag) { processFakeStartTag(tbodyTag); ASSERT(insertionMode() == InTableBodyMode); processStartTag(token); return; } if (token.name() == tableTag) { parseError(token); if (!processTableEndTagForInTable()) { ASSERT(isParsingFragment()); return; } processStartTag(token); return; } if (token.name() == styleTag || token.name() == scriptTag) { processStartTagForInHead(token); return; } if (token.name() == inputTag) { Attribute* typeAttribute = token.getAttributeItem(typeAttr); if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) { parseError(token); m_tree.insertSelfClosingHTMLElement(token); return; } // Fall through to "anything else" case. 
} if (token.name() == formTag) { parseError(token); if (m_tree.form()) return; // FIXME: This deviates from the spec: // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10216 m_tree.insertHTMLFormElement(token, true); m_tree.openElements()->pop(); return; } parseError(token); HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree); processStartTagForInBody(token); } namespace { bool shouldProcessUsingSecondaryInsertionMode(AtomicHTMLToken& token, Element* currentElement) { ASSERT(token.type() == HTMLToken::StartTag); if (currentElement->hasTagName(MathMLNames::miTag) || currentElement->hasTagName(MathMLNames::moTag) || currentElement->hasTagName(MathMLNames::mnTag) || currentElement->hasTagName(MathMLNames::msTag) || currentElement->hasTagName(MathMLNames::mtextTag)) { return token.name() != MathMLNames::mglyphTag && token.name() != MathMLNames::malignmarkTag; } if (currentElement->hasTagName(MathMLNames::annotation_xmlTag)) return token.name() == SVGNames::svgTag; if (currentElement->hasTagName(SVGNames::foreignObjectTag) || currentElement->hasTagName(SVGNames::descTag) || currentElement->hasTagName(SVGNames::titleTag)) return true; return currentElement->namespaceURI() == HTMLNames::xhtmlNamespaceURI; } } void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::StartTag); switch (insertionMode()) { case InitialMode: ASSERT(insertionMode() == InitialMode); defaultForInitial(); // Fall through. case BeforeHTMLMode: ASSERT(insertionMode() == BeforeHTMLMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagBeforeHTML(token); setInsertionMode(BeforeHeadMode); return; } defaultForBeforeHTML(); // Fall through. case BeforeHeadMode: ASSERT(insertionMode() == BeforeHeadMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return; } if (token.name() == headTag) { m_tree.insertHTMLHeadElement(token); setInsertionMode(InHeadMode); return; } defaultForBeforeHead(); // Fall through. 
case InHeadMode: ASSERT(insertionMode() == InHeadMode); if (processStartTagForInHead(token)) return; defaultForInHead(); // Fall through. case AfterHeadMode: ASSERT(insertionMode() == AfterHeadMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return; } if (token.name() == bodyTag) { m_framesetOk = false; m_tree.insertHTMLBodyElement(token); setInsertionMode(InBodyMode); return; } if (token.name() == framesetTag) { m_tree.insertHTMLElement(token); setInsertionMode(InFramesetMode); return; } if (token.name() == baseTag || token.name() == basefontTag || token.name() == bgsoundTag || token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == scriptTag || token.name() == styleTag || token.name() == titleTag) { parseError(token); ASSERT(m_tree.head()); m_tree.openElements()->pushHTMLHeadElement(m_tree.head()); processStartTagForInHead(token); m_tree.openElements()->removeHTMLHeadElement(m_tree.head()); return; } if (token.name() == headTag) { parseError(token); return; } defaultForAfterHead(); // Fall through case InBodyMode: ASSERT(insertionMode() == InBodyMode); processStartTagForInBody(token); break; case InTableMode: ASSERT(insertionMode() == InTableMode); processStartTagForInTable(token); break; case InCaptionMode: ASSERT(insertionMode() == InCaptionMode); if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name()) || isTableCellContextTag(token.name()) || token.name() == trTag) { parseError(token); if (!processCaptionEndTagForInCaption()) { ASSERT(isParsingFragment()); return; } processStartTag(token); return; } processStartTagForInBody(token); break; case InColumnGroupMode: ASSERT(insertionMode() == InColumnGroupMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return; } if (token.name() == colTag) { m_tree.insertSelfClosingHTMLElement(token); return; } if (!processColgroupEndTagForInColumnGroup()) { ASSERT(isParsingFragment()); return; 
} processStartTag(token); break; case InTableBodyMode: ASSERT(insertionMode() == InTableBodyMode); if (token.name() == trTag) { m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop? m_tree.insertHTMLElement(token); setInsertionMode(InRowMode); return; } if (isTableCellContextTag(token.name())) { parseError(token); processFakeStartTag(trTag); ASSERT(insertionMode() == InRowMode); processStartTag(token); return; } if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) { // FIXME: This is slow. if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) { ASSERT(isParsingFragment()); parseError(token); return; } m_tree.openElements()->popUntilTableBodyScopeMarker(); ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName())); processFakeEndTag(m_tree.currentElement()->tagQName()); processStartTag(token); return; } processStartTagForInTable(token); break; case InRowMode: ASSERT(insertionMode() == InRowMode); if (isTableCellContextTag(token.name())) { m_tree.openElements()->popUntilTableRowScopeMarker(); m_tree.insertHTMLElement(token); setInsertionMode(InCellMode); m_tree.activeFormattingElements()->appendMarker(); return; } if (token.name() == trTag || isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) { if (!processTrEndTagForInRow()) { ASSERT(isParsingFragment()); return; } ASSERT(insertionMode() == InTableBodyMode); processStartTag(token); return; } processStartTagForInTable(token); break; case InCellMode: ASSERT(insertionMode() == InCellMode); if (isCaptionColOrColgroupTag(token.name()) || isTableCellContextTag(token.name()) || token.name() == trTag || isTableBodyContextTag(token.name())) { // FIXME: This could be more efficient. 
if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) { ASSERT(isParsingFragment()); parseError(token); return; } closeTheCell(); processStartTag(token); return; } processStartTagForInBody(token); break; case AfterBodyMode: case AfterAfterBodyMode: ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return; } setInsertionMode(InBodyMode); processStartTag(token); break; case InHeadNoscriptMode: ASSERT(insertionMode() == InHeadNoscriptMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return; } if (token.name() == basefontTag || token.name() == bgsoundTag || token.name() == linkTag || token.name() == metaTag || token.name() == noframesTag || token.name() == styleTag) { bool didProcess = processStartTagForInHead(token); ASSERT_UNUSED(didProcess, didProcess); return; } if (token.name() == htmlTag || token.name() == noscriptTag) { parseError(token); return; } defaultForInHeadNoscript(); processToken(token); break; case InFramesetMode: ASSERT(insertionMode() == InFramesetMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return; } if (token.name() == framesetTag) { m_tree.insertHTMLElement(token); return; } if (token.name() == frameTag) { m_tree.insertSelfClosingHTMLElement(token); return; } if (token.name() == noframesTag) { processStartTagForInHead(token); return; } parseError(token); break; case AfterFramesetMode: case AfterAfterFramesetMode: ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return; } if (token.name() == noframesTag) { processStartTagForInHead(token); return; } parseError(token); break; case InSelectInTableMode: ASSERT(insertionMode() == InSelectInTableMode); if (token.name() == captionTag || token.name() == tableTag || 
isTableBodyContextTag(token.name()) || token.name() == trTag || isTableCellContextTag(token.name())) { parseError(token); AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName()); processEndTag(endSelect); processStartTag(token); return; } // Fall through case InSelectMode: ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return; } if (token.name() == optionTag) { if (m_tree.currentElement()->hasTagName(optionTag)) { AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName()); processEndTag(endOption); } m_tree.insertHTMLElement(token); return; } if (token.name() == optgroupTag) { if (m_tree.currentElement()->hasTagName(optionTag)) { AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName()); processEndTag(endOption); } if (m_tree.currentElement()->hasTagName(optgroupTag)) { AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName()); processEndTag(endOptgroup); } m_tree.insertHTMLElement(token); return; } if (token.name() == selectTag) { parseError(token); AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName()); processEndTag(endSelect); return; } if (token.name() == inputTag || token.name() == keygenTag || token.name() == textareaTag) { parseError(token); if (!m_tree.openElements()->inTableScope(selectTag)) { ASSERT(isParsingFragment()); return; } AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName()); processEndTag(endSelect); processStartTag(token); return; } if (token.name() == scriptTag) { bool didProcess = processStartTagForInHead(token); ASSERT_UNUSED(didProcess, didProcess); return; } break; case InTableTextMode: defaultForInTableText(); processStartTag(token); break; case InForeignContentMode: { // FIXME: We're missing a bunch of if branches here. 
if (shouldProcessUsingSecondaryInsertionMode(token, m_tree.currentElement())) { processUsingSecondaryInsertionModeAndAdjustInsertionMode(token); return; } if (token.name() == bTag || token.name() == bigTag || token.name() == blockquoteTag || token.name() == bodyTag || token.name() == brTag || token.name() == centerTag || token.name() == codeTag || token.name() == ddTag || token.name() == divTag || token.name() == dlTag || token.name() == dtTag || token.name() == emTag || token.name() == embedTag || isNumberedHeaderTag(token.name()) || token.name() == headTag || token.name() == hrTag || token.name() == iTag || token.name() == imgTag || token.name() == liTag || token.name() == listingTag || token.name() == menuTag || token.name() == metaTag || token.name() == nobrTag || token.name() == olTag || token.name() == pTag || token.name() == preTag || token.name() == rubyTag || token.name() == sTag || token.name() == smallTag || token.name() == spanTag || token.name() == strongTag || token.name() == strikeTag || token.name() == subTag || token.name() == supTag || token.name() == tableTag || token.name() == ttTag || token.name() == uTag || token.name() == ulTag || token.name() == varTag || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) { m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI); setInsertionMode(m_secondaryInsertionMode); processStartTag(token); return; } const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI(); if (currentNamespace == MathMLNames::mathmlNamespaceURI) adjustMathMLAttributes(token); if (currentNamespace == SVGNames::svgNamespaceURI) { adjustSVGTagNameCase(token); adjustSVGAttributes(token); } adjustForeignAttributes(token); m_tree.insertForeignElement(token, currentNamespace); break; } case TextMode: notImplemented(); break; } } bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token) { 
ASSERT(token.type() == HTMLToken::EndTag); ASSERT(token.name() == bodyTag); if (!m_tree.openElements()->inScope(bodyTag.localName())) { parseError(token); return false; } notImplemented(); // Emit a more specific parse error based on stack contents. setInsertionMode(AfterBodyMode); return true; } void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::EndTag); HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); while (1) { Element* node = record->element(); if (node->hasLocalName(token.name())) { m_tree.generateImpliedEndTags(); if (!m_tree.currentElement()->hasLocalName(token.name())) { parseError(token); // FIXME: This is either a bug in the spec, or a bug in our // implementation. Filed a bug with HTML5: // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080 // We might have already popped the node for the token in // generateImpliedEndTags, just abort. if (!m_tree.openElements()->contains(node)) return; } m_tree.openElements()->popUntilPopped(node); return; } if (isSpecialNode(node)) { parseError(token); return; } record = record->next(); } } // FIXME: This probably belongs on HTMLElementStack. HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement) { HTMLElementStack::ElementRecord* furthestBlock = 0; HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); for (; record; record = record->next()) { if (record->element() == formattingElement) return furthestBlock; if (isSpecialNode(record->element())) furthestBlock = record; } ASSERT_NOT_REACHED(); return 0; } // FIXME: This should have a whitty name. // FIXME: This must be implemented in many other places in WebCore. 
void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent) { Node* child = oldParent->firstChild(); while (child) { Node* nextChild = child->nextSibling(); oldParent->parserRemoveChild(child); newParent->parserAddChild(child); if (newParent->attached() && !child->attached()) child->attach(); child = nextChild; } } // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token) { // The adoption agency algorithm is N^2. We limit the number of iterations // to stop from hanging the whole browser. This limit is copied from the // legacy tree builder and might need to be tweaked in the future. static const int adoptionAgencyIterationLimit = 10; for (int i = 0; i < adoptionAgencyIterationLimit; ++i) { // 1. Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name()); if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) { parseError(token); notImplemented(); // Check the stack of open elements for a more specific parse error. return; } HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement); if (!formattingElementRecord) { parseError(token); m_tree.activeFormattingElements()->remove(formattingElement); return; } if (formattingElement != m_tree.currentElement()) parseError(token); // 2. HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement); // 3. if (!furthestBlock) { m_tree.openElements()->popUntilPopped(formattingElement); m_tree.activeFormattingElements()->remove(formattingElement); return; } // 4. ASSERT(furthestBlock->isAbove(formattingElementRecord)); Element* commonAncestor = formattingElementRecord->next()->element(); // 5. 
HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement); // 6. HTMLElementStack::ElementRecord* node = furthestBlock; HTMLElementStack::ElementRecord* nextNode = node->next(); HTMLElementStack::ElementRecord* lastNode = furthestBlock; for (int i = 0; i < adoptionAgencyIterationLimit; ++i) { // 6.1 node = nextNode; ASSERT(node); nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2. // 6.2 if (!m_tree.activeFormattingElements()->contains(node->element())) { m_tree.openElements()->remove(node->element()); node = 0; continue; } // 6.3 if (node == formattingElementRecord) break; // 6.5 RefPtr newElement = m_tree.createHTMLElementFromElementRecord(node); HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element()); nodeEntry->replaceElement(newElement.get()); node->replaceElement(newElement.release()); // 6.4 -- Intentionally out of order to handle the case where node // was replaced in 6.5. // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096 if (lastNode == furthestBlock) bookmark.moveToAfter(nodeEntry); // 6.6 if (Element* parent = lastNode->element()->parentElement()) parent->parserRemoveChild(lastNode->element()); node->element()->parserAddChild(lastNode->element()); if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached()) lastNode->element()->lazyAttach(); // 6.7 lastNode = node; } // 7 const AtomicString& commonAncestorTag = commonAncestor->localName(); if (Element* parent = lastNode->element()->parentElement()) parent->parserRemoveChild(lastNode->element()); // FIXME: If this moves to HTMLConstructionSite, this check should use // causesFosterParenting(tagName) instead. 
if (commonAncestorTag == tableTag || commonAncestorTag == trTag || isTableBodyContextTag(commonAncestorTag)) m_tree.fosterParent(lastNode->element()); else { commonAncestor->parserAddChild(lastNode->element()); if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached()) lastNode->element()->lazyAttach(); } // 8 RefPtr newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord); // 9 reparentChildren(furthestBlock->element(), newElement.get()); // 10 Element* furthestBlockElement = furthestBlock->element(); // FIXME: All this creation / parserAddChild / attach business should // be in HTMLConstructionSite. My guess is that steps 8--12 // should all be in some HTMLConstructionSite function. furthestBlockElement->parserAddChild(newElement); if (furthestBlockElement->attached() && !newElement->attached()) { // Notice that newElement might already be attached if, for example, one of the reparented // children is a style element, which attaches itself automatically. 
newElement->attach(); } // 11 m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark); // 12 m_tree.openElements()->remove(formattingElement); m_tree.openElements()->insertAbove(newElement, furthestBlock); } } void HTMLTreeBuilder::setSecondaryInsertionMode(InsertionMode mode) { ASSERT(mode != InForeignContentMode); m_secondaryInsertionMode = mode; } void HTMLTreeBuilder::setInsertionModeAndEnd(InsertionMode newInsertionMode, bool foreign) { setInsertionMode(newInsertionMode); if (foreign) { setSecondaryInsertionMode(m_insertionMode); setInsertionMode(InForeignContentMode); } } void HTMLTreeBuilder::resetInsertionModeAppropriately() { // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately bool last = false; bool foreign = false; HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord(); while (1) { Element* node = nodeRecord->element(); if (node == m_tree.openElements()->bottom()) { ASSERT(isParsingFragment()); last = true; node = m_fragmentContext.contextElement(); } if (node->hasTagName(selectTag)) { ASSERT(isParsingFragment()); return setInsertionModeAndEnd(InSelectMode, foreign); } if (node->hasTagName(tdTag) || node->hasTagName(thTag)) return setInsertionModeAndEnd(InCellMode, foreign); if (node->hasTagName(trTag)) return setInsertionModeAndEnd(InRowMode, foreign); if (isTableBodyContextTag(node->localName())) return setInsertionModeAndEnd(InTableBodyMode, foreign); if (node->hasTagName(captionTag)) return setInsertionModeAndEnd(InCaptionMode, foreign); if (node->hasTagName(colgroupTag)) { ASSERT(isParsingFragment()); return setInsertionModeAndEnd(InColumnGroupMode, foreign); } if (node->hasTagName(tableTag)) return setInsertionModeAndEnd(InTableMode, foreign); if (node->hasTagName(headTag)) { ASSERT(isParsingFragment()); return setInsertionModeAndEnd(InBodyMode, foreign); } if (node->hasTagName(bodyTag)) return setInsertionModeAndEnd(InBodyMode, 
foreign); if (node->hasTagName(framesetTag)) { ASSERT(isParsingFragment()); return setInsertionModeAndEnd(InFramesetMode, foreign); } if (node->hasTagName(htmlTag)) { ASSERT(isParsingFragment()); return setInsertionModeAndEnd(BeforeHeadMode, foreign); } if (node->namespaceURI() == SVGNames::svgNamespaceURI || node->namespaceURI() == MathMLNames::mathmlNamespaceURI) foreign = true; if (last) { ASSERT(isParsingFragment()); return setInsertionModeAndEnd(InBodyMode, foreign); } nodeRecord = nodeRecord->next(); } } void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::EndTag); if (isTableBodyContextTag(token.name())) { if (!m_tree.openElements()->inTableScope(token.name())) { parseError(token); return; } m_tree.openElements()->popUntilTableBodyScopeMarker(); m_tree.openElements()->pop(); setInsertionMode(InTableMode); return; } if (token.name() == tableTag) { // FIXME: This is slow. if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) { ASSERT(isParsingFragment()); parseError(token); return; } m_tree.openElements()->popUntilTableBodyScopeMarker(); ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName())); processFakeEndTag(m_tree.currentElement()->tagQName()); processEndTag(token); return; } if (token.name() == bodyTag || isCaptionColOrColgroupTag(token.name()) || token.name() == htmlTag || isTableCellContextTag(token.name()) || token.name() == trTag) { parseError(token); return; } processEndTagForInTable(token); } void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::EndTag); if (token.name() == trTag) { processTrEndTagForInRow(); return; } if (token.name() == tableTag) { if (!processTrEndTagForInRow()) { ASSERT(isParsingFragment()); return; } ASSERT(insertionMode() == InTableBodyMode); processEndTag(token); return; } if 
(isTableBodyContextTag(token.name())) { if (!m_tree.openElements()->inTableScope(token.name())) { parseError(token); return; } processFakeEndTag(trTag); ASSERT(insertionMode() == InTableBodyMode); processEndTag(token); return; } if (token.name() == bodyTag || isCaptionColOrColgroupTag(token.name()) || token.name() == htmlTag || isTableCellContextTag(token.name())) { parseError(token); return; } processEndTagForInTable(token); } void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::EndTag); if (isTableCellContextTag(token.name())) { if (!m_tree.openElements()->inTableScope(token.name())) { parseError(token); return; } m_tree.generateImpliedEndTags(); if (!m_tree.currentElement()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); m_tree.activeFormattingElements()->clearToLastMarker(); setInsertionMode(InRowMode); // FIXME: The fragment case of this ASSERT is a spec bug: // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10338 ASSERT(m_tree.currentElement()->hasTagName(trTag) || (isParsingFragment() && m_fragmentContext.contextElement()->hasTagName(trTag))); return; } if (token.name() == bodyTag || isCaptionColOrColgroupTag(token.name()) || token.name() == htmlTag) { parseError(token); return; } if (token.name() == tableTag || token.name() == trTag || isTableBodyContextTag(token.name())) { if (!m_tree.openElements()->inTableScope(token.name())) { ASSERT(isParsingFragment()); // FIXME: It is unclear what the exact ASSERT should be. 
// http://www.w3.org/Bugs/Public/show_bug.cgi?id=10098 parseError(token); return; } closeTheCell(); processEndTag(token); return; } processEndTagForInBody(token); } void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::EndTag); if (token.name() == bodyTag) { processBodyEndTagForInBody(token); return; } if (token.name() == htmlTag) { AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName()); if (processBodyEndTagForInBody(endBody)) processEndTag(token); return; } if (token.name() == addressTag || token.name() == articleTag || token.name() == asideTag || token.name() == blockquoteTag || token.name() == buttonTag || token.name() == centerTag || token.name() == detailsTag || token.name() == dirTag || token.name() == divTag || token.name() == dlTag || token.name() == fieldsetTag || token.name() == figcaptionTag || token.name() == figureTag || token.name() == footerTag || token.name() == headerTag || token.name() == hgroupTag || token.name() == listingTag || token.name() == menuTag || token.name() == navTag || token.name() == olTag || token.name() == preTag || token.name() == sectionTag || token.name() == summaryTag || token.name() == ulTag) { if (!m_tree.openElements()->inScope(token.name())) { parseError(token); return; } m_tree.generateImpliedEndTags(); if (!m_tree.currentElement()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); return; } if (token.name() == formTag) { RefPtr node = m_tree.takeForm(); if (!node || !m_tree.openElements()->inScope(node.get())) { parseError(token); return; } m_tree.generateImpliedEndTags(); if (m_tree.currentElement() != node.get()) parseError(token); m_tree.openElements()->remove(node.get()); } if (token.name() == pTag) { if (!m_tree.openElements()->inButtonScope(token.name())) { parseError(token); processFakeStartTag(pTag); ASSERT(m_tree.openElements()->inScope(token.name())); processEndTag(token); return; } 
m_tree.generateImpliedEndTagsWithExclusion(token.name()); if (!m_tree.currentElement()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); return; } if (token.name() == liTag) { if (!m_tree.openElements()->inListItemScope(token.name())) { parseError(token); return; } m_tree.generateImpliedEndTagsWithExclusion(token.name()); if (!m_tree.currentElement()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); return; } if (token.name() == ddTag || token.name() == dtTag) { if (!m_tree.openElements()->inScope(token.name())) { parseError(token); return; } m_tree.generateImpliedEndTagsWithExclusion(token.name()); if (!m_tree.currentElement()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); return; } if (isNumberedHeaderTag(token.name())) { if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) { parseError(token); return; } m_tree.generateImpliedEndTags(); if (!m_tree.currentElement()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilNumberedHeaderElementPopped(); return; } if (token.name() == "sarcasm") { notImplemented(); // Take a deep breath. 
return; } if (isFormattingTag(token.name())) { callTheAdoptionAgency(token); return; } if (token.name() == appletTag || token.name() == marqueeTag || token.name() == objectTag) { if (!m_tree.openElements()->inScope(token.name())) { parseError(token); return; } m_tree.generateImpliedEndTags(); if (!m_tree.currentElement()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); m_tree.activeFormattingElements()->clearToLastMarker(); return; } if (token.name() == brTag) { parseError(token); processFakeStartTag(brTag); return; } processAnyOtherEndTagForInBody(token); } bool HTMLTreeBuilder::processCaptionEndTagForInCaption() { if (!m_tree.openElements()->inTableScope(captionTag.localName())) { ASSERT(isParsingFragment()); // FIXME: parse error return false; } m_tree.generateImpliedEndTags(); // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag)) m_tree.openElements()->popUntilPopped(captionTag.localName()); m_tree.activeFormattingElements()->clearToLastMarker(); setInsertionMode(InTableMode); return true; } bool HTMLTreeBuilder::processTrEndTagForInRow() { if (!m_tree.openElements()->inTableScope(trTag.localName())) { ASSERT(isParsingFragment()); // FIXME: parse error return false; } m_tree.openElements()->popUntilTableRowScopeMarker(); ASSERT(m_tree.currentElement()->hasTagName(trTag)); m_tree.openElements()->pop(); setInsertionMode(InTableBodyMode); return true; } bool HTMLTreeBuilder::processTableEndTagForInTable() { if (!m_tree.openElements()->inTableScope(tableTag)) { ASSERT(isParsingFragment()); // FIXME: parse error. 
return false; } m_tree.openElements()->popUntilPopped(tableTag.localName()); resetInsertionModeAppropriately(); return true; } void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::EndTag); if (token.name() == tableTag) { processTableEndTagForInTable(); return; } if (token.name() == bodyTag || isCaptionColOrColgroupTag(token.name()) || token.name() == htmlTag || isTableBodyContextTag(token.name()) || isTableCellContextTag(token.name()) || token.name() == trTag) { parseError(token); return; } // Is this redirection necessary here? HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree); processEndTagForInBody(token); } void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::EndTag); switch (insertionMode()) { case InitialMode: ASSERT(insertionMode() == InitialMode); defaultForInitial(); // Fall through. case BeforeHTMLMode: ASSERT(insertionMode() == BeforeHTMLMode); if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { parseError(token); return; } defaultForBeforeHTML(); // Fall through. case BeforeHeadMode: ASSERT(insertionMode() == BeforeHeadMode); if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { parseError(token); return; } defaultForBeforeHead(); // Fall through. case InHeadMode: ASSERT(insertionMode() == InHeadMode); if (token.name() == headTag) { m_tree.openElements()->popHTMLHeadElement(); setInsertionMode(AfterHeadMode); return; } if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { parseError(token); return; } defaultForInHead(); // Fall through. 
case AfterHeadMode: ASSERT(insertionMode() == AfterHeadMode); if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) { parseError(token); return; } defaultForAfterHead(); // Fall through case InBodyMode: ASSERT(insertionMode() == InBodyMode); processEndTagForInBody(token); break; case InTableMode: ASSERT(insertionMode() == InTableMode); processEndTagForInTable(token); break; case InCaptionMode: ASSERT(insertionMode() == InCaptionMode); if (token.name() == captionTag) { processCaptionEndTagForInCaption(); return; } if (token.name() == tableTag) { parseError(token); if (!processCaptionEndTagForInCaption()) { ASSERT(isParsingFragment()); return; } processEndTag(token); return; } if (token.name() == bodyTag || token.name() == colTag || token.name() == colgroupTag || token.name() == htmlTag || isTableBodyContextTag(token.name()) || isTableCellContextTag(token.name()) || token.name() == trTag) { parseError(token); return; } processEndTagForInBody(token); break; case InColumnGroupMode: ASSERT(insertionMode() == InColumnGroupMode); if (token.name() == colgroupTag) { processColgroupEndTagForInColumnGroup(); return; } if (token.name() == colTag) { parseError(token); return; } if (!processColgroupEndTagForInColumnGroup()) { ASSERT(isParsingFragment()); return; } processEndTag(token); break; case InRowMode: ASSERT(insertionMode() == InRowMode); processEndTagForInRow(token); break; case InCellMode: ASSERT(insertionMode() == InCellMode); processEndTagForInCell(token); break; case InTableBodyMode: ASSERT(insertionMode() == InTableBodyMode); processEndTagForInTableBody(token); break; case AfterBodyMode: ASSERT(insertionMode() == AfterBodyMode); if (token.name() == htmlTag) { if (isParsingFragment()) { parseError(token); return; } setInsertionMode(AfterAfterBodyMode); return; } // Fall through. 
case AfterAfterBodyMode: ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode); parseError(token); setInsertionMode(InBodyMode); processEndTag(token); break; case InHeadNoscriptMode: ASSERT(insertionMode() == InHeadNoscriptMode); if (token.name() == noscriptTag) { ASSERT(m_tree.currentElement()->hasTagName(noscriptTag)); m_tree.openElements()->pop(); ASSERT(m_tree.currentElement()->hasTagName(headTag)); setInsertionMode(InHeadMode); return; } if (token.name() != brTag) { parseError(token); return; } defaultForInHeadNoscript(); processToken(token); break; case TextMode: if (token.name() == scriptTag) { // Pause ourselves so that parsing stops until the script can be processed by the caller. m_isPaused = true; ASSERT(m_tree.currentElement()->hasTagName(scriptTag)); m_scriptToProcess = m_tree.currentElement(); m_scriptToProcessStartLine = m_lastScriptElementStartLine + 1; m_tree.openElements()->pop(); if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed) m_scriptToProcess->removeAllChildren(); setInsertionMode(m_originalInsertionMode); return; } m_tree.openElements()->pop(); setInsertionMode(m_originalInsertionMode); break; case InFramesetMode: ASSERT(insertionMode() == InFramesetMode); if (token.name() == framesetTag) { if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) { parseError(token); return; } m_tree.openElements()->pop(); if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag)) setInsertionMode(AfterFramesetMode); return; } break; case AfterFramesetMode: ASSERT(insertionMode() == AfterFramesetMode); if (token.name() == htmlTag) { setInsertionMode(AfterAfterFramesetMode); return; } // Fall through. 
case AfterAfterFramesetMode: ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode); parseError(token); break; case InSelectInTableMode: ASSERT(insertionMode() == InSelectInTableMode); if (token.name() == captionTag || token.name() == tableTag || isTableBodyContextTag(token.name()) || token.name() == trTag || isTableCellContextTag(token.name())) { parseError(token); if (m_tree.openElements()->inTableScope(token.name())) { AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName()); processEndTag(endSelect); processEndTag(token); } return; } // Fall through. case InSelectMode: ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode); if (token.name() == optgroupTag) { if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag)) processFakeEndTag(optionTag); if (m_tree.currentElement()->hasTagName(optgroupTag)) { m_tree.openElements()->pop(); return; } parseError(token); return; } if (token.name() == optionTag) { if (m_tree.currentElement()->hasTagName(optionTag)) { m_tree.openElements()->pop(); return; } parseError(token); return; } if (token.name() == selectTag) { if (!m_tree.openElements()->inTableScope(token.name())) { ASSERT(isParsingFragment()); parseError(token); return; } m_tree.openElements()->popUntilPopped(selectTag.localName()); resetInsertionModeAppropriately(); return; } break; case InTableTextMode: defaultForInTableText(); processEndTag(token); break; case InForeignContentMode: if (token.name() == SVGNames::scriptTag && m_tree.currentElement()->hasTagName(SVGNames::scriptTag)) { notImplemented(); return; } if (m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI) { // FIXME: This code just wants an Element* iterator, instead of an ElementRecord* HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord(); if (!nodeRecord->element()->hasLocalName(token.name())) parseError(token); while (1) { if 
(nodeRecord->element()->hasLocalName(token.name())) { m_tree.openElements()->popUntilPopped(nodeRecord->element()); break; } nodeRecord = nodeRecord->next(); if (nodeRecord->element()->namespaceURI() == xhtmlNamespaceURI) break; } } // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace." processUsingSecondaryInsertionModeAndAdjustInsertionMode(token); break; } } class HTMLTreeBuilder::FakeInsertionMode : public Noncopyable { public: FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode) : m_treeBuilder(treeBuilder) , m_originalMode(treeBuilder->insertionMode()) { m_treeBuilder->setFakeInsertionMode(mode); } ~FakeInsertionMode() { if (m_treeBuilder->isFakeInsertionMode()) m_treeBuilder->setInsertionMode(m_originalMode); } private: HTMLTreeBuilder* m_treeBuilder; InsertionMode m_originalMode; }; // This handles both secondary insertion mode processing, as well as updating // the insertion mode. These are separate steps in the spec, but always occur // right after one another. 
void HTMLTreeBuilder::processUsingSecondaryInsertionModeAndAdjustInsertionMode(AtomicHTMLToken& token)
{
    // Only start and end tags are expected here.
    ASSERT(token.type() == HTMLToken::StartTag || token.type() == HTMLToken::EndTag);
    {
        // The inner scope makes the FakeInsertionMode destructor run before
        // the insertion mode is inspected below.
        FakeInsertionMode fakeMode(this, m_secondaryInsertionMode);
        processToken(token);
    }
    // Switch to the secondary insertion mode for real once we are still in
    // foreign content and the stack reports only HTML elements in scope.
    if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
        setInsertionMode(m_secondaryInsertionMode);
}

// Inserts a comment token at a location chosen by the current insertion mode:
// on the document, on the <html> element, or at the default insertion point.
void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::Comment);
    if (m_insertionMode == InitialMode || m_insertionMode == BeforeHTMLMode || m_insertionMode == AfterAfterBodyMode || m_insertionMode == AfterAfterFramesetMode) {
        m_tree.insertCommentOnDocument(token);
        return;
    }
    if (m_insertionMode == AfterBodyMode) {
        m_tree.insertCommentOnHTMLHtmlElement(token);
        return;
    }
    if (m_insertionMode == InTableTextMode) {
        // Flush the pending table characters first; defaultForInTableText()
        // restores m_originalInsertionMode, so the recursive call dispatches
        // on that mode instead of InTableTextMode.
        defaultForInTableText();
        processComment(token);
        return;
    }
    m_tree.insertComment(token);
}

// Wraps a character token in an ExternalCharacterTokenBuffer and hands it to
// processCharacterBuffer().
void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::Character);
    ExternalCharacterTokenBuffer buffer(token);
    processCharacterBuffer(buffer);
}

// Consumes the characters in |buffer| according to the current insertion
// mode. Several cases take or skip leading whitespace, run the mode's default
// action and then either fall through into the next case or jump back to
// ReprocessBuffer so the remaining characters are dispatched again on the
// updated insertion mode. The ASSERT at the top of each case states the mode
// the case expects to be running in, including after a fall-through.
void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
{
ReprocessBuffer:
    switch (insertionMode()) {
    case InitialMode: {
        ASSERT(insertionMode() == InitialMode);
        // Leading whitespace is skipped, not inserted, in this mode.
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForInitial();
        // Fall through.
    }
    case BeforeHTMLMode: {
        ASSERT(insertionMode() == BeforeHTMLMode);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHTML();
        // Fall through.
    }
    case BeforeHeadMode: {
        ASSERT(insertionMode() == BeforeHeadMode);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHead();
        // Fall through.
    }
    case InHeadMode: {
        ASSERT(insertionMode() == InHeadMode);
        // From here on leading whitespace is inserted as a text node.
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForInHead();
        // Fall through.
    }
    case AfterHeadMode: {
        ASSERT(insertionMode() == AfterHeadMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForAfterHead();
        // Fall through.
    }
    case InBodyMode:
    case InCaptionMode:
    case InCellMode: {
        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
        m_tree.reconstructTheActiveFormattingElements();
        String characters = buffer.takeRemaining();
        m_tree.insertTextNode(characters);
        // Anything other than whitespace / U+FFFD clears the frameset-ok flag.
        if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
            m_framesetOk = false;
        break;
    }
    case InTableMode:
    case InTableBodyMode:
    case InRowMode: {
        ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
        ASSERT(m_pendingTableCharacters.isEmpty());
        // Remember the table mode so defaultForInTableText() can return to it.
        m_originalInsertionMode = m_insertionMode;
        setInsertionMode(InTableTextMode);
        // Fall through.
    }
    case InTableTextMode: {
        // Characters are only accumulated here; defaultForInTableText()
        // inserts them later.
        buffer.giveRemainingTo(m_pendingTableCharacters);
        break;
    }
    case InColumnGroupMode: {
        ASSERT(insertionMode() == InColumnGroupMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        if (buffer.isEmpty())
            return;
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragment());
            // The spec tells us to drop these characters on the floor.
            buffer.takeLeadingNonWhitespace();
            if (buffer.isEmpty())
                return;
        }
        goto ReprocessBuffer;
    }
    case AfterBodyMode:
    case AfterAfterBodyMode: {
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        // FIXME: parse error
        setInsertionMode(InBodyMode);
        goto ReprocessBuffer;
        break; // Not reached: the goto above always leaves this case.
    }
    case TextMode: {
        ASSERT(insertionMode() == TextMode);
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    }
    case InHeadNoscriptMode: {
        ASSERT(insertionMode() == InHeadNoscriptMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForInHeadNoscript();
        goto ReprocessBuffer;
        break; // Not reached: the goto above always leaves this case.
    }
    case InFramesetMode:
    case AfterFramesetMode: {
        ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        // Only the whitespace handed back by takeRemainingWhitespace() is inserted.
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    case InSelectInTableMode:
    case InSelectMode: {
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    }
    case InForeignContentMode: {
        ASSERT(insertionMode() == InForeignContentMode);
        String characters = buffer.takeRemaining();
        m_tree.insertTextNode(characters);
        // Unlike the in-body case, only plain whitespace keeps the flag set here.
        if (m_framesetOk && !isAllWhitespace(characters))
            m_framesetOk = false;
        break;
    }
    case AfterAfterFramesetMode: {
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty()) {
            m_tree.reconstructTheActiveFormattingElements();
            m_tree.insertTextNode(leadingWhitespace);
        }
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    }
}

// Handles the end-of-file token. The early insertion modes run their default
// actions and fall through one after another; modes that must first leave a
// transient state do so and then re-enter this function. Paths that reach the
// end of the switch via break finish by popping every open element; paths
// that return from inside the switch skip that step at this level.
void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::EndOfFile);
    switch (insertionMode()) {
    case InitialMode:
        ASSERT(insertionMode() == InitialMode);
        defaultForInitial();
        // Fall through.
    case BeforeHTMLMode:
        ASSERT(insertionMode() == BeforeHTMLMode);
        defaultForBeforeHTML();
        // Fall through.
    case BeforeHeadMode:
        ASSERT(insertionMode() == BeforeHeadMode);
        defaultForBeforeHead();
        // Fall through.
    case InHeadMode:
        ASSERT(insertionMode() == InHeadMode);
        defaultForInHead();
        // Fall through.
    case AfterHeadMode:
        ASSERT(insertionMode() == AfterHeadMode);
        defaultForAfterHead();
        // Fall through.
    case InBodyMode:
    case InCellMode:
        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode);
        notImplemented(); // Emit parse error based on what elements are still open.
        break;
    case AfterBodyMode:
    case AfterAfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        notImplemented();
        break;
    case InHeadNoscriptMode:
        ASSERT(insertionMode() == InHeadNoscriptMode);
        // Close the noscript element, then handle EOF again in the new mode.
        defaultForInHeadNoscript();
        processEndOfFile(token);
        return;
    case AfterFramesetMode:
    case AfterAfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        break;
    case InFramesetMode:
    case InTableMode:
    case InTableBodyMode:
    case InSelectInTableMode:
    case InSelectMode:
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
        // EOF is a parse error here unless the current node is the root <html> element.
        if (m_tree.currentElement() != m_tree.openElements()->htmlElement())
            parseError(token);
        break;
    case InColumnGroupMode:
        if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) {
            ASSERT(isParsingFragment());
            return;
        }
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragment());
            return;
        }
        // The colgroup end tag was processed; handle EOF again in the resulting mode.
        processEndOfFile(token);
        return;
    case InForeignContentMode:
        parseError(token);
        // FIXME: Following the spec literally would recurse infinitely here; see
        // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10115
        m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI);
        setInsertionMode(m_secondaryInsertionMode);
        processEndOfFile(token);
        return;
    case InTableTextMode:
        // Flush pending table characters (this restores m_originalInsertionMode),
        // then handle EOF again.
        defaultForInTableText();
        processEndOfFile(token);
        return;
    case TextMode:
    case InCaptionMode:
    case InRowMode:
        notImplemented();
        break;
    }
    ASSERT(m_tree.openElements()->top());
    m_tree.openElements()->popAll();
}

// Default action for InitialMode: unless a fragment is being parsed, the
// document is put into quirks mode; the insertion mode then becomes
// BeforeHTMLMode.
void HTMLTreeBuilder::defaultForInitial()
{
    notImplemented();
    if (!m_fragmentContext.fragment())
        m_document->setCompatibilityMode(Document::QuirksMode);
    // FIXME: parse error
    setInsertionMode(BeforeHTMLMode);
}

// Default action for BeforeHTMLMode: inserts an <html> element from a
// synthesized start tag and moves on to BeforeHeadMode.
void HTMLTreeBuilder::defaultForBeforeHTML()
{
    AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
    m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
    setInsertionMode(BeforeHeadMode);
}

// Default action for BeforeHeadMode: processes a synthesized <head> start tag.
void HTMLTreeBuilder::defaultForBeforeHead()
{
    AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
    processStartTag(startHead);
}

// Default action for InHeadMode: processes a synthesized </head> end tag.
void HTMLTreeBuilder::defaultForInHead()
{
    AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
    processEndTag(endHead);
}

// Default action for InHeadNoscriptMode: processes a synthesized </noscript>
// end tag.
void HTMLTreeBuilder::defaultForInHeadNoscript()
{
    AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
    processEndTag(endNoscript);
}

// Default action for AfterHeadMode: processes a synthesized <body> start tag
// and then sets the frameset-ok flag.
void HTMLTreeBuilder::defaultForAfterHead()
{
    AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
    processStartTag(startBody);
    // NOTE(review): presumably processing a <body> start tag clears
    // m_framesetOk, and an implied <body> must leave it set -- confirm
    // against processStartTag().
    m_framesetOk = true;
}

// Flushes the characters accumulated in m_pendingTableCharacters and returns
// to the insertion mode saved in m_originalInsertionMode.
void HTMLTreeBuilder::defaultForInTableText()
{
    // NOTE(review): String::adopt() presumably leaves the pending buffer
    // empty, which the ASSERT in processCharacterBuffer() relies on -- confirm.
    String characters = String::adopt(m_pendingTableCharacters);
    if (!isAllWhitespace(characters)) {
        // FIXME: parse error
        // The guard is alive for the rest of this branch.
        // NOTE(review): presumably it makes the insertion below go to the
        // foster parent -- confirm against HTMLConstructionSite.
        HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertTextNode(characters);
        m_framesetOk = false;
        setInsertionMode(m_originalInsertionMode);
        return;
    }
    // Whitespace-only text is inserted normally.
    m_tree.insertTextNode(characters);
    setInsertionMode(m_originalInsertionMode);
}

bool
HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::StartTag); if (token.name() == htmlTag) { m_tree.insertHTMLHtmlStartTagInBody(token); return true; } if (token.name() == baseTag || token.name() == basefontTag || token.name() == bgsoundTag || token.name() == commandTag || token.name() == linkTag || token.name() == metaTag) { m_tree.insertSelfClosingHTMLElement(token); // Note: The custom processing for the tag is done in HTMLMetaElement::process(). return true; } if (token.name() == titleTag) { processGenericRCDATAStartTag(token); return true; } if (token.name() == noscriptTag) { if (scriptEnabled(m_document->frame())) { processGenericRawTextStartTag(token); return true; } m_tree.insertHTMLElement(token); setInsertionMode(InHeadNoscriptMode); return true; } if (token.name() == noframesTag || token.name() == styleTag) { processGenericRawTextStartTag(token); return true; } if (token.name() == scriptTag) { processScriptStartTag(token); return true; } if (token.name() == headTag) { parseError(token); return true; } return false; } void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::StartTag); m_tree.insertHTMLElement(token); m_tokenizer->setState(HTMLTokenizer::RCDATAState); m_originalInsertionMode = m_insertionMode; setInsertionMode(TextMode); } void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::StartTag); m_tree.insertHTMLElement(token); m_tokenizer->setState(HTMLTokenizer::RAWTEXTState); m_originalInsertionMode = m_insertionMode; setInsertionMode(TextMode); } void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::StartTag); m_tree.insertScriptElement(token); m_tokenizer->setState(HTMLTokenizer::ScriptDataState); m_originalInsertionMode = m_insertionMode; m_lastScriptElementStartLine = m_tokenizer->lineNumber(); setInsertionMode(TextMode); } void 
HTMLTreeBuilder::finished() { ASSERT(m_document); if (isParsingFragment()) { m_fragmentContext.finished(); return; } // Warning, this may detach the parser. Do not do anything else after this. m_document->finishedParsing(); } bool HTMLTreeBuilder::scriptEnabled(Frame* frame) { if (!frame) return false; if (ScriptController* scriptController = frame->script()) return scriptController->canExecuteScripts(NotAboutToExecuteScript); return false; } bool HTMLTreeBuilder::pluginsEnabled(Frame* frame) { if (!frame) return false; return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin); } // FIXME: Move this function to a more appropriate place. String serializeForNumberType(double number) { // According to HTML5, "the best representation of the number n as a floating // point number" is a string produced by applying ToString() to n. NumberToStringBuffer buffer; unsigned length = numberToString(number, buffer); return String(buffer, length); } // FIXME: Move this function to a more appropriate place. bool parseToDoubleForNumberType(const String& src, double* out) { // See HTML5 2.4.4.3 `Real numbers.' if (src.isEmpty()) return false; // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5. // So, check the first character. if (src[0] != '-' && (src[0] < '0' || src[0] > '9')) return false; bool valid = false; double value = src.toDouble(&valid); if (!valid) return false; // NaN and Infinity are not valid numbers according to the standard. if (!isfinite(value)) return false; // -0 -> 0 if (!value) value = 0; if (out) *out = value; return true; } }