/* Copyright (C) 1997 Martin Jones (mjones@kde.org) (C) 1997 Torben Weis (weis@kde.org) (C) 1999,2001 Lars Knoll (knoll@kde.org) (C) 2000,2001 Dirk Mueller (mueller@kde.org) Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include "config.h" #include "LegacyHTMLTreeBuilder.h" #include "CharacterNames.h" #include "CSSPropertyNames.h" #include "CSSValueKeywords.h" #include "Chrome.h" #include "ChromeClient.h" #include "Comment.h" #include "Console.h" #include "DOMWindow.h" #include "DocumentFragment.h" #include "DocumentType.h" #include "Frame.h" #include "HTMLBodyElement.h" #include "HTMLDocument.h" #include "HTMLDivElement.h" #include "HTMLDListElement.h" #include "HTMLElementFactory.h" #include "HTMLFormElement.h" #include "HTMLHeadElement.h" #include "HTMLHRElement.h" #include "HTMLHtmlElement.h" #include "HTMLIsIndexElement.h" #include "HTMLMapElement.h" #include "HTMLNames.h" #include "HTMLParserQuirks.h" #include "HTMLTableCellElement.h" #include "HTMLTableRowElement.h" #include "HTMLTableSectionElement.h" #include "LegacyHTMLDocumentParser.h" #include "LocalizedStrings.h" #include "Page.h" #include "Settings.h" #include "Text.h" #include "TreeDepthLimit.h" #include #include namespace WebCore { using namespace HTMLNames; static const unsigned cMaxRedundantTagDepth = 20; static const unsigned cResidualStyleMaxDepth = 200; static const unsigned cResidualStyleIterationLimit = 10; static const int minBlockLevelTagPriority = 3; // A cap on the number of tags with priority minBlockLevelTagPriority or higher // allowed in m_blockStack. The cap is enforced by adding such new elements as // siblings instead of children once it is reached. 
static const size_t cMaxBlockDepth = 4096; typedef HashSet TagNameSet; template< size_t ArraySize > static void addTags(TagNameSet& set, QualifiedName (&names)[ArraySize]) { for (size_t x = 0; x < ArraySize; x++) { const QualifiedName& name = names[x]; set.add(name.localName().impl()); } } struct HTMLStackElem : Noncopyable { HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) : tagName(t) , level(lvl) , strayTableContent(false) , node(n) , didRefNode(r) , next(nx) { } void derefNode() { if (didRefNode) node->deref(); } AtomicString tagName; int level; bool strayTableContent; Node* node; bool didRefNode; HTMLStackElem* next; }; /** * The parser parses tokenized input into the document, building up the * document tree. If the document is well-formed, parsing it is straightforward. * * Unfortunately, we have to handle many HTML documents that are not well-formed, * so the parser has to be tolerant about errors. * * We have to take care of at least the following error conditions: * * 1. The element being added is explicitly forbidden inside some outer tag. * In this case we should close all tags up to the one, which forbids * the element, and add it afterwards. * * 2. We are not allowed to add the element directly. It could be that * the person writing the document forgot some tag in between (or that the * tag in between is optional). This could be the case with the following * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?). * * 3. We want to add a block element inside to an inline element. Close all * inline elements up to the next higher block element. * * 4. If this doesn't help, close elements until we are allowed to add the * element or ignore the tag. 
* */ LegacyHTMLTreeBuilder::LegacyHTMLTreeBuilder(HTMLDocument* doc, bool reportErrors) : m_document(doc) , m_current(doc) , m_didRefCurrent(false) , m_blockStack(0) , m_blocksInStack(0) , m_treeDepth(0) , m_hasPElementInScope(NotInScope) , m_inBody(false) , m_haveContent(false) , m_haveFrameSet(false) , m_isParsingFragment(false) , m_reportErrors(reportErrors) , m_handlingResidualStyleAcrossBlocks(false) , m_inStrayTableContent(0) , m_scriptingPermission(FragmentScriptingAllowed) , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) { } LegacyHTMLTreeBuilder::LegacyHTMLTreeBuilder(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission) : m_document(frag->document()) , m_current(frag) , m_didRefCurrent(true) , m_blockStack(0) , m_blocksInStack(0) , m_treeDepth(0) , m_hasPElementInScope(NotInScope) , m_inBody(true) , m_haveContent(false) , m_haveFrameSet(false) , m_isParsingFragment(true) , m_reportErrors(false) , m_handlingResidualStyleAcrossBlocks(false) , m_inStrayTableContent(0) , m_scriptingPermission(scriptingPermission) , m_parserQuirks(m_document->page() ? 
m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) { if (frag) frag->ref(); } LegacyHTMLTreeBuilder::~LegacyHTMLTreeBuilder() { freeBlock(); if (m_didRefCurrent) m_current->deref(); } void LegacyHTMLTreeBuilder::reset() { ASSERT(!m_isParsingFragment); setCurrent(m_document); freeBlock(); m_treeDepth = 0; m_inBody = false; m_haveFrameSet = false; m_haveContent = false; m_inStrayTableContent = 0; m_currentFormElement = 0; m_currentMapElement = 0; m_head = 0; m_isindexElement = 0; m_skipModeTag = nullAtom; if (m_parserQuirks) m_parserQuirks->reset(); } void LegacyHTMLTreeBuilder::setCurrent(Node* newCurrent) { bool didRefNewCurrent = newCurrent && newCurrent != m_document; if (didRefNewCurrent) newCurrent->ref(); if (m_didRefCurrent) m_current->deref(); m_current = newCurrent; m_didRefCurrent = didRefNewCurrent; } inline static int tagPriorityOfNode(Node* n) { return n->isHTMLElement() ? static_cast(n)->tagPriority() : 0; } inline void LegacyHTMLTreeBuilder::limitDepth(int tagPriority) { while (m_treeDepth >= maxDOMTreeDepth) popBlock(m_blockStack->tagName); if (tagPriority >= minBlockLevelTagPriority) { while (m_blocksInStack >= cMaxBlockDepth) popBlock(m_blockStack->tagName); } } inline bool LegacyHTMLTreeBuilder::insertNodeAfterLimitDepth(Node* n, bool flat) { limitDepth(tagPriorityOfNode(n)); return insertNode(n, flat); } PassRefPtr LegacyHTMLTreeBuilder::parseToken(Token* t) { if (!m_skipModeTag.isNull()) { if (!t->beginTag && t->tagName == m_skipModeTag) // Found the end tag for the current skip mode, so we're done skipping. m_skipModeTag = nullAtom; else if (m_current->localName() == t->tagName) // Do not skip . // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag? ; else return 0; } // Apparently some sites use
instead of
. Be compatible with IE and Firefox and treat this like
. if (t->isCloseTag(brTag) && m_document->inCompatMode()) { reportError(MalformedBRError); t->beginTag = true; } if (!t->beginTag) { processCloseTag(t); return 0; } // Ignore spaces, if we're not inside a paragraph or other inline code. // Do not alter the text if it is part of a scriptTag. if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) { if (m_inBody && !skipMode() && m_current->localName() != styleTag && m_current->localName() != titleTag && !t->text->containsOnlyWhitespace()) m_haveContent = true; // HTML5 requires text node coalescing. // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#insert-a-character Node* previousChild = m_current->lastChild(); if (previousChild && previousChild->isTextNode()) { // Only coalesce text nodes if the text node wouldn't be foster parented. if (!m_current->hasTagName(htmlTag) && !m_current->hasTagName(tableTag) && !m_current->hasTagName(trTag) && !m_current->hasTagName(theadTag) && !m_current->hasTagName(tbodyTag) && !m_current->hasTagName(tfootTag) && !m_current->hasTagName(titleTag)) { // Technically we're only supposed to merge into the previous // text node if it was the last node inserted by the parser. // (This was a spec modification made to make it easier for // mozilla to run their parser in a thread.) // In practice it does not seem to matter. 
CharacterData* textNode = static_cast(previousChild); textNode->parserAppendData(t->text); return textNode; } } RefPtr n; String text = t->text.get(); unsigned charsLeft = text.length(); while (charsLeft) { // split large blocks of text to nodes of manageable size n = Text::createWithLengthLimit(m_document, text, charsLeft); if (!insertNodeAfterLimitDepth(n.get(), t->selfClosingTag)) return 0; } return n; } RefPtr n = getNode(t); // just to be sure, and to catch currently unimplemented stuff if (!n) return 0; // set attributes if (n->isHTMLElement()) { HTMLElement* e = static_cast(n.get()); if (m_scriptingPermission == FragmentScriptingAllowed || t->tagName != scriptTag) e->setAttributeMap(t->attrs.get(), m_scriptingPermission); // take care of optional close tags if (e->endTagRequirement() == TagStatusOptional) popBlock(t->tagName); // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing // syntax was used, report an error. if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) { if (t->tagName == scriptTag) reportError(IncorrectXMLCloseScriptWarning); else reportError(IncorrectXMLSelfCloseError, &t->tagName); } } if (!insertNodeAfterLimitDepth(n.get(), t->selfClosingTag)) { // we couldn't insert the node if (n->isElementNode()) { Element* e = static_cast(n.get()); e->setAttributeMap(0); } if (m_currentMapElement == n) m_currentMapElement = 0; if (m_currentFormElement == n) m_currentFormElement = 0; if (m_head == n) m_head = 0; return 0; } return n; } void LegacyHTMLTreeBuilder::parseDoctypeToken(DoctypeToken* t) { // Ignore any doctype after the first. Ignore doctypes in fragments. if (m_document->doctype() || m_isParsingFragment || m_current != m_document) return; // Make a new doctype node and set it as our doctype. 
m_document->legacyParserAddChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID))); if (t->m_forceQuirks) m_document->setParseMode(Document::Compat); } static bool isTableSection(const Node* n) { return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); } static bool isTablePart(const Node* n) { return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) || isTableSection(n); } static bool isTableRelated(const Node* n) { return n->hasTagName(tableTag) || isTablePart(n); } static bool isScopingTag(const AtomicString& tagName) { return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag; } bool LegacyHTMLTreeBuilder::insertNode(Node* n, bool flat) { RefPtr protectNode(n); const AtomicString& localName = n->localName(); // is never allowed inside stray table content. Always pop out of the stray table content // and close up the first table, and then start the second table as a sibling. if (m_inStrayTableContent && localName == tableTag) popBlock(tableTag); if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n)) return false; int tagPriority = tagPriorityOfNode(n); // let's be stupid and just try to insert it. // this should work if the document is well-formed Node* newNode = m_current->legacyParserAddChild(n); if (!newNode) return handleError(n, flat, localName, tagPriority); // Try to handle the error. // don't push elements without end tags (e.g., ) on the stack bool parentAttached = m_current->attached(); if (tagPriority > 0 && !flat) { if (newNode == m_current) { // This case should only be hit when a demoted is placed inside a table. 
ASSERT(localName == formTag); reportError(FormInsideTablePartError, &m_current->localName()); HTMLFormElement* form = static_cast(n); form->setDemoted(true); } else { // The pushBlock function transfers ownership of current to the block stack // so we're guaranteed that m_didRefCurrent is false. The code below is an // optimized version of setCurrent that takes advantage of that fact and also // assumes that newNode is neither 0 nor a pointer to the document. pushBlock(localName, tagPriority); newNode->beginParsingChildren(); ASSERT(!m_didRefCurrent); newNode->ref(); m_current = newNode; m_didRefCurrent = true; } if (parentAttached && !n->attached() && !m_isParsingFragment) n->attach(); } else { if (parentAttached && !n->attached() && !m_isParsingFragment) n->attach(); n->finishParsingChildren(); } if (localName == htmlTag && m_document->frame() && !m_isParsingFragment) m_document->frame()->loader()->dispatchDocumentElementAvailable(); return true; } bool LegacyHTMLTreeBuilder::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority) { // Error handling code. This is just ad hoc handling of specific parent/child combinations. bool handled = false; // 1. Check out the element's tag name to decide how to deal with errors. if (n->isHTMLElement()) { HTMLElement* h = static_cast(n); if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) { if (m_inStrayTableContent && !isTableRelated(m_current)) { reportError(MisplacedTablePartError, &localName, &m_current->localName()); // pop out to the nearest enclosing table-related tag. 
while (m_blockStack && !isTableRelated(m_current)) popOneBlock(); return insertNode(n); } } else if (h->hasLocalName(headTag)) { if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) { reportError(MisplacedHeadError); return false; } } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) { bool createdHead = false; if (!m_head) { createHead(); createdHead = true; } if (m_head) { if (!createdHead) reportError(MisplacedHeadContentError, &localName, &m_current->localName()); if (m_head->legacyParserAddChild(n)) { if (!n->attached() && !m_isParsingFragment) n->attach(); return true; } return false; } } else if (h->hasLocalName(htmlTag)) { if (!m_current->isDocumentNode() ) { if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag) && !m_isParsingFragment) { reportError(RedundantHTMLBodyError, &localName); // we have another element.... apply attributes to existing one // make sure we don't overwrite already existing attributes NamedNodeMap* map = static_cast(n)->attributes(true); Element* existingHTML = static_cast(m_document->documentElement()); NamedNodeMap* bmap = existingHTML->attributes(false); for (unsigned l = 0; map && l < map->length(); ++l) { Attribute* it = map->attributeItem(l); if (!bmap->getAttributeItem(it->name())) existingHTML->setAttribute(it->name(), it->value()); } } return false; } } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) { bool createdHead = false; if (!m_head) { createHead(); createdHead = true; } if (m_head) { Node* newNode = m_head->legacyParserAddChild(n); if (!newNode) { setSkipMode(h->tagQName()); return false; } if (!createdHead) reportError(MisplacedHeadContentError, &localName, &m_current->localName()); pushBlock(localName, tagPriority); newNode->beginParsingChildren(); setCurrent(newNode); if (!n->attached() && !m_isParsingFragment) n->attach(); return true; } if (m_inBody) { 
setSkipMode(h->tagQName()); return false; } } else if (h->hasLocalName(bodyTag)) { if (m_inBody && m_document->body() && !m_isParsingFragment) { // we have another element.... apply attributes to existing one // make sure we don't overwrite already existing attributes // some sites use ... reportError(RedundantHTMLBodyError, &localName); NamedNodeMap* map = static_cast(n)->attributes(true); Element* existingBody = m_document->body(); NamedNodeMap* bmap = existingBody->attributes(false); for (unsigned l = 0; map && l < map->length(); ++l) { Attribute* it = map->attributeItem(l); if (!bmap->getAttributeItem(it->name())) existingBody->setAttribute(it->name(), it->value()); } return false; } else if (!m_current->isDocumentNode()) return false; } else if (h->hasLocalName(areaTag)) { if (m_currentMapElement) { reportError(MisplacedAreaError, &m_current->localName()); m_currentMapElement->legacyParserAddChild(n); if (!n->attached() && !m_isParsingFragment) n->attach(); handled = true; return true; } return false; } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) { if (isTableRelated(m_current)) { while (m_blockStack && isTablePart(m_current)) popOneBlock(); return insertNode(n); } } } else if (n->isCommentNode() && !m_head) return false; // 2. Next we examine our currently active element to do some further error handling. if (m_current->isHTMLElement()) { HTMLElement* h = static_cast(m_current); const AtomicString& currentTagName = h->localName(); if (h->hasLocalName(htmlTag)) { HTMLElement* elt = n->isHTMLElement() ? 
static_cast(n) : 0; if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) || elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) || elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) || elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) || elt->hasLocalName(baseTag))) { if (!m_head) { m_head = HTMLHeadElement::create(m_document); insertNode(m_head.get()); handled = true; } } else { if (n->isTextNode()) { Text* t = static_cast(n); if (t->containsOnlyWhitespace()) { if (m_head && !m_inBody) { // We're between and . According to // the HTML5 parsing algorithm, we're supposed to // insert whitespace text nodes into the HTML element. ExceptionCode ec; m_current->appendChild(n, ec); return true; } return false; } } if (!m_haveFrameSet) { // Ensure that head exists. // But not for older versions of Mail, where the implicit isn't expected - if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) createHead(); popBlock(headTag); startBody(); insertNode(HTMLBodyElement::create(m_document).get()); handled = true; } else reportError(MisplacedFramesetContentError, &localName); } } else if (h->hasLocalName(headTag)) { if (n->hasTagName(htmlTag)) return false; else { // This means the body starts here... if (!m_haveFrameSet) { ASSERT(currentTagName == headTag); popBlock(currentTagName); startBody(); insertNode(HTMLBodyElement::create(m_document).get()); handled = true; } else reportError(MisplacedFramesetContentError, &localName); } } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag) || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) { reportError(MisplacedContentRetryError, &localName, ¤tTagName); popBlock(currentTagName); handled = true; } else if (h->hasLocalName(captionTag)) { // Illegal content in a caption. Close the caption and try again. 
reportError(MisplacedCaptionContentError, &localName); popBlock(currentTagName); if (isTablePart(n)) return insertNode(n, flat); } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) { if (n->hasTagName(tableTag)) { reportError(MisplacedTableError, ¤tTagName); if (m_isParsingFragment && !h->hasLocalName(tableTag)) // fragment may contain table parts without
ancestor, pop them one by one popBlock(h->localName()); popBlock(localName); // end the table handled = true; // ...and start a new one } else { ExceptionCode ec = 0; Node* node = m_current; Node* parent = node->parentNode(); // A script may have removed the current node's parent from the DOM // http://bugs.webkit.org/show_bug.cgi?id=7137 // FIXME: we should do real recovery here and re-parent with the correct node. if (!parent) return false; Node* grandparent = parent->parentNode(); if (n->isTextNode() || (h->hasLocalName(trTag) && isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) || ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) && !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) && parent->hasTagName(tableTag))) { node = (node->hasTagName(tableTag)) ? node : ((node->hasTagName(trTag)) ? grandparent : parent); // This can happen with fragments if (!node) return false; Node* parent = node->parentNode(); if (!parent) return false; parent->insertBefore(n, node, ec); if (!ec) { reportError(StrayTableContentError, &localName, ¤tTagName); if (n->isHTMLElement() && tagPriority > 0 && !flat && static_cast(n)->endTagRequirement() != TagStatusForbidden) { pushBlock(localName, tagPriority); n->beginParsingChildren(); setCurrent(n); m_inStrayTableContent++; m_blockStack->strayTableContent = true; } return true; } } if (!ec) { if (m_current->hasTagName(trTag)) { reportError(TablePartRequiredError, &localName, &tdTag.localName()); insertNode(HTMLTableCellElement::create(tdTag, m_document).get()); } else if (m_current->hasTagName(tableTag)) { // Don't report an error in this case, since making a happens all the time when you have
, // and it isn't really a parse error per se. insertNode(HTMLTableSectionElement::create(tbodyTag, m_document).get()); } else { reportError(TablePartRequiredError, &localName, &trTag.localName()); insertNode(HTMLTableRowElement::create(m_document).get()); } handled = true; } } } else if (h->hasLocalName(objectTag)) { reportError(MisplacedContentRetryError, &localName, ¤tTagName); popBlock(objectTag); handled = true; } else if (h->hasLocalName(pTag) || isHeadingTag(currentTagName)) { if (!isInline(n)) { popBlock(currentTagName); handled = true; } } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) { if (localName == optgroupTag) { popBlock(currentTagName); handled = true; } else if (localName == selectTag) { // IE treats a nested select as . Let's do the same popBlock(localName); } } else if (h->hasLocalName(selectTag)) { if (localName == inputTag || localName == textareaTag) { reportError(MisplacedContentRetryError, &localName, ¤tTagName); popBlock(currentTagName); handled = true; } } else if (h->hasLocalName(colgroupTag)) { popBlock(currentTagName); handled = true; } else if (!h->hasLocalName(bodyTag)) { if (isInline(m_current)) { popInlineBlocks(); handled = true; } } } else if (m_current->isDocumentNode()) { if (n->isTextNode()) { Text* t = static_cast(n); if (t->containsOnlyWhitespace()) return false; } if (!m_document->documentElement()) { insertNode(HTMLHtmlElement::create(m_document).get()); handled = true; } } // 3. If we couldn't handle the error, just return false and attempt to error-correct again. 
if (!handled) { reportError(IgnoredContentError, &localName, &m_current->localName()); return false; } return insertNode(n); } typedef bool (LegacyHTMLTreeBuilder::*CreateErrorCheckFunc)(Token* t, RefPtr&); typedef HashMap FunctionMap; bool LegacyHTMLTreeBuilder::textCreateErrorCheck(Token* t, RefPtr& result) { result = Text::create(m_document, t->text.get()); return false; } bool LegacyHTMLTreeBuilder::commentCreateErrorCheck(Token* t, RefPtr& result) { result = Comment::create(m_document, t->text.get()); return false; } bool LegacyHTMLTreeBuilder::headCreateErrorCheck(Token*, RefPtr& result) { if (!m_head || m_current->localName() == htmlTag) { m_head = HTMLHeadElement::create(m_document); result = m_head; } else reportError(MisplacedHeadError); return false; } bool LegacyHTMLTreeBuilder::bodyCreateErrorCheck(Token*, RefPtr&) { // body no longer allowed if we have a frameset if (m_haveFrameSet) return false; // Ensure that head exists (unless parsing a fragment). // But not for older versions of Mail, where the implicit isn't expected - if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) createHead(); popBlock(headTag); startBody(); return true; } bool LegacyHTMLTreeBuilder::framesetCreateErrorCheck(Token*, RefPtr&) { popBlock(headTag); if (m_inBody && !m_haveFrameSet && !m_haveContent) { popBlock(bodyTag); // ### actually for IE document.body returns the now hidden "body" element // we can't implement that behaviour now because it could cause too many // regressions and the headaches are not worth the work as long as there is // no site actually relying on that detail (Dirk) if (m_document->body() && !m_isParsingFragment) m_document->body()->setAttribute(styleAttr, "display:none"); m_inBody = false; } if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag) return false; m_haveFrameSet = true; startBody(); return true; } bool LegacyHTMLTreeBuilder::formCreateErrorCheck(Token* t, RefPtr& result) { // Only create a new form if 
we're not already inside one. // This is consistent with other browsers' behavior. if (!m_currentFormElement) { m_currentFormElement = HTMLFormElement::create(m_document); result = m_currentFormElement; pCloserCreateErrorCheck(t, result); } return false; } bool LegacyHTMLTreeBuilder::isindexCreateErrorCheck(Token* t, RefPtr& result) { RefPtr n = handleIsindex(t); if (!m_inBody) m_isindexElement = n.release(); else { t->selfClosingTag = true; result = n.release(); } return false; } bool LegacyHTMLTreeBuilder::selectCreateErrorCheck(Token*, RefPtr&) { return true; } bool LegacyHTMLTreeBuilder::ddCreateErrorCheck(Token* t, RefPtr& result) { pCloserCreateErrorCheck(t, result); popBlock(dtTag); popBlock(ddTag); return true; } bool LegacyHTMLTreeBuilder::dtCreateErrorCheck(Token* t, RefPtr& result) { pCloserCreateErrorCheck(t, result); popBlock(ddTag); popBlock(dtTag); return true; } bool LegacyHTMLTreeBuilder::rpCreateErrorCheck(Token*, RefPtr&) { popBlock(rpTag); popBlock(rtTag); return true; } bool LegacyHTMLTreeBuilder::rtCreateErrorCheck(Token*, RefPtr&) { popBlock(rpTag); popBlock(rtTag); return true; } bool LegacyHTMLTreeBuilder::nestedCreateErrorCheck(Token* t, RefPtr&) { popBlock(t->tagName); return true; } bool LegacyHTMLTreeBuilder::nestedPCloserCreateErrorCheck(Token* t, RefPtr& result) { pCloserCreateErrorCheck(t, result); popBlock(t->tagName); return true; } bool LegacyHTMLTreeBuilder::nestedStyleCreateErrorCheck(Token* t, RefPtr&) { return allowNestedRedundantTag(t->tagName); } bool LegacyHTMLTreeBuilder::colCreateErrorCheck(Token*, RefPtr&) { if (!m_current->hasTagName(tableTag)) return true; RefPtr implicitColgroup = HTMLElementFactory::createHTMLElement(colgroupTag, m_document, 0, true); insertNode(implicitColgroup.get()); return true; } bool LegacyHTMLTreeBuilder::tableCellCreateErrorCheck(Token*, RefPtr&) { popBlock(tdTag); popBlock(thTag); return true; } bool LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck(Token*, RefPtr&) { popBlock(theadTag); 
popBlock(tbodyTag); popBlock(tfootTag); return true; } bool LegacyHTMLTreeBuilder::noembedCreateErrorCheck(Token*, RefPtr&) { setSkipMode(noembedTag); return true; } bool LegacyHTMLTreeBuilder::noframesCreateErrorCheck(Token*, RefPtr&) { setSkipMode(noframesTag); return true; } bool LegacyHTMLTreeBuilder::noscriptCreateErrorCheck(Token*, RefPtr&) { if (!m_isParsingFragment) { Frame* frame = m_document->frame(); if (frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript)) setSkipMode(noscriptTag); } return true; } bool LegacyHTMLTreeBuilder::pCloserCreateErrorCheck(Token*, RefPtr&) { if (hasPElementInScope()) popBlock(pTag); return true; } bool LegacyHTMLTreeBuilder::pCloserStrictCreateErrorCheck(Token*, RefPtr&) { if (m_document->inCompatMode()) return true; if (hasPElementInScope()) popBlock(pTag); return true; } bool LegacyHTMLTreeBuilder::mapCreateErrorCheck(Token*, RefPtr& result) { m_currentMapElement = HTMLMapElement::create(m_document); result = m_currentMapElement; return false; } static void mapTagToFunc(FunctionMap& map, const QualifiedName& tag, CreateErrorCheckFunc func) { map.set(tag.localName().impl(), func); } template< size_t ArraySize > static void mapTagsToFunc(FunctionMap& map, QualifiedName (&names)[ArraySize], CreateErrorCheckFunc func) { for (size_t x = 0; x < ArraySize; x++) { const QualifiedName& name = names[x]; mapTagToFunc(map, name, func); } } PassRefPtr LegacyHTMLTreeBuilder::getNode(Token* t) { // Init our error handling table. 
DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ()); if (gFunctionMap.isEmpty()) { QualifiedName nestedCreateErrorTags[] = { aTag, buttonTag, nobrTag, trTag }; mapTagsToFunc(gFunctionMap, nestedCreateErrorTags, &LegacyHTMLTreeBuilder::nestedCreateErrorCheck); QualifiedName nestedStyleCreateErrorTags[] = { bTag, bigTag, iTag, markTag, sTag, smallTag, strikeTag, ttTag, uTag }; mapTagsToFunc(gFunctionMap, nestedStyleCreateErrorTags, &LegacyHTMLTreeBuilder::nestedStyleCreateErrorCheck); QualifiedName pCloserCreateErrorTags[] = { addressTag, articleTag, asideTag, blockquoteTag, centerTag, dirTag, divTag, dlTag, fieldsetTag, footerTag, h1Tag, h2Tag, h3Tag, h4Tag, h5Tag, h6Tag, headerTag, hgroupTag, hrTag, listingTag, menuTag, navTag, olTag, pTag, plaintextTag, preTag, sectionTag, ulTag }; mapTagsToFunc(gFunctionMap, pCloserCreateErrorTags, &LegacyHTMLTreeBuilder::pCloserCreateErrorCheck); mapTagToFunc(gFunctionMap, bodyTag, &LegacyHTMLTreeBuilder::bodyCreateErrorCheck); mapTagToFunc(gFunctionMap, colTag, &LegacyHTMLTreeBuilder::colCreateErrorCheck); mapTagToFunc(gFunctionMap, ddTag, &LegacyHTMLTreeBuilder::ddCreateErrorCheck); mapTagToFunc(gFunctionMap, dtTag, &LegacyHTMLTreeBuilder::dtCreateErrorCheck); mapTagToFunc(gFunctionMap, formTag, &LegacyHTMLTreeBuilder::formCreateErrorCheck); mapTagToFunc(gFunctionMap, framesetTag, &LegacyHTMLTreeBuilder::framesetCreateErrorCheck); mapTagToFunc(gFunctionMap, headTag, &LegacyHTMLTreeBuilder::headCreateErrorCheck); mapTagToFunc(gFunctionMap, isindexTag, &LegacyHTMLTreeBuilder::isindexCreateErrorCheck); mapTagToFunc(gFunctionMap, mapTag, &LegacyHTMLTreeBuilder::mapCreateErrorCheck); mapTagToFunc(gFunctionMap, liTag, &LegacyHTMLTreeBuilder::nestedPCloserCreateErrorCheck); mapTagToFunc(gFunctionMap, noembedTag, &LegacyHTMLTreeBuilder::noembedCreateErrorCheck); mapTagToFunc(gFunctionMap, noframesTag, &LegacyHTMLTreeBuilder::noframesCreateErrorCheck); mapTagToFunc(gFunctionMap, noscriptTag, 
&LegacyHTMLTreeBuilder::noscriptCreateErrorCheck);
        mapTagToFunc(gFunctionMap, tableTag, &LegacyHTMLTreeBuilder::pCloserStrictCreateErrorCheck);
        mapTagToFunc(gFunctionMap, rpTag, &LegacyHTMLTreeBuilder::rpCreateErrorCheck);
        mapTagToFunc(gFunctionMap, rtTag, &LegacyHTMLTreeBuilder::rtCreateErrorCheck);
        mapTagToFunc(gFunctionMap, selectTag, &LegacyHTMLTreeBuilder::selectCreateErrorCheck);
        mapTagToFunc(gFunctionMap, tdTag, &LegacyHTMLTreeBuilder::tableCellCreateErrorCheck);
        mapTagToFunc(gFunctionMap, thTag, &LegacyHTMLTreeBuilder::tableCellCreateErrorCheck);
        mapTagToFunc(gFunctionMap, tbodyTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck);
        mapTagToFunc(gFunctionMap, tfootTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck);
        mapTagToFunc(gFunctionMap, theadTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck);
        gFunctionMap.set(commentAtom.impl(), &LegacyHTMLTreeBuilder::commentCreateErrorCheck);
        gFunctionMap.set(textAtom.impl(), &LegacyHTMLTreeBuilder::textCreateErrorCheck);
    }

    bool proceed = true;
    RefPtr result;
    if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl()))
        proceed = (this->*errorCheckFunc)(t, result);
    if (proceed)
        result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get());
    return result.release();
}

// Returns false once the top of m_blockStack already holds cMaxRedundantTagDepth
// consecutive entries named |tagName|; returns true otherwise.
bool LegacyHTMLTreeBuilder::allowNestedRedundantTag(const AtomicString& tagName)
{
    // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
    // about 1500 tags, all from a bunch of <b>s.  We will only allow at most 20
    // nested tags of the same type before just ignoring them all together.
    unsigned i = 0;
    // The loop body is intentionally empty: it only counts the run of identical
    // tag names starting at the top of the stack, stopping at the cap.
    for (HTMLStackElem* curr = m_blockStack; i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; curr = curr->next, i++) { }
    return i != cMaxRedundantTagDepth;
}

// Handles a close-tag token by popping the matching entry off the block stack.
// Close tags for html, body and comments are ignored entirely.
void LegacyHTMLTreeBuilder::processCloseTag(Token* t)
{
    // Support for really broken html.
    // We never close the body tag, since some stupid web pages close it before the actual end of the doc.
    // Let's rely on the end() call to close things.
    if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom)
        return;

    // Closing the current form, or any </p>, suppresses the stray-close-tag report in popBlock().
    bool checkForCloseTagErrors = true;
    if (t->tagName == formTag && m_currentFormElement) {
        m_currentFormElement = 0;
        checkForCloseTagErrors = false;
    } else if (t->tagName == mapTag)
        m_currentMapElement = 0;
    else if (t->tagName == pTag)
        checkForCloseTagErrors = false;

    // Remember the stack top so we can tell whether popBlock() actually popped anything.
    HTMLStackElem* oldElem = m_blockStack;
    popBlock(t->tagName, checkForCloseTagErrors);
    if (oldElem == m_blockStack && t->tagName == pTag) {
        // We encountered a stray </p>.  Amazingly Gecko, WinIE, and MacIE all treat
        // this as a valid break, i.e., <p></p>.  So go ahead and make the empty
        // paragraph.
        // The same token is re-used as an open tag, parsed, and then closed again.
        t->beginTag = true;
        parseToken(t);
        popBlock(t->tagName);
        reportError(StrayParagraphCloseError);
    }
}

// Returns true if |tagName| is one of h1 through h6.
bool LegacyHTMLTreeBuilder::isHeadingTag(const AtomicString& tagName)
{
    // The set is filled on the first call only.
    DEFINE_STATIC_LOCAL(TagNameSet, headingTags, ());
    if (headingTags.isEmpty()) {
        QualifiedName tagNames[] = { h1Tag, h2Tag, h3Tag, h4Tag, h5Tag, h6Tag };
        addTags(headingTags, tagNames);
    }
    return headingTags.contains(tagName.impl());
}

// Returns true for text nodes and for HTML elements whose local name is in the
// hard-coded list below.  When XHTMLMP is disabled, noscript also counts as inline
// if we are not parsing a fragment and the document's frame can execute scripts.
bool LegacyHTMLTreeBuilder::isInline(Node* node) const
{
    if (node->isTextNode())
        return true;

    if (node->isHTMLElement()) {
        // NOTE(review): the cast's template argument appears to have been lost from this
        // copy of the file; presumably static_cast<HTMLElement*> -- confirm against upstream.
        HTMLElement* e = static_cast(node);
        if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag)
            || e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag)
            || e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag)
            || e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag)
            || e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag)
            || e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag)
            || e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag)
            || e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag)
            || e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) || e->hasLocalName(noembedTag)
            || e->hasLocalName(markTag))
            return true;
#if !ENABLE(XHTMLMP)
        if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) {
            Frame* frame = m_document->frame();
            if (frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript))
                return true;
        }
#endif
    }

    return false;
}

// Membership test against a static set of residual style tag names that is
// populated on the first call.
bool LegacyHTMLTreeBuilder::isResidualStyleTag(const AtomicString& tagName)
{
    // NOTE(review): HashSet's template argument appears to have been lost from this copy
    // of the file -- confirm against upstream.
    DEFINE_STATIC_LOCAL(HashSet, residualStyleTags, ());
    if (residualStyleTags.isEmpty()) {
        QualifiedName tagNames[] = { aTag, fontTag, ttTag, uTag, bTag, iTag, sTag, strikeTag, bigTag, smallTag, emTag, strongTag, dfnTag, codeTag, sampTag, kbdTag, varTag, nobrTag, markTag };
addTags(residualStyleTags, tagNames);
    }
    return residualStyleTags.contains(tagName.impl());
}

// Returns false for the fixed set of tags listed below (body, table parts, select
// parts, object, datagrid, datalist) and true for every other tag name.
bool LegacyHTMLTreeBuilder::isAffectedByResidualStyle(const AtomicString& tagName)
{
    // The set is filled on the first call only.
    // NOTE(review): HashSet's template argument appears to have been lost from this copy
    // of the file -- confirm against upstream.
    DEFINE_STATIC_LOCAL(HashSet, unaffectedTags, ());
    if (unaffectedTags.isEmpty()) {
        QualifiedName tagNames[] = { bodyTag, tableTag, theadTag, tbodyTag, tfootTag, trTag, thTag, tdTag, captionTag, colgroupTag, colTag, optionTag, optgroupTag, selectTag, objectTag, datagridTag, datalistTag };
        addTags(unaffectedTags, tagNames);
    }
    return !unaffectedTags.contains(tagName.impl());
}

// Fixes up the DOM and the block stack when a residual style element (stack entry
// |elem|) is closed while entries with a higher level are still open above it.
// popBlock() calls this when it finds such a mismatch for a residual style tag.
//
// Each pass of the main loop corrects one crossing block; the loop runs at most
// cResidualStyleIterationLimit times.  Afterwards the remaining entries above the
// fixed-up block are popped, and residual style tags among them are reopened.
//
// NOTE(review): the early returns inside the loop leave
// m_handlingResidualStyleAcrossBlocks set to true -- confirm that is intended.
void LegacyHTMLTreeBuilder::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
{
    HTMLStackElem* maxElem = 0;
    bool finished = false;
    bool strayTableContent = elem->strayTableContent;

    unsigned iterationCount = 0;

    m_handlingResidualStyleAcrossBlocks = true;
    while (!finished && (iterationCount++ < cResidualStyleIterationLimit)) {
        // Find the outermost element that crosses over to a higher level. If there exists another higher-level
        // element, we will do another pass, until we have corrected the innermost one.
        ExceptionCode ec = 0;
        HTMLStackElem* curr = m_blockStack;
        HTMLStackElem* prev = 0;
        HTMLStackElem* prevMaxElem = 0;
        maxElem = 0;
        finished = true;
        // Walk from the top of the stack down to |elem|.  maxElem ends up as the crossing
        // entry closest to |elem|; prevMaxElem is the entry just above it (or null).
        while (curr && curr != elem) {
            if (curr->level > elem->level) {
                if (!isAffectedByResidualStyle(curr->tagName))
                    return;
                if (maxElem)
                    // We will need another pass.
                    finished = false;
                maxElem = curr;
                prevMaxElem = prev;
            }

            prev = curr;
            curr = curr->next;
        }

        // Bail if |elem| is not on the stack or nothing crosses it.
        if (!curr || !maxElem)
            return;

        // |prev| is the entry directly above |elem|, so its saved node is the node
        // that was current when the residual element's block was pushed.
        Node* residualElem = prev->node;
        Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current;
        Node* parentElem = elem->node;

        // Check to see if the reparenting that is going to occur is allowed according to the DOM.
        // FIXME: We should either always allow it or perform an additional fixup instead of
        // just bailing here.
        // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
        if (!parentElem->childAllowed(blockElem))
            return;

        m_hasPElementInScope = Unknown;

        if (maxElem->node->parentNode() != elem->node) {
            // Walk the stack and remove any elements that aren't residual style tags.  These
            // are basically just being closed up.  Example:
            // <font><span>Moo<p>Goo</font></p>.
            // In the above example, the <span> doesn't need to be reopened.  It can just close.
            HTMLStackElem* currElem = maxElem->next;
            HTMLStackElem* prevElem = maxElem;
            while (currElem != elem) {
                HTMLStackElem* nextElem = currElem->next;
                if (!isResidualStyleTag(currElem->tagName)) {
                    // Unlink currElem; the entry above it inherits currElem's saved node and ref flag.
                    prevElem->next = nextElem;
                    prevElem->derefNode();
                    prevElem->node = currElem->node;
                    prevElem->didRefNode = currElem->didRefNode;
                    delete currElem;
                    m_treeDepth--;
                } else
                    prevElem = currElem;
                currElem = nextElem;
            }

            // We have to reopen residual tags in between maxElem and elem.  An example of this case is:
            // <font><i>Moo<p>Foo</font>.
            // In this case, we need to transform the part before the <p> into:
            // <font><i>Moo</i></font><i>
            // so that the <i> will remain open.  This involves the modification of elements
            // in the block stack.
            // This will also affect how we ultimately reparent the block, since we want it to end up
            // under the reopened residual tags (e.g., the <i> in the above example.)
            // NOTE(review): RefPtr's template argument appears to have been lost from this copy
            // of the file; presumably RefPtr<Node> -- confirm against upstream.
            RefPtr prevNode = 0;
            currElem = maxElem;
            while (currElem->node != residualElem) {
                if (isResidualStyleTag(currElem->node->localName())) {
                    // Create a clone of this element.
                    // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
                    Node* currNode = currElem->node->cloneNode(false).releaseRef();
                    reportError(ResidualStyleError, &currNode->localName());

                    // Change the stack element's node to point to the clone.
                    // The stack element adopts the reference we obtained above by calling release().
                    currElem->derefNode();
                    currElem->node = currNode;
                    currElem->didRefNode = true;

                    // Attach the previous node as a child of this new node.
                    if (prevNode)
                        currNode->appendChild(prevNode, ec);
                    else // The new parent for the block element is going to be the innermost clone.
                        parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline.  We never checked to see if blockElem could be legally placed inside the inline though.

                    prevNode = currNode;
                }

                currElem = currElem->next;
            }

            // Now append the chain of new residual style elements if one exists.
            if (prevNode)
                elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.
        }

        // Check if the block is still in the tree. If it isn't, then we don't
        // want to remove it from its parent (that would crash) or insert it into
        // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
        bool isBlockStillInTree = blockElem->parentNode();

        // We need to make a clone of |residualElem| and place it just inside |blockElem|.
        // All content of |blockElem| is reparented to be under this clone.  We then
        // reparent |blockElem| using real DOM calls so that attachment/detachment will
        // be performed to fix up the rendering tree.
        // So for this example: <b>...<p>Foo</b>Goo</p>
        // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
        //
        // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
        if (isBlockStillInTree)
            blockElem->parentNode()->removeChild(blockElem, ec);

        Node* newNodePtr = 0;
        if (blockElem->firstChild()) {
            // Step 2: Clone |residualElem|.
            // NOTE(review): RefPtr's template argument appears to have been lost from this copy
            // of the file; presumably RefPtr<Node> -- confirm against upstream.
            RefPtr newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
            newNodePtr = newNode.get();
            reportError(ResidualStyleError, &newNode->localName());

            // Step 3: Place |blockElem|'s children under |newNode|.  Remove all of the children of |blockElem|
            // before we've put |newElem| into the document.  That way we'll only do one attachment of all
            // the new content (instead of a bunch of individual attachments).
            Node* currNode = blockElem->firstChild();
            while (currNode) {
                // Fetch the sibling first: appendChild() moves currNode out of blockElem.
                Node* nextNode = currNode->nextSibling();
                newNode->appendChild(currNode, ec);
                currNode = nextNode;
            }

            // Step 4: Place |newNode| under |blockElem|.  |blockElem| is still out of the document, so no
            // attachment can occur yet.
            blockElem->appendChild(newNode.release(), ec);
        } else
            // An empty block needs no clone; stop iterating after this pass.
            finished = true;

        // Step 5: Reparent |blockElem|.  Now the full attachment of the fixed up tree takes place.
        if (isBlockStillInTree)
            parentElem->appendChild(blockElem, ec);

        // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us.  Also update
        // the node associated with the previous stack element so that when it gets popped,
        // it doesn't make the residual element the next current node.
        HTMLStackElem* currElem = maxElem;
        HTMLStackElem* prevElem = 0;
        while (currElem != elem) {
            prevElem = currElem;
            currElem = currElem->next;
        }
        prevElem->next = elem->next;
        prevElem->derefNode();
        prevElem->node = elem->node;
        prevElem->didRefNode = elem->didRefNode;
        m_treeDepth--;
        if (!finished) {
            // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
            // in the stack. We do not do this for the innermost block, because in that case the new
            // node is effectively no longer open.
            elem->next = maxElem;
            elem->node = prevMaxElem->node;
            elem->didRefNode = prevMaxElem->didRefNode;
            elem->strayTableContent = false;
            prevMaxElem->next = elem;
            ASSERT(newNodePtr);
            prevMaxElem->node = newNodePtr;
            // The stack entry now holds its own reference on the clone.
            newNodePtr->ref();
            prevMaxElem->didRefNode = true;
            m_treeDepth++;
        } else
            delete elem;
    }

    // FIXME: If we ever make a case like this work:
    // <table><b><i><form></b></form></i></table>
    // Then this check will be too simplistic.  Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
    if (strayTableContent)
        m_inStrayTableContent--;

    // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
    // In the above example, Goo should stay italic.
    // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.

    HTMLStackElem* curr = m_blockStack;
    HTMLStackElem* residualStyleStack = 0;
    unsigned stackDepth = 1;
    unsigned redundantStyleCount = 0;
    while (curr && curr != maxElem) {
        // We will actually schedule this tag for reopening
        // after we complete the close of this entire block.
        if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
            // We've overloaded the use of stack elements and are just reusing the
            // struct with a slightly different meaning to the variables.  Instead of chaining
            // from innermost to outermost, we build up a list of all the tags we need to reopen
            // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
            // to the outermost tag we need to reopen.
            // We also set curr->node to be the actual element that corresponds to the ID stored in
            // curr->id rather than the node that you should pop to when the element gets pulled off
            // the stack.
            // Count consecutive tags with the same name and equivalent attributes; once the run
            // reaches cMaxRedundantTagDepth the tag is popped instead of scheduled for reopening.
            if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
                redundantStyleCount++;
            else
                redundantStyleCount = 0;

            if (redundantStyleCount < cMaxRedundantTagDepth)
                moveOneBlockToStack(residualStyleStack);
            else
                popOneBlock();
        } else
            popOneBlock();

        // Both branches above remove the top entry, so re-read the stack top.
        curr = m_blockStack;
    }

    reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.

    m_handlingResidualStyleAcrossBlocks = false;
}

// Reopens each tag in the list |elem| by cloning its node, inserting the clone under
// the current node (or into |malformedTableParent| for the first tag, when given),
// pushing a stack entry for it and making it current.  The list entries are
// released and deleted as they are consumed.
void LegacyHTMLTreeBuilder::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent)
{
    // Loop for each tag that needs to be reopened.
    while (elem) {
        // Create a shallow clone of the DOM node for this element.
        // NOTE(review): RefPtr's template argument appears to have been lost from this copy
        // of the file; presumably RefPtr<Node> -- confirm against upstream.
        RefPtr newNode = elem->node->cloneNode(false);
        reportError(ResidualStyleError, &newNode->localName());

        // Append the new node.
// In the malformed table case, we need to insert before the table,
        // which will be the last child.
        ExceptionCode ec = 0;
        if (malformedTableParent)
            malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec);
        else
            m_current->appendChild(newNode, ec);
        // FIXME: Is it really OK to ignore the exceptions here?

        // Now push a new stack element for this node we just created.
        pushBlock(elem->tagName, elem->level);
        newNode->beginParsingChildren();

        // Set our strayTableContent boolean if needed, so that the reopened tag also knows
        // that it is inside a malformed table.
        m_blockStack->strayTableContent = malformedTableParent != 0;
        if (m_blockStack->strayTableContent)
            m_inStrayTableContent++;

        // Clear our malformed table parent variable.
        // Only the first reopened tag is inserted into the malformed table parent.
        malformedTableParent = 0;

        // Update |current| manually to point to the new node.
        setCurrent(newNode.get());

        // Advance to the next tag that needs to be reopened.
        HTMLStackElem* next = elem->next;
        elem->derefNode();
        delete elem;
        elem = next;
    }
}

// Pushes a new entry for |tagName| onto the block stack.  The entry records the
// current node and its ref flag so they can be restored when the entry is popped.
// Also updates the block count, the tree depth and the cached <p>-in-scope state.
void LegacyHTMLTreeBuilder::pushBlock(const AtomicString& tagName, int level)
{
    m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack);
    // Only entries at or above minBlockLevelTagPriority count toward m_blocksInStack.
    if (level >= minBlockLevelTagPriority)
        m_blocksInStack++;
    m_treeDepth++;
    // The ref flag for the old current node now lives in the new stack entry.
    m_didRefCurrent = false;
    if (tagName == pTag)
        m_hasPElementInScope = InScope;
    else if (isScopingTag(tagName))
        m_hasPElementInScope = NotInScope;
}

// Closes the nearest open |tagName| on the block stack, popping everything above it.
// Residual style tags popped along the way are collected and reopened afterwards.
// If |tagName| is not on the stack, nothing is popped and, when |reportErrors| is
// set, a StrayCloseTagError is reported.  If the match is crossed by an entry with a
// higher level, the close is ignored unless |tagName| is a residual style tag, in
// which case handleResidualStyleCloseTagAcrossBlocks() takes over.
void LegacyHTMLTreeBuilder::popBlock(const AtomicString& tagName, bool reportErrors)
{
    HTMLStackElem* elem = m_blockStack;

    // Give the parser quirks object, if any, a chance to veto the pop.
    if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName))
        return;

    // Find the matching entry, tracking the highest level seen above it.
    int maxLevel = 0;

    while (elem && (elem->tagName != tagName)) {
        if (maxLevel < elem->level)
            maxLevel = elem->level;
        elem = elem->next;
    }

    if (!elem) {
        if (reportErrors)
            reportError(StrayCloseTagError, &tagName, 0, true);
        return;
    }

    if (maxLevel > elem->level) {
        // We didn't match because the tag is in a different scope, e.g.,
        // <b><p>Foo</b>.  Try to correct the problem.
        if (!isResidualStyleTag(tagName))
            return;
        return handleResidualStyleCloseTagAcrossBlocks(elem);
    }

    bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName);
    HTMLStackElem* residualStyleStack = 0;
    Node* malformedTableParent = 0;

    // Restart from the top of the stack and pop entries until the match has been popped.
    elem = m_blockStack;
    unsigned stackDepth = 1;
    unsigned redundantStyleCount = 0;
    while (elem) {
        if (elem->tagName == tagName) {
            int strayTable = m_inStrayTableContent;
            popOneBlock();
            // Setting elem to null ends the loop.
            elem = 0;

            // This element was the root of some malformed content just inside an implicit or
            // explicit <tbody> or <tr>.
            // If we end up needing to reopen residual style tags, the root of the reopened chain
            // must also know that it is the root of malformed content inside a <tbody>/<tr>.
            if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) {
                // Find the nearest table at or above the current node; its parent is the
                // malformed table parent.
                Node* curr = m_current;
                while (curr && !curr->hasTagName(tableTag))
                    curr = curr->parentNode();
                malformedTableParent = curr ? curr->parentNode() : 0;
            }
        } else {
            if (m_currentFormElement && elem->tagName == formTag)
                // A <form> is being closed prematurely (and this is
                // malformed HTML).  Set an attribute on the form to clear out its
                // bottom margin.
                m_currentFormElement->setMalformed(true);

            // Schedule this tag for reopening
            // after we complete the close of this entire block.
            if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
                // We've overloaded the use of stack elements and are just reusing the
                // struct with a slightly different meaning to the variables.  Instead of chaining
                // from innermost to outermost, we build up a list of all the tags we need to reopen
                // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
                // to the outermost tag we need to reopen.
                // We also set elem->node to be the actual element that corresponds to the ID stored in
                // elem->id rather than the node that you should pop to when the element gets pulled off
                // the stack.
                // Count consecutive tags with the same name and equivalent attributes; once the run
                // reaches cMaxRedundantTagDepth the tag is popped instead of scheduled for reopening.
                if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
                    redundantStyleCount++;
                else
                    redundantStyleCount = 0;

                if (redundantStyleCount < cMaxRedundantTagDepth)
                    moveOneBlockToStack(residualStyleStack);
                else
                    popOneBlock();
            } else
                popOneBlock();
            // Both branches above remove the top entry, so re-read the stack top.
            elem = m_blockStack;
        }
    }

    reopenResidualStyleTags(residualStyleStack, malformedTableParent);
}

// Unlinks the top entry of the block stack and restores m_current and
// m_didRefCurrent from it, updating the block count, tree depth, stray table
// content counter and cached <p>-in-scope state.  The unlinked entry is returned;
// the caller decides whether to delete or reuse it.  Requires a non-empty stack.
inline HTMLStackElem* LegacyHTMLTreeBuilder::popOneBlockCommon()
{
    HTMLStackElem* elem = m_blockStack;

    // Form elements restore their state during the parsing process.
    // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available.
    if (m_current && elem->node != m_current)
        m_current->finishParsingChildren();

    if (m_blockStack->level >= minBlockLevelTagPriority) {
        ASSERT(m_blocksInStack > 0);
        m_blocksInStack--;
    }
    m_treeDepth--;
    m_blockStack = elem->next;
    m_current = elem->node;
    m_didRefCurrent = elem->didRefNode;

    if (elem->strayTableContent)
        m_inStrayTableContent--;

    if (elem->tagName == pTag)
        m_hasPElementInScope = NotInScope;
    else if (isScopingTag(elem->tagName))
        m_hasPElementInScope = Unknown;

    return elem;
}

// Pops the top stack entry, deletes it, and drops the reference on the node that
// was current before the pop if one was held.
void LegacyHTMLTreeBuilder::popOneBlock()
{
    // Store the current node before popOneBlockCommon overwrites it.
    Node* lastCurrent = m_current;
    bool didRefLastCurrent = m_didRefCurrent;

    delete popOneBlockCommon();

    if (didRefLastCurrent)
        lastCurrent->deref();
}

// Pops the top stack entry and prepends it to the list |head|, with the entry's node
// replaced by the node that was current before the pop.
void LegacyHTMLTreeBuilder::moveOneBlockToStack(HTMLStackElem*& head)
{
    // We'll be using the stack element we're popping, but for the current node.
    // See the two callers for details.

    // Store the current node before popOneBlockCommon overwrites it.
    Node* lastCurrent = m_current;
    bool didRefLastCurrent = m_didRefCurrent;

    // Pop the block, but don't deref the current node as popOneBlock does because
    // we'll be using the pointer in the new stack element.
    HTMLStackElem* elem = popOneBlockCommon();

    // Transfer the current node into the stack element.
// No need to deref the old elem->node because popOneBlockCommon transferred // it into the m_current/m_didRefCurrent fields. elem->node = lastCurrent; elem->didRefNode = didRefLastCurrent; elem->next = head; head = elem; } void LegacyHTMLTreeBuilder::checkIfHasPElementInScope() { m_hasPElementInScope = NotInScope; HTMLStackElem* elem = m_blockStack; while (elem) { const AtomicString& tagName = elem->tagName; if (tagName == pTag) { m_hasPElementInScope = InScope; return; } else if (isScopingTag(tagName)) return; elem = elem->next; } } void LegacyHTMLTreeBuilder::popInlineBlocks() { while (m_blockStack && isInline(m_current)) popOneBlock(); } void LegacyHTMLTreeBuilder::freeBlock() { while (m_blockStack) popOneBlock(); ASSERT(!m_blocksInStack); ASSERT(!m_treeDepth); } void LegacyHTMLTreeBuilder::createHead() { if (m_head) return; if (!m_document->documentElement() && !m_isParsingFragment) { insertNode(HTMLHtmlElement::create(m_document).get()); ASSERT(m_document->documentElement() || m_isParsingFragment); } m_head = HTMLHeadElement::create(m_document); if (m_isParsingFragment) return; HTMLElement* body = m_document->body(); ExceptionCode ec = 0; m_document->documentElement()->insertBefore(m_head.get(), body, ec); if (ec) m_head = 0; // If the body does not exist yet, then the should be pushed as the current block. 
if (m_head && !body) {
        pushBlock(m_head->localName(), m_head->tagPriority());
        setCurrent(m_head.get());
    }
}

// Builds the replacement subtree for an isindex token: a div containing an hr, a
// prompt text node, the isindex element itself, and a second hr.  The prompt is
// searchableIndexIntroduction() unless the token carries a prompt attribute, in
// which case it is that attribute's value followed by a space.  The token's
// attributes are handed to the isindex element and then cleared from the token.
// NOTE(review): the template arguments of PassRefPtr/RefPtr below appear to have
// been lost from this copy of the file -- confirm against upstream.
PassRefPtr LegacyHTMLTreeBuilder::handleIsindex(Token* t)
{
    RefPtr n = HTMLDivElement::create(m_document);

    NamedNodeMap* attrs = t->attrs.get();

    RefPtr isIndex = HTMLIsIndexElement::create(m_document, m_currentFormElement.get());
    isIndex->setAttributeMap(attrs);
    isIndex->setAttribute(typeAttr, "khtml_isindex");

    String text = searchableIndexIntroduction();
    if (attrs) {
        if (Attribute* a = attrs->getAttributeItem(promptAttr))
            text = a->value().string() + " ";
        t->attrs = 0;
    }

    n->legacyParserAddChild(HTMLHRElement::create(m_document));
    n->legacyParserAddChild(Text::create(m_document, text));
    n->legacyParserAddChild(isIndex.release());
    n->legacyParserAddChild(HTMLHRElement::create(m_document));

    return n.release();
}

// Marks the builder as being inside the body.  The first call also inserts any
// pending isindex element; later calls do nothing.
void LegacyHTMLTreeBuilder::startBody()
{
    if (m_inBody)
        return;

    m_inBody = true;

    if (m_isindexElement) {
        insertNode(m_isindexElement.get(), true /* don't descend into this node */);
        m_isindexElement = 0;
    }
}

// Called at the end of parsing: creates the html element for an empty document,
// unwinds the block stack, clears the current node and, unless a fragment is being
// parsed, notifies the document.
void LegacyHTMLTreeBuilder::finished()
{
    // In the case of a completely empty document, here's the place to create the HTML element.
    if (m_current && m_current->isDocumentNode() && !m_document->documentElement())
        insertNode(HTMLHtmlElement::create(m_document).get());

    // This ensures that "current" is not left pointing to a node when the document is destroyed.
    freeBlock();
    setCurrent(0);

    // Warning, this may delete the parser, so don't try to do anything else after this.
if (!m_isParsingFragment)
        m_document->finishedParsing();
}

// Formats the message template for |errorCode| and adds it to the frame's console.
// Does nothing if the document has no frame or the error code has no template.
// |tagName1| and |tagName2| are optional; their display forms replace "%tag1" and
// "%tag2" in the template.  The reported line is the parser's line number plus one,
// and the level is warning or error according to isWarning(errorCode).
// NOTE(review): |parser| is dereferenced without a null check -- confirm that
// scriptableDocumentParser() cannot return null whenever the document has a frame.
void LegacyHTMLTreeBuilder::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags)
{
    Frame* frame = m_document->frame();
    if (!frame)
        return;

    ScriptableDocumentParser* parser = m_document->scriptableDocumentParser();
    int lineNumber = parser->lineNumber() + 1;

    AtomicString tag1;
    AtomicString tag2;
    // NOTE(review): the comment and element branches below look truncated in this copy
    // of the file (empty string literals, unbalanced parenthesis); presumably markup-like
    // text inside the literals was stripped -- restore from upstream before building.
    if (tagName1) {
        if (*tagName1 == "#text")
            tag1 = "Text";
        else if (*tagName1 == "#comment")
            tag1 = "";
        else
            tag1 = (closeTags ? "";
    }
    if (tagName2) {
        if (*tagName2 == "#text")
            tag2 = "Text";
        else if (*tagName2 == "#comment")
            tag2 = "";
        else
            tag2 = (closeTags ? "";
    }

    const char* errorMsg = htmlParserErrorMessageTemplate(errorCode);
    if (!errorMsg)
        return;

    String message;
    // Content written by script gets the document.write prefix message.
    if (parser->processingContentWrittenByScript())
        message += htmlParserDocumentWriteMessage();
    message += errorMsg;
    message.replace("%tag1", tag1);
    message.replace("%tag2", tag2);

    frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType,
        isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel,
        message, lineNumber, m_document->url().string());
}

#ifdef BUILDING_ON_LEOPARD
// Leopard builds: returns false only when the page's settings request the Leopard
// Mail quirks; returns true when there is no page or no settings object.
bool shouldCreateImplicitHead(Document* document)
{
    ASSERT(document);

    Settings* settings = document->page() ? document->page()->settings() : 0;
    return settings ? !settings->needsLeopardMailQuirks() : true;
}
#elif defined(BUILDING_ON_TIGER)
// Tiger builds: returns false only when the page's settings request the Tiger
// Mail quirks; returns true when there is no page or no settings object.
bool shouldCreateImplicitHead(Document* document)
{
    ASSERT(document);

    Settings* settings = document->page() ? document->page()->settings() : 0;
    return settings ? !settings->needsTigerMailQuirks() : true;
}
#endif

// Converts |number| to a string using doubleToStringInJavaScriptFormat().
String serializeForNumberType(double number)
{
    // According to HTML5, "the best representation of the number n as a floating
    // point number" is a string produced by applying ToString() to n.
DtoaBuffer buffer; unsigned length; doubleToStringInJavaScriptFormat(number, buffer, &length); return String(buffer, length); } bool parseToDoubleForNumberType(const String& src, double* out) { // See HTML5 2.4.4.3 `Real numbers.' if (src.isEmpty()) return false; // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5. // So, check the first character. if (src[0] != '-' && (src[0] < '0' || src[0] > '9')) return false; bool valid = false; double value = src.toDouble(&valid); if (!valid) return false; // NaN and Infinity are not valid numbers according to the standard. if (!isfinite(value)) return false; // -0 -> 0 if (!value) value = 0; if (out) *out = value; return true; } }