/* Copyright (C) 1997 Martin Jones (mjones@kde.org) (C) 1997 Torben Weis (weis@kde.org) (C) 1999,2001 Lars Knoll (knoll@kde.org) (C) 2000,2001 Dirk Mueller (mueller@kde.org) Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config.h" #include "HTMLParser.h" #include "CharacterNames.h" #include "CSSPropertyNames.h" #include "CSSValueKeywords.h" #include "Chrome.h" #include "ChromeClient.h" #include "Comment.h" #include "Console.h" #include "DOMWindow.h" #include "DocumentFragment.h" #include "DocumentType.h" #include "Frame.h" #include "HTMLBodyElement.h" #include "HTMLDocument.h" #include "HTMLDivElement.h" #include "HTMLDListElement.h" #include "HTMLElementFactory.h" #include "HTMLFormElement.h" #include "HTMLHeadElement.h" #include "HTMLHRElement.h" #include "HTMLHtmlElement.h" #include "HTMLIsIndexElement.h" #include "HTMLMapElement.h" #include "HTMLNames.h" #include "HTMLParserQuirks.h" #include "HTMLTableCellElement.h" #include "HTMLTableRowElement.h" #include "HTMLTableSectionElement.h" #include "HTMLTokenizer.h" #include "LocalizedStrings.h" #include "Page.h" #include "Settings.h" #include "Text.h" #include namespace WebCore { using namespace HTMLNames; static const unsigned cMaxRedundantTagDepth = 20; static const unsigned cResidualStyleMaxDepth = 200; static const unsigned cResidualStyleIterationLimit = 5; static const int minBlockLevelTagPriority = 3; // A cap on the number of tags with priority minBlockLevelTagPriority or higher // allowed in m_blockStack. The cap is enforced by adding such new elements as // siblings instead of children once it is reached. static const size_t cMaxBlockDepth = 4096; struct HTMLStackElem : Noncopyable { HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) : tagName(t) , level(lvl) , strayTableContent(false) , node(n) , didRefNode(r) , next(nx) { } void derefNode() { if (didRefNode) node->deref(); } AtomicString tagName; int level; bool strayTableContent; Node* node; bool didRefNode; HTMLStackElem* next; }; /** * The parser parses tokenized input into the document, building up the * document tree. If the document is well-formed, parsing it is straightforward. * * Unfortunately, we have to handle many HTML documents that are not well-formed, * so the parser has to be tolerant about errors. * * We have to take care of at least the following error conditions: * * 1. The element being added is explicitly forbidden inside some outer tag. * In this case we should close all tags up to the one, which forbids * the element, and add it afterwards. * * 2. We are not allowed to add the element directly. It could be that * the person writing the document forgot some tag in between (or that the * tag in between is optional). This could be the case with the following * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?). * * 3. We want to add a block element inside to an inline element. Close all * inline elements up to the next higher block element. * * 4. If this doesn't help, close elements until we are allowed to add the * element or ignore the tag. * */ HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors) : m_document(doc) , m_current(doc) , m_didRefCurrent(false) , m_blockStack(0) , m_blocksInStack(0) , m_hasPElementInScope(NotInScope) , m_inBody(false) , m_haveContent(false) , m_haveFrameSet(false) , m_isParsingFragment(false) , m_reportErrors(reportErrors) , m_handlingResidualStyleAcrossBlocks(false) , m_inStrayTableContent(0) , m_scriptingPermission(FragmentScriptingAllowed) , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) { } HTMLParser::HTMLParser(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission) : m_document(frag->document()) , m_current(frag) , m_didRefCurrent(true) , m_blockStack(0) , m_blocksInStack(0) , m_hasPElementInScope(NotInScope) , m_inBody(true) , m_haveContent(false) , m_haveFrameSet(false) , m_isParsingFragment(true) , m_reportErrors(false) , m_handlingResidualStyleAcrossBlocks(false) , m_inStrayTableContent(0) , m_scriptingPermission(scriptingPermission) , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) { if (frag) frag->ref(); } HTMLParser::~HTMLParser() { freeBlock(); if (m_didRefCurrent) m_current->deref(); } void HTMLParser::reset() { ASSERT(!m_isParsingFragment); setCurrent(m_document); freeBlock(); m_inBody = false; m_haveFrameSet = false; m_haveContent = false; m_inStrayTableContent = 0; m_currentFormElement = 0; m_currentMapElement = 0; m_head = 0; m_isindexElement = 0; m_skipModeTag = nullAtom; if (m_parserQuirks) m_parserQuirks->reset(); } void HTMLParser::setCurrent(Node* newCurrent) { bool didRefNewCurrent = newCurrent && newCurrent != m_document; if (didRefNewCurrent) newCurrent->ref(); if (m_didRefCurrent) m_current->deref(); m_current = newCurrent; m_didRefCurrent = didRefNewCurrent; } inline static int tagPriorityOfNode(Node* n) { return n->isHTMLElement() ? static_cast(n)->tagPriority() : 0; } inline void HTMLParser::limitBlockDepth(int tagPriority) { if (tagPriority >= minBlockLevelTagPriority) { while (m_blocksInStack >= cMaxBlockDepth) popBlock(m_blockStack->tagName); } } inline bool HTMLParser::insertNodeAfterLimitBlockDepth(Node* n, bool flat) { limitBlockDepth(tagPriorityOfNode(n)); return insertNode(n, flat); } PassRefPtr HTMLParser::parseToken(Token* t) { if (!m_skipModeTag.isNull()) { if (!t->beginTag && t->tagName == m_skipModeTag) // Found the end tag for the current skip mode, so we're done skipping. m_skipModeTag = nullAtom; else if (m_current->localName() == t->tagName) // Do not skip . // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag? ; else return 0; } // Apparently some sites use
instead of
. Be compatible with IE and Firefox and treat this like
. if (t->isCloseTag(brTag) && m_document->inCompatMode()) { reportError(MalformedBRError); t->beginTag = true; } if (!t->beginTag) { processCloseTag(t); return 0; } // Ignore spaces, if we're not inside a paragraph or other inline code. // Do not alter the text if it is part of a scriptTag. if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) { if (m_inBody && !skipMode() && m_current->localName() != styleTag && m_current->localName() != titleTag && !t->text->containsOnlyWhitespace()) m_haveContent = true; RefPtr n; String text = t->text.get(); unsigned charsLeft = text.length(); while (charsLeft) { // split large blocks of text to nodes of manageable size n = Text::createWithLengthLimit(m_document, text, charsLeft); if (!insertNodeAfterLimitBlockDepth(n.get(), t->selfClosingTag)) return 0; } return n; } RefPtr n = getNode(t); // just to be sure, and to catch currently unimplemented stuff if (!n) return 0; // set attributes if (n->isHTMLElement()) { HTMLElement* e = static_cast(n.get()); if (m_scriptingPermission == FragmentScriptingAllowed || t->tagName != scriptTag) e->setAttributeMap(t->attrs.get(), m_scriptingPermission); // take care of optional close tags if (e->endTagRequirement() == TagStatusOptional) popBlock(t->tagName); // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing // syntax was used, report an error. if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) { if (t->tagName == scriptTag) reportError(IncorrectXMLCloseScriptWarning); else reportError(IncorrectXMLSelfCloseError, &t->tagName); } } if (!insertNodeAfterLimitBlockDepth(n.get(), t->selfClosingTag)) { // we couldn't insert the node if (n->isElementNode()) { Element* e = static_cast(n.get()); e->setAttributeMap(0); } if (m_currentMapElement == n) m_currentMapElement = 0; if (m_currentFormElement == n) m_currentFormElement = 0; if (m_head == n) m_head = 0; return 0; } return n; } void HTMLParser::parseDoctypeToken(DoctypeToken* t) { // Ignore any doctype after the first. Ignore doctypes in fragments. if (m_document->doctype() || m_isParsingFragment || m_current != m_document) return; // Make a new doctype node and set it as our doctype. m_document->addChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID))); } static bool isTableSection(const Node* n) { return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); } static bool isTablePart(const Node* n) { return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) || isTableSection(n); } static bool isTableRelated(const Node* n) { return n->hasTagName(tableTag) || isTablePart(n); } static bool isScopingTag(const AtomicString& tagName) { return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag; } bool HTMLParser::insertNode(Node* n, bool flat) { RefPtr protectNode(n); const AtomicString& localName = n->localName(); // is never allowed inside stray table content. Always pop out of the stray table content // and close up the first table, and then start the second table as a sibling. if (m_inStrayTableContent && localName == tableTag) popBlock(tableTag); if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n)) return false; int tagPriority = tagPriorityOfNode(n); // let's be stupid and just try to insert it. // this should work if the document is well-formed Node* newNode = m_current->addChild(n); if (!newNode) return handleError(n, flat, localName, tagPriority); // Try to handle the error. // don't push elements without end tags (e.g., ) on the stack bool parentAttached = m_current->attached(); if (tagPriority > 0 && !flat) { if (newNode == m_current) { // This case should only be hit when a demoted is placed inside a table. ASSERT(localName == formTag); reportError(FormInsideTablePartError, &m_current->localName()); HTMLFormElement* form = static_cast(n); form->setDemoted(true); } else { // The pushBlock function transfers ownership of current to the block stack // so we're guaranteed that m_didRefCurrent is false. The code below is an // optimized version of setCurrent that takes advantage of that fact and also // assumes that newNode is neither 0 nor a pointer to the document. pushBlock(localName, tagPriority); newNode->beginParsingChildren(); ASSERT(!m_didRefCurrent); newNode->ref(); m_current = newNode; m_didRefCurrent = true; } if (parentAttached && !n->attached() && !m_isParsingFragment) n->attach(); } else { if (parentAttached && !n->attached() && !m_isParsingFragment) n->attach(); n->finishParsingChildren(); } if (localName == htmlTag && m_document->frame()) m_document->frame()->loader()->dispatchDocumentElementAvailable(); return true; } bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority) { // Error handling code. This is just ad hoc handling of specific parent/child combinations. HTMLElement* e; bool handled = false; // 1. Check out the element's tag name to decide how to deal with errors. if (n->isHTMLElement()) { HTMLElement* h = static_cast(n); if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) { if (m_inStrayTableContent && !isTableRelated(m_current)) { reportError(MisplacedTablePartError, &localName, &m_current->localName()); // pop out to the nearest enclosing table-related tag. while (m_blockStack && !isTableRelated(m_current)) popOneBlock(); return insertNode(n); } } else if (h->hasLocalName(headTag)) { if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) { reportError(MisplacedHeadError); return false; } } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) { bool createdHead = false; if (!m_head) { createHead(); createdHead = true; } if (m_head) { if (!createdHead) reportError(MisplacedHeadContentError, &localName, &m_current->localName()); if (m_head->addChild(n)) { if (!n->attached() && !m_isParsingFragment) n->attach(); return true; } else return false; } } else if (h->hasLocalName(htmlTag)) { if (!m_current->isDocumentNode() ) { if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag)) { reportError(RedundantHTMLBodyError, &localName); // we have another element.... apply attributes to existing one // make sure we don't overwrite already existing attributes NamedNodeMap* map = static_cast(n)->attributes(true); Element* existingHTML = static_cast(m_document->documentElement()); NamedNodeMap* bmap = existingHTML->attributes(false); for (unsigned l = 0; map && l < map->length(); ++l) { Attribute* it = map->attributeItem(l); if (!bmap->getAttributeItem(it->name())) existingHTML->setAttribute(it->name(), it->value()); } } return false; } } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) { bool createdHead = false; if (!m_head) { createHead(); createdHead = true; } if (m_head) { Node* newNode = m_head->addChild(n); if (!newNode) { setSkipMode(h->tagQName()); return false; } if (!createdHead) reportError(MisplacedHeadContentError, &localName, &m_current->localName()); pushBlock(localName, tagPriority); newNode->beginParsingChildren(); setCurrent(newNode); if (!n->attached() && !m_isParsingFragment) n->attach(); return true; } if (m_inBody) { setSkipMode(h->tagQName()); return false; } } else if (h->hasLocalName(bodyTag)) { if (m_inBody && m_document->body()) { // we have another element.... apply attributes to existing one // make sure we don't overwrite already existing attributes // some sites use ... reportError(RedundantHTMLBodyError, &localName); NamedNodeMap* map = static_cast(n)->attributes(true); Element* existingBody = m_document->body(); NamedNodeMap* bmap = existingBody->attributes(false); for (unsigned l = 0; map && l < map->length(); ++l) { Attribute* it = map->attributeItem(l); if (!bmap->getAttributeItem(it->name())) existingBody->setAttribute(it->name(), it->value()); } return false; } else if (!m_current->isDocumentNode()) return false; } else if (h->hasLocalName(areaTag)) { if (m_currentMapElement) { reportError(MisplacedAreaError, &m_current->localName()); m_currentMapElement->addChild(n); if (!n->attached() && !m_isParsingFragment) n->attach(); handled = true; return true; } return false; } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) { if (isTableRelated(m_current)) { while (m_blockStack && isTablePart(m_current)) popOneBlock(); return insertNode(n); } } } else if (n->isCommentNode() && !m_head) return false; // 2. Next we examine our currently active element to do some further error handling. if (m_current->isHTMLElement()) { HTMLElement* h = static_cast(m_current); const AtomicString& currentTagName = h->localName(); if (h->hasLocalName(htmlTag)) { HTMLElement* elt = n->isHTMLElement() ? static_cast(n) : 0; if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) || elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) || elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) || elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) || elt->hasLocalName(baseTag))) { if (!m_head) { m_head = new HTMLHeadElement(headTag, m_document); insertNode(m_head.get()); handled = true; } } else { if (n->isTextNode()) { Text* t = static_cast(n); if (t->containsOnlyWhitespace()) return false; } if (!m_haveFrameSet) { // Ensure that head exists. // But not for older versions of Mail, where the implicit isn't expected - if (shouldCreateImplicitHead(m_document)) createHead(); popBlock(headTag); e = new HTMLBodyElement(bodyTag, m_document); startBody(); insertNode(e); handled = true; } else reportError(MisplacedFramesetContentError, &localName); } } else if (h->hasLocalName(headTag)) { if (n->hasTagName(htmlTag)) return false; else { // This means the body starts here... if (!m_haveFrameSet) { ASSERT(currentTagName == headTag); popBlock(currentTagName); e = new HTMLBodyElement(bodyTag, m_document); startBody(); insertNode(e); handled = true; } else reportError(MisplacedFramesetContentError, &localName); } } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag) || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) { reportError(MisplacedContentRetryError, &localName, ¤tTagName); popBlock(currentTagName); handled = true; } else if (h->hasLocalName(captionTag)) { // Illegal content in a caption. Close the caption and try again. reportError(MisplacedCaptionContentError, &localName); popBlock(currentTagName); if (isTablePart(n)) return insertNode(n, flat); } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) { if (n->hasTagName(tableTag)) { reportError(MisplacedTableError, ¤tTagName); if (m_isParsingFragment && !h->hasLocalName(tableTag)) // fragment may contain table parts without
ancestor, pop them one by one popBlock(h->localName()); popBlock(localName); // end the table handled = true; // ...and start a new one } else { ExceptionCode ec = 0; Node* node = m_current; Node* parent = node->parentNode(); // A script may have removed the current node's parent from the DOM // http://bugs.webkit.org/show_bug.cgi?id=7137 // FIXME: we should do real recovery here and re-parent with the correct node. if (!parent) return false; Node* grandparent = parent->parentNode(); if (n->isTextNode() || (h->hasLocalName(trTag) && isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) || ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) && !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) && parent->hasTagName(tableTag))) { node = (node->hasTagName(tableTag)) ? node : ((node->hasTagName(trTag)) ? grandparent : parent); // This can happen with fragments if (!node) return false; Node* parent = node->parentNode(); if (!parent) return false; parent->insertBefore(n, node, ec); if (!ec) { reportError(StrayTableContentError, &localName, ¤tTagName); if (n->isHTMLElement() && tagPriority > 0 && !flat && static_cast(n)->endTagRequirement() != TagStatusForbidden) { pushBlock(localName, tagPriority); n->beginParsingChildren(); setCurrent(n); m_inStrayTableContent++; m_blockStack->strayTableContent = true; } return true; } } if (!ec) { if (m_current->hasTagName(trTag)) { reportError(TablePartRequiredError, &localName, &tdTag.localName()); e = new HTMLTableCellElement(tdTag, m_document); } else if (m_current->hasTagName(tableTag)) { // Don't report an error in this case, since making a happens all the time when you have
, // and it isn't really a parse error per se. e = new HTMLTableSectionElement(tbodyTag, m_document); } else { reportError(TablePartRequiredError, &localName, &trTag.localName()); e = new HTMLTableRowElement(trTag, m_document); } insertNode(e); handled = true; } } } else if (h->hasLocalName(objectTag)) { reportError(MisplacedContentRetryError, &localName, ¤tTagName); popBlock(objectTag); handled = true; } else if (h->hasLocalName(pTag) || isHeadingTag(currentTagName)) { if (!isInline(n)) { popBlock(currentTagName); handled = true; } } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) { if (localName == optgroupTag) { popBlock(currentTagName); handled = true; } else if (localName == selectTag) { // IE treats a nested select as . Let's do the same popBlock(localName); } } else if (h->hasLocalName(selectTag)) { if (localName == inputTag || localName == textareaTag) { reportError(MisplacedContentRetryError, &localName, ¤tTagName); popBlock(currentTagName); handled = true; } } else if (h->hasLocalName(colgroupTag)) { popBlock(currentTagName); handled = true; } else if (!h->hasLocalName(bodyTag)) { if (isInline(m_current)) { popInlineBlocks(); handled = true; } } } else if (m_current->isDocumentNode()) { if (n->isTextNode()) { Text* t = static_cast(n); if (t->containsOnlyWhitespace()) return false; } if (!m_document->documentElement()) { e = new HTMLHtmlElement(htmlTag, m_document); insertNode(e); handled = true; } } // 3. If we couldn't handle the error, just return false and attempt to error-correct again. if (!handled) { reportError(IgnoredContentError, &localName, &m_current->localName()); return false; } return insertNode(n); } typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr&); typedef HashMap FunctionMap; bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr& result) { result = Text::create(m_document, t->text.get()); return false; } bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr& result) { result = Comment::create(m_document, t->text.get()); return false; } bool HTMLParser::headCreateErrorCheck(Token*, RefPtr& result) { if (!m_head || m_current->localName() == htmlTag) { m_head = new HTMLHeadElement(headTag, m_document); result = m_head; } else reportError(MisplacedHeadError); return false; } bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr&) { // body no longer allowed if we have a frameset if (m_haveFrameSet) return false; // Ensure that head exists (unless parsing a fragment). // But not for older versions of Mail, where the implicit isn't expected - if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) createHead(); popBlock(headTag); startBody(); return true; } bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr&) { popBlock(headTag); if (m_inBody && !m_haveFrameSet && !m_haveContent) { popBlock(bodyTag); // ### actually for IE document.body returns the now hidden "body" element // we can't implement that behaviour now because it could cause too many // regressions and the headaches are not worth the work as long as there is // no site actually relying on that detail (Dirk) if (m_document->body()) m_document->body()->setAttribute(styleAttr, "display:none"); m_inBody = false; } if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag) return false; m_haveFrameSet = true; startBody(); return true; } bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr& result) { // Only create a new form if we're not already inside one. // This is consistent with other browsers' behavior. if (!m_currentFormElement) { m_currentFormElement = new HTMLFormElement(formTag, m_document); result = m_currentFormElement; pCloserCreateErrorCheck(t, result); } return false; } bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr& result) { RefPtr n = handleIsindex(t); if (!m_inBody) m_isindexElement = n.release(); else { t->selfClosingTag = true; result = n.release(); } return false; } bool HTMLParser::selectCreateErrorCheck(Token*, RefPtr&) { return true; } bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr& result) { pCloserCreateErrorCheck(t, result); popBlock(dtTag); popBlock(ddTag); return true; } bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr& result) { pCloserCreateErrorCheck(t, result); popBlock(ddTag); popBlock(dtTag); return true; } bool HTMLParser::rpCreateErrorCheck(Token*, RefPtr&) { popBlock(rpTag); popBlock(rtTag); return true; } bool HTMLParser::rtCreateErrorCheck(Token*, RefPtr&) { popBlock(rpTag); popBlock(rtTag); return true; } bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr&) { popBlock(t->tagName); return true; } bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr& result) { pCloserCreateErrorCheck(t, result); popBlock(t->tagName); return true; } bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr&) { return allowNestedRedundantTag(t->tagName); } bool HTMLParser::tableCellCreateErrorCheck(Token*, RefPtr&) { popBlock(tdTag); popBlock(thTag); return true; } bool HTMLParser::tableSectionCreateErrorCheck(Token*, RefPtr&) { popBlock(theadTag); popBlock(tbodyTag); popBlock(tfootTag); return true; } bool HTMLParser::noembedCreateErrorCheck(Token*, RefPtr&) { setSkipMode(noembedTag); return true; } bool HTMLParser::noframesCreateErrorCheck(Token*, RefPtr&) { setSkipMode(noframesTag); return true; } bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr&) { if (!m_isParsingFragment) { Settings* settings = m_document->settings(); if (settings && settings->isJavaScriptEnabled()) setSkipMode(noscriptTag); } return true; } bool HTMLParser::pCloserCreateErrorCheck(Token*, RefPtr&) { if (hasPElementInScope()) popBlock(pTag); return true; } bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr&) { if (m_document->inCompatMode()) return true; if (hasPElementInScope()) popBlock(pTag); return true; } bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr& result) { m_currentMapElement = new HTMLMapElement(mapTag, m_document); result = m_currentMapElement; return false; } PassRefPtr HTMLParser::getNode(Token* t) { // Init our error handling table. DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ()); if (gFunctionMap.isEmpty()) { gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); gFunctionMap.set(addressTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(articleTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(asideTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); gFunctionMap.set(blockquoteTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck); gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); gFunctionMap.set(centerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck); gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck); gFunctionMap.set(dirTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(divTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(dlTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck); gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck); gFunctionMap.set(fieldsetTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(footerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck); gFunctionMap.set(h1Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(h2Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(h3Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(h4Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(h5Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(h6Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck); gFunctionMap.set(headerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(hrTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck); gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck); gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck); gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(navTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck); gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck); gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck); gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(rpTag.localName().impl(), &HTMLParser::rpCreateErrorCheck); gFunctionMap.set(rtTag.localName().impl(), &HTMLParser::rtCreateErrorCheck); gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); gFunctionMap.set(sectionTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck); gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck); gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck); gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck); gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck); gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); gFunctionMap.set(ulTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); } bool proceed = true; RefPtr result; if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl())) proceed = (this->*errorCheckFunc)(t, result); if (proceed) result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get()); return result.release(); } bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName) { // www.liceo.edu.mx is an example of a site that achieves a level of nesting of // about 1500 tags, all from a bunch of s. We will only allow at most 20 // nested tags of the same type before just ignoring them all together. unsigned i = 0; for (HTMLStackElem* curr = m_blockStack; i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; curr = curr->next, i++) { } return i != cMaxRedundantTagDepth; } void HTMLParser::processCloseTag(Token* t) { // Support for really broken html. // we never close the body tag, since some stupid web pages close it before the actual end of the doc. // let's rely on the end() call to close things. if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom) return; bool checkForCloseTagErrors = true; if (t->tagName == formTag && m_currentFormElement) { m_currentFormElement = 0; checkForCloseTagErrors = false; } else if (t->tagName == mapTag) m_currentMapElement = 0; else if (t->tagName == pTag) checkForCloseTagErrors = false; HTMLStackElem* oldElem = m_blockStack; popBlock(t->tagName, checkForCloseTagErrors); if (oldElem == m_blockStack && t->tagName == pTag) { // We encountered a stray

. Amazingly Gecko, WinIE, and MacIE all treat // this as a valid break, i.e.,

. So go ahead and make the empty // paragraph. t->beginTag = true; parseToken(t); popBlock(t->tagName); reportError(StrayParagraphCloseError); } } bool HTMLParser::isHeadingTag(const AtomicString& tagName) { DEFINE_STATIC_LOCAL(HashSet, headingTags, ()); if (headingTags.isEmpty()) { headingTags.add(h1Tag.localName().impl()); headingTags.add(h2Tag.localName().impl()); headingTags.add(h3Tag.localName().impl()); headingTags.add(h4Tag.localName().impl()); headingTags.add(h5Tag.localName().impl()); headingTags.add(h6Tag.localName().impl()); } return headingTags.contains(tagName.impl()); } bool HTMLParser::isInline(Node* node) const { if (node->isTextNode()) return true; if (node->isHTMLElement()) { HTMLElement* e = static_cast(node); if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) || e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) || e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) || e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) || e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) || e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) || e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) || e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) || e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) || e->hasLocalName(noembedTag)) return true; #if !ENABLE(XHTMLMP) if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) { Settings* settings = m_document->settings(); if (settings && settings->isJavaScriptEnabled()) return true; } #endif } return false; } bool HTMLParser::isResidualStyleTag(const AtomicString& tagName) { DEFINE_STATIC_LOCAL(HashSet, residualStyleTags, ()); if (residualStyleTags.isEmpty()) { residualStyleTags.add(aTag.localName().impl()); residualStyleTags.add(fontTag.localName().impl()); residualStyleTags.add(ttTag.localName().impl()); residualStyleTags.add(uTag.localName().impl()); residualStyleTags.add(bTag.localName().impl()); residualStyleTags.add(iTag.localName().impl()); residualStyleTags.add(sTag.localName().impl()); residualStyleTags.add(strikeTag.localName().impl()); residualStyleTags.add(bigTag.localName().impl()); residualStyleTags.add(smallTag.localName().impl()); residualStyleTags.add(emTag.localName().impl()); residualStyleTags.add(strongTag.localName().impl()); residualStyleTags.add(dfnTag.localName().impl()); residualStyleTags.add(codeTag.localName().impl()); residualStyleTags.add(sampTag.localName().impl()); residualStyleTags.add(kbdTag.localName().impl()); residualStyleTags.add(varTag.localName().impl()); residualStyleTags.add(nobrTag.localName().impl()); } return residualStyleTags.contains(tagName.impl()); } bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName) { DEFINE_STATIC_LOCAL(HashSet, unaffectedTags, ()); if (unaffectedTags.isEmpty()) { unaffectedTags.add(bodyTag.localName().impl()); unaffectedTags.add(tableTag.localName().impl()); unaffectedTags.add(theadTag.localName().impl()); unaffectedTags.add(tbodyTag.localName().impl()); unaffectedTags.add(tfootTag.localName().impl()); unaffectedTags.add(trTag.localName().impl()); unaffectedTags.add(thTag.localName().impl()); unaffectedTags.add(tdTag.localName().impl()); unaffectedTags.add(captionTag.localName().impl()); unaffectedTags.add(colgroupTag.localName().impl()); unaffectedTags.add(colTag.localName().impl()); unaffectedTags.add(optionTag.localName().impl()); unaffectedTags.add(optgroupTag.localName().impl()); unaffectedTags.add(selectTag.localName().impl()); unaffectedTags.add(objectTag.localName().impl()); unaffectedTags.add(datagridTag.localName().impl()); unaffectedTags.add(datalistTag.localName().impl()); } return !unaffectedTags.contains(tagName.impl()); } void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem) { HTMLStackElem* maxElem = 0; bool finished = false; bool strayTableContent = elem->strayTableContent; unsigned iterationCount = 0; m_handlingResidualStyleAcrossBlocks = true; while (!finished && (iterationCount++ < cResidualStyleIterationLimit)) { // Find the outermost element that crosses over to a higher level. If there exists another higher-level // element, we will do another pass, until we have corrected the innermost one. ExceptionCode ec = 0; HTMLStackElem* curr = m_blockStack; HTMLStackElem* prev = 0; HTMLStackElem* prevMaxElem = 0; maxElem = 0; finished = true; while (curr && curr != elem) { if (curr->level > elem->level) { if (!isAffectedByResidualStyle(curr->tagName)) return; if (maxElem) // We will need another pass. finished = false; maxElem = curr; prevMaxElem = prev; } prev = curr; curr = curr->next; } if (!curr || !maxElem) return; Node* residualElem = prev->node; Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current; Node* parentElem = elem->node; // Check to see if the reparenting that is going to occur is allowed according to the DOM. // FIXME: We should either always allow it or perform an additional fixup instead of // just bailing here. // Example:

blah

isn't doing a fixup right now. if (!parentElem->childAllowed(blockElem)) return; m_hasPElementInScope = Unknown; if (maxElem->node->parentNode() != elem->node) { // Walk the stack and remove any elements that aren't residual style tags. These // are basically just being closed up. Example: // Moo

Goo

. // In the above example, the doesn't need to be reopened. It can just close. HTMLStackElem* currElem = maxElem->next; HTMLStackElem* prevElem = maxElem; while (currElem != elem) { HTMLStackElem* nextElem = currElem->next; if (!isResidualStyleTag(currElem->tagName)) { prevElem->next = nextElem; prevElem->derefNode(); prevElem->node = currElem->node; prevElem->didRefNode = currElem->didRefNode; delete currElem; } else prevElem = currElem; currElem = nextElem; } // We have to reopen residual tags in between maxElem and elem. An example of this case is: // Moo

Foo. // In this case, we need to transform the part before the

into: // Moo // so that the will remain open. This involves the modification of elements // in the block stack. // This will also affect how we ultimately reparent the block, since we want it to end up // under the reopened residual tags (e.g., the in the above example.) RefPtr prevNode = 0; currElem = maxElem; while (currElem->node != residualElem) { if (isResidualStyleTag(currElem->node->localName())) { // Create a clone of this element. // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem. Node* currNode = currElem->node->cloneNode(false).releaseRef(); reportError(ResidualStyleError, &currNode->localName()); // Change the stack element's node to point to the clone. // The stack element adopts the reference we obtained above by calling release(). currElem->derefNode(); currElem->node = currNode; currElem->didRefNode = true; // Attach the previous node as a child of this new node. if (prevNode) currNode->appendChild(prevNode, ec); else // The new parent for the block element is going to be the innermost clone. parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though. prevNode = currNode; } currElem = currElem->next; } // Now append the chain of new residual style elements if one exists. if (prevNode) elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section. } // Check if the block is still in the tree. If it isn't, then we don't // want to remove it from its parent (that would crash) or insert it into // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778 bool isBlockStillInTree = blockElem->parentNode(); // We need to make a clone of |residualElem| and place it just inside |blockElem|. // All content of |blockElem| is reparented to be under this clone. We then // reparent |blockElem| using real DOM calls so that attachment/detachment will // be performed to fix up the rendering tree. // So for this example: ...

FooGoo

// The end result will be: ...

FooGoo

// // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids. if (isBlockStillInTree) blockElem->parentNode()->removeChild(blockElem, ec); Node* newNodePtr = 0; if (blockElem->firstChild()) { // Step 2: Clone |residualElem|. RefPtr newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids. newNodePtr = newNode.get(); reportError(ResidualStyleError, &newNode->localName()); // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem| // before we've put |newElem| into the document. That way we'll only do one attachment of all // the new content (instead of a bunch of individual attachments). Node* currNode = blockElem->firstChild(); while (currNode) { Node* nextNode = currNode->nextSibling(); newNode->appendChild(currNode, ec); currNode = nextNode; } // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no // attachment can occur yet. blockElem->appendChild(newNode.release(), ec); } else finished = true; // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place. if (isBlockStillInTree) parentElem->appendChild(blockElem, ec); // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update // the node associated with the previous stack element so that when it gets popped, // it doesn't make the residual element the next current node. HTMLStackElem* currElem = maxElem; HTMLStackElem* prevElem = 0; while (currElem != elem) { prevElem = currElem; currElem = currElem->next; } prevElem->next = elem->next; prevElem->derefNode(); prevElem->node = elem->node; prevElem->didRefNode = elem->didRefNode; if (!finished) { // Repurpose |elem| to represent |newNode| and insert it at the appropriate position // in the stack. We do not do this for the innermost block, because in that case the new // node is effectively no longer open. elem->next = maxElem; elem->node = prevMaxElem->node; elem->didRefNode = prevMaxElem->didRefNode; elem->strayTableContent = false; prevMaxElem->next = elem; ASSERT(newNodePtr); prevMaxElem->node = newNodePtr; newNodePtr->ref(); prevMaxElem->didRefNode = true; } else delete elem; } // FIXME: If we ever make a case like this work: //
// Then this check will be too simplistic. Right now the
chain will end up inside the , which is pretty crazy. if (strayTableContent) m_inStrayTableContent--; // Step 7: Reopen intermediate inlines, e.g.,

FooGoo

. // In the above example, Goo should stay italic. // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth. HTMLStackElem* curr = m_blockStack; HTMLStackElem* residualStyleStack = 0; unsigned stackDepth = 1; unsigned redundantStyleCount = 0; while (curr && curr != maxElem) { // We will actually schedule this tag for reopening // after we complete the close of this entire block. if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) { // We've overloaded the use of stack elements and are just reusing the // struct with a slightly different meaning to the variables. Instead of chaining // from innermost to outermost, we build up a list of all the tags we need to reopen // from the outermost to the innermost, i.e., residualStyleStack will end up pointing // to the outermost tag we need to reopen. // We also set curr->node to be the actual element that corresponds to the ID stored in // curr->id rather than the node that you should pop to when the element gets pulled off // the stack. if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) redundantStyleCount++; else redundantStyleCount = 0; if (redundantStyleCount < cMaxRedundantTagDepth) moveOneBlockToStack(residualStyleStack); else popOneBlock(); } else popOneBlock(); curr = m_blockStack; } reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content. m_handlingResidualStyleAcrossBlocks = false; } void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent) { // Loop for each tag that needs to be reopened. while (elem) { // Create a shallow clone of the DOM node for this element. RefPtr newNode = elem->node->cloneNode(false); reportError(ResidualStyleError, &newNode->localName()); // Append the new node. In the malformed table case, we need to insert before the table, // which will be the last child. ExceptionCode ec = 0; if (malformedTableParent) malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec); else m_current->appendChild(newNode, ec); // FIXME: Is it really OK to ignore the exceptions here? // Now push a new stack element for this node we just created. pushBlock(elem->tagName, elem->level); newNode->beginParsingChildren(); // Set our strayTableContent boolean if needed, so that the reopened tag also knows // that it is inside a malformed table. m_blockStack->strayTableContent = malformedTableParent != 0; if (m_blockStack->strayTableContent) m_inStrayTableContent++; // Clear our malformed table parent variable. malformedTableParent = 0; // Update |current| manually to point to the new node. setCurrent(newNode.get()); // Advance to the next tag that needs to be reopened. HTMLStackElem* next = elem->next; elem->derefNode(); delete elem; elem = next; } } void HTMLParser::pushBlock(const AtomicString& tagName, int level) { m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack); if (level >= minBlockLevelTagPriority) m_blocksInStack++; m_didRefCurrent = false; if (tagName == pTag) m_hasPElementInScope = InScope; else if (isScopingTag(tagName)) m_hasPElementInScope = NotInScope; } void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors) { HTMLStackElem* elem = m_blockStack; if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName)) return; int maxLevel = 0; while (elem && (elem->tagName != tagName)) { if (maxLevel < elem->level) maxLevel = elem->level; elem = elem->next; } if (!elem) { if (reportErrors) reportError(StrayCloseTagError, &tagName, 0, true); return; } if (maxLevel > elem->level) { // We didn't match because the tag is in a different scope, e.g., //

Foo. Try to correct the problem. if (!isResidualStyleTag(tagName)) return; return handleResidualStyleCloseTagAcrossBlocks(elem); } bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName); HTMLStackElem* residualStyleStack = 0; Node* malformedTableParent = 0; elem = m_blockStack; unsigned stackDepth = 1; unsigned redundantStyleCount = 0; while (elem) { if (elem->tagName == tagName) { int strayTable = m_inStrayTableContent; popOneBlock(); elem = 0; // This element was the root of some malformed content just inside an implicit or // explicit or . // If we end up needing to reopen residual style tags, the root of the reopened chain // must also know that it is the root of malformed content inside a /. if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) { Node* curr = m_current; while (curr && !curr->hasTagName(tableTag)) curr = curr->parentNode(); malformedTableParent = curr ? curr->parentNode() : 0; } } else { if (m_currentFormElement && elem->tagName == formTag) // A is being closed prematurely (and this is // malformed HTML). Set an attribute on the form to clear out its // bottom margin. m_currentFormElement->setMalformed(true); // Schedule this tag for reopening // after we complete the close of this entire block. if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) { // We've overloaded the use of stack elements and are just reusing the // struct with a slightly different meaning to the variables. Instead of chaining // from innermost to outermost, we build up a list of all the tags we need to reopen // from the outermost to the innermost, i.e., residualStyleStack will end up pointing // to the outermost tag we need to reopen. // We also set elem->node to be the actual element that corresponds to the ID stored in // elem->id rather than the node that you should pop to when the element gets pulled off // the stack. if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) redundantStyleCount++; else redundantStyleCount = 0; if (redundantStyleCount < cMaxRedundantTagDepth) moveOneBlockToStack(residualStyleStack); else popOneBlock(); } else popOneBlock(); elem = m_blockStack; } } reopenResidualStyleTags(residualStyleStack, malformedTableParent); } inline HTMLStackElem* HTMLParser::popOneBlockCommon() { HTMLStackElem* elem = m_blockStack; // Form elements restore their state during the parsing process. // Also, a few elements (, ) need to know when all child elements (s) are available. if (m_current && elem->node != m_current) m_current->finishParsingChildren(); if (m_blockStack->level >= minBlockLevelTagPriority) { ASSERT(m_blocksInStack > 0); m_blocksInStack--; } m_blockStack = elem->next; m_current = elem->node; m_didRefCurrent = elem->didRefNode; if (elem->strayTableContent) m_inStrayTableContent--; if (elem->tagName == pTag) m_hasPElementInScope = NotInScope; else if (isScopingTag(elem->tagName)) m_hasPElementInScope = Unknown; return elem; } void HTMLParser::popOneBlock() { // Store the current node before popOneBlockCommon overwrites it. Node* lastCurrent = m_current; bool didRefLastCurrent = m_didRefCurrent; delete popOneBlockCommon(); if (didRefLastCurrent) lastCurrent->deref(); } void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head) { // We'll be using the stack element we're popping, but for the current node. // See the two callers for details. // Store the current node before popOneBlockCommon overwrites it. Node* lastCurrent = m_current; bool didRefLastCurrent = m_didRefCurrent; // Pop the block, but don't deref the current node as popOneBlock does because // we'll be using the pointer in the new stack element. HTMLStackElem* elem = popOneBlockCommon(); // Transfer the current node into the stack element. // No need to deref the old elem->node because popOneBlockCommon transferred // it into the m_current/m_didRefCurrent fields. elem->node = lastCurrent; elem->didRefNode = didRefLastCurrent; elem->next = head; head = elem; } void HTMLParser::checkIfHasPElementInScope() { m_hasPElementInScope = NotInScope; HTMLStackElem* elem = m_blockStack; while (elem) { const AtomicString& tagName = elem->tagName; if (tagName == pTag) { m_hasPElementInScope = InScope; return; } else if (isScopingTag(tagName)) return; elem = elem->next; } } void HTMLParser::popInlineBlocks() { while (m_blockStack && isInline(m_current)) popOneBlock(); } void HTMLParser::freeBlock() { while (m_blockStack) popOneBlock(); ASSERT(!m_blocksInStack); } void HTMLParser::createHead() { if (m_head) return; if (!m_document->documentElement()) { insertNode(new HTMLHtmlElement(htmlTag, m_document)); ASSERT(m_document->documentElement()); } m_head = new HTMLHeadElement(headTag, m_document); HTMLElement* body = m_document->body(); ExceptionCode ec = 0; m_document->documentElement()->insertBefore(m_head.get(), body, ec); if (ec) m_head = 0; // If the body does not exist yet, then the should be pushed as the current block. if (m_head && !body) { pushBlock(m_head->localName(), m_head->tagPriority()); setCurrent(m_head.get()); } } PassRefPtr HTMLParser::handleIsindex(Token* t) { RefPtr n = new HTMLDivElement(divTag, m_document); NamedMappedAttrMap* attrs = t->attrs.get(); RefPtr isIndex = new HTMLIsIndexElement(isindexTag, m_document, m_currentFormElement.get()); isIndex->setAttributeMap(attrs); isIndex->setAttribute(typeAttr, "khtml_isindex"); String text = searchableIndexIntroduction(); if (attrs) { if (Attribute* a = attrs->getAttributeItem(promptAttr)) text = a->value().string() + " "; t->attrs = 0; } n->addChild(new HTMLHRElement(hrTag, m_document)); n->addChild(Text::create(m_document, text)); n->addChild(isIndex.release()); n->addChild(new HTMLHRElement(hrTag, m_document)); return n.release(); } void HTMLParser::startBody() { if (m_inBody) return; m_inBody = true; if (m_isindexElement) { insertNode(m_isindexElement.get(), true /* don't descend into this node */); m_isindexElement = 0; } } void HTMLParser::finished() { // In the case of a completely empty document, here's the place to create the HTML element. if (m_current && m_current->isDocumentNode() && !m_document->documentElement()) insertNode(new HTMLHtmlElement(htmlTag, m_document)); // This ensures that "current" is not left pointing to a node when the document is destroyed. freeBlock(); setCurrent(0); // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this. if (!m_isParsingFragment) m_document->finishedParsing(); } void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags) { Frame* frame = m_document->frame(); if (!frame) return; HTMLTokenizer* htmlTokenizer = static_cast(m_document->tokenizer()); int lineNumber = htmlTokenizer->lineNumber() + 1; AtomicString tag1; AtomicString tag2; if (tagName1) { if (*tagName1 == "#text") tag1 = "Text"; else if (*tagName1 == "#comment") tag1 = ""; else tag1 = (closeTags ? ""; } if (tagName2) { if (*tagName2 == "#text") tag2 = "Text"; else if (*tagName2 == "#comment") tag2 = ""; else tag2 = (closeTags ? ""; } const char* errorMsg = htmlParserErrorMessageTemplate(errorCode); if (!errorMsg) return; String message; if (htmlTokenizer->processingContentWrittenByScript()) message += htmlParserDocumentWriteMessage(); message += errorMsg; message.replace("%tag1", tag1); message.replace("%tag2", tag2); frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType, isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel, message, lineNumber, m_document->url().string()); } #ifdef BUILDING_ON_LEOPARD bool shouldCreateImplicitHead(Document* document) { ASSERT(document); Settings* settings = document->page() ? document->page()->settings() : 0; return settings ? !settings->needsLeopardMailQuirks() : true; } #elif defined(BUILDING_ON_TIGER) bool shouldCreateImplicitHead(Document* document) { ASSERT(document); Settings* settings = document->page() ? document->page()->settings() : 0; return settings ? !settings->needsTigerMailQuirks() : true; } #endif }