diff options
Diffstat (limited to 'WebCore/html/HTMLParser.cpp')
-rw-r--r-- | WebCore/html/HTMLParser.cpp | 1602 |
1 files changed, 1602 insertions, 0 deletions
diff --git a/WebCore/html/HTMLParser.cpp b/WebCore/html/HTMLParser.cpp new file mode 100644 index 0000000..ba2220c --- /dev/null +++ b/WebCore/html/HTMLParser.cpp @@ -0,0 +1,1602 @@ +/* + Copyright (C) 1997 Martin Jones (mjones@kde.org) + (C) 1997 Torben Weis (weis@kde.org) + (C) 1999,2001 Lars Knoll (knoll@kde.org) + (C) 2000,2001 Dirk Mueller (mueller@kde.org) + Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#include "config.h" +#include "HTMLParser.h" + +#include "CharacterNames.h" +#include "CSSPropertyNames.h" +#include "CSSValueKeywords.h" +#include "Comment.h" +#include "Console.h" +#include "DOMWindow.h" +#include "DocumentFragment.h" +#include "DocumentType.h" +#include "Frame.h" +#include "HTMLBodyElement.h" +#include "HTMLDocument.h" +#include "HTMLDivElement.h" +#include "HTMLDListElement.h" +#include "HTMLElementFactory.h" +#include "HTMLFormElement.h" +#include "HTMLHeadElement.h" +#include "HTMLHRElement.h" +#include "HTMLHtmlElement.h" +#include "HTMLIsIndexElement.h" +#include "HTMLMapElement.h" +#include "HTMLNames.h" +#include "HTMLTableCellElement.h" +#include "HTMLTableRowElement.h" +#include "HTMLTableSectionElement.h" +#include "HTMLTokenizer.h" +#include "LocalizedStrings.h" +#include "Settings.h" +#include "Text.h" + +namespace WebCore { + +using namespace HTMLNames; + +static const unsigned cMaxRedundantTagDepth = 20; +static const unsigned cResidualStyleMaxDepth = 200; + +struct HTMLStackElem : Noncopyable { + HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) + : tagName(t) + , level(lvl) + , strayTableContent(false) + , node(n) + , didRefNode(r) + , next(nx) + { + } + + void derefNode() + { + if (didRefNode) + node->deref(); + } + + AtomicString tagName; + int level; + bool strayTableContent; + Node* node; + bool didRefNode; + HTMLStackElem* next; +}; + +/** + * The parser parses tokenized input into the document, building up the + * document tree. If the document is well-formed, parsing it is straightforward. + * + * Unfortunately, we have to handle many HTML documents that are not well-formed, + * so the parser has to be tolerant about errors. + * + * We have to take care of at least the following error conditions: + * + * 1. The element being added is explicitly forbidden inside some outer tag. + * In this case we should close all tags up to the one, which forbids + * the element, and add it afterwards. + * + * 2. We are not allowed to add the element directly. It could be that + * the person writing the document forgot some tag in between (or that the + * tag in between is optional). This could be the case with the following + * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?). + * + * 3. We want to add a block element inside to an inline element. Close all + * inline elements up to the next higher block element. + * + * 4. If this doesn't help, close elements until we are allowed to add the + * element or ignore the tag. + * + */ + +HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors) + : document(doc) + , current(doc) + , didRefCurrent(false) + , blockStack(0) + , m_hasPElementInScope(NotInScope) + , head(0) + , inBody(false) + , haveContent(false) + , haveFrameSet(false) + , m_isParsingFragment(false) + , m_reportErrors(reportErrors) + , m_handlingResidualStyleAcrossBlocks(false) + , inStrayTableContent(0) +{ +} + +HTMLParser::HTMLParser(DocumentFragment* frag) + : document(frag->document()) + , current(frag) + , didRefCurrent(true) + , blockStack(0) + , m_hasPElementInScope(NotInScope) + , head(0) + , inBody(true) + , haveContent(false) + , haveFrameSet(false) + , m_isParsingFragment(true) + , m_reportErrors(false) + , m_handlingResidualStyleAcrossBlocks(false) + , inStrayTableContent(0) +{ + if (frag) + frag->ref(); +} + +HTMLParser::~HTMLParser() +{ + freeBlock(); + if (didRefCurrent) + current->deref(); +} + +void HTMLParser::reset() +{ + ASSERT(!m_isParsingFragment); + + setCurrent(document); + + freeBlock(); + + inBody = false; + haveFrameSet = false; + haveContent = false; + inStrayTableContent = 0; + + m_currentFormElement = 0; + m_currentMapElement = 0; + head = 0; + m_isindexElement = 0; + + m_skipModeTag = nullAtom; +} + +void HTMLParser::setCurrent(Node* newCurrent) +{ + bool didRefNewCurrent = newCurrent && newCurrent != document; + if (didRefNewCurrent) + newCurrent->ref(); + if (didRefCurrent) + current->deref(); + current = newCurrent; + didRefCurrent = didRefNewCurrent; +} + +PassRefPtr<Node> HTMLParser::parseToken(Token* t) +{ + if (!m_skipModeTag.isNull()) { + if (!t->beginTag && t->tagName == m_skipModeTag) + // Found the end tag for the current skip mode, so we're done skipping. + m_skipModeTag = nullAtom; + else if (current->localName() == t->tagName) + // Do not skip </iframe>. + // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag? + ; + else + return 0; + } + + // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>. + if (t->isCloseTag(brTag) && document->inCompatMode()) { + reportError(MalformedBRError); + t->beginTag = true; + } + + if (!t->beginTag) { + processCloseTag(t); + return 0; + } + + // Ignore spaces, if we're not inside a paragraph or other inline code. + // Do not alter the text if it is part of a scriptTag. + if (t->tagName == textAtom && t->text && current->localName() != scriptTag) { + if (inBody && !skipMode() && current->localName() != styleTag && + current->localName() != titleTag && !t->text->containsOnlyWhitespace()) + haveContent = true; + + RefPtr<Node> n; + String text = t->text.get(); + unsigned charsLeft = text.length(); + while (charsLeft) { + // split large blocks of text to nodes of manageable size + n = Text::createWithLengthLimit(document, text, charsLeft); + if (!insertNode(n.get(), t->flat)) + return 0; + } + return n; + } + + RefPtr<Node> n = getNode(t); + // just to be sure, and to catch currently unimplemented stuff + if (!n) + return 0; + + // set attributes + if (n->isHTMLElement()) { + HTMLElement* e = static_cast<HTMLElement*>(n.get()); + e->setAttributeMap(t->attrs.get()); + + // take care of optional close tags + if (e->endTagRequirement() == TagStatusOptional) + popBlock(t->tagName); + + // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing + // syntax was used, report an error. + if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) { + if (t->tagName == scriptTag) + reportError(IncorrectXMLCloseScriptWarning); + else + reportError(IncorrectXMLSelfCloseError, &t->tagName); + } + } + + if (!insertNode(n.get(), t->flat)) { + // we couldn't insert the node + + if (n->isElementNode()) { + Element* e = static_cast<Element*>(n.get()); + e->setAttributeMap(0); + } + + if (m_currentMapElement == n) + m_currentMapElement = 0; + + if (m_currentFormElement == n) + m_currentFormElement = 0; + + if (head == n) + head = 0; + + return 0; + } + return n; +} + +void HTMLParser::parseDoctypeToken(DoctypeToken* t) +{ + // Ignore any doctype after the first. Ignore doctypes in fragments. + if (document->doctype() || m_isParsingFragment || current != document) + return; + + // Make a new doctype node and set it as our doctype. + document->addChild(DocumentType::create(document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID))); +} + +static bool isTableSection(Node* n) +{ + return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); +} + +static bool isTablePart(Node* n) +{ + return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) || + isTableSection(n); +} + +static bool isTableRelated(Node* n) +{ + return n->hasTagName(tableTag) || isTablePart(n); +} + +static bool isScopingTag(const AtomicString& tagName) +{ + return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag; +} + +bool HTMLParser::insertNode(Node* n, bool flat) +{ + RefPtr<Node> protectNode(n); + + const AtomicString& localName = n->localName(); + int tagPriority = n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0; + + // <table> is never allowed inside stray table content. Always pop out of the stray table content + // and close up the first table, and then start the second table as a sibling. + if (inStrayTableContent && localName == tableTag) + popBlock(tableTag); + + // let's be stupid and just try to insert it. + // this should work if the document is well-formed + Node* newNode = current->addChild(n); + if (!newNode) + return handleError(n, flat, localName, tagPriority); // Try to handle the error. + + // don't push elements without end tags (e.g., <img>) on the stack + bool parentAttached = current->attached(); + if (tagPriority > 0 && !flat) { + if (newNode == current) { + // This case should only be hit when a demoted <form> is placed inside a table. + ASSERT(localName == formTag); + reportError(FormInsideTablePartError, ¤t->localName()); + } else { + // The pushBlock function transfers ownership of current to the block stack + // so we're guaranteed that didRefCurrent is false. The code below is an + // optimized version of setCurrent that takes advantage of that fact and also + // assumes that newNode is neither 0 nor a pointer to the document. + pushBlock(localName, tagPriority); + newNode->beginParsingChildren(); + ASSERT(!didRefCurrent); + newNode->ref(); + current = newNode; + didRefCurrent = true; + } + if (parentAttached && !n->attached() && !m_isParsingFragment) + n->attach(); + } else { + if (parentAttached && !n->attached() && !m_isParsingFragment) + n->attach(); + n->finishParsingChildren(); + } + + return true; +} + +bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority) +{ + // Error handling code. This is just ad hoc handling of specific parent/child combinations. + HTMLElement* e; + bool handled = false; + + // 1. Check out the element's tag name to decide how to deal with errors. + if (n->isHTMLElement()) { + HTMLElement* h = static_cast<HTMLElement*>(n); + if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) { + if (inStrayTableContent && !isTableRelated(current)) { + reportError(MisplacedTablePartError, &localName, ¤t->localName()); + // pop out to the nearest enclosing table-related tag. + while (blockStack && !isTableRelated(current)) + popOneBlock(); + return insertNode(n); + } + } else if (h->hasLocalName(headTag)) { + if (!current->isDocumentNode() && !current->hasTagName(htmlTag)) { + reportError(MisplacedHeadError); + return false; + } + } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) { + bool createdHead = false; + if (!head) { + createHead(); + createdHead = true; + } + if (head) { + if (!createdHead) + reportError(MisplacedHeadContentError, &localName, ¤t->localName()); + if (head->addChild(n)) { + if (!n->attached() && !m_isParsingFragment) + n->attach(); + return true; + } else + return false; + } + } else if (h->hasLocalName(htmlTag)) { + if (!current->isDocumentNode() ) { + if (document->documentElement() && document->documentElement()->hasTagName(htmlTag)) { + reportError(RedundantHTMLBodyError, &localName); + // we have another <HTML> element.... apply attributes to existing one + // make sure we don't overwrite already existing attributes + NamedAttrMap* map = static_cast<Element*>(n)->attributes(true); + Element* existingHTML = static_cast<Element*>(document->documentElement()); + NamedAttrMap* bmap = existingHTML->attributes(false); + for (unsigned l = 0; map && l < map->length(); ++l) { + Attribute* it = map->attributeItem(l); + if (!bmap->getAttributeItem(it->name())) + existingHTML->setAttribute(it->name(), it->value()); + } + } + return false; + } + } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag)) { + bool createdHead = false; + if (!head) { + createHead(); + createdHead = true; + } + if (head) { + Node* newNode = head->addChild(n); + if (!newNode) { + setSkipMode(h->tagQName()); + return false; + } + + if (!createdHead) + reportError(MisplacedHeadContentError, &localName, ¤t->localName()); + + pushBlock(localName, tagPriority); + newNode->beginParsingChildren(); + setCurrent(newNode); + if (!n->attached() && !m_isParsingFragment) + n->attach(); + return true; + } + if (inBody) { + setSkipMode(h->tagQName()); + return false; + } + } else if (h->hasLocalName(bodyTag)) { + if (inBody && document->body()) { + // we have another <BODY> element.... apply attributes to existing one + // make sure we don't overwrite already existing attributes + // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor> + reportError(RedundantHTMLBodyError, &localName); + NamedAttrMap* map = static_cast<Element*>(n)->attributes(true); + Element* existingBody = document->body(); + NamedAttrMap* bmap = existingBody->attributes(false); + for (unsigned l = 0; map && l < map->length(); ++l) { + Attribute* it = map->attributeItem(l); + if (!bmap->getAttributeItem(it->name())) + existingBody->setAttribute(it->name(), it->value()); + } + return false; + } + else if (!current->isDocumentNode()) + return false; + } else if (h->hasLocalName(areaTag)) { + if (m_currentMapElement) { + reportError(MisplacedAreaError, ¤t->localName()); + m_currentMapElement->addChild(n); + if (!n->attached() && !m_isParsingFragment) + n->attach(); + handled = true; + return true; + } + return false; + } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) { + if (isTableRelated(current)) { + while (blockStack && isTablePart(current)) + popOneBlock(); + return insertNode(n); + } + } + } else if (n->isCommentNode() && !head) + return false; + + // 2. Next we examine our currently active element to do some further error handling. + if (current->isHTMLElement()) { + HTMLElement* h = static_cast<HTMLElement*>(current); + const AtomicString& currentTagName = h->localName(); + if (h->hasLocalName(htmlTag)) { + HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0; + if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) || + elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) || + elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) || + elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) || + elt->hasLocalName(baseTag))) { + if (!head) { + head = new HTMLHeadElement(document); + e = head; + insertNode(e); + handled = true; + } + } else { + if (n->isTextNode()) { + Text* t = static_cast<Text*>(n); + if (t->containsOnlyWhitespace()) + return false; + } + if (!haveFrameSet) { + e = new HTMLBodyElement(document); + startBody(); + insertNode(e); + handled = true; + } else + reportError(MisplacedFramesetContentError, &localName); + } + } else if (h->hasLocalName(headTag)) { + if (n->hasTagName(htmlTag)) + return false; + else { + // This means the body starts here... + if (!haveFrameSet) { + popBlock(currentTagName); + e = new HTMLBodyElement(document); + startBody(); + insertNode(e); + handled = true; + } else + reportError(MisplacedFramesetContentError, &localName); + } + } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag) + || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) { + reportError(MisplacedContentRetryError, &localName, ¤tTagName); + popBlock(currentTagName); + handled = true; + } else if (h->hasLocalName(captionTag)) { + // Illegal content in a caption. Close the caption and try again. + reportError(MisplacedCaptionContentError, &localName); + popBlock(currentTagName); + if (isTablePart(n)) + return insertNode(n, flat); + } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) { + if (n->hasTagName(tableTag)) { + reportError(MisplacedTableError, ¤tTagName); + if (m_isParsingFragment && !h->hasLocalName(tableTag)) + // fragment may contain table parts without <table> ancestor, pop them one by one + popBlock(h->localName()); + popBlock(localName); // end the table + handled = true; // ...and start a new one + } else { + ExceptionCode ec = 0; + Node* node = current; + Node* parent = node->parentNode(); + // A script may have removed the current node's parent from the DOM + // http://bugs.webkit.org/show_bug.cgi?id=7137 + // FIXME: we should do real recovery here and re-parent with the correct node. + if (!parent) + return false; + Node* grandparent = parent->parentNode(); + + if (n->isTextNode() || + (h->hasLocalName(trTag) && + isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) || + ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) && + !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) && + parent->hasTagName(tableTag))) { + node = (node->hasTagName(tableTag)) ? node : + ((node->hasTagName(trTag)) ? grandparent : parent); + // This can happen with fragments + if (!node) + return false; + Node* parent = node->parentNode(); + if (!parent) + return false; + parent->insertBefore(n, node, ec); + if (!ec) { + reportError(StrayTableContentError, &localName, ¤tTagName); + if (n->isHTMLElement() && tagPriority > 0 && + !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden) + { + pushBlock(localName, tagPriority); + n->beginParsingChildren(); + setCurrent(n); + inStrayTableContent++; + blockStack->strayTableContent = true; + } + return true; + } + } + + if (!ec) { + if (current->hasTagName(trTag)) { + reportError(TablePartRequiredError, &localName, &tdTag.localName()); + e = new HTMLTableCellElement(tdTag, document); + } else if (current->hasTagName(tableTag)) { + // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>, + // and it isn't really a parse error per se. + e = new HTMLTableSectionElement(tbodyTag, document); + } else { + reportError(TablePartRequiredError, &localName, &trTag.localName()); + e = new HTMLTableRowElement(document); + } + + insertNode(e); + handled = true; + } + } + } else if (h->hasLocalName(objectTag)) { + reportError(MisplacedContentRetryError, &localName, ¤tTagName); + popBlock(objectTag); + handled = true; + } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) { + if (!isInline(n)) { + popBlock(currentTagName); + handled = true; + } + } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) { + if (localName == optgroupTag) { + popBlock(currentTagName); + handled = true; + } else if (localName == selectTag) { + // IE treats a nested select as </select>. Let's do the same + popBlock(localName); + } + } else if (h->hasLocalName(selectTag)) { + if (localName == inputTag || localName == textareaTag) { + reportError(MisplacedContentRetryError, &localName, ¤tTagName); + popBlock(currentTagName); + handled = true; + } + } else if (h->hasLocalName(colgroupTag)) { + popBlock(currentTagName); + handled = true; + } else if (!h->hasLocalName(bodyTag)) { + if (isInline(current)) { + popInlineBlocks(); + handled = true; + } + } + } else if (current->isDocumentNode()) { + if (n->isTextNode()) { + Text* t = static_cast<Text*>(n); + if (t->containsOnlyWhitespace()) + return false; + } + + if (!document->documentElement()) { + e = new HTMLHtmlElement(document); + insertNode(e); + handled = true; + } + } + + // 3. If we couldn't handle the error, just return false and attempt to error-correct again. + if (!handled) { + reportError(IgnoredContentError, &localName, ¤t->localName()); + return false; + } + return insertNode(n); +} + +typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&); +typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap; + +bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + result = new Text(document, t->text.get()); + return false; +} + +bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + result = new Comment(document, t->text.get()); + return false; +} + +bool HTMLParser::headCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + if (!head || current->localName() == htmlTag) { + head = new HTMLHeadElement(document); + result = head; + } else + reportError(MisplacedHeadError); + return false; +} + +bool HTMLParser::bodyCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + // body no longer allowed if we have a frameset + if (haveFrameSet) + return false; + popBlock(headTag); + startBody(); + return true; +} + +bool HTMLParser::framesetCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + popBlock(headTag); + if (inBody && !haveFrameSet && !haveContent) { + popBlock(bodyTag); + // ### actually for IE document.body returns the now hidden "body" element + // we can't implement that behaviour now because it could cause too many + // regressions and the headaches are not worth the work as long as there is + // no site actually relying on that detail (Dirk) + if (document->body()) + document->body()->setAttribute(styleAttr, "display:none"); + inBody = false; + } + if ((haveContent || haveFrameSet) && current->localName() == htmlTag) + return false; + haveFrameSet = true; + startBody(); + return true; +} + +bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + // Only create a new form if we're not already inside one. + // This is consistent with other browsers' behavior. + if (!m_currentFormElement) { + m_currentFormElement = new HTMLFormElement(document); + result = m_currentFormElement; + pCloserCreateErrorCheck(t, result); + } + return false; +} + +bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + RefPtr<Node> n = handleIsindex(t); + if (!inBody) { + m_isindexElement = n.release(); + } else { + t->flat = true; + result = n.release(); + } + return false; +} + +bool HTMLParser::selectCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + return true; +} + +bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + pCloserCreateErrorCheck(t, result); + popBlock(dtTag); + popBlock(ddTag); + return true; +} + +bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + pCloserCreateErrorCheck(t, result); + popBlock(ddTag); + popBlock(dtTag); + return true; +} + +bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + popBlock(t->tagName); + return true; +} + +bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + pCloserCreateErrorCheck(t, result); + popBlock(t->tagName); + return true; +} + +bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + return allowNestedRedundantTag(t->tagName); +} + +bool HTMLParser::tableCellCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + popBlock(tdTag); + popBlock(thTag); + return true; +} + +bool HTMLParser::tableSectionCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + popBlock(theadTag); + popBlock(tbodyTag); + popBlock(tfootTag); + return true; +} + +bool HTMLParser::noembedCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + setSkipMode(noembedTag); + return true; +} + +bool HTMLParser::noframesCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + setSkipMode(noframesTag); + return true; +} + +bool HTMLParser::noscriptCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + if (!m_isParsingFragment) { + Settings* settings = document->settings(); + if (settings && settings->isJavaScriptEnabled()) + setSkipMode(noscriptTag); + } + return true; +} + +bool HTMLParser::pCloserCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + if (hasPElementInScope()) + popBlock(pTag); + return true; +} + +bool HTMLParser::pCloserStrictCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + if (document->inCompatMode()) + return true; + if (hasPElementInScope()) + popBlock(pTag); + return true; +} + +bool HTMLParser::mapCreateErrorCheck(Token* t, RefPtr<Node>& result) +{ + m_currentMapElement = new HTMLMapElement(document); + result = m_currentMapElement; + return false; +} + +PassRefPtr<Node> HTMLParser::getNode(Token* t) +{ + // Init our error handling table. + static FunctionMap gFunctionMap; + if (gFunctionMap.isEmpty()) { + gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); + gFunctionMap.set(addressTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); + gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); + gFunctionMap.set(blockquoteTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck); + gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); + gFunctionMap.set(centerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck); + gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck); + gFunctionMap.set(dirTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(divTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(dlTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck); + gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck); + gFunctionMap.set(fieldsetTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck); + gFunctionMap.set(h1Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(h2Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(h3Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(h4Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(h5Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(h6Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck); + gFunctionMap.set(hrTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); + gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck); + gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck); + gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck); + gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); + gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck); + gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck); + gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck); + gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); + gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck); + gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); + gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); + gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck); + gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); + gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck); + gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck); + gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); + gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck); + gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); + gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); + gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); + gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); + gFunctionMap.set(ulTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); + } + + bool proceed = true; + RefPtr<Node> result; + if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl())) + proceed = (this->*errorCheckFunc)(t, result); + if (proceed) + result = HTMLElementFactory::createHTMLElement(t->tagName, document, m_currentFormElement.get()); + return result.release(); +} + +bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName) +{ + // www.liceo.edu.mx is an example of a site that achieves a level of nesting of + // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20 + // nested tags of the same type before just ignoring them all together. + unsigned i = 0; + for (HTMLStackElem* curr = blockStack; + i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; + curr = curr->next, i++) { } + return i != cMaxRedundantTagDepth; +} + +void HTMLParser::processCloseTag(Token* t) +{ + // Support for really broken html. + // we never close the body tag, since some stupid web pages close it before the actual end of the doc. + // let's rely on the end() call to close things. + if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom) + return; + + bool checkForCloseTagErrors = true; + if (t->tagName == formTag && m_currentFormElement) { + m_currentFormElement = 0; + checkForCloseTagErrors = false; + } else if (t->tagName == mapTag) + m_currentMapElement = 0; + else if (t->tagName == pTag) + checkForCloseTagErrors = false; + + HTMLStackElem* oldElem = blockStack; + popBlock(t->tagName, checkForCloseTagErrors); + if (oldElem == blockStack && t->tagName == pTag) { + // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat + // this as a valid break, i.e., <p></p>. So go ahead and make the empty + // paragraph. + t->beginTag = true; + parseToken(t); + popBlock(t->tagName); + reportError(StrayParagraphCloseError); + } +} + +bool HTMLParser::isHeaderTag(const AtomicString& tagName) +{ + static HashSet<AtomicStringImpl*> headerTags; + if (headerTags.isEmpty()) { + headerTags.add(h1Tag.localName().impl()); + headerTags.add(h2Tag.localName().impl()); + headerTags.add(h3Tag.localName().impl()); + headerTags.add(h4Tag.localName().impl()); + headerTags.add(h5Tag.localName().impl()); + headerTags.add(h6Tag.localName().impl()); + } + + return headerTags.contains(tagName.impl()); +} + +bool HTMLParser::isInline(Node* node) const +{ + if (node->isTextNode()) + return true; + + if (node->isHTMLElement()) { + HTMLElement* e = static_cast<HTMLElement*>(node); + if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) || + e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) || + e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) || + e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) || + e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) || + e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) || + e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) || + e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) || + e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) || + e->hasLocalName(noembedTag)) + return true; + if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) { + Settings* settings = document->settings(); + if (settings && settings->isJavaScriptEnabled()) + return true; + } + } + + return false; +} + +bool HTMLParser::isResidualStyleTag(const AtomicString& tagName) +{ + static HashSet<AtomicStringImpl*> residualStyleTags; + if (residualStyleTags.isEmpty()) { + residualStyleTags.add(aTag.localName().impl()); + residualStyleTags.add(fontTag.localName().impl()); + residualStyleTags.add(ttTag.localName().impl()); + residualStyleTags.add(uTag.localName().impl()); + residualStyleTags.add(bTag.localName().impl()); + residualStyleTags.add(iTag.localName().impl()); + residualStyleTags.add(sTag.localName().impl()); + residualStyleTags.add(strikeTag.localName().impl()); + residualStyleTags.add(bigTag.localName().impl()); + residualStyleTags.add(smallTag.localName().impl()); + residualStyleTags.add(emTag.localName().impl()); + residualStyleTags.add(strongTag.localName().impl()); + residualStyleTags.add(dfnTag.localName().impl()); + residualStyleTags.add(codeTag.localName().impl()); + residualStyleTags.add(sampTag.localName().impl()); + residualStyleTags.add(kbdTag.localName().impl()); + residualStyleTags.add(varTag.localName().impl()); + residualStyleTags.add(nobrTag.localName().impl()); + } + + return residualStyleTags.contains(tagName.impl()); +} + +bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName) +{ + static HashSet<AtomicStringImpl*> unaffectedTags; + if (unaffectedTags.isEmpty()) { + unaffectedTags.add(bodyTag.localName().impl()); + unaffectedTags.add(tableTag.localName().impl()); + unaffectedTags.add(theadTag.localName().impl()); + unaffectedTags.add(tbodyTag.localName().impl()); + unaffectedTags.add(tfootTag.localName().impl()); + unaffectedTags.add(trTag.localName().impl()); + unaffectedTags.add(thTag.localName().impl()); + unaffectedTags.add(tdTag.localName().impl()); + unaffectedTags.add(captionTag.localName().impl()); + unaffectedTags.add(colgroupTag.localName().impl()); + unaffectedTags.add(colTag.localName().impl()); + unaffectedTags.add(optionTag.localName().impl()); + unaffectedTags.add(optgroupTag.localName().impl()); + unaffectedTags.add(selectTag.localName().impl()); + unaffectedTags.add(objectTag.localName().impl()); + } + + return !unaffectedTags.contains(tagName.impl()); +} + +void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem) +{ + HTMLStackElem* maxElem = 0; + bool finished = false; + bool strayTableContent = elem->strayTableContent; + + m_handlingResidualStyleAcrossBlocks = true; + while (!finished) { + // Find the outermost element that crosses over to a higher level. If there exists another higher-level + // element, we will do another pass, until we have corrected the innermost one. + ExceptionCode ec = 0; + HTMLStackElem* curr = blockStack; + HTMLStackElem* prev = 0; + HTMLStackElem* prevMaxElem = 0; + maxElem = 0; + finished = true; + while (curr && curr != elem) { + if (curr->level > elem->level) { + if (!isAffectedByResidualStyle(curr->tagName)) + return; + if (maxElem) + // We will need another pass. + finished = false; + maxElem = curr; + prevMaxElem = prev; + } + + prev = curr; + curr = curr->next; + } + + if (!curr || !maxElem) + return; + + Node* residualElem = prev->node; + Node* blockElem = prevMaxElem ? prevMaxElem->node : current; + Node* parentElem = elem->node; + + // Check to see if the reparenting that is going to occur is allowed according to the DOM. + // FIXME: We should either always allow it or perform an additional fixup instead of + // just bailing here. + // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now. + if (!parentElem->childAllowed(blockElem)) + return; + + m_hasPElementInScope = Unknown; + + if (maxElem->node->parentNode() != elem->node) { + // Walk the stack and remove any elements that aren't residual style tags. These + // are basically just being closed up. Example: + // <font><span>Moo<p>Goo</font></p>. + // In the above example, the <span> doesn't need to be reopened. It can just close. + HTMLStackElem* currElem = maxElem->next; + HTMLStackElem* prevElem = maxElem; + while (currElem != elem) { + HTMLStackElem* nextElem = currElem->next; + if (!isResidualStyleTag(currElem->tagName)) { + prevElem->next = nextElem; + prevElem->derefNode(); + prevElem->node = currElem->node; + prevElem->didRefNode = currElem->didRefNode; + delete currElem; + } + else + prevElem = currElem; + currElem = nextElem; + } + + // We have to reopen residual tags in between maxElem and elem. An example of this case is: + // <font><i>Moo<p>Foo</font>. + // In this case, we need to transform the part before the <p> into: + // <font><i>Moo</i></font><i> + // so that the <i> will remain open. This involves the modification of elements + // in the block stack. + // This will also affect how we ultimately reparent the block, since we want it to end up + // under the reopened residual tags (e.g., the <i> in the above example.) + RefPtr<Node> prevNode = 0; + currElem = maxElem; + while (currElem->node != residualElem) { + if (isResidualStyleTag(currElem->node->localName())) { + // Create a clone of this element. + // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem. + Node* currNode = currElem->node->cloneNode(false).releaseRef(); + reportError(ResidualStyleError, &currNode->localName()); + + // Change the stack element's node to point to the clone. + // The stack element adopts the reference we obtained above by calling release(). + currElem->derefNode(); + currElem->node = currNode; + currElem->didRefNode = true; + + // Attach the previous node as a child of this new node. + if (prevNode) + currNode->appendChild(prevNode, ec); + else // The new parent for the block element is going to be the innermost clone. + parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though. + + prevNode = currNode; + } + + currElem = currElem->next; + } + + // Now append the chain of new residual style elements if one exists. + if (prevNode) + elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section. + } + + // Check if the block is still in the tree. If it isn't, then we don't + // want to remove it from its parent (that would crash) or insert it into + // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778 + bool isBlockStillInTree = blockElem->parentNode(); + + // We need to make a clone of |residualElem| and place it just inside |blockElem|. + // All content of |blockElem| is reparented to be under this clone. We then + // reparent |blockElem| using real DOM calls so that attachment/detachment will + // be performed to fix up the rendering tree. + // So for this example: <b>...<p>Foo</b>Goo</p> + // The end result will be: <b>...</b><p><b>Foo</b>Goo</p> + // + // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids. + if (isBlockStillInTree) + blockElem->parentNode()->removeChild(blockElem, ec); + + Node* newNodePtr = 0; + if (blockElem->firstChild()) { + // Step 2: Clone |residualElem|. + RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids. + newNodePtr = newNode.get(); + reportError(ResidualStyleError, &newNode->localName()); + + // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem| + // before we've put |newElem| into the document. That way we'll only do one attachment of all + // the new content (instead of a bunch of individual attachments). + Node* currNode = blockElem->firstChild(); + while (currNode) { + Node* nextNode = currNode->nextSibling(); + newNode->appendChild(currNode, ec); + currNode = nextNode; + } + + // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no + // attachment can occur yet. + blockElem->appendChild(newNode.release(), ec); + } else + finished = true; + + // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place. + if (isBlockStillInTree) + parentElem->appendChild(blockElem, ec); + + // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update + // the node associated with the previous stack element so that when it gets popped, + // it doesn't make the residual element the next current node. + HTMLStackElem* currElem = maxElem; + HTMLStackElem* prevElem = 0; + while (currElem != elem) { + prevElem = currElem; + currElem = currElem->next; + } + prevElem->next = elem->next; + prevElem->derefNode(); + prevElem->node = elem->node; + prevElem->didRefNode = elem->didRefNode; + if (!finished) { + // Repurpose |elem| to represent |newNode| and insert it at the appropriate position + // in the stack. We do not do this for the innermost block, because in that case the new + // node is effectively no longer open. + elem->next = maxElem; + elem->node = prevMaxElem->node; + elem->didRefNode = prevMaxElem->didRefNode; + elem->strayTableContent = false; + prevMaxElem->next = elem; + ASSERT(newNodePtr); + prevMaxElem->node = newNodePtr; + prevMaxElem->didRefNode = false; + } else + delete elem; + } + + // FIXME: If we ever make a case like this work: + // <table><b><i><form></b></form></i></table> + // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy. + if (strayTableContent) + inStrayTableContent--; + + // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>. + // In the above example, Goo should stay italic. + // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth. + + HTMLStackElem* curr = blockStack; + HTMLStackElem* residualStyleStack = 0; + unsigned stackDepth = 1; + unsigned redundantStyleCount = 0; + while (curr && curr != maxElem) { + // We will actually schedule this tag for reopening + // after we complete the close of this entire block. + if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) { + // We've overloaded the use of stack elements and are just reusing the + // struct with a slightly different meaning to the variables. Instead of chaining + // from innermost to outermost, we build up a list of all the tags we need to reopen + // from the outermost to the innermost, i.e., residualStyleStack will end up pointing + // to the outermost tag we need to reopen. + // We also set curr->node to be the actual element that corresponds to the ID stored in + // curr->id rather than the node that you should pop to when the element gets pulled off + // the stack. + if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) + redundantStyleCount++; + else + redundantStyleCount = 0; + + if (redundantStyleCount < cMaxRedundantTagDepth) + moveOneBlockToStack(residualStyleStack); + else + popOneBlock(); + } else + popOneBlock(); + + curr = blockStack; + } + + reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content. + + m_handlingResidualStyleAcrossBlocks = false; +} + +void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent) +{ + // Loop for each tag that needs to be reopened. + while (elem) { + // Create a shallow clone of the DOM node for this element. + RefPtr<Node> newNode = elem->node->cloneNode(false); + reportError(ResidualStyleError, &newNode->localName()); + + // Append the new node. In the malformed table case, we need to insert before the table, + // which will be the last child. + ExceptionCode ec = 0; + if (malformedTableParent) + malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec); + else + current->appendChild(newNode, ec); + // FIXME: Is it really OK to ignore the exceptions here? + + // Now push a new stack element for this node we just created. + pushBlock(elem->tagName, elem->level); + newNode->beginParsingChildren(); + + // Set our strayTableContent boolean if needed, so that the reopened tag also knows + // that it is inside a malformed table. + blockStack->strayTableContent = malformedTableParent != 0; + if (blockStack->strayTableContent) + inStrayTableContent++; + + // Clear our malformed table parent variable. + malformedTableParent = 0; + + // Update |current| manually to point to the new node. + setCurrent(newNode.get()); + + // Advance to the next tag that needs to be reopened. + HTMLStackElem* next = elem->next; + elem->derefNode(); + delete elem; + elem = next; + } +} + +void HTMLParser::pushBlock(const AtomicString& tagName, int level) +{ + blockStack = new HTMLStackElem(tagName, level, current, didRefCurrent, blockStack); + didRefCurrent = false; + if (tagName == pTag) + m_hasPElementInScope = InScope; + else if (isScopingTag(tagName)) + m_hasPElementInScope = NotInScope; +} + +void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors) +{ + HTMLStackElem* elem = blockStack; + + int maxLevel = 0; + + while (elem && (elem->tagName != tagName)) { + if (maxLevel < elem->level) + maxLevel = elem->level; + elem = elem->next; + } + + if (!elem) { + if (reportErrors) + reportError(StrayCloseTagError, &tagName, 0, true); + return; + } + + if (maxLevel > elem->level) { + // We didn't match because the tag is in a different scope, e.g., + // <b><p>Foo</b>. Try to correct the problem. + if (!isResidualStyleTag(tagName)) + return; + return handleResidualStyleCloseTagAcrossBlocks(elem); + } + + bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName); + HTMLStackElem* residualStyleStack = 0; + Node* malformedTableParent = 0; + + elem = blockStack; + unsigned stackDepth = 1; + unsigned redundantStyleCount = 0; + while (elem) { + if (elem->tagName == tagName) { + int strayTable = inStrayTableContent; + popOneBlock(); + elem = 0; + + // This element was the root of some malformed content just inside an implicit or + // explicit <tbody> or <tr>. + // If we end up needing to reopen residual style tags, the root of the reopened chain + // must also know that it is the root of malformed content inside a <tbody>/<tr>. + if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) { + Node* curr = current; + while (curr && !curr->hasTagName(tableTag)) + curr = curr->parentNode(); + malformedTableParent = curr ? curr->parentNode() : 0; + } + } + else { + if (m_currentFormElement && elem->tagName == formTag) + // A <form> is being closed prematurely (and this is + // malformed HTML). Set an attribute on the form to clear out its + // bottom margin. + m_currentFormElement->setMalformed(true); + + // Schedule this tag for reopening + // after we complete the close of this entire block. + if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) { + // We've overloaded the use of stack elements and are just reusing the + // struct with a slightly different meaning to the variables. Instead of chaining + // from innermost to outermost, we build up a list of all the tags we need to reopen + // from the outermost to the innermost, i.e., residualStyleStack will end up pointing + // to the outermost tag we need to reopen. + // We also set elem->node to be the actual element that corresponds to the ID stored in + // elem->id rather than the node that you should pop to when the element gets pulled off + // the stack. + if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) + redundantStyleCount++; + else + redundantStyleCount = 0; + + if (redundantStyleCount < cMaxRedundantTagDepth) + moveOneBlockToStack(residualStyleStack); + else + popOneBlock(); + } else + popOneBlock(); + elem = blockStack; + } + } + + reopenResidualStyleTags(residualStyleStack, malformedTableParent); +} + +inline HTMLStackElem* HTMLParser::popOneBlockCommon() +{ + HTMLStackElem* elem = blockStack; + + // Form elements restore their state during the parsing process. + // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available. + if (current && elem->node != current) + current->finishParsingChildren(); + + blockStack = elem->next; + current = elem->node; + didRefCurrent = elem->didRefNode; + + if (elem->strayTableContent) + inStrayTableContent--; + + if (elem->tagName == pTag) + m_hasPElementInScope = NotInScope; + else if (isScopingTag(elem->tagName)) + m_hasPElementInScope = Unknown; + + return elem; +} + +void HTMLParser::popOneBlock() +{ + // Store the current node before popOneBlockCommon overwrites it. + Node* lastCurrent = current; + bool didRefLastCurrent = didRefCurrent; + + delete popOneBlockCommon(); + + if (didRefLastCurrent) + lastCurrent->deref(); +} + +void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head) +{ + // We'll be using the stack element we're popping, but for the current node. + // See the two callers for details. + + // Store the current node before popOneBlockCommon overwrites it. + Node* lastCurrent = current; + bool didRefLastCurrent = didRefCurrent; + + // Pop the block, but don't deref the current node as popOneBlock does because + // we'll be using the pointer in the new stack element. + HTMLStackElem* elem = popOneBlockCommon(); + + // Transfer the current node into the stack element. + // No need to deref the old elem->node because popOneBlockCommon transferred + // it into the current/didRefCurrent fields. + elem->node = lastCurrent; + elem->didRefNode = didRefLastCurrent; + elem->next = head; + head = elem; +} + +void HTMLParser::checkIfHasPElementInScope() +{ + m_hasPElementInScope = NotInScope; + HTMLStackElem* elem = blockStack; + while (elem) { + const AtomicString& tagName = elem->tagName; + if (tagName == pTag) { + m_hasPElementInScope = InScope; + return; + } else if (isScopingTag(tagName)) + return; + elem = elem->next; + } +} + +void HTMLParser::popInlineBlocks() +{ + while (blockStack && isInline(current)) + popOneBlock(); +} + +void HTMLParser::freeBlock() +{ + while (blockStack) + popOneBlock(); +} + +void HTMLParser::createHead() +{ + if (head || !document->documentElement()) + return; + + head = new HTMLHeadElement(document); + HTMLElement* body = document->body(); + ExceptionCode ec = 0; + document->documentElement()->insertBefore(head, body, ec); + if (ec) + head = 0; + + // If the body does not exist yet, then the <head> should be pushed as the current block. + if (head && !body) { + pushBlock(head->localName(), head->tagPriority()); + setCurrent(head); + } +} + +PassRefPtr<Node> HTMLParser::handleIsindex(Token* t) +{ + RefPtr<Node> n = new HTMLDivElement(document); + + NamedMappedAttrMap* attrs = t->attrs.get(); + + RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(document, m_currentFormElement.get()); + isIndex->setAttributeMap(attrs); + isIndex->setAttribute(typeAttr, "khtml_isindex"); + + String text = searchableIndexIntroduction(); + if (attrs) { + if (Attribute* a = attrs->getAttributeItem(promptAttr)) + text = a->value().string() + " "; + t->attrs = 0; + } + + n->addChild(new HTMLHRElement(document)); + n->addChild(new Text(document, text)); + n->addChild(isIndex.release()); + n->addChild(new HTMLHRElement(document)); + + return n.release(); +} + +void HTMLParser::startBody() +{ + if (inBody) + return; + + inBody = true; + + if (m_isindexElement) { + insertNode(m_isindexElement.get(), true /* don't descend into this node */); + m_isindexElement = 0; + } +} + +void HTMLParser::finished() +{ + // In the case of a completely empty document, here's the place to create the HTML element. + if (current && current->isDocumentNode() && !document->documentElement()) + insertNode(new HTMLHtmlElement(document)); + + // This ensures that "current" is not left pointing to a node when the document is destroyed. + freeBlock(); + setCurrent(0); + + // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this. + if (!m_isParsingFragment) + document->finishedParsing(); +} + +void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags) +{ + Frame* frame = document->frame(); + if (!frame) + return; + + HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer()); + int lineNumber = htmlTokenizer->lineNumber() + 1; + + AtomicString tag1; + AtomicString tag2; + if (tagName1) { + if (*tagName1 == "#text") + tag1 = "Text"; + else if (*tagName1 == "#comment") + tag1 = "<!-- comment -->"; + else + tag1 = (closeTags ? "</" : "<") + *tagName1 + ">"; + } + if (tagName2) { + if (*tagName2 == "#text") + tag2 = "Text"; + else if (*tagName2 == "#comment") + tag2 = "<!-- comment -->"; + else + tag2 = (closeTags ? "</" : "<") + *tagName2 + ">"; + } + + const char* errorMsg = htmlParserErrorMessageTemplate(errorCode); + if (!errorMsg) + return; + + String message; + if (htmlTokenizer->processingContentWrittenByScript()) + message += htmlParserDocumentWriteMessage(); + message += errorMsg; + message.replace("%tag1", tag1); + message.replace("%tag2", tag2); + + frame->domWindow()->console()->addMessage(HTMLMessageSource, + isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel, + message, lineNumber, document->url().string()); +} + +} |