diff options
Diffstat (limited to 'WebCore/html/LegacyHTMLTreeBuilder.cpp')
-rw-r--r-- | WebCore/html/LegacyHTMLTreeBuilder.cpp | 1786 |
1 files changed, 0 insertions, 1786 deletions
diff --git a/WebCore/html/LegacyHTMLTreeBuilder.cpp b/WebCore/html/LegacyHTMLTreeBuilder.cpp deleted file mode 100644 index f39579c..0000000 --- a/WebCore/html/LegacyHTMLTreeBuilder.cpp +++ /dev/null @@ -1,1786 +0,0 @@ -/* - Copyright (C) 1997 Martin Jones (mjones@kde.org) - (C) 1997 Torben Weis (weis@kde.org) - (C) 1999,2001 Lars Knoll (knoll@kde.org) - (C) 2000,2001 Dirk Mueller (mueller@kde.org) - Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. -*/ - -#include "config.h" -#include "LegacyHTMLTreeBuilder.h" - -#include "CharacterNames.h" -#include "CSSPropertyNames.h" -#include "CSSValueKeywords.h" -#include "Chrome.h" -#include "ChromeClient.h" -#include "Comment.h" -#include "Console.h" -#include "DOMWindow.h" -#include "DocumentFragment.h" -#include "DocumentType.h" -#include "Frame.h" -#include "HTMLBodyElement.h" -#include "HTMLDocument.h" -#include "HTMLDivElement.h" -#include "HTMLDListElement.h" -#include "HTMLElementFactory.h" -#include "HTMLFormElement.h" -#include "HTMLHeadElement.h" -#include "HTMLHRElement.h" -#include "HTMLHtmlElement.h" -#include "HTMLIsIndexElement.h" -#include "HTMLMapElement.h" -#include "HTMLNames.h" -#include "HTMLParserQuirks.h" -#include "HTMLTableCellElement.h" -#include "HTMLTableRowElement.h" -#include "HTMLTableSectionElement.h" -#include "LocalizedStrings.h" -#include "Page.h" -#include "ScriptableDocumentParser.h" -#include "Settings.h" -#include "Text.h" -#include "TreeDepthLimit.h" -#include <wtf/StdLibExtras.h> -#include <wtf/dtoa.h> - -namespace WebCore { - -using namespace HTMLNames; - -static const unsigned cMaxRedundantTagDepth = 20; -static const unsigned cResidualStyleMaxDepth = 200; -static const unsigned cResidualStyleIterationLimit = 10; - - -static const int minBlockLevelTagPriority = 3; - -// A cap on the number of tags with priority minBlockLevelTagPriority or higher -// allowed in m_blockStack. The cap is enforced by adding such new elements as -// siblings instead of children once it is reached. -static const size_t cMaxBlockDepth = 4096; - - -typedef HashSet<AtomicStringImpl*> TagNameSet; - -template< size_t ArraySize > -static void addTags(TagNameSet& set, QualifiedName (&names)[ArraySize]) -{ - for (size_t x = 0; x < ArraySize; x++) { - const QualifiedName& name = names[x]; - set.add(name.localName().impl()); - } -} - -struct HTMLStackElem : Noncopyable { - HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) - : tagName(t) - , level(lvl) - , strayTableContent(false) - , node(n) - , didRefNode(r) - , next(nx) - { - } - - void derefNode() - { - if (didRefNode) - node->deref(); - } - - AtomicString tagName; - int level; - bool strayTableContent; - Node* node; - bool didRefNode; - HTMLStackElem* next; -}; - -/** - * The parser parses tokenized input into the document, building up the - * document tree. If the document is well-formed, parsing it is straightforward. - * - * Unfortunately, we have to handle many HTML documents that are not well-formed, - * so the parser has to be tolerant about errors. - * - * We have to take care of at least the following error conditions: - * - * 1. The element being added is explicitly forbidden inside some outer tag. - * In this case we should close all tags up to the one, which forbids - * the element, and add it afterwards. - * - * 2. We are not allowed to add the element directly. It could be that - * the person writing the document forgot some tag in between (or that the - * tag in between is optional). This could be the case with the following - * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?). - * - * 3. We want to add a block element inside to an inline element. Close all - * inline elements up to the next higher block element. - * - * 4. If this doesn't help, close elements until we are allowed to add the - * element or ignore the tag. - * - */ - -LegacyHTMLTreeBuilder::LegacyHTMLTreeBuilder(HTMLDocument* doc, bool reportErrors) - : m_document(doc) - , m_current(doc) - , m_didRefCurrent(false) - , m_blockStack(0) - , m_blocksInStack(0) - , m_treeDepth(0) - , m_hasPElementInScope(NotInScope) - , m_inBody(false) - , m_haveContent(false) - , m_haveFrameSet(false) - , m_isParsingFragment(false) - , m_reportErrors(reportErrors) - , m_handlingResidualStyleAcrossBlocks(false) - , m_inStrayTableContent(0) - , m_scriptingPermission(FragmentScriptingAllowed) - , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) -{ -} - -LegacyHTMLTreeBuilder::LegacyHTMLTreeBuilder(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission) - : m_document(frag->document()) - , m_current(frag) - , m_didRefCurrent(true) - , m_blockStack(0) - , m_blocksInStack(0) - , m_treeDepth(0) - , m_hasPElementInScope(NotInScope) - , m_inBody(true) - , m_haveContent(false) - , m_haveFrameSet(false) - , m_isParsingFragment(true) - , m_reportErrors(false) - , m_handlingResidualStyleAcrossBlocks(false) - , m_inStrayTableContent(0) - , m_scriptingPermission(scriptingPermission) - , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) -{ - if (frag) - frag->ref(); -} - -LegacyHTMLTreeBuilder::~LegacyHTMLTreeBuilder() -{ - freeBlock(); - if (m_didRefCurrent) - m_current->deref(); -} - -void LegacyHTMLTreeBuilder::reset() -{ - ASSERT(!m_isParsingFragment); - - setCurrent(m_document); - - freeBlock(); - - m_treeDepth = 0; - m_inBody = false; - m_haveFrameSet = false; - m_haveContent = false; - m_inStrayTableContent = 0; - - m_currentFormElement = 0; - m_currentMapElement = 0; - m_head = 0; - m_isindexElement = 0; - - m_skipModeTag = nullAtom; - - if (m_parserQuirks) - m_parserQuirks->reset(); -} - -void LegacyHTMLTreeBuilder::setCurrent(Node* newCurrent) -{ - bool didRefNewCurrent = newCurrent && newCurrent != m_document; - if (didRefNewCurrent) - newCurrent->ref(); - if (m_didRefCurrent) - m_current->deref(); - m_current = newCurrent; - m_didRefCurrent = didRefNewCurrent; -} - -inline static int tagPriorityOfNode(Node* n) -{ - return n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0; -} - -inline void LegacyHTMLTreeBuilder::limitDepth(int tagPriority) -{ - while (m_treeDepth >= maxDOMTreeDepth) - popBlock(m_blockStack->tagName); - if (tagPriority >= minBlockLevelTagPriority) { - while (m_blocksInStack >= cMaxBlockDepth) - popBlock(m_blockStack->tagName); - } -} - -inline bool LegacyHTMLTreeBuilder::insertNodeAfterLimitDepth(Node* n, bool flat) -{ - limitDepth(tagPriorityOfNode(n)); - return insertNode(n, flat); -} - -PassRefPtr<Node> LegacyHTMLTreeBuilder::parseToken(Token* t) -{ - if (!m_skipModeTag.isNull()) { - if (!t->beginTag && t->tagName == m_skipModeTag) - // Found the end tag for the current skip mode, so we're done skipping. - m_skipModeTag = nullAtom; - else if (m_current->localName() == t->tagName) - // Do not skip </iframe>. - // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag? - ; - else - return 0; - } - - // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>. - if (t->isCloseTag(brTag) && m_document->inCompatMode()) { - reportError(MalformedBRError); - t->beginTag = true; - } - - if (!t->beginTag) { - processCloseTag(t); - return 0; - } - - // Ignore spaces, if we're not inside a paragraph or other inline code. - // Do not alter the text if it is part of a scriptTag. - if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) { - if (m_inBody && !skipMode() && m_current->localName() != styleTag && - m_current->localName() != titleTag && !t->text->containsOnlyWhitespace()) - m_haveContent = true; - - // HTML5 requires text node coalescing. - // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#insert-a-character - Node* previousChild = m_current->lastChild(); - if (previousChild && previousChild->isTextNode()) { - // Only coalesce text nodes if the text node wouldn't be foster parented. - if (!m_current->hasTagName(htmlTag) - && !m_current->hasTagName(tableTag) - && !m_current->hasTagName(trTag) - && !m_current->hasTagName(theadTag) - && !m_current->hasTagName(tbodyTag) - && !m_current->hasTagName(tfootTag) - && !m_current->hasTagName(titleTag)) { - // Technically we're only supposed to merge into the previous - // text node if it was the last node inserted by the parser. - // (This was a spec modification made to make it easier for - // mozilla to run their parser in a thread.) - // In practice it does not seem to matter. - CharacterData* textNode = static_cast<CharacterData*>(previousChild); - textNode->parserAppendData(t->text); - return textNode; - } - } - - RefPtr<Node> n; - String text = t->text.get(); - unsigned charsLeft = text.length(); - while (charsLeft) { - // split large blocks of text to nodes of manageable size - n = Text::createWithLengthLimit(m_document, text, charsLeft); - if (!insertNodeAfterLimitDepth(n.get(), t->selfClosingTag)) - return 0; - } - return n; - } - - RefPtr<Node> n = getNode(t); - // just to be sure, and to catch currently unimplemented stuff - if (!n) - return 0; - - // set attributes - if (n->isHTMLElement()) { - HTMLElement* e = static_cast<HTMLElement*>(n.get()); - if (m_scriptingPermission == FragmentScriptingAllowed || t->tagName != scriptTag) - e->setAttributeMap(t->attrs.get(), m_scriptingPermission); - - // take care of optional close tags - if (e->endTagRequirement() == TagStatusOptional) - popBlock(t->tagName); - - // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing - // syntax was used, report an error. - if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) { - if (t->tagName == scriptTag) - reportError(IncorrectXMLCloseScriptWarning); - else - reportError(IncorrectXMLSelfCloseError, &t->tagName); - } - } - - if (!insertNodeAfterLimitDepth(n.get(), t->selfClosingTag)) { - // we couldn't insert the node - - if (n->isElementNode()) { - Element* e = static_cast<Element*>(n.get()); - e->setAttributeMap(0); - } - - if (m_currentMapElement == n) - m_currentMapElement = 0; - - if (m_currentFormElement == n) - m_currentFormElement = 0; - - if (m_head == n) - m_head = 0; - - return 0; - } - return n; -} - -void LegacyHTMLTreeBuilder::parseDoctypeToken(DoctypeToken* t) -{ - // Ignore any doctype after the first. Ignore doctypes in fragments. - if (m_document->doctype() || m_isParsingFragment || m_current != m_document) - return; - - // Make a new doctype node and set it as our doctype. - m_document->legacyParserAddChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID))); - if (t->m_forceQuirks) - m_document->setParseMode(Document::Compat); -} - -static bool isTableSection(const Node* n) -{ - return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); -} - -static bool isTablePart(const Node* n) -{ - return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) - || isTableSection(n); -} - -static bool isTableRelated(const Node* n) -{ - return n->hasTagName(tableTag) || isTablePart(n); -} - -static bool isScopingTag(const AtomicString& tagName) -{ - return tagName == appletTag || tagName == captionTag || tagName == tdTag - || tagName == thTag || tagName == buttonTag || tagName == marqueeTag - || tagName == objectTag || tagName == tableTag || tagName == htmlTag; -} - -bool LegacyHTMLTreeBuilder::insertNode(Node* n, bool flat) -{ - RefPtr<Node> protectNode(n); - - const AtomicString& localName = n->localName(); - - // <table> is never allowed inside stray table content. Always pop out of the stray table content - // and close up the first table, and then start the second table as a sibling. - if (m_inStrayTableContent && localName == tableTag) - popBlock(tableTag); - - if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n)) - return false; - - int tagPriority = tagPriorityOfNode(n); - - // let's be stupid and just try to insert it. - // this should work if the document is well-formed - Node* newNode = m_current->legacyParserAddChild(n); - if (!newNode) - return handleError(n, flat, localName, tagPriority); // Try to handle the error. - - // don't push elements without end tags (e.g., <img>) on the stack - bool parentAttached = m_current->attached(); - if (tagPriority > 0 && !flat) { - if (newNode == m_current) { - // This case should only be hit when a demoted <form> is placed inside a table. - ASSERT(localName == formTag); - reportError(FormInsideTablePartError, &m_current->localName()); - HTMLFormElement* form = static_cast<HTMLFormElement*>(n); - form->setDemoted(true); - } else { - // The pushBlock function transfers ownership of current to the block stack - // so we're guaranteed that m_didRefCurrent is false. The code below is an - // optimized version of setCurrent that takes advantage of that fact and also - // assumes that newNode is neither 0 nor a pointer to the document. - pushBlock(localName, tagPriority); - newNode->beginParsingChildren(); - ASSERT(!m_didRefCurrent); - newNode->ref(); - m_current = newNode; - m_didRefCurrent = true; - } - if (parentAttached && !n->attached() && !m_isParsingFragment) - n->attach(); - } else { - if (parentAttached && !n->attached() && !m_isParsingFragment) - n->attach(); - n->finishParsingChildren(); - } - - if (localName == htmlTag && m_document->frame() && !m_isParsingFragment) - m_document->frame()->loader()->dispatchDocumentElementAvailable(); - - return true; -} - -bool LegacyHTMLTreeBuilder::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority) -{ - // Error handling code. This is just ad hoc handling of specific parent/child combinations. - bool handled = false; - - // 1. Check out the element's tag name to decide how to deal with errors. - if (n->isHTMLElement()) { - HTMLElement* h = static_cast<HTMLElement*>(n); - if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) { - if (m_inStrayTableContent && !isTableRelated(m_current)) { - reportError(MisplacedTablePartError, &localName, &m_current->localName()); - // pop out to the nearest enclosing table-related tag. - while (m_blockStack && !isTableRelated(m_current)) - popOneBlock(); - return insertNode(n); - } - } else if (h->hasLocalName(headTag)) { - if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) { - reportError(MisplacedHeadError); - return false; - } - } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) { - bool createdHead = false; - if (!m_head) { - createHead(); - createdHead = true; - } - if (m_head) { - if (!createdHead) - reportError(MisplacedHeadContentError, &localName, &m_current->localName()); - if (m_head->legacyParserAddChild(n)) { - if (!n->attached() && !m_isParsingFragment) - n->attach(); - return true; - } - return false; - } - } else if (h->hasLocalName(htmlTag)) { - if (!m_current->isDocumentNode() ) { - if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag) && !m_isParsingFragment) { - reportError(RedundantHTMLBodyError, &localName); - // we have another <HTML> element.... apply attributes to existing one - // make sure we don't overwrite already existing attributes - NamedNodeMap* map = static_cast<Element*>(n)->attributes(true); - Element* existingHTML = static_cast<Element*>(m_document->documentElement()); - NamedNodeMap* bmap = existingHTML->attributes(false); - for (unsigned l = 0; map && l < map->length(); ++l) { - Attribute* it = map->attributeItem(l); - if (!bmap->getAttributeItem(it->name())) - existingHTML->setAttribute(it->name(), it->value()); - } - } - return false; - } - } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) { - bool createdHead = false; - if (!m_head) { - createHead(); - createdHead = true; - } - if (m_head) { - Node* newNode = m_head->legacyParserAddChild(n); - if (!newNode) { - setSkipMode(h->tagQName()); - return false; - } - - if (!createdHead) - reportError(MisplacedHeadContentError, &localName, &m_current->localName()); - - pushBlock(localName, tagPriority); - newNode->beginParsingChildren(); - setCurrent(newNode); - if (!n->attached() && !m_isParsingFragment) - n->attach(); - return true; - } - if (m_inBody) { - setSkipMode(h->tagQName()); - return false; - } - } else if (h->hasLocalName(bodyTag)) { - if (m_inBody && m_document->body() && !m_isParsingFragment) { - // we have another <BODY> element.... apply attributes to existing one - // make sure we don't overwrite already existing attributes - // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor> - reportError(RedundantHTMLBodyError, &localName); - NamedNodeMap* map = static_cast<Element*>(n)->attributes(true); - Element* existingBody = m_document->body(); - NamedNodeMap* bmap = existingBody->attributes(false); - for (unsigned l = 0; map && l < map->length(); ++l) { - Attribute* it = map->attributeItem(l); - if (!bmap->getAttributeItem(it->name())) - existingBody->setAttribute(it->name(), it->value()); - } - return false; - } else if (!m_current->isDocumentNode()) - return false; - } else if (h->hasLocalName(areaTag)) { - if (m_currentMapElement) { - reportError(MisplacedAreaError, &m_current->localName()); - m_currentMapElement->legacyParserAddChild(n); - if (!n->attached() && !m_isParsingFragment) - n->attach(); - handled = true; - return true; - } - return false; - } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) { - if (isTableRelated(m_current)) { - while (m_blockStack && isTablePart(m_current)) - popOneBlock(); - return insertNode(n); - } - } - } else if (n->isCommentNode() && !m_head) - return false; - - // 2. Next we examine our currently active element to do some further error handling. - if (m_current->isHTMLElement()) { - HTMLElement* h = static_cast<HTMLElement*>(m_current); - const AtomicString& currentTagName = h->localName(); - if (h->hasLocalName(htmlTag)) { - HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0; - if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) || - elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) || - elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) || - elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) || - elt->hasLocalName(baseTag))) { - if (!m_head) { - m_head = HTMLHeadElement::create(m_document); - insertNode(m_head.get()); - handled = true; - } - } else { - if (n->isTextNode()) { - Text* t = static_cast<Text*>(n); - if (t->containsOnlyWhitespace()) { - if (m_head && !m_inBody) { - // We're between </head> and <body>. According to - // the HTML5 parsing algorithm, we're supposed to - // insert whitespace text nodes into the HTML element. - ExceptionCode ec; - m_current->appendChild(n, ec); - return true; - } - return false; - } - } - if (!m_haveFrameSet) { - // Ensure that head exists. - // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795> - if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) - createHead(); - - popBlock(headTag); - startBody(); - insertNode(HTMLBodyElement::create(m_document).get()); - handled = true; - } else - reportError(MisplacedFramesetContentError, &localName); - } - } else if (h->hasLocalName(headTag)) { - if (n->hasTagName(htmlTag)) - return false; - else { - // This means the body starts here... - if (!m_haveFrameSet) { - ASSERT(currentTagName == headTag); - popBlock(currentTagName); - startBody(); - insertNode(HTMLBodyElement::create(m_document).get()); - handled = true; - } else - reportError(MisplacedFramesetContentError, &localName); - } - } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag) - || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) { - reportError(MisplacedContentRetryError, &localName, ¤tTagName); - popBlock(currentTagName); - handled = true; - } else if (h->hasLocalName(captionTag)) { - // Illegal content in a caption. Close the caption and try again. - reportError(MisplacedCaptionContentError, &localName); - popBlock(currentTagName); - if (isTablePart(n)) - return insertNode(n, flat); - } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) { - if (n->hasTagName(tableTag)) { - reportError(MisplacedTableError, ¤tTagName); - if (m_isParsingFragment && !h->hasLocalName(tableTag)) - // fragment may contain table parts without <table> ancestor, pop them one by one - popBlock(h->localName()); - popBlock(localName); // end the table - handled = true; // ...and start a new one - } else { - ExceptionCode ec = 0; - Node* node = m_current; - Node* parent = node->parentNode(); - // A script may have removed the current node's parent from the DOM - // http://bugs.webkit.org/show_bug.cgi?id=7137 - // FIXME: we should do real recovery here and re-parent with the correct node. - if (!parent) - return false; - Node* grandparent = parent->parentNode(); - - if (n->isTextNode() || - (h->hasLocalName(trTag) && - isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) || - ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) && - !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) && - parent->hasTagName(tableTag))) { - node = (node->hasTagName(tableTag)) ? node : - ((node->hasTagName(trTag)) ? grandparent : parent); - // This can happen with fragments - if (!node) - return false; - Node* parent = node->parentNode(); - if (!parent) - return false; - parent->insertBefore(n, node, ec); - if (!ec) { - reportError(StrayTableContentError, &localName, ¤tTagName); - if (n->isHTMLElement() && tagPriority > 0 && - !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden) - { - pushBlock(localName, tagPriority); - n->beginParsingChildren(); - setCurrent(n); - m_inStrayTableContent++; - m_blockStack->strayTableContent = true; - } - return true; - } - } - - if (!ec) { - if (m_current->hasTagName(trTag)) { - reportError(TablePartRequiredError, &localName, &tdTag.localName()); - insertNode(HTMLTableCellElement::create(tdTag, m_document).get()); - } else if (m_current->hasTagName(tableTag)) { - // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>, - // and it isn't really a parse error per se. - insertNode(HTMLTableSectionElement::create(tbodyTag, m_document).get()); - } else { - reportError(TablePartRequiredError, &localName, &trTag.localName()); - insertNode(HTMLTableRowElement::create(m_document).get()); - } - handled = true; - } - } - } else if (h->hasLocalName(objectTag)) { - reportError(MisplacedContentRetryError, &localName, ¤tTagName); - popBlock(objectTag); - handled = true; - } else if (h->hasLocalName(pTag) || isHeadingTag(currentTagName)) { - if (!isInline(n)) { - popBlock(currentTagName); - handled = true; - } - } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) { - if (localName == optgroupTag) { - popBlock(currentTagName); - handled = true; - } else if (localName == selectTag) { - // IE treats a nested select as </select>. Let's do the same - popBlock(localName); - } - } else if (h->hasLocalName(selectTag)) { - if (localName == inputTag || localName == textareaTag) { - reportError(MisplacedContentRetryError, &localName, ¤tTagName); - popBlock(currentTagName); - handled = true; - } - } else if (h->hasLocalName(colgroupTag)) { - popBlock(currentTagName); - handled = true; - } else if (!h->hasLocalName(bodyTag)) { - if (isInline(m_current)) { - popInlineBlocks(); - handled = true; - } - } - } else if (m_current->isDocumentNode()) { - if (n->isTextNode()) { - Text* t = static_cast<Text*>(n); - if (t->containsOnlyWhitespace()) - return false; - } - - if (!m_document->documentElement()) { - insertNode(HTMLHtmlElement::create(m_document).get()); - handled = true; - } - } - - // 3. If we couldn't handle the error, just return false and attempt to error-correct again. - if (!handled) { - reportError(IgnoredContentError, &localName, &m_current->localName()); - return false; - } - return insertNode(n); -} - -typedef bool (LegacyHTMLTreeBuilder::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&); -typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap; - -bool LegacyHTMLTreeBuilder::textCreateErrorCheck(Token* t, RefPtr<Node>& result) -{ - result = Text::create(m_document, t->text.get()); - return false; -} - -bool LegacyHTMLTreeBuilder::commentCreateErrorCheck(Token* t, RefPtr<Node>& result) -{ - result = Comment::create(m_document, t->text.get()); - return false; -} - -bool LegacyHTMLTreeBuilder::headCreateErrorCheck(Token*, RefPtr<Node>& result) -{ - if (!m_head || m_current->localName() == htmlTag) { - m_head = HTMLHeadElement::create(m_document); - result = m_head; - } else - reportError(MisplacedHeadError); - return false; -} - -bool LegacyHTMLTreeBuilder::bodyCreateErrorCheck(Token*, RefPtr<Node>&) -{ - // body no longer allowed if we have a frameset - if (m_haveFrameSet) - return false; - - // Ensure that head exists (unless parsing a fragment). - // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795> - if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) - createHead(); - - popBlock(headTag); - startBody(); - return true; -} - -bool LegacyHTMLTreeBuilder::framesetCreateErrorCheck(Token*, RefPtr<Node>&) -{ - popBlock(headTag); - if (m_inBody && !m_haveFrameSet && !m_haveContent) { - popBlock(bodyTag); - // ### actually for IE document.body returns the now hidden "body" element - // we can't implement that behaviour now because it could cause too many - // regressions and the headaches are not worth the work as long as there is - // no site actually relying on that detail (Dirk) - if (m_document->body() && !m_isParsingFragment) - m_document->body()->setAttribute(styleAttr, "display:none"); - m_inBody = false; - } - if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag) - return false; - m_haveFrameSet = true; - startBody(); - return true; -} - -bool LegacyHTMLTreeBuilder::formCreateErrorCheck(Token* t, RefPtr<Node>& result) -{ - // Only create a new form if we're not already inside one. - // This is consistent with other browsers' behavior. - if (!m_currentFormElement) { - m_currentFormElement = HTMLFormElement::create(m_document); - result = m_currentFormElement; - pCloserCreateErrorCheck(t, result); - } - return false; -} - -bool LegacyHTMLTreeBuilder::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result) -{ - RefPtr<Node> n = handleIsindex(t); - if (!m_inBody) - m_isindexElement = n.release(); - else { - t->selfClosingTag = true; - result = n.release(); - } - return false; -} - -bool LegacyHTMLTreeBuilder::selectCreateErrorCheck(Token*, RefPtr<Node>&) -{ - return true; -} - -bool LegacyHTMLTreeBuilder::ddCreateErrorCheck(Token* t, RefPtr<Node>& result) -{ - pCloserCreateErrorCheck(t, result); - popBlock(dtTag); - popBlock(ddTag); - return true; -} - -bool LegacyHTMLTreeBuilder::dtCreateErrorCheck(Token* t, RefPtr<Node>& result) -{ - pCloserCreateErrorCheck(t, result); - popBlock(ddTag); - popBlock(dtTag); - return true; -} - -bool LegacyHTMLTreeBuilder::rpCreateErrorCheck(Token*, RefPtr<Node>&) -{ - popBlock(rpTag); - popBlock(rtTag); - return true; -} - -bool LegacyHTMLTreeBuilder::rtCreateErrorCheck(Token*, RefPtr<Node>&) -{ - popBlock(rpTag); - popBlock(rtTag); - return true; -} - -bool LegacyHTMLTreeBuilder::nestedCreateErrorCheck(Token* t, RefPtr<Node>&) -{ - popBlock(t->tagName); - return true; -} - -bool LegacyHTMLTreeBuilder::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result) -{ - pCloserCreateErrorCheck(t, result); - popBlock(t->tagName); - return true; -} - -bool LegacyHTMLTreeBuilder::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&) -{ - return allowNestedRedundantTag(t->tagName); -} - -bool LegacyHTMLTreeBuilder::colCreateErrorCheck(Token*, RefPtr<Node>&) -{ - if (!m_current->hasTagName(tableTag)) - return true; - RefPtr<Element> implicitColgroup = HTMLElementFactory::createHTMLElement(colgroupTag, m_document, 0, true); - insertNode(implicitColgroup.get()); - return true; -} - -bool LegacyHTMLTreeBuilder::tableCellCreateErrorCheck(Token*, RefPtr<Node>&) -{ - popBlock(tdTag); - popBlock(thTag); - return true; -} - -bool LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&) -{ - popBlock(theadTag); - popBlock(tbodyTag); - popBlock(tfootTag); - return true; -} - -bool LegacyHTMLTreeBuilder::noembedCreateErrorCheck(Token*, RefPtr<Node>&) -{ - setSkipMode(noembedTag); - return true; -} - -bool LegacyHTMLTreeBuilder::noframesCreateErrorCheck(Token*, RefPtr<Node>&) -{ - setSkipMode(noframesTag); - return true; -} - -bool LegacyHTMLTreeBuilder::noscriptCreateErrorCheck(Token*, RefPtr<Node>&) -{ - if (!m_isParsingFragment) { - Frame* frame = m_document->frame(); - if (frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript)) - setSkipMode(noscriptTag); - } - return true; -} - -bool LegacyHTMLTreeBuilder::pCloserCreateErrorCheck(Token*, RefPtr<Node>&) -{ - if (hasPElementInScope()) - popBlock(pTag); - return true; -} - -bool LegacyHTMLTreeBuilder::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&) -{ - if (m_document->inCompatMode()) - return true; - if (hasPElementInScope()) - popBlock(pTag); - return true; -} - -bool LegacyHTMLTreeBuilder::mapCreateErrorCheck(Token*, RefPtr<Node>& result) -{ - m_currentMapElement = HTMLMapElement::create(m_document); - result = m_currentMapElement; - return false; -} - -static void mapTagToFunc(FunctionMap& map, const QualifiedName& tag, CreateErrorCheckFunc func) -{ - map.set(tag.localName().impl(), func); -} - -template< size_t ArraySize > -static void mapTagsToFunc(FunctionMap& map, QualifiedName (&names)[ArraySize], CreateErrorCheckFunc func) -{ - for (size_t x = 0; x < ArraySize; x++) { - const QualifiedName& name = names[x]; - mapTagToFunc(map, name, func); - } -} - -PassRefPtr<Node> LegacyHTMLTreeBuilder::getNode(Token* t) -{ - // Init our error handling table. - DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ()); - if (gFunctionMap.isEmpty()) { - QualifiedName nestedCreateErrorTags[] = { aTag, buttonTag, nobrTag, trTag }; - mapTagsToFunc(gFunctionMap, nestedCreateErrorTags, &LegacyHTMLTreeBuilder::nestedCreateErrorCheck); - - QualifiedName nestedStyleCreateErrorTags[] = { bTag, bigTag, iTag, markTag, sTag, smallTag, strikeTag, ttTag, uTag }; - mapTagsToFunc(gFunctionMap, nestedStyleCreateErrorTags, &LegacyHTMLTreeBuilder::nestedStyleCreateErrorCheck); - - QualifiedName pCloserCreateErrorTags[] = { addressTag, articleTag, - asideTag, blockquoteTag, centerTag, dirTag, divTag, dlTag, - fieldsetTag, footerTag, h1Tag, h2Tag, h3Tag, h4Tag, h5Tag, h6Tag, - headerTag, hgroupTag, hrTag, listingTag, menuTag, navTag, olTag, - pTag, plaintextTag, preTag, sectionTag, ulTag }; - mapTagsToFunc(gFunctionMap, pCloserCreateErrorTags, &LegacyHTMLTreeBuilder::pCloserCreateErrorCheck); - - mapTagToFunc(gFunctionMap, bodyTag, &LegacyHTMLTreeBuilder::bodyCreateErrorCheck); - mapTagToFunc(gFunctionMap, colTag, &LegacyHTMLTreeBuilder::colCreateErrorCheck); - mapTagToFunc(gFunctionMap, ddTag, &LegacyHTMLTreeBuilder::ddCreateErrorCheck); - mapTagToFunc(gFunctionMap, dtTag, &LegacyHTMLTreeBuilder::dtCreateErrorCheck); - mapTagToFunc(gFunctionMap, formTag, &LegacyHTMLTreeBuilder::formCreateErrorCheck); - mapTagToFunc(gFunctionMap, framesetTag, &LegacyHTMLTreeBuilder::framesetCreateErrorCheck); - mapTagToFunc(gFunctionMap, headTag, &LegacyHTMLTreeBuilder::headCreateErrorCheck); - mapTagToFunc(gFunctionMap, isindexTag, &LegacyHTMLTreeBuilder::isindexCreateErrorCheck); - mapTagToFunc(gFunctionMap, mapTag, &LegacyHTMLTreeBuilder::mapCreateErrorCheck); - mapTagToFunc(gFunctionMap, liTag, &LegacyHTMLTreeBuilder::nestedPCloserCreateErrorCheck); - mapTagToFunc(gFunctionMap, noembedTag, &LegacyHTMLTreeBuilder::noembedCreateErrorCheck); - mapTagToFunc(gFunctionMap, noframesTag, &LegacyHTMLTreeBuilder::noframesCreateErrorCheck); - mapTagToFunc(gFunctionMap, noscriptTag, &LegacyHTMLTreeBuilder::noscriptCreateErrorCheck); - mapTagToFunc(gFunctionMap, tableTag, &LegacyHTMLTreeBuilder::pCloserStrictCreateErrorCheck); - mapTagToFunc(gFunctionMap, rpTag, &LegacyHTMLTreeBuilder::rpCreateErrorCheck); - mapTagToFunc(gFunctionMap, rtTag, &LegacyHTMLTreeBuilder::rtCreateErrorCheck); - mapTagToFunc(gFunctionMap, selectTag, &LegacyHTMLTreeBuilder::selectCreateErrorCheck); - mapTagToFunc(gFunctionMap, tdTag, &LegacyHTMLTreeBuilder::tableCellCreateErrorCheck); - mapTagToFunc(gFunctionMap, thTag, &LegacyHTMLTreeBuilder::tableCellCreateErrorCheck); - mapTagToFunc(gFunctionMap, tbodyTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); - mapTagToFunc(gFunctionMap, tfootTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); - mapTagToFunc(gFunctionMap, theadTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); - - gFunctionMap.set(commentAtom.impl(), &LegacyHTMLTreeBuilder::commentCreateErrorCheck); - gFunctionMap.set(textAtom.impl(), &LegacyHTMLTreeBuilder::textCreateErrorCheck); - } - - bool proceed = true; - RefPtr<Node> result; - if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl())) - proceed = (this->*errorCheckFunc)(t, result); - if (proceed) - result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get()); - return result.release(); -} - -bool LegacyHTMLTreeBuilder::allowNestedRedundantTag(const AtomicString& tagName) -{ - // www.liceo.edu.mx is an example of a site that achieves a level of nesting of - // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20 - // nested tags of the same type before just ignoring them all together. - unsigned i = 0; - for (HTMLStackElem* curr = m_blockStack; - i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; - curr = curr->next, i++) { } - return i != cMaxRedundantTagDepth; -} - -void LegacyHTMLTreeBuilder::processCloseTag(Token* t) -{ - // Support for really broken html. - // we never close the body tag, since some stupid web pages close it before the actual end of the doc. - // let's rely on the end() call to close things. - if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom) - return; - - bool checkForCloseTagErrors = true; - if (t->tagName == formTag && m_currentFormElement) { - m_currentFormElement = 0; - checkForCloseTagErrors = false; - } else if (t->tagName == mapTag) - m_currentMapElement = 0; - else if (t->tagName == pTag) - checkForCloseTagErrors = false; - - HTMLStackElem* oldElem = m_blockStack; - popBlock(t->tagName, checkForCloseTagErrors); - if (oldElem == m_blockStack && t->tagName == pTag) { - // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat - // this as a valid break, i.e., <p></p>. So go ahead and make the empty - // paragraph. - t->beginTag = true; - parseToken(t); - popBlock(t->tagName); - reportError(StrayParagraphCloseError); - } -} - -bool LegacyHTMLTreeBuilder::isHeadingTag(const AtomicString& tagName) -{ - DEFINE_STATIC_LOCAL(TagNameSet, headingTags, ()); - if (headingTags.isEmpty()) { - QualifiedName tagNames[] = { h1Tag, h2Tag, h3Tag, h4Tag, h5Tag, h6Tag }; - addTags(headingTags, tagNames); - } - return headingTags.contains(tagName.impl()); -} - -bool LegacyHTMLTreeBuilder::isInline(Node* node) const -{ - if (node->isTextNode()) - return true; - - if (node->isHTMLElement()) { - HTMLElement* e = static_cast<HTMLElement*>(node); - if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) || - e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) || - e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) || - e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) || - e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) || - e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) || - e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) || - e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) || - e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) || - e->hasLocalName(noembedTag) || e->hasLocalName(markTag)) - return true; -#if !ENABLE(XHTMLMP) - if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) { - Frame* frame = m_document->frame(); - if (frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript)) - return true; - } -#endif - } - - return false; -} - -bool LegacyHTMLTreeBuilder::isResidualStyleTag(const AtomicString& tagName) -{ - DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ()); - if (residualStyleTags.isEmpty()) { - QualifiedName tagNames[] = { aTag, fontTag, ttTag, uTag, bTag, iTag, - sTag, strikeTag, bigTag, smallTag, emTag, strongTag, dfnTag, - codeTag, sampTag, kbdTag, varTag, nobrTag, markTag }; - addTags(residualStyleTags, tagNames); - } - return residualStyleTags.contains(tagName.impl()); -} - -bool LegacyHTMLTreeBuilder::isAffectedByResidualStyle(const AtomicString& tagName) -{ - DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ()); - if (unaffectedTags.isEmpty()) { - QualifiedName tagNames[] = { bodyTag, tableTag, theadTag, tbodyTag, - tfootTag, trTag, thTag, tdTag, captionTag, colgroupTag, colTag, - optionTag, optgroupTag, selectTag, objectTag, datagridTag, datalistTag }; - addTags(unaffectedTags, tagNames); - } - return !unaffectedTags.contains(tagName.impl()); -} - -void LegacyHTMLTreeBuilder::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem) -{ - HTMLStackElem* maxElem = 0; - bool finished = false; - bool strayTableContent = elem->strayTableContent; - - unsigned iterationCount = 0; - - m_handlingResidualStyleAcrossBlocks = true; - while (!finished && (iterationCount++ < cResidualStyleIterationLimit)) { - // Find the outermost element that crosses over to a higher level. If there exists another higher-level - // element, we will do another pass, until we have corrected the innermost one. - ExceptionCode ec = 0; - HTMLStackElem* curr = m_blockStack; - HTMLStackElem* prev = 0; - HTMLStackElem* prevMaxElem = 0; - maxElem = 0; - finished = true; - while (curr && curr != elem) { - if (curr->level > elem->level) { - if (!isAffectedByResidualStyle(curr->tagName)) - return; - if (maxElem) - // We will need another pass. - finished = false; - maxElem = curr; - prevMaxElem = prev; - } - - prev = curr; - curr = curr->next; - } - - if (!curr || !maxElem) - return; - - Node* residualElem = prev->node; - Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current; - Node* parentElem = elem->node; - - // Check to see if the reparenting that is going to occur is allowed according to the DOM. - // FIXME: We should either always allow it or perform an additional fixup instead of - // just bailing here. - // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now. - if (!parentElem->childAllowed(blockElem)) - return; - - m_hasPElementInScope = Unknown; - - if (maxElem->node->parentNode() != elem->node) { - // Walk the stack and remove any elements that aren't residual style tags. These - // are basically just being closed up. Example: - // <font><span>Moo<p>Goo</font></p>. - // In the above example, the <span> doesn't need to be reopened. It can just close. - HTMLStackElem* currElem = maxElem->next; - HTMLStackElem* prevElem = maxElem; - while (currElem != elem) { - HTMLStackElem* nextElem = currElem->next; - if (!isResidualStyleTag(currElem->tagName)) { - prevElem->next = nextElem; - prevElem->derefNode(); - prevElem->node = currElem->node; - prevElem->didRefNode = currElem->didRefNode; - delete currElem; - m_treeDepth--; - } else - prevElem = currElem; - currElem = nextElem; - } - - // We have to reopen residual tags in between maxElem and elem. An example of this case is: - // <font><i>Moo<p>Foo</font>. - // In this case, we need to transform the part before the <p> into: - // <font><i>Moo</i></font><i> - // so that the <i> will remain open. This involves the modification of elements - // in the block stack. - // This will also affect how we ultimately reparent the block, since we want it to end up - // under the reopened residual tags (e.g., the <i> in the above example.) - RefPtr<Node> prevNode = 0; - currElem = maxElem; - while (currElem->node != residualElem) { - if (isResidualStyleTag(currElem->node->localName())) { - // Create a clone of this element. - // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem. - Node* currNode = currElem->node->cloneNode(false).releaseRef(); - reportError(ResidualStyleError, &currNode->localName()); - - // Change the stack element's node to point to the clone. - // The stack element adopts the reference we obtained above by calling release(). - currElem->derefNode(); - currElem->node = currNode; - currElem->didRefNode = true; - - // Attach the previous node as a child of this new node. - if (prevNode) - currNode->appendChild(prevNode, ec); - else // The new parent for the block element is going to be the innermost clone. - parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though. - - prevNode = currNode; - } - - currElem = currElem->next; - } - - // Now append the chain of new residual style elements if one exists. - if (prevNode) - elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section. - } - - // Check if the block is still in the tree. If it isn't, then we don't - // want to remove it from its parent (that would crash) or insert it into - // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778 - bool isBlockStillInTree = blockElem->parentNode(); - - // We need to make a clone of |residualElem| and place it just inside |blockElem|. - // All content of |blockElem| is reparented to be under this clone. We then - // reparent |blockElem| using real DOM calls so that attachment/detachment will - // be performed to fix up the rendering tree. - // So for this example: <b>...<p>Foo</b>Goo</p> - // The end result will be: <b>...</b><p><b>Foo</b>Goo</p> - // - // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids. - if (isBlockStillInTree) - blockElem->parentNode()->removeChild(blockElem, ec); - - Node* newNodePtr = 0; - if (blockElem->firstChild()) { - // Step 2: Clone |residualElem|. - RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids. - newNodePtr = newNode.get(); - reportError(ResidualStyleError, &newNode->localName()); - - // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem| - // before we've put |newElem| into the document. That way we'll only do one attachment of all - // the new content (instead of a bunch of individual attachments). - Node* currNode = blockElem->firstChild(); - while (currNode) { - Node* nextNode = currNode->nextSibling(); - newNode->appendChild(currNode, ec); - currNode = nextNode; - } - - // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no - // attachment can occur yet. - blockElem->appendChild(newNode.release(), ec); - } else - finished = true; - - // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place. - if (isBlockStillInTree) - parentElem->appendChild(blockElem, ec); - - // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update - // the node associated with the previous stack element so that when it gets popped, - // it doesn't make the residual element the next current node. - HTMLStackElem* currElem = maxElem; - HTMLStackElem* prevElem = 0; - while (currElem != elem) { - prevElem = currElem; - currElem = currElem->next; - } - prevElem->next = elem->next; - prevElem->derefNode(); - prevElem->node = elem->node; - prevElem->didRefNode = elem->didRefNode; - m_treeDepth--; - if (!finished) { - // Repurpose |elem| to represent |newNode| and insert it at the appropriate position - // in the stack. We do not do this for the innermost block, because in that case the new - // node is effectively no longer open. - elem->next = maxElem; - elem->node = prevMaxElem->node; - elem->didRefNode = prevMaxElem->didRefNode; - elem->strayTableContent = false; - prevMaxElem->next = elem; - ASSERT(newNodePtr); - prevMaxElem->node = newNodePtr; - newNodePtr->ref(); - prevMaxElem->didRefNode = true; - m_treeDepth++; - } else - delete elem; - } - - // FIXME: If we ever make a case like this work: - // <table><b><i><form></b></form></i></table> - // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy. - if (strayTableContent) - m_inStrayTableContent--; - - // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>. - // In the above example, Goo should stay italic. - // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth. - - HTMLStackElem* curr = m_blockStack; - HTMLStackElem* residualStyleStack = 0; - unsigned stackDepth = 1; - unsigned redundantStyleCount = 0; - while (curr && curr != maxElem) { - // We will actually schedule this tag for reopening - // after we complete the close of this entire block. - if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) { - // We've overloaded the use of stack elements and are just reusing the - // struct with a slightly different meaning to the variables. Instead of chaining - // from innermost to outermost, we build up a list of all the tags we need to reopen - // from the outermost to the innermost, i.e., residualStyleStack will end up pointing - // to the outermost tag we need to reopen. - // We also set curr->node to be the actual element that corresponds to the ID stored in - // curr->id rather than the node that you should pop to when the element gets pulled off - // the stack. - if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) - redundantStyleCount++; - else - redundantStyleCount = 0; - - if (redundantStyleCount < cMaxRedundantTagDepth) - moveOneBlockToStack(residualStyleStack); - else - popOneBlock(); - } else - popOneBlock(); - - curr = m_blockStack; - } - - reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content. - - m_handlingResidualStyleAcrossBlocks = false; -} - -void LegacyHTMLTreeBuilder::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent) -{ - // Loop for each tag that needs to be reopened. - while (elem) { - // Create a shallow clone of the DOM node for this element. - RefPtr<Node> newNode = elem->node->cloneNode(false); - reportError(ResidualStyleError, &newNode->localName()); - - // Append the new node. In the malformed table case, we need to insert before the table, - // which will be the last child. - ExceptionCode ec = 0; - if (malformedTableParent) - malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec); - else - m_current->appendChild(newNode, ec); - // FIXME: Is it really OK to ignore the exceptions here? - - // Now push a new stack element for this node we just created. - pushBlock(elem->tagName, elem->level); - newNode->beginParsingChildren(); - - // Set our strayTableContent boolean if needed, so that the reopened tag also knows - // that it is inside a malformed table. - m_blockStack->strayTableContent = malformedTableParent != 0; - if (m_blockStack->strayTableContent) - m_inStrayTableContent++; - - // Clear our malformed table parent variable. - malformedTableParent = 0; - - // Update |current| manually to point to the new node. - setCurrent(newNode.get()); - - // Advance to the next tag that needs to be reopened. - HTMLStackElem* next = elem->next; - elem->derefNode(); - delete elem; - elem = next; - } -} - -void LegacyHTMLTreeBuilder::pushBlock(const AtomicString& tagName, int level) -{ - m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack); - if (level >= minBlockLevelTagPriority) - m_blocksInStack++; - m_treeDepth++; - m_didRefCurrent = false; - if (tagName == pTag) - m_hasPElementInScope = InScope; - else if (isScopingTag(tagName)) - m_hasPElementInScope = NotInScope; -} - -void LegacyHTMLTreeBuilder::popBlock(const AtomicString& tagName, bool reportErrors) -{ - HTMLStackElem* elem = m_blockStack; - - if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName)) - return; - - int maxLevel = 0; - - while (elem && (elem->tagName != tagName)) { - if (maxLevel < elem->level) - maxLevel = elem->level; - elem = elem->next; - } - - if (!elem) { - if (reportErrors) - reportError(StrayCloseTagError, &tagName, 0, true); - return; - } - - if (maxLevel > elem->level) { - // We didn't match because the tag is in a different scope, e.g., - // <b><p>Foo</b>. Try to correct the problem. - if (!isResidualStyleTag(tagName)) - return; - return handleResidualStyleCloseTagAcrossBlocks(elem); - } - - bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName); - HTMLStackElem* residualStyleStack = 0; - Node* malformedTableParent = 0; - - elem = m_blockStack; - unsigned stackDepth = 1; - unsigned redundantStyleCount = 0; - while (elem) { - if (elem->tagName == tagName) { - int strayTable = m_inStrayTableContent; - popOneBlock(); - elem = 0; - - // This element was the root of some malformed content just inside an implicit or - // explicit <tbody> or <tr>. - // If we end up needing to reopen residual style tags, the root of the reopened chain - // must also know that it is the root of malformed content inside a <tbody>/<tr>. - if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) { - Node* curr = m_current; - while (curr && !curr->hasTagName(tableTag)) - curr = curr->parentNode(); - malformedTableParent = curr ? curr->parentNode() : 0; - } - } - else { - if (m_currentFormElement && elem->tagName == formTag) - // A <form> is being closed prematurely (and this is - // malformed HTML). Set an attribute on the form to clear out its - // bottom margin. - m_currentFormElement->setMalformed(true); - - // Schedule this tag for reopening - // after we complete the close of this entire block. - if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) { - // We've overloaded the use of stack elements and are just reusing the - // struct with a slightly different meaning to the variables. Instead of chaining - // from innermost to outermost, we build up a list of all the tags we need to reopen - // from the outermost to the innermost, i.e., residualStyleStack will end up pointing - // to the outermost tag we need to reopen. - // We also set elem->node to be the actual element that corresponds to the ID stored in - // elem->id rather than the node that you should pop to when the element gets pulled off - // the stack. - if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) - redundantStyleCount++; - else - redundantStyleCount = 0; - - if (redundantStyleCount < cMaxRedundantTagDepth) - moveOneBlockToStack(residualStyleStack); - else - popOneBlock(); - } else - popOneBlock(); - elem = m_blockStack; - } - } - - reopenResidualStyleTags(residualStyleStack, malformedTableParent); -} - -inline HTMLStackElem* LegacyHTMLTreeBuilder::popOneBlockCommon() -{ - HTMLStackElem* elem = m_blockStack; - - // Form elements restore their state during the parsing process. - // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available. - if (m_current && elem->node != m_current) - m_current->finishParsingChildren(); - - if (m_blockStack->level >= minBlockLevelTagPriority) { - ASSERT(m_blocksInStack > 0); - m_blocksInStack--; - } - m_treeDepth--; - m_blockStack = elem->next; - m_current = elem->node; - m_didRefCurrent = elem->didRefNode; - - if (elem->strayTableContent) - m_inStrayTableContent--; - - if (elem->tagName == pTag) - m_hasPElementInScope = NotInScope; - else if (isScopingTag(elem->tagName)) - m_hasPElementInScope = Unknown; - - return elem; -} - -void LegacyHTMLTreeBuilder::popOneBlock() -{ - // Store the current node before popOneBlockCommon overwrites it. - Node* lastCurrent = m_current; - bool didRefLastCurrent = m_didRefCurrent; - - delete popOneBlockCommon(); - - if (didRefLastCurrent) - lastCurrent->deref(); -} - -void LegacyHTMLTreeBuilder::moveOneBlockToStack(HTMLStackElem*& head) -{ - // We'll be using the stack element we're popping, but for the current node. - // See the two callers for details. - - // Store the current node before popOneBlockCommon overwrites it. - Node* lastCurrent = m_current; - bool didRefLastCurrent = m_didRefCurrent; - - // Pop the block, but don't deref the current node as popOneBlock does because - // we'll be using the pointer in the new stack element. - HTMLStackElem* elem = popOneBlockCommon(); - - // Transfer the current node into the stack element. - // No need to deref the old elem->node because popOneBlockCommon transferred - // it into the m_current/m_didRefCurrent fields. - elem->node = lastCurrent; - elem->didRefNode = didRefLastCurrent; - elem->next = head; - head = elem; -} - -void LegacyHTMLTreeBuilder::checkIfHasPElementInScope() -{ - m_hasPElementInScope = NotInScope; - HTMLStackElem* elem = m_blockStack; - while (elem) { - const AtomicString& tagName = elem->tagName; - if (tagName == pTag) { - m_hasPElementInScope = InScope; - return; - } else if (isScopingTag(tagName)) - return; - elem = elem->next; - } -} - -void LegacyHTMLTreeBuilder::popInlineBlocks() -{ - while (m_blockStack && isInline(m_current)) - popOneBlock(); -} - -void LegacyHTMLTreeBuilder::freeBlock() -{ - while (m_blockStack) - popOneBlock(); - ASSERT(!m_blocksInStack); - ASSERT(!m_treeDepth); -} - -void LegacyHTMLTreeBuilder::createHead() -{ - if (m_head) - return; - - if (!m_document->documentElement() && !m_isParsingFragment) { - insertNode(HTMLHtmlElement::create(m_document).get()); - ASSERT(m_document->documentElement() || m_isParsingFragment); - } - - m_head = HTMLHeadElement::create(m_document); - - if (m_isParsingFragment) - return; - - HTMLElement* body = m_document->body(); - ExceptionCode ec = 0; - m_document->documentElement()->insertBefore(m_head.get(), body, ec); - if (ec) - m_head = 0; - - // If the body does not exist yet, then the <head> should be pushed as the current block. - if (m_head && !body) { - pushBlock(m_head->localName(), m_head->tagPriority()); - setCurrent(m_head.get()); - } -} - -PassRefPtr<Node> LegacyHTMLTreeBuilder::handleIsindex(Token* t) -{ - RefPtr<Node> n = HTMLDivElement::create(m_document); - - NamedNodeMap* attrs = t->attrs.get(); - - RefPtr<HTMLIsIndexElement> isIndex = HTMLIsIndexElement::create(m_document, m_currentFormElement.get()); - isIndex->setAttributeMap(attrs); - isIndex->setAttribute(typeAttr, "khtml_isindex"); - - String text = searchableIndexIntroduction(); - if (attrs) { - if (Attribute* a = attrs->getAttributeItem(promptAttr)) - text = a->value().string() + " "; - t->attrs = 0; - } - - n->legacyParserAddChild(HTMLHRElement::create(m_document)); - n->legacyParserAddChild(Text::create(m_document, text)); - n->legacyParserAddChild(isIndex.release()); - n->legacyParserAddChild(HTMLHRElement::create(m_document)); - - return n.release(); -} - -void LegacyHTMLTreeBuilder::startBody() -{ - if (m_inBody) - return; - - m_inBody = true; - - if (m_isindexElement) { - insertNode(m_isindexElement.get(), true /* don't descend into this node */); - m_isindexElement = 0; - } -} - -void LegacyHTMLTreeBuilder::finished() -{ - // In the case of a completely empty document, here's the place to create the HTML element. - if (m_current && m_current->isDocumentNode() && !m_document->documentElement()) - insertNode(HTMLHtmlElement::create(m_document).get()); - - // This ensures that "current" is not left pointing to a node when the document is destroyed. - freeBlock(); - setCurrent(0); - - // Warning, this may delete the parser, so don't try to do anything else after this. - if (!m_isParsingFragment) - m_document->finishedParsing(); -} - -void LegacyHTMLTreeBuilder::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags) -{ - Frame* frame = m_document->frame(); - if (!frame) - return; - - ScriptableDocumentParser* parser = m_document->scriptableDocumentParser(); - int lineNumber = parser->lineNumber() + 1; - - AtomicString tag1; - AtomicString tag2; - if (tagName1) { - if (*tagName1 == "#text") - tag1 = "Text"; - else if (*tagName1 == "#comment") - tag1 = "<!-- comment -->"; - else - tag1 = (closeTags ? "</" : "<") + *tagName1 + ">"; - } - if (tagName2) { - if (*tagName2 == "#text") - tag2 = "Text"; - else if (*tagName2 == "#comment") - tag2 = "<!-- comment -->"; - else - tag2 = (closeTags ? "</" : "<") + *tagName2 + ">"; - } - - const char* errorMsg = htmlParserErrorMessageTemplate(errorCode); - if (!errorMsg) - return; - - String message; - if (parser->processingContentWrittenByScript()) - message += htmlParserDocumentWriteMessage(); - message += errorMsg; - message.replace("%tag1", tag1); - message.replace("%tag2", tag2); - - frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType, - isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel, - message, lineNumber, m_document->url().string()); -} - -#ifdef BUILDING_ON_LEOPARD -bool shouldCreateImplicitHead(Document* document) -{ - ASSERT(document); - - Settings* settings = document->page() ? document->page()->settings() : 0; - return settings ? !settings->needsLeopardMailQuirks() : true; -} -#elif defined(BUILDING_ON_TIGER) -bool shouldCreateImplicitHead(Document* document) -{ - ASSERT(document); - - Settings* settings = document->page() ? document->page()->settings() : 0; - return settings ? !settings->needsTigerMailQuirks() : true; -} -#endif - - -String serializeForNumberType(double number) -{ - // According to HTML5, "the best representation of the number n as a floating - // point number" is a string produced by applying ToString() to n. - DtoaBuffer buffer; - unsigned length; - doubleToStringInJavaScriptFormat(number, buffer, &length); - return String(buffer, length); -} - -bool parseToDoubleForNumberType(const String& src, double* out) -{ - // See HTML5 2.4.4.3 `Real numbers.' - - if (src.isEmpty()) - return false; - // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5. - // So, check the first character. - if (src[0] != '-' && (src[0] < '0' || src[0] > '9')) - return false; - - bool valid = false; - double value = src.toDouble(&valid); - if (!valid) - return false; - // NaN and Infinity are not valid numbers according to the standard. - if (!isfinite(value)) - return false; - // -0 -> 0 - if (!value) - value = 0; - if (out) - *out = value; - return true; -} - -} |