diff options
Diffstat (limited to 'Source/WebCore/html/parser')
-rw-r--r-- | Source/WebCore/html/parser/HTMLConstructionSite.cpp | 65 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLConstructionSite.h | 2 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLDocumentParser.cpp | 126 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLDocumentParser.h | 10 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLElementStack.cpp | 184 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLElementStack.h | 29 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLParserScheduler.cpp | 15 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLParserScheduler.h | 39 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLScriptRunner.cpp | 49 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLTreeBuilder.cpp | 133 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLTreeBuilder.h | 5 | ||||
-rw-r--r-- | Source/WebCore/html/parser/NestingLevelIncrementer.h | 2 | ||||
-rw-r--r-- | Source/WebCore/html/parser/XSSFilter.cpp | 22 | ||||
-rw-r--r-- | Source/WebCore/html/parser/XSSFilter.h | 2 | ||||
-rwxr-xr-x | Source/WebCore/html/parser/create-html-entity-table | 47 |
15 files changed, 413 insertions, 317 deletions
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.cpp b/Source/WebCore/html/parser/HTMLConstructionSite.cpp index a026ef9..2be6039 100644 --- a/Source/WebCore/html/parser/HTMLConstructionSite.cpp +++ b/Source/WebCore/html/parser/HTMLConstructionSite.cpp @@ -1,5 +1,6 @@ /* * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * Copyright (C) 2011 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -58,16 +59,16 @@ using namespace HTMLNames; namespace { -bool hasImpliedEndTag(Element* element) +bool hasImpliedEndTag(ContainerNode* node) { - return element->hasTagName(ddTag) - || element->hasTagName(dtTag) - || element->hasTagName(liTag) - || element->hasTagName(optionTag) - || element->hasTagName(optgroupTag) - || element->hasTagName(pTag) - || element->hasTagName(rpTag) - || element->hasTagName(rtTag); + return node->hasTagName(ddTag) + || node->hasTagName(dtTag) + || node->hasTagName(liTag) + || node->hasTagName(optionTag) + || node->hasTagName(optgroupTag) + || node->hasTagName(pTag) + || node->hasTagName(rpTag) + || node->hasTagName(rtTag); } bool causesFosterParenting(const QualifiedName& tagName) @@ -204,6 +205,12 @@ void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token) { // FIXME: parse error + + // Fragments do not have a root HTML element, so any additional HTML elements + // encountered during fragment parsing should be ignored. + if (m_isParsingFragment) + return; + mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement()); } @@ -236,7 +243,7 @@ void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token) void HTMLConstructionSite::insertComment(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::Comment); - attach(currentElement(), Comment::create(currentElement()->document(), token.comment())); + attach(currentNode(), Comment::create(currentNode()->document(), token.comment())); } void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token) @@ -248,13 +255,13 @@ void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token) void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::Comment); - Element* parent = m_openElements.htmlElement(); + ContainerNode* parent = m_openElements.rootNode(); attach(parent, Comment::create(parent->document(), token.comment())); } PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child) { - return attach(currentElement(), child); + return attach(currentNode(), child); } void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token) @@ -310,7 +317,7 @@ void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token) void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token) { - RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentElement()->document(), true); + RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true); if (m_fragmentScriptingPermission == FragmentScriptingAllowed) element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); m_openElements.push(attachToCurrent(element.release())); @@ -329,27 +336,40 @@ void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const At void HTMLConstructionSite::insertTextNode(const String& characters) { AttachmentSite site; - site.parent = currentElement(); + site.parent = currentNode(); site.nextChild = 0; if (shouldFosterParent()) findFosterSite(site); + unsigned currentPosition = 0; + + // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary + // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>. + Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild(); if (previousChild && previousChild->isTextNode()) { // FIXME: We're only supposed to append to this text node if it // was the last text node inserted by the parser. CharacterData* textNode = static_cast<CharacterData*>(previousChild); - textNode->parserAppendData(characters); - return; + currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit); } - attachAtSite(site, Text::create(site.parent->document(), characters)); + while (currentPosition < characters.length()) { + RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition); + // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil. + if (!textNode->length()) + textNode = Text::create(site.parent->document(), characters.substring(currentPosition)); + + currentPosition += textNode->length(); + ASSERT(currentPosition <= characters.length()); + attachAtSite(site, textNode.release()); + } } PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI) { QualifiedName tagName(nullAtom, token.name(), namespaceURI); - RefPtr<Element> element = currentElement()->document()->createElement(tagName, true); + RefPtr<Element> element = currentNode()->document()->createElement(tagName, true); element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); return element.release(); } @@ -360,7 +380,7 @@ PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& tok // FIXME: This can't use HTMLConstructionSite::createElement because we // have to pass the current form element. We should rework form association // to occur after construction to allow better code sharing here. - RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentElement()->document(), form(), true); + RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true); element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); ASSERT(element->isHTMLElement()); return element.release(); @@ -439,13 +459,13 @@ void HTMLConstructionSite::reconstructTheActiveFormattingElements() void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName) { - while (hasImpliedEndTag(currentElement()) && !currentElement()->hasLocalName(tagName)) + while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName)) m_openElements.pop(); } void HTMLConstructionSite::generateImpliedEndTags() { - while (hasImpliedEndTag(currentElement())) + while (hasImpliedEndTag(currentNode())) m_openElements.pop(); } @@ -464,13 +484,14 @@ void HTMLConstructionSite::findFosterSite(AttachmentSite& site) return; } // Fragment case - site.parent = m_openElements.bottom(); // <html> element + site.parent = m_openElements.rootNode(); // DocumentFragment site.nextChild = 0; } bool HTMLConstructionSite::shouldFosterParent() const { return m_redirectAttachToFosterParent + && currentNode()->isElementNode() && causesFosterParenting(currentElement()->tagQName()); } diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.h b/Source/WebCore/html/parser/HTMLConstructionSite.h index 0298503..380e487 100644 --- a/Source/WebCore/html/parser/HTMLConstructionSite.h +++ b/Source/WebCore/html/parser/HTMLConstructionSite.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * Copyright (C) 2011 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -80,6 +81,7 @@ public: void generateImpliedEndTagsWithExclusion(const AtomicString& tagName); Element* currentElement() const { return m_openElements.top(); } + ContainerNode* currentNode() const { return m_openElements.topNode(); } Element* oneBelowTop() const { return m_openElements.oneBelowTop(); } HTMLElementStack* openElements() const { return &m_openElements; } diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.cpp b/Source/WebCore/html/parser/HTMLDocumentParser.cpp index 2fe9486..843df45 100644 --- a/Source/WebCore/html/parser/HTMLDocumentParser.cpp +++ b/Source/WebCore/html/parser/HTMLDocumentParser.cpp @@ -86,7 +86,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors , m_parserScheduler(HTMLParserScheduler::create(this)) , m_xssFilter(this) , m_endWasDelayed(false) - , m_writeNestingLevel(0) + , m_pumpSessionNestingLevel(0) { } @@ -98,7 +98,7 @@ HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* cont , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks(fragment->document()))) , m_xssFilter(this) , m_endWasDelayed(false) - , m_writeNestingLevel(0) + , m_pumpSessionNestingLevel(0) { bool reportErrors = false; // For now document fragment parsing never reports errors. m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors)); @@ -107,7 +107,7 @@ HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* cont HTMLDocumentParser::~HTMLDocumentParser() { ASSERT(!m_parserScheduler); - ASSERT(!m_writeNestingLevel); + ASSERT(!m_pumpSessionNestingLevel); ASSERT(!m_preloadScanner); } @@ -155,9 +155,14 @@ void HTMLDocumentParser::prepareToStopParsing() attemptToRunDeferredScriptsAndEnd(); } +bool HTMLDocumentParser::isParsingFragment() const +{ + return m_treeBuilder->isParsingFragment(); +} + bool HTMLDocumentParser::processingData() const { - return isScheduledForResume() || inWrite(); + return isScheduledForResume() || inPumpSession(); } void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode) @@ -204,6 +209,36 @@ bool HTMLDocumentParser::runScriptsForPausedTreeBuilder() return m_scriptRunner->execute(scriptElement.release(), scriptStartPosition); } +bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& session) +{ + if (isStopped()) + return false; + + // The parser will pause itself when waiting on a script to load or run. + if (m_treeBuilder->isPaused()) { + // If we're paused waiting for a script, we try to execute scripts before continuing. + bool shouldContinueParsing = runScriptsForPausedTreeBuilder(); + m_treeBuilder->setPaused(!shouldContinueParsing); + if (!shouldContinueParsing || isStopped()) + return false; + } + + // FIXME: It's wrong for the HTMLDocumentParser to reach back to the + // Frame, but this approach is how the old parser handled + // stopping when the page assigns window.location. What really + // should happen is that assigning window.location causes the + // parser to stop parsing cleanly. The problem is we're not + // perpared to do that at every point where we run JavaScript. + if (!isParsingFragment() + && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending()) + return false; + + if (mode == AllowYield) + m_parserScheduler->checkForYieldBeforeToken(session); + + return true; +} + void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) { ASSERT(!isStopped()); @@ -212,6 +247,8 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) // ASSERT that this object is both attached to the Document and protected. ASSERT(refCount() >= 2); + PumpSession session(m_pumpSessionNestingLevel); + // We tell the InspectorInstrumentation about every pump, even if we // end up pumping nothing. It can filter out empty pumps itself. // FIXME: m_input.current().length() is only accurate if we @@ -219,53 +256,35 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) // much we parsed as part of didWriteHTML instead of willWriteHTML. InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().length(), m_tokenizer->lineNumber()); - HTMLParserScheduler::PumpSession session; - // FIXME: This loop body has is now too long and needs cleanup. - while (mode == ForceSynchronous || m_parserScheduler->shouldContinueParsing(session)) { - // FIXME: It's wrong for the HTMLDocumentParser to reach back to the - // Frame, but this approach is how the old parser handled - // stopping when the page assigns window.location. What really - // should happen is that assigning window.location causes the - // parser to stop parsing cleanly. The problem is we're not - // perpared to do that at every point where we run JavaScript. - if (!m_treeBuilder->isParsingFragment() - && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending()) - break; + while (canTakeNextToken(mode, session) && !session.needsYield) { + if (!isParsingFragment()) + m_sourceTracker.start(m_input, m_token); - m_sourceTracker.start(m_input, m_token); if (!m_tokenizer->nextToken(m_input.current(), m_token)) break; - m_sourceTracker.end(m_input, m_token); - m_xssFilter.filterToken(m_token); + if (!isParsingFragment()) { + m_sourceTracker.end(m_input, m_token); + + // We do not XSS filter innerHTML, which means we (intentionally) fail + // http/tests/security/xssAuditor/dom-write-innerHTML.html + m_xssFilter.filterToken(m_token); + } m_treeBuilder->constructTreeFromToken(m_token); m_token.clear(); - - // JavaScript may have stopped or detached the parser. - if (isStopped()) - return; - - // The parser will pause itself when waiting on a script to load or run. - if (!m_treeBuilder->isPaused()) - continue; - - // If we're paused waiting for a script, we try to execute scripts before continuing. - bool shouldContinueParsing = runScriptsForPausedTreeBuilder(); - m_treeBuilder->setPaused(!shouldContinueParsing); - - // JavaScript may have stopped or detached the parser. - if (isStopped()) - return; - - if (!shouldContinueParsing) - break; } // Ensure we haven't been totally deref'ed after pumping. Any caller of this // function should be holding a RefPtr to this to ensure we weren't deleted. ASSERT(refCount() >= 1); + if (isStopped()) + return; + + if (session.needsYield) + m_parserScheduler->scheduleForResume(); + if (isWaitingForScripts()) { ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState); if (!m_preloadScanner) { @@ -301,14 +320,10 @@ void HTMLDocumentParser::insert(const SegmentedString& source) // but we need to ensure it isn't deleted yet. RefPtr<HTMLDocumentParser> protect(this); - { - NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel); - - SegmentedString excludedLineNumberSource(source); - excludedLineNumberSource.setExcludeLineNumbers(); - m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource); - pumpTokenizerIfPossible(ForceSynchronous); - } + SegmentedString excludedLineNumberSource(source); + excludedLineNumberSource.setExcludeLineNumbers(); + m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource); + pumpTokenizerIfPossible(ForceSynchronous); endIfDelayed(); } @@ -322,13 +337,11 @@ void HTMLDocumentParser::append(const SegmentedString& source) // but we need to ensure it isn't deleted yet. RefPtr<HTMLDocumentParser> protect(this); - { - NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel); - - m_input.appendToEnd(source); - if (m_preloadScanner) - m_preloadScanner->appendToEnd(source); + m_input.appendToEnd(source); + if (m_preloadScanner) + m_preloadScanner->appendToEnd(source); +<<<<<<< HEAD if (m_writeNestingLevel > 1) { // We've gotten data off the network in a nested write. // We don't want to consume any more of the input stream now. Do @@ -340,8 +353,17 @@ void HTMLDocumentParser::append(const SegmentedString& source) } pumpTokenizerIfPossible(AllowYield); +======= + if (inPumpSession()) { + // We've gotten data off the network in a nested write. + // We don't want to consume any more of the input stream now. Do + // not worry. We'll consume this data in a less-nested write(). + return; +>>>>>>> WebKit at r80534 } + pumpTokenizerIfPossible(AllowYield); + endIfDelayed(); #ifdef ANDROID_INSTRUMENT android::TimeCounter::record(android::TimeCounter::ParsingTimeCounter, __FUNCTION__); diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.h b/Source/WebCore/html/parser/HTMLDocumentParser.h index be2ca1b..4bc33e4 100644 --- a/Source/WebCore/html/parser/HTMLDocumentParser.h +++ b/Source/WebCore/html/parser/HTMLDocumentParser.h @@ -51,6 +51,8 @@ class HTMLPreloadScanner; class ScriptController; class ScriptSourceCode; +class PumpSession; + class HTMLDocumentParser : public ScriptableDocumentParser, HTMLScriptRunnerHost, CachedResourceClient { WTF_MAKE_FAST_ALLOCATED; public: @@ -116,6 +118,7 @@ private: AllowYield, ForceSynchronous, }; + bool canTakeNextToken(SynchronousMode, PumpSession&); void pumpTokenizer(SynchronousMode); void pumpTokenizerIfPossible(SynchronousMode); @@ -128,10 +131,11 @@ private: void attemptToRunDeferredScriptsAndEnd(); void end(); + bool isParsingFragment() const; bool isScheduledForResume() const; bool inScriptExecution() const; - bool inWrite() const { return m_writeNestingLevel > 0; } - bool shouldDelayEnd() const { return inWrite() || isWaitingForScripts() || inScriptExecution() || isScheduledForResume(); } + bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; } + bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || inScriptExecution() || isScheduledForResume(); } ScriptController* script() const; @@ -149,7 +153,7 @@ private: XSSFilter m_xssFilter; bool m_endWasDelayed; - unsigned m_writeNestingLevel; + unsigned m_pumpSessionNestingLevel; }; } diff --git a/Source/WebCore/html/parser/HTMLElementStack.cpp b/Source/WebCore/html/parser/HTMLElementStack.cpp index 6aab0f7..6f5f9ed 100644 --- a/Source/WebCore/html/parser/HTMLElementStack.cpp +++ b/Source/WebCore/html/parser/HTMLElementStack.cpp @@ -1,5 +1,6 @@ /* * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * Copyright (C) 2011 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,6 +27,7 @@ #include "config.h" #include "HTMLElementStack.h" +#include "DocumentFragment.h" #include "Element.h" #include "HTMLNames.h" #include "MathMLNames.h" @@ -38,62 +40,68 @@ using namespace HTMLNames; namespace { -inline bool isNumberedHeaderElement(Element* element) +inline bool isNumberedHeaderElement(ContainerNode* node) { - return element->hasTagName(h1Tag) - || element->hasTagName(h2Tag) - || element->hasTagName(h3Tag) - || element->hasTagName(h4Tag) - || element->hasTagName(h5Tag) - || element->hasTagName(h6Tag); + return node->hasTagName(h1Tag) + || node->hasTagName(h2Tag) + || node->hasTagName(h3Tag) + || node->hasTagName(h4Tag) + || node->hasTagName(h5Tag) + || node->hasTagName(h6Tag); +} + +inline bool isRootMarker(ContainerNode* node) +{ + return node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE + || node->hasTagName(htmlTag); } -inline bool isScopeMarker(Element* element) +inline bool isScopeMarker(ContainerNode* node) { - return element->hasTagName(appletTag) - || element->hasTagName(captionTag) - || element->hasTagName(htmlTag) - || element->hasTagName(marqueeTag) - || element->hasTagName(objectTag) - || element->hasTagName(tableTag) - || element->hasTagName(tdTag) - || element->hasTagName(thTag) - || element->hasTagName(MathMLNames::miTag) - || element->hasTagName(MathMLNames::moTag) - || element->hasTagName(MathMLNames::mnTag) - || element->hasTagName(MathMLNames::msTag) - || element->hasTagName(MathMLNames::mtextTag) - || element->hasTagName(MathMLNames::annotation_xmlTag) - || element->hasTagName(SVGNames::foreignObjectTag) - || element->hasTagName(SVGNames::descTag) - || element->hasTagName(SVGNames::titleTag); + return node->hasTagName(appletTag) + || node->hasTagName(captionTag) + || node->hasTagName(marqueeTag) + || node->hasTagName(objectTag) + || node->hasTagName(tableTag) + || node->hasTagName(tdTag) + || node->hasTagName(thTag) + || node->hasTagName(MathMLNames::miTag) + || node->hasTagName(MathMLNames::moTag) + || node->hasTagName(MathMLNames::mnTag) + || node->hasTagName(MathMLNames::msTag) + || node->hasTagName(MathMLNames::mtextTag) + || node->hasTagName(MathMLNames::annotation_xmlTag) + || node->hasTagName(SVGNames::foreignObjectTag) + || node->hasTagName(SVGNames::descTag) + || node->hasTagName(SVGNames::titleTag) + || isRootMarker(node); } -inline bool isListItemScopeMarker(Element* element) +inline bool isListItemScopeMarker(ContainerNode* node) { - return isScopeMarker(element) - || element->hasTagName(olTag) - || element->hasTagName(ulTag); + return isScopeMarker(node) + || node->hasTagName(olTag) + || node->hasTagName(ulTag); } -inline bool isTableScopeMarker(Element* element) +inline bool isTableScopeMarker(ContainerNode* node) { - return element->hasTagName(tableTag) - || element->hasTagName(htmlTag); + return node->hasTagName(tableTag) + || isRootMarker(node); } -inline bool isTableBodyScopeMarker(Element* element) +inline bool isTableBodyScopeMarker(ContainerNode* node) { - return element->hasTagName(tbodyTag) - || element->hasTagName(tfootTag) - || element->hasTagName(theadTag) - || element->hasTagName(htmlTag); + return node->hasTagName(tbodyTag) + || node->hasTagName(tfootTag) + || node->hasTagName(theadTag) + || isRootMarker(node); } -inline bool isTableRowScopeMarker(Element* element) +inline bool isTableRowScopeMarker(ContainerNode* node) { - return element->hasTagName(trTag) - || element->hasTagName(htmlTag); + return node->hasTagName(trTag) + || isRootMarker(node); } inline bool isForeignContentScopeMarker(Element* element) @@ -109,25 +117,25 @@ inline bool isForeignContentScopeMarker(Element* element) || element->namespaceURI() == HTMLNames::xhtmlNamespaceURI; } -inline bool isButtonScopeMarker(Element* element) +inline bool isButtonScopeMarker(ContainerNode* node) { - return isScopeMarker(element) - || element->hasTagName(buttonTag); + return isScopeMarker(node) + || node->hasTagName(buttonTag); } -inline bool isSelectScopeMarker(Element* element) +inline bool isSelectScopeMarker(ContainerNode* node) { - return !element->hasTagName(optgroupTag) - && !element->hasTagName(optionTag); + return !node->hasTagName(optgroupTag) + && !node->hasTagName(optionTag); } } -HTMLElementStack::ElementRecord::ElementRecord(PassRefPtr<Element> element, PassOwnPtr<ElementRecord> next) - : m_element(element) +HTMLElementStack::ElementRecord::ElementRecord(PassRefPtr<ContainerNode> node, PassOwnPtr<ElementRecord> next) + : m_node(node) , m_next(next) { - ASSERT(m_element); + ASSERT(m_node); } HTMLElementStack::ElementRecord::~ElementRecord() @@ -137,8 +145,9 @@ HTMLElementStack::ElementRecord::~ElementRecord() void HTMLElementStack::ElementRecord::replaceElement(PassRefPtr<Element> element) { ASSERT(element); + ASSERT(!m_node || m_node->isElementNode()); // FIXME: Should this call finishParsingChildren? - m_element = element; + m_node = element; } bool HTMLElementStack::ElementRecord::isAbove(ElementRecord* other) const @@ -151,7 +160,7 @@ bool HTMLElementStack::ElementRecord::isAbove(ElementRecord* other) const } HTMLElementStack::HTMLElementStack() - : m_htmlElement(0) + : m_rootNode(0) , m_headElement(0) , m_bodyElement(0) { @@ -171,7 +180,7 @@ bool HTMLElementStack::secondElementIsHTMLBodyElement() const // This is used the fragment case of <body> and <frameset> in the "in body" // insertion mode. // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody - ASSERT(m_htmlElement); + ASSERT(m_rootNode); // If we have a body element, it must always be the second element on the // stack, as we always start with an html element, and any other element // would cause the implicit creation of a body element. @@ -194,11 +203,11 @@ void HTMLElementStack::popHTMLBodyElement() void HTMLElementStack::popAll() { - m_htmlElement = 0; + m_rootNode = 0; m_headElement = 0; m_bodyElement = 0; while (m_top) { - top()->finishParsingChildren(); + topNode()->finishParsingChildren(); m_top = m_top->releaseNext(); } } @@ -226,7 +235,7 @@ void HTMLElementStack::popUntilPopped(const AtomicString& tagName) void HTMLElementStack::popUntilNumberedHeaderElementPopped() { - while (!isNumberedHeaderElement(top())) + while (!isNumberedHeaderElement(topNode())) pop(); pop(); } @@ -246,21 +255,21 @@ void HTMLElementStack::popUntilPopped(Element* element) void HTMLElementStack::popUntilTableScopeMarker() { // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-context - while (!isTableScopeMarker(top())) + while (!isTableScopeMarker(topNode())) pop(); } void HTMLElementStack::popUntilTableBodyScopeMarker() { // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-body-context - while (!isTableBodyScopeMarker(top())) + while (!isTableBodyScopeMarker(topNode())) pop(); } void HTMLElementStack::popUntilTableRowScopeMarker() { // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#clear-the-stack-back-to-a-table-row-context - while (!isTableRowScopeMarker(top())) + while (!isTableRowScopeMarker(topNode())) pop(); } @@ -269,14 +278,25 @@ void HTMLElementStack::popUntilForeignContentScopeMarker() while (!isForeignContentScopeMarker(top())) pop(); } + +void HTMLElementStack::pushRootNode(PassRefPtr<ContainerNode> rootNode) +{ + ASSERT(rootNode->nodeType() == Node::DOCUMENT_FRAGMENT_NODE); + pushRootNodeCommon(rootNode); +} void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr<Element> element) { - ASSERT(!m_top); // <html> should always be the bottom of the stack. ASSERT(element->hasTagName(HTMLNames::htmlTag)); - ASSERT(!m_htmlElement); - m_htmlElement = element.get(); - pushCommon(element); + pushRootNodeCommon(element); +} + +void HTMLElementStack::pushRootNodeCommon(PassRefPtr<ContainerNode> rootNode) +{ + ASSERT(!m_top); + ASSERT(!m_rootNode); + m_rootNode = rootNode.get(); + pushCommon(rootNode); } void HTMLElementStack::pushHTMLHeadElement(PassRefPtr<Element> element) @@ -300,7 +320,7 @@ void HTMLElementStack::push(PassRefPtr<Element> element) ASSERT(!element->hasTagName(HTMLNames::htmlTag)); ASSERT(!element->hasTagName(HTMLNames::headTag)); ASSERT(!element->hasTagName(HTMLNames::bodyTag)); - ASSERT(m_htmlElement); + ASSERT(m_rootNode); pushCommon(element); } @@ -312,7 +332,7 @@ void HTMLElementStack::insertAbove(PassRefPtr<Element> element, ElementRecord* r ASSERT(!element->hasTagName(HTMLNames::htmlTag)); ASSERT(!element->hasTagName(HTMLNames::headTag)); ASSERT(!element->hasTagName(HTMLNames::bodyTag)); - ASSERT(m_htmlElement); + ASSERT(m_rootNode); if (recordBelow == m_top) { push(element); return; @@ -372,7 +392,7 @@ void HTMLElementStack::remove(Element* element) HTMLElementStack::ElementRecord* HTMLElementStack::find(Element* element) const { for (ElementRecord* pos = m_top.get(); pos; pos = pos->next()) { - if (pos->element() == element) + if (pos->node() == element) return pos; } return 0; @@ -381,7 +401,7 @@ HTMLElementStack::ElementRecord* HTMLElementStack::find(Element* element) const HTMLElementStack::ElementRecord* HTMLElementStack::topmost(const AtomicString& tagName) const { for (ElementRecord* pos = m_top.get(); pos; pos = pos->next()) { - if (pos->element()->hasLocalName(tagName)) + if (pos->node()->hasLocalName(tagName)) return pos; } return 0; @@ -397,14 +417,14 @@ bool HTMLElementStack::contains(const AtomicString& tagName) const return !!topmost(tagName); } -template <bool isMarker(Element*)> +template <bool isMarker(ContainerNode*)> bool inScopeCommon(HTMLElementStack::ElementRecord* top, const AtomicString& targetTag) { for (HTMLElementStack::ElementRecord* pos = top; pos; pos = pos->next()) { - Element* element = pos->element(); - if (element->hasLocalName(targetTag)) + ContainerNode* node = pos->node(); + if (node->hasLocalName(targetTag)) return true; - if (isMarker(element)) + if (isMarker(node)) return false; } ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker. @@ -427,10 +447,10 @@ bool HTMLElementStack::hasOnlyHTMLElementsInScope() const bool HTMLElementStack::hasNumberedHeaderElementInScope() const { for (ElementRecord* record = m_top.get(); record; record = record->next()) { - Element* element = record->element(); - if (isNumberedHeaderElement(element)) + ContainerNode* node = record->node(); + if (isNumberedHeaderElement(node)) return true; - if (isScopeMarker(element)) + if (isScopeMarker(node)) return false; } ASSERT_NOT_REACHED(); // <html> is always on the stack and is a scope marker. @@ -507,8 +527,8 @@ bool HTMLElementStack::inSelectScope(const QualifiedName& tagName) const Element* HTMLElementStack::htmlElement() const { - ASSERT(m_htmlElement); - return m_htmlElement; + ASSERT(m_rootNode); + return toElement(m_rootNode); } Element* HTMLElementStack::headElement() const @@ -522,12 +542,18 @@ Element* HTMLElementStack::bodyElement() const ASSERT(m_bodyElement); return m_bodyElement; } + +ContainerNode* HTMLElementStack::rootNode() const +{ + ASSERT(m_rootNode); + return m_rootNode; +} -void HTMLElementStack::pushCommon(PassRefPtr<Element> element) +void HTMLElementStack::pushCommon(PassRefPtr<ContainerNode> node) { - ASSERT(m_htmlElement); - m_top = adoptPtr(new ElementRecord(element, m_top.release())); - top()->beginParsingChildren(); + ASSERT(m_rootNode); + m_top = adoptPtr(new ElementRecord(node, m_top.release())); + topNode()->beginParsingChildren(); } void HTMLElementStack::popCommon() diff --git a/Source/WebCore/html/parser/HTMLElementStack.h b/Source/WebCore/html/parser/HTMLElementStack.h index ad8b941..a710932 100644 --- a/Source/WebCore/html/parser/HTMLElementStack.h +++ b/Source/WebCore/html/parser/HTMLElementStack.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * Copyright (C) 2011 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,6 +27,7 @@ #ifndef HTMLElementStack_h #define HTMLElementStack_h +#include "Element.h" #include <wtf/Forward.h> #include <wtf/Noncopyable.h> #include <wtf/OwnPtr.h> @@ -34,6 +36,8 @@ namespace WebCore { +class ContainerNode; +class DocumentFragment; class Element; class QualifiedName; @@ -50,7 +54,8 @@ public: public: ~ElementRecord(); // Public for ~PassOwnPtr() - Element* element() const { return m_element.get(); } + Element* element() const { return toElement(m_node.get()); } + ContainerNode* node() const { return m_node.get(); } void replaceElement(PassRefPtr<Element>); bool isAbove(ElementRecord*) const; @@ -60,12 +65,12 @@ public: private: friend class HTMLElementStack; - ElementRecord(PassRefPtr<Element>, PassOwnPtr<ElementRecord>); + ElementRecord(PassRefPtr<ContainerNode>, PassOwnPtr<ElementRecord>); PassOwnPtr<ElementRecord> releaseNext() { return m_next.release(); } void setNext(PassOwnPtr<ElementRecord> next) { m_next = next; } - RefPtr<Element> m_element; + RefPtr<ContainerNode> m_node; OwnPtr<ElementRecord> m_next; }; @@ -76,6 +81,12 @@ public: ASSERT(m_top->element()); return m_top->element(); } + + ContainerNode* topNode() const + { + ASSERT(m_top->node()); + return m_top->node(); + } Element* oneBelowTop() const; ElementRecord* topRecord() const; @@ -86,6 +97,7 @@ public: void insertAbove(PassRefPtr<Element>, ElementRecord*); void push(PassRefPtr<Element>); + void pushRootNode(PassRefPtr<ContainerNode>); void pushHTMLHtmlElement(PassRefPtr<Element>); void pushHTMLHeadElement(PassRefPtr<Element>); void pushHTMLBodyElement(PassRefPtr<Element>); @@ -131,24 +143,27 @@ public: Element* htmlElement() const; Element* headElement() const; Element* bodyElement() const; + + ContainerNode* rootNode() const; #ifndef NDEBUG void show(); #endif private: - void pushCommon(PassRefPtr<Element>); + void pushCommon(PassRefPtr<ContainerNode>); + void pushRootNodeCommon(PassRefPtr<ContainerNode>); void popCommon(); void removeNonTopCommon(Element*); OwnPtr<ElementRecord> m_top; - // We remember <html>, <head> and <body> as they are pushed. Their - // ElementRecords keep them alive. <html> is never popped. + // We remember the root node, <head> and <body> as they are pushed. Their + // ElementRecords keep them alive. The root node is never popped. // FIXME: We don't currently require type-specific information about // these elements so we haven't yet bothered to plumb the types all the // way down through createElement, etc. - Element* m_htmlElement; + ContainerNode* m_rootNode; Element* m_headElement; Element* m_bodyElement; }; diff --git a/Source/WebCore/html/parser/HTMLParserScheduler.cpp b/Source/WebCore/html/parser/HTMLParserScheduler.cpp index 56db1aa..c4525c8 100644 --- a/Source/WebCore/html/parser/HTMLParserScheduler.cpp +++ b/Source/WebCore/html/parser/HTMLParserScheduler.cpp @@ -74,25 +74,24 @@ HTMLParserScheduler::~HTMLParserScheduler() m_continueNextChunkTimer.stop(); } -// FIXME: This belongs on Document. -static bool isLayoutTimerActive(Document* doc) -{ - ASSERT(doc); - return doc->view() && doc->view()->layoutPending() && !doc->minimumLayoutDelay(); -} - void HTMLParserScheduler::continueNextChunkTimerFired(Timer<HTMLParserScheduler>* timer) { ASSERT_UNUSED(timer, timer == &m_continueNextChunkTimer); // FIXME: The timer class should handle timer priorities instead of this code. // If a layout is scheduled, wait again to let the layout timer run first. - if (isLayoutTimerActive(m_parser->document())) { + if (m_parser->document()->isLayoutTimerActive()) { m_continueNextChunkTimer.startOneShot(0); return; } m_parser->resumeParsingAfterYield(); } +void HTMLParserScheduler::scheduleForResume() +{ + m_continueNextChunkTimer.startOneShot(0); +} + + void HTMLParserScheduler::suspend() { ASSERT(!m_isSuspendedWithActiveTimer); diff --git a/Source/WebCore/html/parser/HTMLParserScheduler.h b/Source/WebCore/html/parser/HTMLParserScheduler.h index c415c62..9aa12eb 100644 --- a/Source/WebCore/html/parser/HTMLParserScheduler.h +++ b/Source/WebCore/html/parser/HTMLParserScheduler.h @@ -26,6 +26,7 @@ #ifndef HTMLParserScheduler_h #define HTMLParserScheduler_h +#include "NestingLevelIncrementer.h" #include "Timer.h" #include <wtf/CurrentTime.h> #include <wtf/PassOwnPtr.h> @@ -34,6 +35,21 @@ namespace WebCore { class HTMLDocumentParser; +class PumpSession : public NestingLevelIncrementer { +public: + PumpSession(unsigned& nestingLevel) + : NestingLevelIncrementer(nestingLevel) + , processedTokens(0) + , startTime(currentTime()) + , needsYield(false) + { + } + + int processedTokens; + double startTime; + bool needsYield; +}; + class HTMLParserScheduler { WTF_MAKE_NONCOPYABLE(HTMLParserScheduler); WTF_MAKE_FAST_ALLOCATED; public: @@ -43,34 +59,19 @@ public: } ~HTMLParserScheduler(); - struct PumpSession { - PumpSession() - : processedTokens(0) - , startTime(currentTime()) - { - } - - int processedTokens; - double startTime; - }; - // Inline as this is called after every token in the parser. - bool shouldContinueParsing(PumpSession& session) + void checkForYieldBeforeToken(PumpSession& session) { if (session.processedTokens > m_parserChunkSize) { session.processedTokens = 0; double elapsedTime = currentTime() - session.startTime; - if (elapsedTime > m_parserTimeLimit) { - // Schedule the parser to continue and yield from the parser. - m_continueNextChunkTimer.startOneShot(0); - return false; - } + if (elapsedTime > m_parserTimeLimit) + session.needsYield = true; } - ++session.processedTokens; - return true; } + void scheduleForResume(); bool isScheduledForResume() const { return m_isSuspendedWithActiveTimer || m_continueNextChunkTimer.isActive(); } void suspend(); diff --git a/Source/WebCore/html/parser/HTMLScriptRunner.cpp b/Source/WebCore/html/parser/HTMLScriptRunner.cpp index c99858d..99fff5e 100644 --- a/Source/WebCore/html/parser/HTMLScriptRunner.cpp +++ b/Source/WebCore/html/parser/HTMLScriptRunner.cpp @@ -262,17 +262,9 @@ void HTMLScriptRunner::requestDeferredScript(Element* element) bool HTMLScriptRunner::requestPendingScript(PendingScript& pendingScript, Element* script) const { ASSERT(!pendingScript.element()); - const AtomicString& srcValue = script->getAttribute(srcAttr); - // Allow the host to disllow script loads (using the XSSAuditor, etc.) - // FIXME: this check should be performed on the final URL in a redirect chain. - if (!m_host->shouldLoadExternalScriptFromSrc(srcValue)) - return false; - // FIXME: We need to resolve the url relative to the element. - if (!script->dispatchBeforeLoadEvent(srcValue)) - return false; pendingScript.setElement(script); // This should correctly return 0 for empty or invalid srcValues. - CachedScript* cachedScript = m_document->cachedResourceLoader()->requestScript(srcValue, toScriptElement(script)->scriptCharset()); + CachedScript* cachedScript = toScriptElement(script)->cachedScript().get(); if (!cachedScript) { notImplemented(); // Dispatch error event. return false; @@ -293,29 +285,24 @@ void HTMLScriptRunner::runScript(Element* script, const TextPosition1& scriptSta ScriptElement* scriptElement = toScriptElement(script); ASSERT(scriptElement); - if (!scriptElement->shouldExecuteAsJavaScript()) + + scriptElement->prepareScript(scriptStartPosition); + + if (!scriptElement->willBeParserExecuted()) return; - - if (script->hasAttribute(srcAttr)) { - if (script->hasAttribute(asyncAttr)) // Async takes precendence over defer. - return; // Asynchronous scripts handle themselves. - - if (script->hasAttribute(deferAttr)) - requestDeferredScript(script); - else - requestParsingBlockingScript(script); - } else if (!m_document->haveStylesheetsLoaded() && m_scriptNestingLevel == 1) { - // Block inline script execution on stylesheet load, unless we are in document.write(). - // The latter case can only happen if a script both triggers a stylesheet load - // and writes an inline script. Since write is blocking we have to execute the - // written script immediately, ignoring the pending sheets. - m_parsingBlockingScript.setElement(script); - m_parsingBlockingScript.setStartingPosition(scriptStartPosition); - } else { - ASSERT(isExecutingScript()); - ScriptSourceCode sourceCode(script->textContent(), documentURLForScriptExecution(m_document), scriptStartPosition); - scriptElement->executeScript(sourceCode); - } + + if (scriptElement->willExecuteWhenDocumentFinishedParsing()) + requestDeferredScript(script); + else if (scriptElement->readyToBeParserExecuted()) { + if (m_scriptNestingLevel == 1) { + m_parsingBlockingScript.setElement(script); + m_parsingBlockingScript.setStartingPosition(scriptStartPosition); + } else { + ScriptSourceCode sourceCode(script->textContent(), documentURLForScriptExecution(m_document), scriptStartPosition); + scriptElement->executeScript(sourceCode); + } + } else + requestParsingBlockingScript(script); } } diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp index d2931ac..8f9e3e1 100644 --- a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp +++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp @@ -1,5 +1,6 @@ /* * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * Copyright (C) 2011 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -122,6 +123,8 @@ bool isSpecialNode(Node* node) || node->hasTagName(SVGNames::descTag) || node->hasTagName(SVGNames::titleTag)) return true; + if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE) + return true; if (node->namespaceURI() != xhtmlNamespaceURI) return false; const AtomicString& tagName = node->localName(); @@ -375,7 +378,9 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* f if (contextElement) { // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm: // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case - processFakeStartTag(htmlTag); + // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes") + // and instead use the DocumentFragment as a root node. + m_tree.openElements()->pushRootNode(fragment); resetInsertionModeAppropriately(); m_tree.setForm(closestFormAncestor(contextElement)); } @@ -410,19 +415,6 @@ HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment ASSERT(!fragment->hasChildNodes()); } -void HTMLTreeBuilder::FragmentParsingContext::finished() -{ - if (!m_contextElement) - return; - - // The HTML5 spec says to return the children of the fragment's document - // element when there is a context element (10.4.7). - RefPtr<ContainerNode> documentElement = firstElementChild(m_fragment); - m_fragment->removeChildren(); - ASSERT(documentElement); - m_fragment->takeAllChildrenFrom(documentElement.get()); -} - HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext() { } @@ -453,7 +445,7 @@ void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token) // the U+0000 characters into replacement characters has compatibility // problems. m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode); - m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI); + m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentNode()->namespaceURI() != xhtmlNamespaceURI); } void HTMLTreeBuilder::processToken(AtomicHTMLToken& token) @@ -574,12 +566,12 @@ void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token) namespace { -bool isLi(const Element* element) +bool isLi(const ContainerNode* element) { return element->hasTagName(liTag); } -bool isDdOrDt(const Element* element) +bool isDdOrDt(const ContainerNode* element) { return element->hasTagName(ddTag) || element->hasTagName(dtTag); @@ -587,15 +579,16 @@ bool isDdOrDt(const Element* element) } -template <bool shouldClose(const Element*)> +template <bool shouldClose(const ContainerNode*)> void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token) { m_framesetOk = false; HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord(); while (1) { - Element* node = nodeRecord->element(); + ContainerNode* node = nodeRecord->node(); if (shouldClose(node)) { - processFakeEndTag(node->tagQName()); + ASSERT(node->isElementNode()); + processFakeEndTag(toElement(node)->tagQName()); break; } if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag)) @@ -786,7 +779,7 @@ void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token) } if (isNumberedHeaderTag(token.name())) { processFakePEndTagIfPInButtonScope(); - if (isNumberedHeaderTag(m_tree.currentElement()->localName())) { + if (isNumberedHeaderTag(m_tree.currentNode()->localName())) { parseError(token); m_tree.openElements()->pop(); } @@ -978,7 +971,7 @@ void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token) if (token.name() == rpTag || token.name() == rtTag) { if (m_tree.openElements()->inScope(rubyTag.localName())) { m_tree.generateImpliedEndTags(); - if (!m_tree.currentElement()->hasTagName(rubyTag)) { + if (!m_tree.currentNode()->hasTagName(rubyTag)) { parseError(token); m_tree.openElements()->popUntil(rubyTag.localName()); } @@ -1019,7 +1012,7 @@ void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token) bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup() { - if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) { + if (m_tree.currentNode() == m_tree.openElements()->rootNode()) { ASSERT(isParsingFragment()); // FIXME: parse error return false; @@ -1115,7 +1108,7 @@ void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token) namespace { -bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, Element* currentElement) +bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, ContainerNode* currentElement) { ASSERT(token.type() == HTMLToken::StartTag); if (currentElement->hasTagName(MathMLNames::miTag) @@ -1409,7 +1402,7 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token) return; } if (token.name() == optionTag) { - if (m_tree.currentElement()->hasTagName(optionTag)) { + if (m_tree.currentNode()->hasTagName(optionTag)) { AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName()); processEndTag(endOption); } @@ -1417,11 +1410,11 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token) return; } if (token.name() == optgroupTag) { - if (m_tree.currentElement()->hasTagName(optionTag)) { + if (m_tree.currentNode()->hasTagName(optionTag)) { AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName()); processEndTag(endOption); } - if (m_tree.currentElement()->hasTagName(optgroupTag)) { + if (m_tree.currentNode()->hasTagName(optgroupTag)) { AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName()); processEndTag(endOptgroup); } @@ -1543,20 +1536,24 @@ void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token) ASSERT(token.type() == HTMLToken::EndTag); HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); while (1) { - Element* node = record->element(); + ContainerNode* node = record->node(); if (node->hasLocalName(token.name())) { m_tree.generateImpliedEndTags(); - if (!m_tree.currentElement()->hasLocalName(token.name())) { + // FIXME: The ElementRecord pointed to by record might be deleted by + // the preceding call. Perhaps we should hold a RefPtr so that it + // stays alive for the duration of record's scope. + record = 0; + if (!m_tree.currentNode()->hasLocalName(token.name())) { parseError(token); // FIXME: This is either a bug in the spec, or a bug in our // implementation. Filed a bug with HTML5: // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080 // We might have already popped the node for the token in // generateImpliedEndTags, just abort. - if (!m_tree.openElements()->contains(node)) + if (!m_tree.openElements()->contains(toElement(node))) return; } - m_tree.openElements()->popUntilPopped(node); + m_tree.openElements()->popUntilPopped(toElement(node)); return; } if (isSpecialNode(node)) { @@ -1616,7 +1613,7 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token) } // 4. ASSERT(furthestBlock->isAbove(formattingElementRecord)); - Element* commonAncestor = formattingElementRecord->next()->element(); + ContainerNode* commonAncestor = formattingElementRecord->next()->node(); // 5. HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement); // 6. @@ -1668,7 +1665,9 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token) m_tree.fosterParent(lastNode->element()); else { commonAncestor->parserAddChild(lastNode->element()); - if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached()) + ASSERT(lastNode->node()->isElementNode()); + ASSERT(lastNode->element()->parentNode()); + if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached()) lastNode->element()->lazyAttach(); } // 8 @@ -1700,8 +1699,8 @@ void HTMLTreeBuilder::resetInsertionModeAppropriately() bool last = false; HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord(); while (1) { - Element* node = nodeRecord->element(); - if (node == m_tree.openElements()->bottom()) { + ContainerNode* node = nodeRecord->node(); + if (node == m_tree.openElements()->rootNode()) { ASSERT(isParsingFragment()); last = true; node = m_fragmentContext.contextElement(); @@ -1831,7 +1830,7 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token) return; } m_tree.generateImpliedEndTags(); - if (!m_tree.currentElement()->hasLocalName(token.name())) + if (!m_tree.currentNode()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); m_tree.activeFormattingElements()->clearToLastMarker(); @@ -1901,7 +1900,7 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token) return; } m_tree.generateImpliedEndTags(); - if (!m_tree.currentElement()->hasLocalName(token.name())) + if (!m_tree.currentNode()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); return; @@ -1926,7 +1925,7 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token) return; } m_tree.generateImpliedEndTagsWithExclusion(token.name()); - if (!m_tree.currentElement()->hasLocalName(token.name())) + if (!m_tree.currentNode()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); return; @@ -1937,7 +1936,7 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token) return; } m_tree.generateImpliedEndTagsWithExclusion(token.name()); - if (!m_tree.currentElement()->hasLocalName(token.name())) + if (!m_tree.currentNode()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); return; @@ -1949,7 +1948,7 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token) return; } m_tree.generateImpliedEndTagsWithExclusion(token.name()); - if (!m_tree.currentElement()->hasLocalName(token.name())) + if (!m_tree.currentNode()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); return; @@ -1960,7 +1959,7 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token) return; } m_tree.generateImpliedEndTags(); - if (!m_tree.currentElement()->hasLocalName(token.name())) + if (!m_tree.currentNode()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilNumberedHeaderElementPopped(); return; @@ -1977,7 +1976,7 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token) return; } m_tree.generateImpliedEndTags(); - if (!m_tree.currentElement()->hasLocalName(token.name())) + if (!m_tree.currentNode()->hasLocalName(token.name())) parseError(token); m_tree.openElements()->popUntilPopped(token.name()); m_tree.activeFormattingElements()->clearToLastMarker(); @@ -2221,7 +2220,7 @@ void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token) case InFramesetMode: ASSERT(insertionMode() == InFramesetMode); if (token.name() == framesetTag) { - if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) { + if (m_tree.currentNode() == m_tree.openElements()->rootNode()) { parseError(token); return; } @@ -2261,9 +2260,9 @@ void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token) case InSelectMode: ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode); if (token.name() == optgroupTag) { - if (m_tree.currentElement()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag)) + if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag)) processFakeEndTag(optionTag); - if (m_tree.currentElement()->hasTagName(optgroupTag)) { + if (m_tree.currentNode()->hasTagName(optgroupTag)) { m_tree.openElements()->pop(); return; } @@ -2271,7 +2270,7 @@ void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token) return; } if (token.name() == optionTag) { - if (m_tree.currentElement()->hasTagName(optionTag)) { + if (m_tree.currentNode()->hasTagName(optionTag)) { m_tree.openElements()->pop(); return; } @@ -2294,23 +2293,29 @@ void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token) processEndTag(token); break; case InForeignContentMode: - if (token.name() == SVGNames::scriptTag && m_tree.currentElement()->hasTagName(SVGNames::scriptTag)) { + if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) { notImplemented(); return; } - if (m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI) { + if (m_tree.currentNode()->namespaceURI() != xhtmlNamespaceURI) { // FIXME: This code just wants an Element* iterator, instead of an ElementRecord* HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord(); - if (!nodeRecord->element()->hasLocalName(token.name())) + if (!nodeRecord->node()->hasLocalName(token.name())) parseError(token); while (1) { - if (nodeRecord->element()->hasLocalName(token.name())) { + if (nodeRecord->node()->hasLocalName(token.name())) { m_tree.openElements()->popUntilPopped(nodeRecord->element()); resetForeignInsertionMode(); return; } nodeRecord = nodeRecord->next(); - if (nodeRecord->element()->namespaceURI() == xhtmlNamespaceURI) + + if (nodeRecord->node()->nodeType() == Node::DOCUMENT_FRAGMENT_NODE) { + ASSERT(isParsingFragment()); + break; + } + + if (nodeRecord->node()->namespaceURI() == xhtmlNamespaceURI) break; } } @@ -2609,11 +2614,11 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) case InSelectInTableMode: case InSelectMode: ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode); - if (m_tree.currentElement() != m_tree.openElements()->htmlElement()) + if (m_tree.currentNode() != m_tree.openElements()->rootNode()) parseError(token); break; case InColumnGroupMode: - if (m_tree.currentElement() == m_tree.openElements()->htmlElement()) { + if (m_tree.currentNode() == m_tree.openElements()->rootNode()) { ASSERT(isParsingFragment()); return; // FIXME: Should we break here instead of returning? } @@ -2634,7 +2639,7 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) return; case TextMode: parseError(token); - if (m_tree.currentElement()->hasTagName(scriptTag)) + if (m_tree.currentNode()->hasTagName(scriptTag)) notImplemented(); // mark the script element as "already started". m_tree.openElements()->pop(); setInsertionMode(m_originalInsertionMode); @@ -2642,7 +2647,7 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) processEndOfFile(token); return; } - ASSERT(m_tree.openElements()->top()); + ASSERT(m_tree.currentNode()); m_tree.openElements()->popAll(); } @@ -2794,28 +2799,16 @@ void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token) void HTMLTreeBuilder::finished() { - ASSERT(m_document); - if (isParsingFragment()) { - m_fragmentContext.finished(); + if (isParsingFragment()) return; - } - + + ASSERT(m_document); // Warning, this may detach the parser. Do not do anything else after this. m_document->finishedParsing(); } void HTMLTreeBuilder::parseError(AtomicHTMLToken&) { - DEFINE_STATIC_LOCAL(String, parseErrorMessage, ("HTML parse error (recovered gracefully)")); - - if (!m_reportErrors) - return; - - DOMWindow* domWindow = m_document->domWindow(); - if (!domWindow) - return; - - domWindow->console()->addMessage(HTMLMessageSource, LogMessageType, WarningMessageLevel, parseErrorMessage, m_parser->lineNumber(), m_document->url().string()); } bool HTMLTreeBuilder::scriptEnabled(Frame* frame) diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.h b/Source/WebCore/html/parser/HTMLTreeBuilder.h index 0cec667..5bdc44b 100644 --- a/Source/WebCore/html/parser/HTMLTreeBuilder.h +++ b/Source/WebCore/html/parser/HTMLTreeBuilder.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * Copyright (C) 2011 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -178,7 +179,7 @@ private: void closeTheCell(); - template <bool shouldClose(const Element*)> + template <bool shouldClose(const ContainerNode*)> void processCloseWhenNestedTag(AtomicHTMLToken&); bool m_framesetOk; @@ -215,8 +216,6 @@ private: Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; } FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; } - void finished(); - private: DocumentFragment* m_fragment; Element* m_contextElement; diff --git a/Source/WebCore/html/parser/NestingLevelIncrementer.h b/Source/WebCore/html/parser/NestingLevelIncrementer.h index 8155635..bf08425 100644 --- a/Source/WebCore/html/parser/NestingLevelIncrementer.h +++ b/Source/WebCore/html/parser/NestingLevelIncrementer.h @@ -26,6 +26,8 @@ #ifndef NestingLevelIncrementer_h #define NestingLevelIncrementer_h +#include <wtf/Noncopyable.h> + namespace WebCore { class NestingLevelIncrementer { diff --git a/Source/WebCore/html/parser/XSSFilter.cpp b/Source/WebCore/html/parser/XSSFilter.cpp index de31f76..ddc3318 100644 --- a/Source/WebCore/html/parser/XSSFilter.cpp +++ b/Source/WebCore/html/parser/XSSFilter.cpp @@ -245,10 +245,14 @@ bool XSSFilter::filterTokenInitial(HTMLToken& token) didBlockScript |= filterEmbedToken(token); else if (hasName(token, appletTag)) didBlockScript |= filterAppletToken(token); + else if (hasName(token, iframeTag)) + didBlockScript |= filterIframeToken(token); else if (hasName(token, metaTag)) didBlockScript |= filterMetaToken(token); else if (hasName(token, baseTag)) didBlockScript |= filterBaseToken(token); + else if (hasName(token, formTag)) + didBlockScript |= filterFormToken(token); return didBlockScript; } @@ -351,6 +355,15 @@ bool XSSFilter::filterAppletToken(HTMLToken& token) return didBlockScript; } +bool XSSFilter::filterIframeToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, iframeTag)); + + return eraseAttributeIfInjected(token, srcAttr); +} + bool XSSFilter::filterMetaToken(HTMLToken& token) { ASSERT(m_state == Initial); @@ -369,6 +382,15 @@ bool XSSFilter::filterBaseToken(HTMLToken& token) return eraseAttributeIfInjected(token, hrefAttr); } +bool XSSFilter::filterFormToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, formTag)); + + return eraseAttributeIfInjected(token, actionAttr); +} + bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token) { DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)")); diff --git a/Source/WebCore/html/parser/XSSFilter.h b/Source/WebCore/html/parser/XSSFilter.h index 2c7d428..c9ba12e 100644 --- a/Source/WebCore/html/parser/XSSFilter.h +++ b/Source/WebCore/html/parser/XSSFilter.h @@ -58,8 +58,10 @@ private: bool filterParamToken(HTMLToken&); bool filterEmbedToken(HTMLToken&); bool filterAppletToken(HTMLToken&); + bool filterIframeToken(HTMLToken&); bool filterMetaToken(HTMLToken&); bool filterBaseToken(HTMLToken&); + bool filterFormToken(HTMLToken&); bool eraseDangerousAttributesIfInjected(HTMLToken&); bool eraseAttributeIfInjected(HTMLToken&, const QualifiedName&, const String& replacementValue = String()); diff --git a/Source/WebCore/html/parser/create-html-entity-table b/Source/WebCore/html/parser/create-html-entity-table index e6132bc..92fb39c 100755 --- a/Source/WebCore/html/parser/create-html-entity-table +++ b/Source/WebCore/html/parser/create-html-entity-table @@ -58,7 +58,8 @@ def offset_table_entry(offset): program_name = os.path.basename(__file__) if len(sys.argv) < 4 or sys.argv[1] != "-o": - print >> sys.stderr, "Usage: %s -o OUTPUT_FILE INPUT_FILE" % program_name + # Python 3, change to: print("Usage: %s -o OUTPUT_FILE INPUT_FILE" % program_name, file=sys.stderr) + sys.stderr.write("Usage: %s -o OUTPUT_FILE INPUT_FILE\n" % program_name) exit(1) output_path = sys.argv[2] @@ -68,12 +69,12 @@ html_entity_names_file = open(input_path) entries = list(csv.reader(html_entity_names_file)) html_entity_names_file.close() -entries.sort(lambda a, b: cmp(a[ENTITY], b[ENTITY])) +entries.sort(key = lambda entry: entry[ENTITY]) entity_count = len(entries) output_file = open(output_path, "w") -print >> output_file, """/* +output_file.write("""/* * Copyright (C) 2010 Google, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without @@ -107,15 +108,15 @@ print >> output_file, """/* namespace WebCore { namespace { -""" +""") for entry in entries: - print >> output_file, "const UChar %sEntityName[] = %s;" % ( + output_file.write("const UChar %sEntityName[] = %s;" % ( convert_entity_to_cpp_name(entry[ENTITY]), - convert_entity_to_uchar_array(entry[ENTITY])) + convert_entity_to_uchar_array(entry[ENTITY]))) -print >> output_file, """ -HTMLEntityTableEntry staticEntityTable[%s] = {""" % entity_count +output_file.write(""" +HTMLEntityTableEntry staticEntityTable[%s] = {""" % entity_count) index = {} offset = 0 @@ -123,26 +124,26 @@ for entry in entries: letter = entry[ENTITY][0] if not index.get(letter): index[letter] = offset - print >> output_file, ' { %sEntityName, %s, %s },' % ( + output_file.write(' { %sEntityName, %s, %s },' % ( convert_entity_to_cpp_name(entry[ENTITY]), len(entry[ENTITY]), - convert_value_to_int(entry[VALUE])) + convert_value_to_int(entry[VALUE]))) offset += 1 -print >> output_file, """}; -""" +output_file.write("""}; +""") -print >> output_file, "const HTMLEntityTableEntry* uppercaseOffset[] = {" -for letter in string.uppercase: - print >> output_file, offset_table_entry(index[letter]) -print >> output_file, offset_table_entry(index['a']) -print >> output_file, """}; +output_file.write("const HTMLEntityTableEntry* uppercaseOffset[] = {") +for letter in string.ascii_uppercase: + output_file.write(offset_table_entry(index[letter])) +output_file.write(offset_table_entry(index['a'])) +output_file.write("""}; -const HTMLEntityTableEntry* lowercaseOffset[] = {""" -for letter in string.lowercase: - print >> output_file, offset_table_entry(index[letter]) -print >> output_file, offset_table_entry(entity_count) -print >> output_file, """}; +const HTMLEntityTableEntry* lowercaseOffset[] = {""") +for letter in string.ascii_lowercase: + output_file.write(offset_table_entry(index[letter])) +output_file.write(offset_table_entry(entity_count)) +output_file.write("""}; } @@ -175,4 +176,4 @@ const HTMLEntityTableEntry* HTMLEntityTable::lastEntry() } } -""" % entity_count +""" % entity_count) |