diff options
Diffstat (limited to 'WebCore/html/parser')
-rw-r--r-- | WebCore/html/parser/HTMLConstructionSite.cpp | 20 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLDocumentParser.cpp | 29 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLDocumentParser.h | 4 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLElementStack.cpp | 27 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLElementStack.h | 2 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLParserIdioms.cpp | 144 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLParserIdioms.h | 66 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLPreloadScanner.cpp | 12 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLScriptRunner.cpp | 37 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLScriptRunner.h | 2 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLTokenizer.cpp | 64 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLTokenizer.h | 36 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLTreeBuilder.cpp | 187 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLTreeBuilder.h | 15 | ||||
-rw-r--r-- | WebCore/html/parser/HTMLViewSourceParser.cpp | 12 | ||||
-rw-r--r-- | WebCore/html/parser/NestingLevelIncrementer.h | 50 |
16 files changed, 430 insertions, 277 deletions
diff --git a/WebCore/html/parser/HTMLConstructionSite.cpp b/WebCore/html/parser/HTMLConstructionSite.cpp index 0172b3d..6215bba 100644 --- a/WebCore/html/parser/HTMLConstructionSite.cpp +++ b/WebCore/html/parser/HTMLConstructionSite.cpp @@ -114,19 +114,19 @@ PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRe void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild) { + // FIXME: It's unfortunate that we need to hold a reference to child + // here to call attach(). We should investigate whether we can rely on + // |site.parent| to hold a ref at this point. RefPtr<Node> child = prpChild; - if (site.nextChild) { + if (site.nextChild) site.parent->parserInsertBefore(child, site.nextChild); - if (site.parent->attached() && !child->attached()) - child->attach(); - return; - } - site.parent->parserAddChild(child); - // It's slightly unfortunate that we need to hold a reference to child - // here to call attach(). We should investigate whether we can rely on - // |site.parent| to hold a ref at this point. - if (site.parent->attached() && !child->attached()) + else + site.parent->parserAddChild(child); + + // JavaScript run from beforeload (or DOM Mutation or event handlers) + // might have removed the child, in which case we should not attach it. 
+ if (child->parentNode() && site.parent->attached() && !child->attached()) child->attach(); } diff --git a/WebCore/html/parser/HTMLDocumentParser.cpp b/WebCore/html/parser/HTMLDocumentParser.cpp index a442d54..2da403f 100644 --- a/WebCore/html/parser/HTMLDocumentParser.cpp +++ b/WebCore/html/parser/HTMLDocumentParser.cpp @@ -36,6 +36,8 @@ #include "HTMLScriptRunner.h" #include "HTMLTreeBuilder.h" #include "HTMLDocument.h" +#include "NestingLevelIncrementer.h" +#include "Settings.h" #include "XSSAuditor.h" #include <wtf/CurrentTime.h> @@ -53,23 +55,6 @@ using namespace HTMLNames; namespace { -class NestingLevelIncrementer : public Noncopyable { -public: - explicit NestingLevelIncrementer(int& counter) - : m_counter(&counter) - { - ++(*m_counter); - } - - ~NestingLevelIncrementer() - { - --(*m_counter); - } - -private: - int* m_counter; -}; - // This is a direct transcription of step 4 from: // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors) @@ -99,7 +84,7 @@ HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bo HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors) : ScriptableDocumentParser(document) - , m_tokenizer(HTMLTokenizer::create()) + , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(document))) , m_scriptRunner(HTMLScriptRunner::create(document, this)) , m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), document, reportErrors)) , m_parserScheduler(HTMLParserScheduler::create(this)) @@ -112,7 +97,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors // minimize code duplication between these constructors. 
HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission) : ScriptableDocumentParser(fragment->document()) - , m_tokenizer(HTMLTokenizer::create()) + , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document()))) , m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), fragment, contextElement, scriptingPermission)) , m_endWasDelayed(false) , m_writeNestingLevel(0) @@ -547,5 +532,11 @@ void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFra ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151> parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. } + +bool HTMLDocumentParser::usePreHTML5ParserQuirks(Document* document) +{ + ASSERT(document); + return document->settings() && document->settings()->usePreHTML5ParserQuirks(); +} } diff --git a/WebCore/html/parser/HTMLDocumentParser.h b/WebCore/html/parser/HTMLDocumentParser.h index 6d5b6d7..e65a582 100644 --- a/WebCore/html/parser/HTMLDocumentParser.h +++ b/WebCore/html/parser/HTMLDocumentParser.h @@ -66,6 +66,8 @@ public: void resumeParsingAfterYield(); static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, FragmentScriptingPermission = FragmentScriptingAllowed); + + static bool usePreHTML5ParserQuirks(Document*); protected: virtual void insert(const SegmentedString&); @@ -139,7 +141,7 @@ private: OwnPtr<HTMLParserScheduler> m_parserScheduler; bool m_endWasDelayed; - int m_writeNestingLevel; + unsigned m_writeNestingLevel; }; } diff --git a/WebCore/html/parser/HTMLElementStack.cpp b/WebCore/html/parser/HTMLElementStack.cpp index b6f4111..123778d 100644 --- a/WebCore/html/parser/HTMLElementStack.cpp +++ b/WebCore/html/parser/HTMLElementStack.cpp @@ -28,6 +28,8 @@ #include "Element.h" #include "HTMLNames.h" +#include "MathMLNames.h" +#include "SVGNames.h" #include <wtf/PassOwnPtr.h> #if 
ENABLE(SVG) @@ -92,6 +94,19 @@ inline bool isTableRowScopeMarker(Element* element) || element->hasTagName(htmlTag); } +inline bool isForeignContentScopeMarker(Element* element) +{ + return element->hasTagName(MathMLNames::miTag) + || element->hasTagName(MathMLNames::moTag) + || element->hasTagName(MathMLNames::mnTag) + || element->hasTagName(MathMLNames::msTag) + || element->hasTagName(MathMLNames::mtextTag) + || element->hasTagName(SVGNames::foreignObjectTag) + || element->hasTagName(SVGNames::descTag) + || element->hasTagName(SVGNames::titleTag) + || element->namespaceURI() == HTMLNames::xhtmlNamespaceURI; +} + inline bool isButtonScopeMarker(Element* element) { return isScopeMarker(element) @@ -186,12 +201,6 @@ void HTMLElementStack::pop() popCommon(); } -void HTMLElementStack::popUntilElementWithNamespace(const AtomicString& namespaceURI) -{ - while (top()->namespaceURI() != namespaceURI) - pop(); -} - void HTMLElementStack::popUntil(const AtomicString& tagName) { while (!top()->hasLocalName(tagName)) { @@ -247,6 +256,12 @@ void HTMLElementStack::popUntilTableRowScopeMarker() pop(); } +void HTMLElementStack::popUntilForeignContentScopeMarker() +{ + while (!isForeignContentScopeMarker(top())) + pop(); +} + void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr<Element> element) { ASSERT(!m_top); // <html> should always be the bottom of the stack. diff --git a/WebCore/html/parser/HTMLElementStack.h b/WebCore/html/parser/HTMLElementStack.h index 73cfcb1..47fa603 100644 --- a/WebCore/html/parser/HTMLElementStack.h +++ b/WebCore/html/parser/HTMLElementStack.h @@ -90,7 +90,6 @@ public: void pop(); void popUntil(const AtomicString& tagName); - void popUntilElementWithNamespace(const AtomicString& namespaceURI); void popUntil(Element*); void popUntilPopped(const AtomicString& tagName); void popUntilPopped(Element*); @@ -98,6 +97,7 @@ public: void popUntilTableScopeMarker(); // "clear the stack back to a table context" in the spec. 
void popUntilTableBodyScopeMarker(); // "clear the stack back to a table body context" in the spec. void popUntilTableRowScopeMarker(); // "clear the stack back to a table row context" in the spec. + void popUntilForeignContentScopeMarker(); void popHTMLHeadElement(); void popHTMLBodyElement(); void popAll(); diff --git a/WebCore/html/parser/HTMLParserIdioms.cpp b/WebCore/html/parser/HTMLParserIdioms.cpp new file mode 100644 index 0000000..a558cf5 --- /dev/null +++ b/WebCore/html/parser/HTMLParserIdioms.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "HTMLParserIdioms.h" + +#include <wtf/MathExtras.h> +#include <wtf/dtoa.h> +#include <wtf/text/AtomicString.h> + +namespace WebCore { + +String stripLeadingAndTrailingHTMLSpaces(const String& string) +{ + const UChar* characters = string.characters(); + unsigned length = string.length(); + + unsigned numLeadingSpaces; + for (numLeadingSpaces = 0; numLeadingSpaces < length; ++numLeadingSpaces) { + if (isNotHTMLSpace(characters[numLeadingSpaces])) + break; + } + + if (numLeadingSpaces == length) + return emptyAtom; + + unsigned numTrailingSpaces; + for (numTrailingSpaces = 0; numTrailingSpaces < length; ++numTrailingSpaces) { + if (isNotHTMLSpace(characters[length - numTrailingSpaces - 1])) + break; + } + + ASSERT(numLeadingSpaces + numTrailingSpaces < length); + + return string.substring(numLeadingSpaces, length - numLeadingSpaces - numTrailingSpaces); // substring() takes (position, length), not (start, end). +} + +String serializeForNumberType(double number) +{ + // According to HTML5, "the best representation of the number n as a floating + // point number" is a string produced by applying ToString() to n. + NumberToStringBuffer buffer; + unsigned length = numberToString(number, buffer); + return String(buffer, length); +} + +bool parseToDoubleForNumberType(const String& string, double* result) +{ + // See HTML5 2.4.4.3 `Real numbers.' + + // String::toDouble() accepts leading + and whitespace characters, which are not valid here. + UChar firstCharacter = string[0]; + if (firstCharacter != '-' && !isASCIIDigit(firstCharacter)) + return false; + + bool valid = false; + double value = string.toDouble(&valid); + if (!valid) + return false; + + // NaN and infinity are considered valid by String::toDouble, but not valid here. + if (!isfinite(value)) + return false; + + if (result) { + // The following expression converts -0 to +0. + *result = value ? 
value : 0; + } + + return true; +} + +// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers +bool parseHTMLInteger(const String& input, int& value) +{ + // Step 1 + // Step 2 + const UChar* position = input.characters(); + const UChar* end = position + input.length(); + + // Step 3 + int sign = 1; + + // Step 4 + while (position < end) { + if (!isHTMLSpace(*position)) + break; + ++position; + } + + // Step 5 + if (position == end) + return false; + ASSERT(position < end); + + // Step 6 + if (*position == '-') { + sign = -1; + ++position; + } else if (*position == '+') + ++position; + if (position == end) + return false; + ASSERT(position < end); + + // Step 7 + if (!isASCIIDigit(*position)) + return false; + + // Step 8 + Vector<UChar, 16> digits; + while (position < end) { + if (!isASCIIDigit(*position)) + break; + digits.append(*position++); + } + + // Step 9 + value = sign * charactersToIntStrict(digits.data(), digits.size()); + return true; +} + +} diff --git a/WebCore/html/parser/HTMLParserIdioms.h b/WebCore/html/parser/HTMLParserIdioms.h new file mode 100644 index 0000000..f4704f7 --- /dev/null +++ b/WebCore/html/parser/HTMLParserIdioms.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HTMLParserIdioms_h +#define HTMLParserIdioms_h + +#include <wtf/Forward.h> +#include <wtf/unicode/Unicode.h> + +namespace WebCore { + +// Space characters as defined by the HTML specification. +bool isHTMLSpace(UChar); +bool isNotHTMLSpace(UChar); + +// Strip leading and trailing whitespace as defined by the HTML specification. +String stripLeadingAndTrailingHTMLSpaces(const String&); + +// An implementation of the HTML specification's algorithm to convert a number to a string for number and range types. +String serializeForNumberType(double); + +// Convert the specified string to a double. If the conversion fails, the return value is false. +// Leading or trailing illegal characters cause failure, as does passing an empty string. +// The double* parameter may be 0 to check if the string can be parsed without getting the result. +bool parseToDoubleForNumberType(const String&, double*); + +// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers +bool parseHTMLInteger(const String&, int&); + +// Inline implementations of some of the functions declared above. + +inline bool isHTMLSpace(UChar character) +{ + // FIXME: Consider branch permutations as we did in isASCIISpace. 
+ return character == '\t' || character == '\x0A' || character == '\x0C' || character == '\x0D' || character == ' '; +} + +inline bool isNotHTMLSpace(UChar character) +{ + return !isHTMLSpace(character); +} + +} + +#endif diff --git a/WebCore/html/parser/HTMLPreloadScanner.cpp b/WebCore/html/parser/HTMLPreloadScanner.cpp index 5283fa3..7859dd8 100644 --- a/WebCore/html/parser/HTMLPreloadScanner.cpp +++ b/WebCore/html/parser/HTMLPreloadScanner.cpp @@ -31,8 +31,8 @@ #include "CSSHelper.h" #include "CachedResourceLoader.h" #include "Document.h" +#include "HTMLDocumentParser.h" #include "HTMLTokenizer.h" -#include "HTMLTreeBuilder.h" #include "HTMLLinkElement.h" #include "HTMLNames.h" @@ -121,7 +121,7 @@ private: HTMLPreloadScanner::HTMLPreloadScanner(Document* document) : m_document(document) , m_cssScanner(document) - , m_tokenizer(HTMLTokenizer::create()) + , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document))) , m_bodySeen(false) , m_inStyle(false) { @@ -157,13 +157,7 @@ void HTMLPreloadScanner::processToken() return; PreloadTask task(m_token); - m_tokenizer->setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer->state(), task.tagName(), m_document->frame())); - if (task.tagName() == scriptTag) { - // The tree builder handles scriptTag separately from the other tokenizer - // state adjustments, so we need to handle it separately too. 
- ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState); - m_tokenizer->setState(HTMLTokenizer::ScriptDataState); - } + m_tokenizer->updateStateFor(task.tagName(), m_document->frame()); if (task.tagName() == bodyTag) m_bodySeen = true; diff --git a/WebCore/html/parser/HTMLScriptRunner.cpp b/WebCore/html/parser/HTMLScriptRunner.cpp index e1fc120..4f54f42 100644 --- a/WebCore/html/parser/HTMLScriptRunner.cpp +++ b/WebCore/html/parser/HTMLScriptRunner.cpp @@ -35,6 +35,7 @@ #include "HTMLScriptRunnerHost.h" #include "HTMLInputStream.h" #include "HTMLNames.h" +#include "NestingLevelIncrementer.h" #include "NotImplemented.h" #include "ScriptElement.h" #include "ScriptSourceCode.h" @@ -43,24 +44,6 @@ namespace WebCore { using namespace HTMLNames; -// FIXME: Factor out to avoid duplication with HTMLDocumentParser. -class NestingLevelIncrementer : public Noncopyable { -public: - explicit NestingLevelIncrementer(unsigned& nestingLevel) - : m_nestingLevel(&nestingLevel) - { - ++(*m_nestingLevel); - } - - ~NestingLevelIncrementer() - { - --(*m_nestingLevel); - } - -private: - unsigned* m_nestingLevel; -}; - HTMLScriptRunner::HTMLScriptRunner(Document* document, HTMLScriptRunnerHost* host) : m_document(document) , m_host(host) @@ -155,20 +138,16 @@ void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendi if (errorOccurred) scriptElement->dispatchEvent(createScriptErrorEvent()); else { - executeScript(scriptElement.get(), sourceCode); + executeScript(sourceCode); scriptElement->dispatchEvent(createScriptLoadEvent()); } } ASSERT(!m_scriptNestingLevel); } -void HTMLScriptRunner::executeScript(Element* element, const ScriptSourceCode& sourceCode) const +void HTMLScriptRunner::executeScript(const ScriptSourceCode& sourceCode) const { ASSERT(m_document); - ScriptElement* scriptElement = toScriptElement(element); - ASSERT(scriptElement); - if (!scriptElement->shouldExecuteAsJavaScript()) - return; ASSERT(isExecutingScript()); if (!m_document->frame()) 
return; @@ -317,9 +296,11 @@ void HTMLScriptRunner::runScript(Element* script, int startingLineNumber) InsertionPointRecord insertionPointRecord(m_host->inputStream()); NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel); - // Check script type and language, current code uses ScriptElement::shouldExecuteAsJavaScript(), but that may not be HTML5 compliant. - notImplemented(); // event for support - + ScriptElement* scriptElement = toScriptElement(script); + ASSERT(scriptElement); + if (!scriptElement->shouldExecuteAsJavaScript()) + return; + if (script->hasAttribute(srcAttr)) { if (script->hasAttribute(asyncAttr)) // Async takes precendence over defer. return; // Asynchronous scripts handle themselves. @@ -335,7 +316,7 @@ void HTMLScriptRunner::runScript(Element* script, int startingLineNumber) // ASSERT(document()->haveStylesheetsLoaded()); ASSERT(isExecutingScript()); ScriptSourceCode sourceCode(script->textContent(), documentURLForScriptExecution(m_document), startingLineNumber); - executeScript(script, sourceCode); + executeScript(sourceCode); } } } diff --git a/WebCore/html/parser/HTMLScriptRunner.h b/WebCore/html/parser/HTMLScriptRunner.h index 47c96fd..be21dd2 100644 --- a/WebCore/html/parser/HTMLScriptRunner.h +++ b/WebCore/html/parser/HTMLScriptRunner.h @@ -68,7 +68,7 @@ private: void executeParsingBlockingScript(); void executePendingScriptAndDispatchEvent(PendingScript&); - void executeScript(Element*, const ScriptSourceCode&) const; + void executeScript(const ScriptSourceCode&) const; bool haveParsingBlockingScript() const; bool executeParsingBlockingScripts(); diff --git a/WebCore/html/parser/HTMLTokenizer.cpp b/WebCore/html/parser/HTMLTokenizer.cpp index f5405ff..305fca2 100644 --- a/WebCore/html/parser/HTMLTokenizer.cpp +++ b/WebCore/html/parser/HTMLTokenizer.cpp @@ -30,6 +30,7 @@ #include "HTMLEntityParser.h" #include "HTMLToken.h" +#include "HTMLTreeBuilder.h" #include "HTMLNames.h" #include "NotImplemented.h" #include 
<wtf/ASCIICType.h> @@ -102,8 +103,9 @@ inline bool isEndTagBufferingState(HTMLTokenizer::State state) } -HTMLTokenizer::HTMLTokenizer() +HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks) : m_inputStreamPreprocessor(this) + , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks) { reset(); } @@ -171,7 +173,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source) // Sometimes there's more complicated logic in the spec that separates when // we consume the next input character and when we switch to a particular -// state. We handle those cases by advancing the source directly and using +// state. We handle those cases by advancing the source directly and using // this macro to switch to the indicated state. #define SWITCH_TO(stateName) \ do { \ @@ -277,7 +279,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody // Note that this logic is different than the generic \r\n collapsing - // handled in the input stream preprocessor. This logic is here as an + // handled in the input stream preprocessor. 
This logic is here as an // "authoring convenience" so folks can write: // // <pre> @@ -435,6 +437,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) ADVANCE_TO(SelfClosingStartTagState); else if (cc == '>') return emitAndResumeIn(source, DataState); + else if (m_usePreHTML5ParserQuirks && cc == '<') + return emitAndReconsumeIn(source, DataState); else if (isASCIIUpper(cc)) { m_token->appendToName(toLowerCase(cc)); ADVANCE_TO(TagNameState); @@ -876,6 +880,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) ADVANCE_TO(SelfClosingStartTagState); else if (cc == '>') return emitAndResumeIn(source, DataState); + else if (m_usePreHTML5ParserQuirks && cc == '<') + return emitAndReconsumeIn(source, DataState); else if (isASCIIUpper(cc)) { m_token->addNewAttribute(); m_token->beginAttributeName(source.numberOfCharactersConsumed()); @@ -908,6 +914,9 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) } else if (cc == '>') { m_token->endAttributeName(source.numberOfCharactersConsumed()); return emitAndResumeIn(source, DataState); + } else if (m_usePreHTML5ParserQuirks && cc == '<') { + m_token->endAttributeName(source.numberOfCharactersConsumed()); + return emitAndReconsumeIn(source, DataState); } else if (isASCIIUpper(cc)) { m_token->appendToAttributeName(toLowerCase(cc)); ADVANCE_TO(AttributeNameState); @@ -933,6 +942,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) ADVANCE_TO(BeforeAttributeValueState); else if (cc == '>') return emitAndResumeIn(source, DataState); + else if (m_usePreHTML5ParserQuirks && cc == '<') + return emitAndReconsumeIn(source, DataState); else if (isASCIIUpper(cc)) { m_token->addNewAttribute(); m_token->beginAttributeName(source.numberOfCharactersConsumed()); @@ -1054,7 +1065,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) m_token->appendToAttributeValue(*iter); } // We're supposed to switch back to the attribute 
value state that - // we were in when we were switched into this state. Rather than + // we were in when we were switched into this state. Rather than // keeping track of this explictly, we observe that the previous // state can be determined by m_additionalAllowedCharacter. if (m_additionalAllowedCharacter == '"') @@ -1075,6 +1086,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) ADVANCE_TO(SelfClosingStartTagState); else if (cc == '>') return emitAndResumeIn(source, DataState); + else if (m_usePreHTML5ParserQuirks && cc == '<') + return emitAndReconsumeIn(source, DataState); else if (cc == InputStreamPreprocessor::endOfFileMarker) { parseError(); RECONSUME_IN(DataState); @@ -1213,13 +1226,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) BEGIN_STATE(CommentEndState) { if (cc == '>') return emitAndResumeIn(source, DataState); - else if (isTokenizerWhitespace(cc)) { - parseError(); - m_token->appendToComment('-'); - m_token->appendToComment('-'); - m_token->appendToComment(cc); - ADVANCE_TO(CommentEndSpaceState); - } else if (cc == '!') { + else if (cc == '!') { parseError(); ADVANCE_TO(CommentEndBangState); } else if (cc == '-') { @@ -1260,24 +1267,6 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) } END_STATE() - BEGIN_STATE(CommentEndSpaceState) { - if (isTokenizerWhitespace(cc)) { - m_token->appendToComment(cc); - ADVANCE_TO(CommentEndSpaceState); - } else if (cc == '-') - ADVANCE_TO(CommentEndDashState); - else if (cc == '>') - return emitAndResumeIn(source, DataState); - else if (cc == InputStreamPreprocessor::endOfFileMarker) { - parseError(); - return emitAndReconsumeIn(source, DataState); - } else { - m_token->appendToComment(cc); - ADVANCE_TO(CommentState); - } - } - END_STATE() - BEGIN_STATE(DOCTYPEState) { if (isTokenizerWhitespace(cc)) ADVANCE_TO(BeforeDOCTYPENameState); @@ -1656,6 +1645,23 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) 
return false; } +void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame) +{ + if (tagName == textareaTag || tagName == titleTag) + setState(RCDATAState); + else if (tagName == plaintextTag) + setState(PLAINTEXTState); + else if (tagName == scriptTag) + setState(ScriptDataState); + else if (tagName == styleTag + || tagName == iframeTag + || tagName == xmpTag + || (tagName == noembedTag && HTMLTreeBuilder::pluginsEnabled(frame)) + || tagName == noframesTag + || (tagName == noscriptTag && HTMLTreeBuilder::scriptEnabled(frame))) + setState(RAWTEXTState); +} + inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) { return vectorEqualsString(m_temporaryBuffer, expectedString); diff --git a/WebCore/html/parser/HTMLTokenizer.h b/WebCore/html/parser/HTMLTokenizer.h index bab77f3..f16b049 100644 --- a/WebCore/html/parser/HTMLTokenizer.h +++ b/WebCore/html/parser/HTMLTokenizer.h @@ -36,6 +36,7 @@ namespace WebCore { class Element; +class Frame; class HTMLToken; class HTMLTokenizer : public Noncopyable { @@ -96,7 +97,6 @@ public: CommentEndDashState, CommentEndState, CommentEndBangState, - CommentEndSpaceState, DOCTYPEState, BeforeDOCTYPENameState, DOCTYPENameState, @@ -119,12 +119,12 @@ public: CDATASectionDoubleRightSquareBracketState, }; - static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer); } + static PassOwnPtr<HTMLTokenizer> create(bool usePreHTML5ParserQuirks) { return adoptPtr(new HTMLTokenizer(usePreHTML5ParserQuirks)); } ~HTMLTokenizer(); void reset(); - // This function returns true if it emits a token. Otherwise, callers + // This function returns true if it emits a token. Otherwise, callers // must provide the same (in progress) token on the next call (unless // they call reset() first). 
bool nextToken(SegmentedString&, HTMLToken&); @@ -135,6 +135,22 @@ public: State state() const { return m_state; } void setState(State state) { m_state = state; } + // Updates the tokenizer's state according to the given tag name. This is + // an approximation of how the tree builder would update the tokenizer's + // state. This method is useful for approximating HTML tokenization. To + // get exactly the correct tokenization, you need the real tree builder. + // + // The main failures in the approximation are as follows: + // + // * The first set of character tokens emitted for a <pre> element might + // contain an extra leading newline. + // * The replacement of U+0000 with U+FFFD will not be sensitive to the + // tree builder's insertion mode. + // * CDATA sections in foreign content will be tokenized as bogus comments + // instead of as character tokens. + // + void updateStateFor(const AtomicString& tagName, Frame*); + // Hack to skip leading newline in <pre>/<listing> for authoring ease. // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody void setSkipLeadingNewLineForListing(bool value) { m_skipLeadingNewLineForListing = value; } @@ -177,8 +193,8 @@ private: // Every branch in this function is expensive, so we have a // fast-reject branch for characters that don't require special - // handling. Please run the parser benchmark whenever you touch - // this function. It's very hot. + // handling. Please run the parser benchmark whenever you touch + // this function. It's very hot. 
static const UChar specialCharacterMask = '\n' | '\r' | '\0'; if (m_nextInputCharacter & ~specialCharacterMask) { m_skipNextNewLine = false; @@ -238,7 +254,7 @@ private: bool m_skipNextNewLine; }; - HTMLTokenizer(); + HTMLTokenizer(bool usePreHTML5ParserQuirks); inline bool processEntity(SegmentedString&); @@ -257,7 +273,7 @@ private: inline bool temporaryBufferIs(const String&); // Sometimes we speculatively consume input characters and we don't - // know whether they represent end tags or RCDATA, etc. These + // know whether they represent end tags or RCDATA, etc. These // functions help manage these state. inline void addToPossibleEndTag(UChar cc); inline void saveEndTagNameIfNeeded(); @@ -269,7 +285,7 @@ private: Vector<UChar, 32> m_appropriateEndTagName; - // m_token is owned by the caller. If nextToken is not on the stack, + // m_token is owned by the caller. If nextToken is not on the stack, // this member might be pointing to unallocated memory. HTMLToken* m_token; int m_lineNumber; @@ -282,7 +298,7 @@ private: Vector<UChar, 32> m_temporaryBuffer; // We occationally want to emit both a character token and an end tag - // token (e.g., when lexing script). We buffer the name of the end tag + // token (e.g., when lexing script). We buffer the name of the end tag // token here so we remember it next time we re-enter the tokenizer. 
Vector<UChar, 32> m_bufferedEndTagName; @@ -291,6 +307,8 @@ private: // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream InputStreamPreprocessor m_inputStreamPreprocessor; + + bool m_usePreHTML5ParserQuirks; }; } diff --git a/WebCore/html/parser/HTMLTreeBuilder.cpp b/WebCore/html/parser/HTMLTreeBuilder.cpp index 406bb6c..afac2a0 100644 --- a/WebCore/html/parser/HTMLTreeBuilder.cpp +++ b/WebCore/html/parser/HTMLTreeBuilder.cpp @@ -26,16 +26,17 @@ #include "config.h" #include "HTMLTreeBuilder.h" +#include "CharacterNames.h" #include "Comment.h" #include "DocumentFragment.h" #include "DocumentType.h" -#include "Element.h" #include "Frame.h" #include "HTMLDocument.h" #include "HTMLElementFactory.h" #include "HTMLFormElement.h" #include "HTMLHtmlElement.h" #include "HTMLNames.h" +#include "HTMLParserIdioms.h" #include "HTMLScriptElement.h" #include "HTMLToken.h" #include "HTMLTokenizer.h" @@ -44,15 +45,10 @@ #include "NotImplemented.h" #include "SVGNames.h" #include "ScriptController.h" -#include "Settings.h" #include "Text.h" #include "XLinkNames.h" #include "XMLNSNames.h" #include "XMLNames.h" -// FIXME: Remove this include once we find a home for the free functions that -// are using it. -#include <wtf/dtoa.h> -#include <wtf/UnusedParam.h> namespace WebCore { @@ -62,42 +58,19 @@ static const int uninitializedLineNumberValue = -1; namespace { -inline bool isTreeBuilderWhitepace(UChar c) +inline bool isHTMLSpaceOrReplacementCharacter(UChar character) { - // FIXME: Consider branch permutations. 
- return c == '\t' || c == '\x0A' || c == '\x0C' || c == '\x0D' || c == ' '; -} - -inline bool isNotTreeBuilderWhitepace(UChar c) -{ - return !isTreeBuilderWhitepace(c); -} - -inline bool isTreeBuilderWhitepaceOrReplacementCharacter(UChar c) -{ - return isTreeBuilderWhitepace(c) || c == 0xFFFD; -} - -template<bool isSpecialCharacter(UChar c)> -inline bool isAllSpecialCharacters(const String& string) -{ - const UChar* characters = string.characters(); - const unsigned length = string.length(); - for (unsigned i = 0; i < length; ++i) { - if (!isSpecialCharacter(characters[i])) - return false; - } - return true; + return isHTMLSpace(character) || character == replacementCharacter; } inline bool isAllWhitespace(const String& string) { - return isAllSpecialCharacters<isTreeBuilderWhitepace>(string); + return string.isAllSpecialCharacters<isHTMLSpace>(); } inline bool isAllWhitespaceOrReplacementCharacters(const String& string) { - return isAllSpecialCharacters<isTreeBuilderWhitepaceOrReplacementCharacter>(string); + return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>(); } bool isNumberedHeaderTag(const AtomicString& tagName) @@ -132,11 +105,14 @@ bool isTableBodyContextTag(const AtomicString& tagName) // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special bool isSpecialNode(Node* node) { + if (node->hasTagName(SVGNames::foreignObjectTag)) + return true; if (node->namespaceURI() != xhtmlNamespaceURI) return false; - // FIXME: This list is out of sync with the spec. 
const AtomicString& tagName = node->localName(); return tagName == addressTag + || tagName == appletTag + || tagName == areaTag || tagName == articleTag || tagName == asideTag || tagName == baseTag @@ -146,6 +122,7 @@ bool isSpecialNode(Node* node) || tagName == bodyTag || tagName == brTag || tagName == buttonTag + || tagName == captionTag || tagName == centerTag || tagName == colTag || tagName == colgroupTag @@ -158,6 +135,7 @@ bool isSpecialNode(Node* node) || tagName == dtTag || tagName == embedTag || tagName == fieldsetTag + || tagName == figcaptionTag || tagName == figureTag || tagName == footerTag || tagName == formTag @@ -176,12 +154,14 @@ bool isSpecialNode(Node* node) || tagName == liTag || tagName == linkTag || tagName == listingTag + || tagName == marqueeTag || tagName == menuTag || tagName == metaTag || tagName == navTag || tagName == noembedTag || tagName == noframesTag || tagName == noscriptTag + || tagName == objectTag || tagName == olTag || tagName == pTag || tagName == paramTag @@ -191,8 +171,12 @@ bool isSpecialNode(Node* node) || tagName == sectionTag || tagName == selectTag || tagName == styleTag + || tagName == summaryTag + || tagName == tableTag || isTableBodyContextTag(tagName) + || tagName == tdTag || tagName == textareaTag + || tagName == thTag || tagName == titleTag || tagName == trTag || tagName == ulTag @@ -268,17 +252,17 @@ public: void skipLeadingWhitespace() { - skipLeading<isTreeBuilderWhitepace>(); + skipLeading<isHTMLSpace>(); } String takeLeadingWhitespace() { - return takeLeading<isTreeBuilderWhitepace>(); + return takeLeading<isHTMLSpace>(); } String takeLeadingNonWhitespace() { - return takeLeading<isNotTreeBuilderWhitepace>(); + return takeLeading<isNotHTMLSpace>(); } String takeRemaining() @@ -301,7 +285,7 @@ public: Vector<UChar> whitespace; do { UChar cc = *m_current++; - if (isTreeBuilderWhitepace(cc)) + if (isHTMLSpace(cc)) whitespace.append(cc); } while (m_current < m_end); // Returning the null string when there aren't 
any whitespace @@ -402,7 +386,7 @@ HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext() } HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission) - : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL())) + : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI())) , m_fragment(fragment) , m_contextElement(contextElement) , m_scriptingPermission(scriptingPermission) @@ -441,25 +425,6 @@ PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(int& scriptStartLine) return m_scriptToProcess.release(); } -HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State state, const AtomicString& tagName, Frame* frame) -{ - if (tagName == textareaTag || tagName == titleTag) - return HTMLTokenizer::RCDATAState; - - if (tagName == styleTag - || tagName == iframeTag - || tagName == xmpTag - || (tagName == noembedTag && pluginsEnabled(frame)) - || tagName == noframesTag - || (tagName == noscriptTag && scriptEnabled(frame))) - return HTMLTokenizer::RAWTEXTState; - - if (tagName == plaintextTag) - return HTMLTokenizer::PLAINTEXTState; - - return state; -} - void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken) { AtomicHTMLToken token(rawToken); @@ -1121,8 +1086,6 @@ void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token) parseError(token); if (m_tree.form()) return; - // FIXME: This deviates from the spec: - // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10216 m_tree.insertHTMLFormElement(token, true); m_tree.openElements()->pop(); return; @@ -1477,7 +1440,6 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token) processStartTag(token); break; case InForeignContentMode: { - // FIXME: We're missing a bunch of if branches here. 
if (shouldProcessUsingSecondaryInsertionMode(token, m_tree.currentElement())) { processUsingSecondaryInsertionModeAndAdjustInsertionMode(token); return; @@ -1522,8 +1484,10 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token) || token.name() == ulTag || token.name() == varTag || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) { - m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI); - setInsertionMode(m_secondaryInsertionMode); + parseError(token); + m_tree.openElements()->popUntilForeignContentScopeMarker(); + if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope()) + setInsertionMode(m_secondaryInsertionMode); processStartTag(token); return; } @@ -1539,7 +1503,7 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token) break; } case TextMode: - notImplemented(); + ASSERT_NOT_REACHED(); break; } } @@ -1601,21 +1565,6 @@ HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElem return 0; } -// FIXME: This should have a whitty name. -// FIXME: This must be implemented in many other places in WebCore. 
-void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent) -{ - Node* child = oldParent->firstChild(); - while (child) { - Node* nextChild = child->nextSibling(); - oldParent->parserRemoveChild(child); - newParent->parserAddChild(child); - if (newParent->attached() && !child->attached()) - child->attach(); - child = nextChild; - } -} - // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token) { @@ -1708,7 +1657,7 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token) // 8 RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord); // 9 - reparentChildren(furthestBlock->element(), newElement.get()); + newElement->takeAllChildrenFrom(furthestBlock->element()); // 10 Element* furthestBlockElement = furthestBlock->element(); // FIXME: All this creation / parserAddChild / attach business should @@ -1886,9 +1835,6 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token) m_tree.openElements()->popUntilPopped(token.name()); m_tree.activeFormattingElements()->clearToLastMarker(); setInsertionMode(InRowMode); - // FIXME: The fragment case of this ASSERT is a spec bug: - // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10338 - ASSERT(m_tree.currentElement()->hasTagName(trTag) || (isParsingFragment() && m_fragmentContext.contextElement()->hasTagName(trTag))); return; } if (token.name() == bodyTag @@ -1902,8 +1848,6 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token) || isTableBodyContextTag(token.name())) { if (!m_tree.openElements()->inTableScope(token.name())) { ASSERT(isParsingFragment()); - // FIXME: It is unclear what the exact ASSERT should be. 
- // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10098 parseError(token); return; } @@ -2020,10 +1964,6 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token) m_tree.openElements()->popUntilNumberedHeaderElementPopped(); return; } - if (token.name() == "sarcasm") { - notImplemented(); // Take a deep breath. - return; - } if (isFormattingTag(token.name())) { callTheAdoptionAgency(token); return; @@ -2608,14 +2548,15 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) // Fall through case InBodyMode: case InCellMode: - ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode); - notImplemented(); // Emit parse error based on what elemtns are still open. + case InCaptionMode: + case InRowMode: + ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode); + notImplemented(); // Emit parse error based on what elements are still open. break; case AfterBodyMode: case AfterAfterBodyMode: ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode); - notImplemented(); - break; + return; case InHeadNoscriptMode: ASSERT(insertionMode() == InHeadNoscriptMode); defaultForInHeadNoscript(); @@ -2647,9 +2588,11 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) return; case InForeignContentMode: parseError(token); - // FIXME: Following the spec would infinitely recurse on <svg><svg> - // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10115 - m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI); + m_tree.openElements()->popUntilForeignContentScopeMarker(); + // FIXME: The spec adds the following condition before setting the + // insertion mode. However, this condition causes an infinite loop. 
+ // See http://www.w3.org/Bugs/Public/show_bug.cgi?id=10621 + // if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope()) setInsertionMode(m_secondaryInsertionMode); processEndOfFile(token); return; @@ -2658,10 +2601,13 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token) processEndOfFile(token); return; case TextMode: - case InCaptionMode: - case InRowMode: - notImplemented(); - break; + parseError(token); + if (m_tree.currentElement()->hasTagName(scriptTag)) + notImplemented(); // mark the script element as "already started". + m_tree.openElements()->pop(); + setInsertionMode(m_originalInsertionMode); + processEndOfFile(token); + return; } ASSERT(m_tree.openElements()->top()); m_tree.openElements()->popAll(); @@ -2813,9 +2759,7 @@ bool HTMLTreeBuilder::scriptEnabled(Frame* frame) { if (!frame) return false; - if (ScriptController* scriptController = frame->script()) - return scriptController->canExecuteScripts(NotAboutToExecuteScript); - return false; + return frame->script()->canExecuteScripts(NotAboutToExecuteScript); } bool HTMLTreeBuilder::pluginsEnabled(Frame* frame) @@ -2825,41 +2769,4 @@ bool HTMLTreeBuilder::pluginsEnabled(Frame* frame) return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin); } -// FIXME: Move this function to a more appropriate place. -String serializeForNumberType(double number) -{ - // According to HTML5, "the best representation of the number n as a floating - // point number" is a string produced by applying ToString() to n. - NumberToStringBuffer buffer; - unsigned length = numberToString(number, buffer); - return String(buffer, length); -} - -// FIXME: Move this function to a more appropriate place. -bool parseToDoubleForNumberType(const String& src, double* out) -{ - // See HTML5 2.4.4.3 `Real numbers.' - - if (src.isEmpty()) - return false; - // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5. 
- // So, check the first character. - if (src[0] != '-' && (src[0] < '0' || src[0] > '9')) - return false; - - bool valid = false; - double value = src.toDouble(&valid); - if (!valid) - return false; - // NaN and Infinity are not valid numbers according to the standard. - if (!isfinite(value)) - return false; - // -0 -> 0 - if (!value) - value = 0; - if (out) - *out = value; - return true; -} - } diff --git a/WebCore/html/parser/HTMLTreeBuilder.h b/WebCore/html/parser/HTMLTreeBuilder.h index 4634f0a..d522ea8 100644 --- a/WebCore/html/parser/HTMLTreeBuilder.h +++ b/WebCore/html/parser/HTMLTreeBuilder.h @@ -76,8 +76,6 @@ public: // Done, close any open tags, etc. void finished(); - static HTMLTokenizer::State adjustedLexerState(HTMLTokenizer::State, const AtomicString& tagName, Frame*); - static bool scriptEnabled(Frame*); static bool pluginsEnabled(Frame*); @@ -170,7 +168,6 @@ private: PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&); HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*); - void reparentChildren(Element* oldParent, Element* newParent); void callTheAdoptionAgency(AtomicHTMLToken&); void closeTheCell(); @@ -257,18 +254,6 @@ private: int m_lastScriptElementStartLine; }; -// FIXME: Move these functions to a more appropriate place. - -// Converts the specified string to a floating number. -// If the conversion fails, the return value is false. Take care that leading -// or trailing unnecessary characters make failures. This returns false for an -// empty string input. -// The double* parameter may be 0. -bool parseToDoubleForNumberType(const String&, double*); -// Converts the specified number to a string. This is an implementation of -// HTML5's "algorithm to convert a number to a string" for NUMBER/RANGE types. 
-String serializeForNumberType(double); - } #endif diff --git a/WebCore/html/parser/HTMLViewSourceParser.cpp b/WebCore/html/parser/HTMLViewSourceParser.cpp index 8a7984d..ace8590 100644 --- a/WebCore/html/parser/HTMLViewSourceParser.cpp +++ b/WebCore/html/parser/HTMLViewSourceParser.cpp @@ -26,15 +26,15 @@ #include "config.h" #include "HTMLViewSourceParser.h" +#include "HTMLDocumentParser.h" #include "HTMLNames.h" -#include "HTMLTreeBuilder.h" #include "HTMLViewSourceDocument.h" namespace WebCore { HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument* document) : DecodedDataDocumentParser(document) - , m_tokenizer(HTMLTokenizer::create()) + , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document))) { } @@ -87,13 +87,7 @@ void HTMLViewSourceParser::updateTokenizerState() return; AtomicString tagName(m_token.name().data(), m_token.name().size()); - m_tokenizer->setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer->state(), tagName, document()->frame())); - if (tagName == HTMLNames::scriptTag) { - // The tree builder handles scriptTag separately from the other tokenizer - // state adjustments, so we need to handle it separately too. - ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState); - m_tokenizer->setState(HTMLTokenizer::ScriptDataState); - } + m_tokenizer->updateStateFor(tagName, document()->frame()); } void HTMLViewSourceParser::finish() diff --git a/WebCore/html/parser/NestingLevelIncrementer.h b/WebCore/html/parser/NestingLevelIncrementer.h new file mode 100644 index 0000000..c597876 --- /dev/null +++ b/WebCore/html/parser/NestingLevelIncrementer.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NestingLevelIncrementer_h +#define NestingLevelIncrementer_h + +namespace WebCore { + +class NestingLevelIncrementer : public Noncopyable { +public: + explicit NestingLevelIncrementer(unsigned& nestingLevel) + : m_nestingLevel(&nestingLevel) + { + ++(*m_nestingLevel); + } + + ~NestingLevelIncrementer() + { + --(*m_nestingLevel); + } + +private: + unsigned* m_nestingLevel; +}; + +} + +#endif |