summaryrefslogtreecommitdiffstats
path: root/WebCore/html/parser
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/html/parser')
-rw-r--r--WebCore/html/parser/HTMLConstructionSite.cpp20
-rw-r--r--WebCore/html/parser/HTMLDocumentParser.cpp29
-rw-r--r--WebCore/html/parser/HTMLDocumentParser.h4
-rw-r--r--WebCore/html/parser/HTMLElementStack.cpp27
-rw-r--r--WebCore/html/parser/HTMLElementStack.h2
-rw-r--r--WebCore/html/parser/HTMLParserIdioms.cpp144
-rw-r--r--WebCore/html/parser/HTMLParserIdioms.h66
-rw-r--r--WebCore/html/parser/HTMLPreloadScanner.cpp12
-rw-r--r--WebCore/html/parser/HTMLScriptRunner.cpp37
-rw-r--r--WebCore/html/parser/HTMLScriptRunner.h2
-rw-r--r--WebCore/html/parser/HTMLTokenizer.cpp64
-rw-r--r--WebCore/html/parser/HTMLTokenizer.h36
-rw-r--r--WebCore/html/parser/HTMLTreeBuilder.cpp187
-rw-r--r--WebCore/html/parser/HTMLTreeBuilder.h15
-rw-r--r--WebCore/html/parser/HTMLViewSourceParser.cpp12
-rw-r--r--WebCore/html/parser/NestingLevelIncrementer.h50
16 files changed, 430 insertions, 277 deletions
diff --git a/WebCore/html/parser/HTMLConstructionSite.cpp b/WebCore/html/parser/HTMLConstructionSite.cpp
index 0172b3d..6215bba 100644
--- a/WebCore/html/parser/HTMLConstructionSite.cpp
+++ b/WebCore/html/parser/HTMLConstructionSite.cpp
@@ -114,19 +114,19 @@ PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRe
void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
{
+ // FIXME: It's unfortunate that we need to hold a reference to child
+ // here to call attach(). We should investigate whether we can rely on
+ // |site.parent| to hold a ref at this point.
RefPtr<Node> child = prpChild;
- if (site.nextChild) {
+ if (site.nextChild)
site.parent->parserInsertBefore(child, site.nextChild);
- if (site.parent->attached() && !child->attached())
- child->attach();
- return;
- }
- site.parent->parserAddChild(child);
- // It's slightly unfortunate that we need to hold a reference to child
- // here to call attach(). We should investigate whether we can rely on
- // |site.parent| to hold a ref at this point.
- if (site.parent->attached() && !child->attached())
+ else
+ site.parent->parserAddChild(child);
+
+ // JavaScript run from beforeload (or DOM Mutation or event handlers)
+ // might have removed the child, in which case we should not attach it.
+ if (child->parentNode() && site.parent->attached() && !child->attached())
child->attach();
}
diff --git a/WebCore/html/parser/HTMLDocumentParser.cpp b/WebCore/html/parser/HTMLDocumentParser.cpp
index a442d54..2da403f 100644
--- a/WebCore/html/parser/HTMLDocumentParser.cpp
+++ b/WebCore/html/parser/HTMLDocumentParser.cpp
@@ -36,6 +36,8 @@
#include "HTMLScriptRunner.h"
#include "HTMLTreeBuilder.h"
#include "HTMLDocument.h"
+#include "NestingLevelIncrementer.h"
+#include "Settings.h"
#include "XSSAuditor.h"
#include <wtf/CurrentTime.h>
@@ -53,23 +55,6 @@ using namespace HTMLNames;
namespace {
-class NestingLevelIncrementer : public Noncopyable {
-public:
- explicit NestingLevelIncrementer(int& counter)
- : m_counter(&counter)
- {
- ++(*m_counter);
- }
-
- ~NestingLevelIncrementer()
- {
- --(*m_counter);
- }
-
-private:
- int* m_counter;
-};
-
// This is a direct transcription of step 4 from:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors)
@@ -99,7 +84,7 @@ HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bo
HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
: ScriptableDocumentParser(document)
- , m_tokenizer(HTMLTokenizer::create())
+ , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(document)))
, m_scriptRunner(HTMLScriptRunner::create(document, this))
, m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), document, reportErrors))
, m_parserScheduler(HTMLParserScheduler::create(this))
@@ -112,7 +97,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors
// minimize code duplication between these constructors.
HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
: ScriptableDocumentParser(fragment->document())
- , m_tokenizer(HTMLTokenizer::create())
+ , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document())))
, m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), fragment, contextElement, scriptingPermission))
, m_endWasDelayed(false)
, m_writeNestingLevel(0)
@@ -547,5 +532,11 @@ void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFra
ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151>
parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
}
+
+// Reports whether the pre-HTML5 parser quirks setting is switched on for
+// |document|. A document without a Settings object yields false.
+bool HTMLDocumentParser::usePreHTML5ParserQuirks(Document* document)
+{
+    ASSERT(document);
+    Settings* settings = document->settings();
+    return settings && settings->usePreHTML5ParserQuirks();
+}
}
diff --git a/WebCore/html/parser/HTMLDocumentParser.h b/WebCore/html/parser/HTMLDocumentParser.h
index 6d5b6d7..e65a582 100644
--- a/WebCore/html/parser/HTMLDocumentParser.h
+++ b/WebCore/html/parser/HTMLDocumentParser.h
@@ -66,6 +66,8 @@ public:
void resumeParsingAfterYield();
static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, FragmentScriptingPermission = FragmentScriptingAllowed);
+
+ static bool usePreHTML5ParserQuirks(Document*);
protected:
virtual void insert(const SegmentedString&);
@@ -139,7 +141,7 @@ private:
OwnPtr<HTMLParserScheduler> m_parserScheduler;
bool m_endWasDelayed;
- int m_writeNestingLevel;
+ unsigned m_writeNestingLevel;
};
}
diff --git a/WebCore/html/parser/HTMLElementStack.cpp b/WebCore/html/parser/HTMLElementStack.cpp
index b6f4111..123778d 100644
--- a/WebCore/html/parser/HTMLElementStack.cpp
+++ b/WebCore/html/parser/HTMLElementStack.cpp
@@ -28,6 +28,8 @@
#include "Element.h"
#include "HTMLNames.h"
+#include "MathMLNames.h"
+#include "SVGNames.h"
#include <wtf/PassOwnPtr.h>
#if ENABLE(SVG)
@@ -92,6 +94,19 @@ inline bool isTableRowScopeMarker(Element* element)
|| element->hasTagName(htmlTag);
}
+// Returns true when |element| is one of the elements at which
+// popUntilForeignContentScopeMarker() stops popping: a listed MathML or SVG
+// element, or any element in the XHTML namespace.
+inline bool isForeignContentScopeMarker(Element* element)
+{
+    // MathML elements that act as markers.
+    if (element->hasTagName(MathMLNames::miTag)
+        || element->hasTagName(MathMLNames::moTag)
+        || element->hasTagName(MathMLNames::mnTag)
+        || element->hasTagName(MathMLNames::msTag)
+        || element->hasTagName(MathMLNames::mtextTag))
+        return true;
+
+    // SVG elements that act as markers.
+    if (element->hasTagName(SVGNames::foreignObjectTag)
+        || element->hasTagName(SVGNames::descTag)
+        || element->hasTagName(SVGNames::titleTag))
+        return true;
+
+    // Every element in the XHTML namespace is a marker as well.
+    return element->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
+}
+
inline bool isButtonScopeMarker(Element* element)
{
return isScopeMarker(element)
@@ -186,12 +201,6 @@ void HTMLElementStack::pop()
popCommon();
}
-void HTMLElementStack::popUntilElementWithNamespace(const AtomicString& namespaceURI)
-{
- while (top()->namespaceURI() != namespaceURI)
- pop();
-}
-
void HTMLElementStack::popUntil(const AtomicString& tagName)
{
while (!top()->hasLocalName(tagName)) {
@@ -247,6 +256,12 @@ void HTMLElementStack::popUntilTableRowScopeMarker()
pop();
}
+// Pops elements off the stack until the top element satisfies
+// isForeignContentScopeMarker(). The marker itself stays on the stack.
+// NOTE(review): there is no empty-stack check here; presumably the <html>
+// element at the bottom of the stack (XHTML namespace) always stops the
+// loop -- confirm.
+void HTMLElementStack::popUntilForeignContentScopeMarker()
+{
+    for (Element* current = top(); !isForeignContentScopeMarker(current); current = top())
+        pop();
+}
+
void HTMLElementStack::pushHTMLHtmlElement(PassRefPtr<Element> element)
{
ASSERT(!m_top); // <html> should always be the bottom of the stack.
diff --git a/WebCore/html/parser/HTMLElementStack.h b/WebCore/html/parser/HTMLElementStack.h
index 73cfcb1..47fa603 100644
--- a/WebCore/html/parser/HTMLElementStack.h
+++ b/WebCore/html/parser/HTMLElementStack.h
@@ -90,7 +90,6 @@ public:
void pop();
void popUntil(const AtomicString& tagName);
- void popUntilElementWithNamespace(const AtomicString& namespaceURI);
void popUntil(Element*);
void popUntilPopped(const AtomicString& tagName);
void popUntilPopped(Element*);
@@ -98,6 +97,7 @@ public:
void popUntilTableScopeMarker(); // "clear the stack back to a table context" in the spec.
void popUntilTableBodyScopeMarker(); // "clear the stack back to a table body context" in the spec.
void popUntilTableRowScopeMarker(); // "clear the stack back to a table row context" in the spec.
+ void popUntilForeignContentScopeMarker();
void popHTMLHeadElement();
void popHTMLBodyElement();
void popAll();
diff --git a/WebCore/html/parser/HTMLParserIdioms.cpp b/WebCore/html/parser/HTMLParserIdioms.cpp
new file mode 100644
index 0000000..a558cf5
--- /dev/null
+++ b/WebCore/html/parser/HTMLParserIdioms.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLParserIdioms.h"
+
+#include <wtf/MathExtras.h>
+#include <wtf/dtoa.h>
+#include <wtf/text/AtomicString.h>
+
+namespace WebCore {
+
+// Returns a copy of |string| with every leading and trailing HTML space
+// character (see isHTMLSpace) removed. Interior spaces are preserved. An
+// empty input, or one made up solely of HTML spaces, yields the empty string.
+String stripLeadingAndTrailingHTMLSpaces(const String& string)
+{
+    const UChar* characters = string.characters();
+    unsigned length = string.length();
+
+    // Count the run of HTML spaces at the front of the string.
+    unsigned numLeadingSpaces;
+    for (numLeadingSpaces = 0; numLeadingSpaces < length; ++numLeadingSpaces) {
+        if (isNotHTMLSpace(characters[numLeadingSpaces]))
+            break;
+    }
+
+    // Nothing but spaces (or nothing at all): there is no content to keep.
+    if (numLeadingSpaces == length)
+        return emptyAtom;
+
+    // Count the run of HTML spaces at the back of the string. At least one
+    // non-space character exists, so this loop stops before reaching the
+    // leading run.
+    unsigned numTrailingSpaces;
+    for (numTrailingSpaces = 0; numTrailingSpaces < length; ++numTrailingSpaces) {
+        if (isNotHTMLSpace(characters[length - numTrailingSpaces - 1]))
+            break;
+    }
+
+    ASSERT(numLeadingSpaces + numTrailingSpaces < length);
+
+    // substring() takes (position, length), not (begin, end), so the number
+    // of characters to keep must exclude both the leading and trailing runs.
+    return string.substring(numLeadingSpaces, length - numLeadingSpaces - numTrailingSpaces);
+}
+
+// Serializes |number| for the number and range input types.
+String serializeForNumberType(double number)
+{
+    // HTML5 defines "the best representation of the number n as a floating
+    // point number" as the string obtained by applying ToString() to n, which
+    // is what numberToString() writes into the scratch buffer.
+    NumberToStringBuffer scratch;
+    const unsigned writtenLength = numberToString(number, scratch);
+    String serialized(scratch, writtenLength);
+    return serialized;
+}
+
+// Parses |string| as a real number (HTML5 2.4.4.3 `Real numbers'). Returns
+// false if the string does not start with '-' or an ASCII digit, if
+// String::toDouble() rejects it, or if the value is NaN or infinite. On
+// success, stores the value through |result| when |result| is non-null.
+bool parseToDoubleForNumberType(const String& string, double* result)
+{
+    // String::toDouble() tolerates a leading '+' and leading whitespace,
+    // neither of which is valid here, so screen the first character ourselves.
+    const UChar leading = string[0];
+    const bool startsValidly = leading == '-' || isASCIIDigit(leading);
+    if (!startsValidly)
+        return false;
+
+    bool converted = false;
+    double parsed = string.toDouble(&converted);
+
+    // String::toDouble() also accepts NaN and infinity, which are not valid
+    // here.
+    if (!converted || !isfinite(parsed))
+        return false;
+
+    // Callers may pass a null |result| just to validate the string.
+    if (!result)
+        return true;
+
+    // The conditional maps -0 to +0; every other value passes through.
+    *result = parsed ? parsed : 0;
+    return true;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
+//
+// Parses an optionally signed run of ASCII digits from |input|, skipping
+// leading HTML spaces and ignoring anything after the digit run. On success
+// returns true and stores the result in |value|. Returns false, leaving
+// |value| untouched, when no digit run is found or when the digits cannot be
+// converted to an int.
+bool parseHTMLInteger(const String& input, int& value)
+{
+    // Step 1
+    // Step 2
+    const UChar* position = input.characters();
+    const UChar* end = position + input.length();
+
+    // Step 3
+    int sign = 1;
+
+    // Step 4
+    while (position < end && isHTMLSpace(*position))
+        ++position;
+
+    // Step 5
+    if (position == end)
+        return false;
+    ASSERT(position < end);
+
+    // Step 6
+    if (*position == '-') {
+        sign = -1;
+        ++position;
+    } else if (*position == '+')
+        ++position;
+    if (position == end)
+        return false;
+    ASSERT(position < end);
+
+    // Step 7
+    if (!isASCIIDigit(*position))
+        return false;
+
+    // Step 8
+    Vector<UChar, 16> digits;
+    while (position < end && isASCIIDigit(*position))
+        digits.append(*position++);
+
+    // Step 9
+    // Check the conversion result instead of assuming it worked: a digit run
+    // that does not fit in an int must not be reported as a successful parse.
+    // NOTE(review): the magnitude is converted before the sign is applied, so
+    // the most negative int cannot be parsed -- confirm whether callers need it.
+    bool ok = false;
+    int magnitude = charactersToIntStrict(digits.data(), digits.size(), &ok);
+    if (!ok)
+        return false;
+
+    value = sign * magnitude;
+    return true;
+}
+
+}
diff --git a/WebCore/html/parser/HTMLParserIdioms.h b/WebCore/html/parser/HTMLParserIdioms.h
new file mode 100644
index 0000000..f4704f7
--- /dev/null
+++ b/WebCore/html/parser/HTMLParserIdioms.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLParserIdioms_h
+#define HTMLParserIdioms_h
+
+#include <wtf/Forward.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Space characters as defined by the HTML specification.
+bool isHTMLSpace(UChar);
+bool isNotHTMLSpace(UChar);
+
+// Strip leading and trailing whitespace as defined by the HTML specification.
+String stripLeadingAndTrailingHTMLSpaces(const String&);
+
+// An implementation of the HTML specification's algorithm to convert a number to a string for number and range types.
+String serializeForNumberType(double);
+
+// Convert the specified string to a double. If the conversion fails, the return value is false.
+// Leading or trailing illegal characters cause failure, as does passing an empty string.
+// The double* parameter may be 0 to check if the string can be parsed without getting the result.
+bool parseToDoubleForNumberType(const String&, double*);
+
+// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
+bool parseHTMLInteger(const String&, int&);
+
+// Inline implementations of some of the functions declared above.
+
+// Returns true for the characters this file treats as HTML spaces: tab,
+// U+000A, U+000C, U+000D and the space character.
+inline bool isHTMLSpace(UChar character)
+{
+    // FIXME: Consider branch permutations as we did in isASCIISpace.
+    switch (character) {
+    case '\t':
+    case '\x0A':
+    case '\x0C':
+    case '\x0D':
+    case ' ':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Returns true for every character that isHTMLSpace() rejects.
+inline bool isNotHTMLSpace(UChar character)
+{
+ return !isHTMLSpace(character);
+}
+
+}
+
+#endif
diff --git a/WebCore/html/parser/HTMLPreloadScanner.cpp b/WebCore/html/parser/HTMLPreloadScanner.cpp
index 5283fa3..7859dd8 100644
--- a/WebCore/html/parser/HTMLPreloadScanner.cpp
+++ b/WebCore/html/parser/HTMLPreloadScanner.cpp
@@ -31,8 +31,8 @@
#include "CSSHelper.h"
#include "CachedResourceLoader.h"
#include "Document.h"
+#include "HTMLDocumentParser.h"
#include "HTMLTokenizer.h"
-#include "HTMLTreeBuilder.h"
#include "HTMLLinkElement.h"
#include "HTMLNames.h"
@@ -121,7 +121,7 @@ private:
HTMLPreloadScanner::HTMLPreloadScanner(Document* document)
: m_document(document)
, m_cssScanner(document)
- , m_tokenizer(HTMLTokenizer::create())
+ , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document)))
, m_bodySeen(false)
, m_inStyle(false)
{
@@ -157,13 +157,7 @@ void HTMLPreloadScanner::processToken()
return;
PreloadTask task(m_token);
- m_tokenizer->setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer->state(), task.tagName(), m_document->frame()));
- if (task.tagName() == scriptTag) {
- // The tree builder handles scriptTag separately from the other tokenizer
- // state adjustments, so we need to handle it separately too.
- ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
- m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
- }
+ m_tokenizer->updateStateFor(task.tagName(), m_document->frame());
if (task.tagName() == bodyTag)
m_bodySeen = true;
diff --git a/WebCore/html/parser/HTMLScriptRunner.cpp b/WebCore/html/parser/HTMLScriptRunner.cpp
index e1fc120..4f54f42 100644
--- a/WebCore/html/parser/HTMLScriptRunner.cpp
+++ b/WebCore/html/parser/HTMLScriptRunner.cpp
@@ -35,6 +35,7 @@
#include "HTMLScriptRunnerHost.h"
#include "HTMLInputStream.h"
#include "HTMLNames.h"
+#include "NestingLevelIncrementer.h"
#include "NotImplemented.h"
#include "ScriptElement.h"
#include "ScriptSourceCode.h"
@@ -43,24 +44,6 @@ namespace WebCore {
using namespace HTMLNames;
-// FIXME: Factor out to avoid duplication with HTMLDocumentParser.
-class NestingLevelIncrementer : public Noncopyable {
-public:
- explicit NestingLevelIncrementer(unsigned& nestingLevel)
- : m_nestingLevel(&nestingLevel)
- {
- ++(*m_nestingLevel);
- }
-
- ~NestingLevelIncrementer()
- {
- --(*m_nestingLevel);
- }
-
-private:
- unsigned* m_nestingLevel;
-};
-
HTMLScriptRunner::HTMLScriptRunner(Document* document, HTMLScriptRunnerHost* host)
: m_document(document)
, m_host(host)
@@ -155,20 +138,16 @@ void HTMLScriptRunner::executePendingScriptAndDispatchEvent(PendingScript& pendi
if (errorOccurred)
scriptElement->dispatchEvent(createScriptErrorEvent());
else {
- executeScript(scriptElement.get(), sourceCode);
+ executeScript(sourceCode);
scriptElement->dispatchEvent(createScriptLoadEvent());
}
}
ASSERT(!m_scriptNestingLevel);
}
-void HTMLScriptRunner::executeScript(Element* element, const ScriptSourceCode& sourceCode) const
+void HTMLScriptRunner::executeScript(const ScriptSourceCode& sourceCode) const
{
ASSERT(m_document);
- ScriptElement* scriptElement = toScriptElement(element);
- ASSERT(scriptElement);
- if (!scriptElement->shouldExecuteAsJavaScript())
- return;
ASSERT(isExecutingScript());
if (!m_document->frame())
return;
@@ -317,9 +296,11 @@ void HTMLScriptRunner::runScript(Element* script, int startingLineNumber)
InsertionPointRecord insertionPointRecord(m_host->inputStream());
NestingLevelIncrementer nestingLevelIncrementer(m_scriptNestingLevel);
- // Check script type and language, current code uses ScriptElement::shouldExecuteAsJavaScript(), but that may not be HTML5 compliant.
- notImplemented(); // event for support
-
+ ScriptElement* scriptElement = toScriptElement(script);
+ ASSERT(scriptElement);
+ if (!scriptElement->shouldExecuteAsJavaScript())
+ return;
+
if (script->hasAttribute(srcAttr)) {
if (script->hasAttribute(asyncAttr)) // Async takes precendence over defer.
return; // Asynchronous scripts handle themselves.
@@ -335,7 +316,7 @@ void HTMLScriptRunner::runScript(Element* script, int startingLineNumber)
// ASSERT(document()->haveStylesheetsLoaded());
ASSERT(isExecutingScript());
ScriptSourceCode sourceCode(script->textContent(), documentURLForScriptExecution(m_document), startingLineNumber);
- executeScript(script, sourceCode);
+ executeScript(sourceCode);
}
}
}
diff --git a/WebCore/html/parser/HTMLScriptRunner.h b/WebCore/html/parser/HTMLScriptRunner.h
index 47c96fd..be21dd2 100644
--- a/WebCore/html/parser/HTMLScriptRunner.h
+++ b/WebCore/html/parser/HTMLScriptRunner.h
@@ -68,7 +68,7 @@ private:
void executeParsingBlockingScript();
void executePendingScriptAndDispatchEvent(PendingScript&);
- void executeScript(Element*, const ScriptSourceCode&) const;
+ void executeScript(const ScriptSourceCode&) const;
bool haveParsingBlockingScript() const;
bool executeParsingBlockingScripts();
diff --git a/WebCore/html/parser/HTMLTokenizer.cpp b/WebCore/html/parser/HTMLTokenizer.cpp
index f5405ff..305fca2 100644
--- a/WebCore/html/parser/HTMLTokenizer.cpp
+++ b/WebCore/html/parser/HTMLTokenizer.cpp
@@ -30,6 +30,7 @@
#include "HTMLEntityParser.h"
#include "HTMLToken.h"
+#include "HTMLTreeBuilder.h"
#include "HTMLNames.h"
#include "NotImplemented.h"
#include <wtf/ASCIICType.h>
@@ -102,8 +103,9 @@ inline bool isEndTagBufferingState(HTMLTokenizer::State state)
}
-HTMLTokenizer::HTMLTokenizer()
+HTMLTokenizer::HTMLTokenizer(bool usePreHTML5ParserQuirks)
: m_inputStreamPreprocessor(this)
+ , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
{
reset();
}
@@ -171,7 +173,7 @@ inline bool HTMLTokenizer::processEntity(SegmentedString& source)
// Sometimes there's more complicated logic in the spec that separates when
// we consume the next input character and when we switch to a particular
-// state. We handle those cases by advancing the source directly and using
+// state. We handle those cases by advancing the source directly and using
// this macro to switch to the indicated state.
#define SWITCH_TO(stateName) \
do { \
@@ -277,7 +279,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
// Note that this logic is different than the generic \r\n collapsing
- // handled in the input stream preprocessor. This logic is here as an
+ // handled in the input stream preprocessor. This logic is here as an
// "authoring convenience" so folks can write:
//
// <pre>
@@ -435,6 +437,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->appendToName(toLowerCase(cc));
ADVANCE_TO(TagNameState);
@@ -876,6 +880,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
m_token->beginAttributeName(source.numberOfCharactersConsumed());
@@ -908,6 +914,9 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
} else if (cc == '>') {
m_token->endAttributeName(source.numberOfCharactersConsumed());
return emitAndResumeIn(source, DataState);
+ } else if (m_usePreHTML5ParserQuirks && cc == '<') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
+ return emitAndReconsumeIn(source, DataState);
} else if (isASCIIUpper(cc)) {
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
@@ -933,6 +942,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
ADVANCE_TO(BeforeAttributeValueState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
m_token->beginAttributeName(source.numberOfCharactersConsumed());
@@ -1054,7 +1065,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
m_token->appendToAttributeValue(*iter);
}
// We're supposed to switch back to the attribute value state that
- // we were in when we were switched into this state. Rather than
+ // we were in when we were switched into this state. Rather than
// keeping track of this explictly, we observe that the previous
// state can be determined by m_additionalAllowedCharacter.
if (m_additionalAllowedCharacter == '"')
@@ -1075,6 +1086,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, DataState);
+ else if (m_usePreHTML5ParserQuirks && cc == '<')
+ return emitAndReconsumeIn(source, DataState);
else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
RECONSUME_IN(DataState);
@@ -1213,13 +1226,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
BEGIN_STATE(CommentEndState) {
if (cc == '>')
return emitAndResumeIn(source, DataState);
- else if (isTokenizerWhitespace(cc)) {
- parseError();
- m_token->appendToComment('-');
- m_token->appendToComment('-');
- m_token->appendToComment(cc);
- ADVANCE_TO(CommentEndSpaceState);
- } else if (cc == '!') {
+ else if (cc == '!') {
parseError();
ADVANCE_TO(CommentEndBangState);
} else if (cc == '-') {
@@ -1260,24 +1267,6 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
}
END_STATE()
- BEGIN_STATE(CommentEndSpaceState) {
- if (isTokenizerWhitespace(cc)) {
- m_token->appendToComment(cc);
- ADVANCE_TO(CommentEndSpaceState);
- } else if (cc == '-')
- ADVANCE_TO(CommentEndDashState);
- else if (cc == '>')
- return emitAndResumeIn(source, DataState);
- else if (cc == InputStreamPreprocessor::endOfFileMarker) {
- parseError();
- return emitAndReconsumeIn(source, DataState);
- } else {
- m_token->appendToComment(cc);
- ADVANCE_TO(CommentState);
- }
- }
- END_STATE()
-
BEGIN_STATE(DOCTYPEState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeDOCTYPENameState);
@@ -1656,6 +1645,23 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
return false;
}
+// Switches the tokenizer state based on |tagName|. Tag names not listed
+// below leave the current state untouched. |frame| is consulted only for
+// noembed and noscript.
+void HTMLTokenizer::updateStateFor(const AtomicString& tagName, Frame* frame)
+{
+    if (tagName == textareaTag || tagName == titleTag) {
+        setState(RCDATAState);
+        return;
+    }
+
+    if (tagName == plaintextTag) {
+        setState(PLAINTEXTState);
+        return;
+    }
+
+    if (tagName == scriptTag) {
+        setState(ScriptDataState);
+        return;
+    }
+
+    // noembed and noscript count as raw text only when
+    // HTMLTreeBuilder::pluginsEnabled(frame) or
+    // HTMLTreeBuilder::scriptEnabled(frame), respectively, returns true.
+    const bool switchesToRawText = tagName == styleTag
+        || tagName == iframeTag
+        || tagName == xmpTag
+        || (tagName == noembedTag && HTMLTreeBuilder::pluginsEnabled(frame))
+        || tagName == noframesTag
+        || (tagName == noscriptTag && HTMLTreeBuilder::scriptEnabled(frame));
+    if (switchesToRawText)
+        setState(RAWTEXTState);
+}
+
inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
{
return vectorEqualsString(m_temporaryBuffer, expectedString);
diff --git a/WebCore/html/parser/HTMLTokenizer.h b/WebCore/html/parser/HTMLTokenizer.h
index bab77f3..f16b049 100644
--- a/WebCore/html/parser/HTMLTokenizer.h
+++ b/WebCore/html/parser/HTMLTokenizer.h
@@ -36,6 +36,7 @@
namespace WebCore {
class Element;
+class Frame;
class HTMLToken;
class HTMLTokenizer : public Noncopyable {
@@ -96,7 +97,6 @@ public:
CommentEndDashState,
CommentEndState,
CommentEndBangState,
- CommentEndSpaceState,
DOCTYPEState,
BeforeDOCTYPENameState,
DOCTYPENameState,
@@ -119,12 +119,12 @@ public:
CDATASectionDoubleRightSquareBracketState,
};
- static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer); }
+ static PassOwnPtr<HTMLTokenizer> create(bool usePreHTML5ParserQuirks) { return adoptPtr(new HTMLTokenizer(usePreHTML5ParserQuirks)); }
~HTMLTokenizer();
void reset();
- // This function returns true if it emits a token. Otherwise, callers
+ // This function returns true if it emits a token. Otherwise, callers
// must provide the same (in progress) token on the next call (unless
// they call reset() first).
bool nextToken(SegmentedString&, HTMLToken&);
@@ -135,6 +135,22 @@ public:
State state() const { return m_state; }
void setState(State state) { m_state = state; }
+ // Updates the tokenizer's state according to the given tag name. This is
+ // an approximation of how the tree builder would update the tokenizer's
+ // state. This method is useful for approximating HTML tokenization. To
+ // get exactly the correct tokenization, you need the real tree builder.
+ //
+ // The main failures in the approximation are as follows:
+ //
+ // * The first set of character tokens emitted for a <pre> element might
+ // contain an extra leading newline.
+ // * The replacement of U+0000 with U+FFFD will not be sensitive to the
+ // tree builder's insertion mode.
+ // * CDATA sections in foreign content will be tokenized as bogus comments
+ // instead of as character tokens.
+ //
+ void updateStateFor(const AtomicString& tagName, Frame*);
+
// Hack to skip leading newline in <pre>/<listing> for authoring ease.
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
void setSkipLeadingNewLineForListing(bool value) { m_skipLeadingNewLineForListing = value; }
@@ -177,8 +193,8 @@ private:
// Every branch in this function is expensive, so we have a
// fast-reject branch for characters that don't require special
- // handling. Please run the parser benchmark whenever you touch
- // this function. It's very hot.
+ // handling. Please run the parser benchmark whenever you touch
+ // this function. It's very hot.
static const UChar specialCharacterMask = '\n' | '\r' | '\0';
if (m_nextInputCharacter & ~specialCharacterMask) {
m_skipNextNewLine = false;
@@ -238,7 +254,7 @@ private:
bool m_skipNextNewLine;
};
- HTMLTokenizer();
+ HTMLTokenizer(bool usePreHTML5ParserQuirks);
inline bool processEntity(SegmentedString&);
@@ -257,7 +273,7 @@ private:
inline bool temporaryBufferIs(const String&);
// Sometimes we speculatively consume input characters and we don't
- // know whether they represent end tags or RCDATA, etc. These
+ // know whether they represent end tags or RCDATA, etc. These
// functions help manage these state.
inline void addToPossibleEndTag(UChar cc);
inline void saveEndTagNameIfNeeded();
@@ -269,7 +285,7 @@ private:
Vector<UChar, 32> m_appropriateEndTagName;
- // m_token is owned by the caller. If nextToken is not on the stack,
+ // m_token is owned by the caller. If nextToken is not on the stack,
// this member might be pointing to unallocated memory.
HTMLToken* m_token;
int m_lineNumber;
@@ -282,7 +298,7 @@ private:
Vector<UChar, 32> m_temporaryBuffer;
// We occationally want to emit both a character token and an end tag
- // token (e.g., when lexing script). We buffer the name of the end tag
+ // token (e.g., when lexing script). We buffer the name of the end tag
// token here so we remember it next time we re-enter the tokenizer.
Vector<UChar, 32> m_bufferedEndTagName;
@@ -291,6 +307,8 @@ private:
// http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
InputStreamPreprocessor m_inputStreamPreprocessor;
+
+ bool m_usePreHTML5ParserQuirks;
};
}
diff --git a/WebCore/html/parser/HTMLTreeBuilder.cpp b/WebCore/html/parser/HTMLTreeBuilder.cpp
index 406bb6c..afac2a0 100644
--- a/WebCore/html/parser/HTMLTreeBuilder.cpp
+++ b/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -26,16 +26,17 @@
#include "config.h"
#include "HTMLTreeBuilder.h"
+#include "CharacterNames.h"
#include "Comment.h"
#include "DocumentFragment.h"
#include "DocumentType.h"
-#include "Element.h"
#include "Frame.h"
#include "HTMLDocument.h"
#include "HTMLElementFactory.h"
#include "HTMLFormElement.h"
#include "HTMLHtmlElement.h"
#include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
#include "HTMLScriptElement.h"
#include "HTMLToken.h"
#include "HTMLTokenizer.h"
@@ -44,15 +45,10 @@
#include "NotImplemented.h"
#include "SVGNames.h"
#include "ScriptController.h"
-#include "Settings.h"
#include "Text.h"
#include "XLinkNames.h"
#include "XMLNSNames.h"
#include "XMLNames.h"
-// FIXME: Remove this include once we find a home for the free functions that
-// are using it.
-#include <wtf/dtoa.h>
-#include <wtf/UnusedParam.h>
namespace WebCore {
@@ -62,42 +58,19 @@ static const int uninitializedLineNumberValue = -1;
namespace {
-inline bool isTreeBuilderWhitepace(UChar c)
+inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
{
- // FIXME: Consider branch permutations.
- return c == '\t' || c == '\x0A' || c == '\x0C' || c == '\x0D' || c == ' ';
-}
-
-inline bool isNotTreeBuilderWhitepace(UChar c)
-{
- return !isTreeBuilderWhitepace(c);
-}
-
-inline bool isTreeBuilderWhitepaceOrReplacementCharacter(UChar c)
-{
- return isTreeBuilderWhitepace(c) || c == 0xFFFD;
-}
-
-template<bool isSpecialCharacter(UChar c)>
-inline bool isAllSpecialCharacters(const String& string)
-{
- const UChar* characters = string.characters();
- const unsigned length = string.length();
- for (unsigned i = 0; i < length; ++i) {
- if (!isSpecialCharacter(characters[i]))
- return false;
- }
- return true;
+ return isHTMLSpace(character) || character == replacementCharacter;
}
inline bool isAllWhitespace(const String& string)
{
- return isAllSpecialCharacters<isTreeBuilderWhitepace>(string);
+ return string.isAllSpecialCharacters<isHTMLSpace>();
}
inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
{
- return isAllSpecialCharacters<isTreeBuilderWhitepaceOrReplacementCharacter>(string);
+ return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
}
bool isNumberedHeaderTag(const AtomicString& tagName)
@@ -132,11 +105,14 @@ bool isTableBodyContextTag(const AtomicString& tagName)
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
bool isSpecialNode(Node* node)
{
+ if (node->hasTagName(SVGNames::foreignObjectTag))
+ return true;
if (node->namespaceURI() != xhtmlNamespaceURI)
return false;
- // FIXME: This list is out of sync with the spec.
const AtomicString& tagName = node->localName();
return tagName == addressTag
+ || tagName == appletTag
+ || tagName == areaTag
|| tagName == articleTag
|| tagName == asideTag
|| tagName == baseTag
@@ -146,6 +122,7 @@ bool isSpecialNode(Node* node)
|| tagName == bodyTag
|| tagName == brTag
|| tagName == buttonTag
+ || tagName == captionTag
|| tagName == centerTag
|| tagName == colTag
|| tagName == colgroupTag
@@ -158,6 +135,7 @@ bool isSpecialNode(Node* node)
|| tagName == dtTag
|| tagName == embedTag
|| tagName == fieldsetTag
+ || tagName == figcaptionTag
|| tagName == figureTag
|| tagName == footerTag
|| tagName == formTag
@@ -176,12 +154,14 @@ bool isSpecialNode(Node* node)
|| tagName == liTag
|| tagName == linkTag
|| tagName == listingTag
+ || tagName == marqueeTag
|| tagName == menuTag
|| tagName == metaTag
|| tagName == navTag
|| tagName == noembedTag
|| tagName == noframesTag
|| tagName == noscriptTag
+ || tagName == objectTag
|| tagName == olTag
|| tagName == pTag
|| tagName == paramTag
@@ -191,8 +171,12 @@ bool isSpecialNode(Node* node)
|| tagName == sectionTag
|| tagName == selectTag
|| tagName == styleTag
+ || tagName == summaryTag
+ || tagName == tableTag
|| isTableBodyContextTag(tagName)
+ || tagName == tdTag
|| tagName == textareaTag
+ || tagName == thTag
|| tagName == titleTag
|| tagName == trTag
|| tagName == ulTag
@@ -268,17 +252,17 @@ public:
void skipLeadingWhitespace()
{
- skipLeading<isTreeBuilderWhitepace>();
+ skipLeading<isHTMLSpace>();
}
String takeLeadingWhitespace()
{
- return takeLeading<isTreeBuilderWhitepace>();
+ return takeLeading<isHTMLSpace>();
}
String takeLeadingNonWhitespace()
{
- return takeLeading<isNotTreeBuilderWhitepace>();
+ return takeLeading<isNotHTMLSpace>();
}
String takeRemaining()
@@ -301,7 +285,7 @@ public:
Vector<UChar> whitespace;
do {
UChar cc = *m_current++;
- if (isTreeBuilderWhitepace(cc))
+ if (isHTMLSpace(cc))
whitespace.append(cc);
} while (m_current < m_end);
// Returning the null string when there aren't any whitespace
@@ -402,7 +386,7 @@ HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
}
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
- : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL()))
+ : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI()))
, m_fragment(fragment)
, m_contextElement(contextElement)
, m_scriptingPermission(scriptingPermission)
@@ -441,25 +425,6 @@ PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(int& scriptStartLine)
return m_scriptToProcess.release();
}
-HTMLTokenizer::State HTMLTreeBuilder::adjustedLexerState(HTMLTokenizer::State state, const AtomicString& tagName, Frame* frame)
-{
- if (tagName == textareaTag || tagName == titleTag)
- return HTMLTokenizer::RCDATAState;
-
- if (tagName == styleTag
- || tagName == iframeTag
- || tagName == xmpTag
- || (tagName == noembedTag && pluginsEnabled(frame))
- || tagName == noframesTag
- || (tagName == noscriptTag && scriptEnabled(frame)))
- return HTMLTokenizer::RAWTEXTState;
-
- if (tagName == plaintextTag)
- return HTMLTokenizer::PLAINTEXTState;
-
- return state;
-}
-
void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
{
AtomicHTMLToken token(rawToken);
@@ -1121,8 +1086,6 @@ void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
parseError(token);
if (m_tree.form())
return;
- // FIXME: This deviates from the spec:
- // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10216
m_tree.insertHTMLFormElement(token, true);
m_tree.openElements()->pop();
return;
@@ -1477,7 +1440,6 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
processStartTag(token);
break;
case InForeignContentMode: {
- // FIXME: We're missing a bunch of if branches here.
if (shouldProcessUsingSecondaryInsertionMode(token, m_tree.currentElement())) {
processUsingSecondaryInsertionModeAndAdjustInsertionMode(token);
return;
@@ -1522,8 +1484,10 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
|| token.name() == ulTag
|| token.name() == varTag
|| (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
- m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI);
- setInsertionMode(m_secondaryInsertionMode);
+ parseError(token);
+ m_tree.openElements()->popUntilForeignContentScopeMarker();
+ if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
+ setInsertionMode(m_secondaryInsertionMode);
processStartTag(token);
return;
}
@@ -1539,7 +1503,7 @@ void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
break;
}
case TextMode:
- notImplemented();
+ ASSERT_NOT_REACHED();
break;
}
}
@@ -1601,21 +1565,6 @@ HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElem
return 0;
}
-// FIXME: This should have a whitty name.
-// FIXME: This must be implemented in many other places in WebCore.
-void HTMLTreeBuilder::reparentChildren(Element* oldParent, Element* newParent)
-{
- Node* child = oldParent->firstChild();
- while (child) {
- Node* nextChild = child->nextSibling();
- oldParent->parserRemoveChild(child);
- newParent->parserAddChild(child);
- if (newParent->attached() && !child->attached())
- child->attach();
- child = nextChild;
- }
-}
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
{
@@ -1708,7 +1657,7 @@ void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
// 8
RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
// 9
- reparentChildren(furthestBlock->element(), newElement.get());
+ newElement->takeAllChildrenFrom(furthestBlock->element());
// 10
Element* furthestBlockElement = furthestBlock->element();
// FIXME: All this creation / parserAddChild / attach business should
@@ -1886,9 +1835,6 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
m_tree.openElements()->popUntilPopped(token.name());
m_tree.activeFormattingElements()->clearToLastMarker();
setInsertionMode(InRowMode);
- // FIXME: The fragment case of this ASSERT is a spec bug:
- // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10338
- ASSERT(m_tree.currentElement()->hasTagName(trTag) || (isParsingFragment() && m_fragmentContext.contextElement()->hasTagName(trTag)));
return;
}
if (token.name() == bodyTag
@@ -1902,8 +1848,6 @@ void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
|| isTableBodyContextTag(token.name())) {
if (!m_tree.openElements()->inTableScope(token.name())) {
ASSERT(isParsingFragment());
- // FIXME: It is unclear what the exact ASSERT should be.
- // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10098
parseError(token);
return;
}
@@ -2020,10 +1964,6 @@ void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
m_tree.openElements()->popUntilNumberedHeaderElementPopped();
return;
}
- if (token.name() == "sarcasm") {
- notImplemented(); // Take a deep breath.
- return;
- }
if (isFormattingTag(token.name())) {
callTheAdoptionAgency(token);
return;
@@ -2608,14 +2548,15 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
// Fall through
case InBodyMode:
case InCellMode:
- ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode);
- notImplemented(); // Emit parse error based on what elemtns are still open.
+ case InCaptionMode:
+ case InRowMode:
+ ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
+ notImplemented(); // Emit parse error based on what elements are still open.
break;
case AfterBodyMode:
case AfterAfterBodyMode:
ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
- notImplemented();
- break;
+ return;
case InHeadNoscriptMode:
ASSERT(insertionMode() == InHeadNoscriptMode);
defaultForInHeadNoscript();
@@ -2647,9 +2588,11 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
return;
case InForeignContentMode:
parseError(token);
- // FIXME: Following the spec would infinitely recurse on <svg><svg>
- // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10115
- m_tree.openElements()->popUntilElementWithNamespace(xhtmlNamespaceURI);
+ m_tree.openElements()->popUntilForeignContentScopeMarker();
+ // FIXME: The spec adds the following condition before setting the
+ // insertion mode. However, this condition causes an infinite loop.
+ // See http://www.w3.org/Bugs/Public/show_bug.cgi?id=10621
+ // if (insertionMode() == InForeignContentMode && m_tree.openElements()->hasOnlyHTMLElementsInScope())
setInsertionMode(m_secondaryInsertionMode);
processEndOfFile(token);
return;
@@ -2658,10 +2601,13 @@ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
processEndOfFile(token);
return;
case TextMode:
- case InCaptionMode:
- case InRowMode:
- notImplemented();
- break;
+ parseError(token);
+ if (m_tree.currentElement()->hasTagName(scriptTag))
+ notImplemented(); // mark the script element as "already started".
+ m_tree.openElements()->pop();
+ setInsertionMode(m_originalInsertionMode);
+ processEndOfFile(token);
+ return;
}
ASSERT(m_tree.openElements()->top());
m_tree.openElements()->popAll();
@@ -2813,9 +2759,7 @@ bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
{
if (!frame)
return false;
- if (ScriptController* scriptController = frame->script())
- return scriptController->canExecuteScripts(NotAboutToExecuteScript);
- return false;
+ return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
}
bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
@@ -2825,41 +2769,4 @@ bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
}
-// FIXME: Move this function to a more appropriate place.
-String serializeForNumberType(double number)
-{
- // According to HTML5, "the best representation of the number n as a floating
- // point number" is a string produced by applying ToString() to n.
- NumberToStringBuffer buffer;
- unsigned length = numberToString(number, buffer);
- return String(buffer, length);
-}
-
-// FIXME: Move this function to a more appropriate place.
-bool parseToDoubleForNumberType(const String& src, double* out)
-{
- // See HTML5 2.4.4.3 `Real numbers.'
-
- if (src.isEmpty())
- return false;
- // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5.
- // So, check the first character.
- if (src[0] != '-' && (src[0] < '0' || src[0] > '9'))
- return false;
-
- bool valid = false;
- double value = src.toDouble(&valid);
- if (!valid)
- return false;
- // NaN and Infinity are not valid numbers according to the standard.
- if (!isfinite(value))
- return false;
- // -0 -> 0
- if (!value)
- value = 0;
- if (out)
- *out = value;
- return true;
-}
-
}
diff --git a/WebCore/html/parser/HTMLTreeBuilder.h b/WebCore/html/parser/HTMLTreeBuilder.h
index 4634f0a..d522ea8 100644
--- a/WebCore/html/parser/HTMLTreeBuilder.h
+++ b/WebCore/html/parser/HTMLTreeBuilder.h
@@ -76,8 +76,6 @@ public:
// Done, close any open tags, etc.
void finished();
- static HTMLTokenizer::State adjustedLexerState(HTMLTokenizer::State, const AtomicString& tagName, Frame*);
-
static bool scriptEnabled(Frame*);
static bool pluginsEnabled(Frame*);
@@ -170,7 +168,6 @@ private:
PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&);
HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
- void reparentChildren(Element* oldParent, Element* newParent);
void callTheAdoptionAgency(AtomicHTMLToken&);
void closeTheCell();
@@ -257,18 +254,6 @@ private:
int m_lastScriptElementStartLine;
};
-// FIXME: Move these functions to a more appropriate place.
-
-// Converts the specified string to a floating number.
-// If the conversion fails, the return value is false. Take care that leading
-// or trailing unnecessary characters make failures. This returns false for an
-// empty string input.
-// The double* parameter may be 0.
-bool parseToDoubleForNumberType(const String&, double*);
-// Converts the specified number to a string. This is an implementation of
-// HTML5's "algorithm to convert a number to a string" for NUMBER/RANGE types.
-String serializeForNumberType(double);
-
}
#endif
diff --git a/WebCore/html/parser/HTMLViewSourceParser.cpp b/WebCore/html/parser/HTMLViewSourceParser.cpp
index 8a7984d..ace8590 100644
--- a/WebCore/html/parser/HTMLViewSourceParser.cpp
+++ b/WebCore/html/parser/HTMLViewSourceParser.cpp
@@ -26,15 +26,15 @@
#include "config.h"
#include "HTMLViewSourceParser.h"
+#include "HTMLDocumentParser.h"
#include "HTMLNames.h"
-#include "HTMLTreeBuilder.h"
#include "HTMLViewSourceDocument.h"
namespace WebCore {
HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument* document)
: DecodedDataDocumentParser(document)
- , m_tokenizer(HTMLTokenizer::create())
+ , m_tokenizer(HTMLTokenizer::create(HTMLDocumentParser::usePreHTML5ParserQuirks(document)))
{
}
@@ -87,13 +87,7 @@ void HTMLViewSourceParser::updateTokenizerState()
return;
AtomicString tagName(m_token.name().data(), m_token.name().size());
- m_tokenizer->setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer->state(), tagName, document()->frame()));
- if (tagName == HTMLNames::scriptTag) {
- // The tree builder handles scriptTag separately from the other tokenizer
- // state adjustments, so we need to handle it separately too.
- ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
- m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
- }
+ m_tokenizer->updateStateFor(tagName, document()->frame());
}
void HTMLViewSourceParser::finish()
diff --git a/WebCore/html/parser/NestingLevelIncrementer.h b/WebCore/html/parser/NestingLevelIncrementer.h
new file mode 100644
index 0000000..c597876
--- /dev/null
+++ b/WebCore/html/parser/NestingLevelIncrementer.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NestingLevelIncrementer_h
+#define NestingLevelIncrementer_h
+
+namespace WebCore {
+
+class NestingLevelIncrementer : public Noncopyable {
+public:
+ explicit NestingLevelIncrementer(unsigned& nestingLevel)
+ : m_nestingLevel(&nestingLevel)
+ {
+ ++(*m_nestingLevel);
+ }
+
+ ~NestingLevelIncrementer()
+ {
+ --(*m_nestingLevel);
+ }
+
+private:
+ unsigned* m_nestingLevel;
+};
+
+}
+
+#endif