summary | refs | log | tree | commit | diff | stats
path: root/Source/WebCore/html/parser
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WebCore/html/parser')
-rw-r--r-- Source/WebCore/html/parser/HTMLConstructionSite.cpp 30
-rw-r--r-- Source/WebCore/html/parser/HTMLConstructionSite.h 9
-rw-r--r-- Source/WebCore/html/parser/HTMLDocumentParser.cpp 24
-rw-r--r-- Source/WebCore/html/parser/HTMLDocumentParser.h 8
-rw-r--r-- Source/WebCore/html/parser/HTMLInputStream.h 12
-rw-r--r-- Source/WebCore/html/parser/HTMLParserIdioms.cpp 3
-rw-r--r-- Source/WebCore/html/parser/HTMLScriptRunner.cpp 1
-rw-r--r-- Source/WebCore/html/parser/HTMLSourceTracker.cpp 69
-rw-r--r-- Source/WebCore/html/parser/HTMLSourceTracker.h 55
-rw-r--r-- Source/WebCore/html/parser/HTMLToken.h 55
-rw-r--r-- Source/WebCore/html/parser/HTMLTreeBuilder.cpp 49
-rw-r--r-- Source/WebCore/html/parser/HTMLTreeBuilder.h 5
-rw-r--r-- Source/WebCore/html/parser/HTMLViewSourceParser.cpp 24
-rw-r--r-- Source/WebCore/html/parser/HTMLViewSourceParser.h 3
-rw-r--r-- Source/WebCore/html/parser/XSSFilter.cpp 450
-rw-r--r-- Source/WebCore/html/parser/XSSFilter.h 87
16 files changed, 805 insertions, 79 deletions
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.cpp b/Source/WebCore/html/parser/HTMLConstructionSite.cpp
index c46b9b9..a026ef9 100644
--- a/Source/WebCore/html/parser/HTMLConstructionSite.cpp
+++ b/Source/WebCore/html/parser/HTMLConstructionSite.cpp
@@ -130,10 +130,20 @@ void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<N
child->attach();
}
-HTMLConstructionSite::HTMLConstructionSite(Document* document, FragmentScriptingPermission scriptingPermission, bool isParsingFragment)
+HTMLConstructionSite::HTMLConstructionSite(Document* document)
: m_document(document)
+ , m_attachmentRoot(document)
+ , m_fragmentScriptingPermission(FragmentScriptingAllowed)
+ , m_isParsingFragment(false)
+ , m_redirectAttachToFosterParent(false)
+{
+}
+
+HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
+ : m_document(fragment->document())
+ , m_attachmentRoot(fragment)
, m_fragmentScriptingPermission(scriptingPermission)
- , m_isParsingFragment(isParsingFragment)
+ , m_isParsingFragment(true)
, m_redirectAttachToFosterParent(false)
{
}
@@ -145,6 +155,7 @@ HTMLConstructionSite::~HTMLConstructionSite()
void HTMLConstructionSite::detach()
{
m_document = 0;
+ m_attachmentRoot = 0;
}
void HTMLConstructionSite::setForm(HTMLFormElement* form)
@@ -170,7 +181,7 @@ void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& tok
{
RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
- m_openElements.pushHTMLHtmlElement(attach<Element>(m_document, element.get()));
+ m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
#if ENABLE(OFFLINE_WEB_APPLICATIONS)
element->insertedByParser();
#endif
@@ -205,7 +216,16 @@ void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
{
ASSERT(token.type() == HTMLToken::DOCTYPE);
- attach(m_document, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
+ attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
+
+ // When parsing a fragment, DOCTYPE tokens are only processed if there is no context
+ // element, which never occurs today. If we ever choose to support that case, this code
+ // is subtly wrong: context-less fragments can determine their own quirks mode, and thus
+ // change parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this
+ // code in a fragment, as changing the owning document's compatibility mode would be wrong.
+ ASSERT(!m_isParsingFragment);
+ if (m_isParsingFragment)
+ return;
if (token.forceQuirks())
m_document->setCompatibilityMode(Document::QuirksMode);
@@ -222,7 +242,7 @@ void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
{
ASSERT(token.type() == HTMLToken::Comment);
- attach(m_document, Comment::create(m_document, token.comment()));
+ attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
}
void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.h b/Source/WebCore/html/parser/HTMLConstructionSite.h
index 5a4a65d..0298503 100644
--- a/Source/WebCore/html/parser/HTMLConstructionSite.h
+++ b/Source/WebCore/html/parser/HTMLConstructionSite.h
@@ -43,7 +43,8 @@ class Element;
class HTMLConstructionSite {
WTF_MAKE_NONCOPYABLE(HTMLConstructionSite);
public:
- HTMLConstructionSite(Document*, FragmentScriptingPermission, bool isParsingFragment);
+ HTMLConstructionSite(Document*);
+ HTMLConstructionSite(DocumentFragment*, FragmentScriptingPermission);
~HTMLConstructionSite();
void detach();
@@ -130,6 +131,12 @@ private:
void dispatchDocumentElementAvailableIfNeeded();
Document* m_document;
+
+ // This is the root ContainerNode to which the parser attaches all newly
+ // constructed nodes. It points to a DocumentFragment when parsing fragments
+ // and a Document in all other cases.
+ ContainerNode* m_attachmentRoot;
+
RefPtr<Element> m_head;
RefPtr<HTMLFormElement> m_form;
mutable HTMLElementStack m_openElements;
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.cpp b/Source/WebCore/html/parser/HTMLDocumentParser.cpp
index 93e1309..2fe9486 100644
--- a/Source/WebCore/html/parser/HTMLDocumentParser.cpp
+++ b/Source/WebCore/html/parser/HTMLDocumentParser.cpp
@@ -39,7 +39,6 @@
#include "InspectorInstrumentation.h"
#include "NestingLevelIncrementer.h"
#include "Settings.h"
-#include "XSSAuditor.h"
#include <wtf/CurrentTime.h>
#ifdef ANDROID_INSTRUMENT
@@ -85,6 +84,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors
, m_scriptRunner(HTMLScriptRunner::create(document, this))
, m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, usePreHTML5ParserQuirks(document)))
, m_parserScheduler(HTMLParserScheduler::create(this))
+ , m_xssFilter(this)
, m_endWasDelayed(false)
, m_writeNestingLevel(0)
{
@@ -96,6 +96,7 @@ HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* cont
: ScriptableDocumentParser(fragment->document())
, m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document())))
, m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks(fragment->document())))
+ , m_xssFilter(this)
, m_endWasDelayed(false)
, m_writeNestingLevel(0)
{
@@ -230,8 +231,13 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
if (!m_treeBuilder->isParsingFragment()
&& document()->frame() && document()->frame()->navigationScheduler()->locationChangePending())
break;
+
+ m_sourceTracker.start(m_input, m_token);
if (!m_tokenizer->nextToken(m_input.current(), m_token))
break;
+ m_sourceTracker.end(m_input, m_token);
+
+ m_xssFilter.filterToken(m_token);
m_treeBuilder->constructTreeFromToken(m_token);
m_token.clear();
@@ -274,7 +280,12 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
bool HTMLDocumentParser::hasInsertionPoint()
{
- return m_input.hasInsertionPoint();
+ // FIXME: The wasCreatedByScript() branch here might not be fully correct.
+ // Our model of the EOF character differs slightly from the one in
+ // the spec because our treatment is uniform between network-sourced
+ // and script-sourced input streams whereas the spec treats them
+ // differently.
+ return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile());
}
void HTMLDocumentParser::insert(const SegmentedString& source)
@@ -414,6 +425,11 @@ bool HTMLDocumentParser::inScriptExecution() const
return m_scriptRunner->isExecutingScript();
}
+String HTMLDocumentParser::sourceForToken(const HTMLToken& token)
+{
+ return m_sourceTracker.sourceForToken(token);
+}
+
int HTMLDocumentParser::lineNumber() const
{
return m_tokenizer->lineNumber();
@@ -460,9 +476,7 @@ void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript)
bool HTMLDocumentParser::shouldLoadExternalScriptFromSrc(const AtomicString& srcValue)
{
- if (!xssAuditor())
- return true;
- return xssAuditor()->canLoadExternalScriptFromSrc(srcValue);
+ return document()->contentSecurityPolicy()->canLoadExternalScriptFromSrc(srcValue);
}
void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource)
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.h b/Source/WebCore/html/parser/HTMLDocumentParser.h
index f925269..be2ca1b 100644
--- a/Source/WebCore/html/parser/HTMLDocumentParser.h
+++ b/Source/WebCore/html/parser/HTMLDocumentParser.h
@@ -30,10 +30,12 @@
#include "FragmentScriptingPermission.h"
#include "HTMLInputStream.h"
#include "HTMLScriptRunnerHost.h"
+#include "HTMLSourceTracker.h"
#include "HTMLToken.h"
#include "ScriptableDocumentParser.h"
#include "SegmentedString.h"
#include "Timer.h"
+#include "XSSFilter.h"
#include <wtf/OwnPtr.h>
namespace WebCore {
@@ -71,8 +73,11 @@ public:
static bool usePreHTML5ParserQuirks(Document*);
HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
+ String sourceForToken(const HTMLToken&);
virtual TextPosition0 textPosition() const;
+ virtual int lineNumber() const;
+
virtual void suspendScheduledTasks();
virtual void resumeScheduledTasks();
@@ -97,7 +102,6 @@ private:
virtual bool isWaitingForScripts() const;
virtual bool isExecutingScript() const;
virtual void executeScriptsWaitingForStylesheets();
- virtual int lineNumber() const;
// HTMLScriptRunnerHost
virtual void watchForLoad(CachedResource*);
@@ -141,6 +145,8 @@ private:
OwnPtr<HTMLTreeBuilder> m_treeBuilder;
OwnPtr<HTMLPreloadScanner> m_preloadScanner;
OwnPtr<HTMLParserScheduler> m_parserScheduler;
+ HTMLSourceTracker m_sourceTracker;
+ XSSFilter m_xssFilter;
bool m_endWasDelayed;
unsigned m_writeNestingLevel;
diff --git a/Source/WebCore/html/parser/HTMLInputStream.h b/Source/WebCore/html/parser/HTMLInputStream.h
index d95ec31..512ae88 100644
--- a/Source/WebCore/html/parser/HTMLInputStream.h
+++ b/Source/WebCore/html/parser/HTMLInputStream.h
@@ -67,17 +67,7 @@ public:
bool hasInsertionPoint() const
{
- if (&m_first != m_last)
- return true;
- if (!haveSeenEndOfFile()) {
- // FIXME: Somehow we need to understand the difference between
- // input streams that are coming off the network and streams that
- // were created with document.open(). In the later case, we always
- // have an isertion point at the end of the stream until someone
- // calls document.close().
- return true;
- }
- return false;
+ return &m_first != m_last;
}
void markEndOfFile()
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.cpp b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
index 91ff8d3..2be6af9 100644
--- a/Source/WebCore/html/parser/HTMLParserIdioms.cpp
+++ b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
@@ -25,6 +25,7 @@
#include "config.h"
#include "HTMLParserIdioms.h"
+#include <limits>
#include <wtf/MathExtras.h>
#include <wtf/dtoa.h>
#include <wtf/text/AtomicString.h>
@@ -85,7 +86,7 @@ bool parseToDoubleForNumberType(const String& string, double* result)
// Numbers are considered finite IEEE 754 single-precision floating point values.
// See HTML5 2.4.4.3 `Real numbers.'
- if (-FLT_MAX > value || value > FLT_MAX)
+ if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
return false;
if (result) {
diff --git a/Source/WebCore/html/parser/HTMLScriptRunner.cpp b/Source/WebCore/html/parser/HTMLScriptRunner.cpp
index 2fe1d30..c99858d 100644
--- a/Source/WebCore/html/parser/HTMLScriptRunner.cpp
+++ b/Source/WebCore/html/parser/HTMLScriptRunner.cpp
@@ -264,6 +264,7 @@ bool HTMLScriptRunner::requestPendingScript(PendingScript& pendingScript, Elemen
ASSERT(!pendingScript.element());
const AtomicString& srcValue = script->getAttribute(srcAttr);
// Allow the host to disllow script loads (using the XSSAuditor, etc.)
+ // FIXME: this check should be performed on the final URL in a redirect chain.
if (!m_host->shouldLoadExternalScriptFromSrc(srcValue))
return false;
// FIXME: We need to resolve the url relative to the element.
diff --git a/Source/WebCore/html/parser/HTMLSourceTracker.cpp b/Source/WebCore/html/parser/HTMLSourceTracker.cpp
new file mode 100644
index 0000000..cf43105
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLSourceTracker.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2010 Adam Barth. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLSourceTracker.h"
+
+namespace WebCore {
+
+HTMLSourceTracker::HTMLSourceTracker()
+{
+}
+
+void HTMLSourceTracker::start(const HTMLInputStream& input, HTMLToken& token)
+{
+ m_sourceFromPreviousSegments = token.type() == HTMLToken::Uninitialized ? String() : m_sourceFromPreviousSegments + m_source.toString();
+ m_source = input.current();
+ token.setBaseOffset(input.current().numberOfCharactersConsumed() - m_sourceFromPreviousSegments.length());
+}
+
+void HTMLSourceTracker::end(const HTMLInputStream& input, HTMLToken& token)
+{
+ m_cachedSourceForToken = String();
+ // FIXME: This work should really be done by the HTMLTokenizer.
+ token.end(input.current().numberOfCharactersConsumed());
+}
+
+String HTMLSourceTracker::sourceForToken(const HTMLToken& token)
+{
+ if (token.type() == HTMLToken::EndOfFile)
+ return String(); // Hides the null character we use to mark the end of file.
+
+ if (!m_cachedSourceForToken.isEmpty())
+ return m_cachedSourceForToken;
+
+ ASSERT(!token.startIndex());
+ UChar* data = 0;
+ int length = token.endIndex() - token.startIndex() - m_sourceFromPreviousSegments.length();
+ String source = String::createUninitialized(length, data);
+ for (int i = 0; i < length; ++i) {
+ data[i] = *m_source;
+ m_source.advance();
+ }
+ m_cachedSourceForToken = m_sourceFromPreviousSegments + source;
+ return m_cachedSourceForToken;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLSourceTracker.h b/Source/WebCore/html/parser/HTMLSourceTracker.h
new file mode 100644
index 0000000..17ae191
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLSourceTracker.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2010 Adam Barth. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLSourceTracker_h
+#define HTMLSourceTracker_h
+
+#include "HTMLInputStream.h"
+#include "HTMLToken.h"
+
+namespace WebCore {
+
+class HTMLSourceTracker {
+ WTF_MAKE_NONCOPYABLE(HTMLSourceTracker);
+public:
+ HTMLSourceTracker();
+
+ // FIXME: Once we move "end" into HTMLTokenizer, rename "start" to
+ // something that makes it obvious that this method can be called multiple
+ // times.
+ void start(const HTMLInputStream&, HTMLToken&);
+ void end(const HTMLInputStream&, HTMLToken&);
+
+ String sourceForToken(const HTMLToken&);
+
+private:
+ String m_sourceFromPreviousSegments;
+ SegmentedString m_source;
+ String m_cachedSourceForToken;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLToken.h b/Source/WebCore/html/parser/HTMLToken.h
index 1cbc151..aa16ab2 100644
--- a/Source/WebCore/html/parser/HTMLToken.h
+++ b/Source/WebCore/html/parser/HTMLToken.h
@@ -64,20 +64,26 @@ public:
HTMLToken() { clear(); }
- void clear(int startIndex = 0)
+ void clear()
{
m_type = Uninitialized;
- m_range.m_start = startIndex;
- m_range.m_end = startIndex;
+ m_range.m_start = 0;
+ m_range.m_end = 0;
+ m_baseOffset = 0;
m_data.clear();
}
int startIndex() const { return m_range.m_start; }
int endIndex() const { return m_range.m_end; }
- void end(int endIndex)
+ void setBaseOffset(int offset)
{
- m_range.m_end = endIndex;
+ m_baseOffset = offset;
+ }
+
+ void end(int endOffset)
+ {
+ m_range.m_end = endOffset - m_baseOffset;
}
void makeEndOfFile()
@@ -172,29 +178,30 @@ public:
#endif
}
- void beginAttributeName(int index)
+ void beginAttributeName(int offset)
{
- m_currentAttribute->m_nameRange.m_start = index;
+ m_currentAttribute->m_nameRange.m_start = offset - m_baseOffset;
}
- void endAttributeName(int index)
+ void endAttributeName(int offset)
{
+ int index = offset - m_baseOffset;
m_currentAttribute->m_nameRange.m_end = index;
m_currentAttribute->m_valueRange.m_start = index;
m_currentAttribute->m_valueRange.m_end = index;
}
- void beginAttributeValue(int index)
+ void beginAttributeValue(int offset)
{
- m_currentAttribute->m_valueRange.m_start = index;
+ m_currentAttribute->m_valueRange.m_start = offset - m_baseOffset;
#ifndef NDEBUG
m_currentAttribute->m_valueRange.m_end = 0;
#endif
}
- void endAttributeValue(int index)
+ void endAttributeValue(int offset)
{
- m_currentAttribute->m_valueRange.m_end = index;
+ m_currentAttribute->m_valueRange.m_end = offset - m_baseOffset;
}
void appendToAttributeName(UChar character)
@@ -213,6 +220,13 @@ public:
m_currentAttribute->m_value.append(character);
}
+ void appendToAttributeValue(size_t i, const String& value)
+ {
+ ASSERT(!value.isEmpty());
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ m_attributes[i].m_value.append(value.characters(), value.length());
+ }
+
Type type() const { return m_type; }
bool selfClosing() const
@@ -239,6 +253,18 @@ public:
return m_data;
}
+ void eraseCharacters()
+ {
+ ASSERT(m_type == Character);
+ m_data.clear();
+ }
+
+ void eraseValueOfAttribute(size_t i)
+ {
+ ASSERT(m_type == StartTag || m_type == EndTag);
+ m_attributes[i].m_value.clear();
+ }
+
const DataVector& characters() const
{
ASSERT(m_type == Character);
@@ -331,9 +357,8 @@ private:
};
Type m_type;
-
- // Which characters from the input stream are represented by this token.
- Range m_range;
+ Range m_range; // Always starts at zero.
+ int m_baseOffset;
// "name" for DOCTYPE, StartTag, and EndTag
// "characters" for Character
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
index 97cee13..d2931ac 100644
--- a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -26,8 +26,8 @@
#include "config.h"
#include "HTMLTreeBuilder.h"
-#include "CharacterNames.h"
#include "Comment.h"
+#include "DOMWindow.h"
#include "DocumentFragment.h"
#include "DocumentType.h"
#include "Frame.h"
@@ -50,6 +50,7 @@
#include "XLinkNames.h"
#include "XMLNSNames.h"
#include "XMLNames.h"
+#include <wtf/unicode/CharacterNames.h>
namespace WebCore {
@@ -341,7 +342,7 @@ private:
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
: m_framesetOk(true)
, m_document(document)
- , m_tree(document, FragmentScriptingAllowed, false)
+ , m_tree(document)
, m_reportErrors(reportErrors)
, m_isPaused(false)
, m_insertionMode(InitialMode)
@@ -359,8 +360,8 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* docum
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
: m_framesetOk(true)
, m_fragmentContext(fragment, contextElement, scriptingPermission)
- , m_document(m_fragmentContext.document())
- , m_tree(m_document, scriptingPermission, true)
+ , m_document(fragment->document())
+ , m_tree(fragment, scriptingPermission)
, m_reportErrors(false) // FIXME: Why not report errors in fragments?
, m_isPaused(false)
, m_insertionMode(InitialMode)
@@ -374,7 +375,6 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* f
if (contextElement) {
// Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
- m_document->setCompatibilityMode(contextElement->document()->compatibilityMode());
processFakeStartTag(htmlTag);
resetInsertionModeAppropriately();
m_tree.setForm(closestFormAncestor(contextElement));
@@ -403,27 +403,24 @@ HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
}
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
- : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI()))
- , m_fragment(fragment)
+ : m_fragment(fragment)
, m_contextElement(contextElement)
, m_scriptingPermission(scriptingPermission)
{
- m_dummyDocumentForFragmentParsing->setCompatibilityMode(fragment->document()->compatibilityMode());
-}
-
-Document* HTMLTreeBuilder::FragmentParsingContext::document() const
-{
- ASSERT(m_fragment);
- return m_dummyDocumentForFragmentParsing.get();
+ ASSERT(!fragment->hasChildNodes());
}
void HTMLTreeBuilder::FragmentParsingContext::finished()
{
- // Populate the DocumentFragment with the parsed content now that we're done.
- ContainerNode* root = m_dummyDocumentForFragmentParsing.get();
- if (m_contextElement)
- root = m_dummyDocumentForFragmentParsing->documentElement();
- m_fragment->takeAllChildrenFrom(root);
+ if (!m_contextElement)
+ return;
+
+ // The HTML5 spec says to return the children of the fragment's document
+ // element when there is a context element (10.4.7).
+ RefPtr<ContainerNode> documentElement = firstElementChild(m_fragment);
+ m_fragment->removeChildren();
+ ASSERT(documentElement);
+ m_fragment->takeAllChildrenFrom(documentElement.get());
}
HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
@@ -2807,6 +2804,20 @@ void HTMLTreeBuilder::finished()
m_document->finishedParsing();
}
+void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
+{
+ DEFINE_STATIC_LOCAL(String, parseErrorMessage, ("HTML parse error (recovered gracefully)"));
+
+ if (!m_reportErrors)
+ return;
+
+ DOMWindow* domWindow = m_document->domWindow();
+ if (!domWindow)
+ return;
+
+ domWindow->console()->addMessage(HTMLMessageSource, LogMessageType, WarningMessageLevel, parseErrorMessage, m_parser->lineNumber(), m_document->url().string());
+}
+
bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
{
if (!frame)
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.h b/Source/WebCore/html/parser/HTMLTreeBuilder.h
index 309ac6f..0cec667 100644
--- a/Source/WebCore/html/parser/HTMLTreeBuilder.h
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.h
@@ -183,8 +183,7 @@ private:
bool m_framesetOk;
- // FIXME: Implement error reporting.
- void parseError(AtomicHTMLToken&) { }
+ void parseError(AtomicHTMLToken&);
InsertionMode insertionMode() const { return m_insertionMode; }
void setInsertionMode(InsertionMode mode)
@@ -212,7 +211,6 @@ private:
FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
~FragmentParsingContext();
- Document* document() const;
DocumentFragment* fragment() const { return m_fragment; }
Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; }
FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; }
@@ -220,7 +218,6 @@ private:
void finished();
private:
- RefPtr<Document> m_dummyDocumentForFragmentParsing;
DocumentFragment* m_fragment;
Element* m_contextElement;
diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.cpp b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
index ace8590..7cdbdc7 100644
--- a/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
+++ b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
@@ -49,35 +49,27 @@ void HTMLViewSourceParser::insert(const SegmentedString&)
void HTMLViewSourceParser::pumpTokenizer()
{
- while (m_tokenizer->nextToken(m_input.current(), m_token)) {
- m_token.end(m_input.current().numberOfCharactersConsumed());
+ while (true) {
+ m_sourceTracker.start(m_input, m_token);
+ if (!m_tokenizer->nextToken(m_input.current(), m_token))
+ break;
+ m_sourceTracker.end(m_input, m_token);
+
document()->addSource(sourceForToken(), m_token);
updateTokenizerState();
- m_token.clear(m_input.current().numberOfCharactersConsumed());
+ m_token.clear();
}
}
void HTMLViewSourceParser::append(const SegmentedString& input)
{
m_input.appendToEnd(input);
- m_source.append(input);
pumpTokenizer();
}
String HTMLViewSourceParser::sourceForToken()
{
- if (m_token.type() == HTMLToken::EndOfFile)
- return String();
-
- ASSERT(m_source.numberOfCharactersConsumed() == m_token.startIndex());
- UChar* data = 0;
- int length = m_token.endIndex() - m_token.startIndex();
- String source = String::createUninitialized(length, data);
- for (int i = 0; i < length; ++i) {
- data[i] = *m_source;
- m_source.advance();
- }
- return source;
+ return m_sourceTracker.sourceForToken(m_token);
}
void HTMLViewSourceParser::updateTokenizerState()
diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.h b/Source/WebCore/html/parser/HTMLViewSourceParser.h
index abe55b4..2e6ddfe 100644
--- a/Source/WebCore/html/parser/HTMLViewSourceParser.h
+++ b/Source/WebCore/html/parser/HTMLViewSourceParser.h
@@ -28,6 +28,7 @@
#include "DecodedDataDocumentParser.h"
#include "HTMLInputStream.h"
+#include "HTMLSourceTracker.h"
#include "HTMLToken.h"
#include "HTMLTokenizer.h"
#include "HTMLViewSourceDocument.h"
@@ -69,8 +70,8 @@ private:
void updateTokenizerState();
HTMLInputStream m_input;
- SegmentedString m_source;
HTMLToken m_token;
+ HTMLSourceTracker m_sourceTracker;
OwnPtr<HTMLTokenizer> m_tokenizer;
};
diff --git a/Source/WebCore/html/parser/XSSFilter.cpp b/Source/WebCore/html/parser/XSSFilter.cpp
new file mode 100644
index 0000000..de31f76
--- /dev/null
+++ b/Source/WebCore/html/parser/XSSFilter.cpp
@@ -0,0 +1,450 @@
+/*
+ * Copyright (C) 2011 Adam Barth. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "XSSFilter.h"
+
+#include "DOMWindow.h"
+#include "Document.h"
+#include "DocumentLoader.h"
+#include "Frame.h"
+#include "HTMLDocumentParser.h"
+#include "HTMLNames.h"
+#include "HTMLParamElement.h"
+#include "HTMLParserIdioms.h"
+#include "Settings.h"
+#include "TextEncoding.h"
+#include "TextResourceDecoder.h"
+#include <wtf/text/CString.h>
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+bool isNonCanonicalCharacter(UChar c) // Predicate passed to String::removeCharacters() by canonicalize(); true means "strip this character".
+{
+ // We remove backslashes, the digit zero, NUL, and every code unit >= 127 (DEL and all non-ASCII characters).
+ //
+ // Note, we don't unescape backslash sequences the way PHP stripslashes() does, which among other things converts "\\0" to the \0 character.
+ // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
+ // adverse effect that we remove any legitimate zeros from a string.
+ //
+ // For instance: new String("http://localhost:8000") => new String("http://localhost:8").
+ return (c == '\\' || c == '0' || c == '\0' || c >= 127);
+}
+
+String canonicalize(const String& string) // Returns |string| with every character for which isNonCanonicalCharacter() is true removed.
+{
+ return string.removeCharacters(&isNonCanonicalCharacter);
+}
+
+bool isRequiredForInjection(UChar c) // True for a single quote, double quote, '<' or '>'; init() discards request data that contains none of these.
+{
+ return (c == '\'' || c == '"' || c == '<' || c == '>');
+}
+
+bool hasName(const HTMLToken& token, const QualifiedName& name) // True when the token's name equals the local name of |name| (compared with equalIgnoringNullity).
+{
+ return equalIgnoringNullity(token.name(), static_cast<const String&>(name.localName()));
+}
+
+bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute) // Linear search for the first attribute whose name equals name.localName(); returns whether one was found.
+{
+ for (size_t i = 0; i < token.attributes().size(); ++i) {
+ if (equalIgnoringNullity(token.attributes().at(i).m_name, name.localName())) {
+ indexOfMatchingAttribute = i; // Written only on success; callers must not read it when false is returned.
+ return true;
+ }
+ }
+ return false;
+}
+
+bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) // Heuristic: true for any attribute name of at least five characters that begins with "on".
+{
+ const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
+ if (name.size() < lengthOfShortestInlineEventHandlerName)
+ return false; // Also guarantees that name[0] and name[1] below are in bounds.
+ return name[0] == 'o' && name[1] == 'n';
+}
+
+bool containsJavaScriptURL(const Vector<UChar, 32>& value) // True when |value|, after any leading HTML whitespace, starts with "javascript:" (compared with equalIgnoringCase).
+{
+ static const char javaScriptScheme[] = "javascript:";
+ static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1; // Exclude the terminating NUL.
+
+ size_t i;
+ for (i = 0; i < value.size(); ++i) { // Advance i past the leading whitespace.
+ if (!isHTMLSpace(value[i]))
+ break;
+ }
+
+ if (value.size() - i < lengthOfJavaScriptScheme) // i <= value.size() here, so the subtraction cannot wrap.
+ return false;
+
+ return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme);
+}
+
+String decodeURL(const String& string, const TextEncoding& encoding) // Replaces '+' with a space, decodes URL escape sequences, decodes the resulting UTF-8 bytes with |encoding|, and returns the canonicalized result.
+{
+ String workingString = string;
+ workingString.replace('+', ' ');
+ workingString = decodeURLEscapeSequences(workingString);
+ CString workingStringUTF8 = workingString.utf8();
+ String decodedString = encoding.decode(workingStringUTF8.data(), workingStringUTF8.length());
+ // FIXME: Is this check necessary?
+ if (decodedString.isEmpty())
+ return canonicalize(workingString); // Fall back to the URL-unescaped string when decoding with |encoding| produced nothing.
+ return canonicalize(decodedString);
+}
+
+}
+
+XSSFilter::XSSFilter(HTMLDocumentParser* parser) // Records the parser and reads the xssAuditorEnabled setting; everything else is deferred to init().
+ : m_parser(parser)
+ , m_isEnabled(false) // Stays false when the document has no frame or the frame has no Settings.
+ , m_xssProtection(XSSProtectionEnabled)
+ , m_state(Uninitialized) // init() runs from the first filterToken() call.
+{
+ ASSERT(m_parser);
+ if (Frame* frame = parser->document()->frame()) {
+ if (Settings* settings = frame->settings())
+ m_isEnabled = settings->xssAuditorEnabled();
+ }
+ // Although tempting to call init() at this point, the various objects
+ // we want to reference might not all have been constructed yet.
+}
+
+// Lazily captures the request data that tokens are later matched against:
+// the document URL and, if present, the body of the original request. Called
+// once, from the first filterToken(). Moves m_state from Uninitialized to
+// Initial and clears m_isEnabled whenever there is nothing to match against
+// (no frame, a data: URL, or no request data containing a character that is
+// required for injection).
+void XSSFilter::init()
+{
+    const size_t minimumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
+    const int suffixTreeDepth = 5;
+
+    ASSERT(m_state == Uninitialized);
+    m_state = Initial;
+
+    if (!m_isEnabled)
+        return;
+
+    // In theory, the Document could have detached from the Frame after the
+    // XSSFilter was constructed.
+    if (!m_parser->document()->frame()) {
+        m_isEnabled = false;
+        return;
+    }
+
+    const KURL& url = m_parser->document()->url();
+
+    if (url.protocolIsData()) {
+        m_isEnabled = false;
+        return;
+    }
+
+    // isContainedInRequest() searches the request data for *canonicalized*
+    // snippets, so the URL and body must always pass through decodeURL()
+    // (which unescapes and canonicalizes). When the document has no decoder,
+    // fall back to UTF-8 rather than storing the raw, still-escaped string,
+    // which canonicalized snippets would fail to match.
+    TextResourceDecoder* decoder = m_parser->document()->decoder();
+    const TextEncoding& encoding = decoder ? decoder->encoding() : UTF8Encoding();
+
+    m_decodedURL = decodeURL(url.string(), encoding);
+    // Without a quote or an angle bracket nothing could have been injected.
+    if (m_decodedURL.find(isRequiredForInjection, 0) == notFound)
+        m_decodedURL = String();
+
+    if (DocumentLoader* documentLoader = m_parser->document()->frame()->loader()->documentLoader()) {
+        DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection"));
+        m_xssProtection = parseXSSProtectionHeader(documentLoader->response().httpHeaderField(XSSProtectionHeader));
+
+        FormData* httpBody = documentLoader->originalRequest().httpBody();
+        if (httpBody && !httpBody->isEmpty()) {
+            m_decodedHTTPBody = decodeURL(httpBody->flattenToString(), encoding);
+            if (m_decodedHTTPBody.find(isRequiredForInjection, 0) == notFound)
+                m_decodedHTTPBody = String();
+            // Only long bodies get a suffix tree for the pre-check in
+            // isContainedInRequest().
+            if (m_decodedHTTPBody.length() >= minimumLengthForSuffixTree)
+                m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
+        }
+    }
+
+    // Nothing in the request could have been reflected into the page.
+    if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty())
+        m_isEnabled = false;
+}
+
+void XSSFilter::filterToken(HTMLToken& token) // Entry point: runs the state-specific filter on |token| (which may be modified in place) and reports any blocked script.
+{
+ if (m_state == Uninitialized) {
+ init(); // Lazy initialization on the first token.
+ ASSERT(m_state == Initial);
+ }
+
+ if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled) // m_xssProtection is parsed from the X-XSS-Protection response header in init().
+ return;
+
+ bool didBlockScript = false;
+
+ switch (m_state) {
+ case Uninitialized:
+ ASSERT_NOT_REACHED();
+ break;
+ case Initial:
+ didBlockScript = filterTokenInitial(token);
+ break;
+ case AfterScriptStartTag:
+ didBlockScript = filterTokenAfterScriptStartTag(token);
+ ASSERT(m_state == Initial);
+ m_cachedSnippet = String(); // The cached <script> start-tag source is only used for the token that follows it.
+ break;
+ }
+
+ if (didBlockScript) {
+ // FIXME: Consider using a more helpful console message.
+ DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
+ // FIXME: We should add the real line number to the console. NOTE(review): domWindow() and frame() are dereferenced below without null checks; confirm they cannot be null here.
+ m_parser->document()->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
+
+ if (m_xssProtection == XSSProtectionBlockEnabled) { // Block mode: stop all loaders and schedule a navigation to the blank URL.
+ m_parser->document()->frame()->loader()->stopAllLoaders();
+ m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String());
+ }
+ }
+}
+
+bool XSSFilter::filterTokenInitial(HTMLToken& token) // Handles a token seen in the Initial state; only start tags are examined. Returns true if anything was erased from the token.
+{
+ ASSERT(m_state == Initial);
+
+ if (token.type() != HTMLToken::StartTag)
+ return false;
+
+ bool didBlockScript = eraseDangerousAttributesIfInjected(token); // Event handlers and javascript: URLs are checked on every start tag, whatever its name.
+
+ if (hasName(token, scriptTag)) // Tag-specific checks follow; at most one of them applies.
+ didBlockScript |= filterScriptToken(token);
+ else if (hasName(token, objectTag))
+ didBlockScript |= filterObjectToken(token);
+ else if (hasName(token, paramTag))
+ didBlockScript |= filterParamToken(token);
+ else if (hasName(token, embedTag))
+ didBlockScript |= filterEmbedToken(token);
+ else if (hasName(token, appletTag))
+ didBlockScript |= filterAppletToken(token);
+ else if (hasName(token, metaTag))
+ didBlockScript |= filterMetaToken(token);
+ else if (hasName(token, baseTag))
+ didBlockScript |= filterBaseToken(token);
+
+ return didBlockScript;
+}
+
+bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token) // Handles the token that follows a <script> start tag; returns true if the script text was erased.
+{
+ ASSERT(m_state == AfterScriptStartTag);
+ m_state = Initial; // This state lasts for exactly one token, whatever that token turns out to be.
+
+ if (token.type() != HTMLToken::Character) { // Not script text (asserted to be an end tag or EOF): nothing to filter.
+ ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile);
+ return false;
+ }
+
+ int start = 0;
+ // FIXME: We probably want to grab only the first few characters of the
+ // contents of the script element.
+ int end = token.endIndex() - token.startIndex();
+ if (isContainedInRequest(m_cachedSnippet + snippetForRange(token, start, end))) { // Match the start tag's source plus the script text against the request.
+ token.eraseCharacters();
+ token.appendToCharacter(' '); // Technically, character tokens can't be empty.
+ return true;
+ }
+ return false;
+}
+
+bool XSSFilter::filterScriptToken(HTMLToken& token) // Erases an injected src attribute and returns true; otherwise caches the start tag's source and defers the check to the next token.
+{
+ ASSERT(m_state == Initial);
+ ASSERT(token.type() == HTMLToken::StartTag);
+ ASSERT(hasName(token, scriptTag));
+
+ if (eraseAttributeIfInjected(token, srcAttr, blankURL().string())) // An injected src value is replaced with the blank URL.
+ return true;
+
+ m_state = AfterScriptStartTag; // The following token is handled by filterTokenAfterScriptStartTag().
+ m_cachedSnippet = m_parser->sourceForToken(token);
+ return false;
+}
+
+bool XSSFilter::filterObjectToken(HTMLToken& token) // Erases the data, type and classid attributes of an <object> start tag when they were injected; returns true if any was erased.
+{
+ ASSERT(m_state == Initial);
+ ASSERT(token.type() == HTMLToken::StartTag);
+ ASSERT(hasName(token, objectTag));
+
+ bool didBlockScript = false;
+
+ didBlockScript |= eraseAttributeIfInjected(token, dataAttr, blankURL().string()); // data is replaced with the blank URL; the other two are simply emptied.
+ didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
+ didBlockScript |= eraseAttributeIfInjected(token, classidAttr);
+
+ return didBlockScript;
+}
+
+bool XSSFilter::filterParamToken(HTMLToken& token) // For a <param> whose name attribute passes HTMLParamElement::isURLParameter(), replaces an injected value attribute with the blank URL.
+{
+ ASSERT(m_state == Initial);
+ ASSERT(token.type() == HTMLToken::StartTag);
+ ASSERT(hasName(token, paramTag));
+
+ size_t indexOfNameAttribute;
+ if (!findAttributeWithName(token, nameAttr, indexOfNameAttribute))
+ return false; // No name attribute: nothing to decide on.
+
+ const HTMLToken::Attribute& nameAttribute = token.attributes().at(indexOfNameAttribute);
+ String name = String(nameAttribute.m_value.data(), nameAttribute.m_value.size());
+
+ if (!HTMLParamElement::isURLParameter(name))
+ return false; // Only parameters that isURLParameter() accepts are filtered.
+
+ return eraseAttributeIfInjected(token, valueAttr, blankURL().string());
+}
+
+bool XSSFilter::filterEmbedToken(HTMLToken& token) // Erases the src and type attributes of an <embed> start tag when they were injected; returns true if either was erased.
+{
+ ASSERT(m_state == Initial);
+ ASSERT(token.type() == HTMLToken::StartTag);
+ ASSERT(hasName(token, embedTag));
+
+ bool didBlockScript = false;
+
+ didBlockScript |= eraseAttributeIfInjected(token, srcAttr, blankURL().string()); // src is replaced with the blank URL; type is simply emptied.
+ didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
+
+ return didBlockScript;
+}
+
+bool XSSFilter::filterAppletToken(HTMLToken& token) // Empties the code and object attributes of an <applet> start tag when they were injected; returns true if either was emptied.
+{
+ ASSERT(m_state == Initial);
+ ASSERT(token.type() == HTMLToken::StartTag);
+ ASSERT(hasName(token, appletTag));
+
+ bool didBlockScript = false;
+
+ didBlockScript |= eraseAttributeIfInjected(token, codeAttr);
+ didBlockScript |= eraseAttributeIfInjected(token, objectAttr);
+
+ return didBlockScript;
+}
+
+bool XSSFilter::filterMetaToken(HTMLToken& token) // Empties the http_equivAttr attribute of a <meta> start tag when it was injected.
+{
+ ASSERT(m_state == Initial);
+ ASSERT(token.type() == HTMLToken::StartTag);
+ ASSERT(hasName(token, metaTag));
+
+ return eraseAttributeIfInjected(token, http_equivAttr);
+}
+
+bool XSSFilter::filterBaseToken(HTMLToken& token) // Empties the href attribute of a <base> start tag when it was injected.
+{
+ ASSERT(m_state == Initial);
+ ASSERT(token.type() == HTMLToken::StartTag);
+ ASSERT(hasName(token, baseTag));
+
+ return eraseAttributeIfInjected(token, hrefAttr);
+}
+
+bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token) // Blanks every inline event handler and every javascript: URL attribute value whose source is found in the request; returns true if any was blanked.
+{
+ DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
+
+ bool didBlockScript = false;
+ for (size_t i = 0; i < token.attributes().size(); ++i) {
+ const HTMLToken::Attribute& attribute = token.attributes().at(i);
+ bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
+ bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value); // The value is only inspected for attributes that are not event handlers.
+ if (!isInlineEventHandler && !valueContainsJavaScriptURL)
+ continue; // Not a dangerous attribute.
+ if (!isContainedInRequest(snippetForAttribute(token, attribute)))
+ continue; // Dangerous, but not reflected from the request.
+ token.eraseValueOfAttribute(i);
+ if (valueContainsJavaScriptURL)
+ token.appendToAttributeValue(i, safeJavaScriptURL); // Leave a harmless javascript: URL behind; event handlers are left empty.
+ didBlockScript = true;
+ }
+ return didBlockScript;
+}
+
+bool XSSFilter::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue) // Clears the named attribute's value (substituting |replacementValue| when non-empty) if the attribute's source occurs in the request; returns true when it did so.
+{
+ size_t indexOfAttribute;
+ if (findAttributeWithName(token, attributeName, indexOfAttribute)) {
+ const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute);
+ if (isContainedInRequest(snippetForAttribute(token, attribute))) {
+ if (attributeName == srcAttr && isSameOriginResource(String(attribute.m_value.data(), attribute.m_value.size())))
+ return false; // Exception: a src accepted by isSameOriginResource() is left intact.
+ token.eraseValueOfAttribute(indexOfAttribute);
+ if (!replacementValue.isEmpty())
+ token.appendToAttributeValue(indexOfAttribute, replacementValue);
+ return true;
+ }
+ }
+ return false; // Attribute absent, or its source does not appear in the request.
+}
+
+String XSSFilter::snippetForRange(const HTMLToken& token, int start, int end) // Returns the substring of the token's source text (as supplied by the parser) beginning at |start| with length end - start.
+{
+ // FIXME: There's an extra allocation here that we could save by
+ // passing the range to the parser.
+ return m_parser->sourceForToken(token).substring(start, end - start);
+}
+
+String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute) // Returns the token source from the start of the attribute's name range to the end of its value range.
+{
+ // FIXME: We should grab one character before the name also.
+ int start = attribute.m_nameRange.m_start - token.startIndex(); // Subtracting startIndex() turns the range position into an offset within this token's source.
+ // FIXME: We probably want to grab only the first few characters of the attribute value.
+ int end = attribute.m_valueRange.m_end - token.startIndex();
+ return snippetForRange(token, start, end);
+}
+
+bool XSSFilter::isContainedInRequest(const String& snippet) // True when the canonicalized |snippet| is found in m_decodedURL or m_decodedHTTPBody.
+{
+ ASSERT(!snippet.isEmpty());
+ String canonicalizedSnippet = canonicalize(snippet);
+ ASSERT(!canonicalizedSnippet.isEmpty());
+ if (m_decodedURL.find(canonicalizedSnippet, 0, false) != notFound) // NOTE(review): the trailing `false` is presumably String::find's case-sensitivity flag; confirm.
+ return true;
+ if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(canonicalizedSnippet)) // Pre-check: skip the full search when the suffix tree (if one was built) rules the snippet out.
+ return false;
+ return m_decodedHTTPBody.find(canonicalizedSnippet, 0, false) != notFound;
+}
+
+bool XSSFilter::isSameOriginResource(const String& url) // True when |url|, resolved against the document URL, has the document's host and an empty query string.
+{
+ // If the resource is loaded from the same host as the enclosing page, it's
+ // probably not an XSS attack, so we reduce false positives by allowing the
+ // request. If the resource has a query string, we're more suspicious,
+ // however, because that's pretty rare and the attacker might be able to
+ // trick a server-side script into doing something dangerous with the query
+ // string.
+ KURL resourceURL(m_parser->document()->url(), url);
+ return (m_parser->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty()); // Note: only the host is compared, not the scheme or port.
+}
+
+}
diff --git a/Source/WebCore/html/parser/XSSFilter.h b/Source/WebCore/html/parser/XSSFilter.h
new file mode 100644
index 0000000..2c7d428
--- /dev/null
+++ b/Source/WebCore/html/parser/XSSFilter.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2011 Adam Barth. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef XSSFilter_h
+#define XSSFilter_h
+
+#include "HTMLToken.h"
+#include "HTTPParsers.h"
+#include "SuffixTree.h"
+
+namespace WebCore {
+
+class HTMLDocumentParser;
+
+class XSSFilter { // Examines HTML tokens on behalf of an HTMLDocumentParser and blanks parts of them that also appear in the request.
+ WTF_MAKE_NONCOPYABLE(XSSFilter);
+public:
+ explicit XSSFilter(HTMLDocumentParser*);
+
+ void filterToken(HTMLToken&); // Sole public entry point; may modify the token in place.
+
+private:
+ enum State {
+ Uninitialized, // init() has not run yet.
+ Initial, // Default state between tokens.
+ AfterScriptStartTag, // The previous token was a <script> start tag whose contents still have to be checked.
+ };
+
+ void init(); // Lazy setup, run from the first filterToken() call.
+
+ bool filterTokenInitial(HTMLToken&); // Per-state handlers; each returns true if something was blocked.
+ bool filterTokenAfterScriptStartTag(HTMLToken&);
+
+ bool filterScriptToken(HTMLToken&); // Per-tag handlers, dispatched from filterTokenInitial().
+ bool filterObjectToken(HTMLToken&);
+ bool filterParamToken(HTMLToken&);
+ bool filterEmbedToken(HTMLToken&);
+ bool filterAppletToken(HTMLToken&);
+ bool filterMetaToken(HTMLToken&);
+ bool filterBaseToken(HTMLToken&);
+
+ bool eraseDangerousAttributesIfInjected(HTMLToken&); // Inline event handlers and javascript: URLs.
+ bool eraseAttributeIfInjected(HTMLToken&, const QualifiedName&, const String& replacementValue = String()); // An empty replacementValue leaves the attribute value empty.
+
+ String snippetForRange(const HTMLToken&, int start, int end);
+ String snippetForAttribute(const HTMLToken&, const HTMLToken::Attribute&);
+
+ bool isContainedInRequest(const String&);
+ bool isSameOriginResource(const String& url);
+
+ HTMLDocumentParser* m_parser; // Asserted non-null in the constructor.
+ bool m_isEnabled;
+ XSSProtectionDisposition m_xssProtection; // Parsed from the X-XSS-Protection response header in init().
+
+ String m_decodedURL; // Document URL as prepared by init(); null when it contains no character required for injection.
+ String m_decodedHTTPBody; // Body of the original request as prepared by init(); null under the same condition.
+ OwnPtr<SuffixTree<ASCIICodebook> > m_decodedHTTPBodySuffixTree; // Built by init() only when the prepared body is long enough.
+
+ State m_state;
+ String m_cachedSnippet; // Source of the most recent <script> start tag, kept for the token that follows it.
+};
+
+}
+
+#endif