diff options
Diffstat (limited to 'Source/WebCore/html/parser')
-rw-r--r-- | Source/WebCore/html/parser/HTMLConstructionSite.cpp | 30 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLConstructionSite.h | 9 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLDocumentParser.cpp | 24 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLDocumentParser.h | 8 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLInputStream.h | 12 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLParserIdioms.cpp | 3 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLScriptRunner.cpp | 1 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLSourceTracker.cpp | 69 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLSourceTracker.h | 55 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLToken.h | 55 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLTreeBuilder.cpp | 49 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLTreeBuilder.h | 5 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLViewSourceParser.cpp | 24 | ||||
-rw-r--r-- | Source/WebCore/html/parser/HTMLViewSourceParser.h | 3 | ||||
-rw-r--r-- | Source/WebCore/html/parser/XSSFilter.cpp | 450 | ||||
-rw-r--r-- | Source/WebCore/html/parser/XSSFilter.h | 87 |
16 files changed, 805 insertions, 79 deletions
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.cpp b/Source/WebCore/html/parser/HTMLConstructionSite.cpp index c46b9b9..a026ef9 100644 --- a/Source/WebCore/html/parser/HTMLConstructionSite.cpp +++ b/Source/WebCore/html/parser/HTMLConstructionSite.cpp @@ -130,10 +130,20 @@ void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<N child->attach(); } -HTMLConstructionSite::HTMLConstructionSite(Document* document, FragmentScriptingPermission scriptingPermission, bool isParsingFragment) +HTMLConstructionSite::HTMLConstructionSite(Document* document) : m_document(document) + , m_attachmentRoot(document) + , m_fragmentScriptingPermission(FragmentScriptingAllowed) + , m_isParsingFragment(false) + , m_redirectAttachToFosterParent(false) +{ +} + +HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission) + : m_document(fragment->document()) + , m_attachmentRoot(fragment) , m_fragmentScriptingPermission(scriptingPermission) - , m_isParsingFragment(isParsingFragment) + , m_isParsingFragment(true) , m_redirectAttachToFosterParent(false) { } @@ -145,6 +155,7 @@ HTMLConstructionSite::~HTMLConstructionSite() void HTMLConstructionSite::detach() { m_document = 0; + m_attachmentRoot = 0; } void HTMLConstructionSite::setForm(HTMLFormElement* form) @@ -170,7 +181,7 @@ void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& tok { RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document); element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); - m_openElements.pushHTMLHtmlElement(attach<Element>(m_document, element.get())); + m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get())); #if ENABLE(OFFLINE_WEB_APPLICATIONS) element->insertedByParser(); #endif @@ -205,7 +216,16 @@ void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token) void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::DOCTYPE); - attach(m_document, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier()))); + attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier()))); + + // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which + // never occurs. However, if we ever chose to support such, this code is subtly wrong, + // because context-less fragments can determine their own quirks mode, and thus change + // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code + // in a fragment, as changing the owning document's compatibility mode would be wrong. + ASSERT(!m_isParsingFragment); + if (m_isParsingFragment) + return; if (token.forceQuirks()) m_document->setCompatibilityMode(Document::QuirksMode); @@ -222,7 +242,7 @@ void HTMLConstructionSite::insertComment(AtomicHTMLToken& token) void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token) { ASSERT(token.type() == HTMLToken::Comment); - attach(m_document, Comment::create(m_document, token.comment())); + attach(m_attachmentRoot, Comment::create(m_document, token.comment())); } void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token) diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.h b/Source/WebCore/html/parser/HTMLConstructionSite.h index 5a4a65d..0298503 100644 --- a/Source/WebCore/html/parser/HTMLConstructionSite.h +++ b/Source/WebCore/html/parser/HTMLConstructionSite.h @@ -43,7 +43,8 @@ class Element; class HTMLConstructionSite { WTF_MAKE_NONCOPYABLE(HTMLConstructionSite); public: - HTMLConstructionSite(Document*, FragmentScriptingPermission, bool isParsingFragment); + HTMLConstructionSite(Document*); + HTMLConstructionSite(DocumentFragment*, FragmentScriptingPermission); ~HTMLConstructionSite(); void detach(); @@ -130,6 +131,12 @@ private: void dispatchDocumentElementAvailableIfNeeded(); Document* m_document; + + // This is the root ContainerNode to which the parser attaches all newly + // constructed nodes. It points to a DocumentFragment when parsing fragments + // and a Document in all other cases. + ContainerNode* m_attachmentRoot; + RefPtr<Element> m_head; RefPtr<HTMLFormElement> m_form; mutable HTMLElementStack m_openElements; diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.cpp b/Source/WebCore/html/parser/HTMLDocumentParser.cpp index 93e1309..2fe9486 100644 --- a/Source/WebCore/html/parser/HTMLDocumentParser.cpp +++ b/Source/WebCore/html/parser/HTMLDocumentParser.cpp @@ -39,7 +39,6 @@ #include "InspectorInstrumentation.h" #include "NestingLevelIncrementer.h" #include "Settings.h" -#include "XSSAuditor.h" #include <wtf/CurrentTime.h> #ifdef ANDROID_INSTRUMENT @@ -85,6 +84,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors , m_scriptRunner(HTMLScriptRunner::create(document, this)) , m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, usePreHTML5ParserQuirks(document))) , m_parserScheduler(HTMLParserScheduler::create(this)) + , m_xssFilter(this) , m_endWasDelayed(false) , m_writeNestingLevel(0) { @@ -96,6 +96,7 @@ HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* cont : ScriptableDocumentParser(fragment->document()) , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document()))) , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks(fragment->document()))) + , m_xssFilter(this) , m_endWasDelayed(false) , m_writeNestingLevel(0) { @@ -230,8 +231,13 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) if (!m_treeBuilder->isParsingFragment() && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending()) break; + + m_sourceTracker.start(m_input, m_token); if (!m_tokenizer->nextToken(m_input.current(), m_token)) break; + m_sourceTracker.end(m_input, m_token); + + m_xssFilter.filterToken(m_token); m_treeBuilder->constructTreeFromToken(m_token); m_token.clear(); @@ -274,7 +280,12 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) bool HTMLDocumentParser::hasInsertionPoint() { - return m_input.hasInsertionPoint(); + // FIXME: The wasCreatedByScript() branch here might not be fully correct. + // Our model of the EOF character differs slightly from the one in + // the spec because our treatment is uniform between network-sourced + // and script-sourced input streams whereas the spec treats them + // differently. + return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile()); } void HTMLDocumentParser::insert(const SegmentedString& source) @@ -414,6 +425,11 @@ bool HTMLDocumentParser::inScriptExecution() const return m_scriptRunner->isExecutingScript(); } +String HTMLDocumentParser::sourceForToken(const HTMLToken& token) +{ + return m_sourceTracker.sourceForToken(token); +} + int HTMLDocumentParser::lineNumber() const { return m_tokenizer->lineNumber(); @@ -460,9 +476,7 @@ void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript) bool HTMLDocumentParser::shouldLoadExternalScriptFromSrc(const AtomicString& srcValue) { - if (!xssAuditor()) - return true; - return xssAuditor()->canLoadExternalScriptFromSrc(srcValue); + return document()->contentSecurityPolicy()->canLoadExternalScriptFromSrc(srcValue); } void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource) diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.h b/Source/WebCore/html/parser/HTMLDocumentParser.h index f925269..be2ca1b 100644 --- a/Source/WebCore/html/parser/HTMLDocumentParser.h +++ b/Source/WebCore/html/parser/HTMLDocumentParser.h @@ -30,10 +30,12 @@ #include "FragmentScriptingPermission.h" #include "HTMLInputStream.h" #include "HTMLScriptRunnerHost.h" +#include "HTMLSourceTracker.h" #include "HTMLToken.h" #include "ScriptableDocumentParser.h" #include "SegmentedString.h" #include "Timer.h" +#include "XSSFilter.h" #include <wtf/OwnPtr.h> namespace WebCore { @@ -71,8 +73,11 @@ public: static bool usePreHTML5ParserQuirks(Document*); HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); } + String sourceForToken(const HTMLToken&); virtual TextPosition0 textPosition() const; + virtual int lineNumber() const; + virtual void suspendScheduledTasks(); virtual void resumeScheduledTasks(); @@ -97,7 +102,6 @@ private: virtual bool isWaitingForScripts() const; virtual bool isExecutingScript() const; virtual void executeScriptsWaitingForStylesheets(); - virtual int lineNumber() const; // HTMLScriptRunnerHost virtual void watchForLoad(CachedResource*); @@ -141,6 +145,8 @@ private: OwnPtr<HTMLTreeBuilder> m_treeBuilder; OwnPtr<HTMLPreloadScanner> m_preloadScanner; OwnPtr<HTMLParserScheduler> m_parserScheduler; + HTMLSourceTracker m_sourceTracker; + XSSFilter m_xssFilter; bool m_endWasDelayed; unsigned m_writeNestingLevel; diff --git a/Source/WebCore/html/parser/HTMLInputStream.h b/Source/WebCore/html/parser/HTMLInputStream.h index d95ec31..512ae88 100644 --- a/Source/WebCore/html/parser/HTMLInputStream.h +++ b/Source/WebCore/html/parser/HTMLInputStream.h @@ -67,17 +67,7 @@ public: bool hasInsertionPoint() const { - if (&m_first != m_last) - return true; - if (!haveSeenEndOfFile()) { - // FIXME: Somehow we need to understand the difference between - // input streams that are coming off the network and streams that - // were created with document.open(). In the later case, we always - // have an isertion point at the end of the stream until someone - // calls document.close(). - return true; - } - return false; + return &m_first != m_last; } void markEndOfFile() diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.cpp b/Source/WebCore/html/parser/HTMLParserIdioms.cpp index 91ff8d3..2be6af9 100644 --- a/Source/WebCore/html/parser/HTMLParserIdioms.cpp +++ b/Source/WebCore/html/parser/HTMLParserIdioms.cpp @@ -25,6 +25,7 @@ #include "config.h" #include "HTMLParserIdioms.h" +#include <limits> #include <wtf/MathExtras.h> #include <wtf/dtoa.h> #include <wtf/text/AtomicString.h> @@ -85,7 +86,7 @@ bool parseToDoubleForNumberType(const String& string, double* result) // Numbers are considered finite IEEE 754 single-precision floating point values. // See HTML5 2.4.4.3 `Real numbers.' - if (-FLT_MAX > value || value > FLT_MAX) + if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max()) return false; if (result) { diff --git a/Source/WebCore/html/parser/HTMLScriptRunner.cpp b/Source/WebCore/html/parser/HTMLScriptRunner.cpp index 2fe1d30..c99858d 100644 --- a/Source/WebCore/html/parser/HTMLScriptRunner.cpp +++ b/Source/WebCore/html/parser/HTMLScriptRunner.cpp @@ -264,6 +264,7 @@ bool HTMLScriptRunner::requestPendingScript(PendingScript& pendingScript, Elemen ASSERT(!pendingScript.element()); const AtomicString& srcValue = script->getAttribute(srcAttr); // Allow the host to disllow script loads (using the XSSAuditor, etc.) + // FIXME: this check should be performed on the final URL in a redirect chain. if (!m_host->shouldLoadExternalScriptFromSrc(srcValue)) return false; // FIXME: We need to resolve the url relative to the element. diff --git a/Source/WebCore/html/parser/HTMLSourceTracker.cpp b/Source/WebCore/html/parser/HTMLSourceTracker.cpp new file mode 100644 index 0000000..cf43105 --- /dev/null +++ b/Source/WebCore/html/parser/HTMLSourceTracker.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2010 Adam Barth. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "HTMLSourceTracker.h" + +namespace WebCore { + +HTMLSourceTracker::HTMLSourceTracker() +{ +} + +void HTMLSourceTracker::start(const HTMLInputStream& input, HTMLToken& token) +{ + m_sourceFromPreviousSegments = token.type() == HTMLToken::Uninitialized ? String() : m_sourceFromPreviousSegments + m_source.toString(); + m_source = input.current(); + token.setBaseOffset(input.current().numberOfCharactersConsumed() - m_sourceFromPreviousSegments.length()); +} + +void HTMLSourceTracker::end(const HTMLInputStream& input, HTMLToken& token) +{ + m_cachedSourceForToken = String(); + // FIXME: This work should really be done by the HTMLTokenizer. + token.end(input.current().numberOfCharactersConsumed()); +} + +String HTMLSourceTracker::sourceForToken(const HTMLToken& token) +{ + if (token.type() == HTMLToken::EndOfFile) + return String(); // Hides the null character we use to mark the end of file. + + if (!m_cachedSourceForToken.isEmpty()) + return m_cachedSourceForToken; + + ASSERT(!token.startIndex()); + UChar* data = 0; + int length = token.endIndex() - token.startIndex() - m_sourceFromPreviousSegments.length(); + String source = String::createUninitialized(length, data); + for (int i = 0; i < length; ++i) { + data[i] = *m_source; + m_source.advance(); + } + m_cachedSourceForToken = m_sourceFromPreviousSegments + source; + return m_cachedSourceForToken; +} + +} diff --git a/Source/WebCore/html/parser/HTMLSourceTracker.h b/Source/WebCore/html/parser/HTMLSourceTracker.h new file mode 100644 index 0000000..17ae191 --- /dev/null +++ b/Source/WebCore/html/parser/HTMLSourceTracker.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2010 Adam Barth. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HTMLSourceTracker_h +#define HTMLSourceTracker_h + +#include "HTMLInputStream.h" +#include "HTMLToken.h" + +namespace WebCore { + +class HTMLSourceTracker { + WTF_MAKE_NONCOPYABLE(HTMLSourceTracker); +public: + HTMLSourceTracker(); + + // FIXME: Once we move "end" into HTMLTokenizer, rename "start" to + // something that makes it obvious that this method can be called multiple + // times. + void start(const HTMLInputStream&, HTMLToken&); + void end(const HTMLInputStream&, HTMLToken&); + + String sourceForToken(const HTMLToken&); + +private: + String m_sourceFromPreviousSegments; + SegmentedString m_source; + String m_cachedSourceForToken; +}; + +} + +#endif diff --git a/Source/WebCore/html/parser/HTMLToken.h b/Source/WebCore/html/parser/HTMLToken.h index 1cbc151..aa16ab2 100644 --- a/Source/WebCore/html/parser/HTMLToken.h +++ b/Source/WebCore/html/parser/HTMLToken.h @@ -64,20 +64,26 @@ public: HTMLToken() { clear(); } - void clear(int startIndex = 0) + void clear() { m_type = Uninitialized; - m_range.m_start = startIndex; - m_range.m_end = startIndex; + m_range.m_start = 0; + m_range.m_end = 0; + m_baseOffset = 0; m_data.clear(); } int startIndex() const { return m_range.m_start; } int endIndex() const { return m_range.m_end; } - void end(int endIndex) + void setBaseOffset(int offset) { - m_range.m_end = endIndex; + m_baseOffset = offset; + } + + void end(int endOffset) + { + m_range.m_end = endOffset - m_baseOffset; } void makeEndOfFile() @@ -172,29 +178,30 @@ public: #endif } - void beginAttributeName(int index) + void beginAttributeName(int offset) { - m_currentAttribute->m_nameRange.m_start = index; + m_currentAttribute->m_nameRange.m_start = offset - m_baseOffset; } - void endAttributeName(int index) + void endAttributeName(int offset) { + int index = offset - m_baseOffset; m_currentAttribute->m_nameRange.m_end = index; m_currentAttribute->m_valueRange.m_start = index; m_currentAttribute->m_valueRange.m_end = index; } - void beginAttributeValue(int index) + void beginAttributeValue(int offset) { - m_currentAttribute->m_valueRange.m_start = index; + m_currentAttribute->m_valueRange.m_start = offset - m_baseOffset; #ifndef NDEBUG m_currentAttribute->m_valueRange.m_end = 0; #endif } - void endAttributeValue(int index) + void endAttributeValue(int offset) { - m_currentAttribute->m_valueRange.m_end = index; + m_currentAttribute->m_valueRange.m_end = offset - m_baseOffset; } void appendToAttributeName(UChar character) @@ -213,6 +220,13 @@ public: m_currentAttribute->m_value.append(character); } + void appendToAttributeValue(size_t i, const String& value) + { + ASSERT(!value.isEmpty()); + ASSERT(m_type == StartTag || m_type == EndTag); + m_attributes[i].m_value.append(value.characters(), value.length()); + } + Type type() const { return m_type; } bool selfClosing() const @@ -239,6 +253,18 @@ public: return m_data; } + void eraseCharacters() + { + ASSERT(m_type == Character); + m_data.clear(); + } + + void eraseValueOfAttribute(size_t i) + { + ASSERT(m_type == StartTag || m_type == EndTag); + m_attributes[i].m_value.clear(); + } + const DataVector& characters() const { ASSERT(m_type == Character); @@ -331,9 +357,8 @@ private: }; Type m_type; - - // Which characters from the input stream are represented by this token. - Range m_range; + Range m_range; // Always starts at zero. + int m_baseOffset; // "name" for DOCTYPE, StartTag, and EndTag // "characters" for Character diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp index 97cee13..d2931ac 100644 --- a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp +++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp @@ -26,8 +26,8 @@ #include "config.h" #include "HTMLTreeBuilder.h" -#include "CharacterNames.h" #include "Comment.h" +#include "DOMWindow.h" #include "DocumentFragment.h" #include "DocumentType.h" #include "Frame.h" @@ -50,6 +50,7 @@ #include "XLinkNames.h" #include "XMLNSNames.h" #include "XMLNames.h" +#include <wtf/unicode/CharacterNames.h> namespace WebCore { @@ -341,7 +342,7 @@ private: HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks) : m_framesetOk(true) , m_document(document) - , m_tree(document, FragmentScriptingAllowed, false) + , m_tree(document) , m_reportErrors(reportErrors) , m_isPaused(false) , m_insertionMode(InitialMode) @@ -359,8 +360,8 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* docum HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks) : m_framesetOk(true) , m_fragmentContext(fragment, contextElement, scriptingPermission) - , m_document(m_fragmentContext.document()) - , m_tree(m_document, scriptingPermission, true) + , m_document(fragment->document()) + , m_tree(fragment, scriptingPermission) , m_reportErrors(false) // FIXME: Why not report errors in fragments? , m_isPaused(false) , m_insertionMode(InitialMode) @@ -374,7 +375,6 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* f if (contextElement) { // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm: // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case - m_document->setCompatibilityMode(contextElement->document()->compatibilityMode()); processFakeStartTag(htmlTag); resetInsertionModeAppropriately(); m_tree.setForm(closestFormAncestor(contextElement)); @@ -403,27 +403,24 @@ HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext() } HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission) - : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI())) - , m_fragment(fragment) + : m_fragment(fragment) , m_contextElement(contextElement) , m_scriptingPermission(scriptingPermission) { - m_dummyDocumentForFragmentParsing->setCompatibilityMode(fragment->document()->compatibilityMode()); -} - -Document* HTMLTreeBuilder::FragmentParsingContext::document() const -{ - ASSERT(m_fragment); - return m_dummyDocumentForFragmentParsing.get(); + ASSERT(!fragment->hasChildNodes()); } void HTMLTreeBuilder::FragmentParsingContext::finished() { - // Populate the DocumentFragment with the parsed content now that we're done. - ContainerNode* root = m_dummyDocumentForFragmentParsing.get(); - if (m_contextElement) - root = m_dummyDocumentForFragmentParsing->documentElement(); - m_fragment->takeAllChildrenFrom(root); + if (!m_contextElement) + return; + + // The HTML5 spec says to return the children of the fragment's document + // element when there is a context element (10.4.7). + RefPtr<ContainerNode> documentElement = firstElementChild(m_fragment); + m_fragment->removeChildren(); + ASSERT(documentElement); + m_fragment->takeAllChildrenFrom(documentElement.get()); } HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext() @@ -2807,6 +2804,20 @@ void HTMLTreeBuilder::finished() m_document->finishedParsing(); } +void HTMLTreeBuilder::parseError(AtomicHTMLToken&) +{ + DEFINE_STATIC_LOCAL(String, parseErrorMessage, ("HTML parse error (recovered gracefully)")); + + if (!m_reportErrors) + return; + + DOMWindow* domWindow = m_document->domWindow(); + if (!domWindow) + return; + + domWindow->console()->addMessage(HTMLMessageSource, LogMessageType, WarningMessageLevel, parseErrorMessage, m_parser->lineNumber(), m_document->url().string()); +} + bool HTMLTreeBuilder::scriptEnabled(Frame* frame) { if (!frame) diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.h b/Source/WebCore/html/parser/HTMLTreeBuilder.h index 309ac6f..0cec667 100644 --- a/Source/WebCore/html/parser/HTMLTreeBuilder.h +++ b/Source/WebCore/html/parser/HTMLTreeBuilder.h @@ -183,8 +183,7 @@ private: bool m_framesetOk; - // FIXME: Implement error reporting. - void parseError(AtomicHTMLToken&) { } + void parseError(AtomicHTMLToken&); InsertionMode insertionMode() const { return m_insertionMode; } void setInsertionMode(InsertionMode mode) @@ -212,7 +211,6 @@ private: FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission); ~FragmentParsingContext(); - Document* document() const; DocumentFragment* fragment() const { return m_fragment; } Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; } FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; } @@ -220,7 +218,6 @@ private: void finished(); private: - RefPtr<Document> m_dummyDocumentForFragmentParsing; DocumentFragment* m_fragment; Element* m_contextElement; diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.cpp b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp index ace8590..7cdbdc7 100644 --- a/Source/WebCore/html/parser/HTMLViewSourceParser.cpp +++ b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp @@ -49,35 +49,27 @@ void HTMLViewSourceParser::insert(const SegmentedString&) void HTMLViewSourceParser::pumpTokenizer() { - while (m_tokenizer->nextToken(m_input.current(), m_token)) { - m_token.end(m_input.current().numberOfCharactersConsumed()); + while (true) { + m_sourceTracker.start(m_input, m_token); + if (!m_tokenizer->nextToken(m_input.current(), m_token)) + break; + m_sourceTracker.end(m_input, m_token); + document()->addSource(sourceForToken(), m_token); updateTokenizerState(); - m_token.clear(m_input.current().numberOfCharactersConsumed()); + m_token.clear(); } } void HTMLViewSourceParser::append(const SegmentedString& input) { m_input.appendToEnd(input); - m_source.append(input); pumpTokenizer(); } String HTMLViewSourceParser::sourceForToken() { - if (m_token.type() == HTMLToken::EndOfFile) - return String(); - - ASSERT(m_source.numberOfCharactersConsumed() == m_token.startIndex()); - UChar* data = 0; - int length = m_token.endIndex() - m_token.startIndex(); - String source = String::createUninitialized(length, data); - for (int i = 0; i < length; ++i) { - data[i] = *m_source; - m_source.advance(); - } - return source; + return m_sourceTracker.sourceForToken(m_token); } void HTMLViewSourceParser::updateTokenizerState() diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.h b/Source/WebCore/html/parser/HTMLViewSourceParser.h index abe55b4..2e6ddfe 100644 --- a/Source/WebCore/html/parser/HTMLViewSourceParser.h +++ b/Source/WebCore/html/parser/HTMLViewSourceParser.h @@ -28,6 +28,7 @@ #include "DecodedDataDocumentParser.h" #include "HTMLInputStream.h" +#include "HTMLSourceTracker.h" #include "HTMLToken.h" #include "HTMLTokenizer.h" #include "HTMLViewSourceDocument.h" @@ -69,8 +70,8 @@ private: void updateTokenizerState(); HTMLInputStream m_input; - SegmentedString m_source; HTMLToken m_token; + HTMLSourceTracker m_sourceTracker; OwnPtr<HTMLTokenizer> m_tokenizer; }; diff --git a/Source/WebCore/html/parser/XSSFilter.cpp b/Source/WebCore/html/parser/XSSFilter.cpp new file mode 100644 index 0000000..de31f76 --- /dev/null +++ b/Source/WebCore/html/parser/XSSFilter.cpp @@ -0,0 +1,450 @@ +/* + * Copyright (C) 2011 Adam Barth. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "XSSFilter.h" + +#include "DOMWindow.h" +#include "Document.h" +#include "DocumentLoader.h" +#include "Frame.h" +#include "HTMLDocumentParser.h" +#include "HTMLNames.h" +#include "HTMLParamElement.h" +#include "HTMLParserIdioms.h" +#include "Settings.h" +#include "TextEncoding.h" +#include "TextResourceDecoder.h" +#include <wtf/text/CString.h> + +namespace WebCore { + +using namespace HTMLNames; + +namespace { + +bool isNonCanonicalCharacter(UChar c) +{ + // We remove all non-ASCII characters, including non-printable ASCII characters. + // + // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character. + // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the + // adverse effect that we remove any legitimate zeros from a string. + // + // For instance: new String("http://localhost:8000") => new String("http://localhost:8"). + return (c == '\\' || c == '0' || c == '\0' || c >= 127); +} + +String canonicalize(const String& string) +{ + return string.removeCharacters(&isNonCanonicalCharacter); +} + +bool isRequiredForInjection(UChar c) +{ + return (c == '\'' || c == '"' || c == '<' || c == '>'); +} + +bool hasName(const HTMLToken& token, const QualifiedName& name) +{ + return equalIgnoringNullity(token.name(), static_cast<const String&>(name.localName())); +} + +bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute) +{ + for (size_t i = 0; i < token.attributes().size(); ++i) { + if (equalIgnoringNullity(token.attributes().at(i).m_name, name.localName())) { + indexOfMatchingAttribute = i; + return true; + } + } + return false; +} + +bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) +{ + const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut. + if (name.size() < lengthOfShortestInlineEventHandlerName) + return false; + return name[0] == 'o' && name[1] == 'n'; +} + +bool containsJavaScriptURL(const Vector<UChar, 32>& value) +{ + static const char javaScriptScheme[] = "javascript:"; + static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1; + + size_t i; + for (i = 0; i < value.size(); ++i) { + if (!isHTMLSpace(value[i])) + break; + } + + if (value.size() - i < lengthOfJavaScriptScheme) + return false; + + return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme); +} + +String decodeURL(const String& string, const TextEncoding& encoding) +{ + String workingString = string; + workingString.replace('+', ' '); + workingString = decodeURLEscapeSequences(workingString); + CString workingStringUTF8 = workingString.utf8(); + String decodedString = encoding.decode(workingStringUTF8.data(), workingStringUTF8.length()); + // FIXME: Is this check necessary? + if (decodedString.isEmpty()) + return canonicalize(workingString); + return canonicalize(decodedString); +} + +} + +XSSFilter::XSSFilter(HTMLDocumentParser* parser) + : m_parser(parser) + , m_isEnabled(false) + , m_xssProtection(XSSProtectionEnabled) + , m_state(Uninitialized) +{ + ASSERT(m_parser); + if (Frame* frame = parser->document()->frame()) { + if (Settings* settings = frame->settings()) + m_isEnabled = settings->xssAuditorEnabled(); + } + // Although tempting to call init() at this point, the various objects + // we want to reference might not all have been constructed yet. +} + +void XSSFilter::init() +{ + const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter. + const int suffixTreeDepth = 5; + + ASSERT(m_state == Uninitialized); + m_state = Initial; + + if (!m_isEnabled) + return; + + // In theory, the Document could have detached from the Frame after the + // XSSFilter was constructed. + if (!m_parser->document()->frame()) { + m_isEnabled = false; + return; + } + + const KURL& url = m_parser->document()->url(); + + if (url.protocolIsData()) { + m_isEnabled = false; + return; + } + + TextResourceDecoder* decoder = m_parser->document()->decoder(); + m_decodedURL = decoder ? decodeURL(url.string(), decoder->encoding()) : url.string(); + if (m_decodedURL.find(isRequiredForInjection, 0) == notFound) + m_decodedURL = String(); + + if (DocumentLoader* documentLoader = m_parser->document()->frame()->loader()->documentLoader()) { + DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection")); + m_xssProtection = parseXSSProtectionHeader(documentLoader->response().httpHeaderField(XSSProtectionHeader)); + + FormData* httpBody = documentLoader->originalRequest().httpBody(); + if (httpBody && !httpBody->isEmpty()) { + String httpBodyAsString = httpBody->flattenToString(); + m_decodedHTTPBody = decoder ? decodeURL(httpBodyAsString, decoder->encoding()) : httpBodyAsString; + if (m_decodedHTTPBody.find(isRequiredForInjection, 0) == notFound) + m_decodedHTTPBody = String(); + if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree) + m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth)); + } + } + + if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty()) + m_isEnabled = false; +} + +void XSSFilter::filterToken(HTMLToken& token) +{ + if (m_state == Uninitialized) { + init(); + ASSERT(m_state == Initial); + } + + if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled) + return; + + bool didBlockScript = false; + + switch (m_state) { + case Uninitialized: + ASSERT_NOT_REACHED(); + break; + case Initial: + didBlockScript = filterTokenInitial(token); + break; + case AfterScriptStartTag: + didBlockScript = filterTokenAfterScriptStartTag(token); + ASSERT(m_state == Initial); + m_cachedSnippet = String(); + break; + } + + if (didBlockScript) { + // FIXME: Consider using a more helpful console message. + DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n")); + // FIXME: We should add the real line number to the console. + m_parser->document()->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String()); + + if (m_xssProtection == XSSProtectionBlockEnabled) { + m_parser->document()->frame()->loader()->stopAllLoaders(); + m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String()); + } + } +} + +bool XSSFilter::filterTokenInitial(HTMLToken& token) +{ + ASSERT(m_state == Initial); + + if (token.type() != HTMLToken::StartTag) + return false; + + bool didBlockScript = eraseDangerousAttributesIfInjected(token); + + if (hasName(token, scriptTag)) + didBlockScript |= filterScriptToken(token); + else if (hasName(token, objectTag)) + didBlockScript |= filterObjectToken(token); + else if (hasName(token, paramTag)) + didBlockScript |= filterParamToken(token); + else if (hasName(token, embedTag)) + didBlockScript |= filterEmbedToken(token); + else if (hasName(token, appletTag)) + didBlockScript |= filterAppletToken(token); + else if (hasName(token, metaTag)) + didBlockScript |= filterMetaToken(token); + else if (hasName(token, baseTag)) + didBlockScript |= filterBaseToken(token); + + return didBlockScript; +} + +bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token) +{ + ASSERT(m_state == AfterScriptStartTag); + m_state = Initial; + + if (token.type() != HTMLToken::Character) { + ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile); + return false; + } + + int start = 0; + // FIXME: We probably want to grab only the first few characters of the + // contents of the script element. + int end = token.endIndex() - token.startIndex(); + if (isContainedInRequest(m_cachedSnippet + snippetForRange(token, start, end))) { + token.eraseCharacters(); + token.appendToCharacter(' '); // Technically, character tokens can't be empty. + return true; + } + return false; +} + +bool XSSFilter::filterScriptToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, scriptTag)); + + if (eraseAttributeIfInjected(token, srcAttr, blankURL().string())) + return true; + + m_state = AfterScriptStartTag; + m_cachedSnippet = m_parser->sourceForToken(token); + return false; +} + +bool XSSFilter::filterObjectToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, objectTag)); + + bool didBlockScript = false; + + didBlockScript |= eraseAttributeIfInjected(token, dataAttr, blankURL().string()); + didBlockScript |= eraseAttributeIfInjected(token, typeAttr); + didBlockScript |= eraseAttributeIfInjected(token, classidAttr); + + return didBlockScript; +} + +bool XSSFilter::filterParamToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, paramTag)); + + size_t indexOfNameAttribute; + if (!findAttributeWithName(token, nameAttr, indexOfNameAttribute)) + return false; + + const HTMLToken::Attribute& nameAttribute = token.attributes().at(indexOfNameAttribute); + String name = String(nameAttribute.m_value.data(), nameAttribute.m_value.size()); + + if (!HTMLParamElement::isURLParameter(name)) + return false; + + return eraseAttributeIfInjected(token, valueAttr, blankURL().string()); +} + +bool XSSFilter::filterEmbedToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, embedTag)); + + bool didBlockScript = false; + + didBlockScript |= eraseAttributeIfInjected(token, srcAttr, blankURL().string()); + didBlockScript |= eraseAttributeIfInjected(token, typeAttr); + + return didBlockScript; +} + +bool XSSFilter::filterAppletToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, appletTag)); + + bool didBlockScript = false; + + didBlockScript |= eraseAttributeIfInjected(token, codeAttr); + didBlockScript |= eraseAttributeIfInjected(token, objectAttr); + + return didBlockScript; +} + +bool XSSFilter::filterMetaToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, metaTag)); + + return eraseAttributeIfInjected(token, http_equivAttr); +} + +bool XSSFilter::filterBaseToken(HTMLToken& token) +{ + ASSERT(m_state == Initial); + ASSERT(token.type() == HTMLToken::StartTag); + ASSERT(hasName(token, baseTag)); + + return eraseAttributeIfInjected(token, hrefAttr); +} + +bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token) +{ + DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)")); + + bool didBlockScript = false; + for (size_t i = 0; i < token.attributes().size(); ++i) { + const HTMLToken::Attribute& attribute = token.attributes().at(i); + bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name); + bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value); + if (!isInlineEventHandler && !valueContainsJavaScriptURL) + continue; + if (!isContainedInRequest(snippetForAttribute(token, attribute))) + continue; + token.eraseValueOfAttribute(i); + if (valueContainsJavaScriptURL) + token.appendToAttributeValue(i, safeJavaScriptURL); + didBlockScript = true; + } + return didBlockScript; +} + +bool XSSFilter::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue) +{ + size_t indexOfAttribute; + if (findAttributeWithName(token, attributeName, indexOfAttribute)) { + const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute); + if (isContainedInRequest(snippetForAttribute(token, attribute))) { + if (attributeName == srcAttr && isSameOriginResource(String(attribute.m_value.data(), attribute.m_value.size()))) + return false; + token.eraseValueOfAttribute(indexOfAttribute); + if (!replacementValue.isEmpty()) + token.appendToAttributeValue(indexOfAttribute, replacementValue); + return true; + } + } + return false; +} + +String XSSFilter::snippetForRange(const HTMLToken& token, int start, int end) +{ + // FIXME: There's an extra allocation here that we could save by + // passing the range to the parser. + return m_parser->sourceForToken(token).substring(start, end - start); +} + +String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute) +{ + // FIXME: We should grab one character before the name also. + int start = attribute.m_nameRange.m_start - token.startIndex(); + // FIXME: We probably want to grab only the first few characters of the attribute value. + int end = attribute.m_valueRange.m_end - token.startIndex(); + return snippetForRange(token, start, end); +} + +bool XSSFilter::isContainedInRequest(const String& snippet) +{ + ASSERT(!snippet.isEmpty()); + String canonicalizedSnippet = canonicalize(snippet); + ASSERT(!canonicalizedSnippet.isEmpty()); + if (m_decodedURL.find(canonicalizedSnippet, 0, false) != notFound) + return true; + if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(canonicalizedSnippet)) + return false; + return m_decodedHTTPBody.find(canonicalizedSnippet, 0, false) != notFound; +} + +bool XSSFilter::isSameOriginResource(const String& url) +{ + // If the resource is loaded from the same URL as the enclosing page, it's + // probably not an XSS attack, so we reduce false positives by allowing the + // request. If the resource has a query string, we're more suspicious, + // however, because that's pretty rare and the attacker might be able to + // trick a server-side script into doing something dangerous with the query + // string. + KURL resourceURL(m_parser->document()->url(), url); + return (m_parser->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty()); +} + +} diff --git a/Source/WebCore/html/parser/XSSFilter.h b/Source/WebCore/html/parser/XSSFilter.h new file mode 100644 index 0000000..2c7d428 --- /dev/null +++ b/Source/WebCore/html/parser/XSSFilter.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2011 Adam Barth. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef XSSFilter_h +#define XSSFilter_h + +#include "HTMLToken.h" +#include "HTTPParsers.h" +#include "SuffixTree.h" + +namespace WebCore { + +class HTMLDocumentParser; + +class XSSFilter { + WTF_MAKE_NONCOPYABLE(XSSFilter); +public: + explicit XSSFilter(HTMLDocumentParser*); + + void filterToken(HTMLToken&); + +private: + enum State { + Uninitialized, + Initial, + AfterScriptStartTag, + }; + + void init(); + + bool filterTokenInitial(HTMLToken&); + bool filterTokenAfterScriptStartTag(HTMLToken&); + + bool filterScriptToken(HTMLToken&); + bool filterObjectToken(HTMLToken&); + bool filterParamToken(HTMLToken&); + bool filterEmbedToken(HTMLToken&); + bool filterAppletToken(HTMLToken&); + bool filterMetaToken(HTMLToken&); + bool filterBaseToken(HTMLToken&); + + bool eraseDangerousAttributesIfInjected(HTMLToken&); + bool eraseAttributeIfInjected(HTMLToken&, const QualifiedName&, const String& replacementValue = String()); + + String snippetForRange(const HTMLToken&, int start, int end); + String snippetForAttribute(const HTMLToken&, const HTMLToken::Attribute&); + + bool isContainedInRequest(const String&); + bool isSameOriginResource(const String& url); + + HTMLDocumentParser* m_parser; + bool m_isEnabled; + XSSProtectionDisposition m_xssProtection; + + String m_decodedURL; + String m_decodedHTTPBody; + OwnPtr<SuffixTree<ASCIICodebook> > m_decodedHTTPBodySuffixTree; + + State m_state; + String m_cachedSnippet; +}; + +} + +#endif |