16 files changed, 805 insertions, 79 deletions
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.cpp b/Source/WebCore/html/parser/HTMLConstructionSite.cpp
index c46b9b9..a026ef9 100644
--- a/Source/WebCore/html/parser/HTMLConstructionSite.cpp
+++ b/Source/WebCore/html/parser/HTMLConstructionSite.cpp
@@ -130,10 +130,20 @@ void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<N
         child->attach();
 }
 
-HTMLConstructionSite::HTMLConstructionSite(Document* document, FragmentScriptingPermission scriptingPermission, bool isParsingFragment)
+HTMLConstructionSite::HTMLConstructionSite(Document* document)
     : m_document(document)
+    , m_attachmentRoot(document)
+    , m_fragmentScriptingPermission(FragmentScriptingAllowed)
+    , m_isParsingFragment(false)
+    , m_redirectAttachToFosterParent(false)
+{
+}
+
+HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
+    : m_document(fragment->document())
+    , m_attachmentRoot(fragment)
     , m_fragmentScriptingPermission(scriptingPermission)
-    , m_isParsingFragment(isParsingFragment)
+    , m_isParsingFragment(true)
     , m_redirectAttachToFosterParent(false)
 {
 }
@@ -145,6 +155,7 @@ HTMLConstructionSite::~HTMLConstructionSite()
 void HTMLConstructionSite::detach()
 {
     m_document = 0;
+    m_attachmentRoot = 0;
 }
 
 void HTMLConstructionSite::setForm(HTMLFormElement* form)
@@ -170,7 +181,7 @@ void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& tok
 {
     RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
-    m_openElements.pushHTMLHtmlElement(attach<Element>(m_document, element.get()));
+    m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
     element->insertedByParser();
 #endif
@@ -205,7 +216,16 @@ void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
 {
     ASSERT(token.type() == HTMLToken::DOCTYPE);
-    attach(m_document, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
+    attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
+    
+    // DOCTYPE nodes are only processed when parsing fragments without contextElements, which
+    // never occurs.  However, if we ever choose to support such fragments, this code is subtly wrong,
+    // because context-less fragments can determine their own quirks mode, and thus change
+    // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
+    // in a fragment, as changing the owning document's compatibility mode would be wrong.
+    ASSERT(!m_isParsingFragment);
+    if (m_isParsingFragment)
+        return;
     
     if (token.forceQuirks())
         m_document->setCompatibilityMode(Document::QuirksMode);
@@ -222,7 +242,7 @@ void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
 {
     ASSERT(token.type() == HTMLToken::Comment);
-    attach(m_document, Comment::create(m_document, token.comment()));
+    attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
 }
 
 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
diff --git a/Source/WebCore/html/parser/HTMLConstructionSite.h b/Source/WebCore/html/parser/HTMLConstructionSite.h
index 5a4a65d..0298503 100644
--- a/Source/WebCore/html/parser/HTMLConstructionSite.h
+++ b/Source/WebCore/html/parser/HTMLConstructionSite.h
@@ -43,7 +43,8 @@ class Element;
 class HTMLConstructionSite {
     WTF_MAKE_NONCOPYABLE(HTMLConstructionSite);
 public:
-    HTMLConstructionSite(Document*, FragmentScriptingPermission, bool isParsingFragment);
+    HTMLConstructionSite(Document*);
+    HTMLConstructionSite(DocumentFragment*, FragmentScriptingPermission);
     ~HTMLConstructionSite();
 
     void detach();
@@ -130,6 +131,12 @@ private:
     void dispatchDocumentElementAvailableIfNeeded();
 
     Document* m_document;
+    
+    // This is the root ContainerNode to which the parser attaches all newly
+    // constructed nodes. It points to a DocumentFragment when parsing fragments
+    // and a Document in all other cases.
+    ContainerNode* m_attachmentRoot;
+    
     RefPtr<Element> m_head;
     RefPtr<HTMLFormElement> m_form;
     mutable HTMLElementStack m_openElements;
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.cpp b/Source/WebCore/html/parser/HTMLDocumentParser.cpp
index 93e1309..2fe9486 100644
--- a/Source/WebCore/html/parser/HTMLDocumentParser.cpp
+++ b/Source/WebCore/html/parser/HTMLDocumentParser.cpp
@@ -39,7 +39,6 @@
 #include "InspectorInstrumentation.h"
 #include "NestingLevelIncrementer.h"
 #include "Settings.h"
-#include "XSSAuditor.h"
 #include <wtf/CurrentTime.h>
 
 #ifdef ANDROID_INSTRUMENT
@@ -85,6 +84,7 @@ HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors
     , m_scriptRunner(HTMLScriptRunner::create(document, this))
     , m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, usePreHTML5ParserQuirks(document)))
     , m_parserScheduler(HTMLParserScheduler::create(this))
+    , m_xssFilter(this)
     , m_endWasDelayed(false)
     , m_writeNestingLevel(0)
 {
@@ -96,6 +96,7 @@ HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* cont
     : ScriptableDocumentParser(fragment->document())
     , m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document())))
     , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks(fragment->document())))
+    , m_xssFilter(this)
     , m_endWasDelayed(false)
     , m_writeNestingLevel(0)
 {
@@ -230,8 +231,13 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
         if (!m_treeBuilder->isParsingFragment()
             && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending())
             break;
+
+        m_sourceTracker.start(m_input, m_token);
         if (!m_tokenizer->nextToken(m_input.current(), m_token))
             break;
+        m_sourceTracker.end(m_input, m_token);
+
+        m_xssFilter.filterToken(m_token);
 
         m_treeBuilder->constructTreeFromToken(m_token);
         m_token.clear();
@@ -274,7 +280,12 @@ void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
 
 bool HTMLDocumentParser::hasInsertionPoint()
 {
-    return m_input.hasInsertionPoint();
+    // FIXME: The wasCreatedByScript() branch here might not be fully correct.
+    //        Our model of the EOF character differs slightly from the one in
+    //        the spec because our treatment is uniform between network-sourced
+    //        and script-sourced input streams whereas the spec treats them
+    //        differently.
+    return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile());
 }
 
 void HTMLDocumentParser::insert(const SegmentedString& source)
@@ -414,6 +425,11 @@ bool HTMLDocumentParser::inScriptExecution() const
     return m_scriptRunner->isExecutingScript();
 }
 
+String HTMLDocumentParser::sourceForToken(const HTMLToken& token)
+{
+    return m_sourceTracker.sourceForToken(token);
+}
+
 int HTMLDocumentParser::lineNumber() const
 {
     return m_tokenizer->lineNumber();
@@ -460,9 +476,7 @@ void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript)
 
 bool HTMLDocumentParser::shouldLoadExternalScriptFromSrc(const AtomicString& srcValue)
 {
-    if (!xssAuditor())
-        return true;
-    return xssAuditor()->canLoadExternalScriptFromSrc(srcValue);
+    return document()->contentSecurityPolicy()->canLoadExternalScriptFromSrc(srcValue);
 }
 
 void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource)
diff --git a/Source/WebCore/html/parser/HTMLDocumentParser.h b/Source/WebCore/html/parser/HTMLDocumentParser.h
index f925269..be2ca1b 100644
--- a/Source/WebCore/html/parser/HTMLDocumentParser.h
+++ b/Source/WebCore/html/parser/HTMLDocumentParser.h
@@ -30,10 +30,12 @@
 #include "FragmentScriptingPermission.h"
 #include "HTMLInputStream.h"
 #include "HTMLScriptRunnerHost.h"
+#include "HTMLSourceTracker.h"
 #include "HTMLToken.h"
 #include "ScriptableDocumentParser.h"
 #include "SegmentedString.h"
 #include "Timer.h"
+#include "XSSFilter.h"
 #include <wtf/OwnPtr.h>
 
 namespace WebCore {
@@ -71,8 +73,11 @@ public:
     static bool usePreHTML5ParserQuirks(Document*);
 
     HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
+    String sourceForToken(const HTMLToken&);
 
     virtual TextPosition0 textPosition() const;
+    virtual int lineNumber() const;
+
     virtual void suspendScheduledTasks();
     virtual void resumeScheduledTasks();
 
@@ -97,7 +102,6 @@ private:
     virtual bool isWaitingForScripts() const;
     virtual bool isExecutingScript() const;
     virtual void executeScriptsWaitingForStylesheets();
-    virtual int lineNumber() const;
 
     // HTMLScriptRunnerHost
     virtual void watchForLoad(CachedResource*);
@@ -141,6 +145,8 @@ private:
     OwnPtr<HTMLTreeBuilder> m_treeBuilder;
     OwnPtr<HTMLPreloadScanner> m_preloadScanner;
     OwnPtr<HTMLParserScheduler> m_parserScheduler;
+    HTMLSourceTracker m_sourceTracker;
+    XSSFilter m_xssFilter;
 
     bool m_endWasDelayed;
     unsigned m_writeNestingLevel;
diff --git a/Source/WebCore/html/parser/HTMLInputStream.h b/Source/WebCore/html/parser/HTMLInputStream.h
index d95ec31..512ae88 100644
--- a/Source/WebCore/html/parser/HTMLInputStream.h
+++ b/Source/WebCore/html/parser/HTMLInputStream.h
@@ -67,17 +67,7 @@ public:
 
     bool hasInsertionPoint() const
     {
-        if (&m_first != m_last)
-            return true;
-        if (!haveSeenEndOfFile()) {
-            // FIXME: Somehow we need to understand the difference between
-            // input streams that are coming off the network and streams that
-            // were created with document.open(). In the later case, we always
-            // have an isertion point at the end of the stream until someone
-            // calls document.close().
-            return true;
-        }
-        return false;
+        return &m_first != m_last;
     }
 
     void markEndOfFile()
diff --git a/Source/WebCore/html/parser/HTMLParserIdioms.cpp b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
index 91ff8d3..2be6af9 100644
--- a/Source/WebCore/html/parser/HTMLParserIdioms.cpp
+++ b/Source/WebCore/html/parser/HTMLParserIdioms.cpp
@@ -25,6 +25,7 @@
 #include "config.h"
 #include "HTMLParserIdioms.h"
 
+#include <limits>
 #include <wtf/MathExtras.h>
 #include <wtf/dtoa.h>
 #include <wtf/text/AtomicString.h>
@@ -85,7 +86,7 @@ bool parseToDoubleForNumberType(const String& string, double* result)
 
     // Numbers are considered finite IEEE 754 single-precision floating point values.
     // See HTML5 2.4.4.3 `Real numbers.'
-    if (-FLT_MAX > value || value > FLT_MAX)
+    if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
         return false;
 
     if (result) {
diff --git a/Source/WebCore/html/parser/HTMLScriptRunner.cpp b/Source/WebCore/html/parser/HTMLScriptRunner.cpp
index 2fe1d30..c99858d 100644
--- a/Source/WebCore/html/parser/HTMLScriptRunner.cpp
+++ b/Source/WebCore/html/parser/HTMLScriptRunner.cpp
@@ -264,6 +264,7 @@ bool HTMLScriptRunner::requestPendingScript(PendingScript& pendingScript, Elemen
     ASSERT(!pendingScript.element());
     const AtomicString& srcValue = script->getAttribute(srcAttr);
     // Allow the host to disllow script loads (using the XSSAuditor, etc.)
+    // FIXME: this check should be performed on the final URL in a redirect chain.
     if (!m_host->shouldLoadExternalScriptFromSrc(srcValue))
         return false;
     // FIXME: We need to resolve the url relative to the element.
diff --git a/Source/WebCore/html/parser/HTMLSourceTracker.cpp b/Source/WebCore/html/parser/HTMLSourceTracker.cpp
new file mode 100644
index 0000000..cf43105
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLSourceTracker.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2010 Adam Barth. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLSourceTracker.h"
+
+namespace WebCore {
+
+HTMLSourceTracker::HTMLSourceTracker()
+{
+}
+
+void HTMLSourceTracker::start(const HTMLInputStream& input, HTMLToken& token)
+{
+    m_sourceFromPreviousSegments = token.type() == HTMLToken::Uninitialized ? String() : m_sourceFromPreviousSegments + m_source.toString();
+    m_source = input.current();
+    token.setBaseOffset(input.current().numberOfCharactersConsumed() - m_sourceFromPreviousSegments.length());
+}
+
+void HTMLSourceTracker::end(const HTMLInputStream& input, HTMLToken& token)
+{
+    m_cachedSourceForToken = String();
+    // FIXME: This work should really be done by the HTMLTokenizer.
+    token.end(input.current().numberOfCharactersConsumed());
+}
+
+String HTMLSourceTracker::sourceForToken(const HTMLToken& token)
+{
+    if (token.type() == HTMLToken::EndOfFile)
+        return String(); // Hides the null character we use to mark the end of file.
+
+    if (!m_cachedSourceForToken.isEmpty())
+        return m_cachedSourceForToken;
+
+    ASSERT(!token.startIndex());
+    UChar* data = 0;
+    int length = token.endIndex() - token.startIndex() - m_sourceFromPreviousSegments.length();
+    String source = String::createUninitialized(length, data);
+    for (int i = 0; i < length; ++i) {
+        data[i] = *m_source;
+        m_source.advance();
+    }
+    m_cachedSourceForToken = m_sourceFromPreviousSegments + source;
+    return m_cachedSourceForToken;
+}
+
+}
diff --git a/Source/WebCore/html/parser/HTMLSourceTracker.h b/Source/WebCore/html/parser/HTMLSourceTracker.h
new file mode 100644
index 0000000..17ae191
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLSourceTracker.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2010 Adam Barth. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLSourceTracker_h
+#define HTMLSourceTracker_h
+
+#include "HTMLInputStream.h"
+#include "HTMLToken.h"
+
+namespace WebCore {
+
+class HTMLSourceTracker {
+    WTF_MAKE_NONCOPYABLE(HTMLSourceTracker);
+public:
+    HTMLSourceTracker();
+
+    // FIXME: Once we move "end" into HTMLTokenizer, rename "start" to
+    // something that makes it obvious that this method can be called multiple
+    // times.
+    void start(const HTMLInputStream&, HTMLToken&);
+    void end(const HTMLInputStream&, HTMLToken&);
+
+    String sourceForToken(const HTMLToken&);
+
+private:
+    String m_sourceFromPreviousSegments;
+    SegmentedString m_source;
+    String m_cachedSourceForToken;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/html/parser/HTMLToken.h b/Source/WebCore/html/parser/HTMLToken.h
index 1cbc151..aa16ab2 100644
--- a/Source/WebCore/html/parser/HTMLToken.h
+++ b/Source/WebCore/html/parser/HTMLToken.h
@@ -64,20 +64,26 @@ public:
 
     HTMLToken() { clear(); }
 
-    void clear(int startIndex = 0)
+    void clear()
     {
         m_type = Uninitialized;
-        m_range.m_start = startIndex;
-        m_range.m_end = startIndex;
+        m_range.m_start = 0;
+        m_range.m_end = 0;
+        m_baseOffset = 0;
         m_data.clear();
     }
 
     int startIndex() const { return m_range.m_start; }
     int endIndex() const { return m_range.m_end; }
 
-    void end(int endIndex)
+    void setBaseOffset(int offset)
     {
-        m_range.m_end = endIndex;
+        m_baseOffset = offset;
+    }
+
+    void end(int endOffset)
+    {
+        m_range.m_end = endOffset - m_baseOffset;
     }
 
     void makeEndOfFile()
@@ -172,29 +178,30 @@ public:
 #endif
     }
 
-    void beginAttributeName(int index)
+    void beginAttributeName(int offset)
     {
-        m_currentAttribute->m_nameRange.m_start = index;
+        m_currentAttribute->m_nameRange.m_start = offset - m_baseOffset;
     }
 
-    void endAttributeName(int index)
+    void endAttributeName(int offset)
     {
+        int index = offset - m_baseOffset;
         m_currentAttribute->m_nameRange.m_end = index;
         m_currentAttribute->m_valueRange.m_start = index;
         m_currentAttribute->m_valueRange.m_end = index;
     }
 
-    void beginAttributeValue(int index)
+    void beginAttributeValue(int offset)
     {
-        m_currentAttribute->m_valueRange.m_start = index;
+        m_currentAttribute->m_valueRange.m_start = offset - m_baseOffset;
 #ifndef NDEBUG
         m_currentAttribute->m_valueRange.m_end = 0;
 #endif
     }
 
-    void endAttributeValue(int index)
+    void endAttributeValue(int offset)
     {
-        m_currentAttribute->m_valueRange.m_end = index;
+        m_currentAttribute->m_valueRange.m_end = offset - m_baseOffset;
     }
 
     void appendToAttributeName(UChar character)
@@ -213,6 +220,13 @@ public:
         m_currentAttribute->m_value.append(character);
     }
 
+    void appendToAttributeValue(size_t i, const String& value)
+    {
+        ASSERT(!value.isEmpty());
+        ASSERT(m_type == StartTag || m_type == EndTag);
+        m_attributes[i].m_value.append(value.characters(), value.length());
+    }
+
     Type type() const { return m_type; }
 
     bool selfClosing() const
@@ -239,6 +253,18 @@ public:
         return m_data;
     }
 
+    void eraseCharacters()
+    {
+        ASSERT(m_type == Character);
+        m_data.clear();
+    }
+
+    void eraseValueOfAttribute(size_t i)
+    {
+        ASSERT(m_type == StartTag || m_type == EndTag);
+        m_attributes[i].m_value.clear();
+    }
+
     const DataVector& characters() const
     {
         ASSERT(m_type == Character);
@@ -331,9 +357,8 @@ private:
     };
 
     Type m_type;
-
-    // Which characters from the input stream are represented by this token.
-    Range m_range;
+    Range m_range; // Always starts at zero.
+    int m_baseOffset;
 
     // "name" for DOCTYPE, StartTag, and EndTag
     // "characters" for Character
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
index 97cee13..d2931ac 100644
--- a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -26,8 +26,8 @@
 #include "config.h"
 #include "HTMLTreeBuilder.h"
 
-#include "CharacterNames.h"
 #include "Comment.h"
+#include "DOMWindow.h"
 #include "DocumentFragment.h"
 #include "DocumentType.h"
 #include "Frame.h"
@@ -50,6 +50,7 @@
 #include "XLinkNames.h"
 #include "XMLNSNames.h"
 #include "XMLNames.h"
+#include <wtf/unicode/CharacterNames.h>
 
 namespace WebCore {
 
@@ -341,7 +342,7 @@ private:
 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
     : m_framesetOk(true)
     , m_document(document)
-    , m_tree(document, FragmentScriptingAllowed, false)
+    , m_tree(document)
     , m_reportErrors(reportErrors)
     , m_isPaused(false)
     , m_insertionMode(InitialMode)
@@ -359,8 +360,8 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* docum
 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
     : m_framesetOk(true)
     , m_fragmentContext(fragment, contextElement, scriptingPermission)
-    , m_document(m_fragmentContext.document())
-    , m_tree(m_document, scriptingPermission, true)
+    , m_document(fragment->document())
+    , m_tree(fragment, scriptingPermission)
     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
     , m_isPaused(false)
     , m_insertionMode(InitialMode)
@@ -374,7 +375,6 @@ HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* f
     if (contextElement) {
         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
-        m_document->setCompatibilityMode(contextElement->document()->compatibilityMode());
         processFakeStartTag(htmlTag);
         resetInsertionModeAppropriately();
         m_tree.setForm(closestFormAncestor(contextElement));
@@ -403,27 +403,24 @@ HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
 }
 
 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
-    : m_dummyDocumentForFragmentParsing(HTMLDocument::create(0, KURL(), fragment->document()->baseURI()))
-    , m_fragment(fragment)
+    : m_fragment(fragment)
     , m_contextElement(contextElement)
     , m_scriptingPermission(scriptingPermission)
 {
-    m_dummyDocumentForFragmentParsing->setCompatibilityMode(fragment->document()->compatibilityMode());
-}
-
-Document* HTMLTreeBuilder::FragmentParsingContext::document() const
-{
-    ASSERT(m_fragment);
-    return m_dummyDocumentForFragmentParsing.get();
+    ASSERT(!fragment->hasChildNodes());
 }
 
 void HTMLTreeBuilder::FragmentParsingContext::finished()
 {
-    // Populate the DocumentFragment with the parsed content now that we're done.
-    ContainerNode* root = m_dummyDocumentForFragmentParsing.get();
-    if (m_contextElement)
-        root = m_dummyDocumentForFragmentParsing->documentElement();
-    m_fragment->takeAllChildrenFrom(root);
+    if (!m_contextElement)
+        return;
+    
+    // The HTML5 spec says to return the children of the fragment's document
+    // element when there is a context element (10.4.7).
+    RefPtr<ContainerNode> documentElement = firstElementChild(m_fragment);
+    m_fragment->removeChildren();
+    ASSERT(documentElement);
+    m_fragment->takeAllChildrenFrom(documentElement.get());
 }
 
 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
@@ -2807,6 +2804,20 @@ void HTMLTreeBuilder::finished()
     m_document->finishedParsing();
 }
 
+void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
+{
+    DEFINE_STATIC_LOCAL(String, parseErrorMessage, ("HTML parse error (recovered gracefully)"));
+
+    if (!m_reportErrors)
+        return;
+
+    DOMWindow* domWindow = m_document->domWindow();
+    if (!domWindow)
+        return;
+
+    domWindow->console()->addMessage(HTMLMessageSource, LogMessageType, WarningMessageLevel, parseErrorMessage, m_parser->lineNumber(), m_document->url().string());
+}
+
 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
 {
     if (!frame)
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.h b/Source/WebCore/html/parser/HTMLTreeBuilder.h
index 309ac6f..0cec667 100644
--- a/Source/WebCore/html/parser/HTMLTreeBuilder.h
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.h
@@ -183,8 +183,7 @@ private:
 
     bool m_framesetOk;
 
-    // FIXME: Implement error reporting.
-    void parseError(AtomicHTMLToken&) { }
+    void parseError(AtomicHTMLToken&);
 
     InsertionMode insertionMode() const { return m_insertionMode; }
     void setInsertionMode(InsertionMode mode)
@@ -212,7 +211,6 @@ private:
         FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
         ~FragmentParsingContext();
 
-        Document* document() const;
         DocumentFragment* fragment() const { return m_fragment; }
         Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; }
         FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; }
@@ -220,7 +218,6 @@ private:
         void finished();
 
     private:
-        RefPtr<Document> m_dummyDocumentForFragmentParsing;
         DocumentFragment* m_fragment;
         Element* m_contextElement;
 
diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.cpp b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
index ace8590..7cdbdc7 100644
--- a/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
+++ b/Source/WebCore/html/parser/HTMLViewSourceParser.cpp
@@ -49,35 +49,27 @@ void HTMLViewSourceParser::insert(const SegmentedString&)
 
 void HTMLViewSourceParser::pumpTokenizer()
 {
-    while (m_tokenizer->nextToken(m_input.current(), m_token)) {
-        m_token.end(m_input.current().numberOfCharactersConsumed());
+    while (true) {
+        m_sourceTracker.start(m_input, m_token);
+        if (!m_tokenizer->nextToken(m_input.current(), m_token))
+            break;
+        m_sourceTracker.end(m_input, m_token);
+
         document()->addSource(sourceForToken(), m_token);
         updateTokenizerState();
-        m_token.clear(m_input.current().numberOfCharactersConsumed());
+        m_token.clear();
     }
 }
 
 void HTMLViewSourceParser::append(const SegmentedString& input)
 {
     m_input.appendToEnd(input);
-    m_source.append(input);
     pumpTokenizer();
 }
 
 String HTMLViewSourceParser::sourceForToken()
 {
-    if (m_token.type() == HTMLToken::EndOfFile)
-        return String();
-
-    ASSERT(m_source.numberOfCharactersConsumed() == m_token.startIndex());
-    UChar* data = 0;
-    int length = m_token.endIndex() - m_token.startIndex();
-    String source = String::createUninitialized(length, data);
-    for (int i = 0; i < length; ++i) {
-        data[i] = *m_source;
-        m_source.advance();
-    }
-    return source;
+    return m_sourceTracker.sourceForToken(m_token);
 }
 
 void HTMLViewSourceParser::updateTokenizerState()
diff --git a/Source/WebCore/html/parser/HTMLViewSourceParser.h b/Source/WebCore/html/parser/HTMLViewSourceParser.h
index abe55b4..2e6ddfe 100644
--- a/Source/WebCore/html/parser/HTMLViewSourceParser.h
+++ b/Source/WebCore/html/parser/HTMLViewSourceParser.h
@@ -28,6 +28,7 @@
 
 #include "DecodedDataDocumentParser.h"
 #include "HTMLInputStream.h"
+#include "HTMLSourceTracker.h"
 #include "HTMLToken.h"
 #include "HTMLTokenizer.h"
 #include "HTMLViewSourceDocument.h"
@@ -69,8 +70,8 @@ private:
     void updateTokenizerState();
 
     HTMLInputStream m_input;
-    SegmentedString m_source;
     HTMLToken m_token;
+    HTMLSourceTracker m_sourceTracker;
     OwnPtr<HTMLTokenizer> m_tokenizer;
 };
 
diff --git a/Source/WebCore/html/parser/XSSFilter.cpp b/Source/WebCore/html/parser/XSSFilter.cpp
new file mode 100644
index 0000000..de31f76
--- /dev/null
+++ b/Source/WebCore/html/parser/XSSFilter.cpp
@@ -0,0 +1,450 @@
+/*
+ * Copyright (C) 2011 Adam Barth. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "XSSFilter.h"
+
+#include "DOMWindow.h"
+#include "Document.h"
+#include "DocumentLoader.h"
+#include "Frame.h"
+#include "HTMLDocumentParser.h"
+#include "HTMLNames.h"
+#include "HTMLParamElement.h"
+#include "HTMLParserIdioms.h"
+#include "Settings.h"
+#include "TextEncoding.h"
+#include "TextResourceDecoder.h"
+#include <wtf/text/CString.h>
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+namespace {
+
+bool isNonCanonicalCharacter(UChar c)
+{
+    // Matches backslashes, the digit '0', NUL, DEL (127) and every non-ASCII character; other ASCII control characters are not matched.
+    //
+    // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
+    // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
+    // adverse effect that we remove any legitimate zeros from a string.
+    //
+    // For instance: new String("http://localhost:8000") => new String("http://localhost:8").
+    return (c == '\\' || c == '0' || c == '\0' || c >= 127);
+}
+
+String canonicalize(const String& string)
+{
+    return string.removeCharacters(&isNonCanonicalCharacter);
+}
+
+bool isRequiredForInjection(UChar c)
+{
+    return (c == '\'' || c == '"' || c == '<' || c == '>');
+}
+
+bool hasName(const HTMLToken& token, const QualifiedName& name)
+{
+    return equalIgnoringNullity(token.name(), static_cast<const String&>(name.localName()));
+}
+
+bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
+{
+    for (size_t i = 0; i < token.attributes().size(); ++i) {
+        if (equalIgnoringNullity(token.attributes().at(i).m_name, name.localName())) {
+            indexOfMatchingAttribute = i;
+            return true;
+        }
+    }
+    return false;
+}
+
+bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name)
+{
+    const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
+    if (name.size() < lengthOfShortestInlineEventHandlerName)
+        return false;
+    return name[0] == 'o' && name[1] == 'n';
+}
+
+bool containsJavaScriptURL(const Vector<UChar, 32>& value)
+{
+    static const char javaScriptScheme[] = "javascript:";
+    static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1;
+
+    size_t i;
+    for (i = 0; i < value.size(); ++i) {
+        if (!isHTMLSpace(value[i]))
+            break;
+    }
+
+    if (value.size() - i < lengthOfJavaScriptScheme)
+        return false;
+
+    return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme);
+}
+
+String decodeURL(const String& string, const TextEncoding& encoding)
+{
+    String workingString = string;
+    workingString.replace('+', ' ');
+    workingString = decodeURLEscapeSequences(workingString);
+    CString workingStringUTF8 = workingString.utf8();
+    String decodedString = encoding.decode(workingStringUTF8.data(), workingStringUTF8.length());
+    // FIXME: Is this check necessary?
+    if (decodedString.isEmpty())
+        return canonicalize(workingString);
+    return canonicalize(decodedString);
+}
+
+}
+
+XSSFilter::XSSFilter(HTMLDocumentParser* parser)
+    : m_parser(parser)
+    , m_isEnabled(false)
+    , m_xssProtection(XSSProtectionEnabled)
+    , m_state(Uninitialized)
+{
+    ASSERT(m_parser);
+    if (Frame* frame = parser->document()->frame()) {
+        if (Settings* settings = frame->settings())
+            m_isEnabled = settings->xssAuditorEnabled();
+    }
+    // Although tempting to call init() at this point, the various objects
+    // we want to reference might not all have been constructed yet.
+}
+
+void XSSFilter::init()
+{
+    const size_t minimumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
+    const int suffixTreeDepth = 5;
+
+    ASSERT(m_state == Uninitialized);
+    m_state = Initial;
+
+    if (!m_isEnabled)
+        return;
+    
+    // In theory, the Document could have detached from the Frame after the
+    // XSSFilter was constructed.
+    if (!m_parser->document()->frame()) {
+        m_isEnabled = false;
+        return;
+    }
+
+    const KURL& url = m_parser->document()->url();
+
+    if (url.protocolIsData()) {
+        m_isEnabled = false;
+        return;
+    }
+
+    TextResourceDecoder* decoder = m_parser->document()->decoder();
+    m_decodedURL = decoder ? decodeURL(url.string(), decoder->encoding()) : url.string();
+    if (m_decodedURL.find(isRequiredForInjection, 0) == notFound)
+        m_decodedURL = String();
+
+    if (DocumentLoader* documentLoader = m_parser->document()->frame()->loader()->documentLoader()) {
+        DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection"));
+        m_xssProtection = parseXSSProtectionHeader(documentLoader->response().httpHeaderField(XSSProtectionHeader));
+
+        FormData* httpBody = documentLoader->originalRequest().httpBody();
+        if (httpBody && !httpBody->isEmpty()) {
+            String httpBodyAsString = httpBody->flattenToString();
+            m_decodedHTTPBody = decoder ? decodeURL(httpBodyAsString, decoder->encoding()) : httpBodyAsString;
+            if (m_decodedHTTPBody.find(isRequiredForInjection, 0) == notFound)
+                m_decodedHTTPBody = String();
+            if (m_decodedHTTPBody.length() >= minimumLengthForSuffixTree)
+                m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
+        }
+    }
+
+    if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty())
+        m_isEnabled = false;
+}
+
+void XSSFilter::filterToken(HTMLToken& token)
+{
+    if (m_state == Uninitialized) {
+        init();
+        ASSERT(m_state == Initial);
+    }
+
+    if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled)
+        return;
+
+    bool didBlockScript = false;
+
+    switch (m_state) {
+    case Uninitialized:
+        ASSERT_NOT_REACHED();
+        break;
+    case Initial: 
+        didBlockScript = filterTokenInitial(token);
+        break;
+    case AfterScriptStartTag:
+        didBlockScript = filterTokenAfterScriptStartTag(token);
+        ASSERT(m_state == Initial);
+        m_cachedSnippet = String();
+        break;
+    }
+
+    if (didBlockScript) {
+        // FIXME: Consider using a more helpful console message.
+        DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
+        // FIXME: We should add the real line number to the console.
+        m_parser->document()->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
+
+        if (m_xssProtection == XSSProtectionBlockEnabled) {
+            m_parser->document()->frame()->loader()->stopAllLoaders();
+            m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String());
+        }
+    }
+}
+
+bool XSSFilter::filterTokenInitial(HTMLToken& token)
+{
+    ASSERT(m_state == Initial);
+
+    if (token.type() != HTMLToken::StartTag)
+        return false;
+
+    bool didBlockScript = eraseDangerousAttributesIfInjected(token);
+
+    if (hasName(token, scriptTag))
+        didBlockScript |= filterScriptToken(token);
+    else if (hasName(token, objectTag))
+        didBlockScript |= filterObjectToken(token);
+    else if (hasName(token, paramTag))
+        didBlockScript |= filterParamToken(token);
+    else if (hasName(token, embedTag))
+        didBlockScript |= filterEmbedToken(token);
+    else if (hasName(token, appletTag))
+        didBlockScript |= filterAppletToken(token);
+    else if (hasName(token, metaTag))
+        didBlockScript |= filterMetaToken(token);
+    else if (hasName(token, baseTag))
+        didBlockScript |= filterBaseToken(token);
+
+    return didBlockScript;
+}
+
+bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token)
+{
+    ASSERT(m_state == AfterScriptStartTag);
+    m_state = Initial;
+
+    if (token.type() != HTMLToken::Character) {
+        ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile);
+        return false;
+    }
+
+    int start = 0;
+    // FIXME: We probably want to grab only the first few characters of the
+    //        contents of the script element.
+    int end = token.endIndex() - token.startIndex();
+    if (isContainedInRequest(m_cachedSnippet + snippetForRange(token, start, end))) {
+        token.eraseCharacters();
+        token.appendToCharacter(' '); // Technically, character tokens can't be empty.
+        return true;
+    }
+    return false;
+}
+
+bool XSSFilter::filterScriptToken(HTMLToken& token)
+{
+    ASSERT(m_state == Initial);
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(hasName(token, scriptTag));
+
+    if (eraseAttributeIfInjected(token, srcAttr, blankURL().string()))
+        return true;
+
+    m_state = AfterScriptStartTag;
+    m_cachedSnippet = m_parser->sourceForToken(token);
+    return false;
+}
+
+bool XSSFilter::filterObjectToken(HTMLToken& token)
+{
+    ASSERT(m_state == Initial);
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(hasName(token, objectTag));
+
+    bool didBlockScript = false;
+
+    didBlockScript |= eraseAttributeIfInjected(token, dataAttr, blankURL().string());
+    didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
+    didBlockScript |= eraseAttributeIfInjected(token, classidAttr);
+
+    return didBlockScript;
+}
+
+bool XSSFilter::filterParamToken(HTMLToken& token)
+{
+    ASSERT(m_state == Initial);
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(hasName(token, paramTag));
+
+    size_t indexOfNameAttribute;
+    if (!findAttributeWithName(token, nameAttr, indexOfNameAttribute))
+        return false;
+
+    const HTMLToken::Attribute& nameAttribute = token.attributes().at(indexOfNameAttribute);
+    String name = String(nameAttribute.m_value.data(), nameAttribute.m_value.size());
+
+    if (!HTMLParamElement::isURLParameter(name))
+        return false;
+
+    return eraseAttributeIfInjected(token, valueAttr, blankURL().string());
+}
+
+bool XSSFilter::filterEmbedToken(HTMLToken& token)
+{
+    ASSERT(m_state == Initial);
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(hasName(token, embedTag));
+
+    bool didBlockScript = false;
+
+    didBlockScript |= eraseAttributeIfInjected(token, srcAttr, blankURL().string());
+    didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
+
+    return didBlockScript;
+}
+
+bool XSSFilter::filterAppletToken(HTMLToken& token)
+{
+    ASSERT(m_state == Initial);
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(hasName(token, appletTag));
+
+    bool didBlockScript = false;
+
+    didBlockScript |= eraseAttributeIfInjected(token, codeAttr);
+    didBlockScript |= eraseAttributeIfInjected(token, objectAttr);
+
+    return didBlockScript;
+}
+
+bool XSSFilter::filterMetaToken(HTMLToken& token)
+{
+    ASSERT(m_state == Initial);
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(hasName(token, metaTag));
+
+    return eraseAttributeIfInjected(token, http_equivAttr);
+}
+
+bool XSSFilter::filterBaseToken(HTMLToken& token)
+{
+    ASSERT(m_state == Initial);
+    ASSERT(token.type() == HTMLToken::StartTag);
+    ASSERT(hasName(token, baseTag));
+
+    return eraseAttributeIfInjected(token, hrefAttr);
+}
+
+bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token)
+{
+    DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
+
+    bool didBlockScript = false;
+    for (size_t i = 0; i < token.attributes().size(); ++i) {
+        const HTMLToken::Attribute& attribute = token.attributes().at(i);
+        bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
+        bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value);
+        if (!isInlineEventHandler && !valueContainsJavaScriptURL)
+            continue;
+        if (!isContainedInRequest(snippetForAttribute(token, attribute)))
+            continue;
+        token.eraseValueOfAttribute(i);
+        if (valueContainsJavaScriptURL)
+            token.appendToAttributeValue(i, safeJavaScriptURL);
+        didBlockScript = true;
+    }
+    return didBlockScript;
+}
+
+bool XSSFilter::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue)
+{
+    size_t indexOfAttribute;
+    if (findAttributeWithName(token, attributeName, indexOfAttribute)) {
+        const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute);
+        if (isContainedInRequest(snippetForAttribute(token, attribute))) {
+            if (attributeName == srcAttr && isSameOriginResource(String(attribute.m_value.data(), attribute.m_value.size())))
+                return false;
+            token.eraseValueOfAttribute(indexOfAttribute);
+            if (!replacementValue.isEmpty())
+                token.appendToAttributeValue(indexOfAttribute, replacementValue);
+            return true;
+        }
+    }
+    return false;
+}
+
+String XSSFilter::snippetForRange(const HTMLToken& token, int start, int end)
+{
+    // FIXME: There's an extra allocation here that we could save by
+    //        passing the range to the parser.
+    return m_parser->sourceForToken(token).substring(start, end - start);
+}
+
+String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute)
+{
+    // FIXME: We should grab one character before the name also.
+    int start = attribute.m_nameRange.m_start - token.startIndex();
+    // FIXME: We probably want to grab only the first few characters of the attribute value.
+    int end = attribute.m_valueRange.m_end - token.startIndex();
+    return snippetForRange(token, start, end);
+}
+
+bool XSSFilter::isContainedInRequest(const String& snippet)
+{
+    ASSERT(!snippet.isEmpty());
+    String canonicalizedSnippet = canonicalize(snippet);
+    ASSERT(!canonicalizedSnippet.isEmpty());
+    if (m_decodedURL.find(canonicalizedSnippet, 0, false) != notFound)
+        return true;
+    if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(canonicalizedSnippet))
+        return false;
+    return m_decodedHTTPBody.find(canonicalizedSnippet, 0, false) != notFound;
+}
+
+bool XSSFilter::isSameOriginResource(const String& url)
+{
+    // If the resource is loaded from the same host as the enclosing page, it's
+    // probably not an XSS attack, so we reduce false positives by allowing the
+    // request. Note that only the host is compared, not the scheme or port. If
+    // the resource has a query string, we're more suspicious, however, because
+    // that's pretty rare and the attacker might be able to trick a server-side
+    // script into doing something dangerous with the query string.
+    KURL resourceURL(m_parser->document()->url(), url);
+    return (m_parser->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty());
+}
+
+}
diff --git a/Source/WebCore/html/parser/XSSFilter.h b/Source/WebCore/html/parser/XSSFilter.h
new file mode 100644
index 0000000..2c7d428
--- /dev/null
+++ b/Source/WebCore/html/parser/XSSFilter.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2011 Adam Barth. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef XSSFilter_h
+#define XSSFilter_h
+
+#include "HTMLToken.h"
+#include "HTTPParsers.h"
+#include "SuffixTree.h"
+
+namespace WebCore {
+
+class HTMLDocumentParser;
+
+class XSSFilter {
+    WTF_MAKE_NONCOPYABLE(XSSFilter);
+public:
+    explicit XSSFilter(HTMLDocumentParser*);
+
+    void filterToken(HTMLToken&);
+
+private:
+    enum State {
+        Uninitialized,
+        Initial,
+        AfterScriptStartTag,
+    };
+
+    void init();
+
+    bool filterTokenInitial(HTMLToken&);
+    bool filterTokenAfterScriptStartTag(HTMLToken&);
+
+    bool filterScriptToken(HTMLToken&);
+    bool filterObjectToken(HTMLToken&);
+    bool filterParamToken(HTMLToken&);
+    bool filterEmbedToken(HTMLToken&);
+    bool filterAppletToken(HTMLToken&);
+    bool filterMetaToken(HTMLToken&);
+    bool filterBaseToken(HTMLToken&);
+
+    bool eraseDangerousAttributesIfInjected(HTMLToken&);
+    bool eraseAttributeIfInjected(HTMLToken&, const QualifiedName&, const String& replacementValue = String());
+
+    String snippetForRange(const HTMLToken&, int start, int end);
+    String snippetForAttribute(const HTMLToken&, const HTMLToken::Attribute&);
+
+    bool isContainedInRequest(const String&);
+    bool isSameOriginResource(const String& url);
+
+    HTMLDocumentParser* m_parser;
+    bool m_isEnabled;
+    XSSProtectionDisposition m_xssProtection;
+
+    String m_decodedURL;
+    String m_decodedHTTPBody;
+    OwnPtr<SuffixTree<ASCIICodebook> > m_decodedHTTPBodySuffixTree;
+
+    State m_state;
+    String m_cachedSnippet;
+};
+
+}
+
+#endif