1 files changed, 266 insertions, 1731 deletions
diff --git a/WebCore/html/HTMLDocumentParser.cpp b/WebCore/html/HTMLDocumentParser.cpp
index e59cb74..79ca805 100644
--- a/WebCore/html/HTMLDocumentParser.cpp
+++ b/WebCore/html/HTMLDocumentParser.cpp
@@ -1,60 +1,44 @@
 /*
-    Copyright (C) 1997 Martin Jones (mjones@kde.org)
-              (C) 1997 Torben Weis (weis@kde.org)
-              (C) 1998 Waldo Bastian (bastian@kde.org)
-              (C) 1999 Lars Knoll (knoll@kde.org)
-              (C) 1999 Antti Koivisto (koivisto@kde.org)
-              (C) 2001 Dirk Mueller (mueller@kde.org)
-    Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
-    Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
-    Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Library General Public
-    License as published by the Free Software Foundation; either
-    version 2 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Library General Public License for more details.
-
-    You should have received a copy of the GNU Library General Public License
-    along with this library; see the file COPYING.LIB.  If not, write to
-    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-    Boston, MA 02110-1301, USA.
-*/
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
 #include "config.h"
 #include "HTMLDocumentParser.h"
 
-#include "Attribute.h"
-#include "CSSHelper.h"
-#include "Cache.h"
-#include "CachedScript.h"
-#include "DocLoader.h"
 #include "DocumentFragment.h"
-#include "Event.h"
-#include "EventNames.h"
+#include "Element.h"
 #include "Frame.h"
-#include "FrameLoader.h"
-#include "FrameView.h"
-#include "HTMLElement.h"
-#include "HTMLNames.h"
-#include "LegacyHTMLTreeConstructor.h"
-#include "HTMLScriptElement.h"
-#include "HTMLViewSourceDocument.h"
-#include "ImageLoader.h"
-#include "InspectorTimelineAgent.h"
-#include "Page.h"
-#include "PreloadScanner.h"
-#include "ScriptController.h"
-#include "ScriptSourceCode.h"
-#include "ScriptValue.h"
+#include "HTMLParserScheduler.h"
+#include "HTMLTokenizer.h"
+#include "HTMLPreloadScanner.h"
+#include "HTMLScriptRunner.h"
+#include "HTMLTreeBuilder.h"
+#include "HTMLDocument.h"
 #include "XSSAuditor.h"
-#include <wtf/ASCIICType.h>
 #include <wtf/CurrentTime.h>
 
+<<<<<<< HEAD
 #include "HTMLEntityNames.cpp"
 
 #ifdef ANDROID_INSTRUMENT
@@ -65,361 +49,86 @@
 
 using namespace WTF;
 using namespace std;
+=======
+#if ENABLE(INSPECTOR)
+#include "InspectorTimelineAgent.h"
+#endif
+>>>>>>> webkit.org at r61871
 
 namespace WebCore {
 
-using namespace HTMLNames;
-
-// This value is used to define how many characters the parser will process before
-// yeilding control.
-// To increase responsivness reduce the parser chunk size.
-static const int defaultTokenizerChunkSize = 4096;
-
-// FIXME: We would like this constant to be 200ms.
-// Yielding more aggressively results in increased responsiveness and better incremental rendering.
-// It slows down overall page-load on slower machines, though, so for now we set a value of 500.
-// For smaller chunks (above) decrease the value of TimerDelay as the the parser should not
-// yield for as long a period otherwise it will take way to long to load a page.
-static const double defaultTokenizerTimeDelay = 0.500;
-
-static const char commentStart [] = "<!--";
-static const char doctypeStart [] = "<!doctype";
-static const char publicStart [] = "public";
-static const char systemStart [] = "system";
-static const char scriptEnd [] = "</script";
-static const char xmpEnd [] = "</xmp";
-static const char styleEnd [] =  "</style";
-static const char textareaEnd [] = "</textarea";
-static const char titleEnd [] = "</title";
-static const char iframeEnd [] = "</iframe";
-
-// Full support for MS Windows extensions to Latin-1.
-// Technically these extensions should only be activated for pages
-// marked "windows-1252" or "cp1252", but
-// in the standard Microsoft way, these extensions infect hundreds of thousands
-// of web pages.  Note that people with non-latin-1 Microsoft extensions
-// are SOL.
-//
-// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
-//      http://www.bbsinc.com/iso8859.html
-//      http://www.obviously.com/
-//
-// There may be better equivalents
-
-// We only need this for entities. For non-entity text, we handle this in the text encoding.
-
-static const UChar windowsLatin1ExtensionArray[32] = {
-    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
-    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
-    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
-    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
-};
-
-static inline UChar fixUpChar(UChar c)
-{
-    if ((c & ~0x1F) != 0x0080)
-        return c;
-    return windowsLatin1ExtensionArray[c - 0x80];
-}
+namespace {
 
-static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
-{
-    for (unsigned i = 0; i != length; ++i) {
-        unsigned char c1 = s1[i];
-        unsigned char uc1 = toASCIIUpper(static_cast<char>(c1));
-        UChar c2 = s2[i];
-        if (c1 != c2 && uc1 != c2)
-            return false;
+class NestingLevelIncrementer : public Noncopyable {
+public:
+    explicit NestingLevelIncrementer(int& counter)
+        : m_counter(&counter)
+    {
+        ++(*m_counter);
     }
-    return true;
-}
 
-inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
-{
-    if (!attrName.isEmpty()) {
-        ASSERT(!attrName.contains('/'));
-        RefPtr<Attribute> a = Attribute::createMapped(attrName, attributeValue);
-        if (!attrs) {
-            attrs = NamedNodeMap::create();
-            attrs->reserveInitialCapacity(10);
-        }
-        attrs->insertAttribute(a.release(), viewSourceMode);
+    ~NestingLevelIncrementer()
+    {
+        --(*m_counter);
     }
 
-    attrName = emptyAtom;
-}
+private:
+    int* m_counter;
+};
 
-// ----------------------------------------------------------------------------
-
-HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* doc, bool reportErrors)
-    : DocumentParser()
-    , m_buffer(0)
-    , m_scriptCode(0)
-    , m_scriptCodeSize(0)
-    , m_scriptCodeCapacity(0)
-    , m_scriptCodeResync(0)
-    , m_executingScript(0)
-    , m_requestingScript(false)
-    , m_hasScriptsWaitingForStylesheets(false)
-    , m_timer(this, &HTMLDocumentParser::timerFired)
-    , m_externalScriptsTimer(this, &HTMLDocumentParser::executeExternalScriptsTimerFired)
-    , m_doc(doc)
-    , m_treeConstructor(new LegacyHTMLTreeConstructor(doc, reportErrors))
-    , m_inWrite(false)
-    , m_fragment(false)
-    , m_scriptingPermission(FragmentScriptingAllowed)
-{
-    begin();
-}
+} // namespace
 
-HTMLDocumentParser::HTMLDocumentParser(HTMLViewSourceDocument* doc)
-    : DocumentParser(true)
-    , m_buffer(0)
-    , m_scriptCode(0)
-    , m_scriptCodeSize(0)
-    , m_scriptCodeCapacity(0)
-    , m_scriptCodeResync(0)
-    , m_executingScript(0)
-    , m_requestingScript(false)
-    , m_hasScriptsWaitingForStylesheets(false)
-    , m_timer(this, &HTMLDocumentParser::timerFired)
-    , m_externalScriptsTimer(this, &HTMLDocumentParser::executeExternalScriptsTimerFired)
-    , m_doc(doc)
-    , m_inWrite(false)
-    , m_fragment(false)
-    , m_scriptingPermission(FragmentScriptingAllowed)
+HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
+    : DocumentParser(document)
+    , m_tokenizer(new HTMLTokenizer)
+    , m_scriptRunner(new HTMLScriptRunner(document, this))
+    , m_treeBuilder(new HTMLTreeBuilder(m_tokenizer.get(), document, reportErrors))
+    , m_parserScheduler(new HTMLParserScheduler(this))
+    , m_endWasDelayed(false)
+    , m_writeNestingLevel(0)
 {
     begin();
 }
 
-HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission)
-    : m_buffer(0)
-    , m_scriptCode(0)
-    , m_scriptCodeSize(0)
-    , m_scriptCodeCapacity(0)
-    , m_scriptCodeResync(0)
-    , m_executingScript(0)
-    , m_requestingScript(false)
-    , m_hasScriptsWaitingForStylesheets(false)
-    , m_timer(this, &HTMLDocumentParser::timerFired)
-    , m_externalScriptsTimer(this, &HTMLDocumentParser::executeExternalScriptsTimerFired)
-    , m_doc(frag->document())
-    , m_treeConstructor(new LegacyHTMLTreeConstructor(frag, scriptingPermission))
-    , m_inWrite(false)
-    , m_fragment(true)
-    , m_scriptingPermission(scriptingPermission)
+// FIXME: Member variables should be grouped into self-initializing structs to
+// minimize code duplication between these constructors.
+HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
+    : DocumentParser(fragment->document())
+    , m_tokenizer(new HTMLTokenizer)
+    , m_treeBuilder(new HTMLTreeBuilder(m_tokenizer.get(), fragment, scriptingPermission))
+    , m_endWasDelayed(false)
+    , m_writeNestingLevel(0)
 {
     begin();
 }
 
-void HTMLDocumentParser::reset()
+HTMLDocumentParser::~HTMLDocumentParser()
 {
-    ASSERT(m_executingScript == 0);
-
-    while (!m_pendingScripts.isEmpty()) {
-        CachedScript* cs = m_pendingScripts.takeFirst().get();
-        ASSERT(cache()->disabled() || cs->accessCount() > 0);
-        cs->removeClient(this);
-    }
-
-    fastFree(m_buffer);
-    m_buffer = m_dest = 0;
-    m_bufferSize = 0;
-
-    fastFree(m_scriptCode);
-    m_scriptCode = 0;
-    m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
-
-    m_timer.stop();
-    m_externalScriptsTimer.stop();
-
-    m_state.setAllowYield(false);
-    m_state.setForceSynchronous(false);
-
-    m_currentToken.reset();
-    m_doctypeToken.reset();
-    m_doctypeSearchCount = 0;
-    m_doctypeSecondarySearchCount = 0;
-    m_hasScriptsWaitingForStylesheets = false;
+    // FIXME: We'd like to ASSERT that normal operation of this class clears
+    // out any delayed actions, but we can't because we're unceremoniously
+    // deleted.  If there were a required call to some sort of cancel function,
+    // then we could ASSERT some invariants here.
 }
 
 void HTMLDocumentParser::begin()
 {
-    m_executingScript = 0;
-    m_requestingScript = false;
-    m_hasScriptsWaitingForStylesheets = false;
-    m_state.setLoadingExtScript(false);
-    reset();
-    m_bufferSize = 254;
-    m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * 254));
-    m_dest = m_buffer;
-    tquote = NoQuote;
-    searchCount = 0;
-    m_state.setEntityState(NoEntity);
-    m_scriptTagSrcAttrValue = String();
-    m_pendingSrc.clear();
-    m_currentPrependingSrc = 0;
-    m_noMoreData = false;
-    m_brokenComments = false;
-    m_brokenServer = false;
-    m_lineNumber = 0;
-    m_currentScriptTagStartLineNumber = 0;
-    m_currentTagStartLineNumber = 0;
-    m_state.setForceSynchronous(false);
-
-    Page* page = m_doc->page();
-    if (page && page->hasCustomHTMLTokenizerTimeDelay())
-        m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay();
-    else
-        m_tokenizerTimeDelay = defaultTokenizerTimeDelay;
-
-    if (page && page->hasCustomHTMLTokenizerChunkSize())
-        m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize();
-    else
-        m_tokenizerChunkSize = defaultTokenizerChunkSize;
-}
-
-void HTMLDocumentParser::setForceSynchronous(bool force)
-{
-    m_state.setForceSynchronous(force);
+    // FIXME: Should we reset the tokenizer?
 }
 
-HTMLDocumentParser::State HTMLDocumentParser::processListing(SegmentedString list, State state)
+void HTMLDocumentParser::stopParsing()
 {
-    // This function adds the listing 'list' as
-    // preformatted text-tokens to the token-collection
-    while (!list.isEmpty()) {
-        if (state.skipLF()) {
-            state.setSkipLF(false);
-            if (*list == '\n') {
-                list.advance();
-                continue;
-            }
-        }
-
-        checkBuffer();
-
-        if (*list == '\n' || *list == '\r') {
-            if (state.discardLF())
-                // Ignore this LF
-                state.setDiscardLF(false); // We have discarded 1 LF
-            else
-                *m_dest++ = '\n';
-
-            /* Check for MS-DOS CRLF sequence */
-            if (*list == '\r')
-                state.setSkipLF(true);
-
-            list.advance();
-        } else {
-            state.setDiscardLF(false);
-            *m_dest++ = *list;
-            list.advance();
-        }
-    }
-
-    return state;
+    DocumentParser::stopParsing();
+    m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
 }
 
-HTMLDocumentParser::State HTMLDocumentParser::parseNonHTMLText(SegmentedString& src, State state)
+bool HTMLDocumentParser::processingData() const
 {
-    ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
-    ASSERT(!state.hasTagState());
-    ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1);
-    if (state.inScript() && !m_currentScriptTagStartLineNumber)
-        m_currentScriptTagStartLineNumber = m_lineNumber;
-
-    if (state.inComment())
-        state = parseComment(src, state);
-
-    int lastDecodedEntityPosition = -1;
-    while (!src.isEmpty()) {
-        checkScriptBuffer();
-        UChar ch = *src;
-
-        if (!m_scriptCodeResync && !m_brokenComments &&
-            !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&
-            m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&
-            (lastDecodedEntityPosition < m_scriptCodeSize - 3)) {
-            state.setInComment(true);
-            state = parseComment(src, state);
-            continue;
-        }
-        if (m_scriptCodeResync && !tquote && ch == '>') {
-            src.advancePastNonNewline();
-            m_scriptCodeSize = m_scriptCodeResync - 1;
-            m_scriptCodeResync = 0;
-            m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;
-            if (state.inScript())
-                state = scriptHandler(state);
-            else {
-                state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
-                processToken();
-                if (state.inStyle()) {
-                    m_currentToken.tagName = styleTag.localName();
-                    m_currentToken.beginTag = false;
-                } else if (state.inTextArea()) {
-                    m_currentToken.tagName = textareaTag.localName();
-                    m_currentToken.beginTag = false;
-                } else if (state.inTitle()) {
-                    m_currentToken.tagName = titleTag.localName();
-                    m_currentToken.beginTag = false;
-                } else if (state.inXmp()) {
-                    m_currentToken.tagName = xmpTag.localName();
-                    m_currentToken.beginTag = false;
-                } else if (state.inIFrame()) {
-                    m_currentToken.tagName = iframeTag.localName();
-                    m_currentToken.beginTag = false;
-                }
-                processToken();
-                state.setInStyle(false);
-                state.setInScript(false);
-                state.setInTextArea(false);
-                state.setInTitle(false);
-                state.setInXmp(false);
-                state.setInIFrame(false);
-                tquote = NoQuote;
-                m_scriptCodeSize = m_scriptCodeResync = 0;
-            }
-            return state;
-        }
-        // possible end of tagname, lets check.
-        if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
-             m_scriptCodeSize >= m_searchStopperLength &&
-             tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&
-             (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {
-            m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;
-            tquote = NoQuote;
-            continue;
-        }
-        if (m_scriptCodeResync && !state.escaped()) {
-            if (ch == '\"')
-                tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
-            else if (ch == '\'')
-                tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
-            else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
-                tquote = NoQuote;
-        }
-        state.setEscaped(!state.escaped() && ch == '\\');
-        if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {
-            UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;
-            src.advancePastNonNewline();
-            state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
-            if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)
-                lastDecodedEntityPosition = m_scriptCodeSize;
-            else
-                m_scriptCodeSize = scriptCodeDest - m_scriptCode;
-        } else {
-            m_scriptCode[m_scriptCodeSize++] = ch;
-            src.advance(m_lineNumber);
-        }
-    }
-
-    return state;
+    return isScheduledForResume() || inWrite();
 }
 
-HTMLDocumentParser::State HTMLDocumentParser::scriptHandler(State state)
+void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode)
 {
+<<<<<<< HEAD
     // We are inside a <script>
     bool doScriptExec = false;
     int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenzier line numbers are 0 based
@@ -613,1156 +322,152 @@ HTMLDocumentParser::State HTMLDocumentParser::scriptExecution(const ScriptSource
 
     return state;
 }
+=======
+    if (m_parserStopped || m_treeBuilder->isPaused())
+        return;
+>>>>>>> webkit.org at r61871
 
-HTMLDocumentParser::State HTMLDocumentParser::parseComment(SegmentedString& src, State state)
-{
-    // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
-    checkScriptBuffer(src.length());
-    while (!src.isEmpty()) {
-        UChar ch = *src;
-        m_scriptCode[m_scriptCodeSize++] = ch;
-        if (ch == '>') {
-            bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle());
-            int endCharsCount = 1; // start off with one for the '>' character
-            if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') {
-                endCharsCount = 3;
-            } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' &&
-                m_scriptCode[m_scriptCodeSize-2] == '!') {
-                // Other browsers will accept --!> as a close comment, even though it's
-                // not technically valid.
-                endCharsCount = 4;
-            }
-            if (handleBrokenComments || endCharsCount > 1) {
-                src.advancePastNonNewline();
-                if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) {
-                    checkScriptBuffer();
-                    m_scriptCode[m_scriptCodeSize] = 0;
-                    m_scriptCode[m_scriptCodeSize + 1] = 0;
-                    m_currentToken.tagName = commentAtom;
-                    m_currentToken.beginTag = true;
-                    state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state);
-                    processToken();
-                    m_currentToken.tagName = commentAtom;
-                    m_currentToken.beginTag = false;
-                    processToken();
-                    m_scriptCodeSize = 0;
-                }
-                state.setInComment(false);
-                return state; // Finished parsing comment
-            }
-        }
-        src.advance(m_lineNumber);
+    // Once a resume is scheduled, HTMLParserScheduler controls when we next pump.
+    if (isScheduledForResume()) {
+        ASSERT(mode == AllowYield);
+        return;
     }
 
-    return state;
+    pumpTokenizer(mode);
 }
 
-HTMLDocumentParser::State HTMLDocumentParser::parseServer(SegmentedString& src, State state)
+bool HTMLDocumentParser::isScheduledForResume() const
 {
-    checkScriptBuffer(src.length());
-    while (!src.isEmpty()) {
-        UChar ch = *src;
-        m_scriptCode[m_scriptCodeSize++] = ch;
-        if (ch == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%') {
-            src.advancePastNonNewline();
-            state.setInServer(false);
-            m_scriptCodeSize = 0;
-            return state; // Finished parsing server include
-        }
-        src.advance(m_lineNumber);
-    }
-    return state;
+    return m_parserScheduler && m_parserScheduler->isScheduledForResume();
 }
 
-HTMLDocumentParser::State HTMLDocumentParser::parseProcessingInstruction(SegmentedString& src, State state)
+// Used by HTMLParserScheduler
+void HTMLDocumentParser::resumeParsingAfterYield()
 {
-    UChar oldchar = 0;
-    while (!src.isEmpty()) {
-        UChar chbegin = *src;
-        if (chbegin == '\'')
-            tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
-        else if (chbegin == '\"')
-            tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
-        // Look for '?>'
-        // Some crappy sites omit the "?" before it, so
-        // we look for an unquoted '>' instead. (IE compatible)
-        else if (chbegin == '>' && (!tquote || oldchar == '?')) {
-            // We got a '?>' sequence
-            state.setInProcessingInstruction(false);
-            src.advancePastNonNewline();
-            state.setDiscardLF(true);
-            return state; // Finished parsing comment!
-        }
-        src.advance(m_lineNumber);
-        oldchar = chbegin;
-    }
-
-    return state;
+    // We should never be here unless we can pump immediately.  Call pumpTokenizer()
+    // directly so that ASSERTS will fire if we're wrong.
+    pumpTokenizer(AllowYield);
 }
 
-HTMLDocumentParser::State HTMLDocumentParser::parseText(SegmentedString& src, State state)
+bool HTMLDocumentParser::runScriptsForPausedTreeBuilder()
 {
-    while (!src.isEmpty()) {
-        UChar cc = *src;
-
-        if (state.skipLF()) {
-            state.setSkipLF(false);
-            if (cc == '\n') {
-                src.advancePastNewline(m_lineNumber);
-                continue;
-            }
-        }
-
-        // do we need to enlarge the buffer?
-        checkBuffer();
+    ASSERT(m_treeBuilder->isPaused());
 
-        if (cc == '\r') {
-            state.setSkipLF(true);
-            *m_dest++ = '\n';
-        } else
-            *m_dest++ = cc;
-        src.advance(m_lineNumber);
-    }
-
-    return state;
+    int scriptStartLine = 0;
+    RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartLine);
+    // We will not have a scriptRunner when parsing a DocumentFragment.
+    if (!m_scriptRunner)
+        return true;
+    return m_scriptRunner->execute(scriptElement.release(), scriptStartLine);
 }
 
-
-HTMLDocumentParser::State HTMLDocumentParser::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag)
+void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
 {
-    if (start) {
-        cBufferPos = 0;
-        state.setEntityState(SearchEntity);
-        EntityUnicodeValue = 0;
-    }
+    ASSERT(!m_parserStopped);
+    ASSERT(!m_treeBuilder->isPaused());
+    ASSERT(!isScheduledForResume());
 
-    while (!src.isEmpty()) {
-        UChar cc = *src;
-        switch (state.entityState()) {
-        case NoEntity:
-            ASSERT(state.entityState() != NoEntity);
-            return state;
-
-        case SearchEntity:
-            if (cc == '#') {
-                m_cBuffer[cBufferPos++] = cc;
-                src.advancePastNonNewline();
-                state.setEntityState(NumericSearch);
-            } else
-                state.setEntityState(EntityName);
-            break;
+    // We tell the InspectorTimelineAgent about every pump, even if we
+    // end up pumping nothing.  It can filter out empty pumps itself.
+    willPumpLexer();
 
-        case NumericSearch:
-            if (cc == 'x' || cc == 'X') {
-                m_cBuffer[cBufferPos++] = cc;
-                src.advancePastNonNewline();
-                state.setEntityState(Hexadecimal);
-            } else if (cc >= '0' && cc <= '9')
-                state.setEntityState(Decimal);
-            else
-                state.setEntityState(SearchSemicolon);
+    HTMLParserScheduler::PumpSession session;
+    // FIXME: This loop body has is now too long and needs cleanup.
+    while (mode == ForceSynchronous || (!m_parserStopped && m_parserScheduler->shouldContinueParsing(session))) {
+        if (!m_tokenizer->nextToken(m_input.current(), m_token))
             break;
 
-        case Hexadecimal: {
-            int ll = min(src.length(), 10 - cBufferPos);
-            while (ll--) {
-                cc = *src;
-                if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
-                    state.setEntityState(SearchSemicolon);
-                    break;
-                }
-                int digit;
-                if (cc < 'A')
-                    digit = cc - '0';
-                else
-                    digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
-                EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
-                m_cBuffer[cBufferPos++] = cc;
-                src.advancePastNonNewline();
-            }
-            if (cBufferPos == 10)
-                state.setEntityState(SearchSemicolon);
-            break;
-        }
-        case Decimal:
-        {
-            int ll = min(src.length(), 9-cBufferPos);
-            while (ll--) {
-                cc = *src;
-
-                if (!(cc >= '0' && cc <= '9')) {
-                    state.setEntityState(SearchSemicolon);
-                    break;
-                }
-
-                EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
-                m_cBuffer[cBufferPos++] = cc;
-                src.advancePastNonNewline();
-            }
-            if (cBufferPos == 9)
-                state.setEntityState(SearchSemicolon);
+        m_treeBuilder->constructTreeFromToken(m_token);
+        m_token.clear();
+
+        // The parser will pause itself when waiting on a script to load or run.
+        if (!m_treeBuilder->isPaused())
+            continue;
+
+        // If we're paused waiting for a script, we try to execute scripts before continuing.
+        bool shouldContinueParsing = runScriptsForPausedTreeBuilder();
+        m_treeBuilder->setPaused(!shouldContinueParsing);
+        if (!shouldContinueParsing)
             break;
+    }
+
+    if (isWaitingForScripts()) {
+        ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
+        if (!m_preloadScanner) {
+            m_preloadScanner.set(new HTMLPreloadScanner(m_document));
+            m_preloadScanner->appendToEnd(m_input.current());
         }
-        case EntityName:
-        {
-            int ll = min(src.length(), 9-cBufferPos);
-            while (ll--) {
-                cc = *src;
-
-                if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
-                    state.setEntityState(SearchSemicolon);
-                    break;
-                }
-
-                m_cBuffer[cBufferPos++] = cc;
-                src.advancePastNonNewline();
-            }
-            if (cBufferPos == 9)
-                state.setEntityState(SearchSemicolon);
-            if (state.entityState() == SearchSemicolon) {
-                if (cBufferPos > 1) {
-                    // Since the maximum length of entity name is 9,
-                    // so a single char array which is allocated on
-                    // the stack, its length is 10, should be OK.
-                    // Also if we have an illegal character, we treat it
-                    // as illegal entity name.
-                    unsigned testedEntityNameLen = 0;
-                    char tmpEntityNameBuffer[10];
-
-                    ASSERT(cBufferPos < 10);
-                    for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) {
-                        if (m_cBuffer[testedEntityNameLen] > 0x7e)
-                            break;
-                        tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen];
-                    }
-
-                    const Entity *e;
-
-                    if (testedEntityNameLen == cBufferPos)
-                        e = findEntity(tmpEntityNameBuffer, cBufferPos);
-                    else
-                        e = 0;
-
-                    if (e)
-                        EntityUnicodeValue = e->code;
-
-                    // be IE compatible
-                    if (parsingTag && EntityUnicodeValue > 255 && *src != ';')
-                        EntityUnicodeValue = 0;
-                }
-            }
-            else
-                break;
-        }
-        case SearchSemicolon:
-            // Don't allow values that are more than 21 bits.
-            if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
-                if (!inViewSourceMode()) {
-                    if (*src == ';')
-                        src.advancePastNonNewline();
-                    if (EntityUnicodeValue <= 0xFFFF) {
-                        checkBuffer();
-                        src.push(fixUpChar(EntityUnicodeValue));
-                    } else {
-                        // Convert to UTF-16, using surrogate code points.
-                        checkBuffer(2);
-                        src.push(U16_LEAD(EntityUnicodeValue));
-                        src.push(U16_TRAIL(EntityUnicodeValue));
-                    }
-                } else {
-                    // FIXME: We should eventually colorize entities by sending them as a special token.
-                    // 12 bytes required: up to 10 bytes in m_cBuffer plus the
-                    // leading '&' and trailing ';'
-                    checkBuffer(12);
-                    *dest++ = '&';
-                    for (unsigned i = 0; i < cBufferPos; i++)
-                        dest[i] = m_cBuffer[i];
-                    dest += cBufferPos;
-                    if (*src == ';') {
-                        *dest++ = ';';
-                        src.advancePastNonNewline();
-                    }
-                }
-            } else {
-                // 11 bytes required: up to 10 bytes in m_cBuffer plus the
-                // leading '&'
-                checkBuffer(11);
-                // ignore the sequence, add it to the buffer as plaintext
-                *dest++ = '&';
-                for (unsigned i = 0; i < cBufferPos; i++)
-                    dest[i] = m_cBuffer[i];
-                dest += cBufferPos;
-            }
-
-            state.setEntityState(NoEntity);
-            return state;
-        }
+        m_preloadScanner->scan();
     }
 
-    return state;
+    didPumpLexer();
 }
 
-HTMLDocumentParser::State HTMLDocumentParser::parseDoctype(SegmentedString& src, State state)
+void HTMLDocumentParser::willPumpLexer()
 {
-    ASSERT(state.inDoctype());
-    while (!src.isEmpty() && state.inDoctype()) {
-        UChar c = *src;
-        bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
-        switch (m_doctypeToken.state()) {
-            case DoctypeBegin: {
-                m_doctypeToken.setState(DoctypeBeforeName);
-                if (isWhitespace) {
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                }
-                break;
-            }
-            case DoctypeBeforeName: {
-                if (c == '>') {
-                    // Malformed.  Just exit.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    if (inViewSourceMode())
-                        processDoctypeToken();
-                } else if (isWhitespace) {
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else
-                    m_doctypeToken.setState(DoctypeName);
-                break;
-            }
-            case DoctypeName: {
-                if (c == '>') {
-                    // Valid doctype. Emit it.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    processDoctypeToken();
-                } else if (isWhitespace) {
-                    m_doctypeSearchCount = 0; // Used now to scan for PUBLIC
-                    m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
-                    m_doctypeToken.setState(DoctypeAfterName);
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else {
-                    src.advancePastNonNewline();
-                    m_doctypeToken.m_name.append(c);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                }
-                break;
-            }
-            case DoctypeAfterName: {
-                if (c == '>') {
-                    // Valid doctype. Emit it.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    processDoctypeToken();
-                } else if (!isWhitespace) {
-                    src.advancePastNonNewline();
-                    if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) {
-                        m_doctypeSearchCount++;
-                        if (m_doctypeSearchCount == 6)
-                            // Found 'PUBLIC' sequence
-                            m_doctypeToken.setState(DoctypeBeforePublicID);
-                    } else if (m_doctypeSearchCount > 0) {
-                        m_doctypeSearchCount = 0;
-                        m_doctypeToken.setState(DoctypeBogus);
-                    } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) {
-                        m_doctypeSecondarySearchCount++;
-                        if (m_doctypeSecondarySearchCount == 6)
-                            // Found 'SYSTEM' sequence
-                            m_doctypeToken.setState(DoctypeBeforeSystemID);
-                    } else {
-                        m_doctypeSecondarySearchCount = 0;
-                        m_doctypeToken.setState(DoctypeBogus);
-                    }
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else {
-                    src.advance(m_lineNumber); // Whitespace keeps us in the after name state.
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                }
-                break;
-            }
-            case DoctypeBeforePublicID: {
-                if (c == '\"' || c == '\'') {
-                    tquote = c == '\"' ? DoubleQuote : SingleQuote;
-                    m_doctypeToken.setState(DoctypePublicID);
-                    src.advancePastNonNewline();
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else if (c == '>') {
-                    // Considered bogus.  Don't process the doctype.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    if (inViewSourceMode())
-                        processDoctypeToken();
-                } else if (isWhitespace) {
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else
-                    m_doctypeToken.setState(DoctypeBogus);
-                break;
-            }
-            case DoctypePublicID: {
-                if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
-                    src.advancePastNonNewline();
-                    m_doctypeToken.setState(DoctypeAfterPublicID);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else if (c == '>') {
-                     // Considered bogus.  Don't process the doctype.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    if (inViewSourceMode())
-                        processDoctypeToken();
-                } else {
-                    m_doctypeToken.m_publicID.append(c);
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                }
-                break;
-            }
-            case DoctypeAfterPublicID:
-                if (c == '\"' || c == '\'') {
-                    tquote = c == '\"' ? DoubleQuote : SingleQuote;
-                    m_doctypeToken.setState(DoctypeSystemID);
-                    src.advancePastNonNewline();
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else if (c == '>') {
-                    // Valid doctype. Emit it now.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    processDoctypeToken();
-                } else if (isWhitespace) {
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else
-                    m_doctypeToken.setState(DoctypeBogus);
-                break;
-            case DoctypeBeforeSystemID:
-                if (c == '\"' || c == '\'') {
-                    tquote = c == '\"' ? DoubleQuote : SingleQuote;
-                    m_doctypeToken.setState(DoctypeSystemID);
-                    src.advancePastNonNewline();
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else if (c == '>') {
-                    // Considered bogus.  Don't process the doctype.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                } else if (isWhitespace) {
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else
-                    m_doctypeToken.setState(DoctypeBogus);
-                break;
-            case DoctypeSystemID:
-                if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
-                    src.advancePastNonNewline();
-                    m_doctypeToken.setState(DoctypeAfterSystemID);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else if (c == '>') {
-                     // Considered bogus.  Don't process the doctype.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    if (inViewSourceMode())
-                        processDoctypeToken();
-                } else {
-                    m_doctypeToken.m_systemID.append(c);
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                }
-                break;
-            case DoctypeAfterSystemID:
-                if (c == '>') {
-                    // Valid doctype. Emit it now.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    processDoctypeToken();
-                } else if (isWhitespace) {
-                    src.advance(m_lineNumber);
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                } else
-                    m_doctypeToken.setState(DoctypeBogus);
-                break;
-            case DoctypeBogus:
-                if (c == '>') {
-                    // Done with the bogus doctype.
-                    src.advancePastNonNewline();
-                    state.setInDoctype(false);
-                    if (inViewSourceMode())
-                       processDoctypeToken();
-                } else {
-                    src.advance(m_lineNumber); // Just keep scanning for '>'
-                    if (inViewSourceMode())
-                        m_doctypeToken.m_source.append(c);
-                }
-                break;
-            default:
-                break;
-        }
-    }
-    return state;
+#if ENABLE(INSPECTOR)
+    // FIXME: m_input.current().length() is only accurate if we
+    // end up parsing the whole buffer in this pump.  We should pass how
+    // much we parsed as part of didWriteHTML instead of willWriteHTML.
+    if (InspectorTimelineAgent* timelineAgent = m_document->inspectorTimelineAgent())
+        timelineAgent->willWriteHTML(m_input.current().length(), m_tokenizer->lineNumber());
+#endif
 }
 
-HTMLDocumentParser::State HTMLDocumentParser::parseTag(SegmentedString& src, State state)
+void HTMLDocumentParser::didPumpLexer()
 {
-    ASSERT(!state.hasEntityState());
-
-    unsigned cBufferPos = m_cBufferPos;
-
-    bool lastIsSlash = false;
-
-    while (!src.isEmpty()) {
-        checkBuffer();
-        switch (state.tagState()) {
-        case NoTag:
-        {
-            m_cBufferPos = cBufferPos;
-            return state;
-        }
-        case TagName:
-        {
-            if (searchCount > 0) {
-                if (*src == commentStart[searchCount]) {
-                    searchCount++;
-                    if (searchCount == 2)
-                        m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well.
-                    else
-                        m_doctypeSearchCount = 0;
-                    if (searchCount == 4) {
-                        // Found '<!--' sequence
-                        src.advancePastNonNewline();
-                        m_dest = m_buffer; // ignore the previous part of this tag
-                        state.setInComment(true);
-                        state.setTagState(NoTag);
-
-                        // Fix bug 34302 at kde.bugs.org.  Go ahead and treat
-                        // <!--> as a valid comment, since both mozilla and IE on windows
-                        // can handle this case.  Only do this in quirks mode. -dwh
-                        if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) {
-                            state.setInComment(false);
-                            src.advancePastNonNewline();
-                            if (!src.isEmpty())
-                                m_cBuffer[cBufferPos++] = *src;
-                        } else
-                          state = parseComment(src, state);
-
-                        m_cBufferPos = cBufferPos;
-                        return state; // Finished parsing tag!
-                    }
-                    m_cBuffer[cBufferPos++] = *src;
-                    src.advancePastNonNewline();
-                    break;
-                } else
-                    searchCount = 0; // Stop looking for '<!--' sequence
-            }
-
-            if (m_doctypeSearchCount > 0) {
-                if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) {
-                    m_doctypeSearchCount++;
-                    m_cBuffer[cBufferPos++] = *src;
-                    src.advancePastNonNewline();
-                    if (m_doctypeSearchCount == 9) {
-                        // Found '<!DOCTYPE' sequence
-                        state.setInDoctype(true);
-                        state.setTagState(NoTag);
-                        m_doctypeToken.reset();
-                        if (inViewSourceMode())
-                            m_doctypeToken.m_source.append(m_cBuffer, cBufferPos);
-                        state = parseDoctype(src, state);
-                        m_cBufferPos = cBufferPos;
-                        return state;
-                    }
-                    break;
-                } else
-                    m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
-            }
-
-            bool finish = false;
-            unsigned int ll = min(src.length(), CBUFLEN - cBufferPos);
-            while (ll--) {
-                UChar curchar = *src;
-                if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
-                    finish = true;
-                    break;
-                }
-
-                // tolower() shows up on profiles. This is faster!
-                if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
-                    m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
-                else
-                    m_cBuffer[cBufferPos++] = curchar;
-                src.advancePastNonNewline();
-            }
-
-            // Disadvantage: we add the possible rest of the tag
-            // as attribute names. ### judge if this causes problems
-            if (finish || CBUFLEN == cBufferPos) {
-                bool beginTag;
-                UChar* ptr = m_cBuffer;
-                unsigned int len = cBufferPos;
-                m_cBuffer[cBufferPos] = '\0';
-                if ((cBufferPos > 0) && (*ptr == '/')) {
-                    // End Tag
-                    beginTag = false;
-                    ptr++;
-                    len--;
-                }
-                else
-                    // Start Tag
-                    beginTag = true;
-
-                // Ignore the / in fake xml tags like <br/>.  We trim off the "/" so that we'll get "br" as the tag name and not "br/".
-                if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
-                    ptr[--len] = '\0';
-
-                // Now that we've shaved off any invalid / that might have followed the name), make the tag.
-                // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
-                if (ptr[0] != '!' || inViewSourceMode()) {
-                    m_currentToken.tagName = AtomicString(ptr);
-                    m_currentToken.beginTag = beginTag;
-                }
-                m_dest = m_buffer;
-                state.setTagState(SearchAttribute);
-                cBufferPos = 0;
-            }
-            break;
-        }
-        case SearchAttribute:
-            while (!src.isEmpty()) {
-                UChar curchar = *src;
-                // In this mode just ignore any quotes we encounter and treat them like spaces.
-                if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
-                    if (curchar == '<' || curchar == '>')
-                        state.setTagState(SearchEnd);
-                    else
-                        state.setTagState(AttributeName);
-
-                    cBufferPos = 0;
-                    break;
-                }
-                if (inViewSourceMode())
-                    m_currentToken.addViewSourceChar(curchar);
-                src.advance(m_lineNumber);
-            }
-            break;
-        case AttributeName:
-        {
-            m_rawAttributeBeforeValue.clear();
-            int ll = min(src.length(), CBUFLEN - cBufferPos);
-            while (ll--) {
-                UChar curchar = *src;
-                // If we encounter a "/" when scanning an attribute name, treat it as a delimiter.  This allows the
-                // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
-                if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) {
-                    m_cBuffer[cBufferPos] = '\0';
-                    m_attrName = AtomicString(m_cBuffer);
-                    m_dest = m_buffer;
-                    *m_dest++ = 0;
-                    state.setTagState(SearchEqual);
-                    if (inViewSourceMode())
-                        m_currentToken.addViewSourceChar('a');
-                    break;
-                }
-
-                // tolower() shows up on profiles. This is faster!
-                if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
-                    m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
-                else
-                    m_cBuffer[cBufferPos++] = curchar;
-
-                m_rawAttributeBeforeValue.append(curchar);
-                src.advance(m_lineNumber);
-            }
-            if (cBufferPos == CBUFLEN) {
-                m_cBuffer[cBufferPos] = '\0';
-                m_attrName = AtomicString(m_cBuffer);
-                m_dest = m_buffer;
-                *m_dest++ = 0;
-                state.setTagState(SearchEqual);
-                if (inViewSourceMode())
-                    m_currentToken.addViewSourceChar('a');
-            }
-            break;
-        }
-        case SearchEqual:
-            while (!src.isEmpty()) {
-                UChar curchar = *src;
-
-                if (lastIsSlash && curchar == '>') {
-                    // This is a quirk (with a long sad history).  We have to do this
-                    // since widgets do <script src="foo.js"/> and expect the tag to close.
-                    if (m_currentToken.tagName == scriptTag)
-                        m_currentToken.selfClosingTag = true;
-                    m_currentToken.brokenXMLStyle = true;
-                }
-
-                // In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
-                if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
-                    if (curchar == '=') {
-                        state.setTagState(SearchValue);
-                        if (inViewSourceMode())
-                            m_currentToken.addViewSourceChar(curchar);
-                        m_rawAttributeBeforeValue.append(curchar);
-                        src.advancePastNonNewline();
-                    } else {
-                        m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode());
-                        m_dest = m_buffer;
-                        state.setTagState(SearchAttribute);
-                        lastIsSlash = false;
-                    }
-                    break;
-                }
-
-                lastIsSlash = curchar == '/';
-
-                if (inViewSourceMode())
-                    m_currentToken.addViewSourceChar(curchar);
-                m_rawAttributeBeforeValue.append(curchar);
-                src.advance(m_lineNumber);
-            }
-            break;
-        case SearchValue:
-            while (!src.isEmpty()) {
-                UChar curchar = *src;
-                if (!isASCIISpace(curchar)) {
-                    if (curchar == '\'' || curchar == '\"') {
-                        tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
-                        state.setTagState(QuotedValue);
-                        if (inViewSourceMode())
-                            m_currentToken.addViewSourceChar(curchar);
-                        m_rawAttributeBeforeValue.append(curchar);
-                        src.advancePastNonNewline();
-                    } else
-                        state.setTagState(Value);
-
-                    break;
-                }
-                if (inViewSourceMode())
-                    m_currentToken.addViewSourceChar(curchar);
-                m_rawAttributeBeforeValue.append(curchar);
-                src.advance(m_lineNumber);
-            }
-            break;
-        case QuotedValue:
-            while (!src.isEmpty()) {
-                checkBuffer();
-
-                UChar curchar = *src;
-                if (curchar <= '>' && !src.escaped()) {
-                    if (curchar == '>' && m_attrName.isEmpty()) {
-                        // Handle a case like <img '>.  Just go ahead and be willing
-                        // to close the whole tag.  Don't consume the character and
-                        // just go back into SearchEnd while ignoring the whole
-                        // value.
-                        // FIXME: Note that this is actually not a very good solution.
-                        // It doesn't handle the general case of
-                        // unmatched quotes among attributes that have names. -dwh
-                        while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
-                            m_dest--; // remove trailing newlines
-                        AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
-                        if (!attributeValue.contains('/'))
-                            m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
-                        m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
-                        if (inViewSourceMode())
-                            m_currentToken.addViewSourceChar('x');
-                        state.setTagState(SearchAttribute);
-                        m_dest = m_buffer;
-                        tquote = NoQuote;
-                        break;
-                    }
-
-                    if (curchar == '&') {
-                        src.advancePastNonNewline();
-                        state = parseEntity(src, m_dest, state, cBufferPos, true, true);
-                        break;
-                    }
-
-                    if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) {
-                        // some <input type=hidden> rely on trailing spaces. argh
-                        while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
-                            m_dest--; // remove trailing newlines
-                        AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
-                        if (m_attrName.isEmpty() && !attributeValue.contains('/')) {
-                            m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
-                            if (inViewSourceMode())
-                                m_currentToken.addViewSourceChar('x');
-                        } else if (inViewSourceMode())
-                            m_currentToken.addViewSourceChar('v');
-
-                        if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeConstructor->skipMode() && m_attrName == srcAttr) {
-                            String context(m_rawAttributeBeforeValue.data(), m_rawAttributeBeforeValue.size());
-                            if (m_XSSAuditor && !m_XSSAuditor->canLoadExternalScriptFromSrc(attributeValue))
-                                attributeValue = blankURL().string();
-                        }
-
-                        m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
-                        m_dest = m_buffer;
-                        state.setTagState(SearchAttribute);
-                        tquote = NoQuote;
-                        if (inViewSourceMode())
-                            m_currentToken.addViewSourceChar(curchar);
-                        src.advancePastNonNewline();
-                        break;
-                    }
-                }
-
-                *m_dest++ = curchar;
-                src.advance(m_lineNumber);
-            }
-            break;
-        case Value:
-            while (!src.isEmpty()) {
-                checkBuffer();
-                UChar curchar = *src;
-                if (curchar <= '>' && !src.escaped()) {
-                    // parse Entities
-                    if (curchar == '&') {
-                        src.advancePastNonNewline();
-                        state = parseEntity(src, m_dest, state, cBufferPos, true, true);
-                        break;
-                    }
-                    // no quotes. Every space means end of value
-                    // '/' does not delimit in IE!
-                    if (isASCIISpace(curchar) || curchar == '>') {
-                        AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
-
-                        if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeConstructor->skipMode() && m_attrName == srcAttr) {
-                            String context(m_rawAttributeBeforeValue.data(), m_rawAttributeBeforeValue.size());
-                            if (m_XSSAuditor && !m_XSSAuditor->canLoadExternalScriptFromSrc(attributeValue))
-                                attributeValue = blankURL().string();
-                        }
-
-                        m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
-                        if (inViewSourceMode())
-                            m_currentToken.addViewSourceChar('v');
-                        m_dest = m_buffer;
-                        state.setTagState(SearchAttribute);
-                        break;
-                    }
-                }
-
-                *m_dest++ = curchar;
-                src.advance(m_lineNumber);
-            }
-            break;
-        case SearchEnd:
-        {
-            while (!src.isEmpty()) {
-                UChar ch = *src;
-                if (ch == '>' || ch == '<')
-                    break;
-                if (ch == '/')
-                    m_currentToken.selfClosingTag = true;
-                if (inViewSourceMode())
-                    m_currentToken.addViewSourceChar(ch);
-                src.advance(m_lineNumber);
-            }
-            if (src.isEmpty())
-                break;
-
-            searchCount = 0; // Stop looking for '<!--' sequence
-            state.setTagState(NoTag);
-            tquote = NoQuote;
-
-            if (*src != '<')
-                src.advance(m_lineNumber);
-
-            if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown
-                m_cBufferPos = cBufferPos;
-                return state;
-            }
-
-            AtomicString tagName = m_currentToken.tagName;
-
-            // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
-            // compatibility.
-            bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;
-            bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;
-            if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeConstructor->skipMode()) {
-                Attribute* a = 0;
-                m_scriptTagSrcAttrValue = String();
-                m_scriptTagCharsetAttrValue = String();
-                if (m_currentToken.attrs && !m_fragment) {
-                    if (m_doc->frame() && m_doc->frame()->script()->canExecuteScripts(NotAboutToExecuteScript)) {
-                        if ((a = m_currentToken.attrs->getAttributeItem(srcAttr)))
-                            m_scriptTagSrcAttrValue = m_doc->completeURL(deprecatedParseURL(a->value())).string();
-                    }
-                }
-            }
-
-            RefPtr<Node> n = processToken();
-            m_cBufferPos = cBufferPos;
-            if (n || inViewSourceMode()) {
-                State savedState = state;
-                SegmentedString savedSrc = src;
-                long savedLineno = m_lineNumber;
-                if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
-                    if (beginTag)
-                        state.setDiscardLF(true); // Discard the first LF after we open a pre.
-                } else if (tagName == scriptTag) {
-                    ASSERT(!m_scriptNode);
-                    m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);
-                    if (m_scriptNode)
-                        m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();
-                    if (beginTag) {
-                        m_searchStopper = scriptEnd;
-                        m_searchStopperLength = 8;
-                        state.setInScript(true);
-                        state = parseNonHTMLText(src, state);
-                    } else if (isSelfClosingScript) { // Handle <script src="foo"/>
-                        state.setInScript(true);
-                        state = scriptHandler(state);
-                    }
-                } else if (tagName == styleTag) {
-                    if (beginTag) {
-                        m_searchStopper = styleEnd;
-                        m_searchStopperLength = 7;
-                        state.setInStyle(true);
-                        state = parseNonHTMLText(src, state);
-                    }
-                } else if (tagName == textareaTag) {
-                    if (beginTag) {
-                        m_searchStopper = textareaEnd;
-                        m_searchStopperLength = 10;
-                        state.setInTextArea(true);
-                        state = parseNonHTMLText(src, state);
-                    }
-                } else if (tagName == titleTag) {
-                    if (beginTag) {
-                        m_searchStopper = titleEnd;
-                        m_searchStopperLength = 7;
-                        state.setInTitle(true);
-                        state = parseNonHTMLText(src, state);
-                    }
-                } else if (tagName == xmpTag) {
-                    if (beginTag) {
-                        m_searchStopper = xmpEnd;
-                        m_searchStopperLength = 5;
-                        state.setInXmp(true);
-                        state = parseNonHTMLText(src, state);
-                    }
-                } else if (tagName == iframeTag) {
-                    if (beginTag) {
-                        m_searchStopper = iframeEnd;
-                        m_searchStopperLength = 8;
-                        state.setInIFrame(true);
-                        state = parseNonHTMLText(src, state);
-                    }
-                }
-                if (src.isEmpty() && (state.inTitle() || inViewSourceMode()) && !state.inComment() && !(state.inScript() && m_currentScriptTagStartLineNumber)) {
-                    // We just ate the rest of the document as the #text node under the special tag!
-                    // Reset the state then retokenize without special handling.
-                    // Let the parser clean up the missing close tag.
-                    // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
-                    // at the end of the document unless m_noMoreData is also true. We need
-                    // to detect this case elsewhere, and save the state somewhere other
-                    // than a local variable.
-                    state = savedState;
-                    src = savedSrc;
-                    m_lineNumber = savedLineno;
-                    m_scriptCodeSize = 0;
-                }
-            }
-            if (tagName == plaintextTag)
-                state.setInPlainText(beginTag);
-            return state; // Finished parsing tag!
-        }
-        } // end switch
-    }
-    m_cBufferPos = cBufferPos;
-    return state;
+#if ENABLE(INSPECTOR)
+    if (InspectorTimelineAgent* timelineAgent = m_document->inspectorTimelineAgent())
+        timelineAgent->didWriteHTML(m_tokenizer->lineNumber());
+#endif
 }
 
-inline bool HTMLDocumentParser::continueProcessing(int& processedCount, double startTime, State &state)
+void HTMLDocumentParser::write(const SegmentedString& source, bool isFromNetwork)
 {
-    // We don't want to be checking elapsed time with every character, so we only check after we've
-    // processed a certain number of characters.
-    bool allowedYield = state.allowYield();
-    state.setAllowYield(false);
-    if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > m_tokenizerChunkSize || allowedYield)) {
-        processedCount = 0;
-        if (currentTime() - startTime > m_tokenizerTimeDelay) {
-            /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
-               load, but this hurts overall performance on slower machines.  For now turn this
-               off.
-            || (!m_doc->haveStylesheetsLoaded() &&
-                (m_doc->documentElement()->id() != ID_HTML || m_doc->body()))) {*/
-            // Schedule the timer to keep processing as soon as possible.
-            m_timer.startOneShot(0);
-            return false;
-        }
-    }
+    if (m_parserStopped)
+        return;
 
-    processedCount++;
-    return true;
-}
+    NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel);
 
-// Turns the statemachine one crank using the passed in State object.
-// This does not modify m_state directly in order to be reentrant.
-ALWAYS_INLINE void HTMLDocumentParser::advance(State& state)
-{
-    // do we need to enlarge the buffer?
-    checkBuffer();
-
-    UChar cc = *m_src;
-
-    bool wasSkipLF = state.skipLF();
-    if (wasSkipLF)
-        state.setSkipLF(false);
-
-    if (wasSkipLF && (cc == '\n'))
-        m_src.advance();
-    else if (state.needsSpecialWriteHandling()) {
-        // it's important to keep needsSpecialWriteHandling with the flags this block tests
-        if (state.hasEntityState())
-            state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
-        else if (state.inPlainText())
-            state = parseText(m_src, state);
-        else if (state.inAnyNonHTMLText())
-            state = parseNonHTMLText(m_src, state);
-        else if (state.inComment())
-            state = parseComment(m_src, state);
-        else if (state.inDoctype())
-            state = parseDoctype(m_src, state);
-        else if (state.inServer())
-            state = parseServer(m_src, state);
-        else if (state.inProcessingInstruction())
-            state = parseProcessingInstruction(m_src, state);
-        else if (state.hasTagState())
-            state = parseTag(m_src, state);
-        else if (state.startTag()) {
-            state.setStartTag(false);
-
-            switch (cc) {
-            case '/':
-                break;
-            case '!': {
-                // <!-- comment --> or <!DOCTYPE ...>
-                searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype
-                m_doctypeSearchCount = 1;
-                break;
-            }
-            case '?': {
-                // xml processing instruction
-                state.setInProcessingInstruction(true);
-                tquote = NoQuote;
-                state = parseProcessingInstruction(m_src, state);
-                return;
-            }
-            case '%':
-                if (!m_brokenServer) {
-                    // <% server stuff, handle as comment %>
-                    state.setInServer(true);
-                    tquote = NoQuote;
-                    state = parseServer(m_src, state);
-                    return;
-                }
-                // else fall through
-            default: {
-                if (((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
-                    // Start of a Start-Tag
-                } else {
-                    // Invalid tag
-                    // Add as is
-                    *m_dest = '<';
-                    m_dest++;
-                    return;
-                }
-            }
-            }; // end case
-
-            processToken();
-
-            m_cBufferPos = 0;
-            state.setTagState(TagName);
-            state = parseTag(m_src, state);
-        }
-    } else if (cc == '&' && !m_src.escaped()) {
-        m_src.advancePastNonNewline();
-        state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
-    } else if (cc == '<' && !m_src.escaped()) {
-        m_currentTagStartLineNumber = m_lineNumber;
-        m_src.advancePastNonNewline();
-        state.setStartTag(true);
-        state.setDiscardLF(false);
-    } else if (cc == '\n' || cc == '\r') {
-        if (state.discardLF())
-            // Ignore this LF
-            state.setDiscardLF(false); // We have discarded 1 LF
-        else {
-            // Process this LF
-            *m_dest++ = '\n';
-            if (cc == '\r' && !m_src.excludeLineNumbers())
-                m_lineNumber++;
+    if (isFromNetwork) {
+        m_input.appendToEnd(source);
+        if (m_preloadScanner)
+            m_preloadScanner->appendToEnd(source);
+
+        if (m_writeNestingLevel > 1) {
+            // We've gotten data off the network in a nested call to write().
+            // We don't want to consume any more of the input stream now.  Do
+            // not worry.  We'll consume this data in a less-nested write().
+            return;
         }
+    } else
+        m_input.insertAtCurrentInsertionPoint(source);
 
-        /* Check for MS-DOS CRLF sequence */
-        if (cc == '\r')
-            state.setSkipLF(true);
-        m_src.advance(m_lineNumber);
-    } else {
-        state.setDiscardLF(false);
-        *m_dest++ = cc;
-        m_src.advancePastNonNewline();
-    }
+    pumpTokenizerIfPossible(isFromNetwork ? AllowYield : ForceSynchronous);
+    endIfDelayed();
 }
 
-void HTMLDocumentParser::willWriteHTML(const SegmentedString& source)
+void HTMLDocumentParser::end()
 {
-    #if ENABLE(INSPECTOR)
-        if (InspectorTimelineAgent* timelineAgent = m_doc->inspectorTimelineAgent())
-            timelineAgent->willWriteHTML(source.length(), m_lineNumber);
-    #endif
-}
+    ASSERT(!isScheduledForResume());
+    // NOTE: This pump should only ever emit buffered character tokens,
+    // so ForceSynchronous vs. AllowYield should be meaningless.
+    pumpTokenizerIfPossible(ForceSynchronous);
 
-void HTMLDocumentParser::didWriteHTML()
-{
-    #if ENABLE(INSPECTOR)
-        if (InspectorTimelineAgent* timelineAgent = m_doc->inspectorTimelineAgent())
-            timelineAgent->didWriteHTML(m_lineNumber);
-    #endif
+    // Informs the the rest of WebCore that parsing is really finished (and deletes this).
+    m_treeBuilder->finished();
 }
 
-void HTMLDocumentParser::write(const SegmentedString& str, bool appendData)
+void HTMLDocumentParser::attemptToEnd()
 {
-    if (!m_buffer)
-        return;
-
-    if (m_parserStopped)
-        return;
+    // finish() indicates we will not receive any more data. If we are waiting on
+    // an external script to load, we can't finish parsing quite yet.
 
-    SegmentedString source(str);
-    if (m_executingScript)
-        source.setExcludeLineNumbers();
-
-    if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
-        // don't parse; we will do this later
-        if (m_currentPrependingSrc)
-            m_currentPrependingSrc->append(source);
-        else {
-            m_pendingSrc.append(source);
-#if PRELOAD_SCANNER_ENABLED
-            if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
-                m_preloadScanner->write(source);
-#endif
-        }
+    if (inWrite() || isWaitingForScripts() || inScriptExecution() || isScheduledForResume()) {
+        m_endWasDelayed = true;
         return;
     }
+<<<<<<< HEAD
 
 #if PRELOAD_SCANNER_ENABLED
     if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
@@ -1812,321 +517,151 @@ void HTMLDocumentParser::write(const SegmentedString& str, bool appendData)
 
     // After parsing, go ahead and dispatch image beforeload events.
     ImageLoader::dispatchPendingBeforeLoadEvents();
+=======
+    end();
+>>>>>>> webkit.org at r61871
 }
 
-void HTMLDocumentParser::stopParsing()
+void HTMLDocumentParser::endIfDelayed()
 {
-    DocumentParser::stopParsing();
-    m_timer.stop();
+    // We don't check inWrite() here since inWrite() will be true if this was
+    // called from write().
+    if (!m_endWasDelayed || isWaitingForScripts() || inScriptExecution() || isScheduledForResume())
+        return;
 
-    // FIXME: Why is HTMLDocumentParser the only DocumentParser which calls checkCompleted?
-    // The FrameLoader needs to know that the parser has finished with its data,
-    // regardless of whether it happened naturally or due to manual intervention.
-    if (!m_fragment && m_doc->frame())
-        m_doc->frame()->loader()->checkCompleted();
+    m_endWasDelayed = false;
+    end();
 }
 
-bool HTMLDocumentParser::processingData() const
+void HTMLDocumentParser::finish()
 {
-    return m_timer.isActive() || m_inWrite;
+    // We're not going to get any more data off the network, so we close the
+    // input stream to indicate EOF.
+    m_input.close();
+    attemptToEnd();
 }
 
-void HTMLDocumentParser::timerFired(Timer<HTMLDocumentParser>*)
+bool HTMLDocumentParser::finishWasCalled()
 {
-    if (m_doc->view() && m_doc->view()->layoutPending() && !m_doc->minimumLayoutDelay()) {
-        // Restart the timer and let layout win.  This is basically a way of ensuring that the layout
-        // timer has higher priority than our timer.
-        m_timer.startOneShot(0);
-        return;
-    }
-
-    // Invoke write() as though more data came in. This might cause us to get deleted.
-    write(SegmentedString(), true);
+    return m_input.isClosed();
 }
 
-void HTMLDocumentParser::end()
+// This function is virtual and just for the DocumentParser interface.
+bool HTMLDocumentParser::isExecutingScript() const
 {
-    ASSERT(!m_timer.isActive());
-    m_timer.stop(); // Only helps if assertion above fires, but do it anyway.
-
-    if (m_buffer) {
-        // parseTag is using the buffer for different matters
-        if (!m_state.hasTagState())
-            processToken();
-
-        fastFree(m_scriptCode);
-        m_scriptCode = 0;
-        m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
-
-        fastFree(m_buffer);
-        m_buffer = 0;
-    }
-
-    if (!inViewSourceMode())
-        m_treeConstructor->finished();
-    else
-        m_doc->finishedParsing();
+    return inScriptExecution();
 }
 
-void HTMLDocumentParser::finish()
+// This function is non-virtual and used throughout the implementation.
+bool HTMLDocumentParser::inScriptExecution() const
 {
-    // do this as long as we don't find matching comment ends
-    while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) {
-        // we've found an unmatched comment start
-        if (m_state.inComment())
-            m_brokenComments = true;
-        else
-            m_brokenServer = true;
-        checkScriptBuffer();
-        m_scriptCode[m_scriptCodeSize] = 0;
-        m_scriptCode[m_scriptCodeSize + 1] = 0;
-        int pos;
-        String food;
-        if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea())
-            food = String(m_scriptCode, m_scriptCodeSize);
-        else if (m_state.inServer()) {
-            food = "<";
-            food.append(m_scriptCode, m_scriptCodeSize);
-        } else {
-            pos = find(m_scriptCode, m_scriptCodeSize, '>');
-            food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1);
-        }
-        fastFree(m_scriptCode);
-        m_scriptCode = 0;
-        m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
-        m_state.setInComment(false);
-        m_state.setInServer(false);
-        if (!food.isEmpty())
-            write(food, true);
-    }
-    // this indicates we will not receive any more data... but if we are waiting on
-    // an external script to load, we can't finish parsing until that is done
-    m_noMoreData = true;
-    if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
-        end(); // this actually causes us to be deleted
+    if (!m_scriptRunner)
+        return false;
+    return m_scriptRunner->inScriptExecution();
 }
 
-PassRefPtr<Node> HTMLDocumentParser::processToken()
+int HTMLDocumentParser::lineNumber() const
 {
-    ScriptController* scriptController = (!m_fragment && m_doc->frame()) ? m_doc->frame()->script() : 0;
-    if (scriptController && scriptController->canExecuteScripts(NotAboutToExecuteScript))
-        // FIXME: Why isn't this m_currentScriptTagStartLineNumber?  I suspect this is wrong.
-        scriptController->setEventHandlerLineNumber(m_currentTagStartLineNumber + 1); // Script line numbers are 1 based.
-    if (m_dest > m_buffer) {
-        m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer);
-        if (m_currentToken.tagName != commentAtom)
-            m_currentToken.tagName = textAtom;
-    } else if (m_currentToken.tagName == nullAtom) {
-        m_currentToken.reset();
-        if (scriptController)
-            scriptController->setEventHandlerLineNumber(m_lineNumber + 1); // Script line numbers are 1 based.
-        return 0;
-    }
-
-    m_dest = m_buffer;
-
-    RefPtr<Node> n;
-
-    if (!m_parserStopped) {
-        if (NamedNodeMap* map = m_currentToken.attrs.get())
-            map->shrinkToLength();
-        if (inViewSourceMode())
-            static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceToken(&m_currentToken);
-        else
-            // pass the token over to the parser, the parser DOES NOT delete the token
-            n = m_treeConstructor->parseToken(&m_currentToken);
-    }
-    m_currentToken.reset();
-    if (scriptController)
-        scriptController->setEventHandlerLineNumber(0);
-
-    return n.release();
+    return m_tokenizer->lineNumber();
 }
 
-void HTMLDocumentParser::processDoctypeToken()
+int HTMLDocumentParser::columnNumber() const
 {
-    if (inViewSourceMode())
-        static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceDoctypeToken(&m_doctypeToken);
-    else
-        m_treeConstructor->parseDoctypeToken(&m_doctypeToken);
+    return m_tokenizer->columnNumber();
 }
 
-HTMLDocumentParser::~HTMLDocumentParser()
+LegacyHTMLTreeBuilder* HTMLDocumentParser::htmlTreeBuilder() const
 {
-    ASSERT(!m_inWrite);
-    reset();
+    return m_treeBuilder->legacyTreeBuilder();
 }
 
-
-void HTMLDocumentParser::enlargeBuffer(int len)
+bool HTMLDocumentParser::isWaitingForScripts() const
 {
-    // Resize policy: Always at least double the size of the buffer each time.
-    int delta = max(len, m_bufferSize);
-
-    // Check for overflow.
-    // For now, handle overflow the same way we handle fastRealloc failure, with CRASH.
-    static const int maxSize = INT_MAX / sizeof(UChar);
-    if (delta > maxSize - m_bufferSize)
-        CRASH();
-
-    int newSize = m_bufferSize + delta;
-    int oldOffset = m_dest - m_buffer;
-    m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
-    m_dest = m_buffer + oldOffset;
-    m_bufferSize = newSize;
+    return m_treeBuilder->isPaused();
 }
 
-void HTMLDocumentParser::enlargeScriptBuffer(int len)
+void HTMLDocumentParser::resumeParsingAfterScriptExecution()
 {
-    // Resize policy: Always at least double the size of the buffer each time.
-    int delta = max(len, m_scriptCodeCapacity);
-
-    // Check for overflow.
-    // For now, handle overflow the same way we handle fastRealloc failure, with CRASH.
-    static const int maxSize = INT_MAX / sizeof(UChar);
-    if (delta > maxSize - m_scriptCodeCapacity)
-        CRASH();
-
-    int newSize = m_scriptCodeCapacity + delta;
-    // If we allow fastRealloc(ptr, 0), it will call CRASH(). We run into this
-    // case if the HTML being parsed begins with "<!--" and there's more data
-    // coming.
-    if (!newSize) {
-        ASSERT(!m_scriptCode);
-        return;
-    }
+    ASSERT(!inScriptExecution());
+    ASSERT(!m_treeBuilder->isPaused());
 
-    m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, newSize * sizeof(UChar)));
-    m_scriptCodeCapacity = newSize;
-}
+    pumpTokenizerIfPossible(AllowYield);
 
-void HTMLDocumentParser::executeScriptsWaitingForStylesheets()
-{
-    ASSERT(m_doc->haveStylesheetsLoaded());
-
-    if (m_hasScriptsWaitingForStylesheets)
-        notifyFinished(0);
+    // The document already finished parsing we were just waiting on scripts when finished() was called.
+    endIfDelayed();
 }
 
-void HTMLDocumentParser::notifyFinished(CachedResource*)
+void HTMLDocumentParser::watchForLoad(CachedResource* cachedScript)
 {
-    executeExternalScriptsIfReady();
+    cachedScript->addClient(this);
 }
 
-void HTMLDocumentParser::executeExternalScriptsIfReady()
+void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript)
 {
-    ASSERT(!m_pendingScripts.isEmpty());
-
-    // Make external scripts wait for external stylesheets.
-    // FIXME: This needs to be done for inline scripts too.
-    m_hasScriptsWaitingForStylesheets = !m_doc->haveStylesheetsLoaded();
-    if (m_hasScriptsWaitingForStylesheets)
-        return;
-
-    bool finished = false;
-
-    double startTime = currentTime();
-    while (!finished && m_pendingScripts.first()->isLoaded()) {
-        if (!continueExecutingExternalScripts(startTime))
-            break;
-
-        CachedScript* cs = m_pendingScripts.takeFirst().get();
-        ASSERT(cache()->disabled() || cs->accessCount() > 0);
-
-        setSrc(SegmentedString());
-
-        // make sure we forget about the script before we execute the new one
-        // infinite recursion might happen otherwise
-        ScriptSourceCode sourceCode(cs);
-        bool errorOccurred = cs->errorOccurred();
-        cs->removeClient(this);
-
-        RefPtr<Node> n = m_scriptNode.release();
-
-        if (errorOccurred)
-            n->dispatchEvent(Event::create(eventNames().errorEvent, true, false));
-        else {
-            if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript())
-                m_state = scriptExecution(sourceCode, m_state);
-#if ENABLE(XHTMLMP)
-            else
-                m_doc->setShouldProcessNoscriptElement(true);
-#endif
-            n->dispatchEvent(Event::create(eventNames().loadEvent, false, false));
-        }
-
-        // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution()
-        // call above, so test afterwards.
-        finished = m_pendingScripts.isEmpty();
-        if (finished) {
-            ASSERT(!m_hasScriptsWaitingForStylesheets);
-            m_state.setLoadingExtScript(false);
-        } else if (m_hasScriptsWaitingForStylesheets) {
-            // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution.
-            // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive.
-            finished = true;
-        }
-
-        // 'm_requestingScript' is true when we are called synchronously from
-        // scriptHandler(). In that case scriptHandler() will take care
-        // of m_pendingSrc.
-        if (!m_requestingScript) {
-            SegmentedString rest = m_pendingSrc;
-            m_pendingSrc.clear();
-            write(rest, false);
-            // we might be deleted at this point, do not access any members.
-        }
-    }
+    cachedScript->removeClient(this);
 }
 
-void HTMLDocumentParser::executeExternalScriptsTimerFired(Timer<HTMLDocumentParser>*)
+bool HTMLDocumentParser::shouldLoadExternalScriptFromSrc(const AtomicString& srcValue)
 {
-    if (m_doc->view() && m_doc->view()->layoutPending() && !m_doc->minimumLayoutDelay()) {
-        // Restart the timer and do layout first.
-        m_externalScriptsTimer.startOneShot(0);
-        return;
-    }
-
-    // Continue executing external scripts.
-    executeExternalScriptsIfReady();
+    if (!m_XSSAuditor)
+        return true;
+    return m_XSSAuditor->canLoadExternalScriptFromSrc(srcValue);
 }
 
-bool HTMLDocumentParser::continueExecutingExternalScripts(double startTime)
+void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource)
 {
-    if (m_externalScriptsTimer.isActive())
-        return false;
-
-    if (currentTime() - startTime > m_tokenizerTimeDelay) {
-        // Schedule the timer to keep processing as soon as possible.
-        m_externalScriptsTimer.startOneShot(0);
-        return false;
+    ASSERT(m_scriptRunner);
+    // Ignore calls unless we have a script blocking the parser waiting
+    // for its own load.  Otherwise this may be a load callback from
+    // CachedResource::addClient because the script was already in the cache.
+    // HTMLScriptRunner may not be ready to handle running that script yet.
+    if (!m_scriptRunner->hasScriptsWaitingForLoad()) {
+        ASSERT(m_scriptRunner->inScriptExecution());
+        return;
     }
-    return true;
+    ASSERT(!inScriptExecution());
+    ASSERT(m_treeBuilder->isPaused());
+    // Note: We only ever wait on one script at a time, so we always know this
+    // is the one we were waiting on and can un-pause the tree builder.
+    m_treeBuilder->setPaused(false);
+    bool shouldContinueParsing = m_scriptRunner->executeScriptsWaitingForLoad(cachedResource);
+    m_treeBuilder->setPaused(!shouldContinueParsing);
+    if (shouldContinueParsing)
+        resumeParsingAfterScriptExecution();
 }
 
-bool HTMLDocumentParser::isWaitingForScripts() const
+void HTMLDocumentParser::executeScriptsWaitingForStylesheets()
 {
-    return m_state.loadingExtScript();
+    // Document only calls this when the Document owns the DocumentParser
+    // so this will not be called in the DocumentFragment case.
+    ASSERT(m_scriptRunner);
+    // Ignore calls unless we have a script blocking the parser waiting on a
+    // stylesheet load.  Otherwise we are currently parsing and this
+    // is a re-entrant call from encountering a </ style> tag.
+    if (!m_scriptRunner->hasScriptsWaitingForStylesheets())
+        return;
+    ASSERT(!m_scriptRunner->inScriptExecution());
+    ASSERT(m_treeBuilder->isPaused());
+    // Note: We only ever wait on one script at a time, so we always know this
+    // is the one we were waiting on and can un-pause the tree builder.
+    m_treeBuilder->setPaused(false);
+    bool shouldContinueParsing = m_scriptRunner->executeScriptsWaitingForStylesheets();
+    m_treeBuilder->setPaused(!shouldContinueParsing);
+    if (shouldContinueParsing)
+        resumeParsingAfterScriptExecution();
 }
 
-void HTMLDocumentParser::setSrc(const SegmentedString& source)
+ScriptController* HTMLDocumentParser::script() const
 {
-    m_src = source;
+    return m_document->frame() ? m_document->frame()->script() : 0;
 }
 
-void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
+void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
 {
     HTMLDocumentParser parser(fragment, scriptingPermission);
-    parser.setForceSynchronous(true);
-    parser.write(source, true);
+    parser.write(source, false);
     parser.finish();
-    ASSERT(!parser.processingData());      // make sure we're done (see 3963151)
-}
-
-UChar decodeNamedEntity(const char* name)
-{
-    const Entity* e = findEntity(name, strlen(name));
-    return e ? e->code : 0;
+    ASSERT(!parser.processingData()); // Make sure we're done. <rdar://problem/3963151>
 }
 
 }