Merge WebKit at r61871: Initial merge by git.

Change-Id: I6cff43abca9cc4782e088a469ad4f03f166a65d5
author: Kristian Monsen <kristianm@google.com> 2010-06-28 16:42:48 +0100
committer: Kristian Monsen <kristianm@google.com> 2010-07-02 10:29:56 +0100
commit: 06ea8e899e48f1f2f396b70e63fae369f2f23232 (patch)
tree: 20c1428cd05c76f32394ab354ea35ed99acd86d8 /WebCore/html/HTMLDocumentParser.h
parent: 72aad67af14193199e29cdd5c4ddc095a8b9a8a8 (diff)
download: external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.zip
external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.tar.gz
external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.tar.bz2
1 files changed, 98 insertions, 415 deletions
diff --git a/WebCore/html/HTMLDocumentParser.h b/WebCore/html/HTMLDocumentParser.h
index 6072a7e..c2e752f 100644
--- a/WebCore/html/HTMLDocumentParser.h
+++ b/WebCore/html/HTMLDocumentParser.h
@@ -1,448 +1,131 @@
 /*
-    Copyright (C) 1997 Martin Jones (mjones@kde.org)
-              (C) 1997 Torben Weis (weis@kde.org)
-              (C) 1998 Waldo Bastian (bastian@kde.org)
-              (C) 2001 Dirk Mueller (mueller@kde.org)
-    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Library General Public
-    License as published by the Free Software Foundation; either
-    version 2 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Library General Public License for more details.
-
-    You should have received a copy of the GNU Library General Public License
-    along with this library; see the file COPYING.LIB.  If not, write to
-    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-    Boston, MA 02110-1301, USA.
-*/
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
-#ifndef HTMLTokenizer_h
-#define HTMLTokenizer_h
+#ifndef HTMLDocumentParser_h
+#define HTMLDocumentParser_h
 
 #include "CachedResourceClient.h"
-#include "CachedResourceHandle.h"
-#include "MappedAttributeEntry.h"
-#include "NamedNodeMap.h"
+#include "FragmentScriptingPermission.h"
+#include "HTMLScriptRunnerHost.h"
+#include "HTMLToken.h"
+#include "HTMLInputStream.h"
 #include "SegmentedString.h"
-#include "Timer.h"
 #include "DocumentParser.h"
-#include <wtf/Deque.h>
+#include "Timer.h"
 #include <wtf/OwnPtr.h>
-#include <wtf/Vector.h>
 
 namespace WebCore {
 
-class CachedScript;
-class DocumentFragment;
 class Document;
+class DocumentFragment;
 class HTMLDocument;
-class HTMLScriptElement;
-class HTMLViewSourceDocument;
-class FrameView;
-class LegacyHTMLTreeConstructor;
-class Node;
-class PreloadScanner;
+class HTMLParserScheduler;
+class HTMLTokenizer;
+class HTMLScriptRunner;
+class HTMLTreeBuilder;
+class HTMLPreloadScanner;
+class LegacyHTMLTreeBuilder;
+class ScriptController;
 class ScriptSourceCode;
 
-/**
- * @internal
- * represents one HTML tag. Consists of a numerical id, and the list
- * of attributes. Can also represent text. In this case the id = 0 and
- * text contains the text.
- */
-struct Token {
-    Token()
-        : beginTag(true)
-        , selfClosingTag(false)
-        , brokenXMLStyle(false)
-        , m_sourceInfo(0)
-    { }
-    ~Token() { }
-
-    void addAttribute(AtomicString& attrName, const AtomicString& v, bool viewSourceMode);
-
-    bool isOpenTag(const QualifiedName& fullName) const { return beginTag && fullName.localName() == tagName; }
-    bool isCloseTag(const QualifiedName& fullName) const { return !beginTag && fullName.localName() == tagName; }
-
-    void reset()
-    {
-        attrs = 0;
-        text = 0;
-        tagName = nullAtom;
-        beginTag = true;
-        selfClosingTag = false;
-        brokenXMLStyle = false;
-        if (m_sourceInfo)
-            m_sourceInfo->clear();
-    }
-
-    void addViewSourceChar(UChar c) { if (!m_sourceInfo.get()) m_sourceInfo.set(new Vector<UChar>); m_sourceInfo->append(c); }
-
-    RefPtr<NamedNodeMap> attrs;
-    RefPtr<StringImpl> text;
-    AtomicString tagName;
-    bool beginTag;
-    bool selfClosingTag;
-    bool brokenXMLStyle;
-    OwnPtr<Vector<UChar> > m_sourceInfo;
-};
-
-enum DoctypeState {
-    DoctypeBegin,
-    DoctypeBeforeName,
-    DoctypeName,
-    DoctypeAfterName,
-    DoctypeBeforePublicID,
-    DoctypePublicID,
-    DoctypeAfterPublicID,
-    DoctypeBeforeSystemID,
-    DoctypeSystemID,
-    DoctypeAfterSystemID,
-    DoctypeBogus
-};
-
-class DoctypeToken {
-public:
-    DoctypeToken() {}
-
-    void reset()
-    {
-        m_name.clear();
-        m_publicID.clear();
-        m_systemID.clear();
-        m_state = DoctypeBegin;
-        m_source.clear();
-        m_forceQuirks = false;
-    }
-
-    DoctypeState state() { return m_state; }
-    void setState(DoctypeState s) { m_state = s; }
-
-    Vector<UChar> m_name;
-    Vector<UChar> m_publicID;
-    Vector<UChar> m_systemID;
-    DoctypeState m_state;
-
-    Vector<UChar> m_source;
-
-    bool m_forceQuirks; // Used by the HTML5 parser.
-};
-
-//-----------------------------------------------------------------------------
-
-// FIXME: This class does too much.  Right now it is both an HTML lexer as well
-// as handling all of the non-lexer-specific junk related to tokenizing HTML
-// (like dealing with <script> tags).  The HTML lexer bits should be pushed
-// down into a separate HTML lexer class.
-
-class HTMLDocumentParser : public DocumentParser, public CachedResourceClient {
+class HTMLDocumentParser :  public DocumentParser, HTMLScriptRunnerHost, CachedResourceClient {
 public:
+    // FIXME: These constructors should be made private and replaced by create() methods.
     HTMLDocumentParser(HTMLDocument*, bool reportErrors);
-    HTMLDocumentParser(HTMLViewSourceDocument*);
-    HTMLDocumentParser(DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed);
+    HTMLDocumentParser(DocumentFragment*, FragmentScriptingPermission);
     virtual ~HTMLDocumentParser();
 
-    virtual void write(const SegmentedString&, bool appendData);
-    virtual void finish();
-    virtual bool forceSynchronous() const { return m_state.forceSynchronous(); }
-    virtual void setForceSynchronous(bool force);
-    virtual bool isWaitingForScripts() const;
-    virtual void stopParsing();
-    virtual bool processingData() const;
-    virtual int executingScript() const { return m_executingScript; }
-
-    virtual int lineNumber() const { return m_lineNumber; }
-    virtual int columnNumber() const { return 1; }
-
-    bool processingContentWrittenByScript() const { return m_src.excludeLineNumbers(); }
-
-    virtual void executeScriptsWaitingForStylesheets();
+    // Exposed for HTMLParserScheduler
+    void resumeParsingAfterYield();
 
-    virtual LegacyHTMLTreeConstructor* htmlTreeConstructor() const { return m_treeConstructor.get(); }
-    virtual HTMLDocumentParser* asHTMLDocumentParser() { return this; }
+    static void parseDocumentFragment(const String&, DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed);
 
 private:
-    class State;
-
-    // Where we are in parsing a tag
-    void begin();
-    void end();
-
-    void reset();
-
-    void willWriteHTML(const SegmentedString&);
-    ALWAYS_INLINE void advance(State&);
-    void didWriteHTML();
-
-    PassRefPtr<Node> processToken();
-    void processDoctypeToken();
-
-    State processListing(SegmentedString, State);
-    State parseComment(SegmentedString&, State);
-    State parseDoctype(SegmentedString&, State);
-    State parseServer(SegmentedString&, State);
-    State parseText(SegmentedString&, State);
-    State parseNonHTMLText(SegmentedString&, State);
-    State parseTag(SegmentedString&, State);
-    State parseEntity(SegmentedString&, UChar*& dest, State, unsigned& cBufferPos, bool start, bool parsingTag);
-    State parseProcessingInstruction(SegmentedString&, State);
-    State scriptHandler(State);
-    State scriptExecution(const ScriptSourceCode&, State);
-    void setSrc(const SegmentedString&);
-
-    // check if we have enough space in the buffer.
-    // if not enlarge it
-    inline void checkBuffer(int len = 10)
-    {
-        if ((m_dest - m_buffer) > m_bufferSize - len)
-            enlargeBuffer(len);
-    }
-
-    inline void checkScriptBuffer(int len = 10)
-    {
-        if (m_scriptCodeSize + len >= m_scriptCodeCapacity)
-            enlargeScriptBuffer(len);
-    }
-
-    void enlargeBuffer(int len);
-    void enlargeScriptBuffer(int len);
-
-    bool continueProcessing(int& processedCount, double startTime, State&);
-    void timerFired(Timer<HTMLDocumentParser>*);
-    void allDataProcessed();
-
-    // from CachedResourceClient
-    void notifyFinished(CachedResource*);
-
-    void executeExternalScriptsIfReady();
-    void executeExternalScriptsTimerFired(Timer<HTMLDocumentParser>*);
-    bool continueExecutingExternalScripts(double startTime);
-
-    // Internal buffers
-    ///////////////////
-    UChar* m_buffer;
-    int m_bufferSize;
-    UChar* m_dest;
-
-    Token m_currentToken;
-
-    // This buffer holds the raw characters we've seen between the beginning of
-    // the attribute name and the first character of the attribute value.
-    Vector<UChar, 32> m_rawAttributeBeforeValue;
-
-    // DocumentParser flags
-    //////////////////
-    // are we in quotes within a html tag
-    enum { NoQuote, SingleQuote, DoubleQuote } tquote;
-
-    // Are we in a &... character entity description?
-    enum EntityState {
-        NoEntity = 0,
-        SearchEntity = 1,
-        NumericSearch = 2,
-        Hexadecimal = 3,
-        Decimal = 4,
-        EntityName = 5,
-        SearchSemicolon = 6
-    };
-    unsigned EntityUnicodeValue;
-
-    enum TagState {
-        NoTag = 0,
-        TagName = 1,
-        SearchAttribute = 2,
-        AttributeName = 3,
-        SearchEqual = 4,
-        SearchValue = 5,
-        QuotedValue = 6,
-        Value = 7,
-        SearchEnd = 8
-    };
-
-    class State {
-    public:
-        State() : m_bits(0) { }
-
-        TagState tagState() const { return static_cast<TagState>(m_bits & TagMask); }
-        void setTagState(TagState t) { m_bits = (m_bits & ~TagMask) | t; }
-        EntityState entityState() const { return static_cast<EntityState>((m_bits & EntityMask) >> EntityShift); }
-        void setEntityState(EntityState e) { m_bits = (m_bits & ~EntityMask) | (e << EntityShift); }
-
-        bool inScript() const { return testBit(InScript); }
-        void setInScript(bool v) { setBit(InScript, v); }
-        bool inStyle() const { return testBit(InStyle); }
-        void setInStyle(bool v) { setBit(InStyle, v); }
-        bool inXmp() const { return testBit(InXmp); }
-        void setInXmp(bool v) { setBit(InXmp, v); }
-        bool inTitle() const { return testBit(InTitle); }
-        void setInTitle(bool v) { setBit(InTitle, v); }
-        bool inIFrame() const { return testBit(InIFrame); }
-        void setInIFrame(bool v) { setBit(InIFrame, v); }
-        bool inPlainText() const { return testBit(InPlainText); }
-        void setInPlainText(bool v) { setBit(InPlainText, v); }
-        bool inProcessingInstruction() const { return testBit(InProcessingInstruction); }
-        void setInProcessingInstruction(bool v) { return setBit(InProcessingInstruction, v); }
-        bool inComment() const { return testBit(InComment); }
-        void setInComment(bool v) { setBit(InComment, v); }
-        bool inDoctype() const { return testBit(InDoctype); }
-        void setInDoctype(bool v) { setBit(InDoctype, v); }
-        bool inTextArea() const { return testBit(InTextArea); }
-        void setInTextArea(bool v) { setBit(InTextArea, v); }
-        bool escaped() const { return testBit(Escaped); }
-        void setEscaped(bool v) { setBit(Escaped, v); }
-        bool inServer() const { return testBit(InServer); }
-        void setInServer(bool v) { setBit(InServer, v); }
-        bool skipLF() const { return testBit(SkipLF); }
-        void setSkipLF(bool v) { setBit(SkipLF, v); }
-        bool startTag() const { return testBit(StartTag); }
-        void setStartTag(bool v) { setBit(StartTag, v); }
-        bool discardLF() const { return testBit(DiscardLF); }
-        void setDiscardLF(bool v) { setBit(DiscardLF, v); }
-        bool allowYield() const { return testBit(AllowYield); }
-        void setAllowYield(bool v) { setBit(AllowYield, v); }
-        bool loadingExtScript() const { return testBit(LoadingExtScript); }
-        void setLoadingExtScript(bool v) { setBit(LoadingExtScript, v); }
-        bool forceSynchronous() const { return testBit(ForceSynchronous); }
-        void setForceSynchronous(bool v) { setBit(ForceSynchronous, v); }
-
-        bool inAnyNonHTMLText() const { return m_bits & (InScript | InStyle | InXmp | InTextArea | InTitle | InIFrame); }
-        bool hasTagState() const { return m_bits & TagMask; }
-        bool hasEntityState() const { return m_bits & EntityMask; }
-
-        bool needsSpecialWriteHandling() const { return m_bits & (InScript | InStyle | InXmp | InTextArea | InTitle | InIFrame | TagMask | EntityMask | InPlainText | InComment | InDoctype | InServer | InProcessingInstruction | StartTag); }
-
-    private:
-        static const int EntityShift = 4;
-        enum StateBits {
-            TagMask = (1 << 4) - 1,
-            EntityMask = (1 << 7) - (1 << 4),
-            InScript = 1 << 7,
-            InStyle = 1 << 8,
-            // Bit 9 unused
-            InXmp = 1 << 10,
-            InTitle = 1 << 11,
-            InPlainText = 1 << 12,
-            InProcessingInstruction = 1 << 13,
-            InComment = 1 << 14,
-            InTextArea = 1 << 15,
-            Escaped = 1 << 16,
-            InServer = 1 << 17,
-            SkipLF = 1 << 18,
-            StartTag = 1 << 19,
-            DiscardLF = 1 << 20, // FIXME: should clarify difference between skip and discard
-            AllowYield = 1 << 21,
-            LoadingExtScript = 1 << 22,
-            ForceSynchronous = 1 << 23,
-            InIFrame = 1 << 24,
-            InDoctype = 1 << 25
-        };
-
-        void setBit(StateBits bit, bool value)
-        {
-            if (value)
-                m_bits |= bit;
-            else
-                m_bits &= ~bit;
-        }
-        bool testBit(StateBits bit) const { return m_bits & bit; }
-
-        unsigned m_bits;
+    // DocumentParser
+    virtual void begin();
+    virtual void write(const SegmentedString&, bool isFromNetwork);
+    virtual void finish();
+    virtual bool finishWasCalled();
+    virtual bool processingData() const;
+    virtual void stopParsing();
+    virtual bool isWaitingForScripts() const;
+    virtual bool isExecutingScript() const;
+    virtual void executeScriptsWaitingForStylesheets();
+    virtual int lineNumber() const;
+    virtual int columnNumber() const;
+    // FIXME: HTMLFormControlElement accesses the LegacyHTMLTreeBuilder via this method.
+    // Remove this when the LegacyHTMLTreeBuilder is no longer used.
+    virtual LegacyHTMLTreeBuilder* htmlTreeBuilder() const;
+
+    // HTMLScriptRunnerHost
+    virtual void watchForLoad(CachedResource*);
+    virtual void stopWatchingForLoad(CachedResource*);
+    virtual bool shouldLoadExternalScriptFromSrc(const AtomicString&);
+    virtual HTMLInputStream& inputStream() { return m_input; }
+
+    // CachedResourceClient
+    virtual void notifyFinished(CachedResource*);
+
+    void willPumpLexer();
+    void didPumpLexer();
+
+    enum SynchronousMode {
+        AllowYield,
+        ForceSynchronous,
     };
+    void pumpTokenizer(SynchronousMode);
+    void pumpTokenizerIfPossible(SynchronousMode);
 
-    State m_state;
-
-    DoctypeToken m_doctypeToken;
-    int m_doctypeSearchCount;
-    int m_doctypeSecondarySearchCount;
-
-    bool m_brokenServer;
-
-    // Name of an attribute that we just scanned.
-    AtomicString m_attrName;
+    bool runScriptsForPausedTreeBuilder();
+    void resumeParsingAfterScriptExecution();
 
-    // Used to store the code of a scripting sequence
-    UChar* m_scriptCode;
-    // Size of the script sequenze stored in @ref #scriptCode
-    int m_scriptCodeSize;
-    // Maximal size that can be stored in @ref #scriptCode
-    int m_scriptCodeCapacity;
-    // resync point of script code size
-    int m_scriptCodeResync;
-
-    // Stores characters if we are scanning for a string like "</script>"
-    UChar searchBuffer[10];
-
-    // Counts where we are in the string we are scanning for
-    int searchCount;
-    // the stopper string
-    const char* m_searchStopper;
-    int m_searchStopperLength;
-
-    // if no more data is coming, just parse what we have (including ext scripts that
-    // may be still downloading) and finish
-    bool m_noMoreData;
-    // URL to get source code of script from
-    String m_scriptTagSrcAttrValue;
-    String m_scriptTagCharsetAttrValue;
-    // the HTML code we will parse after the external script we are waiting for has loaded
-    SegmentedString m_pendingSrc;
-
-    // the HTML code we will parse after this particular script has
-    // loaded, but before all pending HTML
-    SegmentedString* m_currentPrependingSrc;
-
-    // true if we are executing a script while parsing a document. This causes the parsing of
-    // the output of the script to be postponed until after the script has finished executing
-    int m_executingScript;
-    Deque<CachedResourceHandle<CachedScript> > m_pendingScripts;
-    RefPtr<HTMLScriptElement> m_scriptNode;
-
-    bool m_requestingScript;
-    bool m_hasScriptsWaitingForStylesheets;
-
-    // if we found one broken comment, there are most likely others as well
-    // store a flag to get rid of the O(n^2) behaviour in such a case.
-    bool m_brokenComments;
-    // current line number
-    int m_lineNumber;
-    int m_currentScriptTagStartLineNumber;
-    int m_currentTagStartLineNumber;
+    void attemptToEnd();
+    void endIfDelayed();
+    void end();
 
-    double m_tokenizerTimeDelay;
-    int m_tokenizerChunkSize;
+    bool isScheduledForResume() const;
+    bool inScriptExecution() const;
+    bool inWrite() const { return m_writeNestingLevel > 0; }
 
-    // The timer for continued processing.
-    Timer<HTMLDocumentParser> m_timer;
+    ScriptController* script() const;
 
-    // The timer for continued executing external scripts.
-    Timer<HTMLDocumentParser> m_externalScriptsTimer;
+    HTMLInputStream m_input;
 
-// This buffer can hold arbitrarily long user-defined attribute names, such as in EMBED tags.
-// So any fixed number might be too small, but rather than rewriting all usage of this buffer
-// we'll just make it large enough to handle all imaginable cases.
-#define CBUFLEN 1024
-    UChar m_cBuffer[CBUFLEN + 2];
-    unsigned int m_cBufferPos;
+    // We hold m_token here because it might be partially complete.
+    HTMLToken m_token;
 
-    SegmentedString m_src;
-    Document* m_doc;
-    OwnPtr<LegacyHTMLTreeConstructor> m_treeConstructor;
-    bool m_inWrite;
-    bool m_fragment;
-    FragmentScriptingPermission m_scriptingPermission;
+    OwnPtr<HTMLTokenizer> m_tokenizer;
+    OwnPtr<HTMLScriptRunner> m_scriptRunner;
+    OwnPtr<HTMLTreeBuilder> m_treeBuilder;
+    OwnPtr<HTMLPreloadScanner> m_preloadScanner;
+    OwnPtr<HTMLParserScheduler> m_parserScheduler;
 
-    OwnPtr<PreloadScanner> m_preloadScanner;
+    bool m_endWasDelayed;
+    int m_writeNestingLevel;
 };
 
-void parseHTMLDocumentFragment(const String&, DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed);
-
-UChar decodeNamedEntity(const char*);
-
-} // namespace WebCore
+}
 
-#endif // HTMLTokenizer_h
+#endif
author	Kristian Monsen <kristianm@google.com>	2010-06-28 16:42:48 +0100
committer	Kristian Monsen <kristianm@google.com>	2010-07-02 10:29:56 +0100
commit	06ea8e899e48f1f2f396b70e63fae369f2f23232 (patch)
tree	20c1428cd05c76f32394ab354ea35ed99acd86d8 /WebCore/html/HTMLDocumentParser.h
parent	72aad67af14193199e29cdd5c4ddc095a8b9a8a8 (diff)
download	external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.zip external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.tar.gz external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.tar.bz2