summaryrefslogtreecommitdiffstats
path: root/WebCore/html/HTML5DocumentParser.h
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/html/HTML5DocumentParser.h')
-rw-r--r--WebCore/html/HTML5DocumentParser.h192
1 files changed, 192 insertions, 0 deletions
diff --git a/WebCore/html/HTML5DocumentParser.h b/WebCore/html/HTML5DocumentParser.h
new file mode 100644
index 0000000..4a75cec
--- /dev/null
+++ b/WebCore/html/HTML5DocumentParser.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTML5Tokenizer_h
+#define HTML5Tokenizer_h
+
+#include "CachedResourceClient.h"
+#include "HTML5ScriptRunnerHost.h"
+#include "HTML5Token.h"
+#include "SegmentedString.h"
+#include "DocumentParser.h"
+#include <wtf/OwnPtr.h>
+
+namespace WebCore {
+
+class HTMLDocument;
+class LegacyHTMLTreeConstructor;
+class HTML5Lexer;
+class HTML5ScriptRunner;
+class HTML5TreeBuilder;
+class ScriptController;
+class ScriptSourceCode;
+
+// Document parser that owns an HTML5Lexer, an HTML5TreeBuilder and an
+// HTML5ScriptRunner and feeds them from an InputStream of SegmentedStrings.
+//
+// Only DocumentParser is a public base. HTML5ScriptRunnerHost and
+// CachedResourceClient are listed without an access specifier, so they are
+// private bases of this class.
+class HTML5DocumentParser : public DocumentParser, HTML5ScriptRunnerHost, CachedResourceClient {
+public:
+    HTML5DocumentParser(HTMLDocument*, bool reportErrors);
+    virtual ~HTML5DocumentParser();
+
+    // DocumentParser
+    virtual void begin();
+    virtual void write(const SegmentedString&, bool appendData);
+    virtual void end();
+    virtual void finish();
+    virtual int executingScript() const;
+    virtual bool isWaitingForScripts() const;
+    virtual void executeScriptsWaitingForStylesheets();
+    virtual int lineNumber() const;
+    virtual int columnNumber() const;
+    // FIXME: HTMLFormControlElement accesses the LegacyHTMLTreeConstructor via this method.
+    // Remove this when the LegacyHTMLTreeConstructor is no longer used.
+    virtual LegacyHTMLTreeConstructor* htmlTreeConstructor() const;
+
+    // HTML5ScriptRunnerHost
+    virtual void watchForLoad(CachedResource*);
+    virtual void stopWatchingForLoad(CachedResource*);
+    virtual bool shouldLoadExternalScriptFromSrc(const AtomicString&);
+    virtual void executeScript(const ScriptSourceCode&);
+
+    // CachedResourceClient
+    virtual void notifyFinished(CachedResource*);
+
+private:
+    // The InputStream is made up of a sequence of SegmentedStrings:
+    //
+    // [--current--][--next--][--next--] ... [--next--]
+    //            /\                         (also called m_last)
+    //            L_ current insertion point
+    //
+    // The current segmented string is stored in InputStream (m_first). Each
+    // of the afterInsertionPoint buffers is stored in an InsertionPointRecord
+    // on the stack.
+    //
+    // We remove characters from the "current" string in the InputStream.
+    // document.write() will add characters at the current insertion point,
+    // which appends them to the "current" string.
+    //
+    // m_last is a pointer to the last of the afterInsertionPoint strings.
+    // The network adds data at the end of the InputStream, which appends
+    // them to the "last" string.
+    //
+    // m_last may point at this object's own m_first or at a SegmentedString
+    // owned by an InsertionPointRecord, so an InputStream must never be
+    // copied: a memberwise copy would leave the copy's m_last pointing into
+    // a different object.
+    class InputStream {
+    public:
+        // Starts with a single string, which is both "current" and "last".
+        InputStream()
+            : m_last(&m_first)
+        {
+        }
+
+        // Appends |string| to the last string of the stream.
+        void appendToEnd(const SegmentedString& string)
+        {
+            m_last->append(string);
+        }
+
+        // Appends |string| to the current string, i.e. at the insertion point.
+        void insertAtCurrentInsertionPoint(const SegmentedString& string)
+        {
+            m_first.append(string);
+        }
+
+        // Closes the last string of the stream.
+        void close() { m_last->close(); }
+
+        // Returns the current string (the one characters are consumed from).
+        SegmentedString& current() { return m_first; }
+
+        // Moves the contents of the current string into |next| and leaves the
+        // current string empty. The stream keeps a pointer to |next| when it
+        // becomes the last string, so |next| must stay alive until it is
+        // handed back through mergeFrom().
+        void splitInto(SegmentedString& next)
+        {
+            next = m_first;
+            m_first = SegmentedString();
+            if (m_last == &m_first) {
+                // We used to only have one SegmentedString in the InputStream
+                // but now we have two. That means m_first is no longer also
+                // the m_last string, |next| is now the last one.
+                m_last = &next;
+            }
+        }
+
+        // Appends |next| back onto the current string, undoing splitInto().
+        void mergeFrom(SegmentedString& next)
+        {
+            m_first.append(next);
+            if (m_last == &next) {
+                // The string |next| used to be the last SegmentedString in
+                // the InputStream. Now that it's been merged into m_first,
+                // that makes m_first the last one.
+                m_last = &m_first;
+            }
+            if (next.isClosed()) {
+                // We also need to merge the "closed" state from next to
+                // m_first. Arguably, this work could be done in append().
+                m_first.close();
+            }
+        }
+
+    private:
+        // Not copyable (declared, intentionally never defined): see the
+        // note about m_last aliasing above.
+        InputStream(const InputStream&);
+        InputStream& operator=(const InputStream&);
+
+        SegmentedString m_first;
+        SegmentedString* m_last;
+    };
+
+    // Scoped helper: the constructor splits the stream's current string into
+    // m_next (InputStream::splitInto), and the destructor merges m_next back
+    // (InputStream::mergeFrom). The InputStream may hold a pointer to m_next
+    // in between, so a record must not be copied; a copy would also merge
+    // the same text back a second time when it is destroyed.
+    class InsertionPointRecord {
+    public:
+        explicit InsertionPointRecord(InputStream& inputStream)
+            : m_inputStream(&inputStream)
+        {
+            m_inputStream->splitInto(m_next);
+        }
+
+        ~InsertionPointRecord()
+        {
+            m_inputStream->mergeFrom(m_next);
+        }
+
+    private:
+        // Not copyable (declared, intentionally never defined).
+        InsertionPointRecord(const InsertionPointRecord&);
+        InsertionPointRecord& operator=(const InsertionPointRecord&);
+
+        InputStream* m_inputStream;
+        SegmentedString m_next;
+    };
+
+    void willPumpLexer();
+    void didPumpLexer();
+
+    void pumpLexer();
+    void pumpLexerIfPossible();
+    void resumeParsingAfterScriptExecution();
+
+    void attemptToEnd();
+    void endIfDelayed();
+    // True while m_writeNestingLevel is positive.
+    bool inWrite() const { return m_writeNestingLevel > 0; }
+
+    ScriptController* script() const;
+
+    InputStream m_input;
+
+    // We hold m_token here because it might be partially complete.
+    HTML5Token m_token;
+
+    HTMLDocument* m_document;
+    OwnPtr<HTML5Lexer> m_lexer;
+    OwnPtr<HTML5ScriptRunner> m_scriptRunner;
+    OwnPtr<HTML5TreeBuilder> m_treeConstructor;
+    bool m_endWasDelayed;
+    int m_writeNestingLevel;
+};
+
+}
+
+#endif