/* Copyright (C) 1997 Martin Jones (mjones@kde.org) (C) 1997 Torben Weis (weis@kde.org) (C) 1998 Waldo Bastian (bastian@kde.org) (C) 2001 Dirk Mueller (mueller@kde.org) Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef LegacyHTMLDocumentParser_h #define LegacyHTMLDocumentParser_h #include "CachedResourceClient.h" #include "CachedResourceHandle.h" #include "FragmentScriptingPermission.h" #include "NamedNodeMap.h" #include "SegmentedString.h" #include "Timer.h" #include "DocumentParser.h" #include #include #include namespace WebCore { class CachedScript; class DocumentFragment; class Document; class HTMLDocument; class HTMLScriptElement; class HTMLViewSourceDocument; class FrameView; class LegacyHTMLTreeBuilder; class Node; class LegacyPreloadScanner; class ScriptSourceCode; /** * @internal * represents one HTML tag. Consists of a numerical id, and the list * of attributes. Can also represent text. In this case the id = 0 and * text contains the text. */ struct Token { Token() : beginTag(true) , selfClosingTag(false) , brokenXMLStyle(false) , m_sourceInfo(0) { } ~Token() { } void addAttribute(AtomicString& attrName, const AtomicString& v, bool viewSourceMode); bool isOpenTag(const QualifiedName& fullName) const { return beginTag && fullName.localName() == tagName; } bool isCloseTag(const QualifiedName& fullName) const { return !beginTag && fullName.localName() == tagName; } void reset() { attrs = 0; text = 0; tagName = nullAtom; beginTag = true; selfClosingTag = false; brokenXMLStyle = false; if (m_sourceInfo) m_sourceInfo->clear(); } void addViewSourceChar(UChar c) { if (!m_sourceInfo.get()) m_sourceInfo.set(new Vector); m_sourceInfo->append(c); } RefPtr attrs; RefPtr text; AtomicString tagName; bool beginTag; bool selfClosingTag; bool brokenXMLStyle; OwnPtr > m_sourceInfo; }; enum DoctypeState { DoctypeBegin, DoctypeBeforeName, DoctypeName, DoctypeAfterName, DoctypeBeforePublicID, DoctypePublicID, DoctypeAfterPublicID, DoctypeBeforeSystemID, DoctypeSystemID, DoctypeAfterSystemID, DoctypeBogus }; class DoctypeToken { public: DoctypeToken() {} void reset() { m_name.clear(); m_publicID.clear(); m_systemID.clear(); m_state = DoctypeBegin; m_source.clear(); m_forceQuirks = false; } DoctypeState state() { return m_state; } void setState(DoctypeState s) { m_state = s; } Vector m_name; Vector m_publicID; Vector m_systemID; DoctypeState m_state; Vector m_source; bool m_forceQuirks; // Used by the HTML5 parser. }; //----------------------------------------------------------------------------- // FIXME: This class does too much. Right now it is both an HTML tokenizer as well // as handling all of the non-tokenizer-specific junk related to tokenizing HTML // (like dealing with " UChar searchBuffer[10]; // Counts where we are in the string we are scanning for int searchCount; // the stopper string const char* m_searchStopper; int m_searchStopperLength; // if no more data is coming, just parse what we have (including ext scripts that // may be still downloading) and finish bool m_noMoreData; // URL to get source code of script from String m_scriptTagSrcAttrValue; String m_scriptTagCharsetAttrValue; // the HTML code we will parse after the external script we are waiting for has loaded SegmentedString m_pendingSrc; // the HTML code we will parse after this particular script has // loaded, but before all pending HTML SegmentedString* m_currentPrependingSrc; // true if we are executing a script while parsing a document. This causes the parsing of // the output of the script to be postponed until after the script has finished executing int m_executingScript; Deque > m_pendingScripts; RefPtr m_scriptNode; bool m_requestingScript; bool m_hasScriptsWaitingForStylesheets; // if we found one broken comment, there are most likely others as well // store a flag to get rid of the O(n^2) behaviour in such a case. bool m_brokenComments; // current line number int m_lineNumber; int m_currentScriptTagStartLineNumber; int m_currentTagStartLineNumber; double m_tokenizerTimeDelay; int m_tokenizerChunkSize; // The timer for continued processing. Timer m_timer; // The timer for continued executing external scripts. Timer m_externalScriptsTimer; // This buffer can hold arbitrarily long user-defined attribute names, such as in EMBED tags. // So any fixed number might be too small, but rather than rewriting all usage of this buffer // we'll just make it large enough to handle all imaginable cases. #define CBUFLEN 1024 UChar m_cBuffer[CBUFLEN + 2]; unsigned int m_cBufferPos; SegmentedString m_src; OwnPtr m_treeBuilder; bool m_inWrite; bool m_fragment; FragmentScriptingPermission m_scriptingPermission; OwnPtr m_preloadScanner; }; UChar decodeNamedEntity(const char*); } // namespace WebCore #endif // LegacyHTMLDocumentParser_h