diff options
Diffstat (limited to 'WebCore/html/HTMLParser.h')
-rw-r--r-- | WebCore/html/HTMLParser.h | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/WebCore/html/HTMLParser.h b/WebCore/html/HTMLParser.h new file mode 100644 index 0000000..3a5b437 --- /dev/null +++ b/WebCore/html/HTMLParser.h @@ -0,0 +1,184 @@ +/* + Copyright (C) 1997 Martin Jones (mjones@kde.org) + (C) 1997 Torben Weis (weis@kde.org) + (C) 1998 Waldo Bastian (bastian@kde.org) + (C) 1999 Lars Knoll (knoll@kde.org) + Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#ifndef HTMLParser_h +#define HTMLParser_h + +#include "QualifiedName.h" +#include <wtf/Forward.h> +#include <wtf/RefPtr.h> +#include "HTMLParserErrorCodes.h" + +namespace WebCore { + +class DoctypeToken; +class Document; +class DocumentFragment; +class HTMLDocument; +class HTMLFormElement; +class HTMLHeadElement; +class HTMLMapElement; +class Node; +class Token; + +struct HTMLStackElem; + +/** + * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and + * builds up the Document structure from it. + */ +class HTMLParser : Noncopyable { +public: + HTMLParser(HTMLDocument*, bool reportErrors); + HTMLParser(DocumentFragment*); + virtual ~HTMLParser(); + + /** + * parses one token delivered by the tokenizer + */ + PassRefPtr<Node> parseToken(Token*); + + // Parses a doctype token. + void parseDoctypeToken(DoctypeToken*); + + /** + * tokenizer says it's not going to be sending us any more tokens + */ + void finished(); + + /** + * resets the parser + */ + void reset(); + + bool skipMode() const { return !m_skipModeTag.isNull(); } + bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks; } + +private: + void setCurrent(Node*); + void derefCurrent(); + void setSkipMode(const QualifiedName& qName) { m_skipModeTag = qName.localName(); } + + PassRefPtr<Node> getNode(Token*); + bool bodyCreateErrorCheck(Token*, RefPtr<Node>&); + bool canvasCreateErrorCheck(Token*, RefPtr<Node>&); + bool commentCreateErrorCheck(Token*, RefPtr<Node>&); + bool ddCreateErrorCheck(Token*, RefPtr<Node>&); + bool dtCreateErrorCheck(Token*, RefPtr<Node>&); + bool formCreateErrorCheck(Token*, RefPtr<Node>&); + bool framesetCreateErrorCheck(Token*, RefPtr<Node>&); + bool headCreateErrorCheck(Token*, RefPtr<Node>&); + bool iframeCreateErrorCheck(Token*, RefPtr<Node>&); + bool isindexCreateErrorCheck(Token*, RefPtr<Node>&); + bool mapCreateErrorCheck(Token*, RefPtr<Node>&); + bool nestedCreateErrorCheck(Token*, RefPtr<Node>&); + bool nestedPCloserCreateErrorCheck(Token*, RefPtr<Node>&); + bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&); + bool noembedCreateErrorCheck(Token*, RefPtr<Node>&); + bool noframesCreateErrorCheck(Token*, RefPtr<Node>&); + bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&); + bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&); + bool pCloserCreateErrorCheck(Token*, RefPtr<Node>&); + bool pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&); + bool selectCreateErrorCheck(Token*, RefPtr<Node>&); + bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&); + bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&); + bool textCreateErrorCheck(Token*, RefPtr<Node>&); + + void processCloseTag(Token*); + + bool insertNode(Node*, bool flat = false); + bool handleError(Node*, bool flat, const AtomicString& localName, int tagPriority); + + void pushBlock(const AtomicString& tagName, int level); + void popBlock(const AtomicString& tagName, bool reportErrors = false); + void popBlock(const QualifiedName& qName, bool reportErrors = false) { return popBlock(qName.localName(), reportErrors); } // Convenience function for readability. + void popOneBlock(); + void moveOneBlockToStack(HTMLStackElem*& head); + inline HTMLStackElem* popOneBlockCommon(); + void popInlineBlocks(); + + void freeBlock(); + + void createHead(); + + static bool isResidualStyleTag(const AtomicString& tagName); + static bool isAffectedByResidualStyle(const AtomicString& tagName); + void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem*); + void reopenResidualStyleTags(HTMLStackElem*, Node* malformedTableParent); + + bool allowNestedRedundantTag(const AtomicString& tagName); + + static bool isHeaderTag(const AtomicString& tagName); + void popNestedHeaderTag(); + + bool isInline(Node*) const; + + void startBody(); // inserts the isindex element + PassRefPtr<Node> handleIsindex(Token*); + + void checkIfHasPElementInScope(); + bool hasPElementInScope() + { + if (m_hasPElementInScope == Unknown) + checkIfHasPElementInScope(); + return m_hasPElementInScope == InScope; + } + + void reportError(HTMLParserErrorCode errorCode, const AtomicString* tagName1 = 0, const AtomicString* tagName2 = 0, bool closeTags = false) + { if (!m_reportErrors) return; reportErrorToConsole(errorCode, tagName1, tagName2, closeTags); } + + void reportErrorToConsole(HTMLParserErrorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags); + + Document* document; + + // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element. + Node* current; + // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref. + bool didRefCurrent; + + HTMLStackElem* blockStack; + + enum ElementInScopeState { NotInScope, InScope, Unknown }; + ElementInScopeState m_hasPElementInScope; + + RefPtr<HTMLFormElement> m_currentFormElement; // currently active form + RefPtr<HTMLMapElement> m_currentMapElement; // current map + HTMLHeadElement* head; // head element; needed for HTML which defines <base> after </head> + RefPtr<Node> m_isindexElement; // a possible <isindex> element in the head + + bool inBody; + bool haveContent; + bool haveFrameSet; + + AtomicString m_skipModeTag; // tells the parser to discard all tags until it reaches the one specified + + bool m_isParsingFragment; + bool m_reportErrors; + bool m_handlingResidualStyleAcrossBlocks; + int inStrayTableContent; +}; + +} + +#endif // HTMLParser_h |