/* * Copyright (C) 2010 Google, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HTMLToken_h #define HTMLToken_h #include "NamedNodeMap.h" #include #include namespace WebCore { class HTMLToken : public Noncopyable { public: enum Type { Uninitialized, DOCTYPE, StartTag, EndTag, Comment, Character, EndOfFile, }; class Attribute { public: WTF::Vector m_name; WTF::Vector m_value; }; typedef WTF::Vector AttributeList; typedef WTF::Vector DataVector; HTMLToken() { clear(); } void clear() { m_type = Uninitialized; } void beginStartTag(UChar character) { ASSERT(m_type == Uninitialized); m_type = StartTag; m_data.clear(); m_selfClosing = false; m_currentAttribute = 0; m_attributes.clear(); m_data.append(character); } template void beginEndTag(T characters) { ASSERT(m_type == Uninitialized); m_type = EndTag; m_data.clear(); m_selfClosing = false; m_currentAttribute = 0; m_attributes.clear(); m_data.append(characters); } void beginCharacter(UChar character) { ASSERT(m_type == Uninitialized); m_type = Character; m_data.clear(); m_data.append(character); } void beginComment() { ASSERT(m_type == Uninitialized); m_type = Comment; m_data.clear(); } void beginDOCTYPE() { ASSERT(m_type == Uninitialized); m_type = DOCTYPE; m_data.clear(); m_doctypeData.set(new DoctypeData()); } void beginDOCTYPE(UChar character) { beginDOCTYPE(); m_data.append(character); } void appendToName(UChar character) { ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); m_data.append(character); } template void appendToCharacter(T characters) { ASSERT(m_type == Character); m_data.append(characters); } void appendToComment(UChar character) { ASSERT(m_type == Comment); m_data.append(character); } void addNewAttribute() { ASSERT(m_type == StartTag || m_type == EndTag); m_attributes.grow(m_attributes.size() + 1); m_currentAttribute = &m_attributes.last(); } void appendToAttributeName(UChar character) { ASSERT(m_type == StartTag || m_type == EndTag); m_currentAttribute->m_name.append(character); } void appendToAttributeValue(UChar character) { ASSERT(m_type == StartTag || m_type == EndTag); m_currentAttribute->m_value.append(character); } Type type() const { return m_type; } bool selfClosing() const { ASSERT(m_type == StartTag || m_type == EndTag); return m_selfClosing; } const AttributeList& attributes() const { ASSERT(m_type == StartTag || m_type == EndTag); return m_attributes; } const DataVector& name() const { ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); return m_data; } const DataVector& characters() const { ASSERT(m_type == Character); return m_data; } const DataVector& comment() const { ASSERT(m_type == Comment); return m_data; } // FIXME: Distinguish between a missing public identifer and an empty one. const WTF::Vector& publicIdentifier() const { ASSERT(m_type == DOCTYPE); return m_doctypeData->m_publicIdentifier; } // FIXME: Distinguish between a missing system identifer and an empty one. const WTF::Vector& systemIdentifier() const { ASSERT(m_type == DOCTYPE); return m_doctypeData->m_systemIdentifier; } void setPublicIdentifierToEmptyString() { ASSERT(m_type == DOCTYPE); m_doctypeData->m_hasPublicIdentifier = true; m_doctypeData->m_publicIdentifier.clear(); } void setSystemIdentifierToEmptyString() { ASSERT(m_type == DOCTYPE); m_doctypeData->m_hasSystemIdentifier = true; m_doctypeData->m_systemIdentifier.clear(); } bool forceQuirks() const { ASSERT(m_type == DOCTYPE); return m_doctypeData->m_forceQuirks; } void setForceQuirks() { ASSERT(m_type == DOCTYPE); m_doctypeData->m_forceQuirks = true; } void appendToPublicIdentifier(UChar character) { ASSERT(m_type == DOCTYPE); ASSERT(m_doctypeData->m_hasPublicIdentifier); m_doctypeData->m_publicIdentifier.append(character); } void appendToSystemIdentifier(UChar character) { ASSERT(m_type == DOCTYPE); ASSERT(m_doctypeData->m_hasSystemIdentifier); m_doctypeData->m_systemIdentifier.append(character); } private: // FIXME: I'm not sure what the final relationship between HTMLToken and // AtomicHTMLToken will be. I'm marking this a friend for now, but we'll // want to end up with a cleaner interface between the two classes. friend class AtomicHTMLToken; class DoctypeData { public: DoctypeData() : m_hasPublicIdentifier(false) , m_hasSystemIdentifier(false) , m_forceQuirks(false) { } bool m_hasPublicIdentifier; bool m_hasSystemIdentifier; bool m_forceQuirks; WTF::Vector m_publicIdentifier; WTF::Vector m_systemIdentifier; }; Type m_type; // "name" for DOCTYPE, StartTag, and EndTag // "characters" for Character // "data" for Comment DataVector m_data; // For DOCTYPE OwnPtr m_doctypeData; // For StartTag and EndTag bool m_selfClosing; AttributeList m_attributes; // Old tokenizer reserves 10. // A pointer into m_attributes used during lexing. Attribute* m_currentAttribute; }; // FIXME: This class should eventually be named HTMLToken once we move the // exiting HTMLToken to be internal to the HTMLTokenizer. class AtomicHTMLToken : public Noncopyable { public: AtomicHTMLToken(HTMLToken& token) : m_type(token.type()) { switch (m_type) { case HTMLToken::Uninitialized: ASSERT_NOT_REACHED(); break; case HTMLToken::DOCTYPE: m_name = AtomicString(token.name().data(), token.name().size()); m_doctypeData.set(token.m_doctypeData.release()); break; case HTMLToken::EndOfFile: break; case HTMLToken::StartTag: case HTMLToken::EndTag: { m_selfClosing = token.selfClosing(); m_name = AtomicString(token.name().data(), token.name().size()); const HTMLToken::AttributeList& attributes = token.attributes(); for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) { if (!iter->m_name.isEmpty()) { String name(iter->m_name.data(), iter->m_name.size()); String value(iter->m_value.data(), iter->m_value.size()); RefPtr mappedAttribute = Attribute::createMapped(name, value); if (!m_attributes) m_attributes = NamedNodeMap::create(); m_attributes->insertAttribute(mappedAttribute.release(), false); } } break; } case HTMLToken::Comment: m_data = String(token.comment().data(), token.comment().size()); break; case HTMLToken::Character: m_data = String(token.characters().data(), token.characters().size()); break; } } AtomicHTMLToken(HTMLToken::Type type, AtomicString name) : m_type(type) , m_name(name) { } HTMLToken::Type type() const { return m_type; } const AtomicString& name() const { ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE); return m_name; } bool selfClosing() const { ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag); return m_selfClosing; } NamedNodeMap* attributes() const { ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag); return m_attributes.get(); } const String& characters() const { ASSERT(m_type == HTMLToken::Character); return m_data; } const String& comment() const { ASSERT(m_type == HTMLToken::Comment); return m_data; } // FIXME: Distinguish between a missing public identifer and an empty one. const WTF::Vector& publicIdentifier() const { ASSERT(m_type == HTMLToken::DOCTYPE); return m_doctypeData->m_publicIdentifier; } // FIXME: Distinguish between a missing system identifer and an empty one. const WTF::Vector& systemIdentifier() const { ASSERT(m_type == HTMLToken::DOCTYPE); return m_doctypeData->m_systemIdentifier; } bool forceQuirks() const { ASSERT(m_type == HTMLToken::DOCTYPE); return m_doctypeData->m_forceQuirks; } private: HTMLToken::Type m_type; // "name" for DOCTYPE, StartTag, and EndTag AtomicString m_name; // "characters" for Character // "data" for Comment String m_data; // For DOCTYPE OwnPtr m_doctypeData; // For StartTag and EndTag bool m_selfClosing; RefPtr m_attributes; }; } #endif