1 files changed, 216 insertions, 38 deletions
diff --git a/WebCore/html/HTMLDocument.cpp b/WebCore/html/HTMLDocument.cpp
index eb9eb4d..8db8422 100644
--- a/WebCore/html/HTMLDocument.cpp
+++ b/WebCore/html/HTMLDocument.cpp
@@ -74,6 +74,11 @@
 
 #include "DocTypeStrings.cpp"
 
+#ifdef ANDROID_META_SUPPORT
+#include "FrameTree.h"
+#include "Settings.h"
+#endif
+
 namespace WebCore {
 
 using namespace HTMLNames;
@@ -82,7 +87,6 @@ HTMLDocument::HTMLDocument(DOMImplementation* implementation, Frame* frame)
     : Document(implementation, frame)
 {
     clearXMLVersion();
-    setParseMode(Compat);
 }
 
 HTMLDocument::~HTMLDocument()
@@ -274,7 +278,7 @@ Tokenizer *HTMLDocument::createTokenizer()
 
 bool HTMLDocument::childAllowed(Node *newChild)
 {
-    return newChild->hasTagName(htmlTag) || newChild->isCommentNode() || (newChild->nodeType() == DOCUMENT_TYPE_NODE && !doctype());
+    return newChild->hasTagName(htmlTag) || newChild->isCommentNode();
 }
 
 PassRefPtr<Element> HTMLDocument::createElement(const String &name, ExceptionCode& ec)
@@ -347,62 +351,236 @@ bool HTMLDocument::hasDocExtraNamedItem(const String& name)
     return docExtraNamedItemCounts.get(name.impl()) != 0;
 }
 
-void HTMLDocument::determineParseMode()
+const int PARSEMODE_HAVE_DOCTYPE        =       (1<<0);
+const int PARSEMODE_HAVE_PUBLIC_ID      =       (1<<1);
+const int PARSEMODE_HAVE_SYSTEM_ID      =       (1<<2);
+const int PARSEMODE_HAVE_INTERNAL       =       (1<<3);
+
+static int parseDocTypePart(const String& buffer, int index)
+{
+    while (true) {
+        UChar ch = buffer[index];
+        if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
+            ++index;
+        else if (ch == '-') {
+            int tmpIndex=index;
+            if (buffer[index+1] == '-' &&
+                ((tmpIndex=buffer.find("--", index+2)) != -1))
+                index = tmpIndex+2;
+            else
+                return index;
+        }
+        else
+            return index;
+    }
+}
+
+static bool containsString(const char* str, const String& buffer, int offset)
+{
+    String startString(str);
+    if (offset + startString.length() > buffer.length())
+        return false;
+    
+    String bufferString = buffer.substring(offset, startString.length()).lower();
+    String lowerStart = startString.lower();
+
+    return bufferString.startsWith(lowerStart);
+}
+
+static bool parseDocTypeDeclaration(const String& buffer,
+                                    int* resultFlags,
+                                    String& name,
+                                    String& publicID,
+                                    String& systemID)
 {
-    // FIXME: It's terrible that this code runs separately and isn't just built in to the
-    // HTML tokenizer/parser.
+    bool haveDocType = false;
+    *resultFlags = 0;
+
+    // Skip through any comments and processing instructions.
+    int index = 0;
+    do {
+        index = buffer.find('<', index);
+        if (index == -1) break;
+        UChar nextChar = buffer[index+1];
+        if (nextChar == '!') {
+            if (containsString("doctype", buffer, index+2)) {
+                haveDocType = true;
+                index += 9; // Skip "<!DOCTYPE"
+                break;
+            }
+            index = parseDocTypePart(buffer,index);
+            index = buffer.find('>', index);
+        }
+        else if (nextChar == '?')
+            index = buffer.find('>', index);
+        else
+            break;
+    } while (index != -1);
+
+    if (!haveDocType)
+        return true;
+    *resultFlags |= PARSEMODE_HAVE_DOCTYPE;
+
+    index = parseDocTypePart(buffer, index);
+    if (!containsString("html", buffer, index))
+        return false;
+    
+    name = buffer.substring(index, 4);
+    index = parseDocTypePart(buffer, index+4);
+    bool hasPublic = containsString("public", buffer, index);
+    if (hasPublic) {
+        index = parseDocTypePart(buffer, index+6);
+
+        // We've read <!DOCTYPE HTML PUBLIC (not case sensitive).
+        // Now we find the beginning and end of the public identifers
+        // and system identifiers (assuming they're even present).
+        UChar theChar = buffer[index];
+        if (theChar != '\"' && theChar != '\'')
+            return false;
+        
+        // |start| is the first character (after the quote) and |end|
+        // is the final quote, so there are |end|-|start| characters.
+        int publicIDStart = index+1;
+        int publicIDEnd = buffer.find(theChar, publicIDStart);
+        if (publicIDEnd == -1)
+            return false;
+        index = parseDocTypePart(buffer, publicIDEnd+1);
+        UChar next = buffer[index];
+        if (next == '>') {
+            // Public identifier present, but no system identifier.
+            // Do nothing.  Note that this is the most common
+            // case.
+        }
+        else if (next == '\"' || next == '\'') {
+            // We have a system identifier.
+            *resultFlags |= PARSEMODE_HAVE_SYSTEM_ID;
+            int systemIDStart = index+1;
+            int systemIDEnd = buffer.find(next, systemIDStart);
+            if (systemIDEnd == -1)
+                return false;
+            systemID = buffer.substring(systemIDStart, systemIDEnd - systemIDStart);
+        }
+        else if (next == '[') {
+            // We found an internal subset.
+            *resultFlags |= PARSEMODE_HAVE_INTERNAL;
+        }
+        else
+            return false; // Something's wrong.
+
+        // We need to trim whitespace off the public identifier.
+        publicID = buffer.substring(publicIDStart, publicIDEnd - publicIDStart);
+        publicID = publicID.stripWhiteSpace();
+        *resultFlags |= PARSEMODE_HAVE_PUBLIC_ID;
+    } else {
+        if (containsString("system", buffer, index)) {
+            // Doctype has a system ID but no public ID
+            *resultFlags |= PARSEMODE_HAVE_SYSTEM_ID;
+            index = parseDocTypePart(buffer, index+6);
+            UChar next = buffer[index];
+            if (next != '\"' && next != '\'')
+                return false;
+            int systemIDStart = index+1;
+            int systemIDEnd = buffer.find(next, systemIDStart);
+            if (systemIDEnd == -1)
+                return false;
+            systemID = buffer.substring(systemIDStart, systemIDEnd - systemIDStart);
+            index = parseDocTypePart(buffer, systemIDEnd+1);
+        }
+
+        UChar nextChar = buffer[index];
+        if (nextChar == '[')
+            *resultFlags |= PARSEMODE_HAVE_INTERNAL;
+        else if (nextChar != '>')
+            return false;
+    }
+
+    return true;
+}
 
+void HTMLDocument::determineParseMode(const String& str)
+{
     // This code more or less mimics Mozilla's implementation (specifically the
     // doctype parsing implemented by David Baron in Mozilla's nsParser.cpp).
     //
     // There are three possible parse modes:
-    // COMPAT - quirks mode emulates WinIE and NS4.  CSS parsing is also relaxed in this mode, e.g., unit types can
+    // COMPAT - quirks mode emulates WinIE
+    // and NS4.  CSS parsing is also relaxed in this mode, e.g., unit types can
     // be omitted from numbers.
-    // ALMOST STRICT - This mode is identical to strict mode except for its treatment of line-height in the inline box model.  For
-    // now (until the inline box model is re-written), this mode is identical to STANDARDS mode.
-    // STRICT - no quirks apply.  Web pages will obey the specifications to the letter.
-    bool wasInCompatMode = inCompatMode();
-    DocumentType* docType = doctype();
-    if (!docType || !equalIgnoringCase(docType->name(), "html"))
-        // No doctype found at all or the doctype is not HTML.  Default to quirks mode and Html4.
-        setParseMode(Compat);
-    else if (!doctype()->systemId().isEmpty() && equalIgnoringCase(docType->systemId(), "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"))
-        // Assume quirks mode for this particular system ID.  In the HTML5 spec, this is the only
-        // system identifier that is examined.
-        setParseMode(Compat);
-    else if (docType->publicId().isEmpty())
-        // A doctype without a public ID means use strict mode.
-        setParseMode(Strict);
-    else {
-        // We have to check a list of public IDs to see what we
-        // should do.
-        String lowerPubID = docType->publicId().lower();
-        CString pubIDStr = lowerPubID.latin1();
-       
-        // Look up the entry in our gperf-generated table.
-        const PubIDInfo* doctypeEntry = findDoctypeEntry(pubIDStr.data(), pubIDStr.length());
-        if (!doctypeEntry)
-            // The DOCTYPE is not in the list.  Assume strict mode.
+    // ALMOST STRICT - This mode is identical to strict mode
+    // except for its treatment of line-height in the inline box model.  For
+    // now (until the inline box model is re-written), this mode is identical
+    // to STANDARDS mode.
+    // STRICT - no quirks apply.  Web pages will obey the specifications to
+    // the letter.
+
+    String name, systemID, publicID;
+    int resultFlags = 0;
+    if (parseDocTypeDeclaration(str, &resultFlags, name, publicID, systemID)) {
+        if (resultFlags & PARSEMODE_HAVE_DOCTYPE)
+            setDocType(new DocumentType(this, name, publicID, systemID));
+        if (!(resultFlags & PARSEMODE_HAVE_DOCTYPE)) {
+            // No doctype found at all.  Default to quirks mode and Html4.
+            setParseMode(Compat);
+            setHTMLMode(Html4);
+        }
+        else if ((resultFlags & PARSEMODE_HAVE_INTERNAL) ||
+                 !(resultFlags & PARSEMODE_HAVE_PUBLIC_ID)) {
+            // Internal subsets always denote full standards, as does
+            // a doctype without a public ID.
             setParseMode(Strict);
+            setHTMLMode(Html4);
+        }
         else {
-            switch (docType->systemId().isEmpty() ?
-                    doctypeEntry->mode_if_no_sysid :
-                    doctypeEntry->mode_if_sysid) {
+            // We have to check a list of public IDs to see what we
+            // should do.
+            String lowerPubID = publicID.lower();
+            CString pubIDStr = lowerPubID.latin1();
+           
+#ifdef ANDROID_META_SUPPORT
+            if ((!frame()->tree() || !frame()->tree()->parent()) &&
+                    strstr(pubIDStr.data(), "-//wapforum//dtd xhtml mobile 1.") == pubIDStr.data()) {
+                // fit mobile sites directly in the screen
+                frame()->settings()->setMetadataSettings("width", "device-width");
+            }
+#endif
+            // Look up the entry in our gperf-generated table.
+            const PubIDInfo* doctypeEntry = findDoctypeEntry(pubIDStr.data(), pubIDStr.length());
+            if (!doctypeEntry) {
+                // The DOCTYPE is not in the list.  Assume strict mode.
+                setParseMode(Strict);
+                setHTMLMode(Html4);
+                return;
+            }
+
+            switch ((resultFlags & PARSEMODE_HAVE_SYSTEM_ID) ?
+                    doctypeEntry->mode_if_sysid :
+                    doctypeEntry->mode_if_no_sysid)
+            {
                 case PubIDInfo::eQuirks3:
+                    setParseMode(Compat);
+                    setHTMLMode(Html3);
+                    break;
                 case PubIDInfo::eQuirks:
                     setParseMode(Compat);
+                    setHTMLMode(Html4);
                     break;
                 case PubIDInfo::eAlmostStandards:
                     setParseMode(AlmostStrict);
+                    setHTMLMode(Html4);
                     break;
                  default:
                     ASSERT(false);
             }
-        }
+        }   
     }
-    
-    if (inCompatMode() != wasInCompatMode)
-        updateStyleSelector();
+    else {
+        // Malformed doctype implies quirks mode.
+        setParseMode(Compat);
+        setHTMLMode(Html3);
+    }
+  
+    styleSelector()->strictParsing = !inCompatMode();
+ 
 }
     
 }