summaryrefslogtreecommitdiffstats
path: root/WebCore/html/HTMLDocument.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/html/HTMLDocument.cpp')
-rw-r--r--WebCore/html/HTMLDocument.cpp254
1 files changed, 216 insertions, 38 deletions
diff --git a/WebCore/html/HTMLDocument.cpp b/WebCore/html/HTMLDocument.cpp
index eb9eb4d..8db8422 100644
--- a/WebCore/html/HTMLDocument.cpp
+++ b/WebCore/html/HTMLDocument.cpp
@@ -74,6 +74,11 @@
#include "DocTypeStrings.cpp"
+#ifdef ANDROID_META_SUPPORT
+#include "FrameTree.h"
+#include "Settings.h"
+#endif
+
namespace WebCore {
using namespace HTMLNames;
@@ -82,7 +87,6 @@ HTMLDocument::HTMLDocument(DOMImplementation* implementation, Frame* frame)
: Document(implementation, frame)
{
clearXMLVersion();
- setParseMode(Compat);
}
HTMLDocument::~HTMLDocument()
@@ -274,7 +278,7 @@ Tokenizer *HTMLDocument::createTokenizer()
bool HTMLDocument::childAllowed(Node *newChild)
{
- return newChild->hasTagName(htmlTag) || newChild->isCommentNode() || (newChild->nodeType() == DOCUMENT_TYPE_NODE && !doctype());
+ return newChild->hasTagName(htmlTag) || newChild->isCommentNode(); // Accepts only nodes with the htmlTag tag name and comment nodes; unlike the removed line, DOCUMENT_TYPE_NODE children are no longer accepted here.
}
PassRefPtr<Element> HTMLDocument::createElement(const String &name, ExceptionCode& ec)
@@ -347,62 +351,236 @@ bool HTMLDocument::hasDocExtraNamedItem(const String& name)
return docExtraNamedItemCounts.get(name.impl()) != 0;
}
-void HTMLDocument::determineParseMode()
+const int PARSEMODE_HAVE_DOCTYPE = (1<<0); // Bit set in resultFlags once a "<!DOCTYPE" declaration has been found.
+const int PARSEMODE_HAVE_PUBLIC_ID = (1<<1); // Bit set once a quoted public identifier has been extracted.
+const int PARSEMODE_HAVE_SYSTEM_ID = (1<<2); // Bit set when the declaration carries a system identifier.
+const int PARSEMODE_HAVE_INTERNAL = (1<<3); // Bit set when a '[' (internal subset) follows the identifiers.
+
+static int parseDocTypePart(const String& buffer, int index)
+{
+    // Advances past any run of whitespace and "--...--" comment sections that
+    // begins at |index| and returns the index of the first character that is
+    // part of neither. A "--" without a closing "--" is not skipped: the index
+    // of its first dash is returned instead.
+    for (;;) {
+        const UChar current = buffer[index];
+        if (current == ' ' || current == '\t' || current == '\n' || current == '\r') {
+            ++index;
+            continue;
+        }
+
+        // Anything other than an opening "--" ends the scan right here.
+        if (current != '-' || buffer[index + 1] != '-')
+            return index;
+
+        const int closingDashes = buffer.find("--", index + 2);
+        if (closingDashes == -1)
+            return index;
+
+        // Resume just after the closing "--".
+        index = closingDashes + 2;
+    }
+}
+
+static bool containsString(const char* str, const String& buffer, int offset)
+{
+    // Case-insensitive check that |buffer| holds |str| starting at |offset|.
+    // Returns false when fewer than |str|'s length characters remain.
+    String expected(str);
+    const bool fitsInBuffer = offset + expected.length() <= buffer.length();
+    if (!fitsInBuffer)
+        return false;
+
+    // Compare lower-cased copies of the candidate window and the expected text.
+    String candidate = buffer.substring(offset, expected.length());
+    return candidate.lower().startsWith(expected.lower());
+}
+
+static bool parseDocTypeDeclaration(const String& buffer,
+ int* resultFlags,
+ String& name,
+ String& publicID,
+ String& systemID)
{
- // FIXME: It's terrible that this code runs separately and isn't just built in to the
- // HTML tokenizer/parser.
+ // Scans |buffer| for a leading "<!DOCTYPE html ...>" declaration, skipping
+ // any comments and processing instructions that precede it.
+ //
+ // Outputs:
+ // *resultFlags - reset to 0, then OR-ed with the PARSEMODE_HAVE_* bits
+ // describing what was found.
+ // name - the four characters matched (case-insensitively) as "html".
+ // publicID - the public identifier, with surrounding whitespace stripped.
+ // systemID - the system identifier.
+ //
+ // Returns true when no doctype is present (flags stay 0) or when the
+ // declaration was parsed; returns false when a doctype was found but is
+ // malformed (the name does not start with "html", an identifier is not
+ // quoted or not terminated, or an unexpected character follows).
+ bool haveDocType = false;
+ *resultFlags = 0;
+
+ // Skip through any comments and processing instructions.
+ int index = 0;
+ do {
+ index = buffer.find('<', index);
+ if (index == -1) break;
+ UChar nextChar = buffer[index+1];
+ if (nextChar == '!') {
+ if (containsString("doctype", buffer, index+2)) {
+ haveDocType = true;
+ index += 9; // Skip "<!DOCTYPE"
+ break;
+ }
+ // Start after the "<!" so that a "--...--" comment body is skipped
+ // as a unit. Starting at '<' made parseDocTypePart() return at once,
+ // so a '>' inside the comment was mistaken for the end of the markup.
+ index = parseDocTypePart(buffer, index+2);
+ index = buffer.find('>', index);
+ }
+ else if (nextChar == '?')
+ index = buffer.find('>', index);
+ else
+ break;
+ } while (index != -1);
+
+ if (!haveDocType)
+ return true;
+ *resultFlags |= PARSEMODE_HAVE_DOCTYPE;
+
+ index = parseDocTypePart(buffer, index);
+ if (!containsString("html", buffer, index))
+ return false;
+
+ name = buffer.substring(index, 4);
+ index = parseDocTypePart(buffer, index+4);
+ bool hasPublic = containsString("public", buffer, index);
+ if (hasPublic) {
+ index = parseDocTypePart(buffer, index+6);
+
+ // We've read <!DOCTYPE HTML PUBLIC (not case sensitive).
+ // Now we find the beginning and end of the public identifiers
+ // and system identifiers (assuming they're even present).
+ UChar theChar = buffer[index];
+ if (theChar != '\"' && theChar != '\'')
+ return false;
+
+ // |start| is the first character (after the quote) and |end|
+ // is the final quote, so there are |end|-|start| characters.
+ int publicIDStart = index+1;
+ int publicIDEnd = buffer.find(theChar, publicIDStart);
+ if (publicIDEnd == -1)
+ return false;
+ index = parseDocTypePart(buffer, publicIDEnd+1);
+ UChar next = buffer[index];
+ if (next == '>') {
+ // Public identifier present, but no system identifier.
+ // Do nothing. Note that this is the most common
+ // case.
+ }
+ else if (next == '\"' || next == '\'') {
+ // We have a system identifier.
+ *resultFlags |= PARSEMODE_HAVE_SYSTEM_ID;
+ int systemIDStart = index+1;
+ int systemIDEnd = buffer.find(next, systemIDStart);
+ if (systemIDEnd == -1)
+ return false;
+ systemID = buffer.substring(systemIDStart, systemIDEnd - systemIDStart);
+ }
+ else if (next == '[') {
+ // We found an internal subset.
+ *resultFlags |= PARSEMODE_HAVE_INTERNAL;
+ }
+ else
+ return false; // Something's wrong.
+
+ // We need to trim whitespace off the public identifier.
+ publicID = buffer.substring(publicIDStart, publicIDEnd - publicIDStart);
+ publicID = publicID.stripWhiteSpace();
+ *resultFlags |= PARSEMODE_HAVE_PUBLIC_ID;
+ } else {
+ if (containsString("system", buffer, index)) {
+ // Doctype has a system ID but no public ID
+ *resultFlags |= PARSEMODE_HAVE_SYSTEM_ID;
+ index = parseDocTypePart(buffer, index+6);
+ UChar next = buffer[index];
+ if (next != '\"' && next != '\'')
+ return false;
+ int systemIDStart = index+1;
+ int systemIDEnd = buffer.find(next, systemIDStart);
+ if (systemIDEnd == -1)
+ return false;
+ systemID = buffer.substring(systemIDStart, systemIDEnd - systemIDStart);
+ index = parseDocTypePart(buffer, systemIDEnd+1);
+ }
+
+ // Without a public ID the declaration must end here or open an
+ // internal subset.
+ UChar nextChar = buffer[index];
+ if (nextChar == '[')
+ *resultFlags |= PARSEMODE_HAVE_INTERNAL;
+ else if (nextChar != '>')
+ return false;
+ }
+
+ return true;
+}
+void HTMLDocument::determineParseMode(const String& str)
+{
+ // Parses the doctype declaration (if any) at the start of |str|, creates the
+ // DocumentType node when one is found, selects the parse mode and HTML mode,
+ // and finally synchronizes the style selector's strictParsing flag. Every
+ // path reaches that final synchronization; there is no early return.
// This code more or less mimics Mozilla's implementation (specifically the
// doctype parsing implemented by David Baron in Mozilla's nsParser.cpp).
//
// There are three possible parse modes:
- // COMPAT - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
+ // COMPAT - quirks mode emulates WinIE
+ // and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
// be omitted from numbers.
- // ALMOST STRICT - This mode is identical to strict mode except for its treatment of line-height in the inline box model. For
- // now (until the inline box model is re-written), this mode is identical to STANDARDS mode.
- // STRICT - no quirks apply. Web pages will obey the specifications to the letter.
- bool wasInCompatMode = inCompatMode();
- DocumentType* docType = doctype();
- if (!docType || !equalIgnoringCase(docType->name(), "html"))
- // No doctype found at all or the doctype is not HTML. Default to quirks mode and Html4.
- setParseMode(Compat);
- else if (!doctype()->systemId().isEmpty() && equalIgnoringCase(docType->systemId(), "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"))
- // Assume quirks mode for this particular system ID. In the HTML5 spec, this is the only
- // system identifier that is examined.
- setParseMode(Compat);
- else if (docType->publicId().isEmpty())
- // A doctype without a public ID means use strict mode.
- setParseMode(Strict);
- else {
- // We have to check a list of public IDs to see what we
- // should do.
- String lowerPubID = docType->publicId().lower();
- CString pubIDStr = lowerPubID.latin1();
-
- // Look up the entry in our gperf-generated table.
- const PubIDInfo* doctypeEntry = findDoctypeEntry(pubIDStr.data(), pubIDStr.length());
- if (!doctypeEntry)
- // The DOCTYPE is not in the list. Assume strict mode.
+ // ALMOST STRICT - This mode is identical to strict mode
+ // except for its treatment of line-height in the inline box model. For
+ // now (until the inline box model is re-written), this mode is identical
+ // to STANDARDS mode.
+ // STRICT - no quirks apply. Web pages will obey the specifications to
+ // the letter.
+
+ String name, systemID, publicID;
+ int resultFlags = 0;
+ if (parseDocTypeDeclaration(str, &resultFlags, name, publicID, systemID)) {
+ if (resultFlags & PARSEMODE_HAVE_DOCTYPE)
+ setDocType(new DocumentType(this, name, publicID, systemID));
+ if (!(resultFlags & PARSEMODE_HAVE_DOCTYPE)) {
+ // No doctype found at all. Default to quirks mode and Html4.
+ setParseMode(Compat);
+ setHTMLMode(Html4);
+ }
+ else if ((resultFlags & PARSEMODE_HAVE_INTERNAL) ||
+ !(resultFlags & PARSEMODE_HAVE_PUBLIC_ID)) {
+ // Internal subsets always denote full standards, as does
+ // a doctype without a public ID.
setParseMode(Strict);
+ setHTMLMode(Html4);
+ }
else {
- switch (docType->systemId().isEmpty() ?
- doctypeEntry->mode_if_no_sysid :
- doctypeEntry->mode_if_sysid) {
+ // We have to check a list of public IDs to see what we
+ // should do.
+ String lowerPubID = publicID.lower();
+ CString pubIDStr = lowerPubID.latin1();
+
+#ifdef ANDROID_META_SUPPORT
+ // Only when this frame has no parent in the frame tree, and only for
+ // public IDs that begin with the WAP Forum XHTML Mobile 1.x prefix.
+ if ((!frame()->tree() || !frame()->tree()->parent()) &&
+ strstr(pubIDStr.data(), "-//wapforum//dtd xhtml mobile 1.") == pubIDStr.data()) {
+ // fit mobile sites directly in the screen
+ frame()->settings()->setMetadataSettings("width", "device-width");
+ }
+#endif
+ // Look up the entry in our gperf-generated table.
+ const PubIDInfo* doctypeEntry = findDoctypeEntry(pubIDStr.data(), pubIDStr.length());
+ if (!doctypeEntry) {
+ // The DOCTYPE is not in the list. Assume strict mode.
+ setParseMode(Strict);
+ setHTMLMode(Html4);
+ // Deliberately no early return: fall through so that strictParsing
+ // is synchronized below, as on every other path.
+ } else {
+ switch ((resultFlags & PARSEMODE_HAVE_SYSTEM_ID) ?
+ doctypeEntry->mode_if_sysid :
+ doctypeEntry->mode_if_no_sysid)
+ {
case PubIDInfo::eQuirks3:
+ setParseMode(Compat);
+ setHTMLMode(Html3);
+ break;
case PubIDInfo::eQuirks:
setParseMode(Compat);
+ setHTMLMode(Html4);
break;
case PubIDInfo::eAlmostStandards:
setParseMode(AlmostStrict);
+ setHTMLMode(Html4);
break;
default:
ASSERT(false);
}
+ }
- }
+ }
}
-
- if (inCompatMode() != wasInCompatMode)
- updateStyleSelector();
+ else {
+ // Malformed doctype implies quirks mode.
+ setParseMode(Compat);
+ setHTMLMode(Html3);
+ }
+
+ // Keep the style selector's CSS strictness in step with the chosen mode.
+ styleSelector()->strictParsing = !inCompatMode();
+
}
}