diff options
Diffstat (limited to 'WebKit/chromium/src/WebPageSerializerImpl.cpp')
-rw-r--r-- | WebKit/chromium/src/WebPageSerializerImpl.cpp | 159 |
1 files changed, 69 insertions, 90 deletions
diff --git a/WebKit/chromium/src/WebPageSerializerImpl.cpp b/WebKit/chromium/src/WebPageSerializerImpl.cpp index d5b2b7f..0d85d78 100644 --- a/WebKit/chromium/src/WebPageSerializerImpl.cpp +++ b/WebKit/chromium/src/WebPageSerializerImpl.cpp @@ -88,8 +88,6 @@ #include "HTMLMetaElement.h" #include "HTMLNames.h" #include "KURL.h" -#include "PlatformString.h" -#include "StringBuilder.h" #include "TextEncoding.h" #include "markup.h" @@ -107,22 +105,22 @@ namespace WebKit { // contegious string is found in the page. static const unsigned dataBufferCapacity = 65536; -WebPageSerializerImpl::SerializeDomParam::SerializeDomParam(const KURL& currentFrameURL, +WebPageSerializerImpl::SerializeDomParam::SerializeDomParam(const KURL& url, const TextEncoding& textEncoding, - Document* doc, + Document* document, const String& directoryName) - : currentFrameURL(currentFrameURL) + : url(url) , textEncoding(textEncoding) - , doc(doc) + , document(document) , directoryName(directoryName) - , hasDoctype(false) - , hasCheckedMeta(false) + , isHTMLDocument(document->isHTMLDocument()) + , haveSeenDocType(false) + , haveAddedCharsetDeclaration(false) , skipMetaElement(0) , isInScriptOrStyleTag(false) - , hasDocDeclaration(false) + , haveAddedXMLProcessingDirective(false) + , haveAddedContentsBeforeEnd(false) { - // Cache the value since we check it lots of times. - isHTMLDocument = doc->isHTMLDocument(); } String WebPageSerializerImpl::preActionBeforeSerializeOpenTag( @@ -150,41 +148,41 @@ String WebPageSerializerImpl::preActionBeforeSerializeOpenTag( } } else if (element->hasTagName(HTMLNames::htmlTag)) { // Check something before processing the open tag of HEAD element. - // First we add doc type declaration if original doc has it. - if (!param->hasDoctype) { - param->hasDoctype = true; - result.append(createMarkup(param->doc->doctype())); + // First we add doc type declaration if original document has it. 
+ if (!param->haveSeenDocType) { + param->haveSeenDocType = true; + result.append(createMarkup(param->document->doctype())); } // Add MOTW declaration before html tag. // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx. - result.append(WebPageSerializer::generateMarkOfTheWebDeclaration(param->currentFrameURL)); + result.append(WebPageSerializer::generateMarkOfTheWebDeclaration(param->url)); } else if (element->hasTagName(HTMLNames::baseTag)) { // Comment the BASE tag when serializing dom. result.append("<!--"); } } else { // Write XML declaration. - if (!param->hasDocDeclaration) { - param->hasDocDeclaration = true; + if (!param->haveAddedXMLProcessingDirective) { + param->haveAddedXMLProcessingDirective = true; // Get encoding info. - String xmlEncoding = param->doc->xmlEncoding(); + String xmlEncoding = param->document->xmlEncoding(); if (xmlEncoding.isEmpty()) - xmlEncoding = param->doc->frame()->loader()->encoding(); + xmlEncoding = param->document->frame()->loader()->writer()->encoding(); if (xmlEncoding.isEmpty()) xmlEncoding = UTF8Encoding().name(); result.append("<?xml version=\""); - result.append(param->doc->xmlVersion()); + result.append(param->document->xmlVersion()); result.append("\" encoding=\""); result.append(xmlEncoding); - if (param->doc->xmlStandalone()) + if (param->document->xmlStandalone()) result.append("\" standalone=\"yes"); result.append("\"?>\n"); } - // Add doc type declaration if original doc has it. - if (!param->hasDoctype) { - param->hasDoctype = true; - result.append(createMarkup(param->doc->doctype())); + // Add doc type declaration if original document has it. 
+ if (!param->haveSeenDocType) { + param->haveSeenDocType = true; + result.append(createMarkup(param->document->doctype())); } } return result.toString(); @@ -195,13 +193,13 @@ String WebPageSerializerImpl::postActionAfterSerializeOpenTag( { StringBuilder result; - param->hasAddedContentsBeforeEnd = false; + param->haveAddedContentsBeforeEnd = false; if (!param->isHTMLDocument) return result.toString(); // Check after processing the open tag of HEAD element - if (!param->hasCheckedMeta + if (!param->haveAddedCharsetDeclaration && element->hasTagName(HTMLNames::headTag)) { - param->hasCheckedMeta = true; + param->haveAddedCharsetDeclaration = true; // Check meta element. WebKit only pre-parse the first 512 bytes // of the document. If the whole <HEAD> is larger and meta is the // end of head part, then this kind of pages aren't decoded correctly @@ -212,7 +210,7 @@ String WebPageSerializerImpl::postActionAfterSerializeOpenTag( result.append(WebPageSerializer::generateMetaCharsetDeclaration( String(param->textEncoding.name()))); - param->hasAddedContentsBeforeEnd = true; + param->haveAddedContentsBeforeEnd = true; // Will search each META which has charset declaration, and skip them all // in PreActionBeforeSerializeOpenTag. } else if (element->hasTagName(HTMLNames::scriptTag) @@ -259,7 +257,7 @@ String WebPageSerializerImpl::postActionAfterSerializeEndTag( result.append("-->"); // Append a new base tag declaration. 
result.append(WebPageSerializer::generateBaseTagDeclaration( - param->doc->baseTarget())); + param->document->baseTarget())); } return result.toString(); @@ -271,27 +269,27 @@ void WebPageSerializerImpl::saveHTMLContentToBuffer( m_dataBuffer.append(result); encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsNotFinished, param, - 0); + DoNotForceFlush); } void WebPageSerializerImpl::encodeAndFlushBuffer( WebPageSerializerClient::PageSerializationStatus status, SerializeDomParam* param, - bool force) + FlushOption flushOption) { // Data buffer is not full nor do we want to force flush. - if (!force && m_dataBuffer.length() <= dataBufferCapacity) + if (flushOption != ForceFlush && m_dataBuffer.length() <= dataBufferCapacity) return; String content = m_dataBuffer.toString(); - m_dataBuffer.clear(); + m_dataBuffer = StringBuilder(); // Convert the unicode content to target encoding CString encodedContent = param->textEncoding.encode( content.characters(), content.length(), EntitiesForUnencodables); // Send result to the client. - m_client->didSerializeDataForFrame(param->currentFrameURL, + m_client->didSerializeDataForFrame(param->url, WebCString(encodedContent.data(), encodedContent.length()), status); } @@ -306,7 +304,7 @@ void WebPageSerializerImpl::openTagToString(const Element* element, if (needSkip) return; // Add open tag - result += "<" + element->nodeName(); + result += "<" + element->nodeName().lower(); // Go through all attributes and serialize them. const NamedNodeMap *attrMap = element->attributes(true); if (attrMap) { @@ -329,7 +327,7 @@ void WebPageSerializerImpl::openTagToString(const Element* element, result += attrValue; else { // Get the absolute link - String completeURL = param->doc->completeURL(attrValue); + String completeURL = param->document->completeURL(attrValue); // Check whether we have local files for those link. 
if (m_localLinks.contains(completeURL)) { if (!m_localDirectoryName.isEmpty()) @@ -352,7 +350,7 @@ void WebPageSerializerImpl::openTagToString(const Element* element, // Do post action for open tag. String addedContents = postActionAfterSerializeOpenTag(element, param); // Complete the open tag for element when it has child/children. - if (element->hasChildNodes() || param->hasAddedContentsBeforeEnd) + if (element->hasChildNodes() || param->haveAddedContentsBeforeEnd) result += ">"; // Append the added contents generate in post action of open tag. result += addedContents; @@ -372,20 +370,19 @@ void WebPageSerializerImpl::endTagToString(const Element* element, if (needSkip) return; // Write end tag when element has child/children. - if (element->hasChildNodes() || param->hasAddedContentsBeforeEnd) { + if (element->hasChildNodes() || param->haveAddedContentsBeforeEnd) { result += "</"; - result += element->nodeName(); + result += element->nodeName().lower(); result += ">"; } else { // Check whether we have to write end tag for empty element. if (param->isHTMLDocument) { result += ">"; - const HTMLElement* htmlElement = - static_cast<const HTMLElement*>(element); - if (htmlElement->endTagRequirement() == TagStatusRequired) { + // FIXME: This code is horribly wrong. WebPageSerializerImpl must die. + if (!static_cast<const HTMLElement*>(element)->ieForbidsInsertHTML()) { // We need to write end tag when it is required. result += "</"; - result += element->nodeName(); + result += element->nodeName().lower(); result += ">"; } } else { @@ -423,7 +420,7 @@ void WebPageSerializerImpl::buildContentForNode(const Node* node, break; // Document type node can be in DOM? case Node::DOCUMENT_TYPE_NODE: - param->hasDoctype = true; + param->haveSeenDocType = true; default: // For other type node, call default action. 
saveHTMLContentToBuffer(createMarkup(node), param); @@ -457,7 +454,7 @@ WebPageSerializerImpl::WebPageSerializerImpl(WebFrame* frame, m_localLinks.set(url.string(), localPaths[i]); } - ASSERT(!m_dataBuffer.length()); + ASSERT(m_dataBuffer.isEmpty()); } void WebPageSerializerImpl::collectTargetFrames() @@ -492,55 +489,37 @@ void WebPageSerializerImpl::collectTargetFrames() bool WebPageSerializerImpl::serialize() { - // Collect target frames. if (!m_framesCollected) collectTargetFrames(); + bool didSerialization = false; - // Get KURL for main frame. - KURL mainPageURL = m_specifiedWebFrameImpl->frame()->loader()->url(); + KURL mainURL = m_specifiedWebFrameImpl->frame()->document()->url(); - // Go through all frames for serializing DOM for whole page, include - // sub-frames. - for (int i = 0; i < static_cast<int>(m_frames.size()); ++i) { - // Get current serializing frame. - WebFrameImpl* currentFrame = m_frames[i]; - // Get current using document. - Document* currentDoc = currentFrame->frame()->document(); - // Get current frame's URL. - const KURL& currentFrameURL = currentFrame->frame()->loader()->url(); - - // Check whether we have done this document. - if (m_localLinks.contains(currentFrameURL.string())) { - // A new document, we will serialize it. - didSerialization = true; - // Get target encoding for current document. - String encoding = currentFrame->frame()->loader()->encoding(); - // Create the text encoding object with target encoding. - TextEncoding textEncoding(encoding); - // Construct serialize parameter for late processing document. - SerializeDomParam param(currentFrameURL, - encoding.length() ? textEncoding : UTF8Encoding(), - currentDoc, - currentFrameURL == mainPageURL ? m_localDirectoryName : ""); - - // Process current document. - Element* rootElement = currentDoc->documentElement(); - if (rootElement) - buildContentForNode(rootElement, &param); - - // Flush the remainder data and finish serializing current frame. 
- encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsFinished, - &param, - 1); - } + for (unsigned i = 0; i < m_frames.size(); ++i) { + WebFrameImpl* webFrame = m_frames[i]; + Document* document = webFrame->frame()->document(); + const KURL& url = document->url(); + + if (!url.isValid() || !m_localLinks.contains(url.string())) + continue; + + didSerialization = true; + + String encoding = webFrame->frame()->loader()->writer()->encoding(); + const TextEncoding& textEncoding = encoding.isEmpty() ? UTF8Encoding() : TextEncoding(encoding); + String directoryName = url == mainURL ? m_localDirectoryName : ""; + + SerializeDomParam param(url, textEncoding, document, directoryName); + + Element* documentElement = document->documentElement(); + if (documentElement) + buildContentForNode(documentElement, &param); + + encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsFinished, &param, ForceFlush); } - // We have done call frames, so we send message to embedder to tell it that - // frames are finished serializing. - ASSERT(!m_dataBuffer.length()); - m_client->didSerializeDataForFrame(KURL(), - WebCString("", 0), - WebPageSerializerClient::AllFramesAreFinished); + ASSERT(m_dataBuffer.isEmpty()); + m_client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished); return didSerialization; } |