summaryrefslogtreecommitdiffstats
path: root/WebKit/chromium/src/WebPageSerializerImpl.h
blob: a7f2fb4a14ea4a40235b9b033ee70d92e7208845 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
/*
 * Copyright (C) 2009 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef WebPageSerializerImpl_h
#define WebPageSerializerImpl_h

#include "PlatformString.h"
#include <wtf/Forward.h>
#include <wtf/HashMap.h>
#include <wtf/Vector.h>
#include <wtf/text/StringHash.h>
#include "StringBuilder.h"

#include "WebEntities.h"
#include "WebPageSerializer.h"
#include "WebPageSerializerClient.h"
#include "WebString.h"
#include "WebURL.h"

namespace WebCore {
class Document;
class Element;
class Node;
class TextEncoding;
}

namespace WebKit {
class WebFrameImpl;

// Get html data by serializing all frames of current page with lists
// which contain all resource links that have local copy.
// contain all saved auxiliary files included all sub frames and resources.
// This function will find out all frames and serialize them to HTML data.
// We have a data buffer to temporary saving generated html data. We will
// sequentially call WebViewDelegate::SendSerializedHtmlData once the data
// buffer is full. See comments of WebViewDelegate::SendSerializedHtmlData
// for getting more information.
class WebPageSerializerImpl {
public:
    // Do serialization action. Return false means no available frame has been
    // serialized, otherwise return true.
    bool serialize();

    // The parameter specifies which frame need to be serialized.
    // The parameter recursive_serialization specifies whether we need to
    // serialize all sub frames of the specified frame or not.
    // The parameter delegate specifies the pointer of interface
    // DomSerializerDelegate provide sink interface which can receive the
    // individual chunks of data to be saved.
    // The parameter links contain original URLs of all saved links.
    // The parameter local_paths contain corresponding local file paths of all
    // saved links, which matched with vector:links one by one.
    // The parameter local_directory_name is relative path of directory which
    // contain all saved auxiliary files included all sub frames and resources.
    WebPageSerializerImpl(WebFrame* frame,
                          bool recursive,
                          WebPageSerializerClient* client,
                          const WebVector<WebURL>& links,
                          const WebVector<WebString>& localPaths,
                          const WebString& localDirectoryName);

private:
    // Specified frame which need to be serialized;
    WebFrameImpl* m_specifiedWebFrameImpl;
    // Pointer of WebPageSerializerClient
    WebPageSerializerClient* m_client;
    // This hash map is used to map resource URL of original link to its local
    // file path.
    typedef HashMap<WTF::String, WTF::String> LinkLocalPathMap;
    // local_links_ include all pair of local resource path and corresponding
    // original link.
    LinkLocalPathMap m_localLinks;
    // Data buffer for saving result of serialized DOM data.
    WebCore::StringBuilder m_dataBuffer;
    // Passing true to recursive_serialization_ indicates we will serialize not
    // only the specified frame but also all sub-frames in the specific frame.
    // Otherwise we only serialize the specified frame excluded all sub-frames.
    bool m_recursiveSerialization;
    // Flag indicates whether we have collected all frames which need to be
    // serialized or not;
    bool m_framesCollected;
    // Local directory name of all local resource files.
    WTF::String m_localDirectoryName;
    // Vector for saving all frames which need to be serialized.
    Vector<WebFrameImpl*> m_frames;

    // Web entities conversion maps.
    WebEntities m_htmlEntities;
    WebEntities m_xmlEntities;

    struct SerializeDomParam {
        // Frame URL of current processing document presented by GURL
        const WebCore::KURL& currentFrameURL;
        // Current using text encoding object.
        const WebCore::TextEncoding& textEncoding;

        // Document object of current frame.
        WebCore::Document* doc;
        // Local directory name of all local resource files.
        const WTF::String& directoryName;

        // Flag indicates current doc is html document or not. It's a cache value
        // of Document.isHTMLDocument().
        bool isHTMLDocument;
        // Flag which indicate whether we have met document type declaration.
        bool hasDoctype;
        // Flag which indicate whether will process meta issue.
        bool hasCheckedMeta;
        // This meta element need to be skipped when serializing DOM.
        const WebCore::Element* skipMetaElement;
        // Flag indicates we are in script or style tag.
        bool isInScriptOrStyleTag;
        // Flag indicates whether we have written xml document declaration.
        // It is only used in xml document
        bool hasDocDeclaration;
        // Flag indicates whether we have added additional contents before end tag.
        // This flag will be re-assigned in each call of function
        // PostActionAfterSerializeOpenTag and it could be changed in function
        // PreActionBeforeSerializeEndTag if the function adds new contents into
        // serialization stream.
        bool hasAddedContentsBeforeEnd;

        // Constructor.
        SerializeDomParam(const WebCore::KURL& currentFrameURL,
                          const WebCore::TextEncoding& textEncoding,
                          WebCore::Document* doc,
                          const WTF::String& directoryName);
    };

    // Collect all target frames which need to be serialized.
    void collectTargetFrames();
    // Before we begin serializing open tag of a element, we give the target
    // element a chance to do some work prior to add some additional data.
    WTF::String preActionBeforeSerializeOpenTag(const WebCore::Element* element,
                                                    SerializeDomParam* param,
                                                    bool* needSkip);
    // After we finish serializing open tag of a element, we give the target
    // element a chance to do some post work to add some additional data.
    WTF::String postActionAfterSerializeOpenTag(const WebCore::Element* element,
                                                    SerializeDomParam* param);
    // Before we begin serializing end tag of a element, we give the target
    // element a chance to do some work prior to add some additional data.
    WTF::String preActionBeforeSerializeEndTag(const WebCore::Element* element,
                                                   SerializeDomParam* param,
                                                   bool* needSkip);
    // After we finish serializing end tag of a element, we give the target
    // element a chance to do some post work to add some additional data.
    WTF::String postActionAfterSerializeEndTag(const WebCore::Element* element,
                                                   SerializeDomParam* param);
    // Save generated html content to data buffer.
    void saveHTMLContentToBuffer(const WTF::String& content,
                                 SerializeDomParam* param);
    // Flushes the content buffer by encoding and sending the content to the
    // WebPageSerializerClient. Content is not flushed if the buffer is not full
    // unless force is 1.
    void encodeAndFlushBuffer(WebPageSerializerClient::PageSerializationStatus status,
                              SerializeDomParam* param,
                              bool force);
    // Serialize open tag of an specified element.
    void openTagToString(const WebCore::Element* element,
                         SerializeDomParam* param);
    // Serialize end tag of an specified element.
    void endTagToString(const WebCore::Element* element,
                        SerializeDomParam* param);
    // Build content for a specified node
    void buildContentForNode(const WebCore::Node* node,
                             SerializeDomParam* param);
};

} // namespace WebKit

#endif