summaryrefslogtreecommitdiffstats
path: root/include/utils/ZipFileRO.h
blob: 547e36a098648179085f8533035e6e4e4711242e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Read-only access to Zip archives, with minimal heap allocation.
 *
 * This is similar to the more-complete ZipFile class, but no attempt
 * has been made to make them interchangeable.  This class operates under
 * a very different set of assumptions and constraints.
 *
 * One such assumption is that if you're getting file descriptors for
 * use with this class as a child of a fork() operation, you must be on
 * a pread() to guarantee correct operation. This is because pread() can
 * atomically read at a file offset without worrying about a lock around an
 * lseek() + read() pair.
 */
#ifndef __LIBS_ZIPFILERO_H
#define __LIBS_ZIPFILERO_H

#include <utils/Compat.h>
#include <utils/Errors.h>
#include <utils/FileMap.h>
#include <utils/threads.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>

namespace android {

/*
 * Trivial typedef to ensure that ZipEntryRO is not treated as a simple
 * integer.  We use NULL to indicate an invalid value.
 */
typedef void* ZipEntryRO;

/*
 * Open a Zip archive for reading.
 *
 * We want "open" and "find entry by name" to be fast operations, and we
 * want to use as little memory as possible.  We memory-map the file,
 * and load a hash table with pointers to the filenames (which aren't
 * null-terminated).  The other fields are at a fixed offset from the
 * filename, so we don't need to extract those (but we do need to byte-read
 * and endian-swap them every time we want them).
 *
 * To speed comparisons when doing a lookup by name, we could make the mapping
 * "private" (copy-on-write) and null-terminate the filenames after verifying
 * the record structure.  However, this requires a private mapping of
 * every page that the Central Directory touches.  Easier to tuck a copy
 * of the string length into the hash table entry.
 *
 * NOTE: If this is used on file descriptors inherited from a fork() operation,
 * you must be on a platform that implements pread() to guarantee correctness
 * on the shared file descriptors.
 */
class ZipFileRO {
public:
    ZipFileRO()
        : mFd(-1), mFileName(NULL), mFileLength(-1),
          mDirectoryMap(NULL),
          mNumEntries(-1), mDirectoryOffset(-1),
          mHashTableSize(-1), mHashTable(NULL)
        {}

    ~ZipFileRO();

    /*
     * Open an archive.
     */
    status_t open(const char* zipFileName);

    /*
     * Find an entry, by name.  Returns the entry identifier, or NULL if
     * not found.
     *
     * If two entries have the same name, one will be chosen at semi-random.
     */
    ZipEntryRO findEntryByName(const char* fileName) const;

    /*
     * Return the #of entries in the Zip archive.
     */
    int getNumEntries(void) const {
        return mNumEntries;
    }

    /*
     * Return the Nth entry.  Zip file entries are not stored in sorted
     * order, and updated entries may appear at the end, so anyone walking
     * the archive needs to avoid making ordering assumptions.  We take
     * that further by returning the Nth non-empty entry in the hash table
     * rather than the Nth entry in the archive.
     *
     * Valid values are [0..numEntries).
     *
     * [This is currently O(n).  If it needs to be fast we can allocate an
     * additional data structure or provide an iterator interface.]
     */
    ZipEntryRO findEntryByIndex(int idx) const;

    /*
     * Copy the filename into the supplied buffer.  Returns 0 on success,
     * -1 if "entry" is invalid, or the filename length if it didn't fit.  The
     * length, and the returned string, include the null-termination.
     */
    int getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) const;

    /*
     * Get the vital stats for an entry.  Pass in NULL pointers for anything
     * you don't need.
     *
     * "*pOffset" holds the Zip file offset of the entry's data.
     *
     * Returns "false" if "entry" is bogus or if the data in the Zip file
     * appears to be bad.
     */
    bool getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
        size_t* pCompLen, off64_t* pOffset, long* pModWhen, long* pCrc32) const;

    /*
     * Create a new FileMap object that maps a subset of the archive.  For
     * an uncompressed entry this effectively provides a pointer to the
     * actual data, for a compressed entry this provides the input buffer
     * for inflate().
     */
    FileMap* createEntryFileMap(ZipEntryRO entry) const;

    /*
     * Uncompress the data into a buffer.  Depending on the compression
     * format, this is either an "inflate" operation or a memcpy.
     *
     * Use "uncompLen" from getEntryInfo() to determine the required
     * buffer size.
     *
     * Returns "true" on success.
     */
    bool uncompressEntry(ZipEntryRO entry, void* buffer) const;

    /*
     * Uncompress the data to an open file descriptor.
     */
    bool uncompressEntry(ZipEntryRO entry, int fd) const;

    /* Zip compression methods we support */
    enum {
        kCompressStored     = 0,        // no compression
        kCompressDeflated   = 8,        // standard deflate
    };

    /*
     * Utility function: uncompress deflated data, buffer to buffer.
     */
    static bool inflateBuffer(void* outBuf, const void* inBuf,
        size_t uncompLen, size_t compLen);

    /*
     * Utility function: uncompress deflated data, buffer to fd.
     */
    static bool inflateBuffer(int fd, const void* inBuf,
        size_t uncompLen, size_t compLen);

    /*
     * Utility function to convert ZIP's time format to a timespec struct.
     */
    static inline void zipTimeToTimespec(long when, struct tm* timespec) {
        const long date = when >> 16;
        timespec->tm_year = ((date >> 9) & 0x7F) + 80; // Zip is years since 1980
        timespec->tm_mon = (date >> 5) & 0x0F;
        timespec->tm_mday = date & 0x1F;

        timespec->tm_hour = (when >> 11) & 0x1F;
        timespec->tm_min = (when >> 5) & 0x3F;
        timespec->tm_sec = (when & 0x1F) << 1;
    }

    /*
     * Some basic functions for raw data manipulation.  "LE" means
     * Little Endian.
     */
    static inline unsigned short get2LE(const unsigned char* buf) {
        return buf[0] | (buf[1] << 8);
    }
    static inline unsigned long get4LE(const unsigned char* buf) {
        return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
    }

private:
    /* these are private and not defined */ 
    ZipFileRO(const ZipFileRO& src);
    ZipFileRO& operator=(const ZipFileRO& src);

    /* locate and parse the central directory */
    bool mapCentralDirectory(void);

    /* parse the archive, prepping internal structures */
    bool parseZipArchive(void);

    /* add a new entry to the hash table */
    void addToHash(const char* str, int strLen, unsigned int hash);

    /* compute string hash code */
    static unsigned int computeHash(const char* str, int len);

    /* convert a ZipEntryRO back to a hash table index */
    int entryToIndex(const ZipEntryRO entry) const;

    /*
     * One entry in the hash table.
     */
    typedef struct HashEntry {
        const char*     name;
        unsigned short  nameLen;
        //unsigned int    hash;
    } HashEntry;

    /* open Zip archive */
    int         mFd;

    /* Lock for handling the file descriptor (seeks, etc) */
    mutable Mutex mFdLock;

    /* zip file name */
    char*       mFileName;

    /* length of file */
    size_t      mFileLength;

    /* mapped file */
    FileMap*    mDirectoryMap;

    /* number of entries in the Zip archive */
    int         mNumEntries;

    /* CD directory offset in the Zip archive */
    off64_t     mDirectoryOffset;

    /*
     * We know how many entries are in the Zip archive, so we have a
     * fixed-size hash table.  We probe for an empty slot.
     */
    int         mHashTableSize;
    HashEntry*  mHashTable;
};

}; // namespace android

#endif /*__LIBS_ZIPFILERO_H*/