summaryrefslogtreecommitdiffstats
path: root/luni/src/main/java/java/util/zip/GZIPInputStream.java
blob: 925e8c4d1a00a32c9f806857856ba70c2fbadf2f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package java.util.zip;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.ByteOrder;
import java.util.Arrays;
import libcore.io.Memory;
import libcore.io.Streams;

/**
 * The {@code GZIPInputStream} class is used to read data stored in the GZIP
 * format, reading and decompressing GZIP data from the underlying stream into
 * its buffer.
 *
 * <h3>Example</h3>
 * <p>Using {@code GZIPInputStream} is easier than {@link ZipInputStream}
 * because GZIP is only for compression, and is not a container for multiple files.
 * This code decompresses the data from a GZIP stream, similar to the {@code gunzip(1)} utility.
 * <pre>
 * InputStream is = ...
 * GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(is));
 * try {
 *     // Reading from 'zis' gets you the uncompressed bytes...
 *     processStream(zis);
 * } finally {
 *     zis.close();
 * }
 * </pre>
 *
 * <p>Note that this class ignores all remaining data at the end of the last
 * GZIP member.
 */
public class GZIPInputStream extends InflaterInputStream {
    // Bits of the flags byte (header[3]) of a GZIP member header, listed in
    // ascending bit order. They are tested by parseGzipHeader().

    /** Set when a two-byte CRC of the header follows the optional header fields. */
    private static final int FHCRC = 1 << 1;

    /** Set when a length-prefixed "extra" field is present in the header. */
    private static final int FEXTRA = 1 << 2;

    /** Set when a zero-terminated file name is present in the header. */
    private static final int FNAME = 1 << 3;

    /** Set when a zero-terminated comment is present in the header. */
    private static final int FCOMMENT = 1 << 4;

    /** Size in bytes of a member trailer: crc (4 bytes) + total out (4 bytes). */
    private static final int GZIP_TRAILER_SIZE = 8;

    /**
     * The magic header for the GZIP format. It is compared against the first
     * two header bytes interpreted as a little-endian short.
     */
    public static final int GZIP_MAGIC = 0x8b1f;

    /**
     * The checksum algorithm used when handling uncompressed data. The same
     * instance is also used to check the optional header CRC.
     */
    protected CRC32 crc = new CRC32();

    /**
     * Indicates the end of the input stream. Once set, {@code read} returns -1.
     */
    protected boolean eos = false;

    /**
     * Constructs a {@code GZIPInputStream} that reads GZIP data from the
     * underlying stream. Delegates to
     * {@link #GZIPInputStream(InputStream, int)}, passing {@code BUF_SIZE} as
     * the internal read buffer size.
     *
     * @param is
     *            the {@code InputStream} to read data from.
     * @throws IOException
     *             if an {@code IOException} occurs.
     */
    public GZIPInputStream(InputStream is) throws IOException {
        this(is, BUF_SIZE);
    }

    /**
     * Constructs a {@code GZIPInputStream} that reads GZIP data from the
     * underlying stream, using an internal read buffer of {@code size} bytes.
     *
     * <p>The header of the first GZIP member is read and checked here. If
     * that fails, this stream is closed before the exception is rethrown.
     *
     * @param is
     *            the {@code InputStream} to read data from.
     * @param size
     *            the internal read buffer size.
     * @throws IOException
     *             if the header cannot be read, does not start with
     *             {@link #GZIP_MAGIC}, or fails its header CRC check.
     */
    public GZIPInputStream(InputStream is, int size) throws IOException {
        super(is, new Inflater(true), size);

        try {
            final byte[] fixedHeader = readHeader(is);
            final short actualMagic = Memory.peekShort(fixedHeader, 0, ByteOrder.LITTLE_ENDIAN);
            if (actualMagic == (short) GZIP_MAGIC) {
                // Consume the optional header fields so that the next byte
                // read from the stream is compressed data.
                parseGzipHeader(is, fixedHeader, crc, buf);
            } else {
                throw new IOException(
                        String.format("unknown format (magic number %x)", actualMagic));
            }
        } catch (IOException headerFailure) {
            // Closing releases the inflater created above.
            close();
            throw headerFailure;
        }
    }

    /**
     * Closes this stream and any underlying streams. The stream is also
     * marked as having reached its end by setting {@code eos}.
     *
     * @throws IOException
     *             if closing the superclass stream fails.
     */
    @Override
    public void close() throws IOException {
        eos = true; // set before delegating, so it holds even if super.close() throws
        super.close();
    }

    /**
     * Reads up to {@code byteCount} bytes of decompressed data into
     * {@code buffer}, starting at {@code byteOffset}.
     *
     * <p>Every byte returned is added to the running CRC. When the current
     * GZIP member ends, its trailer is verified and the stream is probed for
     * a following member. If one is found, decompression state is reset so
     * that reading continues with that member.
     *
     * <p>A member may end during a call that produced no bytes, in which case
     * the superclass reports -1. If another member follows, this method keeps
     * reading from it instead of returning -1. Returning -1 there would tell
     * callers the stream had ended while data was still available.
     *
     * @param buffer
     *            the array that receives the decompressed bytes.
     * @param byteOffset
     *            the index in {@code buffer} at which to start storing bytes.
     * @param byteCount
     *            the maximum number of bytes to read.
     * @return the number of bytes read, or -1 once the end of the last member
     *         has been reached.
     * @throws IOException
     *             if the stream is closed, if the trailer's CRC or size does
     *             not match, or if reading from the underlying stream fails.
     */
    @Override
    public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException {
        if (closed) {
            throw new IOException("Stream is closed");
        }
        if (eos) {
            return -1;
        }
        Arrays.checkOffsetAndCount(buffer.length, byteOffset, byteCount);

        while (true) {
            int bytesRead;
            try {
                bytesRead = super.read(buffer, byteOffset, byteCount);
            } finally {
                eos = eof; // update eos after every read(), even when it throws
            }

            if (bytesRead != -1) {
                crc.update(buffer, byteOffset, bytesRead);
            }

            boolean startedNextMember = false;
            if (eos) {
                verifyCrc();
                eos = maybeReadNextMember();
                if (!eos) {
                    // Another member follows: start over with clean
                    // checksum and inflater state.
                    crc.reset();
                    inf.reset();
                    eof = false;
                    len = 0;
                    startedNextMember = true;
                }
            }

            if (bytesRead == -1 && startedNextMember) {
                // The finished member yielded nothing in this call, but the
                // stream has not ended; read from the member just opened.
                continue;
            }
            return bytesRead;
        }
    }

    /**
     * Checks whether another GZIP member follows the one that was just
     * finished and, if so, consumes its header.
     *
     * <p>Must be called after the trailer of the current member has been
     * verified. Bytes left over in the inflater's input buffer after the
     * trailer are pushed back onto the input stream before the next header
     * is read.
     *
     * @return {@code true} if the end of the stream has been reached (no
     *         further member was found), {@code false} if the header of
     *         another member was parsed successfully.
     * @throws IOException
     *             if the header of a following member is truncated after its
     *             fixed part or fails its header CRC check, or if reading
     *             from the underlying stream fails.
     */
    private boolean maybeReadNextMember() throws IOException {
        // If we have any unconsumed data in the inflater buffer, we have to
        // scan that first. The fact that we've reached here implies we've
        // successfully consumed the GZIP trailer.
        final int remaining = inf.getRemaining() - GZIP_TRAILER_SIZE;
        if (remaining > 0) {
            // NOTE: We make sure we create a pushback stream exactly once,
            // even if the input stream contains multiple members.
            //
            // The push back stream we create must therefore be able to contain
            // (worst case) the entire buffer even though there may be fewer bytes
            // remaining when it is first created.
            if (!(in instanceof PushbackInputStream)) {
                in = new PushbackInputStream(in, buf.length);
            }
            ((PushbackInputStream) in).unread(buf,
                    inf.getCurrentOffset() + GZIP_TRAILER_SIZE, remaining);
        }

        final byte[] buffer;
        try {
            buffer = readHeader(in);
        } catch (EOFException ignored) {
            // We've reached the end of the stream and there are no more members
            // to read. Note that we might also hit this if fewer bytes than a
            // full fixed-size header follow the member. We don't care because
            // we're specified to ignore all data at the end of the last gzip
            // record.
            return true;
        }

        final short magic = Memory.peekShort(buffer, 0, ByteOrder.LITTLE_ENDIAN);
        if (magic != (short) GZIP_MAGIC) {
            // Don't throw here because we've already read one valid member
            // from this stream.
            return true;
        }

        // We've encountered the gzip magic number, so we assume there's another
        // member in the stream.
        //
        // The shared checksum still holds the data CRC of the member that was
        // just verified. Clear it first: when the new header carries FHCRC,
        // parseGzipHeader() accumulates the header bytes into this object, and
        // stale state would make a valid header fail its CRC check.
        crc.reset();
        parseGzipHeader(in, buffer, crc, buf);
        return false;
    }

    /**
     * Reads the 10-byte fixed part of a GZIP member header from {@code in}.
     *
     * @param in
     *            the stream positioned at the start of a member.
     * @return a new 10-byte array holding the bytes that were read.
     * @throws IOException
     *             if {@code Streams.readFully} cannot supply all 10 bytes.
     */
    private static byte[] readHeader(InputStream in) throws IOException {
        final int fixedHeaderLength = 10;
        final byte[] fixedHeader = new byte[fixedHeaderLength];
        Streams.readFully(in, fixedHeader, 0, fixedHeaderLength);
        return fixedHeader;
    }

    /**
     * Consumes the optional fields that follow the fixed part of a GZIP
     * member header, leaving {@code in} positioned after the header.
     *
     * <p>The flags byte {@code header[3]} selects which optional fields are
     * present. When FHCRC is set, every header byte is fed to {@code crc},
     * the low 16 bits of the result are compared with the two-byte value
     * stored after the optional fields, and {@code crc} is reset afterwards.
     * When FHCRC is not set, {@code crc} is left untouched.
     *
     * @param in
     *            the stream to read the optional fields from.
     * @param header
     *            the fixed header already read; its first two bytes are
     *            overwritten when short values are read.
     * @param crc
     *            the checksum used for the header CRC check.
     * @param scratch
     *            a buffer used to skip over the extra field.
     * @throws IOException
     *             if the stream ends inside the header, or the header CRC
     *             does not match.
     */
    private static void parseGzipHeader(InputStream in, byte[] header,
            CRC32 crc, byte[] scratch) throws IOException {
        final byte flagBits = header[3];
        final boolean hcrc = (flagBits & FHCRC) != 0;
        if (hcrc) {
            crc.update(header, 0, header.length);
        }

        if ((flagBits & FEXTRA) != 0) {
            // The extra field is prefixed by its length, an unsigned
            // little-endian 16-bit value.
            Streams.readFully(in, header, 0, 2);
            if (hcrc) {
                crc.update(header, 0, 2);
            }
            int pending = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN) & 0xffff;
            while (pending > 0) {
                final int chunk = in.read(scratch, 0, Math.min(pending, scratch.length));
                if (chunk == -1) {
                    throw new EOFException();
                }
                if (hcrc) {
                    crc.update(scratch, 0, chunk);
                }
                pending -= chunk;
            }
        }

        // File name first, then comment: both are zero-terminated.
        if ((flagBits & FNAME) != 0) {
            readZeroTerminated(in, crc, hcrc);
        }
        if ((flagBits & FCOMMENT) != 0) {
            readZeroTerminated(in, crc, hcrc);
        }

        if (!hcrc) {
            return;
        }

        Streams.readFully(in, header, 0, 2);
        final short storedCrc16 = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN);
        final short computedCrc16 = (short) crc.getValue();
        if (computedCrc16 != storedCrc16) {
            throw new IOException("CRC mismatch");
        }
        crc.reset();
    }

    /**
     * Reads the 8-byte trailer of the member that just ended and checks it
     * against the data that was decompressed.
     *
     * <p>The trailer is taken first from whatever input the inflater has not
     * consumed; any bytes still missing are read from the underlying stream.
     *
     * @throws IOException
     *             with message "CRC mismatch" if the stored CRC differs from
     *             the computed one, with message "Size mismatch" if the
     *             stored size differs from the inflater's total output, or
     *             if the trailer cannot be read in full.
     */
    private void verifyCrc() throws IOException {
        final int trailerLength = 8; // crc (4 bytes) + total out (4 bytes)
        // Input bytes that fill() fetched but the inflater did not consume.
        final int unconsumed = inf.getRemaining();
        final int fromBuffer = Math.min(unconsumed, trailerLength);
        final byte[] trailer = new byte[trailerLength];

        System.arraycopy(buf, len - unconsumed, trailer, 0, fromBuffer);
        Streams.readFully(in, trailer, fromBuffer, trailerLength - fromBuffer);

        final int storedCrc = Memory.peekInt(trailer, 0, ByteOrder.LITTLE_ENDIAN);
        if (storedCrc != (int) crc.getValue()) {
            throw new IOException("CRC mismatch");
        }

        final int storedSize = Memory.peekInt(trailer, 4, ByteOrder.LITTLE_ENDIAN);
        if (storedSize != inf.getTotalOut()) {
            throw new IOException("Size mismatch");
        }
    }

    /**
     * Skips a zero-terminated field of the GZIP header, reading one byte at a
     * time up to and including the terminating zero.
     *
     * @param in
     *            the stream to read from.
     * @param crc
     *            the checksum to update with every byte consumed, terminator
     *            included; only used when {@code hcrc} is {@code true}.
     * @param hcrc
     *            whether the header carries a CRC, i.e. whether {@code crc}
     *            must be updated.
     * @throws IOException
     *             an {@code EOFException} if the stream ends before the
     *             terminator, or whatever {@code in.read()} throws.
     */
    private static void readZeroTerminated(InputStream in, CRC32 crc, boolean hcrc)
            throws IOException {
        // TODO: Fix these single byte reads. This method is used to consume the
        // header FNAME & FCOMMENT which aren't widely used in gzip files.
        for (;;) {
            final int next = in.read();
            if (next == -1) {
                throw new EOFException();
            }
            if (hcrc) {
                crc.update(next);
            }
            if (next <= 0) {
                // Terminator consumed (and added to the checksum above).
                return;
            }
        }
    }
}