diff options
author | Neil Fuller <nfuller@google.com> | 2014-03-12 10:16:53 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2014-03-12 10:16:54 +0000 |
commit | 3537f5811df63fa05faeca2bb85364dd03322fca (patch) | |
tree | 06f38df62d87813b47633e105fdc28247e355615 /luni | |
parent | 5a692fa8cdeec7eee42482f78187bb76be5a87db (diff) | |
parent | e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb (diff) | |
download | libcore-3537f5811df63fa05faeca2bb85364dd03322fca.zip libcore-3537f5811df63fa05faeca2bb85364dd03322fca.tar.gz libcore-3537f5811df63fa05faeca2bb85364dd03322fca.tar.bz2 |
Merge "Add Java 1.7 APIs: ZipFile, ZipInputStream, ZipOutputStream"
Diffstat (limited to 'luni')
7 files changed, 478 insertions, 69 deletions
diff --git a/luni/src/main/java/java/util/zip/ZipEntry.java b/luni/src/main/java/java/util/zip/ZipEntry.java index 69f027a..217cc3c 100644 --- a/luni/src/main/java/java/util/zip/ZipEntry.java +++ b/luni/src/main/java/java/util/zip/ZipEntry.java @@ -20,6 +20,7 @@ package java.util.zip; import java.io.IOException; import java.io.InputStream; import java.nio.ByteOrder; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Calendar; @@ -94,9 +95,7 @@ public class ZipEntry implements ZipConstants, Cloneable { if (name == null) { throw new NullPointerException("name == null"); } - if (name.length() > 0xFFFF) { - throw new IllegalArgumentException("Name too long: " + name.length()); - } + validateStringLength("Name", name); this.name = name; } @@ -203,11 +202,8 @@ public class ZipEntry implements ZipConstants, Cloneable { this.comment = null; return; } + validateStringLength("Comment", comment); - byte[] commentBytes = comment.getBytes(StandardCharsets.UTF_8); - if (commentBytes.length > 0xffff) { - throw new IllegalArgumentException("Comment too long: " + commentBytes.length); - } this.comment = comment; } @@ -375,12 +371,14 @@ public class ZipEntry implements ZipConstants, Cloneable { /* * Internal constructor. Creates a new ZipEntry by reading the * Central Directory Entry (CDE) from "in", which must be positioned - * at the CDE signature. + * at the CDE signature. If the GPBF_UTF8_FLAG is set in the CDE then + * UTF-8 is used to decode the string information, otherwise the + * defaultCharset is used. * * On exit, "in" will be positioned at the start of the next entry * in the Central Directory. */ - ZipEntry(byte[] cdeHdrBuf, InputStream cdStream) throws IOException { + ZipEntry(byte[] cdeHdrBuf, InputStream cdStream, Charset defaultCharset) throws IOException { Streams.readFully(cdStream, cdeHdrBuf, 0, cdeHdrBuf.length); BufferIterator it = HeapBufferIterator.iterator(cdeHdrBuf, 0, cdeHdrBuf.length, @@ -398,6 +396,13 @@ public class ZipEntry implements ZipConstants, Cloneable { throw new ZipException("Invalid General Purpose Bit Flag: " + gpbf); } + // If the GPBF_UTF8_FLAG is set then the character encoding is UTF-8 whatever the default + // provided. + Charset charset = defaultCharset; + if ((gpbf & ZipFile.GPBF_UTF8_FLAG) != 0) { + charset = StandardCharsets.UTF_8; + } + compressionMethod = it.readShort() & 0xffff; time = it.readShort() & 0xffff; modDate = it.readShort() & 0xffff; @@ -420,19 +425,17 @@ public class ZipEntry implements ZipConstants, Cloneable { if (containsNulByte(nameBytes)) { throw new ZipException("Filename contains NUL byte: " + Arrays.toString(nameBytes)); } - name = new String(nameBytes, 0, nameBytes.length, StandardCharsets.UTF_8); + name = new String(nameBytes, 0, nameBytes.length, charset); if (extraLength > 0) { extra = new byte[extraLength]; Streams.readFully(cdStream, extra, 0, extraLength); } - // The RI has always assumed UTF-8. (If GPBF_UTF8_FLAG isn't set, the encoding is - // actually IBM-437.) if (commentByteCount > 0) { byte[] commentBytes = new byte[commentByteCount]; Streams.readFully(cdStream, commentBytes, 0, commentByteCount); - comment = new String(commentBytes, 0, commentBytes.length, StandardCharsets.UTF_8); + comment = new String(commentBytes, 0, commentBytes.length, charset); } } @@ -444,4 +447,14 @@ public class ZipEntry implements ZipConstants, Cloneable { } return false; } + + private static void validateStringLength(String argument, String string) { + // This check is not perfect: the character encoding is determined when the entry is + // written out. UTF-8 is probably a worst-case: most alternatives should be single byte per + // character. + byte[] bytes = string.getBytes(StandardCharsets.UTF_8); + if (bytes.length > 0xffff) { + throw new IllegalArgumentException(argument + " too long: " + bytes.length); + } + } } diff --git a/luni/src/main/java/java/util/zip/ZipFile.java b/luni/src/main/java/java/util/zip/ZipFile.java index 4380281..4b3e431 100644 --- a/luni/src/main/java/java/util/zip/ZipFile.java +++ b/luni/src/main/java/java/util/zip/ZipFile.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.ByteOrder; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Enumeration; import java.util.Iterator; @@ -96,6 +97,8 @@ public class ZipFile implements Closeable, ZipConstants { private final String filename; + private final Charset charset; + private File fileToDeleteOnClose; private RandomAccessFile raf; @@ -108,33 +111,81 @@ public class ZipFile implements Closeable, ZipConstants { /** * Constructs a new {@code ZipFile} allowing read access to the contents of the given file. + * + * <p>UTF-8 is used to decode all comments and entry names in the file. + * * @throws ZipException if a zip error occurs. * @throws IOException if an {@code IOException} occurs. */ public ZipFile(File file) throws ZipException, IOException { - this(file, OPEN_READ); + this(file, OPEN_READ, StandardCharsets.UTF_8); } /** * Constructs a new {@code ZipFile} allowing read access to the contents of the given file. + * + * <p>The {@code charset} is used to decode the file comment if one exists. If the character + * encoding for entry names and comments is not explicitly marked as UTF-8 by the zip file + * then {@code charset} is used to decode them. + * + * @throws ZipException if a zip error occurs. + * @throws IOException if an {@code IOException} occurs. + * @since 1.7 + * @hide Until ready for an API update + */ + public ZipFile(File file, Charset charset) throws ZipException, IOException { + this(file, OPEN_READ, charset); + } + + /** + * Constructs a new {@code ZipFile} allowing read access to the contents of the given file. + * + * <p>UTF-8 is used to decode all comments and entry names in the file. + * * @throws IOException if an IOException occurs. */ public ZipFile(String name) throws IOException { - this(new File(name), OPEN_READ); + this(new File(name), OPEN_READ, StandardCharsets.UTF_8); } /** * Constructs a new {@code ZipFile} allowing access to the given file. - * The {@code mode} must be either {@code OPEN_READ} or {@code OPEN_READ|OPEN_DELETE}. * - * <p>If the {@code OPEN_DELETE} flag is supplied, the file will be deleted at or before the + * <p>UTF-8 is used to decode all comments and entry names in the file. + * + * <p>The {@code mode} must be either {@code OPEN_READ} or {@code OPEN_READ|OPEN_DELETE}. + * If the {@code OPEN_DELETE} flag is supplied, the file will be deleted at or before the * time that the {@code ZipFile} is closed (the contents will remain accessible until * this {@code ZipFile} is closed); it also calls {@code File.deleteOnExit}. * * @throws IOException if an {@code IOException} occurs. */ public ZipFile(File file, int mode) throws IOException { + this(file, mode, StandardCharsets.UTF_8); + } + + /** + * Constructs a new {@code ZipFile} allowing access to the given file. + * + * <p>The {@code mode} must be either {@code OPEN_READ} or {@code OPEN_READ|OPEN_DELETE}. + * If the {@code OPEN_DELETE} flag is supplied, the file will be deleted at or before the + * time that the {@code ZipFile} is closed (the contents will remain accessible until + * this {@code ZipFile} is closed); it also calls {@code File.deleteOnExit}. + * + * <p>The {@code charset} is used to decode the file comment if one exists. If the character + * encoding for entry names and comments is not explicitly marked as UTF-8 by the zip file + * then {@code charset} is used to decode them. + * + * @throws IOException if an {@code IOException} occurs. + * @since 1.7 + * @hide Until ready for an API update + */ + public ZipFile(File file, int mode, Charset charset) throws IOException { filename = file.getPath(); + if (charset == null) { + throw new NullPointerException("charset == null"); + } + this.charset = charset; if (mode != OPEN_READ && mode != (OPEN_READ | OPEN_DELETE)) { throw new IllegalArgumentException("Bad mode: " + mode); } @@ -403,7 +454,7 @@ public class ZipFile implements Closeable, ZipConstants { if (commentLength > 0) { byte[] commentBytes = new byte[commentLength]; raf.readFully(commentBytes); - comment = new String(commentBytes, 0, commentBytes.length, StandardCharsets.UTF_8); + comment = new String(commentBytes, 0, commentBytes.length, charset); } // Seek to the first CDE and read all entries. @@ -414,7 +465,7 @@ public class ZipFile implements Closeable, ZipConstants { BufferedInputStream bufferedStream = new BufferedInputStream(rafStream, 4096); byte[] hdrBuf = new byte[CENHDR]; // Reuse the same buffer for each entry. for (int i = 0; i < numEntries; ++i) { - ZipEntry newEntry = new ZipEntry(hdrBuf, bufferedStream); + ZipEntry newEntry = new ZipEntry(hdrBuf, bufferedStream, charset); if (newEntry.localHeaderRelOffset >= centralDirOffset) { throw new ZipException("Local file header offset is after central directory"); } diff --git a/luni/src/main/java/java/util/zip/ZipInputStream.java b/luni/src/main/java/java/util/zip/ZipInputStream.java index 17f3938..5a73619 100644 --- a/luni/src/main/java/java/util/zip/ZipInputStream.java +++ b/luni/src/main/java/java/util/zip/ZipInputStream.java @@ -21,7 +21,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; import java.nio.ByteOrder; +import java.nio.charset.Charset; import java.nio.charset.ModifiedUtf8; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import libcore.io.Memory; import libcore.io.Streams; @@ -84,18 +86,39 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants private final CRC32 crc = new CRC32(); - private byte[] nameBuf = new byte[256]; + private byte[] stringBytesBuf = new byte[256]; - private char[] charBuf = new char[256]; + private char[] stringCharBuf = new char[256]; + + private final Charset charset; /** * Constructs a new {@code ZipInputStream} to read zip entries from the given input stream. + * + * <p>UTF-8 is used to decode all strings in the file. */ public ZipInputStream(InputStream stream) { + this(stream, StandardCharsets.UTF_8); + } + + /** + * Constructs a new {@code ZipInputStream} to read zip entries from the given input stream. + * + * <p>If the character encoding for entry names and comments is not explicitly marked as UTF-8 + * by the zip file then {@code charset} is used to decode them. + * + * @since 1.7 + * @hide Until ready for an API update + */ + public ZipInputStream(InputStream stream, Charset charset) { super(new PushbackInputStream(stream, BUF_SIZE), new Inflater(true)); if (stream == null) { throw new NullPointerException("stream == null"); } + if (charset == null) { + throw new NullPointerException("charset == null"); + } + this.charset = charset; } /** @@ -249,14 +272,13 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants } int extraLength = peekShort(LOCEXT - LOCVER); - if (nameLength > nameBuf.length) { - nameBuf = new byte[nameLength]; - // The bytes are modified UTF-8, so the number of chars will always be less than or - // equal to the number of bytes. It's fine if this buffer is too long. - charBuf = new char[nameLength]; + // Determine the character set to use to decode strings. + Charset charset = this.charset; + if ((flags & ZipFile.GPBF_UTF8_FLAG) != 0) { + charset = StandardCharsets.UTF_8; } - Streams.readFully(in, nameBuf, 0, nameLength); - currentEntry = createZipEntry(ModifiedUtf8.decode(nameBuf, charBuf, 0, nameLength)); + String name = readString(nameLength, charset); + currentEntry = createZipEntry(name); currentEntry.time = ceLastModifiedTime; currentEntry.modDate = ceLastModifiedDate; currentEntry.setMethod(ceCompressionMethod); @@ -273,6 +295,26 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants return currentEntry; } + /** + * Reads bytes from the current stream position returning the string representation. + */ + private String readString(int byteLength, Charset charset) throws IOException { + if (byteLength > stringBytesBuf.length) { + stringBytesBuf = new byte[byteLength]; + } + Streams.readFully(in, stringBytesBuf, 0, byteLength); + if (charset == StandardCharsets.UTF_8) { + // The number of chars will always be less than or equal to the number of bytes. It's + // fine if this buffer is too long. + if (byteLength > stringCharBuf.length) { + stringCharBuf = new char[byteLength]; + } + return ModifiedUtf8.decode(stringBytesBuf, stringCharBuf, 0, byteLength); + } else { + return new String(stringBytesBuf, 0, byteLength, charset); + } + } + private int peekShort(int offset) { return Memory.peekShort(hdrBuf, offset, ByteOrder.LITTLE_ENDIAN) & 0xffff; } diff --git a/luni/src/main/java/java/util/zip/ZipOutputStream.java b/luni/src/main/java/java/util/zip/ZipOutputStream.java index c4d7560..ac6bdf2 100644 --- a/luni/src/main/java/java/util/zip/ZipOutputStream.java +++ b/luni/src/main/java/java/util/zip/ZipOutputStream.java @@ -20,6 +20,7 @@ package java.util.zip; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashSet; @@ -75,6 +76,8 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant private final HashSet<String> entries = new HashSet<String>(); + private final Charset charset; + private int defaultCompressionMethod = DEFLATED; private int compressionLevel = Deflater.DEFAULT_COMPRESSION; @@ -85,16 +88,40 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant private final CRC32 crc = new CRC32(); - private int offset = 0, curOffset = 0, nameLength; + private int offset = 0, curOffset = 0; + /** The charset-encoded name for the current entry. */ private byte[] nameBytes; + /** The charset-encoded comment for the current entry. */ + private byte[] entryCommentBytes; + /** - * Constructs a new {@code ZipOutputStream} that writes a zip file - * to the given {@code OutputStream}. + * Constructs a new {@code ZipOutputStream} that writes a zip file to the given + * {@code OutputStream}. + * + * <p>UTF-8 will be used to encode the file comment, entry names and comments. */ public ZipOutputStream(OutputStream os) { + this(os, StandardCharsets.UTF_8); + } + + /** + * Constructs a new {@code ZipOutputStream} that writes a zip file to the given + * {@code OutputStream}. + * + * <p>The specified character set will be used to encode the file comment, entry names and + * comments. + * + * @since 1.7 + * @hide Until ready for an API update + */ + public ZipOutputStream(OutputStream os, Charset charset) { super(os, new Deflater(Deflater.DEFAULT_COMPRESSION, true)); + if (charset == null) { + throw new NullPointerException("charset == null"); + } + this.charset = charset; } /** @@ -153,10 +180,12 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant // Update the CentralDirectory // http://www.pkware.com/documents/casestudies/APPNOTE.TXT int flags = currentEntry.getMethod() == STORED ? 0 : ZipFile.GPBF_DATA_DESCRIPTOR_FLAG; - // Since gingerbread, we always set the UTF-8 flag on individual files. - // Some tools insist that the central directory also have the UTF-8 flag. + // Since gingerbread, we always set the UTF-8 flag on individual files if appropriate. + // Some tools insist that the central directory have the UTF-8 flag. // http://code.google.com/p/android/issues/detail?id=20214 - flags |= ZipFile.GPBF_UTF8_FLAG; + if (charset == StandardCharsets.UTF_8) { + flags |= ZipFile.GPBF_UTF8_FLAG; + } writeLong(cDir, CENSIG); writeShort(cDir, ZIP_VERSION_2_0); // Version this file was made by. writeShort(cDir, ZIP_VERSION_2_0); // Minimum version needed to extract. @@ -172,19 +201,14 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant curOffset += writeLong(cDir, crc.tbytes); writeLong(cDir, crc.tbytes); } - curOffset += writeShort(cDir, nameLength); + curOffset += writeShort(cDir, nameBytes.length); if (currentEntry.extra != null) { curOffset += writeShort(cDir, currentEntry.extra.length); } else { writeShort(cDir, 0); } - String comment = currentEntry.getComment(); - byte[] commentBytes = EmptyArray.BYTE; - if (comment != null) { - commentBytes = comment.getBytes(StandardCharsets.UTF_8); - } - writeShort(cDir, commentBytes.length); // Comment length. + writeShort(cDir, entryCommentBytes.length); // Comment length. writeShort(cDir, 0); // Disk Start writeShort(cDir, 0); // Internal File Attributes writeLong(cDir, 0); // External File Attributes @@ -195,8 +219,9 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant cDir.write(currentEntry.extra); } offset += curOffset; - if (commentBytes.length > 0) { - cDir.write(commentBytes); + if (entryCommentBytes.length > 0) { + cDir.write(entryCommentBytes); + entryCommentBytes = EmptyArray.BYTE; } currentEntry = null; crc.reset(); @@ -294,10 +319,14 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant // TODO: support Zip64. throw new ZipException("Too many entries for the zip file format's 16-bit entry count"); } - nameBytes = ze.name.getBytes(StandardCharsets.UTF_8); - nameLength = nameBytes.length; - if (nameLength > 0xffff) { - throw new IllegalArgumentException("Name too long: " + nameLength + " UTF-8 bytes"); + nameBytes = ze.name.getBytes(charset); + checkSizeIsWithinShort("Name", nameBytes); + entryCommentBytes = EmptyArray.BYTE; + if (ze.comment != null) { + entryCommentBytes = ze.comment.getBytes(charset); + // The comment is not written out until the entry is finished, but it is validated here + // to fail-fast. + checkSizeIsWithinShort("Comment", entryCommentBytes); } def.setLevel(compressionLevel); @@ -310,8 +339,10 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant // http://www.pkware.com/documents/casestudies/APPNOTE.TXT int flags = (method == STORED) ? 0 : ZipFile.GPBF_DATA_DESCRIPTOR_FLAG; // Java always outputs UTF-8 filenames. (Before Java 7, the RI didn't set this flag and used - // modified UTF-8. From Java 7, it sets this flag and uses normal UTF-8.) - flags |= ZipFile.GPBF_UTF8_FLAG; + // modified UTF-8. From Java 7, when using UTF_8 it sets this flag and uses normal UTF-8.) + if (charset == StandardCharsets.UTF_8) { + flags |= ZipFile.GPBF_UTF8_FLAG; + } writeLong(out, LOCSIG); // Entry header writeShort(out, ZIP_VERSION_2_0); // Minimum version needed to extract. writeShort(out, flags); @@ -331,7 +362,7 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant writeLong(out, 0); writeLong(out, 0); } - writeShort(out, nameLength); + writeShort(out, nameBytes.length); if (currentEntry.extra != null) { writeShort(out, currentEntry.extra.length); } else { @@ -345,18 +376,16 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant /** * Sets the comment associated with the file being written. See {@link ZipFile#getComment}. - * @throws IllegalArgumentException if the comment is >= 64 Ki UTF-8 bytes. + * @throws IllegalArgumentException if the comment is >= 64 Ki encoded bytes. */ public void setComment(String comment) { if (comment == null) { - this.commentBytes = null; + this.commentBytes = EmptyArray.BYTE; return; } - byte[] newCommentBytes = comment.getBytes(StandardCharsets.UTF_8); - if (newCommentBytes.length > 0xffff) { - throw new IllegalArgumentException("Comment too long: " + newCommentBytes.length + " bytes"); - } + byte[] newCommentBytes = comment.getBytes(charset); + checkSizeIsWithinShort("Comment", newCommentBytes); this.commentBytes = newCommentBytes; } @@ -423,4 +452,11 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant throw new IOException("Stream is closed"); } } + + private void checkSizeIsWithinShort(String property, byte[] bytes) { + if (bytes.length > 0xffff) { + throw new IllegalArgumentException( + property + " too long in " + charset + ":" + bytes.length + " bytes"); + } + } } diff --git a/luni/src/test/java/libcore/java/util/zip/ZipFileTest.java b/luni/src/test/java/libcore/java/util/zip/ZipFileTest.java index 8afc223..81ff673 100644 --- a/luni/src/test/java/libcore/java/util/zip/ZipFileTest.java +++ b/luni/src/test/java/libcore/java/util/zip/ZipFileTest.java @@ -23,8 +23,15 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Enumeration; +import java.util.HashSet; +import java.util.List; import java.util.Random; +import java.util.Set; import java.util.zip.CRC32; import java.util.zip.ZipEntry; import java.util.zip.ZipException; @@ -161,7 +168,7 @@ public final class ZipFileTest extends TestCase { } assertEquals(expectedLength, count); - + zip.close(); } public void testInflatingStreamsRequiringZipRefill() throws IOException { @@ -210,6 +217,96 @@ public final class ZipFileTest extends TestCase { } } + public void testNullCharset() throws IOException { + try { + new ZipFile(createTemporaryZipFile(), null); + fail(); + } catch (NullPointerException expected) { + } + } + + // Tests that non-UTF8 encoded zip files can be interpreted. Relies on ZipOutputStream. + public void testNonUtf8Encoding() throws IOException { + Charset charset = Charset.forName("Cp437"); + String encodingDependentString = "\u00FB"; + assertEncodingDiffers(encodingDependentString, charset, StandardCharsets.US_ASCII, + StandardCharsets.UTF_8); + String name = "name" + encodingDependentString; + String comment = "comment" + encodingDependentString; + + File result = createTemporaryZipFile(); + OutputStream os = new BufferedOutputStream(new FileOutputStream(result)); + ZipOutputStream out = new ZipOutputStream(os, charset); + out.setComment(comment); + ZipEntry writeEntry = new ZipEntry(name); + writeEntry.setComment(comment); + out.putNextEntry(writeEntry); + out.write("FileContentsIrrelevant".getBytes()); + out.closeEntry(); + out.close(); + + ZipFile zipFile = new ZipFile(result, StandardCharsets.US_ASCII); + assertNull(zipFile.getEntry(name)); + assertFalse(zipFile.getComment().equals(comment)); + zipFile.close(); + + zipFile = new ZipFile(result, charset); + ZipEntry readEntry = zipFile.getEntry(name); + assertNotNull(readEntry); + assertEquals(name, readEntry.getName()); + assertEquals(comment, readEntry.getComment()); + assertEquals(comment, zipFile.getComment()); + zipFile.close(); + } + + // Tests that UTF8 encoded zip files can be interpreted when the constructor is provided with a + // non-UTF-8 encoding. Relies on ZipOutputStream. + public void testUtf8EncodingOverridesConstructor() throws IOException { + Charset charset = Charset.forName("Cp437"); + String encodingDependentString = "\u00FB"; + assertEncodingDiffers(encodingDependentString, charset, StandardCharsets.UTF_8); + String name = "name" + encodingDependentString; + String comment = "comment" + encodingDependentString; + + File result = createTemporaryZipFile(); + OutputStream os = new BufferedOutputStream(new FileOutputStream(result)); + ZipOutputStream out = new ZipOutputStream(os, StandardCharsets.UTF_8); + // The file comment does not get meta-data about the character encoding. + out.setComment(comment); + // The entry will be tagged as being UTF-8 encoded. + ZipEntry writeEntry = new ZipEntry(name); + writeEntry.setComment(comment); + out.putNextEntry(writeEntry); + out.write("FileContentsIrrelevant".getBytes()); + out.closeEntry(); + out.close(); + + ZipFile zipFile = new ZipFile(result, charset); + // The entry should be found, because it should be tagged as being UTF-8 encoded. + ZipEntry readEntry = zipFile.getEntry(name); + assertNotNull(readEntry); + assertEquals(name, readEntry.getName()); + assertEquals(comment, readEntry.getComment()); + // We expect the comment to be mangled because it is not tagged. + assertFalse(zipFile.getComment().equals(comment)); + zipFile.close(); + } + + /** + * Asserts the byte encoding for the string is different for all the supplied character + * sets. + */ + private void assertEncodingDiffers(String string, Charset... charsets) { + Set<List<Byte>> encodings = new HashSet<List<Byte>>(); + for (int i = 0; i < charsets.length; i++) { + List<Byte> byteList = new ArrayList<Byte>(); + for (byte b : string.getBytes(charsets[i])) { + byteList.add(b); + } + assertTrue("Encoding has been seen before", encodings.add(byteList)); + } + } + /** * Compresses the given number of files, each of the given size, into a .zip archive. */ @@ -219,21 +316,23 @@ public final class ZipFileTest extends TestCase { byte[] writeBuffer = new byte[8192]; Random random = new Random(); - ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(result))); - for (int entry = 0; entry < entryCount; ++entry) { - ZipEntry ze = new ZipEntry(Integer.toHexString(entry)); - out.putNextEntry(ze); + ZipOutputStream out = createZipOutputStream(result); + try { + for (int entry = 0; entry < entryCount; ++entry) { + ZipEntry ze = new ZipEntry(Integer.toHexString(entry)); + out.putNextEntry(ze); + + for (int i = 0; i < entrySize; i += writeBuffer.length) { + random.nextBytes(writeBuffer); + int byteCount = Math.min(writeBuffer.length, entrySize - i); + out.write(writeBuffer, 0, byteCount); + } - for (int i = 0; i < entrySize; i += writeBuffer.length) { - random.nextBytes(writeBuffer); - int byteCount = Math.min(writeBuffer.length, entrySize - i); - out.write(writeBuffer, 0, byteCount); + out.closeEntry(); } - - out.closeEntry(); + } finally { + out.close(); } - - out.close(); return result; } diff --git a/luni/src/test/java/libcore/java/util/zip/ZipInputStreamTest.java b/luni/src/test/java/libcore/java/util/zip/ZipInputStreamTest.java index 3d6e600..2daa127 100644 --- a/luni/src/test/java/libcore/java/util/zip/ZipInputStreamTest.java +++ b/luni/src/test/java/libcore/java/util/zip/ZipInputStreamTest.java @@ -20,10 +20,18 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; +import java.util.List; import java.util.Random; +import java.util.Set; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; + import junit.framework.TestCase; import tests.support.resource.Support_Resources; @@ -42,6 +50,97 @@ public final class ZipInputStreamTest extends TestCase { assertTrue(Arrays.equals(data, unzip("r", ZipOutputStreamTest.zip("r", data)))); } + public void testNullCharset() throws IOException { + try { + new ZipInputStream(new ByteArrayInputStream(new byte[1]), null); + fail(); + } catch (NullPointerException expected) { + } + } + + // Tests that non-UTF8 encoded zip file entries can be interpreted. Relies on ZipOutputStream. + public void testNonUtf8Encoding() throws IOException { + Charset charset = Charset.forName("Cp437"); + String encodingDependentString = "\u00FB"; + assertEncodingDiffers(encodingDependentString, charset, StandardCharsets.US_ASCII, + StandardCharsets.UTF_8); + String name = "name" + encodingDependentString; + String comment = "comment" + encodingDependentString; + + ByteArrayOutputStream bytesOutputStream = new ByteArrayOutputStream(); + ZipOutputStream out = new ZipOutputStream(bytesOutputStream, charset); + ZipEntry writeEntry = new ZipEntry(name); + writeEntry.setComment(comment); + out.putNextEntry(writeEntry); + out.write("FileContentsIrrelevant".getBytes()); + out.closeEntry(); + out.close(); + + ByteArrayInputStream bytesInputStream = + new ByteArrayInputStream(bytesOutputStream.toByteArray()); + ZipInputStream in = new ZipInputStream(bytesInputStream, StandardCharsets.US_ASCII); + ZipEntry readEntry = in.getNextEntry(); + // Due to the way ZipInputStream works it never returns entry comments. + assertNull("ZipInputStream must not retrieve comments", readEntry.getComment()); + assertFalse(readEntry.getName().equals(name)); + in.close(); + + bytesInputStream = new ByteArrayInputStream(bytesOutputStream.toByteArray()); + in = new ZipInputStream(bytesInputStream, charset); + readEntry = in.getNextEntry(); + // Because ZipInputStream never reads the central directory it never returns entry + // comments or the file comment. + assertNull("ZipInputStream must not retrieve comments", readEntry.getComment()); + assertEquals(name, readEntry.getName()); + in.close(); + } + + // Tests that UTF8 encoded zip file entries can be interpreted when the constructor is provided + // with a non-UTF-8 encoding. Relies on ZipOutputStream. + public void testUtf8EncodingOverridesConstructor() throws IOException { + Charset charset = Charset.forName("Cp437"); + String encodingDependentString = "\u00FB"; + assertEncodingDiffers(encodingDependentString, charset, StandardCharsets.UTF_8); + String name = "name" + encodingDependentString; + String comment = "comment" + encodingDependentString; + + ByteArrayOutputStream bytesOutputStream = new ByteArrayOutputStream(); + ZipOutputStream out = new ZipOutputStream(bytesOutputStream, StandardCharsets.UTF_8); + // The entry will be tagged as being UTF-8 encoded. + ZipEntry writeEntry = new ZipEntry(name); + writeEntry.setComment(comment); + out.putNextEntry(writeEntry); + out.write("FileContentsIrrelevant".getBytes()); + out.closeEntry(); + out.close(); + + ByteArrayInputStream bytesInputStream = + new ByteArrayInputStream(bytesOutputStream.toByteArray()); + ZipInputStream in = new ZipInputStream(bytesInputStream, charset); + ZipEntry readEntry = in.getNextEntry(); + // Because ZipInputStream never reads the central directory it never returns entry + // comments or the file comment. + assertNull("ZipInputStream must not retrieve comments", readEntry.getComment()); + assertNotNull(readEntry); + assertEquals(name, readEntry.getName()); + in.close(); + } + + /** + * Asserts the byte encoding for the string is different for all the supplied character + * sets. + */ + private void assertEncodingDiffers(String string, Charset... charsets) { + Set<List<Byte>> encodings = new HashSet<List<Byte>>(); + for (int i = 0; i < charsets.length; i++) { + List<Byte> byteList = new ArrayList<Byte>(); + for (byte b : string.getBytes(charsets[i])) { + byteList.add(b); + } + assertTrue("Encoding has been seen before", encodings.add(byteList)); + } + } + public static byte[] unzip(String name, byte[] bytes) throws IOException { ZipInputStream in = new ZipInputStream(new ByteArrayInputStream(bytes)); ByteArrayOutputStream out = new ByteArrayOutputStream(); diff --git a/luni/src/test/java/libcore/java/util/zip/ZipOutputStreamTest.java b/luni/src/test/java/libcore/java/util/zip/ZipOutputStreamTest.java index dc80512..92afffa 100644 --- a/luni/src/test/java/libcore/java/util/zip/ZipOutputStreamTest.java +++ b/luni/src/test/java/libcore/java/util/zip/ZipOutputStreamTest.java @@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; import java.util.zip.ZipEntry; @@ -76,4 +77,72 @@ public final class ZipOutputStreamTest extends TestCase { // expected } } + + public void testNullCharset() throws IOException { + try { + new ZipOutputStream(new ByteArrayOutputStream(), null); + fail(); + } catch (NullPointerException expected) { + } + } + + /** Regression test for null comment causing a NullPointerException during write. */ + public void testNullComment() throws IOException { + ZipOutputStream out = new ZipOutputStream(new ByteArrayOutputStream()); + out.setComment(null); + out.putNextEntry(new ZipEntry("name")); + out.write(new byte[1]); + out.closeEntry(); + out.finish(); + } + + /** Contrived test to force a longer name than can be stored. */ + public void testLongName() throws IOException { + int maxNameBytes = 0xffff; // 2 bytes + String longName = createString(maxNameBytes); + + ZipEntry entry = new ZipEntry(longName); + + // Using UTF-16 will result in name bytes twice as large as is supported by Zip. + // UTF-16 is an unlikely character set to actually want to use with Zip but enables + // the edge-case behavior required without using direct field access. + ZipOutputStream out = new ZipOutputStream( + new ByteArrayOutputStream(), StandardCharsets.UTF_16); + try { + out.putNextEntry(entry); + fail(); + } catch (IllegalArgumentException expected) { + } + } + + /** Contrived test to force a longer comment than can be stored. */ + public void testLongComment() throws IOException { + int maxCommentBytes = 0xffff; // 2 bytes + String longComment = createString(maxCommentBytes); + + ZipEntry entry = new ZipEntry("name"); + // setComment() should pass, because it is at the limit of what ZipEntry will detect as + // valid (since it uses UTF-8 as a worst-case guess). + entry.setComment(longComment); + + // Using UTF-16 will result in comment bytes twice as large as is supported by Zip. + // UTF-16 is an unlikely character set to actually want to use with Zip but enables + // the edge-case behavior required without using direct field access. + ZipOutputStream out = new ZipOutputStream( + new ByteArrayOutputStream(), StandardCharsets.UTF_16); + try { + out.putNextEntry(entry); + fail(); + } catch (IllegalArgumentException expected) { + } + } + + private static String createString(int numChars) { + char c = 'a'; + StringBuilder sb = new StringBuilder(numChars); + for (int i = 0; i < numChars; i++) { + sb.append(c); + } + return sb.toString(); + } } |