Add Java 1.7 APIs: ZipFile, ZipInputStream, ZipOutputStream

Add constructor support for the character encoding to use. When reading a ZipFile this is used for the file comment and for entry names and comments when a zip entry is not self-described as UTF-8 encoded. When writing a zip file the character encoding determines the encoding used for the file comment, entry names and comments. The default for reading and writing when the character encoding is not specified remains as UTF-8. Additional fix: ZipOutputStream: null comments no longer cause NullPointerException. Change-Id: I7cadfa939377d0f87fd5503dae2a0b2dbac2ba39
author: Neil Fuller <nfuller@google.com> 2014-03-11 10:47:07 +0000
committer: Neil Fuller <nfuller@google.com> 2014-03-12 10:15:35 +0000
commit: e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb (patch)
tree: 5850cc100b53a90a157c29e5b59b012921648140 /luni
parent: 4189a6e183e8c38992df6de29321733fad06e50a (diff)
download: libcore-e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb.zip
libcore-e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb.tar.gz
libcore-e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb.tar.bz2
7 files changed, 478 insertions, 69 deletions
diff --git a/luni/src/main/java/java/util/zip/ZipEntry.java b/luni/src/main/java/java/util/zip/ZipEntry.java
index 69f027a..217cc3c 100644
--- a/luni/src/main/java/java/util/zip/ZipEntry.java
+++ b/luni/src/main/java/java/util/zip/ZipEntry.java
@@ -20,6 +20,7 @@ package java.util.zip;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteOrder;
+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Calendar;
@@ -94,9 +95,7 @@ public class ZipEntry implements ZipConstants, Cloneable {
         if (name == null) {
             throw new NullPointerException("name == null");
         }
-        if (name.length() > 0xFFFF) {
-            throw new IllegalArgumentException("Name too long: " + name.length());
-        }
+        validateStringLength("Name", name);
         this.name = name;
     }
 
@@ -203,11 +202,8 @@ public class ZipEntry implements ZipConstants, Cloneable {
             this.comment = null;
             return;
         }
+        validateStringLength("Comment", comment);
 
-        byte[] commentBytes = comment.getBytes(StandardCharsets.UTF_8);
-        if (commentBytes.length > 0xffff) {
-            throw new IllegalArgumentException("Comment too long: " + commentBytes.length);
-        }
         this.comment = comment;
     }
 
@@ -375,12 +371,14 @@ public class ZipEntry implements ZipConstants, Cloneable {
     /*
      * Internal constructor.  Creates a new ZipEntry by reading the
      * Central Directory Entry (CDE) from "in", which must be positioned
-     * at the CDE signature.
+     * at the CDE signature. If the GPBF_UTF8_FLAG is set in the CDE then
+     * UTF-8 is used to decode the string information, otherwise the
+     * defaultCharset is used.
      *
      * On exit, "in" will be positioned at the start of the next entry
      * in the Central Directory.
      */
-    ZipEntry(byte[] cdeHdrBuf, InputStream cdStream) throws IOException {
+    ZipEntry(byte[] cdeHdrBuf, InputStream cdStream, Charset defaultCharset) throws IOException {
         Streams.readFully(cdStream, cdeHdrBuf, 0, cdeHdrBuf.length);
 
         BufferIterator it = HeapBufferIterator.iterator(cdeHdrBuf, 0, cdeHdrBuf.length,
@@ -398,6 +396,13 @@ public class ZipEntry implements ZipConstants, Cloneable {
             throw new ZipException("Invalid General Purpose Bit Flag: " + gpbf);
         }
 
+        // If the GPBF_UTF8_FLAG is set then the character encoding is UTF-8 whatever the default
+        // provided.
+        Charset charset = defaultCharset;
+        if ((gpbf & ZipFile.GPBF_UTF8_FLAG) != 0) {
+            charset = StandardCharsets.UTF_8;
+        }
+
         compressionMethod = it.readShort() & 0xffff;
         time = it.readShort() & 0xffff;
         modDate = it.readShort() & 0xffff;
@@ -420,19 +425,17 @@ public class ZipEntry implements ZipConstants, Cloneable {
         if (containsNulByte(nameBytes)) {
             throw new ZipException("Filename contains NUL byte: " + Arrays.toString(nameBytes));
         }
-        name = new String(nameBytes, 0, nameBytes.length, StandardCharsets.UTF_8);
+        name = new String(nameBytes, 0, nameBytes.length, charset);
 
         if (extraLength > 0) {
             extra = new byte[extraLength];
             Streams.readFully(cdStream, extra, 0, extraLength);
         }
 
-        // The RI has always assumed UTF-8. (If GPBF_UTF8_FLAG isn't set, the encoding is
-        // actually IBM-437.)
         if (commentByteCount > 0) {
             byte[] commentBytes = new byte[commentByteCount];
             Streams.readFully(cdStream, commentBytes, 0, commentByteCount);
-            comment = new String(commentBytes, 0, commentBytes.length, StandardCharsets.UTF_8);
+            comment = new String(commentBytes, 0, commentBytes.length, charset);
         }
     }
 
@@ -444,4 +447,14 @@ public class ZipEntry implements ZipConstants, Cloneable {
         }
         return false;
     }
+
+    private static void validateStringLength(String argument, String string) {
+        // This check is not perfect: the character encoding is determined when the entry is
+        // written out. UTF-8 is probably a worst-case: most alternatives should be single byte per
+        // character.
+        byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
+        if (bytes.length > 0xffff) {
+            throw new IllegalArgumentException(argument + " too long: " + bytes.length);
+        }
+    }
 }
diff --git a/luni/src/main/java/java/util/zip/ZipFile.java b/luni/src/main/java/java/util/zip/ZipFile.java
index 4380281..4b3e431 100644
--- a/luni/src/main/java/java/util/zip/ZipFile.java
+++ b/luni/src/main/java/java/util/zip/ZipFile.java
@@ -26,6 +26,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.nio.ByteOrder;
+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Enumeration;
 import java.util.Iterator;
@@ -96,6 +97,8 @@ public class ZipFile implements Closeable, ZipConstants {
 
     private final String filename;
 
+    private final Charset charset;
+
     private File fileToDeleteOnClose;
 
     private RandomAccessFile raf;
@@ -108,33 +111,81 @@ public class ZipFile implements Closeable, ZipConstants {
 
     /**
      * Constructs a new {@code ZipFile} allowing read access to the contents of the given file.
+     *
+     * <p>UTF-8 is used to decode all comments and entry names in the file.
+     *
      * @throws ZipException if a zip error occurs.
      * @throws IOException if an {@code IOException} occurs.
      */
     public ZipFile(File file) throws ZipException, IOException {
-        this(file, OPEN_READ);
+        this(file, OPEN_READ, StandardCharsets.UTF_8);
     }
 
     /**
      * Constructs a new {@code ZipFile} allowing read access to the contents of the given file.
+     *
+     * <p>The {@code charset} is used to decode the file comment if one exists. If the character
+     * encoding for entry names and comments is not explicitly marked as UTF-8 by the zip file
+     * then {@code charset} is used to decode them.
+     *
+     * @throws ZipException if a zip error occurs.
+     * @throws IOException if an {@code IOException} occurs.
+     * @since 1.7
+     * @hide Until ready for an API update
+     */
+    public ZipFile(File file, Charset charset) throws ZipException, IOException {
+        this(file, OPEN_READ, charset);
+    }
+
+    /**
+     * Constructs a new {@code ZipFile} allowing read access to the contents of the given file.
+     *
+     * <p>UTF-8 is used to decode all comments and entry names in the file.
+     *
      * @throws IOException if an IOException occurs.
      */
     public ZipFile(String name) throws IOException {
-        this(new File(name), OPEN_READ);
+        this(new File(name), OPEN_READ, StandardCharsets.UTF_8);
     }
 
     /**
      * Constructs a new {@code ZipFile} allowing access to the given file.
-     * The {@code mode} must be either {@code OPEN_READ} or {@code OPEN_READ|OPEN_DELETE}.
      *
-     * <p>If the {@code OPEN_DELETE} flag is supplied, the file will be deleted at or before the
+     * <p>UTF-8 is used to decode all comments and entry names in the file.
+     *
+     * <p>The {@code mode} must be either {@code OPEN_READ} or {@code OPEN_READ|OPEN_DELETE}.
+     * If the {@code OPEN_DELETE} flag is supplied, the file will be deleted at or before the
      * time that the {@code ZipFile} is closed (the contents will remain accessible until
      * this {@code ZipFile} is closed); it also calls {@code File.deleteOnExit}.
      *
      * @throws IOException if an {@code IOException} occurs.
      */
     public ZipFile(File file, int mode) throws IOException {
+        this(file, mode, StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Constructs a new {@code ZipFile} allowing access to the given file.
+     *
+     * <p>The {@code mode} must be either {@code OPEN_READ} or {@code OPEN_READ|OPEN_DELETE}.
+     * If the {@code OPEN_DELETE} flag is supplied, the file will be deleted at or before the
+     * time that the {@code ZipFile} is closed (the contents will remain accessible until
+     * this {@code ZipFile} is closed); it also calls {@code File.deleteOnExit}.
+     *
+     * <p>The {@code charset} is used to decode the file comment if one exists. If the character
+     * encoding for entry names and comments is not explicitly marked as UTF-8 by the zip file
+     * then {@code charset} is used to decode them.
+     *
+     * @throws IOException if an {@code IOException} occurs.
+     * @since 1.7
+     * @hide Until ready for an API update
+     */
+    public ZipFile(File file, int mode, Charset charset) throws IOException {
         filename = file.getPath();
+        if (charset == null) {
+            throw new NullPointerException("charset == null");
+        }
+        this.charset = charset;
         if (mode != OPEN_READ && mode != (OPEN_READ | OPEN_DELETE)) {
             throw new IllegalArgumentException("Bad mode: " + mode);
         }
@@ -403,7 +454,7 @@ public class ZipFile implements Closeable, ZipConstants {
         if (commentLength > 0) {
             byte[] commentBytes = new byte[commentLength];
             raf.readFully(commentBytes);
-            comment = new String(commentBytes, 0, commentBytes.length, StandardCharsets.UTF_8);
+            comment = new String(commentBytes, 0, commentBytes.length, charset);
         }
 
         // Seek to the first CDE and read all entries.
@@ -414,7 +465,7 @@ public class ZipFile implements Closeable, ZipConstants {
         BufferedInputStream bufferedStream = new BufferedInputStream(rafStream, 4096);
         byte[] hdrBuf = new byte[CENHDR]; // Reuse the same buffer for each entry.
         for (int i = 0; i < numEntries; ++i) {
-            ZipEntry newEntry = new ZipEntry(hdrBuf, bufferedStream);
+            ZipEntry newEntry = new ZipEntry(hdrBuf, bufferedStream, charset);
             if (newEntry.localHeaderRelOffset >= centralDirOffset) {
                 throw new ZipException("Local file header offset is after central directory");
             }
diff --git a/luni/src/main/java/java/util/zip/ZipInputStream.java b/luni/src/main/java/java/util/zip/ZipInputStream.java
index 17f3938..5a73619 100644
--- a/luni/src/main/java/java/util/zip/ZipInputStream.java
+++ b/luni/src/main/java/java/util/zip/ZipInputStream.java
@@ -21,7 +21,9 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.PushbackInputStream;
 import java.nio.ByteOrder;
+import java.nio.charset.Charset;
 import java.nio.charset.ModifiedUtf8;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import libcore.io.Memory;
 import libcore.io.Streams;
@@ -84,18 +86,39 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
 
     private final CRC32 crc = new CRC32();
 
-    private byte[] nameBuf = new byte[256];
+    private byte[] stringBytesBuf = new byte[256];
 
-    private char[] charBuf = new char[256];
+    private char[] stringCharBuf = new char[256];
+
+    private final Charset charset;
 
     /**
      * Constructs a new {@code ZipInputStream} to read zip entries from the given input stream.
+     *
+     * <p>UTF-8 is used to decode all strings in the file.
      */
     public ZipInputStream(InputStream stream) {
+        this(stream, StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Constructs a new {@code ZipInputStream} to read zip entries from the given input stream.
+     *
+     * <p>If the character encoding for entry names and comments is not explicitly marked as UTF-8
+     * by the zip file then {@code charset} is used to decode them.
+     *
+     * @since 1.7
+     * @hide Until ready for an API update
+     */
+    public ZipInputStream(InputStream stream, Charset charset) {
         super(new PushbackInputStream(stream, BUF_SIZE), new Inflater(true));
         if (stream == null) {
             throw new NullPointerException("stream == null");
         }
+        if (charset == null) {
+            throw new NullPointerException("charset == null");
+        }
+        this.charset = charset;
     }
 
     /**
@@ -249,14 +272,13 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
         }
         int extraLength = peekShort(LOCEXT - LOCVER);
 
-        if (nameLength > nameBuf.length) {
-            nameBuf = new byte[nameLength];
-            // The bytes are modified UTF-8, so the number of chars will always be less than or
-            // equal to the number of bytes. It's fine if this buffer is too long.
-            charBuf = new char[nameLength];
+        // Determine the character set to use to decode strings.
+        Charset charset = this.charset;
+        if ((flags & ZipFile.GPBF_UTF8_FLAG) != 0) {
+            charset = StandardCharsets.UTF_8;
         }
-        Streams.readFully(in, nameBuf, 0, nameLength);
-        currentEntry = createZipEntry(ModifiedUtf8.decode(nameBuf, charBuf, 0, nameLength));
+        String name = readString(nameLength, charset);
+        currentEntry = createZipEntry(name);
         currentEntry.time = ceLastModifiedTime;
         currentEntry.modDate = ceLastModifiedDate;
         currentEntry.setMethod(ceCompressionMethod);
@@ -273,6 +295,26 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
         return currentEntry;
     }
 
+    /**
+     * Reads bytes from the current stream position returning the string representation.
+     */
+    private String readString(int byteLength, Charset charset) throws IOException {
+        if (byteLength > stringBytesBuf.length) {
+            stringBytesBuf = new byte[byteLength];
+        }
+        Streams.readFully(in, stringBytesBuf, 0, byteLength);
+        if (charset == StandardCharsets.UTF_8) {
+            // The number of chars will always be less than or equal to the number of bytes. It's
+            // fine if this buffer is too long.
+            if (byteLength > stringCharBuf.length) {
+                stringCharBuf = new char[byteLength];
+            }
+            return ModifiedUtf8.decode(stringBytesBuf, stringCharBuf, 0, byteLength);
+        } else {
+            return new String(stringBytesBuf, 0, byteLength, charset);
+        }
+    }
+
     private int peekShort(int offset) {
         return Memory.peekShort(hdrBuf, offset, ByteOrder.LITTLE_ENDIAN) & 0xffff;
     }
diff --git a/luni/src/main/java/java/util/zip/ZipOutputStream.java b/luni/src/main/java/java/util/zip/ZipOutputStream.java
index c4d7560..ac6bdf2 100644
--- a/luni/src/main/java/java/util/zip/ZipOutputStream.java
+++ b/luni/src/main/java/java/util/zip/ZipOutputStream.java
@@ -20,6 +20,7 @@ package java.util.zip;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.HashSet;
@@ -75,6 +76,8 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
 
     private final HashSet<String> entries = new HashSet<String>();
 
+    private final Charset charset;
+
     private int defaultCompressionMethod = DEFLATED;
 
     private int compressionLevel = Deflater.DEFAULT_COMPRESSION;
@@ -85,16 +88,40 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
 
     private final CRC32 crc = new CRC32();
 
-    private int offset = 0, curOffset = 0, nameLength;
+    private int offset = 0, curOffset = 0;
 
+    /** The charset-encoded name for the current entry. */
     private byte[] nameBytes;
 
+    /** The charset-encoded comment for the current entry. */
+    private byte[] entryCommentBytes;
+
     /**
-     * Constructs a new {@code ZipOutputStream} that writes a zip file
-     * to the given {@code OutputStream}.
+     * Constructs a new {@code ZipOutputStream} that writes a zip file to the given
+     * {@code OutputStream}.
+     *
+     * <p>UTF-8 will be used to encode the file comment, entry names and comments.
      */
     public ZipOutputStream(OutputStream os) {
+        this(os, StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Constructs a new {@code ZipOutputStream} that writes a zip file to the given
+     * {@code OutputStream}.
+     *
+     * <p>The specified character set will be used to encode the file comment, entry names and
+     * comments.
+     *
+     * @since 1.7
+     * @hide Until ready for an API update
+     */
+    public ZipOutputStream(OutputStream os, Charset charset) {
         super(os, new Deflater(Deflater.DEFAULT_COMPRESSION, true));
+        if (charset == null) {
+            throw new NullPointerException("charset == null");
+        }
+        this.charset = charset;
     }
 
     /**
@@ -153,10 +180,12 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
         // Update the CentralDirectory
         // http://www.pkware.com/documents/casestudies/APPNOTE.TXT
         int flags = currentEntry.getMethod() == STORED ? 0 : ZipFile.GPBF_DATA_DESCRIPTOR_FLAG;
-        // Since gingerbread, we always set the UTF-8 flag on individual files.
-        // Some tools insist that the central directory also have the UTF-8 flag.
+        // Since gingerbread, we always set the UTF-8 flag on individual files if appropriate.
+        // Some tools insist that the central directory have the UTF-8 flag.
         // http://code.google.com/p/android/issues/detail?id=20214
-        flags |= ZipFile.GPBF_UTF8_FLAG;
+        if (charset == StandardCharsets.UTF_8) {
+            flags |= ZipFile.GPBF_UTF8_FLAG;
+        }
         writeLong(cDir, CENSIG);
         writeShort(cDir, ZIP_VERSION_2_0); // Version this file was made by.
         writeShort(cDir, ZIP_VERSION_2_0); // Minimum version needed to extract.
@@ -172,19 +201,14 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
             curOffset += writeLong(cDir, crc.tbytes);
             writeLong(cDir, crc.tbytes);
         }
-        curOffset += writeShort(cDir, nameLength);
+        curOffset += writeShort(cDir, nameBytes.length);
         if (currentEntry.extra != null) {
             curOffset += writeShort(cDir, currentEntry.extra.length);
         } else {
             writeShort(cDir, 0);
         }
 
-        String comment = currentEntry.getComment();
-        byte[] commentBytes = EmptyArray.BYTE;
-        if (comment != null) {
-            commentBytes = comment.getBytes(StandardCharsets.UTF_8);
-        }
-        writeShort(cDir, commentBytes.length); // Comment length.
+        writeShort(cDir, entryCommentBytes.length); // Comment length.
         writeShort(cDir, 0); // Disk Start
         writeShort(cDir, 0); // Internal File Attributes
         writeLong(cDir, 0); // External File Attributes
@@ -195,8 +219,9 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
             cDir.write(currentEntry.extra);
         }
         offset += curOffset;
-        if (commentBytes.length > 0) {
-            cDir.write(commentBytes);
+        if (entryCommentBytes.length > 0) {
+            cDir.write(entryCommentBytes);
+            entryCommentBytes = EmptyArray.BYTE;
         }
         currentEntry = null;
         crc.reset();
@@ -294,10 +319,14 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
             // TODO: support Zip64.
             throw new ZipException("Too many entries for the zip file format's 16-bit entry count");
         }
-        nameBytes = ze.name.getBytes(StandardCharsets.UTF_8);
-        nameLength = nameBytes.length;
-        if (nameLength > 0xffff) {
-            throw new IllegalArgumentException("Name too long: " + nameLength + " UTF-8 bytes");
+        nameBytes = ze.name.getBytes(charset);
+        checkSizeIsWithinShort("Name", nameBytes);
+        entryCommentBytes = EmptyArray.BYTE;
+        if (ze.comment != null) {
+            entryCommentBytes = ze.comment.getBytes(charset);
+            // The comment is not written out until the entry is finished, but it is validated here
+            // to fail-fast.
+            checkSizeIsWithinShort("Comment", entryCommentBytes);
         }
 
         def.setLevel(compressionLevel);
@@ -310,8 +339,10 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
         // http://www.pkware.com/documents/casestudies/APPNOTE.TXT
         int flags = (method == STORED) ? 0 : ZipFile.GPBF_DATA_DESCRIPTOR_FLAG;
         // Java always outputs UTF-8 filenames. (Before Java 7, the RI didn't set this flag and used
-        // modified UTF-8. From Java 7, it sets this flag and uses normal UTF-8.)
-        flags |= ZipFile.GPBF_UTF8_FLAG;
+        // modified UTF-8. From Java 7, when using UTF_8 it sets this flag and uses normal UTF-8.)
+        if (charset == StandardCharsets.UTF_8) {
+            flags |= ZipFile.GPBF_UTF8_FLAG;
+        }
         writeLong(out, LOCSIG); // Entry header
         writeShort(out, ZIP_VERSION_2_0); // Minimum version needed to extract.
         writeShort(out, flags);
@@ -331,7 +362,7 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
             writeLong(out, 0);
             writeLong(out, 0);
         }
-        writeShort(out, nameLength);
+        writeShort(out, nameBytes.length);
         if (currentEntry.extra != null) {
             writeShort(out, currentEntry.extra.length);
         } else {
@@ -345,18 +376,16 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
 
     /**
      * Sets the comment associated with the file being written. See {@link ZipFile#getComment}.
-     * @throws IllegalArgumentException if the comment is >= 64 Ki UTF-8 bytes.
+     * @throws IllegalArgumentException if the comment is >= 64 Ki encoded bytes.
      */
     public void setComment(String comment) {
         if (comment == null) {
-            this.commentBytes = null;
+            this.commentBytes = EmptyArray.BYTE;
             return;
         }
 
-        byte[] newCommentBytes = comment.getBytes(StandardCharsets.UTF_8);
-        if (newCommentBytes.length > 0xffff) {
-            throw new IllegalArgumentException("Comment too long: " + newCommentBytes.length + " bytes");
-        }
+        byte[] newCommentBytes = comment.getBytes(charset);
+        checkSizeIsWithinShort("Comment", newCommentBytes);
         this.commentBytes = newCommentBytes;
     }
 
@@ -423,4 +452,11 @@ public class ZipOutputStream extends DeflaterOutputStream implements ZipConstant
             throw new IOException("Stream is closed");
         }
     }
+
+    private void checkSizeIsWithinShort(String property, byte[] bytes) {
+        if (bytes.length > 0xffff) {
+            throw new IllegalArgumentException(
+                    property + " too long in " + charset + ":" + bytes.length + " bytes");
+        }
+    }
 }
diff --git a/luni/src/test/java/libcore/java/util/zip/ZipFileTest.java b/luni/src/test/java/libcore/java/util/zip/ZipFileTest.java
index 8afc223..81ff673 100644
--- a/luni/src/test/java/libcore/java/util/zip/ZipFileTest.java
+++ b/luni/src/test/java/libcore/java/util/zip/ZipFileTest.java
@@ -23,8 +23,15 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
 import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Random;
+import java.util.Set;
 import java.util.zip.CRC32;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipException;
@@ -161,7 +168,7 @@ public final class ZipFileTest extends TestCase {
         }
 
         assertEquals(expectedLength, count);
-
+        zip.close();
     }
 
     public void testInflatingStreamsRequiringZipRefill() throws IOException {
@@ -210,6 +217,96 @@ public final class ZipFileTest extends TestCase {
         }
     }
 
+    public void testNullCharset() throws IOException {
+        try {
+            new ZipFile(createTemporaryZipFile(), null);
+            fail();
+        } catch (NullPointerException expected) {
+        }
+    }
+
+    // Tests that non-UTF8 encoded zip files can be interpreted. Relies on ZipOutputStream.
+    public void testNonUtf8Encoding() throws IOException {
+        Charset charset = Charset.forName("Cp437");
+        String encodingDependentString = "\u00FB";
+        assertEncodingDiffers(encodingDependentString, charset, StandardCharsets.US_ASCII,
+                StandardCharsets.UTF_8);
+        String name = "name" + encodingDependentString;
+        String comment = "comment" + encodingDependentString;
+
+        File result = createTemporaryZipFile();
+        OutputStream os = new BufferedOutputStream(new FileOutputStream(result));
+        ZipOutputStream out = new ZipOutputStream(os, charset);
+        out.setComment(comment);
+        ZipEntry writeEntry = new ZipEntry(name);
+        writeEntry.setComment(comment);
+        out.putNextEntry(writeEntry);
+        out.write("FileContentsIrrelevant".getBytes());
+        out.closeEntry();
+        out.close();
+
+        ZipFile zipFile = new ZipFile(result, StandardCharsets.US_ASCII);
+        assertNull(zipFile.getEntry(name));
+        assertFalse(zipFile.getComment().equals(comment));
+        zipFile.close();
+
+        zipFile = new ZipFile(result, charset);
+        ZipEntry readEntry = zipFile.getEntry(name);
+        assertNotNull(readEntry);
+        assertEquals(name, readEntry.getName());
+        assertEquals(comment, readEntry.getComment());
+        assertEquals(comment, zipFile.getComment());
+        zipFile.close();
+    }
+
+    // Tests that UTF8 encoded zip files can be interpreted when the constructor is provided with a
+    // non-UTF-8 encoding. Relies on ZipOutputStream.
+    public void testUtf8EncodingOverridesConstructor() throws IOException {
+        Charset charset = Charset.forName("Cp437");
+        String encodingDependentString = "\u00FB";
+        assertEncodingDiffers(encodingDependentString, charset, StandardCharsets.UTF_8);
+        String name = "name" + encodingDependentString;
+        String comment = "comment" + encodingDependentString;
+
+        File result = createTemporaryZipFile();
+        OutputStream os = new BufferedOutputStream(new FileOutputStream(result));
+        ZipOutputStream out = new ZipOutputStream(os, StandardCharsets.UTF_8);
+        // The file comment does not get meta-data about the character encoding.
+        out.setComment(comment);
+        // The entry will be tagged as being UTF-8 encoded.
+        ZipEntry writeEntry = new ZipEntry(name);
+        writeEntry.setComment(comment);
+        out.putNextEntry(writeEntry);
+        out.write("FileContentsIrrelevant".getBytes());
+        out.closeEntry();
+        out.close();
+
+        ZipFile zipFile = new ZipFile(result, charset);
+        // The entry should be found, because it should be tagged as being UTF-8 encoded.
+        ZipEntry readEntry = zipFile.getEntry(name);
+        assertNotNull(readEntry);
+        assertEquals(name, readEntry.getName());
+        assertEquals(comment, readEntry.getComment());
+        // We expect the comment to be mangled because it is not tagged.
+        assertFalse(zipFile.getComment().equals(comment));
+        zipFile.close();
+    }
+
+    /**
+     * Asserts the byte encoding for the string is different for all the supplied character
+     * sets.
+     */
+    private void assertEncodingDiffers(String string, Charset... charsets) {
+        Set<List<Byte>> encodings = new HashSet<List<Byte>>();
+        for (int i = 0; i < charsets.length; i++) {
+            List<Byte> byteList = new ArrayList<Byte>();
+            for (byte b : string.getBytes(charsets[i])) {
+                byteList.add(b);
+            }
+            assertTrue("Encoding has been seen before", encodings.add(byteList));
+        }
+    }
+
     /**
      * Compresses the given number of files, each of the given size, into a .zip archive.
      */
@@ -219,21 +316,23 @@ public final class ZipFileTest extends TestCase {
         byte[] writeBuffer = new byte[8192];
         Random random = new Random();
 
-        ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(result)));
-        for (int entry = 0; entry < entryCount; ++entry) {
-            ZipEntry ze = new ZipEntry(Integer.toHexString(entry));
-            out.putNextEntry(ze);
+        ZipOutputStream out = createZipOutputStream(result);
+        try {
+            for (int entry = 0; entry < entryCount; ++entry) {
+                ZipEntry ze = new ZipEntry(Integer.toHexString(entry));
+                out.putNextEntry(ze);
+
+                for (int i = 0; i < entrySize; i += writeBuffer.length) {
+                    random.nextBytes(writeBuffer);
+                    int byteCount = Math.min(writeBuffer.length, entrySize - i);
+                    out.write(writeBuffer, 0, byteCount);
+                }
 
-            for (int i = 0; i < entrySize; i += writeBuffer.length) {
-                random.nextBytes(writeBuffer);
-                int byteCount = Math.min(writeBuffer.length, entrySize - i);
-                out.write(writeBuffer, 0, byteCount);
+                out.closeEntry();
             }
-
-            out.closeEntry();
+        } finally {
+            out.close();
         }
-
-        out.close();
         return result;
     }
 
diff --git a/luni/src/test/java/libcore/java/util/zip/ZipInputStreamTest.java b/luni/src/test/java/libcore/java/util/zip/ZipInputStreamTest.java
index 3d6e600..2daa127 100644
--- a/luni/src/test/java/libcore/java/util/zip/ZipInputStreamTest.java
+++ b/luni/src/test/java/libcore/java/util/zip/ZipInputStreamTest.java
@@ -20,10 +20,18 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Random;
+import java.util.Set;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
+import java.util.zip.ZipOutputStream;
+
 import junit.framework.TestCase;
 
 import tests.support.resource.Support_Resources;
@@ -42,6 +50,97 @@ public final class ZipInputStreamTest extends TestCase {
         assertTrue(Arrays.equals(data, unzip("r", ZipOutputStreamTest.zip("r", data))));
     }
 
+    public void testNullCharset() throws IOException {
+        try {
+            new ZipInputStream(new ByteArrayInputStream(new byte[1]), null);
+            fail();
+        } catch (NullPointerException expected) {
+        }
+    }
+
+    // Tests that non-UTF8 encoded zip file entries can be interpreted. Relies on ZipOutputStream.
+    public void testNonUtf8Encoding() throws IOException {
+        Charset charset = Charset.forName("Cp437");
+        String encodingDependentString = "\u00FB";
+        assertEncodingDiffers(encodingDependentString, charset, StandardCharsets.US_ASCII,
+                StandardCharsets.UTF_8);
+        String name = "name" + encodingDependentString;
+        String comment = "comment" + encodingDependentString;
+
+        ByteArrayOutputStream bytesOutputStream = new ByteArrayOutputStream();
+        ZipOutputStream out = new ZipOutputStream(bytesOutputStream, charset);
+        ZipEntry writeEntry = new ZipEntry(name);
+        writeEntry.setComment(comment);
+        out.putNextEntry(writeEntry);
+        out.write("FileContentsIrrelevant".getBytes());
+        out.closeEntry();
+        out.close();
+
+        ByteArrayInputStream bytesInputStream =
+                new ByteArrayInputStream(bytesOutputStream.toByteArray());
+        ZipInputStream in = new ZipInputStream(bytesInputStream, StandardCharsets.US_ASCII);
+        ZipEntry readEntry = in.getNextEntry();
+        // Due to the way ZipInputStream works it never returns entry comments.
+        assertNull("ZipInputStream must not retrieve comments", readEntry.getComment());
+        assertFalse(readEntry.getName().equals(name));
+        in.close();
+
+        bytesInputStream = new ByteArrayInputStream(bytesOutputStream.toByteArray());
+        in = new ZipInputStream(bytesInputStream, charset);
+        readEntry = in.getNextEntry();
+        // Because ZipInputStream never reads the central directory it never returns entry
+        // comments or the file comment.
+        assertNull("ZipInputStream must not retrieve comments", readEntry.getComment());
+        assertEquals(name, readEntry.getName());
+        in.close();
+    }
+
+    // Tests that UTF8 encoded zip file entries can be interpreted when the constructor is provided
+    // with a non-UTF-8 encoding. Relies on ZipOutputStream.
+    public void testUtf8EncodingOverridesConstructor() throws IOException {
+        Charset charset = Charset.forName("Cp437");
+        String encodingDependentString = "\u00FB";
+        assertEncodingDiffers(encodingDependentString, charset, StandardCharsets.UTF_8);
+        String name = "name" + encodingDependentString;
+        String comment = "comment" + encodingDependentString;
+
+        ByteArrayOutputStream bytesOutputStream = new ByteArrayOutputStream();
+        ZipOutputStream out = new ZipOutputStream(bytesOutputStream, StandardCharsets.UTF_8);
+        // The entry will be tagged as being UTF-8 encoded.
+        ZipEntry writeEntry = new ZipEntry(name);
+        writeEntry.setComment(comment);
+        out.putNextEntry(writeEntry);
+        out.write("FileContentsIrrelevant".getBytes());
+        out.closeEntry();
+        out.close();
+
+        ByteArrayInputStream bytesInputStream =
+                new ByteArrayInputStream(bytesOutputStream.toByteArray());
+        ZipInputStream in = new ZipInputStream(bytesInputStream, charset);
+        ZipEntry readEntry = in.getNextEntry();
+        // Because ZipInputStream never reads the central directory it never returns entry
+        // comments or the file comment.
+        assertNull("ZipInputStream must not retrieve comments", readEntry.getComment());
+        assertNotNull(readEntry);
+        assertEquals(name, readEntry.getName());
+        in.close();
+    }
+
+    /**
+     * Asserts the byte encoding for the string is different for all the supplied character
+     * sets.
+     */
+    private void assertEncodingDiffers(String string, Charset... charsets) {
+        Set<List<Byte>> encodings = new HashSet<List<Byte>>();
+        for (int i = 0; i < charsets.length; i++) {
+            List<Byte> byteList = new ArrayList<Byte>();
+            for (byte b : string.getBytes(charsets[i])) {
+                byteList.add(b);
+            }
+            assertTrue("Encoding has been seen before", encodings.add(byteList));
+        }
+    }
+
     public static byte[] unzip(String name, byte[] bytes) throws IOException {
         ZipInputStream in = new ZipInputStream(new ByteArrayInputStream(bytes));
         ByteArrayOutputStream out = new ByteArrayOutputStream();
diff --git a/luni/src/test/java/libcore/java/util/zip/ZipOutputStreamTest.java b/luni/src/test/java/libcore/java/util/zip/ZipOutputStreamTest.java
index dc80512..92afffa 100644
--- a/luni/src/test/java/libcore/java/util/zip/ZipOutputStreamTest.java
+++ b/luni/src/test/java/libcore/java/util/zip/ZipOutputStreamTest.java
@@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Random;
 import java.util.zip.ZipEntry;
@@ -76,4 +77,72 @@ public final class ZipOutputStreamTest extends TestCase {
             // expected
         }
     }
+
+    public void testNullCharset() throws IOException {
+        try {
+            new ZipOutputStream(new ByteArrayOutputStream(), null);
+            fail();
+        } catch (NullPointerException expected) {
+        }
+    }
+
+    /** Regression test for null comment causing a NullPointerException during write. */
+    public void testNullComment() throws IOException {
+        ZipOutputStream out = new ZipOutputStream(new ByteArrayOutputStream());
+        out.setComment(null);
+        out.putNextEntry(new ZipEntry("name"));
+        out.write(new byte[1]);
+        out.closeEntry();
+        out.finish();
+    }
+
+    /** Contrived test to force a longer name than can be stored. */
+    public void testLongName() throws IOException {
+        int maxNameBytes = 0xffff; // 2 bytes
+        String longName = createString(maxNameBytes);
+
+        ZipEntry entry = new ZipEntry(longName);
+
+        // Using UTF-16 will result in name bytes twice as large as is supported by Zip.
+        // UTF-16 is an unlikely character set to actually want to use with Zip but enables
+        // the edge-case behavior required without using direct field access.
+        ZipOutputStream out = new ZipOutputStream(
+                new ByteArrayOutputStream(), StandardCharsets.UTF_16);
+        try {
+            out.putNextEntry(entry);
+            fail();
+        } catch (IllegalArgumentException expected) {
+        }
+    }
+
+    /** Contrived test to force a longer comment than can be stored. */
+    public void testLongComment() throws IOException {
+        int maxCommentBytes = 0xffff; // 2 bytes
+        String longComment = createString(maxCommentBytes);
+
+        ZipEntry entry = new ZipEntry("name");
+        // setComment() should pass, because it is at the limit of what ZipEntry will detect as
+        // valid (since it uses UTF-8 as a worst-case guess).
+        entry.setComment(longComment);
+
+        // Using UTF-16 will result in comment bytes twice as large as is supported by Zip.
+        // UTF-16 is an unlikely character set to actually want to use with Zip but enables
+        // the edge-case behavior required without using direct field access.
+        ZipOutputStream out = new ZipOutputStream(
+                new ByteArrayOutputStream(), StandardCharsets.UTF_16);
+        try {
+            out.putNextEntry(entry);
+            fail();
+        } catch (IllegalArgumentException expected) {
+        }
+    }
+
+    private static String createString(int numChars) {
+        char c = 'a';
+        StringBuilder sb = new StringBuilder(numChars);
+        for (int i = 0; i < numChars; i++) {
+            sb.append(c);
+        }
+        return sb.toString();
+    }
 }
author	Neil Fuller <nfuller@google.com>	2014-03-11 10:47:07 +0000
committer	Neil Fuller <nfuller@google.com>	2014-03-12 10:15:35 +0000
commit	e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb (patch)
tree	5850cc100b53a90a157c29e5b59b012921648140 /luni
parent	4189a6e183e8c38992df6de29321733fad06e50a (diff)
download	libcore-e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb.zip libcore-e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb.tar.gz libcore-e3d756c5dae1af2aa5f0ad8bc7f133df3e7401eb.tar.bz2