diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:29:09 -0800 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:29:09 -0800 |
commit | 55a2c71f27d3e0b8344597c7f281e687cb7aeb1b (patch) | |
tree | ecd18b995aea8eeeb8b3823266280d41245bf0f7 /scripts/divide_and_compress.py | |
parent | 82ea7a177797b844b252effea5c7c7c5d63ea4ac (diff) | |
download | sdk-55a2c71f27d3e0b8344597c7f281e687cb7aeb1b.zip sdk-55a2c71f27d3e0b8344597c7f281e687cb7aeb1b.tar.gz sdk-55a2c71f27d3e0b8344597c7f281e687cb7aeb1b.tar.bz2 |
auto import from //depot/cupcake/@135843
Diffstat (limited to 'scripts/divide_and_compress.py')
-rwxr-xr-x | scripts/divide_and_compress.py | 352 |
1 files changed, 352 insertions, 0 deletions
#!/usr/bin/python2.4
#
# Copyright (C) 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Module to compress directories into a series of zip files.

This module will take a directory and compress all its contents, including
child directories, into a series of zip files named N.zip where 'N' ranges from
0 to infinity. The zip files will all be below a certain specified maximum
threshold.

The directory is compressed with a depth first traversal, each directory's
file contents being compressed as it is visited, before the compression of any
child directory's contents. In this way the files within an archive are ordered
and the archives themselves are ordered.

The class also constructs a 'main.py' file intended for use with Google App
Engine with a custom App Engine program not currently distributed with this
code base. The custom App Engine runtime can leverage the index files written
out by this class to more quickly locate which zip file to serve a given URL
from.

The code deliberately sticks to constructs that are valid on both Python 2.4
and Python 3 (no 'with', no 'except ... as', single-argument print calls).
"""

__author__ = 'jmatt@google.com (Justin Mattson)'

from optparse import OptionParser
import os
import stat
import sys
import zipfile
from zipfile import ZipFile
import divide_and_compress_constants


def Main(argv):
  """Parse the command line and compress the requested directory.

  Args:
    argv: the full argument vector, program name first (eg. sys.argv). If
      None, sys.argv is used.
  """
  if argv is None:
    argv = sys.argv
  parser = CreateOptionsParser()
  # Parse the vector we were handed rather than silently re-reading sys.argv.
  options, _ = parser.parse_args(argv[1:])
  VerifyArguments(options, parser)
  zipper = DirectoryZipper(options.destination,
                           options.sourcefiles,
                           ParseSize(options.filesize),
                           options.compress)
  zipper.StartCompress()


def CreateOptionsParser():
  """Build the command line parser.

  Returns:
    An OptionParser that understands -s/--sourcefiles, -d/--destination,
    -f/--filesize (default '1M') and -n/--nocompress.
  """
  rtn = OptionParser()
  rtn.add_option('-s', '--sourcefiles', dest='sourcefiles', default=None,
                 help='The directory containing the files to compress')
  rtn.add_option('-d', '--destination', dest='destination', default=None,
                 help=('Where to put the archive files, this should not be'
                       ' a child of where the source files exist.'))
  rtn.add_option('-f', '--filesize', dest='filesize', default='1M',
                 help=('Maximum size of archive files. A number followed by '
                       'a magnitude indicator, eg. 1000000B == one million '
                       'BYTES, 500K == five hundred KILOBYTES, 1.2M == one '
                       'point two MEGABYTES. 1M == 1048576 BYTES'))
  rtn.add_option('-n', '--nocompress', action='store_false', dest='compress',
                 default=True,
                 help=('Whether the archive files should be compressed, or '
                       'just a concatenation of the source files'))
  return rtn


def VerifyArguments(options, parser):
  """Exit with the usage text unless source and destination were both given.

  Args:
    options: the options object returned by parser.parse_args()
    parser: the OptionParser, used to print the help text

  Raises:
    SystemExit: (status -1) if either required option is missing.
  """
  try:
    if options.sourcefiles is None or options.destination is None:
      parser.print_help()
      sys.exit(-1)
  except AttributeError:
    parser.print_help()
    sys.exit(-1)


def ParseSize(size_str):
  """Convert a size such as '500K' or '1.2M' into a number of bytes.

  Args:
    size_str: a number immediately followed by one of 'B' (bytes),
      'K' (multiples of 1024) or 'M' (multiples of 1048576)

  Returns:
    The size in bytes, truncated to an int.

  Raises:
    ValueError: if the string is shorter than two characters, the magnitude
      indicator is not recognised, or the numeric part is not a number.
  """
  if len(size_str) < 2:
    raise ValueError(('filesize argument not understood, please include'
                      ' a numeric value and magnitude indicator'))
  magnitude = size_str[-1]
  if magnitude not in ('K', 'B', 'M'):
    raise ValueError(('filesize magnitude indicator not valid, must be \'K\','
                      '\'B\', or \'M\''))
  numeral = float(size_str[:-1])
  if magnitude == 'K':
    numeral *= 1024
  elif magnitude == 'M':
    numeral *= 1048576
  return int(numeral)


class DirectoryZipper(object):
  """Class to compress a directory and all its sub-directories."""
  current_archive = None
  output_dir = None
  base_path = None
  max_size = None
  compress = None
  index_fp = None

  def __init__(self, output_path, base_dir, archive_size, enable_compression):
    """DirectoryZipper constructor.

    Args:
      output_path: the path to write the archives and index file to
      base_dir: the directory to compress
      archive_size: the maximum size, in bytes, of a single archive file
      enable_compression: whether or not compression should be enabled, if
        disabled, the files will be written into an uncompressed zip
    """
    self.output_dir = output_path
    self.current_archive = '0.zip'
    self.base_path = base_dir
    self.max_size = archive_size
    self.compress = enable_compression

  def _OutputPath(self, name):
    """Return the path of the file called 'name' inside the output directory.

    os.path.join is used so the result is correct whether or not the output
    directory was given with a trailing separator; plain concatenation would
    turn '/tmp/out' + '0.zip' into the sibling file '/tmp/out0.zip'.
    """
    return os.path.join(self.output_dir, name)

  def StartCompress(self):
    """Start compress of the directory.

    This will start the compression process and write the archives to the
    specified output directory. It will also produce a 'main.py' file in the
    output directory that maps from archive to the first file it contains.
    """
    self.index_fp = open(self._OutputPath('main.py'), 'w')
    try:
      self.index_fp.write(divide_and_compress_constants.file_preamble)
      # os.walk replaces os.path.walk, which no longer exists in Python 3.
      # Sorting dir_names in place keeps the depth first traversal in
      # alphabetical order, as the in-place sort of the contents list did
      # under os.path.walk.
      for dir_path, dir_names, file_names in os.walk(self.base_path):
        dir_names.sort()
        self.CompressDirectory(1, dir_path, dir_names + file_names)
      self.index_fp.write(divide_and_compress_constants.file_endpiece)
    finally:
      # Close the index even if compression fails part way through.
      self.index_fp.close()

  def RemoveLastFile(self, archive_path=None):
    """Removes the last item in the archive.

    This removes the last item in the archive by reading the items out of the
    archive, adding them to a new archive, deleting the old archive, and
    moving the new archive to the location of the old archive.

    Args:
      archive_path: Path to the archive to modify. This archive should not be
        open elsewhere, since it will need to be deleted. Defaults to the
        current archive in the output directory.
    """
    if archive_path is None:
      archive_path = self._OutputPath(self.current_archive)

    # Move the old file and create a new one at its old location. splitext
    # only looks at the final path component, so a '.' in a directory name
    # cannot be mistaken for the start of the extension.
    root, ext = os.path.splitext(archive_path)
    old_archive = '%s-old%s' % (root, ext)
    os.rename(archive_path, old_archive)

    compress_bit = zipfile.ZIP_DEFLATED
    if not self.compress:
      compress_bit = zipfile.ZIP_STORED

    old_fp = self.OpenZipFileAtPath(old_archive, mode='r')
    try:
      new_fp = self.OpenZipFileAtPath(archive_path,
                                      mode='w',
                                      compress=compress_bit)
      try:
        # Copy every member except the last one. The ZipInfo is passed along
        # so the copies keep their original timestamp and attributes instead
        # of being stamped with defaults.
        for member in old_fp.infolist()[:-1]:
          data = old_fp.read(member.filename)
          new_fp.writestr(member, data)
      finally:
        new_fp.close()
    finally:
      old_fp.close()
    os.unlink(old_archive)

  def OpenZipFileAtPath(self, path, mode=None, compress=zipfile.ZIP_DEFLATED):
    """This method is mainly for testing purposes, eg dependency injection.

    Args:
      path: the path of the zip file to open
      mode: 'r', 'w' or 'a'; if None, 'a' is used when the file already
        exists and 'w' when it does not
      compress: the zipfile compression constant, ignored when reading

    Returns:
      An open ZipFile object.
    """
    if mode is None:
      if os.path.exists(path):
        mode = 'a'
      else:
        mode = 'w'

    if mode == 'r':
      return ZipFile(path, mode)
    return ZipFile(path, mode, compress)

  def CompressDirectory(self, irrelevant, dir_path, dir_contents):
    """Method to compress the given directory.

    This method compresses the directory 'dir_path'. It will add to an existing
    zip file that still has space and create new ones as necessary to keep zip
    file sizes under the maximum specified size. This also writes out the
    mapping of files to archives to the self.index_fp file descriptor

    Args:
      irrelevant: a placeholder kept for compatibility with the callback
        signature of os.path.walk, this is not used by this method
      dir_path: the path to the directory to compress
      dir_contents: a list of directory contents to be compressed; it is
        sorted in place
    """
    # Process the entries in alphabetical order so archives are ordered.
    dir_contents.sort()
    zip_queue = [os.path.join(dir_path, name) for name in dir_contents]

    compress_bit = zipfile.ZIP_DEFLATED
    if not self.compress:
      compress_bit = zipfile.ZIP_STORED

    # zip all files in this directory, adding to existing archives and creating
    # as necessary
    for target_file in zip_queue:
      if not os.path.isfile(target_file):
        # sub-directories are handled when the walk reaches them
        continue

      while True:
        self.AddFileToArchive(target_file, compress_bit)

        if self.ArchiveIsValid():
          # if this is the first file in the archive, write an index record
          self.WriteIndexRecord()
          break

        # Adding the file made the archive too large.
        # IF fixing fails, the last added file was too large, skip it
        # ELSE the current archive filled normally, make a new one and try
        # adding the file again
        if not self.FixArchive('SIZE'):
          break
        archive_number = int(
            self.current_archive[0:self.current_archive.rfind('.zip')])
        self.current_archive = '%i.zip' % (archive_number + 1)

  def WriteIndexRecord(self):
    """Write an index record to the index file.

    Only write an index record if this is the first file to go into archive

    Returns:
      True if an archive record is written, False if it isn't
    """
    archive = self.OpenZipFileAtPath(
        self._OutputPath(self.current_archive), 'r')
    try:
      archive_index = archive.infolist()
    finally:
      archive.close()

    if len(archive_index) != 1:
      return False

    # NOTE(review): the member name is written unescaped into the generated
    # Python source; a name containing a quote would break main.py.
    self.index_fp.write(
        '[\'%s\', \'%s\'],\n' % (self.current_archive,
                                 archive_index[0].filename))
    return True

  def FixArchive(self, problem):
    """Make the archive compliant.

    Args:
      problem: the reason the archive is invalid, only 'SIZE' is handled

    Returns:
      Whether the file(s) removed to fix the archive could conceivably be
      in an archive, but for some reason can't be added to this one. None is
      returned for a problem this method does not know how to fix.
    """
    if problem != 'SIZE':
      return None

    archive_path = self._OutputPath(self.current_archive)
    archive_obj = self.OpenZipFileAtPath(archive_path, mode='r')
    try:
      archive_members = archive_obj.infolist()
    finally:
      # The handle must be closed before the archive is deleted or rewritten
      # below; RemoveLastFile requires that the archive is not open elsewhere.
      archive_obj.close()

    # IF there is a single file, that means its too large to compress,
    # delete the created archive
    # ELSE do normal finalization
    if len(archive_members) == 1:
      print('WARNING: %s%s is too large to store.' % (
          self.base_path, archive_members[0].filename))
      os.unlink(archive_path)
      return False

    self.RemoveLastFile(archive_path)
    print('Final archive size for %s is %i' % (
        self.current_archive, os.stat(archive_path)[stat.ST_SIZE]))
    return True

  def AddFileToArchive(self, filepath, compress_bit):
    """Add the file at filepath to the current archive.

    Args:
      filepath: the path of the file to add
      compress_bit: the zipfile compression constant to use when the archive
        is opened

    Returns:
      True if the file could be added (typically because this is a file) or
      False if it couldn't be added (typically because its a directory)
    """
    if not os.path.isfile(filepath):
      return False

    if os.stat(filepath)[stat.ST_SIZE] > 1048576:
      print('Warning: %s is potentially too large to serve on GAE' % filepath)

    archive = self.OpenZipFileAtPath(self._OutputPath(self.current_archive),
                                     compress=compress_bit)
    try:
      # store the file under its path relative to the compressed directory
      archive.write(filepath, filepath[len(self.base_path):])
    finally:
      archive.close()
    return True

  def ArchiveIsValid(self):
    """Check whether the archive is valid.

    Currently this only checks whether the archive is under the required size.
    The thought is that eventually this will do additional validation

    Returns:
      True if the archive is valid, False if its not
    """
    archive_path = self._OutputPath(self.current_archive)
    return os.stat(archive_path)[stat.ST_SIZE] <= self.max_size


if __name__ == '__main__':
  Main(sys.argv)