path: root/scripts/divide_and_compress.py
author     The Android Open Source Project <initial-contribution@android.com>  2009-03-03 18:28:16 -0800
committer  The Android Open Source Project <initial-contribution@android.com>  2009-03-03 18:28:16 -0800
commit     82ea7a177797b844b252effea5c7c7c5d63ea4ac (patch)
tree       4b825dc642cb6eb9a060e54bf8d69288fbee4904 /scripts/divide_and_compress.py
parent     c9432be76d50a527da232d518f633add2f76242b (diff)
auto import from //depot/cupcake/@135843
Diffstat (limited to 'scripts/divide_and_compress.py')
-rwxr-xr-x  scripts/divide_and_compress.py  352
1 file changed, 0 insertions, 352 deletions
diff --git a/scripts/divide_and_compress.py b/scripts/divide_and_compress.py
deleted file mode 100755
index d369be4..0000000
--- a/scripts/divide_and_compress.py
+++ /dev/null
@@ -1,352 +0,0 @@
-#!/usr/bin/python2.4
-#
-# Copyright (C) 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Module to compress directories in to series of zip files.
-
-This module will take a directory and compress all its contents, including
-child directories into a series of zip files named N.zip where 'N' ranges from
-0 to infinity. The zip files will all be below a certain specified maximum
-threshold.
-
-The directory is compressed with a depth first traversal, each directory's
-file contents being compressed as it is visisted, before the compression of any
-child directory's contents. In this way the files within an archive are ordered
-and the archives themselves are ordered.
-
-The class also constructs a 'main.py' file intended for use with Google App
-Engine with a custom App Engine program not currently distributed with this
-code base. The custom App Engine runtime can leverage the index files written
-out by this class to more quickly locate which zip file to serve a given URL
-from.
-"""
-
-__author__ = 'jmatt@google.com (Justin Mattson)'
-
-from optparse import OptionParser
-import os
-import stat
-import sys
-import zipfile
-from zipfile import ZipFile
-import divide_and_compress_constants
-
-
-def Main(argv):
- parser = CreateOptionsParser()
- (options, args) = parser.parse_args()
- VerifyArguments(options, parser)
- zipper = DirectoryZipper(options.destination,
- options.sourcefiles,
- ParseSize(options.filesize),
- options.compress)
- zipper.StartCompress()
-
-
-def CreateOptionsParser():
- rtn = OptionParser()
- rtn.add_option('-s', '--sourcefiles', dest='sourcefiles', default=None,
- help='The directory containing the files to compress')
- rtn.add_option('-d', '--destination', dest='destination', default=None,
-                 help=('Where to put the archive files; this should not be'
-                       ' a child of where the source files exist.'))
- rtn.add_option('-f', '--filesize', dest='filesize', default='1M',
-                 help=('Maximum size of archive files. A number followed by '
-                       'a magnitude indicator, e.g. 1000000B == one million '
-                       'BYTES, 500K == five hundred KILOBYTES, 1.2M == one '
-                       'point two MEGABYTES. 1M == 1048576 BYTES.'))
- rtn.add_option('-n', '--nocompress', action='store_false', dest='compress',
- default=True,
-                 help=('If given, the archive files are not compressed; they '
-                       'are just a concatenation of the source files.'))
- return rtn
-
-
-def VerifyArguments(options, parser):
- try:
- if options.sourcefiles is None or options.destination is None:
- parser.print_help()
- sys.exit(-1)
-  except AttributeError:
- parser.print_help()
- sys.exit(-1)
-
-
-def ParseSize(size_str):
- if len(size_str) < 2:
- raise ValueError(('filesize argument not understood, please include'
- ' a numeric value and magnitude indicator'))
- magnitude = size_str[len(size_str)-1:]
- if not magnitude in ('K', 'B', 'M'):
- raise ValueError(('filesize magnitude indicator not valid, must be \'K\','
- '\'B\', or \'M\''))
- numeral = float(size_str[0:len(size_str)-1])
- if magnitude == 'K':
- numeral *= 1024
- elif magnitude == 'M':
- numeral *= 1048576
- return int(numeral)
-
-
-class DirectoryZipper(object):
- """Class to compress a directory and all its sub-directories."""
- current_archive = None
- output_dir = None
- base_path = None
- max_size = None
- compress = None
- index_fp = None
-
- def __init__(self, output_path, base_dir, archive_size, enable_compression):
- """DirectoryZipper constructor.
-
- Args:
- output_path: the path to write the archives and index file to
- base_dir: the directory to compress
- archive_size: the maximum size, in bytes, of a single archive file
-      enable_compression: whether or not compression should be enabled; if
-        disabled, the files will be written into an uncompressed zip
- """
- self.output_dir = output_path
- self.current_archive = '0.zip'
- self.base_path = base_dir
- self.max_size = archive_size
- self.compress = enable_compression
-
- def StartCompress(self):
- """Start compress of the directory.
-
- This will start the compression process and write the archives to the
- specified output directory. It will also produce an 'index.txt' file in the
- output directory that maps from file to archive.
- """
- self.index_fp = open(''.join([self.output_dir, 'main.py']), 'w')
- self.index_fp.write(divide_and_compress_constants.file_preamble)
- os.path.walk(self.base_path, self.CompressDirectory, 1)
- self.index_fp.write(divide_and_compress_constants.file_endpiece)
- self.index_fp.close()
-
- def RemoveLastFile(self, archive_path=None):
- """Removes the last item in the archive.
-
- This removes the last item in the archive by reading the items out of the
- archive, adding them to a new archive, deleting the old archive, and
- moving the new archive to the location of the old archive.
-
- Args:
- archive_path: Path to the archive to modify. This archive should not be
- open elsewhere, since it will need to be deleted.
-    Returns:
-      None. The archive at archive_path is rewritten in place.
- """
- if archive_path is None:
- archive_path = ''.join([self.output_dir, self.current_archive])
-
- # Move the old file and create a new one at its old location
- ext_offset = archive_path.rfind('.')
- old_archive = ''.join([archive_path[0:ext_offset], '-old',
- archive_path[ext_offset:]])
- os.rename(archive_path, old_archive)
- old_fp = self.OpenZipFileAtPath(old_archive, mode='r')
-
- if self.compress:
- new_fp = self.OpenZipFileAtPath(archive_path,
- mode='w',
- compress=zipfile.ZIP_DEFLATED)
- else:
- new_fp = self.OpenZipFileAtPath(archive_path,
- mode='w',
- compress=zipfile.ZIP_STORED)
-
-    # Copy the old archive's members into the new archive, except the last one
- zip_members = enumerate(old_fp.infolist())
- num_members = len(old_fp.infolist())
- while num_members > 1:
- this_member = zip_members.next()[1]
- new_fp.writestr(this_member.filename, old_fp.read(this_member.filename))
- num_members -= 1
-
- # Close files and delete the old one
- old_fp.close()
- new_fp.close()
- os.unlink(old_archive)
-
- def OpenZipFileAtPath(self, path, mode=None, compress=zipfile.ZIP_DEFLATED):
- """This method is mainly for testing purposes, eg dependency injection."""
- if mode is None:
- if os.path.exists(path):
- mode = 'a'
- else:
- mode = 'w'
-
- if mode == 'r':
- return ZipFile(path, mode)
- else:
- return ZipFile(path, mode, compress)
-
- def CompressDirectory(self, irrelevant, dir_path, dir_contents):
- """Method to compress the given directory.
-
- This method compresses the directory 'dir_path'. It will add to an existing
- zip file that still has space and create new ones as necessary to keep zip
- file sizes under the maximum specified size. This also writes out the
-    mapping of files to archives to the self.index_fp file descriptor.
-
- Args:
-      irrelevant: a numeric identifier passed by the os.path.walk method; it
-        is not used by this method
- dir_path: the path to the directory to compress
- dir_contents: a list of directory contents to be compressed
- """
-
-    # Construct the queue of files to be added. dir_contents is not
-    # guaranteed to come back in alphabetical order, so sort it to get a
-    # deterministic ordering of files within and across archives.
- dir_contents.sort()
- zip_queue = []
- if dir_path[len(dir_path) - 1:] == os.sep:
- for filename in dir_contents:
- zip_queue.append(''.join([dir_path, filename]))
- else:
- for filename in dir_contents:
- zip_queue.append(''.join([dir_path, os.sep, filename]))
- compress_bit = zipfile.ZIP_DEFLATED
- if not self.compress:
- compress_bit = zipfile.ZIP_STORED
-
- # zip all files in this directory, adding to existing archives and creating
- # as necessary
- while len(zip_queue) > 0:
- target_file = zip_queue[0]
- if os.path.isfile(target_file):
- self.AddFileToArchive(target_file, compress_bit)
-
- # see if adding the new file made our archive too large
- if not self.ArchiveIsValid():
-
-        # IF fixing fails, the last added file was too large, skip it
- # ELSE the current archive filled normally, make a new one and try
- # adding the file again
- if not self.FixArchive('SIZE'):
- zip_queue.pop(0)
- else:
- self.current_archive = '%i.zip' % (
- int(self.current_archive[
- 0:self.current_archive.rfind('.zip')]) + 1)
- else:
-
-          # if this is the first file in the archive, write an index record
- self.WriteIndexRecord()
- zip_queue.pop(0)
- else:
- zip_queue.pop(0)
-
- def WriteIndexRecord(self):
- """Write an index record to the index file.
-
-    Only write an index record if this is the first file to go into the archive.
-
-    Returns:
-      True if an index record is written, False if it isn't.
- """
- archive = self.OpenZipFileAtPath(
- ''.join([self.output_dir, self.current_archive]), 'r')
- archive_index = archive.infolist()
- if len(archive_index) == 1:
- self.index_fp.write(
- '[\'%s\', \'%s\'],\n' % (self.current_archive,
- archive_index[0].filename))
- archive.close()
- return True
- else:
- archive.close()
- return False
-
- def FixArchive(self, problem):
- """Make the archive compliant.
-
- Args:
- problem: the reason the archive is invalid
-
- Returns:
-      True if the removed file could still fit in a different archive (this
-      one was simply full), False if the file is too large to be archived at all.
- """
- archive_path = ''.join([self.output_dir, self.current_archive])
- rtn_value = None
-
- if problem == 'SIZE':
- archive_obj = self.OpenZipFileAtPath(archive_path, mode='r')
- num_archive_files = len(archive_obj.infolist())
-
-      # IF there is a single file, that means it's too large to compress;
- # delete the created archive
- # ELSE do normal finalization
- if num_archive_files == 1:
- print ('WARNING: %s%s is too large to store.' % (
- self.base_path, archive_obj.infolist()[0].filename))
- archive_obj.close()
- os.unlink(archive_path)
- rtn_value = False
- else:
- self.RemoveLastFile(''.join([self.output_dir, self.current_archive]))
- archive_obj.close()
- print 'Final archive size for %s is %i' % (
- self.current_archive, os.stat(archive_path)[stat.ST_SIZE])
- rtn_value = True
- return rtn_value
-
- def AddFileToArchive(self, filepath, compress_bit):
- """Add the file at filepath to the current archive.
-
- Args:
- filepath: the path of the file to add
-      compress_bit: whether or not this file should be compressed when added
-
- Returns:
-      True if the file could be added (typically because this is a regular
-      file) or False if it couldn't be added (typically because it's a directory)
- """
- curr_archive_path = ''.join([self.output_dir, self.current_archive])
- if os.path.isfile(filepath):
- if os.stat(filepath)[stat.ST_SIZE] > 1048576:
- print 'Warning: %s is potentially too large to serve on GAE' % filepath
- archive = self.OpenZipFileAtPath(curr_archive_path,
- compress=compress_bit)
- # add the file to the archive
- archive.write(filepath, filepath[len(self.base_path):])
- archive.close()
- return True
- else:
- return False
-
- def ArchiveIsValid(self):
- """Check whether the archive is valid.
-
- Currently this only checks whether the archive is under the required size.
-    The thought is that eventually this will do additional validation.
-
-    Returns:
-      True if the archive is valid, False if it's not.
- """
- archive_path = ''.join([self.output_dir, self.current_archive])
- if os.stat(archive_path)[stat.ST_SIZE] > self.max_size:
- return False
- else:
- return True
-
-if __name__ == '__main__':
- Main(sys.argv)
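For reference, a minimal usage sketch of the script removed above, assuming it is importable as divide_and_compress and that divide_and_compress_constants is on the Python path. The source and destination paths are hypothetical; note the trailing separators, since the class builds output paths by plain string concatenation.

# Illustrative sketch only; the paths below are hypothetical examples.
#
# Command-line form (options defined in CreateOptionsParser above):
#   ./divide_and_compress.py -s /tmp/website/ -d /tmp/archives/ -f 1M
#
# Programmatic equivalent of what Main() did:
from divide_and_compress import DirectoryZipper, ParseSize

zipper = DirectoryZipper('/tmp/archives/',   # output dir for the N.zip files and main.py
                         '/tmp/website/',    # directory tree to compress
                         ParseSize('1M'),    # ParseSize('1M') == 1048576, ParseSize('500K') == 512000
                         True)               # True enables ZIP_DEFLATED compression
zipper.StartCompress()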