diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 18:28:16 -0800 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 18:28:16 -0800 |
commit | 82ea7a177797b844b252effea5c7c7c5d63ea4ac (patch) | |
tree | 4b825dc642cb6eb9a060e54bf8d69288fbee4904 /scripts/app_engine_server/memcache_zipserve.py | |
parent | c9432be76d50a527da232d518f633add2f76242b (diff) | |
download | sdk-82ea7a177797b844b252effea5c7c7c5d63ea4ac.zip sdk-82ea7a177797b844b252effea5c7c7c5d63ea4ac.tar.gz sdk-82ea7a177797b844b252effea5c7c7c5d63ea4ac.tar.bz2 |
auto import from //depot/cupcake/@135843
Diffstat (limited to 'scripts/app_engine_server/memcache_zipserve.py')
-rw-r--r-- | scripts/app_engine_server/memcache_zipserve.py | 412 |
1 files changed, 0 insertions, 412 deletions
diff --git a/scripts/app_engine_server/memcache_zipserve.py b/scripts/app_engine_server/memcache_zipserve.py deleted file mode 100644 index e11cfc5..0000000 --- a/scripts/app_engine_server/memcache_zipserve.py +++ /dev/null @@ -1,412 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2009 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""A class to serve pages from zip files and use memcache for performance. - -This contains a class and a function to create an anonymous instance of the -class to serve HTTP GET requests. Memcache is used to increase response speed -and lower processing cycles used in serving. Credit to Guido van Rossum and -his implementation of zipserve which served as a reference as I wrote this. - - MemcachedZipHandler: Class that serves request - create_handler: method to create instance of MemcachedZipHandler -""" - -__author__ = 'jmatt@google.com (Justin Mattson)' - -import email.Utils -import logging -import mimetypes -import time -import zipfile - -from google.appengine.api import memcache -from google.appengine.ext import webapp -from google.appengine.ext.webapp import util - - -def create_handler(zip_files, max_age=None, public=None): - """Factory method to create a MemcachedZipHandler instance. - - Args: - zip_files: A list of file names, or a list of lists of file name, first - member of file mappings. See MemcachedZipHandler documentation for - more information about using the list of lists format - max_age: The maximum client-side cache lifetime - public: Whether this should be declared public in the client-side cache - Returns: - A MemcachedZipHandler wrapped in a pretty, anonymous bow for use with App - Engine - - Raises: - ValueError: if the zip_files argument is not a list - """ - # verify argument integrity. If the argument is passed in list format, - # convert it to list of lists format - - if zip_files and type(zip_files).__name__ == 'list': - num_items = len(zip_files) - while num_items > 0: - if type(zip_files[num_items - 1]).__name__ != 'list': - zip_files[num_items - 1] = [zip_files[num_items-1]] - num_items -= 1 - else: - raise ValueError('File name arguments must be a list') - - class HandlerWrapper(MemcachedZipHandler): - """Simple wrapper for an instance of MemcachedZipHandler. - - I'm still not sure why this is needed - """ - - def get(self, name): - self.zipfilenames = zip_files - self.TrueGet(name) - if max_age is not None: - MAX_AGE = max_age - if public is not None: - PUBLIC = public - - return HandlerWrapper - - -class MemcachedZipHandler(webapp.RequestHandler): - """Handles get requests for a given URL. - - Serves a GET request from a series of zip files. As files are served they are - put into memcache, which is much faster than retreiving them from the zip - source file again. It also uses considerably fewer CPU cycles. - """ - zipfile_cache = {} # class cache of source zip files - MAX_AGE = 600 # max client-side cache lifetime - PUBLIC = True # public cache setting - CACHE_PREFIX = 'cache://' # memcache key prefix for actual URLs - NEG_CACHE_PREFIX = 'noncache://' # memcache key prefix for non-existant URL - - def TrueGet(self, name): - """The top-level entry point to serving requests. - - Called 'True' get because it does the work when called from the wrapper - class' get method - - Args: - name: URL requested - - Returns: - None - """ - name = self.PreprocessUrl(name) - - # see if we have the page in the memcache - resp_data = self.GetFromCache(name) - if resp_data is None: - logging.info('Cache miss for %s', name) - resp_data = self.GetFromNegativeCache(name) - if resp_data is None: - resp_data = self.GetFromStore(name) - - # IF we have the file, put it in the memcache - # ELSE put it in the negative cache - if resp_data is not None: - self.StoreOrUpdateInCache(name, resp_data) - else: - logging.info('Adding %s to negative cache, serving 404', name) - self.StoreInNegativeCache(name) - self.Write404Error() - return - else: - self.Write404Error() - return - - content_type, encoding = mimetypes.guess_type(name) - if content_type: - self.response.headers['Content-Type'] = content_type - self.SetCachingHeaders() - self.response.out.write(resp_data) - - def PreprocessUrl(self, name): - """Any preprocessing work on the URL when it comes it. - - Put any work related to interpretting the incoming URL here. For example, - this is used to redirect requests for a directory to the index.html file - in that directory. Subclasses should override this method to do different - preprocessing. - - Args: - name: The incoming URL - - Returns: - The processed URL - """ - # handle special case of requesting the domain itself - if not name: - name = 'index.html' - - # determine if this is a request for a directory - final_path_segment = name - final_slash_offset = name.rfind('/') - if final_slash_offset != len(name) - 1: - final_path_segment = name[final_slash_offset + 1:] - if final_path_segment.find('.') == -1: - name = ''.join([name, '/']) - - # if this is a directory, redirect to index.html - if name[len(name) - 1:] == '/': - return '%s%s' % (name, 'index.html') - else: - return name - - def GetFromStore(self, file_path): - """Retrieve file from zip files. - - Get the file from the source, it must not have been in the memcache. If - possible, we'll use the zip file index to quickly locate where the file - should be found. (See MapToFileArchive documentation for assumptions about - file ordering.) If we don't have an index or don't find the file where the - index says we should, look through all the zip files to find it. - - Args: - file_path: the file that we're looking for - - Returns: - The contents of the requested file - """ - resp_data = None - file_itr = iter(self.zipfilenames) - - # check the index, if we have one, to see what archive the file is in - archive_name = self.MapFileToArchive(file_path) - if not archive_name: - archive_name = file_itr.next()[0] - - while resp_data is None and archive_name: - zip_archive = self.LoadZipFile(archive_name) - if zip_archive: - - # we expect some lookups will fail, and that's okay, 404s will deal - # with that - try: - resp_data = zip_archive.read(file_path) - except (KeyError, RuntimeError), err: - # no op - x = False - if resp_data is not None: - logging.info('%s read from %s', file_path, archive_name) - - try: - archive_name = file_itr.next()[0] - except (StopIteration), err: - archive_name = False - - return resp_data - - def LoadZipFile(self, zipfilename): - """Convenience method to load zip file. - - Just a convenience method to load the zip file from the data store. This is - useful if we ever want to change data stores and also as a means of - dependency injection for testing. This method will look at our file cache - first, and then load and cache the file if there's a cache miss - - Args: - zipfilename: the name of the zip file to load - - Returns: - The zip file requested, or None if there is an I/O error - """ - zip_archive = None - zip_archive = self.zipfile_cache.get(zipfilename) - if zip_archive is None: - try: - zip_archive = zipfile.ZipFile(zipfilename) - self.zipfile_cache[zipfilename] = zip_archive - except (IOError, RuntimeError), err: - logging.error('Can\'t open zipfile %s, cause: %s' % (zipfilename, - err)) - return zip_archive - - def MapFileToArchive(self, file_path): - """Given a file name, determine what archive it should be in. - - This method makes two critical assumptions. - (1) The zip files passed as an argument to the handler, if concatenated - in that same order, would result in a total ordering - of all the files. See (2) for ordering type. - (2) Upper case letters before lower case letters. The traversal of a - directory tree is depth first. A parent directory's files are added - before the files of any child directories - - Args: - file_path: the file to be mapped to an archive - - Returns: - The name of the archive where we expect the file to be - """ - num_archives = len(self.zipfilenames) - while num_archives > 0: - target = self.zipfilenames[num_archives - 1] - if len(target) > 1: - if self.CompareFilenames(target[1], file_path) >= 0: - return target[0] - num_archives -= 1 - - return None - - def CompareFilenames(self, file1, file2): - """Determines whether file1 is lexigraphically 'before' file2. - - WARNING: This method assumes that paths are output in a depth-first, - with parent directories' files stored before childs' - - We say that file1 is lexigraphically before file2 if the last non-matching - path segment of file1 is alphabetically before file2. - - Args: - file1: the first file path - file2: the second file path - - Returns: - A positive number if file1 is before file2 - A negative number if file2 is before file1 - 0 if filenames are the same - """ - f1_segments = file1.split('/') - f2_segments = file2.split('/') - - segment_ptr = 0 - while (segment_ptr < len(f1_segments) and - segment_ptr < len(f2_segments) and - f1_segments[segment_ptr] == f2_segments[segment_ptr]): - segment_ptr += 1 - - if len(f1_segments) == len(f2_segments): - - # we fell off the end, the paths much be the same - if segment_ptr == len(f1_segments): - return 0 - - # we didn't fall of the end, compare the segments where they differ - if f1_segments[segment_ptr] < f2_segments[segment_ptr]: - return 1 - elif f1_segments[segment_ptr] > f2_segments[segment_ptr]: - return -1 - else: - return 0 - - # the number of segments differs, we either mismatched comparing - # directories, or comparing a file to a directory - else: - - # IF we were looking at the last segment of one of the paths, - # the one with fewer segments is first because files come before - # directories - # ELSE we just need to compare directory names - if (segment_ptr + 1 == len(f1_segments) or - segment_ptr + 1 == len(f2_segments)): - return len(f2_segments) - len(f1_segments) - else: - if f1_segments[segment_ptr] < f2_segments[segment_ptr]: - return 1 - elif f1_segments[segment_ptr] > f2_segments[segment_ptr]: - return -1 - else: - return 0 - - def SetCachingHeaders(self): - """Set caching headers for the request.""" - max_age = self.MAX_AGE - self.response.headers['Expires'] = email.Utils.formatdate( - time.time() + max_age, usegmt=True) - cache_control = [] - if self.PUBLIC: - cache_control.append('public') - cache_control.append('max-age=%d' % max_age) - self.response.headers['Cache-Control'] = ', '.join(cache_control) - - def GetFromCache(self, filename): - """Get file from memcache, if available. - - Args: - filename: The URL of the file to return - - Returns: - The content of the file - """ - return memcache.get('%s%s' % (self.CACHE_PREFIX, filename)) - - def StoreOrUpdateInCache(self, filename, data): - """Store data in the cache. - - Store a piece of data in the memcache. Memcache has a maximum item size of - 1*10^6 bytes. If the data is too large, fail, but log the failure. Future - work will consider compressing the data before storing or chunking it - - Args: - filename: the name of the file to store - data: the data of the file - - Returns: - None - """ - try: - if not memcache.add('%s%s' % (self.CACHE_PREFIX, filename), data): - memcache.replace('%s%s' % (self.CACHE_PREFIX, filename), data) - except (ValueError), err: - logging.warning('Data size too large to cache\n%s' % err) - - def Write404Error(self): - """Ouptut a simple 404 response.""" - self.error(404) - self.response.out.write( - ''.join(['<html><head><title>404: Not Found</title></head>', - '<body><b><h2>Error 404</h2><br/>', - 'File not found</b></body></html>'])) - - def StoreInNegativeCache(self, filename): - """If a non-existant URL is accessed, cache this result as well. - - Future work should consider setting a maximum negative cache size to - prevent it from from negatively impacting the real cache. - - Args: - filename: URL to add ot negative cache - - Returns: - None - """ - memcache.add('%s%s' % (self.NEG_CACHE_PREFIX, filename), -1) - - def GetFromNegativeCache(self, filename): - """Retrieve from negative cache. - - Args: - filename: URL to retreive - - Returns: - The file contents if present in the negative cache. - """ - return memcache.get('%s%s' % (self.NEG_CACHE_PREFIX, filename)) - - -def main(): - application = webapp.WSGIApplication([('/([^/]+)/(.*)', - MemcachedZipHandler)]) - util.run_wsgi_app(application) - - -if __name__ == '__main__': - main() |