aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/app_engine_server/memcache_zipserve.py
diff options
context:
space:
mode:
authorThe Android Open Source Project <initial-contribution@android.com>2009-03-03 19:29:09 -0800
committerThe Android Open Source Project <initial-contribution@android.com>2009-03-03 19:29:09 -0800
commit55a2c71f27d3e0b8344597c7f281e687cb7aeb1b (patch)
treeecd18b995aea8eeeb8b3823266280d41245bf0f7 /scripts/app_engine_server/memcache_zipserve.py
parent82ea7a177797b844b252effea5c7c7c5d63ea4ac (diff)
downloadsdk-55a2c71f27d3e0b8344597c7f281e687cb7aeb1b.zip
sdk-55a2c71f27d3e0b8344597c7f281e687cb7aeb1b.tar.gz
sdk-55a2c71f27d3e0b8344597c7f281e687cb7aeb1b.tar.bz2
auto import from //depot/cupcake/@135843
Diffstat (limited to 'scripts/app_engine_server/memcache_zipserve.py')
-rw-r--r--scripts/app_engine_server/memcache_zipserve.py412
1 files changed, 412 insertions, 0 deletions
diff --git a/scripts/app_engine_server/memcache_zipserve.py b/scripts/app_engine_server/memcache_zipserve.py
new file mode 100644
index 0000000..e11cfc5
--- /dev/null
+++ b/scripts/app_engine_server/memcache_zipserve.py
@@ -0,0 +1,412 @@
+#!/usr/bin/env python
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""A class to serve pages from zip files and use memcache for performance.
+
+This contains a class and a function to create an anonymous instance of the
+class to serve HTTP GET requests. Memcache is used to increase response speed
+and lower processing cycles used in serving. Credit to Guido van Rossum and
+his implementation of zipserve which served as a reference as I wrote this.
+
+ MemcachedZipHandler: Class that serves request
+ create_handler: method to create instance of MemcachedZipHandler
+"""
+
+__author__ = 'jmatt@google.com (Justin Mattson)'
+
+import email.Utils
+import logging
+import mimetypes
+import time
+import zipfile
+
+from google.appengine.api import memcache
+from google.appengine.ext import webapp
+from google.appengine.ext.webapp import util
+
+
def create_handler(zip_files, max_age=None, public=None):
  """Factory method to create a MemcachedZipHandler instance.

  Args:
    zip_files: A list of file names, or a list of lists of file name, first
        member of file mappings. See MemcachedZipHandler documentation for
        more information about using the list of lists format
    max_age: The maximum client-side cache lifetime, in seconds, or None to
        keep the MemcachedZipHandler default
    public: Whether this should be declared public in the client-side cache,
        or None to keep the MemcachedZipHandler default

  Returns:
    A MemcachedZipHandler subclass configured with the given arguments, for
    use in an App Engine WSGI application routing table

  Raises:
    ValueError: if the zip_files argument is not a list
  """
  # Verify argument integrity. If the argument is passed in plain-list
  # format, normalize it (in place) to the list-of-lists format.
  if zip_files and isinstance(zip_files, list):
    for index, entry in enumerate(zip_files):
      if not isinstance(entry, list):
        zip_files[index] = [entry]
  else:
    raise ValueError('File name arguments must be a list')

  class HandlerWrapper(MemcachedZipHandler):
    """Binds the factory arguments to a MemcachedZipHandler subclass.

    App Engine's router needs a handler *class*; this wrapper closes over the
    factory arguments and applies them on each request.
    """

    def get(self, name):
      self.zipfilenames = zip_files
      # Apply cache-header overrides BEFORE serving. (The original code
      # assigned bare locals named MAX_AGE/PUBLIC after TrueGet had already
      # written the response, so max_age/public silently had no effect.)
      if max_age is not None:
        self.MAX_AGE = max_age
      if public is not None:
        self.PUBLIC = public
      self.TrueGet(name)

  return HandlerWrapper
+
+
class MemcachedZipHandler(webapp.RequestHandler):
  """Handles GET requests for a given URL.

  Serves a GET request from a series of zip files. As files are served they
  are put into memcache, which is much faster than retrieving them from the
  zip source file again. It also uses considerably fewer CPU cycles.
  """
  zipfile_cache = {}                # class-level cache of open ZipFile objects
  MAX_AGE = 600                     # max client-side cache lifetime (seconds)
  PUBLIC = True                     # public cache setting
  CACHE_PREFIX = 'cache://'         # memcache key prefix for actual URLs
  NEG_CACHE_PREFIX = 'noncache://'  # memcache key prefix for non-existent URLs

  def TrueGet(self, name):
    """The top-level entry point to serving requests.

    Called 'True' get because it does the work when called from the wrapper
    class' get method.

    Args:
      name: URL requested

    Returns:
      None
    """
    name = self.PreprocessUrl(name)

    # see if we have the page in the memcache
    resp_data = self.GetFromCache(name)
    if resp_data is None:
      logging.info('Cache miss for %s', name)
      # a negative-cache hit means we recently confirmed this URL does not
      # exist, so we can serve the 404 without re-scanning the zip files
      resp_data = self.GetFromNegativeCache(name)
      if resp_data is None:
        resp_data = self.GetFromStore(name)

        # IF we have the file, put it in the memcache
        # ELSE put it in the negative cache
        if resp_data is not None:
          self.StoreOrUpdateInCache(name, resp_data)
        else:
          logging.info('Adding %s to negative cache, serving 404', name)
          self.StoreInNegativeCache(name)
          self.Write404Error()
          return
      else:
        self.Write404Error()
        return

    content_type, encoding = mimetypes.guess_type(name)
    if content_type:
      self.response.headers['Content-Type'] = content_type
    self.SetCachingHeaders()
    self.response.out.write(resp_data)

  def PreprocessUrl(self, name):
    """Any preprocessing work on the URL when it comes in.

    Put any work related to interpreting the incoming URL here. For example,
    this is used to redirect requests for a directory to the index.html file
    in that directory. Subclasses should override this method to do different
    preprocessing.

    Args:
      name: The incoming URL

    Returns:
      The processed URL
    """
    # handle special case of requesting the domain itself
    if not name:
      name = 'index.html'

    # determine if this is a request for a directory: a final path segment
    # with no '.' in it is treated as a directory and given a trailing slash
    final_path_segment = name
    final_slash_offset = name.rfind('/')
    if final_slash_offset != len(name) - 1:
      final_path_segment = name[final_slash_offset + 1:]
      if final_path_segment.find('.') == -1:
        name = ''.join([name, '/'])

    # if this is a directory, redirect to index.html
    if name[len(name) - 1:] == '/':
      return '%s%s' % (name, 'index.html')
    else:
      return name

  def GetFromStore(self, file_path):
    """Retrieve file from zip files.

    Get the file from the source, it must not have been in the memcache. If
    possible, we'll use the zip file index to quickly locate where the file
    should be found. (See MapFileToArchive documentation for assumptions about
    file ordering.) If we don't have an index or don't find the file where the
    index says we should, look through all the zip files to find it.

    Args:
      file_path: the file that we're looking for

    Returns:
      The contents of the requested file, or None if it was not found in any
      archive
    """
    resp_data = None
    file_itr = iter(self.zipfilenames)

    # check the index, if we have one, to see what archive the file is in
    archive_name = self.MapFileToArchive(file_path)
    if not archive_name:
      archive_name = next(file_itr)[0]

    while resp_data is None and archive_name:
      zip_archive = self.LoadZipFile(archive_name)
      if zip_archive:

        # we expect some lookups will fail, and that's okay, 404s will deal
        # with that
        try:
          resp_data = zip_archive.read(file_path)
        except (KeyError, RuntimeError):
          # a miss in this archive is expected; keep scanning the rest
          pass
        if resp_data is not None:
          logging.info('%s read from %s', file_path, archive_name)

      try:
        archive_name = next(file_itr)[0]
      except StopIteration:
        # out of archives; False ends the while loop on the next check
        archive_name = False

    return resp_data

  def LoadZipFile(self, zipfilename):
    """Convenience method to load zip file.

    Just a convenience method to load the zip file from the data store. This is
    useful if we ever want to change data stores and also as a means of
    dependency injection for testing. This method will look at our file cache
    first, and then load and cache the file if there's a cache miss

    Args:
      zipfilename: the name of the zip file to load

    Returns:
      The zip file requested, or None if there is an I/O error
    """
    zip_archive = self.zipfile_cache.get(zipfilename)
    if zip_archive is None:
      try:
        zip_archive = zipfile.ZipFile(zipfilename)
        self.zipfile_cache[zipfilename] = zip_archive
      except (IOError, RuntimeError) as err:
        logging.error('Can\'t open zipfile %s, cause: %s' % (zipfilename,
                                                             err))
    return zip_archive

  def MapFileToArchive(self, file_path):
    """Given a file name, determine what archive it should be in.

    This method makes two critical assumptions.
    (1) The zip files passed as an argument to the handler, if concatenated
        in that same order, would result in a total ordering
        of all the files. See (2) for ordering type.
    (2) Upper case letters before lower case letters. The traversal of a
        directory tree is depth first. A parent directory's files are added
        before the files of any child directories

    Args:
      file_path: the file to be mapped to an archive

    Returns:
      The name of the archive where we expect the file to be, or None if no
      archive in the index covers it
    """
    num_archives = len(self.zipfilenames)
    while num_archives > 0:
      target = self.zipfilenames[num_archives - 1]
      # only entries with a second element (the first file in the archive)
      # participate in the index
      if len(target) > 1:
        if self.CompareFilenames(target[1], file_path) >= 0:
          return target[0]
      num_archives -= 1

    return None

  def CompareFilenames(self, file1, file2):
    """Determines whether file1 is lexicographically 'before' file2.

    WARNING: This method assumes that paths are output in a depth-first,
    with parent directories' files stored before children's

    We say that file1 is lexicographically before file2 if the last
    non-matching path segment of file1 is alphabetically before file2.

    Args:
      file1: the first file path
      file2: the second file path

    Returns:
      A positive number if file1 is before file2
      A negative number if file2 is before file1
      0 if filenames are the same
    """
    f1_segments = file1.split('/')
    f2_segments = file2.split('/')

    # advance past the common prefix of path segments
    segment_ptr = 0
    while (segment_ptr < len(f1_segments) and
           segment_ptr < len(f2_segments) and
           f1_segments[segment_ptr] == f2_segments[segment_ptr]):
      segment_ptr += 1

    if len(f1_segments) == len(f2_segments):

      # we fell off the end, the paths must be the same
      if segment_ptr == len(f1_segments):
        return 0

      # we didn't fall off the end, compare the segments where they differ
      if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
        return 1
      elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
        return -1
      else:
        return 0

    # the number of segments differs, we either mismatched comparing
    # directories, or comparing a file to a directory
    else:

      # IF we were looking at the last segment of one of the paths,
      # the one with fewer segments is first because files come before
      # directories
      # ELSE we just need to compare directory names
      if (segment_ptr + 1 == len(f1_segments) or
          segment_ptr + 1 == len(f2_segments)):
        return len(f2_segments) - len(f1_segments)
      else:
        if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
          return 1
        elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
          return -1
        else:
          return 0

  def SetCachingHeaders(self):
    """Set Expires and Cache-Control headers from MAX_AGE/PUBLIC."""
    max_age = self.MAX_AGE
    self.response.headers['Expires'] = email.Utils.formatdate(
        time.time() + max_age, usegmt=True)
    cache_control = []
    if self.PUBLIC:
      cache_control.append('public')
    cache_control.append('max-age=%d' % max_age)
    self.response.headers['Cache-Control'] = ', '.join(cache_control)

  def GetFromCache(self, filename):
    """Get file from memcache, if available.

    Args:
      filename: The URL of the file to return

    Returns:
      The content of the file, or None on a cache miss
    """
    return memcache.get('%s%s' % (self.CACHE_PREFIX, filename))

  def StoreOrUpdateInCache(self, filename, data):
    """Store data in the cache.

    Store a piece of data in the memcache. Memcache has a maximum item size of
    1*10^6 bytes. If the data is too large, fail, but log the failure. Future
    work will consider compressing the data before storing or chunking it

    Args:
      filename: the name of the file to store
      data: the data of the file

    Returns:
      None
    """
    try:
      # add() is a no-op if the key already exists, so fall back to replace()
      if not memcache.add('%s%s' % (self.CACHE_PREFIX, filename), data):
        memcache.replace('%s%s' % (self.CACHE_PREFIX, filename), data)
    except ValueError as err:
      logging.warning('Data size too large to cache\n%s' % err)

  def Write404Error(self):
    """Output a simple 404 response."""
    self.error(404)
    self.response.out.write(
        ''.join(['<html><head><title>404: Not Found</title></head>',
                 '<body><b><h2>Error 404</h2><br/>',
                 'File not found</b></body></html>']))

  def StoreInNegativeCache(self, filename):
    """If a non-existent URL is accessed, cache this result as well.

    Future work should consider setting a maximum negative cache size to
    prevent it from negatively impacting the real cache.

    Args:
      filename: URL to add to negative cache

    Returns:
      None
    """
    # -1 is just a sentinel; GetFromNegativeCache only tests for non-None
    memcache.add('%s%s' % (self.NEG_CACHE_PREFIX, filename), -1)

  def GetFromNegativeCache(self, filename):
    """Retrieve from negative cache.

    Args:
      filename: URL to retrieve

    Returns:
      The sentinel value (-1) if the URL is in the negative cache, or None
      if it is not
    """
    return memcache.get('%s%s' % (self.NEG_CACHE_PREFIX, filename))
+
+
def main():
  """Run the WSGI app, routing two-segment paths to MemcachedZipHandler."""
  routes = [('/([^/]+)/(.*)', MemcachedZipHandler)]
  util.run_wsgi_app(webapp.WSGIApplication(routes))


if __name__ == '__main__':
  main()