diff options
author | Iain Merrick <husky@google.com> | 2010-08-19 17:55:56 +0100 |
---|---|---|
committer | Iain Merrick <husky@google.com> | 2010-08-23 11:05:40 +0100 |
commit | f486d19d62f1bc33246748b14b14a9dfa617b57f (patch) | |
tree | 195485454c93125455a30e553a73981c3816144d /WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py | |
parent | 6ba0b43722d16bc295606bec39f396f596e4fef1 (diff) | |
download | external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.zip external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.tar.gz external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.tar.bz2 |
Merge WebKit at r65615 : Initial merge by git.
Change-Id: Ifbf384f4531e3b58475a662e38195c2d9152ae79
Diffstat (limited to 'WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py')
-rw-r--r-- | WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py b/WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py new file mode 100644 index 0000000..bb63f5e --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# Copyright (C) 2010 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""deduplicate_tests -- lists duplicated between platforms. + +If platform/mac-leopard is missing an expected test output, we fall back on +platform/mac. This means it's possible to grow redundant test outputs, +where we have the same expected data in both a platform directory and another +platform it falls back on. +""" + +import collections +import fnmatch +import os +import subprocess +import sys +import re +import webkitpy.common.system.executive as executive +import webkitpy.common.system.logutils as logutils +import webkitpy.layout_tests.port.factory as port_factory + +_log = logutils.get_logger(__file__) + +_BASE_PLATFORM = 'base' + + +def port_fallbacks(): + """Get the port fallback information. + Returns: + A dictionary mapping platform name to a list of other platforms to fall + back on. All platforms fall back on 'base'. + """ + fallbacks = {_BASE_PLATFORM: []} + for port_name in os.listdir(os.path.join('LayoutTests', 'platform')): + try: + platforms = port_factory.get(port_name).baseline_search_path() + except NotImplementedError: + _log.error("'%s' lacks baseline_search_path(), please fix." % port_name) + fallbacks[port_name] = [_BASE_PLATFORM] + continue + fallbacks[port_name] = [os.path.basename(p) for p in platforms][1:] + fallbacks[port_name].append(_BASE_PLATFORM) + return fallbacks + + +def parse_git_output(git_output, glob_pattern): + """Parses the output of git ls-tree and filters based on glob_pattern. + Args: + git_output: result of git ls-tree -r HEAD LayoutTests. + glob_pattern: a pattern to filter the files. + Returns: + A dictionary mapping (test name, hash of content) => [paths] + """ + hashes = collections.defaultdict(set) + for line in git_output.split('\n'): + if not line: + break + attrs, path = line.strip().split('\t') + if not fnmatch.fnmatch(path, glob_pattern): + continue + path = path[len('LayoutTests/'):] + match = re.match(r'^(platform/.*?/)?(.*)', path) + test = match.group(2) + _, _, hash = attrs.split(' ') + hashes[(test, hash)].add(path) + return hashes + + +def cluster_file_hashes(glob_pattern): + """Get the hashes of all the test expectations in the tree. + We cheat and use git's hashes. + Args: + glob_pattern: a pattern to filter the files. + Returns: + A dictionary mapping (test name, hash of content) => [paths] + """ + + # A map of file hash => set of all files with that hash. + hashes = collections.defaultdict(set) + + # Fill in the map. + cmd = ('git', 'ls-tree', '-r', 'HEAD', 'LayoutTests') + try: + git_output = executive.Executive().run_command(cmd) + except OSError, e: + if e.errno == 2: # No such file or directory. + _log.error("Error: 'No such file' when running git.") + _log.error("This script requires git.") + sys.exit(1) + raise e + return parse_git_output(git_output, glob_pattern) + + +def extract_platforms(paths): + """Extracts the platforms from a list of paths matching ^platform/(.*?)/. + Args: + paths: a list of paths. + Returns: + A dictionary containing all platforms from paths. + """ + platforms = {} + for path in paths: + match = re.match(r'^platform/(.*?)/', path) + if match: + platform = match.group(1) + else: + platform = _BASE_PLATFORM + platforms[platform] = path + return platforms + + +def find_dups(hashes, port_fallbacks): + """Yields info about redundant test expectations. + Args: + hashes: a list of hashes as returned by cluster_file_hashes. + port_fallbacks: a list of fallback information as returned by get_port_fallbacks. + Returns: + a tuple containing (test, platform, fallback, platforms) + """ + for (test, hash), cluster in hashes.items(): + if len(cluster) < 2: + continue # Common case: only one file with that hash. + + # Compute the list of platforms we have this particular hash for. + platforms = extract_platforms(cluster) + if len(platforms) == 1: + continue + + # See if any of the platforms are redundant with each other. + for platform in platforms.keys(): + for fallback in port_fallbacks[platform]: + if fallback in platforms.keys(): + yield test, platform, fallback, platforms[platform] + + +def deduplicate(glob_pattern): + """Traverses LayoutTests and returns information about duplicated files. + Args: + glob pattern to filter the files in LayoutTests. + Returns: + a dictionary containing test, path, platform and fallback. + """ + fallbacks = port_fallbacks() + hashes = cluster_file_hashes(glob_pattern) + return [{'test': test, 'path': path, 'platform': platform, 'fallback': fallback} + for test, platform, fallback, path in find_dups(hashes, fallbacks)] |