summaryrefslogtreecommitdiffstats
path: root/WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py
diff options
context:
space:
mode:
authorIain Merrick <husky@google.com>2010-08-19 17:55:56 +0100
committerIain Merrick <husky@google.com>2010-08-23 11:05:40 +0100
commitf486d19d62f1bc33246748b14b14a9dfa617b57f (patch)
tree195485454c93125455a30e553a73981c3816144d /WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py
parent6ba0b43722d16bc295606bec39f396f596e4fef1 (diff)
downloadexternal_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.zip
external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.tar.gz
external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.tar.bz2
Merge WebKit at r65615 : Initial merge by git.
Change-Id: Ifbf384f4531e3b58475a662e38195c2d9152ae79
Diffstat (limited to 'WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py')
-rw-r--r--WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py167
1 files changed, 167 insertions, 0 deletions
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py b/WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py
new file mode 100644
index 0000000..bb63f5e
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/deduplicate_tests.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+# Copyright (C) 2010 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""deduplicate_tests -- lists duplicated between platforms.
+
+If platform/mac-leopard is missing an expected test output, we fall back on
+platform/mac. This means it's possible to grow redundant test outputs,
+where we have the same expected data in both a platform directory and another
+platform it falls back on.
+"""
+
+import collections
+import fnmatch
+import os
+import subprocess
+import sys
+import re
+import webkitpy.common.system.executive as executive
+import webkitpy.common.system.logutils as logutils
+import webkitpy.layout_tests.port.factory as port_factory
+
+_log = logutils.get_logger(__file__)
+
+_BASE_PLATFORM = 'base'
+
+
+def port_fallbacks():
+ """Get the port fallback information.
+ Returns:
+ A dictionary mapping platform name to a list of other platforms to fall
+ back on. All platforms fall back on 'base'.
+ """
+ fallbacks = {_BASE_PLATFORM: []}
+ for port_name in os.listdir(os.path.join('LayoutTests', 'platform')):
+ try:
+ platforms = port_factory.get(port_name).baseline_search_path()
+ except NotImplementedError:
+ _log.error("'%s' lacks baseline_search_path(), please fix." % port_name)
+ fallbacks[port_name] = [_BASE_PLATFORM]
+ continue
+ fallbacks[port_name] = [os.path.basename(p) for p in platforms][1:]
+ fallbacks[port_name].append(_BASE_PLATFORM)
+ return fallbacks
+
+
+def parse_git_output(git_output, glob_pattern):
+ """Parses the output of git ls-tree and filters based on glob_pattern.
+ Args:
+ git_output: result of git ls-tree -r HEAD LayoutTests.
+ glob_pattern: a pattern to filter the files.
+ Returns:
+ A dictionary mapping (test name, hash of content) => [paths]
+ """
+ hashes = collections.defaultdict(set)
+ for line in git_output.split('\n'):
+ if not line:
+ break
+ attrs, path = line.strip().split('\t')
+ if not fnmatch.fnmatch(path, glob_pattern):
+ continue
+ path = path[len('LayoutTests/'):]
+ match = re.match(r'^(platform/.*?/)?(.*)', path)
+ test = match.group(2)
+ _, _, hash = attrs.split(' ')
+ hashes[(test, hash)].add(path)
+ return hashes
+
+
+def cluster_file_hashes(glob_pattern):
+ """Get the hashes of all the test expectations in the tree.
+ We cheat and use git's hashes.
+ Args:
+ glob_pattern: a pattern to filter the files.
+ Returns:
+ A dictionary mapping (test name, hash of content) => [paths]
+ """
+
+ # A map of file hash => set of all files with that hash.
+ hashes = collections.defaultdict(set)
+
+ # Fill in the map.
+ cmd = ('git', 'ls-tree', '-r', 'HEAD', 'LayoutTests')
+ try:
+ git_output = executive.Executive().run_command(cmd)
+ except OSError, e:
+ if e.errno == 2: # No such file or directory.
+ _log.error("Error: 'No such file' when running git.")
+ _log.error("This script requires git.")
+ sys.exit(1)
+ raise e
+ return parse_git_output(git_output, glob_pattern)
+
+
+def extract_platforms(paths):
+ """Extracts the platforms from a list of paths matching ^platform/(.*?)/.
+ Args:
+ paths: a list of paths.
+ Returns:
+ A dictionary containing all platforms from paths.
+ """
+ platforms = {}
+ for path in paths:
+ match = re.match(r'^platform/(.*?)/', path)
+ if match:
+ platform = match.group(1)
+ else:
+ platform = _BASE_PLATFORM
+ platforms[platform] = path
+ return platforms
+
+
+def find_dups(hashes, port_fallbacks):
+ """Yields info about redundant test expectations.
+ Args:
+ hashes: a list of hashes as returned by cluster_file_hashes.
+ port_fallbacks: a list of fallback information as returned by get_port_fallbacks.
+ Returns:
+ a tuple containing (test, platform, fallback, platforms)
+ """
+ for (test, hash), cluster in hashes.items():
+ if len(cluster) < 2:
+ continue # Common case: only one file with that hash.
+
+ # Compute the list of platforms we have this particular hash for.
+ platforms = extract_platforms(cluster)
+ if len(platforms) == 1:
+ continue
+
+ # See if any of the platforms are redundant with each other.
+ for platform in platforms.keys():
+ for fallback in port_fallbacks[platform]:
+ if fallback in platforms.keys():
+ yield test, platform, fallback, platforms[platform]
+
+
+def deduplicate(glob_pattern):
+ """Traverses LayoutTests and returns information about duplicated files.
+ Args:
+ glob pattern to filter the files in LayoutTests.
+ Returns:
+ a dictionary containing test, path, platform and fallback.
+ """
+ fallbacks = port_fallbacks()
+ hashes = cluster_file_hashes(glob_pattern)
+ return [{'test': test, 'path': path, 'platform': platform, 'fallback': fallback}
+ for test, platform, fallback, path in find_dups(hashes, fallbacks)]