Diffstat (limited to 'Tools/Scripts/webkitpy/layout_tests/layout_package/test_runner.py')
-rw-r--r-- | Tools/Scripts/webkitpy/layout_tests/layout_package/test_runner.py | 1218 |
1 file changed, 1218 insertions, 0 deletions
diff --git a/Tools/Scripts/webkitpy/layout_tests/layout_package/test_runner.py b/Tools/Scripts/webkitpy/layout_tests/layout_package/test_runner.py new file mode 100644 index 0000000..24d04ca --- /dev/null +++ b/Tools/Scripts/webkitpy/layout_tests/layout_package/test_runner.py @@ -0,0 +1,1218 @@ +#!/usr/bin/env python +# Copyright (C) 2010 Google Inc. All rights reserved. +# Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +The TestRunner class runs a series of tests (TestType interface) against a set +of test files. If a test file fails a TestType, it returns a list TestFailure +objects to the TestRunner. The TestRunner then aggregates the TestFailures to +create a final report. +""" + +from __future__ import with_statement + +import codecs +import errno +import logging +import math +import os +import Queue +import random +import shutil +import sys +import time + +from result_summary import ResultSummary +from test_input import TestInput + +import dump_render_tree_thread +import json_layout_results_generator +import message_broker +import printing +import test_expectations +import test_failures +import test_results +import test_results_uploader + +from webkitpy.thirdparty import simplejson +from webkitpy.tool import grammar + +_log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests") + +# Builder base URL where we have the archived test results. +BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/" + +LAYOUT_TESTS_DIRECTORY = "LayoutTests" + os.sep + +TestExpectationsFile = test_expectations.TestExpectationsFile + + +def summarize_unexpected_results(port_obj, expectations, result_summary, + retry_summary): + """Summarize any unexpected results as a dict. + + FIXME: split this data structure into a separate class? 
+ + Args: + port_obj: interface to port-specific hooks + expectations: test_expectations.TestExpectations object + result_summary: summary object from initial test runs + retry_summary: summary object from final test run of retried tests + Returns: + A dictionary containing a summary of the unexpected results from the + run, with the following fields: + 'version': a version indicator (1 in this version) + 'fixable': # of fixable tests (NOW - PASS) + 'skipped': # of skipped tests (NOW & SKIPPED) + 'num_regressions': # of non-flaky failures + 'num_flaky': # of flaky failures + 'num_passes': # of unexpected passes + 'tests': a dict of tests -> {'expected': '...', 'actual': '...'} + """ + results = {} + results['version'] = 1 + + tbe = result_summary.tests_by_expectation + tbt = result_summary.tests_by_timeline + results['fixable'] = len(tbt[test_expectations.NOW] - + tbe[test_expectations.PASS]) + results['skipped'] = len(tbt[test_expectations.NOW] & + tbe[test_expectations.SKIP]) + + num_passes = 0 + num_flaky = 0 + num_regressions = 0 + keywords = {} + for k, v in TestExpectationsFile.EXPECTATIONS.iteritems(): + keywords[v] = k.upper() + + tests = {} + for filename, result in result_summary.unexpected_results.iteritems(): + # Note that if a test crashed in the original run, we ignore + # whether or not it crashed when we retried it (if we retried it), + # and always consider the result not flaky. + test = port_obj.relative_test_filename(filename) + expected = expectations.get_expectations_string(filename) + actual = [keywords[result]] + + if result == test_expectations.PASS: + num_passes += 1 + elif result == test_expectations.CRASH: + num_regressions += 1 + else: + if filename not in retry_summary.unexpected_results: + actual.extend(expectations.get_expectations_string( + filename).split(" ")) + num_flaky += 1 + else: + retry_result = retry_summary.unexpected_results[filename] + if result != retry_result: + actual.append(keywords[retry_result]) + num_flaky += 1 + else: + num_regressions += 1 + + tests[test] = {} + tests[test]['expected'] = expected + tests[test]['actual'] = " ".join(actual) + + results['tests'] = tests + results['num_passes'] = num_passes + results['num_flaky'] = num_flaky + results['num_regressions'] = num_regressions + + return results + + +class TestRunInterruptedException(Exception): + """Raised when a test run should be stopped immediately.""" + def __init__(self, reason): + self.reason = reason + + +class TestRunner: + """A class for managing running a series of tests on a series of layout + test files.""" + + HTTP_SUBDIR = os.sep.join(['', 'http', '']) + WEBSOCKET_SUBDIR = os.sep.join(['', 'websocket', '']) + + # The per-test timeout in milliseconds, if no --time-out-ms option was + # given to run_webkit_tests. This should correspond to the default timeout + # in DumpRenderTree. + DEFAULT_TEST_TIMEOUT_MS = 6 * 1000 + + def __init__(self, port, options, printer): + """Initialize test runner data structures. + + Args: + port: an object implementing port-specific + options: a dictionary of command line options + printer: a Printer object to record updates to. + """ + self._port = port + self._options = options + self._printer = printer + self._message_broker = None + + # disable wss server. need to install pyOpenSSL on buildbots. 
+ # self._websocket_secure_server = websocket_server.PyWebSocket( + # options.results_directory, use_tls=True, port=9323) + + # a set of test files, and the same tests as a list + self._test_files = set() + self._test_files_list = None + self._result_queue = Queue.Queue() + self._retrying = False + + def collect_tests(self, args, last_unexpected_results): + """Find all the files to test. + + Args: + args: list of test arguments from the command line + last_unexpected_results: list of unexpected results to retest, if any + + """ + paths = [self._strip_test_dir_prefix(arg) for arg in args if arg and arg != ''] + paths += last_unexpected_results + if self._options.test_list: + paths += read_test_files(self._options.test_list) + self._test_files = self._port.tests(paths) + + def _strip_test_dir_prefix(self, path): + if path.startswith(LAYOUT_TESTS_DIRECTORY): + return path[len(LAYOUT_TESTS_DIRECTORY):] + return path + + def lint(self): + lint_failed = False + + # Creating the expecations for each platform/configuration pair does + # all the test list parsing and ensures it's correct syntax (e.g. no + # dupes). + for platform_name in self._port.test_platform_names(): + try: + self.parse_expectations(platform_name, is_debug_mode=True) + except test_expectations.ParseError: + lint_failed = True + try: + self.parse_expectations(platform_name, is_debug_mode=False) + except test_expectations.ParseError: + lint_failed = True + + self._printer.write("") + if lint_failed: + _log.error("Lint failed.") + return -1 + + _log.info("Lint succeeded.") + return 0 + + def parse_expectations(self, test_platform_name, is_debug_mode): + """Parse the expectations from the test_list files and return a data + structure holding them. Throws an error if the test_list files have + invalid syntax.""" + if self._options.lint_test_files: + test_files = None + else: + test_files = self._test_files + + expectations_str = self._port.test_expectations() + overrides_str = self._port.test_expectations_overrides() + self._expectations = test_expectations.TestExpectations( + self._port, test_files, expectations_str, test_platform_name, + is_debug_mode, self._options.lint_test_files, + overrides=overrides_str) + return self._expectations + + def prepare_lists_and_print_output(self): + """Create appropriate subsets of test lists and returns a + ResultSummary object. Also prints expected test counts. + """ + + # Remove skipped - both fixable and ignored - files from the + # top-level list of files to test. + num_all_test_files = len(self._test_files) + self._printer.print_expected("Found: %d tests" % + (len(self._test_files))) + if not num_all_test_files: + _log.critical('No tests to run.') + return None + + skipped = set() + if num_all_test_files > 1 and not self._options.force: + skipped = self._expectations.get_tests_with_result_type( + test_expectations.SKIP) + self._test_files -= skipped + + # Create a sorted list of test files so the subset chunk, + # if used, contains alphabetically consecutive tests. + self._test_files_list = list(self._test_files) + if self._options.randomize_order: + random.shuffle(self._test_files_list) + else: + self._test_files_list.sort() + + # If the user specifies they just want to run a subset of the tests, + # just grab a subset of the non-skipped tests. 
+ if self._options.run_chunk or self._options.run_part: + chunk_value = self._options.run_chunk or self._options.run_part + test_files = self._test_files_list + try: + (chunk_num, chunk_len) = chunk_value.split(":") + chunk_num = int(chunk_num) + assert(chunk_num >= 0) + test_size = int(chunk_len) + assert(test_size > 0) + except: + _log.critical("invalid chunk '%s'" % chunk_value) + return None + + # Get the number of tests + num_tests = len(test_files) + + # Get the start offset of the slice. + if self._options.run_chunk: + chunk_len = test_size + # In this case chunk_num can be really large. We need + # to make the slave fit in the current number of tests. + slice_start = (chunk_num * chunk_len) % num_tests + else: + # Validate the data. + assert(test_size <= num_tests) + assert(chunk_num <= test_size) + + # To count the chunk_len, and make sure we don't skip + # some tests, we round to the next value that fits exactly + # all the parts. + rounded_tests = num_tests + if rounded_tests % test_size != 0: + rounded_tests = (num_tests + test_size - + (num_tests % test_size)) + + chunk_len = rounded_tests / test_size + slice_start = chunk_len * (chunk_num - 1) + # It does not mind if we go over test_size. + + # Get the end offset of the slice. + slice_end = min(num_tests, slice_start + chunk_len) + + files = test_files[slice_start:slice_end] + + tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % ( + (slice_end - slice_start), slice_start, slice_end, num_tests) + self._printer.print_expected(tests_run_msg) + + # If we reached the end and we don't have enough tests, we run some + # from the beginning. + if slice_end - slice_start < chunk_len: + extra = chunk_len - (slice_end - slice_start) + extra_msg = (' last chunk is partial, appending [0:%d]' % + extra) + self._printer.print_expected(extra_msg) + tests_run_msg += "\n" + extra_msg + files.extend(test_files[0:extra]) + tests_run_filename = os.path.join(self._options.results_directory, + "tests_run.txt") + with codecs.open(tests_run_filename, "w", "utf-8") as file: + file.write(tests_run_msg + "\n") + + len_skip_chunk = int(len(files) * len(skipped) / + float(len(self._test_files))) + skip_chunk_list = list(skipped)[0:len_skip_chunk] + skip_chunk = set(skip_chunk_list) + + # Update expectations so that the stats are calculated correctly. + # We need to pass a list that includes the right # of skipped files + # to ParseExpectations so that ResultSummary() will get the correct + # stats. So, we add in the subset of skipped files, and then + # subtract them back out. 
+ self._test_files_list = files + skip_chunk_list + self._test_files = set(self._test_files_list) + + self._expectations = self.parse_expectations( + self._port.test_platform_name(), + self._options.configuration == 'Debug') + + self._test_files = set(files) + self._test_files_list = files + else: + skip_chunk = skipped + + result_summary = ResultSummary(self._expectations, + self._test_files | skip_chunk) + self._print_expected_results_of_type(result_summary, + test_expectations.PASS, "passes") + self._print_expected_results_of_type(result_summary, + test_expectations.FAIL, "failures") + self._print_expected_results_of_type(result_summary, + test_expectations.FLAKY, "flaky") + self._print_expected_results_of_type(result_summary, + test_expectations.SKIP, "skipped") + + if self._options.force: + self._printer.print_expected('Running all tests, including ' + 'skips (--force)') + else: + # Note that we don't actually run the skipped tests (they were + # subtracted out of self._test_files, above), but we stub out the + # results here so the statistics can remain accurate. + for test in skip_chunk: + result = test_results.TestResult(test, + failures=[], test_run_time=0, total_time_for_all_diffs=0, + time_for_diffs=0) + result.type = test_expectations.SKIP + result_summary.add(result, expected=True) + self._printer.print_expected('') + + return result_summary + + def _get_dir_for_test_file(self, test_file): + """Returns the highest-level directory by which to shard the given + test file.""" + index = test_file.rfind(os.sep + LAYOUT_TESTS_DIRECTORY) + + test_file = test_file[index + len(LAYOUT_TESTS_DIRECTORY):] + test_file_parts = test_file.split(os.sep, 1) + directory = test_file_parts[0] + test_file = test_file_parts[1] + + # The http tests are very stable on mac/linux. + # TODO(ojan): Make the http server on Windows be apache so we can + # turn shard the http tests there as well. Switching to apache is + # what made them stable on linux/mac. + return_value = directory + while ((directory != 'http' or sys.platform in ('darwin', 'linux2')) + and test_file.find(os.sep) >= 0): + test_file_parts = test_file.split(os.sep, 1) + directory = test_file_parts[0] + return_value = os.path.join(return_value, directory) + test_file = test_file_parts[1] + + return return_value + + def _get_test_input_for_file(self, test_file): + """Returns the appropriate TestInput object for the file. Mostly this + is used for looking up the timeout value (in ms) to use for the given + test.""" + if self._test_is_slow(test_file): + return TestInput(test_file, self._options.slow_time_out_ms) + return TestInput(test_file, self._options.time_out_ms) + + def _test_requires_lock(self, test_file): + """Return True if the test needs to be locked when + running multiple copies of NRWTs.""" + split_path = test_file.split(os.sep) + return 'http' in split_path or 'websocket' in split_path + + def _test_is_slow(self, test_file): + return self._expectations.has_modifier(test_file, + test_expectations.SLOW) + + def _shard_tests(self, test_files, use_real_shards): + """Groups tests into batches. + This helps ensure that tests that depend on each other (aka bad tests!) + continue to run together as most cross-tests dependencies tend to + occur within the same directory. If use_real_shards is False, we + put each (non-HTTP/websocket) test into its own shard for maximum + concurrency instead of trying to do any sort of real sharding. + + Return: + A list of lists of TestInput objects. 
+ """ + # FIXME: when we added http locking, we changed how this works such + # that we always lump all of the HTTP threads into a single shard. + # That will slow down experimental-fully-parallel, but it's unclear + # what the best alternative is completely revamping how we track + # when to grab the lock. + + test_lists = [] + tests_to_http_lock = [] + if not use_real_shards: + for test_file in test_files: + test_input = self._get_test_input_for_file(test_file) + if self._test_requires_lock(test_file): + tests_to_http_lock.append(test_input) + else: + test_lists.append((".", [test_input])) + else: + tests_by_dir = {} + for test_file in test_files: + directory = self._get_dir_for_test_file(test_file) + test_input = self._get_test_input_for_file(test_file) + if self._test_requires_lock(test_file): + tests_to_http_lock.append(test_input) + else: + tests_by_dir.setdefault(directory, []) + tests_by_dir[directory].append(test_input) + # Sort by the number of tests in the dir so that the ones with the + # most tests get run first in order to maximize parallelization. + # Number of tests is a good enough, but not perfect, approximation + # of how long that set of tests will take to run. We can't just use + # a PriorityQueue until we move to Python 2.6. + for directory in tests_by_dir: + test_list = tests_by_dir[directory] + # Keep the tests in alphabetical order. + # FIXME: Remove once tests are fixed so they can be run in any + # order. + test_list.reverse() + test_list_tuple = (directory, test_list) + test_lists.append(test_list_tuple) + test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1]))) + + # Put the http tests first. There are only a couple hundred of them, + # but each http test takes a very long time to run, so sorting by the + # number of tests doesn't accurately capture how long they take to run. + if tests_to_http_lock: + tests_to_http_lock.reverse() + test_lists.insert(0, ("tests_to_http_lock", tests_to_http_lock)) + + return test_lists + + def _contains_tests(self, subdir): + for test_file in self._test_files: + if test_file.find(subdir) >= 0: + return True + return False + + def _num_workers(self): + return int(self._options.child_processes) + + def _run_tests(self, file_list, result_summary): + """Runs the tests in the file_list. + + Return: A tuple (interrupted, keyboard_interrupted, thread_timings, + test_timings, individual_test_timings) + interrupted is whether the run was interrupted + keyboard_interrupted is whether the interruption was because someone + typed Ctrl^C + thread_timings is a list of dicts with the total runtime + of each thread with 'name', 'num_tests', 'total_time' properties + test_timings is a list of timings for each sharded subdirectory + of the form [time, directory_name, num_tests] + individual_test_timings is a list of run times for each test + in the form {filename:filename, test_run_time:test_run_time} + result_summary: summary object to populate with the results + """ + + self._printer.print_update('Sharding tests ...') + num_workers = self._num_workers() + test_lists = self._shard_tests(file_list, + num_workers > 1 and not self._options.experimental_fully_parallel) + filename_queue = Queue.Queue() + for item in test_lists: + filename_queue.put(item) + + self._printer.print_update('Starting %s ...' 
% + grammar.pluralize('worker', num_workers)) + self._message_broker = message_broker.get(self._port, self._options) + broker = self._message_broker + self._current_filename_queue = filename_queue + self._current_result_summary = result_summary + + if not self._options.dry_run: + threads = broker.start_workers(self) + else: + threads = {} + + self._printer.print_update("Starting testing ...") + keyboard_interrupted = False + interrupted = False + if not self._options.dry_run: + try: + broker.run_message_loop() + except KeyboardInterrupt: + _log.info("Interrupted, exiting") + broker.cancel_workers() + keyboard_interrupted = True + interrupted = True + except TestRunInterruptedException, e: + _log.info(e.reason) + broker.cancel_workers() + interrupted = True + except: + # Unexpected exception; don't try to clean up workers. + _log.info("Exception raised, exiting") + raise + + thread_timings, test_timings, individual_test_timings = \ + self._collect_timing_info(threads) + + broker.cleanup() + self._message_broker = None + return (interrupted, keyboard_interrupted, thread_timings, test_timings, + individual_test_timings) + + def update(self): + self.update_summary(self._current_result_summary) + + def _collect_timing_info(self, threads): + test_timings = {} + individual_test_timings = [] + thread_timings = [] + + for thread in threads: + thread_timings.append({'name': thread.getName(), + 'num_tests': thread.get_num_tests(), + 'total_time': thread.get_total_time()}) + test_timings.update(thread.get_test_group_timing_stats()) + individual_test_timings.extend(thread.get_test_results()) + + return (thread_timings, test_timings, individual_test_timings) + + def needs_http(self): + """Returns whether the test runner needs an HTTP server.""" + return self._contains_tests(self.HTTP_SUBDIR) + + def needs_websocket(self): + """Returns whether the test runner needs a WEBSOCKET server.""" + return self._contains_tests(self.WEBSOCKET_SUBDIR) + + def set_up_run(self): + """Configures the system to be ready to run tests. + + Returns a ResultSummary object if we should continue to run tests, + or None if we should abort. + + """ + # This must be started before we check the system dependencies, + # since the helper may do things to make the setup correct. + self._printer.print_update("Starting helper ...") + self._port.start_helper() + + # Check that the system dependencies (themes, fonts, ...) are correct. + if not self._options.nocheck_sys_deps: + self._printer.print_update("Checking system dependencies ...") + if not self._port.check_sys_deps(self.needs_http()): + self._port.stop_helper() + return None + + if self._options.clobber_old_results: + self._clobber_old_results() + + # Create the output directory if it doesn't already exist. + self._port.maybe_make_directory(self._options.results_directory) + + self._port.setup_test_run() + + self._printer.print_update("Preparing tests ...") + result_summary = self.prepare_lists_and_print_output() + if not result_summary: + return None + + return result_summary + + def run(self, result_summary): + """Run all our tests on all our test files. + + For each test file, we run each test type. If there are any failures, + we collect them for reporting. + + Args: + result_summary: a summary object tracking the test results. + + Return: + The number of unexpected results (0 == success) + """ + # gather_test_files() must have been called first to initialize us. + # If we didn't find any files to test, we've errored out already in + # prepare_lists_and_print_output(). 
+ assert(len(self._test_files)) + + start_time = time.time() + + interrupted, keyboard_interrupted, thread_timings, test_timings, \ + individual_test_timings = ( + self._run_tests(self._test_files_list, result_summary)) + + # We exclude the crashes from the list of results to retry, because + # we want to treat even a potentially flaky crash as an error. + failures = self._get_failures(result_summary, include_crashes=False) + retry_summary = result_summary + while (len(failures) and self._options.retry_failures and + not self._retrying and not interrupted): + _log.info('') + _log.info("Retrying %d unexpected failure(s) ..." % len(failures)) + _log.info('') + self._retrying = True + retry_summary = ResultSummary(self._expectations, failures.keys()) + # Note that we intentionally ignore the return value here. + self._run_tests(failures.keys(), retry_summary) + failures = self._get_failures(retry_summary, include_crashes=True) + + end_time = time.time() + + self._print_timing_statistics(end_time - start_time, + thread_timings, test_timings, + individual_test_timings, + result_summary) + + self._print_result_summary(result_summary) + + sys.stdout.flush() + sys.stderr.flush() + + self._printer.print_one_line_summary(result_summary.total, + result_summary.expected, + result_summary.unexpected) + + unexpected_results = summarize_unexpected_results(self._port, + self._expectations, result_summary, retry_summary) + self._printer.print_unexpected_results(unexpected_results) + + if (self._options.record_results and not self._options.dry_run and + not interrupted): + # Write the same data to log files and upload generated JSON files + # to appengine server. + self._upload_json_files(unexpected_results, result_summary, + individual_test_timings) + + # Write the summary to disk (results.html) and display it if requested. + if not self._options.dry_run: + wrote_results = self._write_results_html_file(result_summary) + if self._options.show_results and wrote_results: + self._show_results_html_file() + + # Now that we've completed all the processing we can, we re-raise + # a KeyboardInterrupt if necessary so the caller can handle it. + if keyboard_interrupted: + raise KeyboardInterrupt + + # Ignore flaky failures and unexpected passes so we don't turn the + # bot red for those. 
+ return unexpected_results['num_regressions'] + + def clean_up_run(self): + """Restores the system after we're done running tests.""" + + _log.debug("flushing stdout") + sys.stdout.flush() + _log.debug("flushing stderr") + sys.stderr.flush() + _log.debug("stopping helper") + self._port.stop_helper() + + def update_summary(self, result_summary): + """Update the summary and print results with any completed tests.""" + while True: + try: + result = test_results.TestResult.loads(self._result_queue.get_nowait()) + except Queue.Empty: + return + + expected = self._expectations.matches_an_expected_result( + result.filename, result.type, self._options.pixel_tests) + result_summary.add(result, expected) + exp_str = self._expectations.get_expectations_string( + result.filename) + got_str = self._expectations.expectation_to_string(result.type) + self._printer.print_test_result(result, expected, exp_str, got_str) + self._printer.print_progress(result_summary, self._retrying, + self._test_files_list) + + def interrupt_if_at_failure_limit(limit, count, message): + if limit and count >= limit: + raise TestRunInterruptedException(message % count) + + interrupt_if_at_failure_limit( + self._options.exit_after_n_failures, + result_summary.unexpected_failures, + "Aborting run since %d failures were reached") + interrupt_if_at_failure_limit( + self._options.exit_after_n_crashes_or_timeouts, + result_summary.unexpected_crashes_or_timeouts, + "Aborting run since %d crashes or timeouts were reached") + + def _clobber_old_results(self): + # Just clobber the actual test results directories since the other + # files in the results directory are explicitly used for cross-run + # tracking. + self._printer.print_update("Clobbering old results in %s" % + self._options.results_directory) + layout_tests_dir = self._port.layout_tests_dir() + possible_dirs = self._port.test_dirs() + for dirname in possible_dirs: + if os.path.isdir(os.path.join(layout_tests_dir, dirname)): + shutil.rmtree(os.path.join(self._options.results_directory, + dirname), + ignore_errors=True) + + def _get_failures(self, result_summary, include_crashes): + """Filters a dict of results and returns only the failures. + + Args: + result_summary: the results of the test run + include_crashes: whether crashes are included in the output. + We use False when finding the list of failures to retry + to see if the results were flaky. Although the crashes may also be + flaky, we treat them as if they aren't so that they're not ignored. + Returns: + a dict of files -> results + """ + failed_results = {} + for test, result in result_summary.unexpected_results.iteritems(): + if (result == test_expectations.PASS or + result == test_expectations.CRASH and not include_crashes): + continue + failed_results[test] = result + + return failed_results + + def _upload_json_files(self, unexpected_results, result_summary, + individual_test_timings): + """Writes the results of the test run as JSON files into the results + dir and upload the files to the appengine server. + + There are three different files written into the results dir: + unexpected_results.json: A short list of any unexpected results. + This is used by the buildbots to display results. + expectations.json: This is used by the flakiness dashboard. + results.json: A full list of the results - used by the flakiness + dashboard and the aggregate results dashboard. 
+ + Args: + unexpected_results: dict of unexpected results + result_summary: full summary object + individual_test_timings: list of test times (used by the flakiness + dashboard). + """ + results_directory = self._options.results_directory + _log.debug("Writing JSON files in %s." % results_directory) + unexpected_json_path = os.path.join(results_directory, "unexpected_results.json") + with codecs.open(unexpected_json_path, "w", "utf-8") as file: + simplejson.dump(unexpected_results, file, sort_keys=True, indent=2) + + # Write a json file of the test_expectations.txt file for the layout + # tests dashboard. + expectations_path = os.path.join(results_directory, "expectations.json") + expectations_json = \ + self._expectations.get_expectations_json_for_all_platforms() + with codecs.open(expectations_path, "w", "utf-8") as file: + file.write(u"ADD_EXPECTATIONS(%s);" % expectations_json) + + generator = json_layout_results_generator.JSONLayoutResultsGenerator( + self._port, self._options.builder_name, self._options.build_name, + self._options.build_number, self._options.results_directory, + BUILDER_BASE_URL, individual_test_timings, + self._expectations, result_summary, self._test_files_list, + not self._options.upload_full_results, + self._options.test_results_server, + "layout-tests", + self._options.master_name) + + _log.debug("Finished writing JSON files.") + + json_files = ["expectations.json"] + if self._options.upload_full_results: + json_files.append("results.json") + else: + json_files.append("incremental_results.json") + + generator.upload_json_files(json_files) + + def _print_config(self): + """Prints the configuration for the test run.""" + p = self._printer + p.print_config("Using port '%s'" % self._port.name()) + p.print_config("Placing test results in %s" % + self._options.results_directory) + if self._options.new_baseline: + p.print_config("Placing new baselines in %s" % + self._port.baseline_path()) + p.print_config("Using %s build" % self._options.configuration) + if self._options.pixel_tests: + p.print_config("Pixel tests enabled") + else: + p.print_config("Pixel tests disabled") + + p.print_config("Regular timeout: %s, slow test timeout: %s" % + (self._options.time_out_ms, + self._options.slow_time_out_ms)) + + if self._num_workers() == 1: + p.print_config("Running one %s" % self._port.driver_name()) + else: + p.print_config("Running %s %ss in parallel" % + (self._options.child_processes, + self._port.driver_name())) + p.print_config('Command line: ' + + ' '.join(self._port.driver_cmd_line())) + p.print_config("Worker model: %s" % self._options.worker_model) + p.print_config("") + + def _print_expected_results_of_type(self, result_summary, + result_type, result_type_str): + """Print the number of the tests in a given result class. + + Args: + result_summary - the object containing all the results to report on + result_type - the particular result type to report in the summary. + result_type_str - a string description of the result_type. + """ + tests = self._expectations.get_tests_with_result_type(result_type) + now = result_summary.tests_by_timeline[test_expectations.NOW] + wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX] + + # We use a fancy format string in order to print the data out in a + # nicely-aligned table. 
+ fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)" + % (self._num_digits(now), self._num_digits(wontfix))) + self._printer.print_expected(fmtstr % + (len(tests), result_type_str, len(tests & now), len(tests & wontfix))) + + def _num_digits(self, num): + """Returns the number of digits needed to represent the length of a + sequence.""" + ndigits = 1 + if len(num): + ndigits = int(math.log10(len(num))) + 1 + return ndigits + + def _print_timing_statistics(self, total_time, thread_timings, + directory_test_timings, individual_test_timings, + result_summary): + """Record timing-specific information for the test run. + + Args: + total_time: total elapsed time (in seconds) for the test run + thread_timings: wall clock time each thread ran for + directory_test_timings: timing by directory + individual_test_timings: timing by file + result_summary: summary object for the test run + """ + self._printer.print_timing("Test timing:") + self._printer.print_timing(" %6.2f total testing time" % total_time) + self._printer.print_timing("") + self._printer.print_timing("Thread timing:") + cuml_time = 0 + for t in thread_timings: + self._printer.print_timing(" %10s: %5d tests, %6.2f secs" % + (t['name'], t['num_tests'], t['total_time'])) + cuml_time += t['total_time'] + self._printer.print_timing(" %6.2f cumulative, %6.2f optimal" % + (cuml_time, cuml_time / int(self._options.child_processes))) + self._printer.print_timing("") + + self._print_aggregate_test_statistics(individual_test_timings) + self._print_individual_test_times(individual_test_timings, + result_summary) + self._print_directory_timings(directory_test_timings) + + def _print_aggregate_test_statistics(self, individual_test_timings): + """Prints aggregate statistics (e.g. median, mean, etc.) for all tests. + Args: + individual_test_timings: List of TestResults for all tests. + """ + test_types = [] # Unit tests don't actually produce any timings. + if individual_test_timings: + test_types = individual_test_timings[0].time_for_diffs.keys() + times_for_dump_render_tree = [] + times_for_diff_processing = [] + times_per_test_type = {} + for test_type in test_types: + times_per_test_type[test_type] = [] + + for test_stats in individual_test_timings: + times_for_dump_render_tree.append(test_stats.test_run_time) + times_for_diff_processing.append( + test_stats.total_time_for_all_diffs) + time_for_diffs = test_stats.time_for_diffs + for test_type in test_types: + times_per_test_type[test_type].append( + time_for_diffs[test_type]) + + self._print_statistics_for_test_timings( + "PER TEST TIME IN TESTSHELL (seconds):", + times_for_dump_render_tree) + self._print_statistics_for_test_timings( + "PER TEST DIFF PROCESSING TIMES (seconds):", + times_for_diff_processing) + for test_type in test_types: + self._print_statistics_for_test_timings( + "PER TEST TIMES BY TEST TYPE: %s" % test_type, + times_per_test_type[test_type]) + + def _print_individual_test_times(self, individual_test_timings, + result_summary): + """Prints the run times for slow, timeout and crash tests. + Args: + individual_test_timings: List of TestStats for all tests. + result_summary: summary object for test run + """ + # Reverse-sort by the time spent in DumpRenderTree. 
+ individual_test_timings.sort(lambda a, b: + cmp(b.test_run_time, a.test_run_time)) + + num_printed = 0 + slow_tests = [] + timeout_or_crash_tests = [] + unexpected_slow_tests = [] + for test_tuple in individual_test_timings: + filename = test_tuple.filename + is_timeout_crash_or_slow = False + if self._test_is_slow(filename): + is_timeout_crash_or_slow = True + slow_tests.append(test_tuple) + + if filename in result_summary.failures: + result = result_summary.results[filename].type + if (result == test_expectations.TIMEOUT or + result == test_expectations.CRASH): + is_timeout_crash_or_slow = True + timeout_or_crash_tests.append(test_tuple) + + if (not is_timeout_crash_or_slow and + num_printed < printing.NUM_SLOW_TESTS_TO_LOG): + num_printed = num_printed + 1 + unexpected_slow_tests.append(test_tuple) + + self._printer.print_timing("") + self._print_test_list_timing("%s slowest tests that are not " + "marked as SLOW and did not timeout/crash:" % + printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests) + self._printer.print_timing("") + self._print_test_list_timing("Tests marked as SLOW:", slow_tests) + self._printer.print_timing("") + self._print_test_list_timing("Tests that timed out or crashed:", + timeout_or_crash_tests) + self._printer.print_timing("") + + def _print_test_list_timing(self, title, test_list): + """Print timing info for each test. + + Args: + title: section heading + test_list: tests that fall in this section + """ + if self._printer.disabled('slowest'): + return + + self._printer.print_timing(title) + for test_tuple in test_list: + filename = test_tuple.filename[len( + self._port.layout_tests_dir()) + 1:] + filename = filename.replace('\\', '/') + test_run_time = round(test_tuple.test_run_time, 1) + self._printer.print_timing(" %s took %s seconds" % + (filename, test_run_time)) + + def _print_directory_timings(self, directory_test_timings): + """Print timing info by directory for any directories that + take > 10 seconds to run. + + Args: + directory_test_timing: time info for each directory + """ + timings = [] + for directory in directory_test_timings: + num_tests, time_for_directory = directory_test_timings[directory] + timings.append((round(time_for_directory, 1), directory, + num_tests)) + timings.sort() + + self._printer.print_timing("Time to process slowest subdirectories:") + min_seconds_to_print = 10 + for timing in timings: + if timing[0] > min_seconds_to_print: + self._printer.print_timing( + " %s took %s seconds to run %s tests." % (timing[1], + timing[0], timing[2])) + self._printer.print_timing("") + + def _print_statistics_for_test_timings(self, title, timings): + """Prints the median, mean and standard deviation of the values in + timings. + + Args: + title: Title for these timings. + timings: A list of floats representing times. 
+ """ + self._printer.print_timing(title) + timings.sort() + + num_tests = len(timings) + if not num_tests: + return + percentile90 = timings[int(.9 * num_tests)] + percentile99 = timings[int(.99 * num_tests)] + + if num_tests % 2 == 1: + median = timings[((num_tests - 1) / 2) - 1] + else: + lower = timings[num_tests / 2 - 1] + upper = timings[num_tests / 2] + median = (float(lower + upper)) / 2 + + mean = sum(timings) / num_tests + + for time in timings: + sum_of_deviations = math.pow(time - mean, 2) + + std_deviation = math.sqrt(sum_of_deviations / num_tests) + self._printer.print_timing(" Median: %6.3f" % median) + self._printer.print_timing(" Mean: %6.3f" % mean) + self._printer.print_timing(" 90th percentile: %6.3f" % percentile90) + self._printer.print_timing(" 99th percentile: %6.3f" % percentile99) + self._printer.print_timing(" Standard dev: %6.3f" % std_deviation) + self._printer.print_timing("") + + def _print_result_summary(self, result_summary): + """Print a short summary about how many tests passed. + + Args: + result_summary: information to log + """ + failed = len(result_summary.failures) + skipped = len( + result_summary.tests_by_expectation[test_expectations.SKIP]) + total = result_summary.total + passed = total - failed - skipped + pct_passed = 0.0 + if total > 0: + pct_passed = float(passed) * 100 / total + + self._printer.print_actual("") + self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" % + (passed, total, pct_passed)) + self._printer.print_actual("") + self._print_result_summary_entry(result_summary, + test_expectations.NOW, "Tests to be fixed") + + self._printer.print_actual("") + self._print_result_summary_entry(result_summary, + test_expectations.WONTFIX, + "Tests that will only be fixed if they crash (WONTFIX)") + self._printer.print_actual("") + + def _print_result_summary_entry(self, result_summary, timeline, + heading): + """Print a summary block of results for a particular timeline of test. + + Args: + result_summary: summary to print results for + timeline: the timeline to print results for (NOT, WONTFIX, etc.) 
+ heading: a textual description of the timeline + """ + total = len(result_summary.tests_by_timeline[timeline]) + not_passing = (total - + len(result_summary.tests_by_expectation[test_expectations.PASS] & + result_summary.tests_by_timeline[timeline])) + self._printer.print_actual("=> %s (%d):" % (heading, not_passing)) + + for result in TestExpectationsFile.EXPECTATION_ORDER: + if result == test_expectations.PASS: + continue + results = (result_summary.tests_by_expectation[result] & + result_summary.tests_by_timeline[timeline]) + desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result] + if not_passing and len(results): + pct = len(results) * 100.0 / not_passing + self._printer.print_actual(" %5d %-24s (%4.1f%%)" % + (len(results), desc[len(results) != 1], pct)) + + def _results_html(self, test_files, failures, title="Test Failures", override_time=None): + """ + test_files = a list of file paths + failures = dictionary mapping test paths to failure objects + title = title printed at top of test + override_time = current time (used by unit tests) + """ + page = """<html> + <head> + <title>Layout Test Results (%(time)s)</title> + </head> + <body> + <h2>%(title)s (%(time)s)</h2> + """ % {'title': title, 'time': override_time or time.asctime()} + + for test_file in sorted(test_files): + test_name = self._port.relative_test_filename(test_file) + test_url = self._port.filename_to_uri(test_file) + page += u"<p><a href='%s'>%s</a><br />\n" % (test_url, test_name) + test_failures = failures.get(test_file, []) + for failure in test_failures: + page += (u" %s<br/>" % + failure.result_html_output(test_name)) + page += "</p>\n" + page += "</body></html>\n" + return page + + def _write_results_html_file(self, result_summary): + """Write results.html which is a summary of tests that failed. + + Args: + result_summary: a summary of the results :) + + Returns: + True if any results were written (since expected failures may be + omitted) + """ + # test failures + if self._options.full_results_html: + results_title = "Test Failures" + test_files = result_summary.failures.keys() + else: + results_title = "Unexpected Test Failures" + unexpected_failures = self._get_failures(result_summary, + include_crashes=True) + test_files = unexpected_failures.keys() + if not len(test_files): + return False + + out_filename = os.path.join(self._options.results_directory, + "results.html") + with codecs.open(out_filename, "w", "utf-8") as results_file: + html = self._results_html(test_files, result_summary.failures, results_title) + results_file.write(html) + + return True + + def _show_results_html_file(self): + """Shows the results.html page.""" + results_filename = os.path.join(self._options.results_directory, + "results.html") + self._port.show_results_html_file(results_filename) + + +def read_test_files(files): + tests = [] + for file in files: + try: + with codecs.open(file, 'r', 'utf-8') as file_contents: + # FIXME: This could be cleaner using a list comprehension. + for line in file_contents: + line = test_expectations.strip_comments(line) + if line: + tests.append(line) + except IOError, e: + if e.errno == errno.ENOENT: + _log.critical('') + _log.critical('--test-list file "%s" not found' % file) + raise + return tests |
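The patch does not include the caller, but TestRunner is driven from run_webkit_tests.py. A minimal sketch of that flow, assuming a port object, an options namespace with the attributes referenced above (results_directory, configuration, child_processes, ...), and a printing.Printer, could look like the following; this is a hypothetical driver, not part of the patch.

# Hypothetical driver sketch; method names and signatures mirror the
# TestRunner API defined in the patch above.
def run_layout_tests(port, options, printer, args):
    runner = TestRunner(port, options, printer)

    # Find the files to test from the command-line args and any
    # --test-list files (no previously-failing tests to retry here).
    runner.collect_tests(args, last_unexpected_results=[])

    # Parse test_expectations.txt for this platform/configuration;
    # this must happen before set_up_run(), which consults expectations.
    runner.parse_expectations(port.test_platform_name(),
                              options.configuration == 'Debug')

    # Start the helper, check system dependencies, and build the
    # ResultSummary; set_up_run() returns None if there is nothing to run.
    result_summary = runner.set_up_run()
    if not result_summary:
        return -1

    try:
        # run() returns the number of unexpected regressions (0 == success).
        return runner.run(result_summary)
    finally:
        runner.clean_up_run()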
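The --run-chunk / --run-part slicing in prepare_lists_and_print_output is easier to follow with concrete numbers. The sketch below re-derives the same arithmetic as a standalone, hypothetical helper (chunk_slice is not part of the patch).

def chunk_slice(num_tests, chunk_num, test_size, is_run_part):
    # Mirrors the slice computation in prepare_lists_and_print_output.
    if is_run_part:
        # --run-part=N:M means "part N of M parts": round the test count
        # up to a multiple of M so every part has the same length.
        rounded = num_tests
        if rounded % test_size:
            rounded = num_tests + test_size - (num_tests % test_size)
        chunk_len = rounded // test_size
        start = chunk_len * (chunk_num - 1)
    else:
        # --run-chunk=N:LEN means "the Nth window of LEN tests", wrapping
        # around when N * LEN runs past the end of the list.
        chunk_len = test_size
        start = (chunk_num * chunk_len) % num_tests
    return start, min(num_tests, start + chunk_len)

# --run-part=2:4 over 10 tests selects tests[3:6].
assert chunk_slice(10, 2, 4, True) == (3, 6)
# --run-chunk=3:25 over 60 tests wraps to tests[15:40].
assert chunk_slice(60, 3, 25, False) == (15, 40)

When the resulting window is shorter than chunk_len, the real code additionally appends tests from the front of the list, as the "last chunk is partial" message in the patch notes.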