From d47d8e14880132c42a75f41c8041851797c75e35 Mon Sep 17 00:00:00 2001
From: Tao Bao
Date: Thu, 21 May 2015 14:09:49 -0700
Subject: Assert the stash size when generating OTAs.

Block-based OTA v2 and v3 require stash space on the /cache partition
to back up blocks during an update, and we need to ensure that the
stash never exceeds the partition size. Since there might be other
files on /cache as well, we use cache_size * threshold as the maximum
allowed size. The threshold defaults to 0.8 and can be overridden with
the command line option '--stash_threshold'.

Change-Id: Ieee5d373c9bfb2ea401d85ca8a3adb491579de76
(cherry picked from commit 23ac4042128e47f6fe1ef176e7cb96f907d8e149)
---
 tools/releasetools/blockimgdiff.py          | 25 ++++++++++++++++++++-----
 tools/releasetools/ota_from_target_files.py | 19 +++++++++++++++++++
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index 6ed9ca2..a007f81 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -16,6 +16,7 @@ from __future__ import print_function
 
 from collections import deque, OrderedDict
 from hashlib import sha1
+import common
 import heapq
 import itertools
 import multiprocessing
@@ -460,9 +461,20 @@ class BlockImageDiff(object):
       if free_string:
         out.append("".join(free_string))
 
-      # sanity check: abort if we're going to need more than 512 MB if
-      # stash space
-      assert max_stashed_blocks * self.tgt.blocksize < (512 << 20)
+      if self.version >= 2:
+        # Sanity check: abort if we're going to need more stash space than
+        # the allowed size (cache_size * threshold). The threshold serves
+        # two purposes: a) part of the cache may already be occupied by
+        # recovery logs; b) it buys us some margin for dealing with the
+        # oversize issue.
+        cache_size = common.OPTIONS.cache_size
+        stash_threshold = common.OPTIONS.stash_threshold
+        max_allowed = cache_size * stash_threshold
+        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
+            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
+                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
+                self.tgt.blocksize, max_allowed, cache_size,
+                stash_threshold)
 
       # Zero out extended blocks as a workaround for bug 20881595.
       if self.tgt.extended:
@@ -489,8 +501,11 @@ class BlockImageDiff(object):
         f.write(i)
 
     if self.version >= 2:
-      print("max stashed blocks: %d (%d bytes)\n" % (
-          max_stashed_blocks, max_stashed_blocks * self.tgt.blocksize))
+      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
+      max_allowed = common.OPTIONS.cache_size * common.OPTIONS.stash_threshold
+      print("max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n" % (
+          max_stashed_blocks, max_stashed_size, max_allowed,
+          max_stashed_size * 100.0 / max_allowed))
 
   def ComputePatches(self, prefix):
     print("Reticulating splines...")
""" import sys @@ -122,6 +125,10 @@ OPTIONS.updater_binary = None OPTIONS.oem_source = None OPTIONS.fallback_to_full = True OPTIONS.full_radio = False +# Stash size cannot exceed cache_size * threshold. +OPTIONS.cache_size = None +OPTIONS.stash_threshold = 0.8 + def MostPopularKey(d, default): """Given a dict, return the key corresponding to the largest @@ -1505,6 +1512,12 @@ def main(argv): OPTIONS.updater_binary = a elif o in ("--no_fallback_to_full",): OPTIONS.fallback_to_full = False + elif o == "--stash_threshold": + try: + OPTIONS.stash_threshold = float(a) + except ValueError: + raise ValueError("Cannot parse value %r for option %r - expecting " + "a float" % (a, o)) else: return False return True @@ -1528,6 +1541,7 @@ def main(argv): "oem_settings=", "verify", "no_fallback_to_full", + "stash_threshold=", ], extra_option_handler=option_handler) if len(args) != 2: @@ -1585,6 +1599,11 @@ def main(argv): output_zip = zipfile.ZipFile(temp_zip_file, "w", compression=zipfile.ZIP_DEFLATED) + cache_size = OPTIONS.info_dict.get("cache_size", None) + if cache_size is None: + raise RuntimeError("can't determine the cache partition size") + OPTIONS.cache_size = cache_size + if OPTIONS.incremental_source is None: WriteFullOTAPackage(input_zip, output_zip) if OPTIONS.package_key is None: -- cgit v1.1 From 1fc67631eedbfba07e005d85aadae6e1b91d8d93 Mon Sep 17 00:00:00 2001 From: Tao Bao Date: Mon, 17 Aug 2015 09:45:13 -0700 Subject: Revise stash for BBOTAs when needed. When generating incremental BBOTAs (v2 and above), we need to ensure that the needed runtime stash is below the given threshold. If it's running out of space on /cache, we replace the command that uses a stash with a "new" command instead. This may increase the OTA package size, since it is carrying more full blocks instead of patches. It gets even worse for large files that span a number of blocks, because currently we will store all the blocks for the file as "new" blocks if stashing cannot be satisfied. We may further optimize by splitting them into smaller chunks so that most of them can still be stashed. Bug: 22430577 Change-Id: Ieae5243d461e3f899f613f76a380f6f7c3edb356 (cherry picked from commit 82c47981bd0602a1c7b50dfabf9a6a2412993bae) --- tools/releasetools/blockimgdiff.py | 77 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'tools') diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py index a007f81..6afa7a5 100644 --- a/tools/releasetools/blockimgdiff.py +++ b/tools/releasetools/blockimgdiff.py @@ -174,6 +174,12 @@ class Transfer(object): return (sum(sr.size() for (_, sr) in self.stash_before) - sum(sr.size() for (_, sr) in self.use_stash)) + def ConvertToNew(self): + assert self.style != "new" + self.use_stash = [] + self.style = "new" + self.src_ranges = RangeSet() + def __str__(self): return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style + " to " + str(self.tgt_ranges) + ">") @@ -268,6 +274,10 @@ class BlockImageDiff(object): self.ReverseBackwardEdges() self.ImproveVertexSequence() + # Ensure the runtime stash size is under the limit. + if self.version >= 2 and common.OPTIONS.cache_size is not None: + self.ReviseStashSize() + # Double-check our work. 
From 1fc67631eedbfba07e005d85aadae6e1b91d8d93 Mon Sep 17 00:00:00 2001
From: Tao Bao
Date: Mon, 17 Aug 2015 09:45:13 -0700
Subject: Revise stash for BBOTAs when needed.

When generating incremental BBOTAs (v2 and above), we need to ensure
that the runtime stash stays below the given threshold. If we are
running out of space on /cache, we replace the command that uses a
stash with a "new" command instead. This may increase the OTA package
size, since it carries full blocks instead of patches.

It gets even worse for large files that span many blocks, because
currently we store all the blocks of such a file as "new" blocks if
the stash cannot be satisfied. We may further optimize by splitting
them into smaller chunks so that most of them can still be stashed.

Bug: 22430577
Change-Id: Ieae5243d461e3f899f613f76a380f6f7c3edb356
(cherry picked from commit 82c47981bd0602a1c7b50dfabf9a6a2412993bae)
---
 tools/releasetools/blockimgdiff.py | 77 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index a007f81..6afa7a5 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -174,6 +174,12 @@ class Transfer(object):
     return (sum(sr.size() for (_, sr) in self.stash_before) -
             sum(sr.size() for (_, sr) in self.use_stash))
 
+  def ConvertToNew(self):
+    assert self.style != "new"
+    self.use_stash = []
+    self.style = "new"
+    self.src_ranges = RangeSet()
+
   def __str__(self):
     return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
             " to " + str(self.tgt_ranges) + ">")
@@ -268,6 +274,10 @@ class BlockImageDiff(object):
       self.ReverseBackwardEdges()
       self.ImproveVertexSequence()
 
+    # Ensure the runtime stash size is under the limit.
+    if self.version >= 2 and common.OPTIONS.cache_size is not None:
+      self.ReviseStashSize()
+
     # Double-check our work.
     self.AssertSequenceGood()
 
@@ -507,6 +517,73 @@ class BlockImageDiff(object):
           max_stashed_blocks, max_stashed_size, max_allowed,
           max_stashed_size * 100.0 / max_allowed))
 
+  def ReviseStashSize(self):
+    print("Revising stash size...")
+    stashes = {}
+
+    # Create the map between a stash and its def/use points. For example, for
+    # a given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
+    for xf in self.transfers:
+      # Command xf defines (stores) all the stashes in stash_before.
+      for idx, sr in xf.stash_before:
+        stashes[idx] = (sr, xf)
+
+      # Record all the stashes command xf uses.
+      for idx, _ in xf.use_stash:
+        stashes[idx] += (xf,)
+
+    # Compute the maximum blocks available for stash based on /cache size and
+    # the threshold.
+    cache_size = common.OPTIONS.cache_size
+    stash_threshold = common.OPTIONS.stash_threshold
+    max_allowed = cache_size * stash_threshold / self.tgt.blocksize
+
+    stashed_blocks = 0
+
+    # Now go through all the commands. Compute the required stash size on the
+    # fly. If a command requires more stash than is available, we delete the
+    # stash by replacing the command that uses the stash with a "new" command
+    # instead.
+    for xf in self.transfers:
+      replaced_cmds = []
+
+      # xf.stash_before generates explicit stash commands.
+      for idx, sr in xf.stash_before:
+        if stashed_blocks + sr.size() > max_allowed:
+          # We cannot stash this one for a later command. Find out the command
+          # that will use this stash and replace the command with "new".
+          use_cmd = stashes[idx][2]
+          replaced_cmds.append(use_cmd)
+          print("  %s replaced due to an explicit stash of %d blocks." % (
+              use_cmd, sr.size()))
+        else:
+          stashed_blocks += sr.size()
+
+      # xf.use_stash generates free commands.
+      for _, sr in xf.use_stash:
+        stashed_blocks -= sr.size()
+
+      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
+      # ComputePatches(), they both have the style of "diff".
+      if xf.style == "diff" and self.version >= 3:
+        assert xf.tgt_ranges and xf.src_ranges
+        if xf.src_ranges.overlaps(xf.tgt_ranges):
+          if stashed_blocks + xf.src_ranges.size() > max_allowed:
+            replaced_cmds.append(xf)
+            print("  %s replaced due to an implicit stash of %d blocks." % (
+                xf, xf.src_ranges.size()))
+
+      # Replace the commands in replaced_cmds with "new"s.
+      for cmd in replaced_cmds:
+        # It no longer uses any commands in "use_stash". Remove the def points
+        # for all those stashes.
+        for idx, sr in cmd.use_stash:
+          def_cmd = stashes[idx][1]
+          assert (idx, sr) in def_cmd.stash_before
+          def_cmd.stash_before.remove((idx, sr))
+
+        cmd.ConvertToNew()
+
   def ComputePatches(self, prefix):
     print("Reticulating splines...")
     diff_q = []
--
cgit v1.1
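The greedy pass in ReviseStashSize() can be modeled with plain integers. In
this toy version each transfer is a (name, blocks it stashes, stashed blocks
it frees) tuple, all invented values; note that the real code replaces the
command that would later use an over-budget stash (looked up via the def/use
map), not the transfer named here:

    def revise(transfers, max_allowed):
        stashed = 0
        replaced = []
        for name, stash_blocks, free_blocks in transfers:
            if stashed + stash_blocks > max_allowed:
                # Over budget: this stash is dropped, and the affected
                # command gets rewritten as "new" (ConvertToNew()).
                replaced.append(name)
            else:
                stashed += stash_blocks
            stashed -= free_blocks
        return replaced

    # With an 8-block budget, stashing 6 more blocks on top of xf1's 5
    # would exceed the limit, so xf2's stash is dropped; xf3 still fits.
    print(revise([("xf1", 5, 0), ("xf2", 6, 5), ("xf3", 2, 2)], 8))
    # ['xf2']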
From 937847ae493d36b9741f6387a2357d5cdceda3d9 Mon Sep 17 00:00:00 2001
From: Tao Bao
Date: Tue, 25 Aug 2015 15:10:10 -0700
Subject: Split large files for BBOTA v3.

For BBOTA v3, we need to stash source blocks to support the resumable
update feature. However, as files grow and cache partitions shrink,
the source blocks that represent a file may be too large to be stashed
as a whole. The CL in [1] solves the issue by replacing the diff
command with a "new" command. However, it may increase the generated
package size substantially (e.g. from ~100MB to ~400MB).

With this CL, if a file spans too many blocks, we split it into
smaller pieces by generating multiple commands. For the same case
above, this reduces the package size to ~150MB. One potential downside
is that after splitting, files like .jar, .apk and .zip can no longer
use imgdiff, so we may lose the potential benefit of imgdiff for patch
size reduction.

[1] commit 82c47981bd0602a1c7b50dfabf9a6a2412993bae

Bug: 22430577
Change-Id: Iee1ad6543f3d40368e079e418cc31728e1ab3f48
(cherry picked from commit 9a5caf2c30e5dcb19823dff328de1cfb140a2799)
---
 tools/releasetools/blockimgdiff.py | 85 ++++++++++++++++++++++++++++++--------
 tools/releasetools/rangelib.py     | 33 +++++++++++++++
 2 files changed, 101 insertions(+), 17 deletions(-)

diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index 6afa7a5..a1b3eda 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -297,7 +297,6 @@ class BlockImageDiff(object):
     out = []
 
     total = 0
-    performs_read = False
 
     stashes = {}
     stashed_blocks = 0
@@ -409,7 +408,6 @@ class BlockImageDiff(object):
         out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
         total += tgt_size
       elif xf.style == "move":
-        performs_read = True
         assert xf.tgt_ranges
         assert xf.src_ranges.size() == tgt_size
         if xf.src_ranges != xf.tgt_ranges:
@@ -434,7 +432,6 @@ class BlockImageDiff(object):
             xf.tgt_ranges.to_string_raw(), src_str))
         total += tgt_size
       elif xf.style in ("bsdiff", "imgdiff"):
-        performs_read = True
         assert xf.tgt_ranges
         assert xf.src_ranges
         if self.version == 1:
@@ -539,6 +536,7 @@ class BlockImageDiff(object):
     max_allowed = cache_size * stash_threshold / self.tgt.blocksize
 
     stashed_blocks = 0
+    new_blocks = 0
 
     # Now go through all the commands. Compute the required stash size on the
     # fly. If a command requires more stash than is available, we delete the
@@ -554,8 +552,7 @@ class BlockImageDiff(object):
           # that will use this stash and replace the command with "new".
           use_cmd = stashes[idx][2]
           replaced_cmds.append(use_cmd)
-          print("  %s replaced due to an explicit stash of %d blocks." % (
-              use_cmd, sr.size()))
+          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
         else:
           stashed_blocks += sr.size()
 
@@ -570,8 +567,7 @@ class BlockImageDiff(object):
         if xf.src_ranges.overlaps(xf.tgt_ranges):
           if stashed_blocks + xf.src_ranges.size() > max_allowed:
             replaced_cmds.append(xf)
-            print("  %s replaced due to an implicit stash of %d blocks." % (
-                xf, xf.src_ranges.size()))
+            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
 
       # Replace the commands in replaced_cmds with "new"s.
       for cmd in replaced_cmds:
@@ -581,9 +577,13 @@ class BlockImageDiff(object):
           def_cmd = stashes[idx][1]
           assert (idx, sr) in def_cmd.stash_before
           def_cmd.stash_before.remove((idx, sr))
+          new_blocks += sr.size()
 
         cmd.ConvertToNew()
 
+    print("  Total %d blocks are packed as new blocks due to insufficient "
+          "cache size." % (new_blocks,))
+
   def ComputePatches(self, prefix):
     print("Reticulating splines...")
     diff_q = []
@@ -939,6 +939,57 @@ class BlockImageDiff(object):
         a.goes_after[b] = size
 
   def FindTransfers(self):
+    """Parse the file_map to generate all the transfers."""
+
+    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
+                    split=False):
+      """Wrapper function for adding a Transfer().
+
+      For BBOTA v3, we need to stash source blocks for the resumable feature.
+      However, with the growth of file size and the shrink of the cache
+      partition, source blocks are too large to be stashed. If a file occupies
+      too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
+      into smaller pieces by getting multiple Transfer()s.
+
+      The downside is that after splitting, we can no longer use imgdiff but
+      only bsdiff."""
+
+      MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
+
+      # We care about diff transfers only.
+      if style != "diff" or not split:
+        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
+        return
+
+      # Change nothing for small files.
+      if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
+          src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
+        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
+        return
+
+      pieces = 0
+      while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
+             src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
+        tgt_split_name = "%s-%d" % (tgt_name, pieces)
+        src_split_name = "%s-%d" % (src_name, pieces)
+        tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
+        src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
+        Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
+                 by_id)
+
+        tgt_ranges = tgt_ranges.subtract(tgt_first)
+        src_ranges = src_ranges.subtract(src_first)
+        pieces += 1
+
+      # Handle remaining blocks.
+      if tgt_ranges.size() or src_ranges.size():
+        # Must be both non-empty.
+        assert tgt_ranges.size() and src_ranges.size()
+        tgt_split_name = "%s-%d" % (tgt_name, pieces)
+        src_split_name = "%s-%d" % (src_name, pieces)
+        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
+                 by_id)
+
     empty = RangeSet()
     for tgt_fn, tgt_ranges in self.tgt.file_map.items():
       if tgt_fn == "__ZERO":
@@ -946,28 +997,28 @@ class BlockImageDiff(object):
         # in any file and that are filled with zeros. We have a
         # special transfer style for zero blocks.
         src_ranges = self.src.file_map.get("__ZERO", empty)
-        Transfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
-                 "zero", self.transfers)
+        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
+                    "zero", self.transfers)
         continue
 
      elif tgt_fn == "__COPY":
         # "__COPY" domain includes all the blocks not contained in any
         # file and that need to be copied unconditionally to the target.
-        Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
+        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
         continue
 
      elif tgt_fn in self.src.file_map:
         # Look for an exact pathname match in the source.
-        Transfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue
 
       b = os.path.basename(tgt_fn)
       if b in self.src_basenames:
         # Look for an exact basename match in the source.
         src_fn = self.src_basenames[b]
-        Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue
 
       b = re.sub("[0-9]+", "#", b)
@@ -977,11 +1028,11 @@ class BlockImageDiff(object):
         # for .so files that contain version numbers in the filename
         # that get bumped.)
         src_fn = self.src_numpatterns[b]
-        Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue
 
-      Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
+      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
 
   def AbbreviateSourceNames(self):
     for k in self.src.file_map.keys():
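The splitting loop in AddTransfer() can be tried with plain block counts in
place of RangeSets. The 1024-block limit is MAX_BLOCKS_PER_DIFF_TRANSFER
from the patch; the sizes below are made up:

    def split_sizes(tgt_blocks, src_blocks, limit=1024):
        # Carve limit-sized pieces off both sides, then emit the
        # remainder, mirroring the while loop in AddTransfer().
        pieces = []
        while tgt_blocks > limit and src_blocks > limit:
            pieces.append((limit, limit))
            tgt_blocks -= limit
            src_blocks -= limit
        if tgt_blocks or src_blocks:
            pieces.append((tgt_blocks, src_blocks))
        return pieces

    # A 2500-block target diffed against a 2300-block source yields three
    # transfers (the real code would name them foo-0, foo-1 and foo-2):
    print(split_sizes(2500, 2300))
    # [(1024, 1024), (1024, 1024), (452, 252)]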
diff --git a/tools/releasetools/rangelib.py b/tools/releasetools/rangelib.py
index 1506658..373bbed 100644
--- a/tools/releasetools/rangelib.py
+++ b/tools/releasetools/rangelib.py
@@ -24,6 +24,7 @@ class RangeSet(object):
   lots of runs."""
 
   def __init__(self, data=None):
+    # TODO(tbao): monotonic is broken when passing in a tuple.
     self.monotonic = False
     if isinstance(data, str):
       self._parse_internal(data)
@@ -260,6 +261,38 @@ class RangeSet(object):
         out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
     return out
 
+  def first(self, n):
+    """Return the RangeSet that contains at most the first 'n' integers.
+
+    >>> RangeSet("0-9").first(1)
+    <RangeSet("0")>
+    >>> RangeSet("10-19").first(5)
+    <RangeSet("10-14")>
+    >>> RangeSet("10-19").first(15)
+    <RangeSet("10-19")>
+    >>> RangeSet("10-19 30-39").first(3)
+    <RangeSet("10-12")>
+    >>> RangeSet("10-19 30-39").first(15)
+    <RangeSet("10-19 30-34")>
+    >>> RangeSet("10-19 30-39").first(30)
+    <RangeSet("10-19 30-39")>
+    >>> RangeSet("0-9").first(0)
+    <RangeSet("")>
+    """
+
+    if self.size() <= n:
+      return self
+
+    out = []
+    for s, e in self:
+      if e - s >= n:
+        out += (s, s+n)
+        break
+      else:
+        out += (s, e)
+        n -= e - s
+    return RangeSet(data=out)
+
 
 if __name__ == "__main__":
   import doctest
--
cgit v1.1
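For readers who want to experiment with the algorithm behind
RangeSet.first() outside of rangelib, here is a standalone sketch over
explicit half-open (start, end) pairs, which stand in for RangeSet's
internal representation:

    def first(ranges, n):
        # Take whole runs until fewer than n blocks remain, then cut the
        # final run short, the same walk RangeSet.first() performs.
        if sum(e - s for s, e in ranges) <= n:
            return ranges
        out = []
        for s, e in ranges:
            if e - s >= n:
                out.append((s, s + n))
                break
            out.append((s, e))
            n -= e - s
        return out

    # Matches the doctest above: RangeSet("10-19 30-39").first(15) keeps
    # all of 10-19 (ten blocks) plus 30-34 (five blocks).
    print(first([(10, 20), (30, 40)], 15))
    # [(10, 20), (30, 35)]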