From d47d8e14880132c42a75f41c8041851797c75e35 Mon Sep 17 00:00:00 2001
From: Tao Bao
Date: Thu, 21 May 2015 14:09:49 -0700
Subject: Assert the stash size when generating OTAs.

Block-based OTA v2 and v3 require stash space on the /cache partition
to back up blocks during an update, and we need to ensure that the
stash never exceeds the partition size. Since there might be other
files on /cache as well, we use cache_size * threshold as the maximum
allowed size. The threshold defaults to 0.8 and can be overridden with
the command line option '--stash_threshold'.

Change-Id: Ieee5d373c9bfb2ea401d85ca8a3adb491579de76
(cherry picked from commit 23ac4042128e47f6fe1ef176e7cb96f907d8e149)
---
 tools/releasetools/blockimgdiff.py          | 25 ++++++++++++++++++++-----
 tools/releasetools/ota_from_target_files.py | 19 +++++++++++++++++++
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index 6ed9ca2..a007f81 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -16,6 +16,7 @@ from __future__ import print_function
 
 from collections import deque, OrderedDict
 from hashlib import sha1
+import common
 import heapq
 import itertools
 import multiprocessing
@@ -460,9 +461,20 @@ class BlockImageDiff(object):
       if free_string:
         out.append("".join(free_string))
 
-      # sanity check: abort if we're going to need more than 512 MB if
-      # stash space
-      assert max_stashed_blocks * self.tgt.blocksize < (512 << 20)
+      if self.version >= 2:
+        # Sanity check: abort if we're going to need more stash space than
+        # the allowed size (cache_size * threshold). The threshold serves
+        # two purposes: a) part of the cache may already be occupied by
+        # recovery logs; b) it buys us some margin for dealing with the
+        # oversize issue.
+        cache_size = common.OPTIONS.cache_size
+        stash_threshold = common.OPTIONS.stash_threshold
+        max_allowed = cache_size * stash_threshold
+        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
+            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
+                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
+                self.tgt.blocksize, max_allowed, cache_size,
+                stash_threshold)
 
       # Zero out extended blocks as a workaround for bug 20881595.
       if self.tgt.extended:
@@ -489,8 +501,11 @@ class BlockImageDiff(object):
         f.write(i)
 
     if self.version >= 2:
-      print("max stashed blocks: %d (%d bytes)\n" % (
-          max_stashed_blocks, max_stashed_blocks * self.tgt.blocksize))
+      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
+      max_allowed = common.OPTIONS.cache_size * common.OPTIONS.stash_threshold
+      print("max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n" % (
+          max_stashed_blocks, max_stashed_size, max_allowed,
+          max_stashed_size * 100.0 / max_allowed))
 
   def ComputePatches(self, prefix):
     print("Reticulating splines...")
""" import sys @@ -122,6 +125,10 @@ OPTIONS.updater_binary = None OPTIONS.oem_source = None OPTIONS.fallback_to_full = True OPTIONS.full_radio = False +# Stash size cannot exceed cache_size * threshold. +OPTIONS.cache_size = None +OPTIONS.stash_threshold = 0.8 + def MostPopularKey(d, default): """Given a dict, return the key corresponding to the largest @@ -1505,6 +1512,12 @@ def main(argv): OPTIONS.updater_binary = a elif o in ("--no_fallback_to_full",): OPTIONS.fallback_to_full = False + elif o == "--stash_threshold": + try: + OPTIONS.stash_threshold = float(a) + except ValueError: + raise ValueError("Cannot parse value %r for option %r - expecting " + "a float" % (a, o)) else: return False return True @@ -1528,6 +1541,7 @@ def main(argv): "oem_settings=", "verify", "no_fallback_to_full", + "stash_threshold=", ], extra_option_handler=option_handler) if len(args) != 2: @@ -1585,6 +1599,11 @@ def main(argv): output_zip = zipfile.ZipFile(temp_zip_file, "w", compression=zipfile.ZIP_DEFLATED) + cache_size = OPTIONS.info_dict.get("cache_size", None) + if cache_size is None: + raise RuntimeError("can't determine the cache partition size") + OPTIONS.cache_size = cache_size + if OPTIONS.incremental_source is None: WriteFullOTAPackage(input_zip, output_zip) if OPTIONS.package_key is None: -- cgit v1.1 From 1fc67631eedbfba07e005d85aadae6e1b91d8d93 Mon Sep 17 00:00:00 2001 From: Tao Bao Date: Mon, 17 Aug 2015 09:45:13 -0700 Subject: Revise stash for BBOTAs when needed. When generating incremental BBOTAs (v2 and above), we need to ensure that the needed runtime stash is below the given threshold. If it's running out of space on /cache, we replace the command that uses a stash with a "new" command instead. This may increase the OTA package size, since it is carrying more full blocks instead of patches. It gets even worse for large files that span a number of blocks, because currently we will store all the blocks for the file as "new" blocks if stashing cannot be satisfied. We may further optimize by splitting them into smaller chunks so that most of them can still be stashed. Bug: 22430577 Change-Id: Ieae5243d461e3f899f613f76a380f6f7c3edb356 (cherry picked from commit 82c47981bd0602a1c7b50dfabf9a6a2412993bae) --- tools/releasetools/blockimgdiff.py | 77 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'tools') diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py index a007f81..6afa7a5 100644 --- a/tools/releasetools/blockimgdiff.py +++ b/tools/releasetools/blockimgdiff.py @@ -174,6 +174,12 @@ class Transfer(object): return (sum(sr.size() for (_, sr) in self.stash_before) - sum(sr.size() for (_, sr) in self.use_stash)) + def ConvertToNew(self): + assert self.style != "new" + self.use_stash = [] + self.style = "new" + self.src_ranges = RangeSet() + def __str__(self): return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style + " to " + str(self.tgt_ranges) + ">") @@ -268,6 +274,10 @@ class BlockImageDiff(object): self.ReverseBackwardEdges() self.ImproveVertexSequence() + # Ensure the runtime stash size is under the limit. + if self.version >= 2 and common.OPTIONS.cache_size is not None: + self.ReviseStashSize() + # Double-check our work. 
From 1fc67631eedbfba07e005d85aadae6e1b91d8d93 Mon Sep 17 00:00:00 2001
From: Tao Bao
Date: Mon, 17 Aug 2015 09:45:13 -0700
Subject: Revise stash for BBOTAs when needed.

When generating incremental BBOTAs (v2 and above), we need to ensure
that the runtime stash stays below the given threshold. If we are
running out of space on /cache, we replace the command that uses a
stash with a "new" command instead. This may increase the OTA package
size, since it carries full blocks instead of patches.

It gets even worse for large files that span many blocks, because
currently we store all the blocks of such a file as "new" blocks if
the stash cannot be satisfied. We may further optimize by splitting
them into smaller chunks so that most of them can still be stashed.

Bug: 22430577
Change-Id: Ieae5243d461e3f899f613f76a380f6f7c3edb356
(cherry picked from commit 82c47981bd0602a1c7b50dfabf9a6a2412993bae)
---
 tools/releasetools/blockimgdiff.py | 77 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index a007f81..6afa7a5 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -174,6 +174,12 @@ class Transfer(object):
     return (sum(sr.size() for (_, sr) in self.stash_before) -
             sum(sr.size() for (_, sr) in self.use_stash))
 
+  def ConvertToNew(self):
+    assert self.style != "new"
+    self.use_stash = []
+    self.style = "new"
+    self.src_ranges = RangeSet()
+
   def __str__(self):
     return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
             " to " + str(self.tgt_ranges) + ">")
@@ -268,6 +274,10 @@ class BlockImageDiff(object):
       self.ReverseBackwardEdges()
       self.ImproveVertexSequence()
 
+    # Ensure the runtime stash size is under the limit.
+    if self.version >= 2 and common.OPTIONS.cache_size is not None:
+      self.ReviseStashSize()
+
     # Double-check our work.
     self.AssertSequenceGood()
 
@@ -507,6 +517,73 @@ class BlockImageDiff(object):
           max_stashed_blocks, max_stashed_size, max_allowed,
           max_stashed_size * 100.0 / max_allowed))
 
+  def ReviseStashSize(self):
+    print("Revising stash size...")
+    stashes = {}
+
+    # Create the map between a stash and its def/use points. For example, for
+    # a given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
+    for xf in self.transfers:
+      # Command xf defines (stores) all the stashes in stash_before.
+      for idx, sr in xf.stash_before:
+        stashes[idx] = (sr, xf)
+
+      # Record all the stashes command xf uses.
+      for idx, _ in xf.use_stash:
+        stashes[idx] += (xf,)
+
+    # Compute the maximum blocks available for stash based on /cache size and
+    # the threshold.
+    cache_size = common.OPTIONS.cache_size
+    stash_threshold = common.OPTIONS.stash_threshold
+    max_allowed = cache_size * stash_threshold / self.tgt.blocksize
+
+    stashed_blocks = 0
+
+    # Now go through all the commands. Compute the required stash size on the
+    # fly. If a command requires more stash than is available, we delete the
+    # stash by replacing the command that uses the stash with a "new" command
+    # instead.
+    for xf in self.transfers:
+      replaced_cmds = []
+
+      # xf.stash_before generates explicit stash commands.
+      for idx, sr in xf.stash_before:
+        if stashed_blocks + sr.size() > max_allowed:
+          # We cannot stash this one for a later command. Find out the command
+          # that will use this stash and replace the command with "new".
+          use_cmd = stashes[idx][2]
+          replaced_cmds.append(use_cmd)
+          print("  %s replaced due to an explicit stash of %d blocks." % (
+              use_cmd, sr.size()))
+        else:
+          stashed_blocks += sr.size()
+
+      # xf.use_stash generates free commands.
+      for _, sr in xf.use_stash:
+        stashed_blocks -= sr.size()
+
+      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
+      # ComputePatches(), they both have the style of "diff".
+      if xf.style == "diff" and self.version >= 3:
+        assert xf.tgt_ranges and xf.src_ranges
+        if xf.src_ranges.overlaps(xf.tgt_ranges):
+          if stashed_blocks + xf.src_ranges.size() > max_allowed:
+            replaced_cmds.append(xf)
+            print("  %s replaced due to an implicit stash of %d blocks." % (
+                xf, xf.src_ranges.size()))
+
+      # Replace the commands in replaced_cmds with "new"s.
+      for cmd in replaced_cmds:
+        # It no longer uses any commands in "use_stash". Remove the def points
+        # for all those stashes.
+        for idx, sr in cmd.use_stash:
+          def_cmd = stashes[idx][1]
+          assert (idx, sr) in def_cmd.stash_before
+          def_cmd.stash_before.remove((idx, sr))
+
+        cmd.ConvertToNew()
+
   def ComputePatches(self, prefix):
     print("Reticulating splines...")
     diff_q = []
--
cgit v1.1
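The greedy pass in ReviseStashSize() can be modeled with plain integers. In
this toy version each transfer is a (name, blocks it stashes, stashed blocks
it frees) tuple, all invented values; note that the real code replaces the
command that would later use an over-budget stash (looked up via the def/use
map), not the transfer named here:

    def revise(transfers, max_allowed):
        stashed = 0
        replaced = []
        for name, stash_blocks, free_blocks in transfers:
            if stashed + stash_blocks > max_allowed:
                # Over budget: this stash is dropped, and the affected
                # command gets rewritten as "new" (ConvertToNew()).
                replaced.append(name)
            else:
                stashed += stash_blocks
            stashed -= free_blocks
        return replaced

    # With an 8-block budget, stashing 6 more blocks on top of xf1's 5
    # would exceed the limit, so xf2's stash is dropped; xf3 still fits.
    print(revise([("xf1", 5, 0), ("xf2", 6, 5), ("xf3", 2, 2)], 8))
    # ['xf2']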
From 937847ae493d36b9741f6387a2357d5cdceda3d9 Mon Sep 17 00:00:00 2001
From: Tao Bao
Date: Tue, 25 Aug 2015 15:10:10 -0700
Subject: Split large files for BBOTA v3.

For BBOTA v3, we need to stash source blocks to support the resumable
update feature. However, as files grow and cache partitions shrink,
the source blocks that represent a file may be too large to be stashed
as a whole. The CL in [1] solves the issue by replacing the diff
command with a "new" command. However, it may increase the generated
package size substantially (e.g. from ~100MB to ~400MB).

With this CL, if a file spans too many blocks, we split it into
smaller pieces by generating multiple commands. For the same case
above, this reduces the package size to ~150MB. One potential downside
is that after splitting, files like .jar, .apk and .zip can no longer
use imgdiff, so we may lose the potential benefit of imgdiff for patch
size reduction.

[1] commit 82c47981bd0602a1c7b50dfabf9a6a2412993bae

Bug: 22430577
Change-Id: Iee1ad6543f3d40368e079e418cc31728e1ab3f48
(cherry picked from commit 9a5caf2c30e5dcb19823dff328de1cfb140a2799)
---
 tools/releasetools/blockimgdiff.py | 85 ++++++++++++++++++++++++++++++--------
 tools/releasetools/rangelib.py     | 33 +++++++++++++++
 2 files changed, 101 insertions(+), 17 deletions(-)

diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index 6afa7a5..a1b3eda 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -297,7 +297,6 @@ class BlockImageDiff(object):
     out = []
 
     total = 0
-    performs_read = False
 
     stashes = {}
     stashed_blocks = 0
@@ -409,7 +408,6 @@ class BlockImageDiff(object):
         out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
         total += tgt_size
       elif xf.style == "move":
-        performs_read = True
         assert xf.tgt_ranges
         assert xf.src_ranges.size() == tgt_size
         if xf.src_ranges != xf.tgt_ranges:
@@ -434,7 +432,6 @@ class BlockImageDiff(object):
             xf.tgt_ranges.to_string_raw(), src_str))
         total += tgt_size
       elif xf.style in ("bsdiff", "imgdiff"):
-        performs_read = True
         assert xf.tgt_ranges
         assert xf.src_ranges
         if self.version == 1:
@@ -539,6 +536,7 @@ class BlockImageDiff(object):
     max_allowed = cache_size * stash_threshold / self.tgt.blocksize
 
     stashed_blocks = 0
+    new_blocks = 0
 
     # Now go through all the commands. Compute the required stash size on the
     # fly. If a command requires more stash than is available, we delete the
@@ -554,8 +552,7 @@ class BlockImageDiff(object):
           # that will use this stash and replace the command with "new".
           use_cmd = stashes[idx][2]
           replaced_cmds.append(use_cmd)
-          print("  %s replaced due to an explicit stash of %d blocks." % (
-              use_cmd, sr.size()))
+          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
         else:
           stashed_blocks += sr.size()
 
@@ -570,8 +567,7 @@ class BlockImageDiff(object):
         if xf.src_ranges.overlaps(xf.tgt_ranges):
           if stashed_blocks + xf.src_ranges.size() > max_allowed:
             replaced_cmds.append(xf)
-            print("  %s replaced due to an implicit stash of %d blocks." % (
-                xf, xf.src_ranges.size()))
+            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
 
       # Replace the commands in replaced_cmds with "new"s.
       for cmd in replaced_cmds:
@@ -581,9 +577,13 @@ class BlockImageDiff(object):
           def_cmd = stashes[idx][1]
           assert (idx, sr) in def_cmd.stash_before
           def_cmd.stash_before.remove((idx, sr))
+          new_blocks += sr.size()
 
         cmd.ConvertToNew()
 
+    print("  Total %d blocks are packed as new blocks due to insufficient "
+          "cache size." % (new_blocks,))
+
   def ComputePatches(self, prefix):
     print("Reticulating splines...")
     diff_q = []
@@ -939,6 +939,57 @@ class BlockImageDiff(object):
         a.goes_after[b] = size
 
   def FindTransfers(self):
+    """Parse the file_map to generate all the transfers."""
+
+    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
+                    split=False):
+      """Wrapper function for adding a Transfer().
+
+      For BBOTA v3, we need to stash source blocks for the resumable feature.
+      However, with the growth of file size and the shrink of the cache
+      partition, source blocks are too large to be stashed. If a file occupies
+      too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
+      into smaller pieces by getting multiple Transfer()s.
+
+      The downside is that after splitting, we can no longer use imgdiff but
+      only bsdiff."""
+
+      MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
+
+      # We care about diff transfers only.
+      if style != "diff" or not split:
+        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
+        return
+
+      # Change nothing for small files.
+      if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
+          src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
+        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
+        return
+
+      pieces = 0
+      while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
+             src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
+        tgt_split_name = "%s-%d" % (tgt_name, pieces)
+        src_split_name = "%s-%d" % (src_name, pieces)
+        tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
+        src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
+        Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
+                 by_id)
+
+        tgt_ranges = tgt_ranges.subtract(tgt_first)
+        src_ranges = src_ranges.subtract(src_first)
+        pieces += 1
+
+      # Handle remaining blocks.
+      if tgt_ranges.size() or src_ranges.size():
+        # Must be both non-empty.
+        assert tgt_ranges.size() and src_ranges.size()
+        tgt_split_name = "%s-%d" % (tgt_name, pieces)
+        src_split_name = "%s-%d" % (src_name, pieces)
+        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
+                 by_id)
+
     empty = RangeSet()
     for tgt_fn, tgt_ranges in self.tgt.file_map.items():
       if tgt_fn == "__ZERO":
@@ -946,28 +997,28 @@ class BlockImageDiff(object):
         # in any file and that are filled with zeros. We have a
         # special transfer style for zero blocks.
         src_ranges = self.src.file_map.get("__ZERO", empty)
-        Transfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
-                 "zero", self.transfers)
+        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
+                    "zero", self.transfers)
         continue
 
      elif tgt_fn == "__COPY":
         # "__COPY" domain includes all the blocks not contained in any
         # file and that need to be copied unconditionally to the target.
-        Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
+        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
         continue
 
      elif tgt_fn in self.src.file_map:
         # Look for an exact pathname match in the source.
-        Transfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue
 
       b = os.path.basename(tgt_fn)
       if b in self.src_basenames:
         # Look for an exact basename match in the source.
         src_fn = self.src_basenames[b]
-        Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue
 
       b = re.sub("[0-9]+", "#", b)
@@ -977,11 +1028,11 @@ class BlockImageDiff(object):
         # for .so files that contain version numbers in the filename
         # that get bumped.)
         src_fn = self.src_numpatterns[b]
-        Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue
 
-      Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
+      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
 
   def AbbreviateSourceNames(self):
     for k in self.src.file_map.keys():
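The splitting loop in AddTransfer() can be tried with plain block counts in
place of RangeSets. The 1024-block limit is MAX_BLOCKS_PER_DIFF_TRANSFER
from the patch; the sizes below are made up:

    def split_sizes(tgt_blocks, src_blocks, limit=1024):
        # Carve limit-sized pieces off both sides, then emit the
        # remainder, mirroring the while loop in AddTransfer().
        pieces = []
        while tgt_blocks > limit and src_blocks > limit:
            pieces.append((limit, limit))
            tgt_blocks -= limit
            src_blocks -= limit
        if tgt_blocks or src_blocks:
            pieces.append((tgt_blocks, src_blocks))
        return pieces

    # A 2500-block target diffed against a 2300-block source yields three
    # transfers (the real code would name them foo-0, foo-1 and foo-2):
    print(split_sizes(2500, 2300))
    # [(1024, 1024), (1024, 1024), (452, 252)]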
diff --git a/tools/releasetools/rangelib.py b/tools/releasetools/rangelib.py
index 1506658..373bbed 100644
--- a/tools/releasetools/rangelib.py
+++ b/tools/releasetools/rangelib.py
@@ -24,6 +24,7 @@ class RangeSet(object):
   lots of runs."""
 
   def __init__(self, data=None):
+    # TODO(tbao): monotonic is broken when passing in a tuple.
     self.monotonic = False
     if isinstance(data, str):
       self._parse_internal(data)
@@ -260,6 +261,38 @@ class RangeSet(object):
         out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
     return out
 
+  def first(self, n):
+    """Return the RangeSet that contains at most the first 'n' integers.
+
+    >>> RangeSet("0-9").first(1)
+    <RangeSet("0")>
+    >>> RangeSet("10-19").first(5)
+    <RangeSet("10-14")>
+    >>> RangeSet("10-19").first(15)
+    <RangeSet("10-19")>
+    >>> RangeSet("10-19 30-39").first(3)
+    <RangeSet("10-12")>
+    >>> RangeSet("10-19 30-39").first(15)
+    <RangeSet("10-19 30-34")>
+    >>> RangeSet("10-19 30-39").first(30)
+    <RangeSet("10-19 30-39")>
+    >>> RangeSet("0-9").first(0)
+    <RangeSet("")>
+    """
+
+    if self.size() <= n:
+      return self
+
+    out = []
+    for s, e in self:
+      if e - s >= n:
+        out += (s, s+n)
+        break
+      else:
+        out += (s, e)
+        n -= e - s
+    return RangeSet(data=out)
+
 
 if __name__ == "__main__":
   import doctest
--
cgit v1.1
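For readers who want to experiment with the algorithm behind
RangeSet.first() outside of rangelib, here is a standalone sketch over
explicit half-open (start, end) pairs, which stand in for RangeSet's
internal representation:

    def first(ranges, n):
        # Take whole runs until fewer than n blocks remain, then cut the
        # final run short, the same walk RangeSet.first() performs.
        if sum(e - s for s, e in ranges) <= n:
            return ranges
        out = []
        for s, e in ranges:
            if e - s >= n:
                out.append((s, s + n))
                break
            out.append((s, e))
            n -= e - s
        return out

    # Matches the doctest above: RangeSet("10-19 30-39").first(15) keeps
    # all of 10-19 (ten blocks) plus 30-34 (five blocks).
    print(first([(10, 20), (30, 40)], 15))
    # [(10, 20), (30, 35)]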