1 files changed, 114 insertions, 25 deletions
diff --git a/Tools/Scripts/VCSUtils.pm b/Tools/Scripts/VCSUtils.pm
index faed7ed..e61774c 100644
--- a/Tools/Scripts/VCSUtils.pm
+++ b/Tools/Scripts/VCSUtils.pm
@@ -44,11 +44,13 @@ BEGIN {
     $VERSION     = 1.00;
     @ISA         = qw(Exporter);
     @EXPORT      = qw(
+        &applyGitBinaryPatchDelta
         &callSilently
         &canonicalizePath
         &changeLogEmailAddress
         &changeLogName
         &chdirReturningRelativePath
+        &decodeGitBinaryChunk
         &decodeGitBinaryPatch
         &determineSVNRoot
         &determineVCSRoot
@@ -392,6 +394,16 @@ sub normalizePath($)
     return $path;
 }
 
+# Prepend "Source/" to a path that starts with "WebCore/" or
+# "JavaScriptCore/"; return any other path unchanged.
+sub adjustPathForRecentRenamings($)
+{
+    my ($fullPath) = @_;
+    my $wasMoved = $fullPath =~ m|^WebCore/| || $fullPath =~ m|^JavaScriptCore/|;
+
+    return $wasMoved ? "Source/$fullPath" : $fullPath;
+}
+
 sub canonicalizePath($)
 {
     my ($file) = @_;
@@ -511,7 +523,7 @@ sub parseGitDiffHeader($$)
         # The first and second paths can differ in the case of copies
         # and renames.  We use the second file path because it is the
         # destination path.
-        $indexPath = $4;
+        $indexPath = adjustPathForRecentRenamings($4);
         # Use $POSTMATCH to preserve the end-of-line character.
         $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format.
     } else {
@@ -627,7 +639,7 @@ sub parseSvnDiffHeader($$)
 
     my $indexPath;
     if (/$svnDiffStartRegEx/) {
-        $indexPath = $1;
+        $indexPath = adjustPathForRecentRenamings($1);
     } else {
         die("First line of SVN diff does not begin with \"Index \": \"$_\"");
     }
@@ -1304,12 +1316,7 @@ sub setChangeLogDateAndReviewer($$$)
 # Returns $changeLogHashRef:
 #   $changeLogHashRef: a hash reference representing a change log patch.
 #     patch: a ChangeLog patch equivalent to the given one, but with the
-#            newest ChangeLog entry inserted at the top of the file, if possible.
-#     hasOverlappingLines: the value 1 if the change log entry overlaps
-#                          some lines of another change log entry. This can
-#                          happen when deliberately inserting a new ChangeLog
-#                          entry earlier in the file above an entry with
-#                          the same date and author.                     
+#            newest ChangeLog entry inserted at the top of the file, if possible.              
 sub fixChangeLogPatch($)
 {
     my $patch = shift; # $patch will only contain patch fragments for ChangeLog.
@@ -1403,8 +1410,19 @@ sub fixChangeLogPatch($)
         $lines[$i] = "+$text";
     }
 
-    # Finish moving whatever overlapping lines remain, and update
-    # the initial chunk range.
+    # If @overlappingLines > 0, this is where we make use of the
+    # assumption that the beginning of the source file was not modified.
+    splice(@lines, $chunkStartIndex, 0, @overlappingLines);
+
+    # Update the date start index as it may have changed after shifting
+    # the overlapping lines towards the front.
+    for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) {
+        $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/;
+    }
+    splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry.
+    $deletedLineCount += $dateStartIndex - $chunkStartIndex;
+
+    # Update the initial chunk range.
     my $chunkRangeRegEx = '^\@\@ -(\d+),(\d+) \+\d+,(\d+) \@\@$'; # e.g. @@ -2,6 +2,18 @@
     if ($lines[$chunkStartIndex - 1] !~ /$chunkRangeRegEx/) {
         # FIXME: Handle errors differently from ChangeLog files that
@@ -1413,21 +1431,9 @@ sub fixChangeLogPatch($)
         $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format.
         return \%changeLogHashRef;
     }
-    my $skippedFirstLineCount = $1 - 1;
     my $oldSourceLineCount = $2;
     my $oldTargetLineCount = $3;
 
-    if (@overlappingLines != $skippedFirstLineCount) {
-        # This can happen, for example, when deliberately inserting
-        # a new ChangeLog entry earlier in the file.
-        $changeLogHashRef{hasOverlappingLines} = 1;
-        $changeLogHashRef{patch} = $patch;
-        return \%changeLogHashRef;
-    }
-    # If @overlappingLines > 0, this is where we make use of the
-    # assumption that the beginning of the source file was not modified.
-    splice(@lines, $chunkStartIndex, 0, @overlappingLines);
-
     my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount;
     my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount;
     $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@";
@@ -1738,7 +1744,6 @@ sub decodeGitBinaryPatch($$)
     #
     # Each chunk a line which starts from either "literal" or "delta",
     # followed by a number which specifies decoded size of the chunk.
-    # The "delta" type chunks aren't supported by this function yet.
     #
     # Then, content of the chunk comes. To decode the content, we
     # need decode it with base85 first, and then zlib.
@@ -1759,10 +1764,94 @@ sub decodeGitBinaryPatch($$)
     my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath);
     my $reverseBinaryChunkActualSize = length($reverseBinaryChunk);
 
-    die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkExpectedSize != $binaryChunkActualSize);
-    die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize);
+    die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize);
+    die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize);
 
     return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk);
 }
 
+# Return the byte at offset $location in $data as a numeric value.
+sub readByte($$)
+{
+    my ($data, $location) = @_;
+    my $byte = substr($data, $location, 1);
+    return ord($byte);
+}
+
+# The git binary delta format is undocumented, except in code:
+# - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source
+#   of the algorithm in decodeGitBinaryPatchDeltaSize.
+# - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source
+#   of the algorithm in applyGitBinaryPatchDelta.
+# Decode one variable-length size field from the start of $binaryChunk.
+# Returns ($size, $bytesUsed); dies if the field is not terminated.
+sub decodeGitBinaryPatchDeltaSize($)
+{
+    my ($binaryChunk) = @_;
+
+    # The size is stored in 7-bit groups, least significant group first.
+    # The high bit (0x80) is set on every byte except the last one.
+    my $size = 0;
+    my $shift = 0;
+    for (my $i = 0; $i < length($binaryChunk);) {
+        my $byte = readByte($binaryChunk, $i++);
+        $size |= ($byte & 0x7f) << $shift;
+        $shift += 7;
+        return ($size, $i) unless $byte & 0x80;
+    }
+
+    # Ran out of data (or was given none) before the terminating byte;
+    # fail loudly instead of falling off the end with no defined result.
+    die "truncated size header in git binary delta";
+}
+
+# Apply the git binary delta $binaryChunk to $originalContents and return
+# the patched contents.  Dies if the delta is inconsistent with its headers.
+sub applyGitBinaryPatchDelta($$)
+{
+    my ($binaryChunk, $originalContents) = @_;
+
+    # A delta starts with two variable-length sizes (source, then result),
+    # followed by commands that are applied in order.  A command byte with
+    # 0x80 set copies a range of the original; its low seven bits say which
+    # offset and size bytes follow.  Any other non-zero command byte is the
+    # number of literal bytes to copy from the delta itself.
+    my ($sourceSize, $sourceHeaderLength) = decodeGitBinaryPatchDeltaSize($binaryChunk);
+    $binaryChunk = substr($binaryChunk, $sourceHeaderLength);
+    my ($resultSize, $resultHeaderLength) = decodeGitBinaryPatchDeltaSize($binaryChunk);
+    $binaryChunk = substr($binaryChunk, $resultHeaderLength);
+
+    # Like git's patch_delta, refuse to patch contents of the wrong size.
+    die "delta source size mismatch" if $sourceSize != length($originalContents);
+    my $out = "";
+    for (my $i = 0; $i < length($binaryChunk); ) {
+        my $cmd = readByte($binaryChunk, $i++);
+        if ($cmd & 0x80) {
+            # Extract an offset and size from the delta data, then copy
+            # $size bytes from $offset in the original data into the output.
+            my $offset = 0;
+            my $size = 0;
+            if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); }
+            if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; }
+            if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; }
+            if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; }
+            if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); }
+            if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; }
+            if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; }
+            if ($size == 0) { $size = 0x10000; }
+            $out .= substr($originalContents, $offset, $size);
+        } elsif ($cmd) {
+            # Copy $cmd bytes from the delta data into the output.
+            $out .= substr($binaryChunk, $i, $cmd);
+            $i += $cmd;
+        } else {
+            die "unexpected delta opcode 0";
+        }
+    }
+
+    # Catches truncated deltas and copies past the end of the original.
+    die "delta result size mismatch" if $resultSize != length($out);
+    return $out;
+}
+
 1;