From c23841c833ab35d82b32553d0659e815859e6c01 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 3 Jan 2009 04:21:30 -0800 Subject: [PATCH] Add the binary delta apply algorithm for pack style deltas The git__delta_apply() function can be used to apply a Git style delta, such as those used in pack files or in git patch files, to recover the original object stream. Signed-off-by: Shawn O. Pearce --- src/delta-apply.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++ src/delta-apply.h | 25 +++++++++++ 2 files changed, 129 insertions(+) create mode 100644 src/delta-apply.c create mode 100644 src/delta-apply.h diff --git a/src/delta-apply.c b/src/delta-apply.c new file mode 100644 index 000000000..f92446126 --- /dev/null +++ b/src/delta-apply.c @@ -0,0 +1,104 @@ +#include "common.h" +#include "git/odb.h" + +/* + * This file was heavily cribbed from BinaryDelta.java in JGit, which + * itself was heavily cribbed from patch-delta.c in the + * GIT project. The original delta patching code was written by + * Nicolas Pitre . + */ + +static size_t hdr_sz( + const unsigned char **delta, + const unsigned char *end) +{ + const unsigned char *d = *delta; + size_t r = 0; + unsigned int c, shift = 0; + + do { + if (d == end) + return -1; + c = *d++; + r |= (c & 0x7f) << shift; + shift += 7; + } while (c & 0x80); + *delta = d; + return r; +} + +int git__delta_apply( + git_obj *out, + const unsigned char *base, + size_t base_len, + const unsigned char *delta, + size_t delta_len) +{ + const unsigned char *delta_end = delta + delta_len; + size_t res_sz; + unsigned char *res_dp; + + /* Check that the base size matches the data we were given; + * if not we would underflow while accessing data from the + * base object, resulting in data corruption or segfault. + */ + if (base_len != hdr_sz(&delta, delta_end)) + return GIT_ERROR; + + res_sz = hdr_sz(&delta, delta_end); + if (!(res_dp = git__malloc(res_sz + 1))) + return GIT_ERROR; + res_dp[res_sz] = '\0'; + out->data = res_dp; + out->len = res_sz; + + while (delta < delta_end) { + unsigned char cmd = *delta++; + if (cmd & 0x80) { + /* cmd is a copy instruction; copy from the base. + */ + size_t off = 0, len = 0; + + if (cmd & 0x01) off = *delta++; + if (cmd & 0x02) off |= *delta++ << 8; + if (cmd & 0x04) off |= *delta++ << 16; + if (cmd & 0x08) off |= *delta++ << 24; + + if (cmd & 0x10) len = *delta++; + if (cmd & 0x20) len |= *delta++ << 8; + if (cmd & 0x40) len |= *delta++ << 16; + if (!len) len = 0x10000; + + if (base_len < off + len || res_sz < len) + goto fail; + memcpy(res_dp, base + off, len); + res_dp += len; + res_sz -= len; + + } else if (cmd) { + /* cmd is a literal insert instruction; copy from + * the delta stream itself. + */ + if (delta_end - delta < cmd || res_sz < cmd) + goto fail; + memcpy(res_dp, delta, cmd); + delta += cmd; + res_dp += cmd; + res_sz -= cmd; + + } else { + /* cmd == 0 is reserved for future encodings. + */ + goto fail; + } + } + + if (delta != delta_end || res_sz) + goto fail; + return GIT_SUCCESS; + +fail: + free(out->data); + out->data = NULL; + return GIT_ERROR; +} diff --git a/src/delta-apply.h b/src/delta-apply.h new file mode 100644 index 000000000..498bccdfe --- /dev/null +++ b/src/delta-apply.h @@ -0,0 +1,25 @@ +#ifndef INCLUDE_delta_apply_h__ +#define INCLUDE_delta_apply_h__ + +/** + * Apply a git binary delta to recover the original content. + * + * @param out the output buffer to receive the original data. + * Only out->data and out->len are populated, as this is + * the only information available in the delta. + * @param base the base to copy from during copy instructions. + * @param base_len number of bytes available at base. + * @param delta the delta to execute copy/insert instructions from. + * @param delta_len total number of bytes in the delta. + * @return + * - GIT_SUCCESS on a successful delta unpack. + * - GIT_ERROR if the delta is corrupt or doesn't match the base. + */ +extern int git__delta_apply( + git_obj *out, + const unsigned char *base, + size_t base_len, + const unsigned char *delta, + size_t delta_len); + +#endif