From 6a2d2f8aa14462396cbc7d3e408ed28430e212e2 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 17 Jun 2015 06:42:20 -0700 Subject: [PATCH] delta: move delta application to delta.c Move the delta application functions into `delta.c`, next to the similar delta creation functions. Make the `git__delta_apply` functions adhere to other naming and parameter style within the library. --- src/delta-apply.c | 166 --------------------------------------------- src/delta-apply.h | 62 ----------------- src/delta.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++ src/delta.h | 51 +++++++++++++- src/odb.c | 2 +- src/odb_loose.c | 2 +- src/odb_pack.c | 2 +- src/pack.c | 7 +- 8 files changed, 223 insertions(+), 236 deletions(-) delete mode 100644 src/delta-apply.c delete mode 100644 src/delta-apply.h diff --git a/src/delta-apply.c b/src/delta-apply.c deleted file mode 100644 index 02ec7b75e..000000000 --- a/src/delta-apply.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ -#include "common.h" -#include "git2/odb.h" -#include "delta-apply.h" - -/* - * This file was heavily cribbed from BinaryDelta.java in JGit, which - * itself was heavily cribbed from patch-delta.c in the - * GIT project. The original delta patching code was written by - * Nicolas Pitre <nico@cam.org>. 
- */ - -static int hdr_sz( - size_t *size, - const unsigned char **delta, - const unsigned char *end) -{ - const unsigned char *d = *delta; - size_t r = 0; - unsigned int c, shift = 0; - - do { - if (d == end) - return -1; - c = *d++; - r |= (c & 0x7f) << shift; - shift += 7; - } while (c & 0x80); - *delta = d; - *size = r; - return 0; -} - -int git__delta_read_header( - const unsigned char *delta, - size_t delta_len, - size_t *base_sz, - size_t *res_sz) -{ - const unsigned char *delta_end = delta + delta_len; - if ((hdr_sz(base_sz, &delta, delta_end) < 0) || - (hdr_sz(res_sz, &delta, delta_end) < 0)) - return -1; - return 0; -} - -#define DELTA_HEADER_BUFFER_LEN 16 -int git__delta_read_header_fromstream(size_t *base_sz, size_t *res_sz, git_packfile_stream *stream) -{ - static const size_t buffer_len = DELTA_HEADER_BUFFER_LEN; - unsigned char buffer[DELTA_HEADER_BUFFER_LEN]; - const unsigned char *delta, *delta_end; - size_t len; - ssize_t read; - - len = read = 0; - while (len < buffer_len) { - read = git_packfile_stream_read(stream, &buffer[len], buffer_len - len); - - if (read == 0) - break; - - if (read == GIT_EBUFS) - continue; - - len += read; - } - - delta = buffer; - delta_end = delta + len; - if ((hdr_sz(base_sz, &delta, delta_end) < 0) || - (hdr_sz(res_sz, &delta, delta_end) < 0)) - return -1; - - return 0; -} - -int git__delta_apply( - git_rawobj *out, - const unsigned char *base, - size_t base_len, - const unsigned char *delta, - size_t delta_len) -{ - const unsigned char *delta_end = delta + delta_len; - size_t base_sz, res_sz, alloc_sz; - unsigned char *res_dp; - - /* Check that the base size matches the data we were given; - * if not we would underflow while accessing data from the - * base object, resulting in data corruption or segfault. - */ - if ((hdr_sz(&base_sz, &delta, delta_end) < 0) || (base_sz != base_len)) { - giterr_set(GITERR_INVALID, "Failed to apply delta. 
Base size does not match given data"); - return -1; - } - - if (hdr_sz(&res_sz, &delta, delta_end) < 0) { - giterr_set(GITERR_INVALID, "Failed to apply delta. Base size does not match given data"); - return -1; - } - - GITERR_CHECK_ALLOC_ADD(&alloc_sz, res_sz, 1); - res_dp = git__malloc(alloc_sz); - GITERR_CHECK_ALLOC(res_dp); - - res_dp[res_sz] = '\0'; - out->data = res_dp; - out->len = res_sz; - - while (delta < delta_end) { - unsigned char cmd = *delta++; - if (cmd & 0x80) { - /* cmd is a copy instruction; copy from the base. - */ - size_t off = 0, len = 0; - - if (cmd & 0x01) off = *delta++; - if (cmd & 0x02) off |= *delta++ << 8UL; - if (cmd & 0x04) off |= *delta++ << 16UL; - if (cmd & 0x08) off |= *delta++ << 24UL; - - if (cmd & 0x10) len = *delta++; - if (cmd & 0x20) len |= *delta++ << 8UL; - if (cmd & 0x40) len |= *delta++ << 16UL; - if (!len) len = 0x10000; - - if (base_len < off + len || res_sz < len) - goto fail; - memcpy(res_dp, base + off, len); - res_dp += len; - res_sz -= len; - - } else if (cmd) { - /* cmd is a literal insert instruction; copy from - * the delta stream itself. - */ - if (delta_end - delta < cmd || res_sz < cmd) - goto fail; - memcpy(res_dp, delta, cmd); - delta += cmd; - res_dp += cmd; - res_sz -= cmd; - - } else { - /* cmd == 0 is reserved for future encodings. - */ - goto fail; - } - } - - if (delta != delta_end || res_sz) - goto fail; - return 0; - -fail: - git__free(out->data); - out->data = NULL; - giterr_set(GITERR_INVALID, "Failed to apply delta"); - return -1; -} diff --git a/src/delta-apply.h b/src/delta-apply.h deleted file mode 100644 index eeeb78682..000000000 --- a/src/delta-apply.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. 
- */ -#ifndef INCLUDE_delta_apply_h__ -#define INCLUDE_delta_apply_h__ - -#include "odb.h" -#include "pack.h" - -/** - * Apply a git binary delta to recover the original content. - * - * @param out the output buffer to receive the original data. - * Only out->data and out->len are populated, as this is - * the only information available in the delta. - * @param base the base to copy from during copy instructions. - * @param base_len number of bytes available at base. - * @param delta the delta to execute copy/insert instructions from. - * @param delta_len total number of bytes in the delta. - * @return - * - 0 on a successful delta unpack. - * - GIT_ERROR if the delta is corrupt or doesn't match the base. - */ -extern int git__delta_apply( - git_rawobj *out, - const unsigned char *base, - size_t base_len, - const unsigned char *delta, - size_t delta_len); - -/** - * Read the header of a git binary delta. - * - * @param delta the delta to execute copy/insert instructions from. - * @param delta_len total number of bytes in the delta. - * @param base_sz pointer to store the base size field. - * @param res_sz pointer to store the result size field. - * @return - * - 0 on a successful decoding the header. - * - GIT_ERROR if the delta is corrupt. - */ -extern int git__delta_read_header( - const unsigned char *delta, - size_t delta_len, - size_t *base_sz, - size_t *res_sz); - -/** - * Read the header of a git binary delta - * - * This variant reads just enough from the packfile stream to read the - * delta header. 
- */ -extern int git__delta_read_header_fromstream( - size_t *base_sz, - size_t *res_sz, - git_packfile_stream *stream); - -#endif diff --git a/src/delta.c b/src/delta.c index d72d820d8..8a4c2a104 100644 --- a/src/delta.c +++ b/src/delta.c @@ -441,3 +441,170 @@ git_delta_create( *delta_size = outpos; return out; } + +/* +* Delta application was heavily cribbed from BinaryDelta.java in JGit, which +* itself was heavily cribbed from patch-delta.c in the +* GIT project. The original delta patching code was written by +* Nicolas Pitre <nico@cam.org>. +*/ + +static int hdr_sz( + size_t *size, + const unsigned char **delta, + const unsigned char *end) +{ + const unsigned char *d = *delta; + size_t r = 0; + unsigned int c, shift = 0; + + do { + if (d == end) + return -1; + c = *d++; + r |= (c & 0x7f) << shift; + shift += 7; + } while (c & 0x80); + *delta = d; + *size = r; + return 0; +} + +int git_delta_read_header( + size_t *base_out, + size_t *result_out, + const unsigned char *delta, + size_t delta_len) +{ + const unsigned char *delta_end = delta + delta_len; + if ((hdr_sz(base_out, &delta, delta_end) < 0) || + (hdr_sz(result_out, &delta, delta_end) < 0)) + return -1; + return 0; +} + +#define DELTA_HEADER_BUFFER_LEN 16 +int git_delta_read_header_fromstream( + size_t *base_sz, size_t *res_sz, git_packfile_stream *stream) +{ + static const size_t buffer_len = DELTA_HEADER_BUFFER_LEN; + unsigned char buffer[DELTA_HEADER_BUFFER_LEN]; + const unsigned char *delta, *delta_end; + size_t len; + ssize_t read; + + len = read = 0; + while (len < buffer_len) { + read = git_packfile_stream_read(stream, &buffer[len], buffer_len - len); + + if (read == 0) + break; + + if (read == GIT_EBUFS) + continue; + + len += read; + } + + delta = buffer; + delta_end = delta + len; + if ((hdr_sz(base_sz, &delta, delta_end) < 0) || + (hdr_sz(res_sz, &delta, delta_end) < 0)) + return -1; + + return 0; +} + +int git_delta_apply( + void **out, + size_t *out_len, + const unsigned char *base, + size_t base_len, + 
const unsigned char *delta, + size_t delta_len) +{ + const unsigned char *delta_end = delta + delta_len; + size_t base_sz, res_sz, alloc_sz; + unsigned char *res_dp; + + *out = NULL; + *out_len = 0; + + /* Check that the base size matches the data we were given; + * if not we would underflow while accessing data from the + * base object, resulting in data corruption or segfault. + */ + if ((hdr_sz(&base_sz, &delta, delta_end) < 0) || (base_sz != base_len)) { + giterr_set(GITERR_INVALID, "Failed to apply delta. Base size does not match given data"); + return -1; + } + + if (hdr_sz(&res_sz, &delta, delta_end) < 0) { + giterr_set(GITERR_INVALID, "Failed to apply delta. Base size does not match given data"); + return -1; + } + + GITERR_CHECK_ALLOC_ADD(&alloc_sz, res_sz, 1); + res_dp = git__malloc(alloc_sz); + GITERR_CHECK_ALLOC(res_dp); + + res_dp[res_sz] = '\0'; + *out = res_dp; + *out_len = res_sz; + + while (delta < delta_end) { + unsigned char cmd = *delta++; + if (cmd & 0x80) { + /* cmd is a copy instruction; copy from the base. + */ + size_t off = 0, len = 0; + + if (cmd & 0x01) off = *delta++; + if (cmd & 0x02) off |= *delta++ << 8UL; + if (cmd & 0x04) off |= *delta++ << 16UL; + if (cmd & 0x08) off |= *delta++ << 24UL; + + if (cmd & 0x10) len = *delta++; + if (cmd & 0x20) len |= *delta++ << 8UL; + if (cmd & 0x40) len |= *delta++ << 16UL; + if (!len) len = 0x10000; + + if (base_len < off + len || res_sz < len) + goto fail; + memcpy(res_dp, base + off, len); + res_dp += len; + res_sz -= len; + + } + else if (cmd) { + /* cmd is a literal insert instruction; copy from + * the delta stream itself. + */ + if (delta_end - delta < cmd || res_sz < cmd) + goto fail; + memcpy(res_dp, delta, cmd); + delta += cmd; + res_dp += cmd; + res_sz -= cmd; + + } + else { + /* cmd == 0 is reserved for future encodings. 
+ */ + goto fail; + } + } + + if (delta != delta_end || res_sz) + goto fail; + return 0; + +fail: + git__free(*out); + + *out = NULL; + *out_len = 0; + + giterr_set(GITERR_INVALID, "Failed to apply delta"); + return -1; +} diff --git a/src/delta.h b/src/delta.h index 4ca327992..d9d1d0fa8 100644 --- a/src/delta.h +++ b/src/delta.h @@ -6,6 +6,7 @@ #define INCLUDE_git_delta_h__ #include "common.h" +#include "pack.h" /* opaque object for delta index */ struct git_delta_index; @@ -19,8 +20,8 @@ struct git_delta_index; * before free_delta_index() is called. The returned pointer must be freed * using free_delta_index(). */ -extern struct git_delta_index * -git_delta_create_index(const void *buf, unsigned long bufsize); +extern struct git_delta_index *git_delta_create_index( + const void *buf, unsigned long bufsize); /* * free_delta_index: free the index created by create_delta_index() @@ -111,4 +112,50 @@ GIT_INLINE(unsigned long) git_delta_get_hdr_size( return size; } +/** +* Apply a git binary delta to recover the original content. +* The caller is responsible for freeing the returned buffer. +* +* @param out the output buffer +* @param out_len the length of the output buffer +* @param base the base to copy from during copy instructions. +* @param base_len number of bytes available at base. +* @param delta the delta to execute copy/insert instructions from. +* @param delta_len total number of bytes in the delta. +* @return 0 on success or an error code +*/ +extern int git_delta_apply( + void **out, + size_t *out_len, + const unsigned char *base, + size_t base_len, + const unsigned char *delta, + size_t delta_len); + +/** +* Read the header of a git binary delta. +* +* @param base_out pointer to store the base size field. +* @param result_out pointer to store the result size field. +* @param delta the delta to execute copy/insert instructions from. +* @param delta_len total number of bytes in the delta. 
+* @return 0 on success or an error code +*/ +extern int git_delta_read_header( + size_t *base_out, + size_t *result_out, + const unsigned char *delta, + size_t delta_len); + +/** + * Read the header of a git binary delta + * + * This variant reads just enough from the packfile stream to read the + * delta header. + */ +extern int git_delta_read_header_fromstream( + size_t *base_out, + size_t *result_out, + git_packfile_stream *stream); + #endif diff --git a/src/odb.c b/src/odb.c index 890e6e2f8..777e3dc5c 100644 --- a/src/odb.c +++ b/src/odb.c @@ -12,7 +12,7 @@ #include "fileops.h" #include "hash.h" #include "odb.h" -#include "delta-apply.h" +#include "delta.h" #include "filter.h" #include "repository.h" diff --git a/src/odb_loose.c b/src/odb_loose.c index 3c33160d0..228d4c334 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -12,7 +12,7 @@ #include "fileops.h" #include "hash.h" #include "odb.h" -#include "delta-apply.h" +#include "delta.h" #include "filebuf.h" #include "git2/odb_backend.h" diff --git a/src/odb_pack.c b/src/odb_pack.c index 5a57864ad..244e12bc3 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -13,7 +13,7 @@ #include "fileops.h" #include "hash.h" #include "odb.h" -#include "delta-apply.h" +#include "delta.h" #include "sha1_lookup.h" #include "mwindow.h" #include "pack.h" diff --git a/src/pack.c b/src/pack.c index 6a700e29f..310f00fa3 100644 --- a/src/pack.c +++ b/src/pack.c @@ -8,7 +8,7 @@ #include "common.h" #include "odb.h" #include "pack.h" -#include "delta-apply.h" +#include "delta.h" #include "sha1_lookup.h" #include "mwindow.h" #include "fileops.h" @@ -505,7 +505,7 @@ int git_packfile_resolve_header( git_mwindow_close(&w_curs); if ((error = git_packfile_stream_open(&stream, p, curpos)) < 0) return error; - error = git__delta_read_header_fromstream(&base_size, size_p, &stream); + error = git_delta_read_header_fromstream(&base_size, size_p, &stream); git_packfile_stream_free(&stream); if (error < 0) return error; @@ -730,8 +730,9 @@ int 
git_packfile_unpack( obj->len = 0; obj->type = GIT_OBJ_BAD; - error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len); + error = git_delta_apply(&obj->data, &obj->len, base.data, base.len, delta.data, delta.len); obj->type = base_type; + /* * We usually don't want to free the base at this * point, as we put it into the cache in the previous