From 4bc9b74c14cd2d37de12295cc6ebfd90274e222b Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 28 Sep 2015 16:24:50 -0400 Subject: [PATCH 1/5] GITERR_CHECK_ALLOC_ADDn: multi-arg adders --- src/common.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/common.h b/src/common.h index 6dca36fbd..7170df91a 100644 --- a/src/common.h +++ b/src/common.h @@ -208,6 +208,15 @@ GIT_INLINE(void) git__init_structure(void *structure, size_t len, unsigned int v #define GITERR_CHECK_ALLOC_ADD(out, one, two) \ if (GIT_ADD_SIZET_OVERFLOW(out, one, two)) { return -1; } +#define GITERR_CHECK_ALLOC_ADD3(out, one, two, three) \ + if (GIT_ADD_SIZET_OVERFLOW(out, one, two) || \ + GIT_ADD_SIZET_OVERFLOW(out, *(out), three)) { return -1; } + +#define GITERR_CHECK_ALLOC_ADD4(out, one, two, three, four) \ + if (GIT_ADD_SIZET_OVERFLOW(out, one, two) || \ + GIT_ADD_SIZET_OVERFLOW(out, *(out), three) || \ + GIT_ADD_SIZET_OVERFLOW(out, *(out), four)) { return -1; } + /** Check for multiplicative overflow, failing if it would occur. */ #define GITERR_CHECK_ALLOC_MULTIPLY(out, nelem, elsize) \ if (GIT_MULTIPLY_SIZET_OVERFLOW(out, nelem, elsize)) { return -1; } From 46c0e6e3c169f8d76ed739444bc51c4b3f74969b Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 28 Sep 2015 16:34:29 -0400 Subject: [PATCH 2/5] xdiff: convert size variables to size_t --- src/xdiff/xdiffi.c | 12 ++-- src/xdiff/xhistogram.c | 6 +- src/xdiff/xmerge.c | 142 ++++++++++++++++++++++++++++------------- 3 files changed, 110 insertions(+), 50 deletions(-) diff --git a/src/xdiff/xdiffi.c b/src/xdiff/xdiffi.c index 0620e5fff..f4d01b48c 100644 --- a/src/xdiff/xdiffi.c +++ b/src/xdiff/xdiffi.c @@ -21,7 +21,8 @@ */ #include "xinclude.h" - +#include "common.h" +#include "integer.h" #define XDL_MAX_COST_MIN 256 @@ -323,7 +324,7 @@ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdfenv_t *xe) { - long ndiags; + size_t ndiags, allocsize; long *kvd, *kvdf, *kvdb; xdalgoenv_t xenv; diffdata_t dd1, dd2; @@ -343,9 +344,12 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, * Allocate and setup K vectors to be used by the differential algorithm. * One is to store the forward path and one to store the backward path. */ - ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; - if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) { + GITERR_CHECK_ALLOC_ADD3(&ndiags, xe->xdf1.nreff, xe->xdf2.nreff, 3); + GITERR_CHECK_ALLOC_MULTIPLY(&allocsize, ndiags, 2); + GITERR_CHECK_ALLOC_ADD(&allocsize, allocsize, 2); + GITERR_CHECK_ALLOC_MULTIPLY(&allocsize, allocsize, sizeof(long)); + if (!(kvd = (long *) xdl_malloc(allocsize))) { xdl_free_env(xe); return -1; } diff --git a/src/xdiff/xhistogram.c b/src/xdiff/xhistogram.c index 500d8112a..0c2edb89c 100644 --- a/src/xdiff/xhistogram.c +++ b/src/xdiff/xhistogram.c @@ -44,6 +44,7 @@ #include "xinclude.h" #include "xtypes.h" #include "xdiff.h" +#include "common.h" #define MAX_PTR UINT_MAX #define MAX_CNT UINT_MAX @@ -271,7 +272,7 @@ static int histogram_diff( { struct histindex index; struct region lcs; - unsigned int sz; + size_t sz; int result = -1; if (count1 <= 0 && count2 <= 0) @@ -302,7 +303,8 @@ static int histogram_diff( index.table_bits = xdl_hashbits(count1); sz = index.records_size = 1 << index.table_bits; - sz *= sizeof(struct record *); + GITERR_CHECK_ALLOC_MULTIPLY(&sz, sz, sizeof(struct record *)); + if (!(index.records = (struct record **) xdl_malloc(sz))) goto cleanup; memset(index.records, 0, sz); diff --git a/src/xdiff/xmerge.c b/src/xdiff/xmerge.c index b11e59817..7b7e0e2d3 100644 --- a/src/xdiff/xmerge.c +++ b/src/xdiff/xmerge.c @@ -21,6 +21,7 @@ */ #include "xinclude.h" +#include "common.h" typedef struct s_xdmerge { struct s_xdmerge *next; @@ -109,59 +110,74 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, return 0; } -static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int add_nl, char *dest) +static int xdl_recs_copy_0(size_t *out, int use_orig, xdfenv_t *xe, int i, int count, int add_nl, char *dest) { xrecord_t **recs; - int size = 0; + size_t size = 0; + + *out = 0; recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i; if (count < 1) return 0; - for (i = 0; i < count; size += recs[i++]->size) + for (i = 0; i < count; ) { if (dest) memcpy(dest + size, recs[i]->ptr, recs[i]->size); + + GITERR_CHECK_ALLOC_ADD(&size, size, recs[i++]->size); + } + if (add_nl) { i = recs[count - 1]->size; if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') { if (dest) dest[size] = '\n'; - size++; + + GITERR_CHECK_ALLOC_ADD(&size, size, 1); } } - return size; + + *out = size; + return 0; } -static int xdl_recs_copy(xdfenv_t *xe, int i, int count, int add_nl, char *dest) +static int xdl_recs_copy(size_t *out, xdfenv_t *xe, int i, int count, int add_nl, char *dest) { - return xdl_recs_copy_0(0, xe, i, count, add_nl, dest); + return xdl_recs_copy_0(out, 0, xe, i, count, add_nl, dest); } -static int xdl_orig_copy(xdfenv_t *xe, int i, int count, int add_nl, char *dest) +static int xdl_orig_copy(size_t *out, xdfenv_t *xe, int i, int count, int add_nl, char *dest) { - return xdl_recs_copy_0(1, xe, i, count, add_nl, dest); + return xdl_recs_copy_0(out, 1, xe, i, count, add_nl, dest); } -static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, +static int fill_conflict_hunk(size_t *out, xdfenv_t *xe1, const char *name1, xdfenv_t *xe2, const char *name2, const char *name3, - int size, int i, int style, + size_t size, int i, int style, xdmerge_t *m, char *dest, int marker_size) { int marker1_size = (name1 ? (int)strlen(name1) + 1 : 0); int marker2_size = (name2 ? (int)strlen(name2) + 1 : 0); int marker3_size = (name3 ? (int)strlen(name3) + 1 : 0); + size_t copied; + + *out = 0; if (marker_size <= 0) marker_size = DEFAULT_CONFLICT_MARKER_SIZE; /* Before conflicting part */ - size += xdl_recs_copy(xe1, i, m->i1 - i, 0, - dest ? dest + size : NULL); + if (xdl_recs_copy(&copied, xe1, i, m->i1 - i, 0, + dest ? dest + size : NULL) < 0) + return -1; + + GITERR_CHECK_ALLOC_ADD(&size, size, copied); if (!dest) { - size += marker_size + 1 + marker1_size; + GITERR_CHECK_ALLOC_ADD4(&size, size, marker_size, 1, marker1_size); } else { memset(dest + size, '<', marker_size); size += marker_size; @@ -174,13 +190,16 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, } /* Postimage from side #1 */ - size += xdl_recs_copy(xe1, m->i1, m->chg1, 1, - dest ? dest + size : NULL); + if (xdl_recs_copy(&copied, xe1, m->i1, m->chg1, 1, + dest ? dest + size : NULL) < 0) + return -1; + + GITERR_CHECK_ALLOC_ADD(&size, size, copied); if (style == XDL_MERGE_DIFF3) { /* Shared preimage */ if (!dest) { - size += marker_size + 1 + marker3_size; + GITERR_CHECK_ALLOC_ADD4(&size, size, marker_size, 1, marker3_size); } else { memset(dest + size, '|', marker_size); size += marker_size; @@ -191,12 +210,15 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, } dest[size++] = '\n'; } - size += xdl_orig_copy(xe1, m->i0, m->chg0, 1, - dest ? dest + size : NULL); + + if (xdl_orig_copy(&copied, xe1, m->i0, m->chg0, 1, + dest ? dest + size : NULL) < 0) + return -1; + GITERR_CHECK_ALLOC_ADD(&size, size, copied); } if (!dest) { - size += marker_size + 1; + GITERR_CHECK_ALLOC_ADD3(&size, size, marker_size, 1); } else { memset(dest + size, '=', marker_size); size += marker_size; @@ -204,10 +226,14 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, } /* Postimage from side #2 */ - size += xdl_recs_copy(xe2, m->i2, m->chg2, 1, - dest ? dest + size : NULL); + + if (xdl_recs_copy(&copied, xe2, m->i2, m->chg2, 1, + dest ? dest + size : NULL) < 0) + return -1; + GITERR_CHECK_ALLOC_ADD(&size, size, copied); + if (!dest) { - size += marker_size + 1 + marker2_size; + GITERR_CHECK_ALLOC_ADD4(&size, size, marker_size, 1, marker2_size); } else { memset(dest + size, '>', marker_size); size += marker_size; @@ -218,46 +244,69 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, } dest[size++] = '\n'; } - return size; + + *out = size; + return 0; } -static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, +static int xdl_fill_merge_buffer(size_t *out, + xdfenv_t *xe1, const char *name1, xdfenv_t *xe2, const char *name2, const char *ancestor_name, int favor, xdmerge_t *m, char *dest, int style, int marker_size) { - int size, i; + size_t size, copied; + int i; + + *out = 0; for (size = i = 0; m; m = m->next) { if (favor && !m->mode) m->mode = favor; - if (m->mode == 0) - size = fill_conflict_hunk(xe1, name1, xe2, name2, + if (m->mode == 0) { + if (fill_conflict_hunk(&size, xe1, name1, xe2, name2, ancestor_name, size, i, style, m, dest, - marker_size); + marker_size) < 0) + return -1; + } else if (m->mode & 3) { /* Before conflicting part */ - size += xdl_recs_copy(xe1, i, m->i1 - i, 0, - dest ? dest + size : NULL); + if (xdl_recs_copy(&copied, xe1, i, m->i1 - i, 0, + dest ? dest + size : NULL) < 0) + return -1; + GITERR_CHECK_ALLOC_ADD(&size, size, copied); + /* Postimage from side #1 */ - if (m->mode & 1) - size += xdl_recs_copy(xe1, m->i1, m->chg1, (m->mode & 2), - dest ? dest + size : NULL); + if (m->mode & 1) { + if (xdl_recs_copy(&copied, xe1, m->i1, m->chg1, (m->mode & 2), + dest ? dest + size : NULL) < 0) + return -1; + GITERR_CHECK_ALLOC_ADD(&size, size, copied); + } + /* Postimage from side #2 */ - if (m->mode & 2) - size += xdl_recs_copy(xe2, m->i2, m->chg2, 0, - dest ? dest + size : NULL); + if (m->mode & 2) { + if (xdl_recs_copy(&copied, xe2, m->i2, m->chg2, 0, + dest ? dest + size : NULL) < 0) + return -1; + GITERR_CHECK_ALLOC_ADD(&size, size, copied); + } } else continue; i = m->i1 + m->chg1; } - size += xdl_recs_copy(xe1, i, xe1->xdf2.nrec - i, 0, - dest ? dest + size : NULL); - return size; + + if (xdl_recs_copy(&copied, xe1, i, xe1->xdf2.nrec - i, 0, + dest ? dest + size : NULL) < 0) + return -1; + GITERR_CHECK_ALLOC_ADD(&size, size, copied); + + *out = size; + return 0; } /* @@ -551,19 +600,24 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, /* output */ if (result) { int marker_size = xmp->marker_size; - int size = xdl_fill_merge_buffer(xe1, name1, xe2, name2, + size_t size; + + if (xdl_fill_merge_buffer(&size, xe1, name1, xe2, name2, ancestor_name, favor, changes, NULL, style, - marker_size); + marker_size) < 0) + return -1; + result->ptr = xdl_malloc(size); if (!result->ptr) { xdl_cleanup_merge(changes); return -1; } result->size = size; - xdl_fill_merge_buffer(xe1, name1, xe2, name2, + if (xdl_fill_merge_buffer(&size, xe1, name1, xe2, name2, ancestor_name, favor, changes, - result->ptr, style, marker_size); + result->ptr, style, marker_size) < 0) + return -1; } return xdl_cleanup_merge(changes); } From e43520660c6dba6470af28ce3be822be401f5788 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 28 Sep 2015 18:25:24 -0400 Subject: [PATCH 3/5] merge_file: treat large files as binary xdiff craps the bed on large files. Treat very large files as binary, so that it doesn't even have to try. Refactor our merge binary handling to better match git.git, which looks for a NUL in the first 8000 bytes. --- src/merge.c | 55 -------------------------- src/merge_file.c | 96 +++++++++++++++++++++++++++++++++++---------- src/merge_file.h | 5 +-- src/xdiff/xdiff.h | 8 ++-- tests/merge/files.c | 88 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 170 insertions(+), 82 deletions(-) diff --git a/src/merge.c b/src/merge.c index fc5088c82..89b8e8505 100644 --- a/src/merge.c +++ b/src/merge.c @@ -61,12 +61,6 @@ struct merge_diff_df_data { git_merge_diff *prev_conflict; }; -GIT_INLINE(int) merge_diff_detect_binary( - bool *binary_out, - git_repository *repo, - const git_merge_diff *conflict); - - /* Merge base computation */ int merge_bases_many(git_commit_list **out, git_revwalk **walk_out, git_repository *repo, size_t length, const git_oid input_array[]) @@ -668,7 +662,6 @@ static int merge_conflict_resolve_automerge( git_odb *odb = NULL; git_oid automerge_oid; int error = 0; - bool binary = false; assert(resolved && diff_list && conflict); @@ -703,12 +696,6 @@ static int merge_conflict_resolve_automerge( strcmp(conflict->ancestor_entry.path, conflict->their_entry.path) != 0) return 0; - /* Reject binary conflicts */ - if ((error = merge_diff_detect_binary(&binary, diff_list->repo, conflict)) < 0) - return error; - if (binary) - return 0; - ancestor = GIT_MERGE_INDEX_ENTRY_EXISTS(conflict->ancestor_entry) ? &conflict->ancestor_entry : NULL; ours = GIT_MERGE_INDEX_ENTRY_EXISTS(conflict->our_entry) ? @@ -1314,48 +1301,6 @@ GIT_INLINE(int) merge_diff_detect_type( return 0; } -GIT_INLINE(int) merge_diff_detect_binary( - bool *binary_out, - git_repository *repo, - const git_merge_diff *conflict) -{ - git_blob *ancestor_blob = NULL, *our_blob = NULL, *their_blob = NULL; - int error = 0; - bool binary = false; - - if (GIT_MERGE_INDEX_ENTRY_ISFILE(conflict->ancestor_entry)) { - if ((error = git_blob_lookup(&ancestor_blob, repo, &conflict->ancestor_entry.id)) < 0) - goto done; - - binary = git_blob_is_binary(ancestor_blob); - } - - if (!binary && - GIT_MERGE_INDEX_ENTRY_ISFILE(conflict->our_entry)) { - if ((error = git_blob_lookup(&our_blob, repo, &conflict->our_entry.id)) < 0) - goto done; - - binary = git_blob_is_binary(our_blob); - } - - if (!binary && - GIT_MERGE_INDEX_ENTRY_ISFILE(conflict->their_entry)) { - if ((error = git_blob_lookup(&their_blob, repo, &conflict->their_entry.id)) < 0) - goto done; - - binary = git_blob_is_binary(their_blob); - } - - *binary_out = binary; - -done: - git_blob_free(ancestor_blob); - git_blob_free(our_blob); - git_blob_free(their_blob); - - return error; -} - GIT_INLINE(int) index_entry_dup_pool( git_index_entry *out, git_pool *pool, diff --git a/src/merge_file.c b/src/merge_file.c index 6d89b089d..b174231cc 100644 --- a/src/merge_file.c +++ b/src/merge_file.c @@ -15,9 +15,15 @@ #include "git2/repository.h" #include "git2/object.h" #include "git2/index.h" +#include "git2/merge.h" #include "xdiff/xdiff.h" +/* only examine the first 8000 bytes for binaryness. + * https://github.com/git/git/blob/77bd3ea9f54f1584147b594abc04c26ca516d987/xdiff-interface.c#L197 + */ +#define GIT_MERGE_FILE_BINARY_SIZE 8000 + #define GIT_MERGE_FILE_SIDE_EXISTS(X) ((X)->mode != 0) GIT_INLINE(const char *) merge_file_best_path( @@ -100,7 +106,7 @@ static void merge_file_normalize_opts( } } -static int git_merge_file__from_inputs( +static int merge_file__xdiff( git_merge_file_result *out, const git_merge_file_input *ancestor, const git_merge_file_input *ours, @@ -189,6 +195,63 @@ done: return error; } +static bool merge_file__is_binary(const git_merge_file_input *file) +{ + size_t len = file ? file->size : 0; + + if (len > GIT_MERGE_FILE_XDIFF_MAX) + return true; + if (len > GIT_MERGE_FILE_BINARY_SIZE) + len = GIT_MERGE_FILE_BINARY_SIZE; + + return len ? (memchr(file->ptr, 0, len) != NULL) : false; +} + +static int merge_file__binary( + git_merge_file_result *out, + const git_merge_file_input *ours, + const git_merge_file_input *theirs, + const git_merge_file_options *given_opts) +{ + const git_merge_file_input *favored = NULL; + + memset(out, 0x0, sizeof(git_merge_file_result)); + + if (given_opts && given_opts->favor == GIT_MERGE_FILE_FAVOR_OURS) + favored = ours; + else if (given_opts && given_opts->favor == GIT_MERGE_FILE_FAVOR_THEIRS) + favored = theirs; + else + goto done; + + if ((out->path = git__strdup(favored->path)) == NULL || + (out->ptr = git__malloc(favored->size)) == NULL) + goto done; + + memcpy((char *)out->ptr, favored->ptr, favored->size); + out->len = favored->size; + out->mode = favored->mode; + out->automergeable = 1; + +done: + return 0; +} + +static int merge_file__from_inputs( + git_merge_file_result *out, + const git_merge_file_input *ancestor, + const git_merge_file_input *ours, + const git_merge_file_input *theirs, + const git_merge_file_options *given_opts) +{ + if (merge_file__is_binary(ancestor) || + merge_file__is_binary(ours) || + merge_file__is_binary(theirs)) + return merge_file__binary(out, ours, theirs, given_opts); + + return merge_file__xdiff(out, ancestor, ours, theirs, given_opts); +} + static git_merge_file_input *git_merge_file__normalize_inputs( git_merge_file_input *out, const git_merge_file_input *given) @@ -223,7 +286,7 @@ int git_merge_file( ours = git_merge_file__normalize_inputs(&inputs[1], ours); theirs = git_merge_file__normalize_inputs(&inputs[2], theirs); - return git_merge_file__from_inputs(out, ancestor, ours, theirs, options); + return merge_file__from_inputs(out, ancestor, ours, theirs, options); } int git_merge_file_from_index( @@ -234,8 +297,8 @@ int git_merge_file_from_index( const git_index_entry *theirs, const git_merge_file_options *options) { - git_merge_file_input inputs[3] = { {0} }, - *ancestor_input = NULL, *our_input = NULL, *their_input = NULL; + git_merge_file_input *ancestor_ptr = NULL, + ancestor_input = {0}, our_input = {0}, their_input = {0}; git_odb *odb = NULL; git_odb_object *odb_object[3] = { 0 }; int error = 0; @@ -249,27 +312,20 @@ int git_merge_file_from_index( if (ancestor) { if ((error = git_merge_file__input_from_index( - &inputs[0], &odb_object[0], odb, ancestor)) < 0) + &ancestor_input, &odb_object[0], odb, ancestor)) < 0) goto done; - ancestor_input = &inputs[0]; + ancestor_ptr = &ancestor_input; } if ((error = git_merge_file__input_from_index( - &inputs[1], &odb_object[1], odb, ours)) < 0) + &our_input, &odb_object[1], odb, ours)) < 0 || + (error = git_merge_file__input_from_index( + &their_input, &odb_object[2], odb, theirs)) < 0) goto done; - our_input = &inputs[1]; - - if ((error = git_merge_file__input_from_index( - &inputs[2], &odb_object[2], odb, theirs)) < 0) - goto done; - - their_input = &inputs[2]; - - if ((error = git_merge_file__from_inputs(out, - ancestor_input, our_input, their_input, options)) < 0) - goto done; + error = merge_file__from_inputs(out, + ancestor_ptr, &our_input, &their_input, options); done: git_odb_object_free(odb_object[0]); @@ -286,7 +342,5 @@ void git_merge_file_result_free(git_merge_file_result *result) return; git__free((char *)result->path); - - /* xdiff uses malloc() not git_malloc, so we use free(), not git_free() */ - free((char *)result->ptr); + git__free((char *)result->ptr); } diff --git a/src/merge_file.h b/src/merge_file.h index 263391ee3..08ecfc095 100644 --- a/src/merge_file.h +++ b/src/merge_file.h @@ -7,8 +7,7 @@ #ifndef INCLUDE_filediff_h__ #define INCLUDE_filediff_h__ -#include "xdiff/xdiff.h" - -#include "git2/merge.h" +/* xdiff cannot cope with large files, just treat them as binary */ +#define GIT_MERGE_FILE_XDIFF_MAX (1024UL * 1024 * 1023) #endif diff --git a/src/xdiff/xdiff.h b/src/xdiff/xdiff.h index e2f1e892b..db5d59884 100644 --- a/src/xdiff/xdiff.h +++ b/src/xdiff/xdiff.h @@ -20,6 +20,8 @@ * */ +#include "util.h" + #if !defined(XDIFF_H) #define XDIFF_H @@ -106,9 +108,9 @@ typedef struct s_bdiffparam { } bdiffparam_t; -#define xdl_malloc(x) malloc(x) -#define xdl_free(ptr) free(ptr) -#define xdl_realloc(ptr,x) realloc(ptr,x) +#define xdl_malloc(x) git__malloc(x) +#define xdl_free(ptr) git__free(ptr) +#define xdl_realloc(ptr,x) git__realloc(ptr,x) void *xdl_mmfile_first(mmfile_t *mmf, long *size); long xdl_mmfile_size(mmfile_t *mmf); diff --git a/tests/merge/files.c b/tests/merge/files.c index 2fd90d066..b365d5a42 100644 --- a/tests/merge/files.c +++ b/tests/merge/files.c @@ -3,6 +3,7 @@ #include "git2/merge.h" #include "buffer.h" #include "merge.h" +#include "merge_file.h" #include "merge_helpers.h" #include "refs.h" #include "fileops.h" @@ -288,3 +289,90 @@ void test_merge_files__doesnt_add_newline(void) git_merge_file_result_free(&result); } +void test_merge_files__skips_large_files(void) +{ + git_merge_file_input ours = GIT_MERGE_FILE_INPUT_INIT, + theirs = GIT_MERGE_FILE_INPUT_INIT; + git_merge_file_options opts = GIT_MERGE_FILE_OPTIONS_INIT; + git_merge_file_result result = {0}; + + ours.size = GIT_MERGE_FILE_XDIFF_MAX + 1; + ours.path = "testfile.txt"; + ours.mode = 0100755; + + theirs.size = GIT_MERGE_FILE_XDIFF_MAX + 1; + theirs.path = "testfile.txt"; + theirs.mode = 0100755; + + cl_git_pass(git_merge_file(&result, NULL, &ours, &theirs, &opts)); + + cl_assert_equal_i(0, result.automergeable); + + git_merge_file_result_free(&result); +} + +void test_merge_files__skips_binaries(void) +{ + git_merge_file_input ancestor = GIT_MERGE_FILE_INPUT_INIT, + ours = GIT_MERGE_FILE_INPUT_INIT, + theirs = GIT_MERGE_FILE_INPUT_INIT; + git_merge_file_result result = {0}; + + ancestor.ptr = "ance\0stor\0"; + ancestor.size = 10; + ancestor.path = "ancestor.txt"; + ancestor.mode = 0100755; + + ours.ptr = "foo\0bar\0"; + ours.size = 8; + ours.path = "ours.txt"; + ours.mode = 0100755; + + theirs.ptr = "bar\0foo\0"; + theirs.size = 8; + theirs.path = "theirs.txt"; + theirs.mode = 0100644; + + cl_git_pass(git_merge_file(&result, &ancestor, &ours, &theirs, NULL)); + + cl_assert_equal_i(0, result.automergeable); + + git_merge_file_result_free(&result); +} + +void test_merge_files__handles_binaries_when_favored(void) +{ + git_merge_file_input ancestor = GIT_MERGE_FILE_INPUT_INIT, + ours = GIT_MERGE_FILE_INPUT_INIT, + theirs = GIT_MERGE_FILE_INPUT_INIT; + git_merge_file_options opts = GIT_MERGE_FILE_OPTIONS_INIT; + git_merge_file_result result = {0}; + + ancestor.ptr = "ance\0stor\0"; + ancestor.size = 10; + ancestor.path = "ancestor.txt"; + ancestor.mode = 0100755; + + ours.ptr = "foo\0bar\0"; + ours.size = 8; + ours.path = "ours.txt"; + ours.mode = 0100755; + + theirs.ptr = "bar\0foo\0"; + theirs.size = 8; + theirs.path = "theirs.txt"; + theirs.mode = 0100644; + + opts.favor = GIT_MERGE_FILE_FAVOR_OURS; + cl_git_pass(git_merge_file(&result, &ancestor, &ours, &theirs, &opts)); + + cl_assert_equal_i(1, result.automergeable); + + cl_assert_equal_s("ours.txt", result.path); + cl_assert_equal_i(0100755, result.mode); + + cl_assert_equal_i(ours.size, result.len); + cl_assert(memcmp(result.ptr, ours.ptr, ours.size) == 0); + + git_merge_file_result_free(&result); +} From 6c014bcc54fb9923490f9af917dc43e3661e5782 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Tue, 29 Sep 2015 12:18:17 -0400 Subject: [PATCH 4/5] diff: don't feed large files to xdiff --- src/checkout.c | 3 ++- src/diff_patch.c | 4 ++++ src/diff_xdiff.c | 7 +++++++ src/diff_xdiff.h | 5 +++++ src/merge.c | 1 - src/merge_file.c | 4 ++-- src/merge_file.h | 13 ------------- tests/merge/files.c | 6 +++--- 8 files changed, 23 insertions(+), 20 deletions(-) delete mode 100644 src/merge_file.h diff --git a/src/checkout.c b/src/checkout.c index 2a8bfd558..632556622 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -18,6 +18,7 @@ #include "git2/submodule.h" #include "git2/sys/index.h" #include "git2/sys/filter.h" +#include "git2/merge.h" #include "refs.h" #include "repository.h" @@ -27,7 +28,7 @@ #include "diff.h" #include "pathspec.h" #include "buf_text.h" -#include "merge_file.h" +#include "diff_xdiff.h" #include "path.h" #include "attr.h" #include "pool.h" diff --git a/src/diff_patch.c b/src/diff_patch.c index 0628da6f2..50faa3b3f 100644 --- a/src/diff_patch.c +++ b/src/diff_patch.c @@ -30,6 +30,10 @@ static void diff_patch_update_binary(git_patch *patch) (patch->nfile.file->flags & GIT_DIFF_FLAG_BINARY) != 0) patch->delta->flags |= GIT_DIFF_FLAG_BINARY; + else if (patch->ofile.file->size > GIT_XDIFF_MAX_SIZE || + patch->nfile.file->size > GIT_XDIFF_MAX_SIZE) + patch->delta->flags |= GIT_DIFF_FLAG_BINARY; + else if ((patch->ofile.file->flags & DIFF_FLAGS_NOT_BINARY) != 0 && (patch->nfile.file->flags & DIFF_FLAGS_NOT_BINARY) != 0) patch->delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; diff --git a/src/diff_xdiff.c b/src/diff_xdiff.c index e5984f1c9..1057df3aa 100644 --- a/src/diff_xdiff.c +++ b/src/diff_xdiff.c @@ -4,6 +4,7 @@ * This file is part of libgit2, distributed under the GNU GPL v2 with * a Linking Exception. For full terms see the included COPYING file. */ +#include "git2/errors.h" #include "common.h" #include "diff.h" #include "diff_driver.h" @@ -208,6 +209,12 @@ static int git_xdiff(git_diff_output *output, git_patch *patch) git_patch__old_data(&info.xd_old_data.ptr, &info.xd_old_data.size, patch); git_patch__new_data(&info.xd_new_data.ptr, &info.xd_new_data.size, patch); + if (info.xd_old_data.size > GIT_XDIFF_MAX_SIZE || + info.xd_new_data.size > GIT_XDIFF_MAX_SIZE) { + giterr_set(GITERR_INVALID, "files too large for diff"); + return -1; + } + xdl_diff(&info.xd_old_data, &info.xd_new_data, &xo->params, &xo->config, &xo->callback); diff --git a/src/diff_xdiff.h b/src/diff_xdiff.h index c547b00cf..98e11b2cb 100644 --- a/src/diff_xdiff.h +++ b/src/diff_xdiff.h @@ -11,6 +11,11 @@ #include "diff_patch.h" #include "xdiff/xdiff.h" +/* xdiff cannot cope with large files. these files should not be passed to + * xdiff. callers should treat these large files as binary. + */ +#define GIT_XDIFF_MAX_SIZE (1024LL * 1024 * 1023) + /* A git_xdiff_output is a git_diff_output with extra fields necessary * to use libxdiff. Calling git_xdiff_init() will set the diff_cb field * of the output to use xdiff to generate the diffs. diff --git a/src/merge.c b/src/merge.c index 89b8e8505..930457bdb 100644 --- a/src/merge.c +++ b/src/merge.c @@ -20,7 +20,6 @@ #include "diff.h" #include "checkout.h" #include "tree.h" -#include "merge_file.h" #include "blob.h" #include "oid.h" #include "index.h" diff --git a/src/merge_file.c b/src/merge_file.c index b174231cc..6d4738065 100644 --- a/src/merge_file.c +++ b/src/merge_file.c @@ -7,10 +7,10 @@ #include "common.h" #include "repository.h" -#include "merge_file.h" #include "posix.h" #include "fileops.h" #include "index.h" +#include "diff_xdiff.h" #include "git2/repository.h" #include "git2/object.h" @@ -199,7 +199,7 @@ static bool merge_file__is_binary(const git_merge_file_input *file) { size_t len = file ? file->size : 0; - if (len > GIT_MERGE_FILE_XDIFF_MAX) + if (len > GIT_XDIFF_MAX_SIZE) return true; if (len > GIT_MERGE_FILE_BINARY_SIZE) len = GIT_MERGE_FILE_BINARY_SIZE; diff --git a/src/merge_file.h b/src/merge_file.h deleted file mode 100644 index 08ecfc095..000000000 --- a/src/merge_file.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ -#ifndef INCLUDE_filediff_h__ -#define INCLUDE_filediff_h__ - -/* xdiff cannot cope with large files, just treat them as binary */ -#define GIT_MERGE_FILE_XDIFF_MAX (1024UL * 1024 * 1023) - -#endif diff --git a/tests/merge/files.c b/tests/merge/files.c index b365d5a42..2d55df2b2 100644 --- a/tests/merge/files.c +++ b/tests/merge/files.c @@ -3,10 +3,10 @@ #include "git2/merge.h" #include "buffer.h" #include "merge.h" -#include "merge_file.h" #include "merge_helpers.h" #include "refs.h" #include "fileops.h" +#include "diff_xdiff.h" #define TEST_REPO_PATH "merge-resolve" #define TEST_INDEX_PATH TEST_REPO_PATH "/.git/index" @@ -296,11 +296,11 @@ void test_merge_files__skips_large_files(void) git_merge_file_options opts = GIT_MERGE_FILE_OPTIONS_INIT; git_merge_file_result result = {0}; - ours.size = GIT_MERGE_FILE_XDIFF_MAX + 1; + ours.size = GIT_XDIFF_MAX_SIZE + 1; ours.path = "testfile.txt"; ours.mode = 0100755; - theirs.size = GIT_MERGE_FILE_XDIFF_MAX + 1; + theirs.size = GIT_XDIFF_MAX_SIZE + 1; theirs.path = "testfile.txt"; theirs.mode = 0100755; From ae195a71ae898b7d5d61943f24f31d97065a87c6 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Tue, 29 Sep 2015 12:46:41 -0400 Subject: [PATCH 5/5] blame: guard xdiff calls for large files --- src/blame.c | 2 +- src/blame_git.c | 34 +++++++++++++++++++++++++++------- src/blame_git.h | 2 +- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/blame.c b/src/blame.c index a52f9402b..08a90dcfd 100644 --- a/src/blame.c +++ b/src/blame.c @@ -331,7 +331,7 @@ static int blame_internal(git_blame *blame) blame->ent = ent; - git_blame__like_git(blame, blame->options.flags); + error = git_blame__like_git(blame, blame->options.flags); cleanup: for (ent = blame->ent; ent; ) { diff --git a/src/blame_git.c b/src/blame_git.c index f481426aa..67bae2384 100644 --- a/src/blame_git.c +++ b/src/blame_git.c @@ -9,6 +9,7 @@ #include "commit.h" #include "blob.h" #include "xdiff/xinclude.h" +#include "diff_xdiff.h" /* * Origin is refcounted and usually we keep the blob contents to be @@ -351,6 +352,13 @@ static int diff_hunks(mmfile_t file_a, mmfile_t file_b, void *cb_data) ecb.priv = cb_data; trim_common_tail(&file_a, &file_b, 0); + + if (file_a.size > GIT_XDIFF_MAX_SIZE || + file_b.size > GIT_XDIFF_MAX_SIZE) { + giterr_set(GITERR_INVALID, "file too large to blame"); + return -1; + } + return xdl_diff(&file_a, &file_b, &xpp, &xecfg, &ecb); } @@ -379,7 +387,9 @@ static int pass_blame_to_parent( fill_origin_blob(parent, &file_p); fill_origin_blob(target, &file_o); - diff_hunks(file_p, file_o, &d); + if (diff_hunks(file_p, file_o, &d) < 0) + return -1; + /* The reset (i.e. anything after tlno) are the same as the parent */ blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent); @@ -477,12 +487,13 @@ static void pass_whole_blame(git_blame *blame, } } -static void pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt) +static int pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt) { git_commit *commit = origin->commit; int i, num_parents; git_blame__origin *sg_buf[16]; git_blame__origin *porigin, **sg_origin = sg_buf; + int ret, error = 0; num_parents = git_commit_parentcount(commit); if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit)) @@ -540,8 +551,13 @@ static void pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt origin_incref(porigin); origin->previous = porigin; } - if (pass_blame_to_parent(blame, origin, porigin)) + + if ((ret = pass_blame_to_parent(blame, origin, porigin)) != 0) { + if (ret < 0) + error = -1; + goto finish; + } } /* TODO: optionally find moves in parents' files */ @@ -554,7 +570,7 @@ finish: origin_decref(sg_origin[i]); if (sg_origin != sg_buf) git__free(sg_origin); - return; + return error; } /* @@ -583,7 +599,7 @@ static void coalesce(git_blame *blame) } } -void git_blame__like_git(git_blame *blame, uint32_t opt) +int git_blame__like_git(git_blame *blame, uint32_t opt) { while (true) { git_blame__entry *ent; @@ -594,11 +610,13 @@ void git_blame__like_git(git_blame *blame, uint32_t opt) if (!ent->guilty) suspect = ent->suspect; if (!suspect) - return; /* all done */ + return 0; /* all done */ /* We'll use this suspect later in the loop, so hold on to it for now. */ origin_incref(suspect); - pass_blame(blame, suspect, opt); + + if (pass_blame(blame, suspect, opt) < 0) + return -1; /* Take responsibility for the remaining entries */ for (ent = blame->ent; ent; ent = ent->next) { @@ -613,6 +631,8 @@ void git_blame__like_git(git_blame *blame, uint32_t opt) } coalesce(blame); + + return 0; } void git_blame__free_entry(git_blame__entry *ent) diff --git a/src/blame_git.h b/src/blame_git.h index 3ec2710b8..1891b0e1f 100644 --- a/src/blame_git.h +++ b/src/blame_git.h @@ -15,6 +15,6 @@ int git_blame__get_origin( git_commit *commit, const char *path); void git_blame__free_entry(git_blame__entry *ent); -void git_blame__like_git(git_blame *sb, uint32_t flags); +int git_blame__like_git(git_blame *sb, uint32_t flags); #endif