From 41019152a03815be27139764ceed05fd5a0e4b58 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Mar 2017 10:01:56 +0100 Subject: [PATCH 1/5] patch_generate: remove duplicated logic Under the existing logic, we try to load patch contents differently, depending on whether the patch files stem from the working directory or not. But actually, the executed code paths are completely equal to each other -- so we were always the code despite the condition. Remove the condition altogether and conflate both code paths. --- src/patch_generate.c | 37 ++++++++----------------------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/src/patch_generate.c b/src/patch_generate.c index ab68f5801..d986fd3ab 100644 --- a/src/patch_generate.c +++ b/src/patch_generate.c @@ -206,35 +206,14 @@ static int patch_generated_load(git_patch_generated *patch, git_patch_generated_ ((patch->nfile.flags & GIT_DIFF_FLAG__NO_DATA) != 0 || (patch->nfile.file->flags & GIT_DIFF_FLAG_VALID_ID) != 0)); - /* always try to load workdir content first because filtering may - * need 2x data size and this minimizes peak memory footprint - */ - if (patch->ofile.src == GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = git_diff_file_content__load( - &patch->ofile, &patch->base.diff_opts)) < 0 || - should_skip_binary(patch, patch->ofile.file)) - goto cleanup; - } - if (patch->nfile.src == GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = git_diff_file_content__load( - &patch->nfile, &patch->base.diff_opts)) < 0 || - should_skip_binary(patch, patch->nfile.file)) - goto cleanup; - } - - /* once workdir has been tried, load other data as needed */ - if (patch->ofile.src != GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = git_diff_file_content__load( - &patch->ofile, &patch->base.diff_opts)) < 0 || - should_skip_binary(patch, patch->ofile.file)) - goto cleanup; - } - if (patch->nfile.src != GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = git_diff_file_content__load( - &patch->nfile, &patch->base.diff_opts)) < 0 || - should_skip_binary(patch, patch->nfile.file)) - goto cleanup; - } + if ((error = git_diff_file_content__load( + &patch->ofile, &patch->base.diff_opts)) < 0 || + should_skip_binary(patch, patch->ofile.file)) + goto cleanup; + if ((error = git_diff_file_content__load( + &patch->nfile, &patch->base.diff_opts)) < 0 || + should_skip_binary(patch, patch->nfile.file)) + goto cleanup; /* if previously missing an oid, and now that we have it the two sides * are the same (and not submodules), update MODIFIED -> UNMODIFIED From ace3508f4c2ad7f821bc79d0b34e7ac55fac4d12 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Mar 2017 10:37:47 +0100 Subject: [PATCH 2/5] patch_generate: fix `git_diff_foreach` only working with generated diffs The current logic of `git_diff_foreach` makes the assumption that all diffs passed in are actually derived from generated diffs. With these assumptions we try to derive the actual diff by inspecting either the working directory files or blobs of a repository. This obviously cannot work for diffs parsed from a file, where we do not necessarily have a repository at hand. Since the introduced split of parsed and generated patches, there are multiple functions which help us to handle patches generically, being indifferent from where they stem from. Use these functions and remove the old logic specific to generated patches. This allows re-using the same code for invoking the callbacks on the deltas. --- src/patch_generate.c | 32 +++++++++----------------------- tests/diff/parse.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/patch_generate.c b/src/patch_generate.c index d986fd3ab..bba116236 100644 --- a/src/patch_generate.c +++ b/src/patch_generate.c @@ -409,39 +409,25 @@ int git_diff_foreach( void *payload) { int error = 0; - git_xdiff_output xo; + git_diff_delta *delta; size_t idx; - git_patch_generated patch; if ((error = diff_required(diff, "git_diff_foreach")) < 0) return error; - memset(&xo, 0, sizeof(xo)); - memset(&patch, 0, sizeof(patch)); - diff_output_init( - &xo.output, &diff->opts, file_cb, binary_cb, hunk_cb, data_cb, payload); - git_xdiff_init(&xo, &diff->opts); - - git_vector_foreach(&diff->deltas, idx, patch.base.delta) { + git_vector_foreach(&diff->deltas, idx, delta) { + git_patch *patch; /* check flags against patch status */ - if (git_diff_delta__should_skip(&diff->opts, patch.base.delta)) + if (git_diff_delta__should_skip(&diff->opts, delta)) continue; - if (binary_cb || hunk_cb || data_cb) { - if ((error = patch_generated_init(&patch, diff, idx)) != 0 || - (error = patch_generated_load(&patch, &xo.output)) != 0) { - git_patch_free(&patch.base); - return error; - } - } + if ((error = git_patch_from_diff(&patch, diff, idx)) != 0) + break; - if ((error = patch_generated_invoke_file_callback(&patch, &xo.output)) == 0) { - if (binary_cb || hunk_cb || data_cb) - error = patch_generated_create(&patch, &xo.output); - } - - git_patch_free(&patch.base); + error = git_patch__invoke_callbacks(patch, file_cb, binary_cb, + hunk_cb, data_cb, payload); + git_patch_free(patch); if (error) break; diff --git a/tests/diff/parse.c b/tests/diff/parse.c index a06813d1b..b79b19e8e 100644 --- a/tests/diff/parse.c +++ b/tests/diff/parse.c @@ -196,3 +196,32 @@ void test_diff_parse__get_patch_from_diff(void) cl_git_sandbox_cleanup(); } + +static int file_cb(const git_diff_delta *delta, float progress, void *payload) +{ + int *called = (int *) payload; + GIT_UNUSED(delta); + GIT_UNUSED(progress); + (*called)++; + return 0; +} + +void test_diff_parse__foreach_works_with_parsed_patch(void) +{ + const char patch[] = + "diff --git a/obj1 b/obj2\n" + "index 1234567..7654321 10644\n" + "--- a/obj1\n" + "+++ b/obj2\n" + "@@ -1 +1 @@\n" + "-abcde\n" + "+12345\n"; + int called = 0; + git_diff *diff; + + cl_git_pass(git_diff_from_buffer(&diff, patch, strlen(patch))); + cl_git_pass(git_diff_foreach(diff, file_cb, NULL, NULL, NULL, &called)); + cl_assert_equal_i(called, 1); + + git_diff_free(diff); +} From 62a2fc06d4af781c456bf753d6ada16df682df55 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Mar 2017 13:06:25 +0100 Subject: [PATCH 3/5] patch_generate: move `git_diff_foreach` to diff.c Now that the `git_diff_foreach` function does not depend on internals of the `git_patch_generated` structure anymore, we can easily move it to the actual diff code. --- src/diff.c | 35 +++++++++++++++++++++++++++++++++++ src/patch_generate.c | 36 ------------------------------------ 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/src/diff.c b/src/diff.c index 317d49597..a93bd4cd0 100644 --- a/src/diff.c +++ b/src/diff.c @@ -120,6 +120,41 @@ int git_diff_get_perfdata(git_diff_perfdata *out, const git_diff *diff) return 0; } +int git_diff_foreach( + git_diff *diff, + git_diff_file_cb file_cb, + git_diff_binary_cb binary_cb, + git_diff_hunk_cb hunk_cb, + git_diff_line_cb data_cb, + void *payload) +{ + int error = 0; + git_diff_delta *delta; + size_t idx; + + assert(diff); + + git_vector_foreach(&diff->deltas, idx, delta) { + git_patch *patch; + + /* check flags against patch status */ + if (git_diff_delta__should_skip(&diff->opts, delta)) + continue; + + if ((error = git_patch_from_diff(&patch, diff, idx)) != 0) + break; + + error = git_patch__invoke_callbacks(patch, file_cb, binary_cb, + hunk_cb, data_cb, payload); + git_patch_free(patch); + + if (error) + break; + } + + return error; +} + int git_diff_format_email__append_header_tobuf( git_buf *out, const git_oid *id, diff --git a/src/patch_generate.c b/src/patch_generate.c index bba116236..804fc0e09 100644 --- a/src/patch_generate.c +++ b/src/patch_generate.c @@ -400,42 +400,6 @@ static int diff_required(git_diff *diff, const char *action) return -1; } -int git_diff_foreach( - git_diff *diff, - git_diff_file_cb file_cb, - git_diff_binary_cb binary_cb, - git_diff_hunk_cb hunk_cb, - git_diff_line_cb data_cb, - void *payload) -{ - int error = 0; - git_diff_delta *delta; - size_t idx; - - if ((error = diff_required(diff, "git_diff_foreach")) < 0) - return error; - - git_vector_foreach(&diff->deltas, idx, delta) { - git_patch *patch; - - /* check flags against patch status */ - if (git_diff_delta__should_skip(&diff->opts, delta)) - continue; - - if ((error = git_patch_from_diff(&patch, diff, idx)) != 0) - break; - - error = git_patch__invoke_callbacks(patch, file_cb, binary_cb, - hunk_cb, data_cb, payload); - git_patch_free(patch); - - if (error) - break; - } - - return error; -} - typedef struct { git_patch_generated patch; git_diff_delta delta; From ad5a909cfbb07541e3e2548313043cc3f3a0918a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Mar 2017 09:39:37 +0100 Subject: [PATCH 4/5] patch_parse: fix parsing minimal trailing diff line In a diff, the shortest possible hunk with a modification (that is, no deletion) results from a file with only one line with a single character which is removed. Thus the following hunk @@ -1 +1 @@ -a + is the shortest valid hunk modifying a line. The function parsing the hunk body though assumes that there must always be at least 4 bytes present to make up a valid hunk, which is obviously wrong in this case. The absolute minimum number of bytes required for a modification is actually 2 bytes, that is the "+" and the following newline. Note: if there is no trailing newline, the assumption will not be offended as the diff will have a line "\ No trailing newline" at its end. This patch fixes the issue by lowering the amount of bytes required. --- src/patch_parse.c | 5 +++-- tests/diff/parse.c | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/patch_parse.c b/src/patch_parse.c index f5275947d..d993c0311 100644 --- a/src/patch_parse.c +++ b/src/patch_parse.c @@ -562,8 +562,9 @@ static int parse_hunk_body( int newlines = hunk->hunk.new_lines; for (; - ctx->remain_len > 4 && (oldlines || newlines) && - memcmp(ctx->line, "@@ -", 4) != 0; + ctx->remain_len > 1 && + (oldlines || newlines) && + (ctx->remain_len <= 4 || memcmp(ctx->line, "@@ -", 4) != 0); parse_advance_line(ctx)) { int origin; diff --git a/tests/diff/parse.c b/tests/diff/parse.c index b79b19e8e..35870594a 100644 --- a/tests/diff/parse.c +++ b/tests/diff/parse.c @@ -225,3 +225,24 @@ void test_diff_parse__foreach_works_with_parsed_patch(void) git_diff_free(diff); } + +void test_diff_parse__parsing_minimal_patch_succeeds(void) +{ + const char patch[] = + "diff --git a/obj1 b/obj2\n" + "index 1234567..7654321 10644\n" + "--- a/obj1\n" + "+++ b/obj2\n" + "@@ -1 +1 @@\n" + "-a\n" + "+\n"; + git_buf buf = GIT_BUF_INIT; + git_diff *diff; + + cl_git_pass(git_diff_from_buffer(&diff, patch, strlen(patch))); + cl_git_pass(git_diff_to_buf(&buf, diff, GIT_DIFF_FORMAT_PATCH)); + cl_assert_equal_s(patch, buf.ptr); + + git_diff_free(diff); + git_buf_free(&buf); +} From c0eba379d18c6f7979d09c672b48feeaca69dfbd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 14 Mar 2017 11:01:19 +0100 Subject: [PATCH 5/5] diff_parse: correctly set options for parsed diffs The function `diff_parsed_alloc` allocates and initializes a `git_diff_parsed` structure. This structure also contains diff options. While we initialize its flags, we fail to do a real initialization of its values. This bites us when we want to actually use the generated diff as we do not se the option's version field, which is required to operate correctly. Fix the issue by executing `git_diff_init_options` on the embedded struct. --- src/diff_parse.c | 4 +++- tests/diff/parse.c | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/diff_parse.c b/src/diff_parse.c index e640063af..93915683e 100644 --- a/src/diff_parse.c +++ b/src/diff_parse.c @@ -37,7 +37,6 @@ static git_diff_parsed *diff_parsed_alloc(void) GIT_REFCOUNT_INC(diff); diff->base.type = GIT_DIFF_TYPE_PARSED; - diff->base.opts.flags &= ~GIT_DIFF_IGNORE_CASE; diff->base.strcomp = git__strcmp; diff->base.strncomp = git__strncmp; diff->base.pfxcomp = git__prefixcmp; @@ -45,6 +44,9 @@ static git_diff_parsed *diff_parsed_alloc(void) diff->base.patch_fn = git_patch_parsed_from_diff; diff->base.free_fn = diff_parsed_free; + git_diff_init_options(&diff->base.opts, GIT_DIFF_OPTIONS_VERSION); + diff->base.opts.flags &= ~GIT_DIFF_IGNORE_CASE; + git_pool_init(&diff->base.pool, 1); if (git_vector_init(&diff->patches, 0, NULL) < 0 || diff --git a/tests/diff/parse.c b/tests/diff/parse.c index 35870594a..acb6eb8a5 100644 --- a/tests/diff/parse.c +++ b/tests/diff/parse.c @@ -246,3 +246,24 @@ void test_diff_parse__parsing_minimal_patch_succeeds(void) git_diff_free(diff); git_buf_free(&buf); } + +void test_diff_parse__patch_roundtrip_succeeds(void) +{ + const char buf1[] = "a\n", buf2[] = "b\n"; + git_buf patchbuf = GIT_BUF_INIT, diffbuf = GIT_BUF_INIT; + git_patch *patch; + git_diff *diff; + + cl_git_pass(git_patch_from_buffers(&patch, buf1, strlen(buf1), "obj1", buf2, strlen(buf2), "obj2", NULL)); + cl_git_pass(git_patch_to_buf(&patchbuf, patch)); + + cl_git_pass(git_diff_from_buffer(&diff, patchbuf.ptr, patchbuf.size)); + cl_git_pass(git_diff_to_buf(&diffbuf, diff, GIT_DIFF_FORMAT_PATCH)); + + cl_assert_equal_s(patchbuf.ptr, diffbuf.ptr); + + git_patch_free(patch); + git_diff_free(diff); + git_buf_free(&patchbuf); + git_buf_free(&diffbuf); +}