diff --git a/include/git2/diff.h b/include/git2/diff.h index 4b4591a9e..05825c50d 100644 --- a/include/git2/diff.h +++ b/include/git2/diff.h @@ -79,13 +79,28 @@ typedef struct { */ typedef struct git_diff_list git_diff_list; +/** + * Flags that can be set for the file on side of a diff. + * + * Most of the flags are just for internal consumption by libgit2, + * but some of them may be interesting to external users. They are: + * + * - VALID_OID - the `oid` value is computed and correct + * - FREE_PATH - the `path` string is separated allocated memory + * - BINARY - this file should be considered binary data + * - NOT_BINARY - this file should be considered text data + * - FREE_DATA - the internal file data is kept in allocated memory + * - UNMAP_DATA - the internal file data is kept in mmap'ed memory + * - NO_DATA - this side of the diff should not be loaded + */ enum { GIT_DIFF_FILE_VALID_OID = (1 << 0), GIT_DIFF_FILE_FREE_PATH = (1 << 1), GIT_DIFF_FILE_BINARY = (1 << 2), GIT_DIFF_FILE_NOT_BINARY = (1 << 3), GIT_DIFF_FILE_FREE_DATA = (1 << 4), - GIT_DIFF_FILE_UNMAP_DATA = (1 << 5) + GIT_DIFF_FILE_UNMAP_DATA = (1 << 5), + GIT_DIFF_FILE_NO_DATA = (1 << 6), }; /** @@ -176,7 +191,7 @@ enum { GIT_DIFF_LINE_CONTEXT = ' ', GIT_DIFF_LINE_ADDITION = '+', GIT_DIFF_LINE_DELETION = '-', - GIT_DIFF_LINE_ADD_EOFNL = '\n', /**< DEPRECATED - will not be returned */ + GIT_DIFF_LINE_ADD_EOFNL = '\n', /**< Removed line w/o LF & added one with */ GIT_DIFF_LINE_DEL_EOFNL = '\0', /**< LF was removed at end of file */ /* The following values will only be sent to a `git_diff_data_fn` when diff --git a/src/diff_output.c b/src/diff_output.c index dbef7ddc1..ea40c3355 100644 --- a/src/diff_output.c +++ b/src/diff_output.c @@ -172,9 +172,13 @@ static void update_delta_is_binary(git_diff_delta *delta) if ((delta->old_file.flags & GIT_DIFF_FILE_BINARY) != 0 || (delta->new_file.flags & GIT_DIFF_FILE_BINARY) != 0) delta->binary = 1; - else if ((delta->old_file.flags & GIT_DIFF_FILE_NOT_BINARY) != 0 && - (delta->new_file.flags & GIT_DIFF_FILE_NOT_BINARY) != 0) + +#define NOT_BINARY_FLAGS (GIT_DIFF_FILE_NOT_BINARY|GIT_DIFF_FILE_NO_DATA) + + else if ((delta->old_file.flags & NOT_BINARY_FLAGS) != 0 && + (delta->new_file.flags & NOT_BINARY_FLAGS) != 0) delta->binary = 0; + /* otherwise leave delta->binary value untouched */ } @@ -507,7 +511,7 @@ static int diff_delta_load(diff_delta_context *ctxt) { int error = 0; git_diff_delta *delta = ctxt->delta; - bool load_old = false, load_new = false, check_if_unmodified = false; + bool check_if_unmodified = false; if (ctxt->loaded || !ctxt->delta) return 0; @@ -527,22 +531,33 @@ static int diff_delta_load(diff_delta_context *ctxt) goto cleanup; switch (delta->status) { - case GIT_DELTA_ADDED: load_new = true; break; - case GIT_DELTA_DELETED: load_old = true; break; - case GIT_DELTA_MODIFIED: load_new = load_old = true; break; - default: break; + case GIT_DELTA_ADDED: + delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA; + break; + case GIT_DELTA_DELETED: + delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA; + break; + case GIT_DELTA_MODIFIED: + break; + default: + delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA; + delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA; + break; } +#define CHECK_UNMODIFIED (GIT_DIFF_FILE_NO_DATA | GIT_DIFF_FILE_VALID_OID) + check_if_unmodified = - (load_old && (delta->old_file.flags & GIT_DIFF_FILE_VALID_OID) == 0) || - (load_new && (delta->new_file.flags & GIT_DIFF_FILE_VALID_OID) == 0); + (delta->old_file.flags & CHECK_UNMODIFIED) == 0 && + (delta->new_file.flags & CHECK_UNMODIFIED) == 0; /* Always try to load workdir content first, since it may need to be * filtered (and hence use 2x memory) and we want to minimize the max * memory footprint during diff. */ - if (load_old && ctxt->old_src == GIT_ITERATOR_WORKDIR) { + if ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + ctxt->old_src == GIT_ITERATOR_WORKDIR) { if ((error = get_workdir_content( ctxt, &delta->old_file, &ctxt->old_data)) < 0) goto cleanup; @@ -550,7 +565,8 @@ static int diff_delta_load(diff_delta_context *ctxt) goto cleanup; } - if (load_new && ctxt->new_src == GIT_ITERATOR_WORKDIR) { + if ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + ctxt->new_src == GIT_ITERATOR_WORKDIR) { if ((error = get_workdir_content( ctxt, &delta->new_file, &ctxt->new_data)) < 0) goto cleanup; @@ -558,7 +574,8 @@ static int diff_delta_load(diff_delta_context *ctxt) goto cleanup; } - if (load_old && ctxt->old_src != GIT_ITERATOR_WORKDIR) { + if ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + ctxt->old_src != GIT_ITERATOR_WORKDIR) { if ((error = get_blob_content( ctxt, &delta->old_file, &ctxt->old_data, &ctxt->old_blob)) < 0) goto cleanup; @@ -566,7 +583,8 @@ static int diff_delta_load(diff_delta_context *ctxt) goto cleanup; } - if (load_new && ctxt->new_src != GIT_ITERATOR_WORKDIR) { + if ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + ctxt->new_src != GIT_ITERATOR_WORKDIR) { if ((error = get_blob_content( ctxt, &delta->new_file, &ctxt->new_data, &ctxt->new_blob)) < 0) goto cleanup; @@ -588,6 +606,10 @@ static int diff_delta_load(diff_delta_context *ctxt) } cleanup: + /* last change to update binary flag */ + if (delta->binary == -1) + update_delta_is_binary(delta); + ctxt->loaded = !error; /* flag if we would want to diff the contents of these files */ @@ -629,9 +651,10 @@ static int diff_delta_cb(void *priv, mmbuffer_t *bufs, int len) } if (len == 3 && !ctxt->cb_error) { - /* This should only happen if we are adding a line that does not - * have a newline at the end and the old code did. In that case, - * we have a ADD with a DEL_EOFNL as a pair. + /* If we have a '+' and a third buf, then we have added a line + * without a newline and the old code had one, so DEL_EOFNL. + * If we have a '-' and a third buf, then we have removed a line + * with out a newline but added a blank line, so ADD_EOFNL. */ char origin = (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_DEL_EOFNL : @@ -1036,6 +1059,7 @@ static void set_data_from_blob( } else { map->data = ""; file->size = map->len = 0; + file->flags |= GIT_DIFF_FILE_NO_DATA; } } diff --git a/src/fileops.c b/src/fileops.c index d4def1a9a..cbe3d4782 100644 --- a/src/fileops.c +++ b/src/fileops.c @@ -115,40 +115,26 @@ mode_t git_futils_canonical_mode(mode_t raw_mode) return 0; } -#define MAX_READ_STALLS 10 - int git_futils_readbuffer_fd(git_buf *buf, git_file fd, size_t len) { - int stalls = MAX_READ_STALLS; + ssize_t read_size; git_buf_clear(buf); if (git_buf_grow(buf, len + 1) < 0) return -1; - buf->ptr[len] = '\0'; + /* p_read loops internally to read len bytes */ + read_size = p_read(fd, buf->ptr, len); - while (len > 0) { - ssize_t read_size = p_read(fd, buf->ptr + buf->size, len); - - if (read_size < 0) { - giterr_set(GITERR_OS, "Failed to read descriptor"); - return -1; - } - - if (read_size == 0) { - stalls--; - - if (!stalls) { - giterr_set(GITERR_OS, "Too many stalls reading descriptor"); - return -1; - } - } - - len -= read_size; - buf->size += read_size; + if (read_size < 0) { + giterr_set(GITERR_OS, "Failed to read descriptor"); + return -1; } + buf->ptr[read_size] = '\0'; + buf->size = read_size; + return 0; } diff --git a/tests-clar/diff/diff_helpers.c b/tests-clar/diff/diff_helpers.c index ef59b686f..767b34392 100644 --- a/tests-clar/diff/diff_helpers.c +++ b/tests-clar/diff/diff_helpers.c @@ -89,7 +89,8 @@ int diff_line_fn( e->line_adds++; break; case GIT_DIFF_LINE_ADD_EOFNL: - assert(0); + /* technically not a line add, but we'll count it as such */ + e->line_adds++; break; case GIT_DIFF_LINE_DELETION: e->line_dels++; diff --git a/tests-clar/diff/diffiter.c b/tests-clar/diff/diffiter.c index 56c254741..23071e48b 100644 --- a/tests-clar/diff/diffiter.c +++ b/tests-clar/diff/diffiter.c @@ -114,3 +114,88 @@ void test_diff_diffiter__iterate_files_and_hunks(void) git_diff_iterator_free(iter); git_diff_list_free(diff); } + +void test_diff_diffiter__max_size_threshold(void) +{ + git_repository *repo = cl_git_sandbox_init("status"); + git_diff_options opts = {0}; + git_diff_list *diff = NULL; + git_diff_iterator *iter; + git_diff_delta *delta; + int error, file_count = 0, binary_count = 0, hunk_count = 0; + + opts.context_lines = 3; + opts.interhunk_lines = 1; + opts.flags |= GIT_DIFF_INCLUDE_IGNORED | GIT_DIFF_INCLUDE_UNTRACKED; + + cl_git_pass(git_diff_workdir_to_index(repo, &opts, &diff)); + cl_git_pass(git_diff_iterator_new(&iter, diff)); + + while ((error = git_diff_iterator_next_file(&delta, iter)) != GIT_ITEROVER) { + cl_assert_equal_i(0, error); + cl_assert(delta); + + file_count++; + + hunk_count += git_diff_iterator_num_hunks_in_file(iter); + + assert(delta->binary == 0 || delta->binary == 1); + + binary_count += delta->binary; + } + + cl_assert_equal_i(GIT_ITEROVER, error); + cl_assert(delta == NULL); + + cl_assert_equal_i(13, file_count); + cl_assert_equal_i(0, binary_count); + cl_assert_equal_i(8, hunk_count); + + git_diff_iterator_free(iter); + git_diff_list_free(diff); + + /* try again with low file size threshold */ + + file_count = 0; + binary_count = 0; + hunk_count = 0; + + opts.context_lines = 3; + opts.interhunk_lines = 1; + opts.flags |= GIT_DIFF_INCLUDE_IGNORED | GIT_DIFF_INCLUDE_UNTRACKED; + opts.max_size = 50; /* treat anything over 50 bytes as binary! */ + + cl_git_pass(git_diff_workdir_to_index(repo, &opts, &diff)); + cl_git_pass(git_diff_iterator_new(&iter, diff)); + + while ((error = git_diff_iterator_next_file(&delta, iter)) != GIT_ITEROVER) { + cl_assert_equal_i(0, error); + cl_assert(delta); + + file_count++; + + hunk_count += git_diff_iterator_num_hunks_in_file(iter); + + assert(delta->binary == 0 || delta->binary == 1); + + binary_count += delta->binary; + } + + cl_assert_equal_i(GIT_ITEROVER, error); + cl_assert(delta == NULL); + + cl_assert_equal_i(13, file_count); + + /* Three files are over the 50 byte threshold: + * - staged_changes_file_deleted + * - staged_changes_modified_file + * - staged_new_file_modified_file + */ + cl_assert_equal_i(3, binary_count); + + cl_assert_equal_i(5, hunk_count); + + git_diff_iterator_free(iter); + git_diff_list_free(diff); + +}