mirror of
https://git.proxmox.com/git/libgit2
synced 2025-05-28 20:51:11 +00:00
Merge pull request #1979 from libgit2/rb/diff-find-delete-unmod
Add GIT_DIFF_FIND_REMOVE_UNMODIFIED flag and fix copy detection bug
This commit is contained in:
commit
96fb6a647f
@ -468,41 +468,71 @@ typedef int (*git_diff_line_cb)(
|
||||
* Flags to control the behavior of diff rename/copy detection.
|
||||
*/
|
||||
typedef enum {
|
||||
/** look for renames? (`--find-renames`) */
|
||||
/** Look for renames? (`--find-renames`) */
|
||||
GIT_DIFF_FIND_RENAMES = (1u << 0),
|
||||
/** consider old side of modified for renames? (`--break-rewrites=N`) */
|
||||
|
||||
/** Consider old side of MODIFIED for renames? (`--break-rewrites=N`) */
|
||||
GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1),
|
||||
|
||||
/** look for copies? (a la `--find-copies`) */
|
||||
/** Look for copies? (a la `--find-copies`). */
|
||||
GIT_DIFF_FIND_COPIES = (1u << 2),
|
||||
/** consider unmodified as copy sources? (`--find-copies-harder`) */
|
||||
|
||||
/** Consider UNMODIFIED as copy sources? (`--find-copies-harder`).
|
||||
*
|
||||
* For this to work correctly, use GIT_DIFF_INCLUDE_UNMODIFIED when
|
||||
* the initial `git_diff` is being generated.
|
||||
*/
|
||||
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3),
|
||||
|
||||
/** mark large rewrites for split (`--break-rewrites=/M`) */
|
||||
/** Mark significant rewrites for split (`--break-rewrites=/M`) */
|
||||
GIT_DIFF_FIND_REWRITES = (1u << 4),
|
||||
/** actually split large rewrites into delete/add pairs */
|
||||
/** Actually split large rewrites into delete/add pairs */
|
||||
GIT_DIFF_BREAK_REWRITES = (1u << 5),
|
||||
/** mark rewrites for split and break into delete/add pairs */
|
||||
/** Mark rewrites for split and break into delete/add pairs */
|
||||
GIT_DIFF_FIND_AND_BREAK_REWRITES =
|
||||
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
|
||||
|
||||
/** find renames/copies for untracked items in working directory */
|
||||
/** Find renames/copies for UNTRACKED items in working directory.
|
||||
*
|
||||
* For this to work correctly, use GIT_DIFF_INCLUDE_UNTRACKED when the
|
||||
* initial `git_diff` is being generated (and obviously the diff must
|
||||
* be against the working directory for this to make sense).
|
||||
*/
|
||||
GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6),
|
||||
|
||||
/** turn on all finding features */
|
||||
/** Turn on all finding features. */
|
||||
GIT_DIFF_FIND_ALL = (0x0ff),
|
||||
|
||||
/** measure similarity ignoring leading whitespace (default) */
|
||||
/** Measure similarity ignoring leading whitespace (default) */
|
||||
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
|
||||
/** measure similarity ignoring all whitespace */
|
||||
/** Measure similarity ignoring all whitespace */
|
||||
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12),
|
||||
/** measure similarity including all data */
|
||||
/** Measure similarity including all data */
|
||||
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13),
|
||||
/** measure similarity only by comparing SHAs (fast and cheap) */
|
||||
/** Measure similarity only by comparing SHAs (fast and cheap) */
|
||||
GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14),
|
||||
|
||||
/** do not break rewrites unless they contribute to a rename */
|
||||
/** Do not break rewrites unless they contribute to a rename.
|
||||
*
|
||||
* Normally, GIT_DIFF_FIND_AND_BREAK_REWRITES will measure the self-
|
||||
* similarity of modified files and split the ones that have changed a
|
||||
* lot into a DELETE / ADD pair. Then the sides of that pair will be
|
||||
* considered candidates for rename and copy detection.
|
||||
*
|
||||
* If you add this flag in and the split pair is *not* used for an
|
||||
* actual rename or copy, then the modified record will be restored to
|
||||
* a regular MODIFIED record instead of being split.
|
||||
*/
|
||||
GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY = (1u << 15),
|
||||
|
||||
/** Remove any UNMODIFIED deltas after find_similar is done.
|
||||
*
|
||||
* Using GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED to emulate the
|
||||
* --find-copies-harder behavior requires building a diff with the
|
||||
* GIT_DIFF_INCLUDE_UNMODIFIED flag. If you do not want UNMODIFIED
|
||||
* records in the final result, pass this flag to have them removed.
|
||||
*/
|
||||
GIT_DIFF_FIND_REMOVE_UNMODIFIED = (1u << 16),
|
||||
} git_diff_find_t;
|
||||
|
||||
/**
|
||||
|
@ -366,12 +366,28 @@ static int normalize_find_opts(
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int insert_delete_side_of_split(
|
||||
git_diff *diff, git_vector *onto, const git_diff_delta *delta)
|
||||
{
|
||||
/* make new record for DELETED side of split */
|
||||
git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool);
|
||||
GITERR_CHECK_ALLOC(deleted);
|
||||
|
||||
deleted->status = GIT_DELTA_DELETED;
|
||||
deleted->nfiles = 1;
|
||||
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
|
||||
deleted->new_file.path = deleted->old_file.path;
|
||||
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
|
||||
|
||||
return git_vector_insert(onto, deleted);
|
||||
}
|
||||
|
||||
static int apply_splits_and_deletes(
|
||||
git_diff *diff, size_t expected_size, bool actually_split)
|
||||
{
|
||||
git_vector onto = GIT_VECTOR_INIT;
|
||||
size_t i;
|
||||
git_diff_delta *delta, *deleted;
|
||||
git_diff_delta *delta;
|
||||
|
||||
if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0)
|
||||
return -1;
|
||||
@ -384,17 +400,7 @@ static int apply_splits_and_deletes(
|
||||
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) {
|
||||
delta->similarity = 0;
|
||||
|
||||
/* make new record for DELETED side of split */
|
||||
if (!(deleted = diff_delta__dup(delta, &diff->pool)))
|
||||
goto on_error;
|
||||
|
||||
deleted->status = GIT_DELTA_DELETED;
|
||||
deleted->nfiles = 1;
|
||||
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
|
||||
deleted->new_file.path = deleted->old_file.path;
|
||||
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
|
||||
|
||||
if (git_vector_insert(&onto, deleted) < 0)
|
||||
if (insert_delete_side_of_split(diff, &onto, delta) < 0)
|
||||
goto on_error;
|
||||
|
||||
if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR)
|
||||
@ -740,6 +746,8 @@ static bool is_rename_source(
|
||||
case GIT_DELTA_UNMODIFIED:
|
||||
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
|
||||
return false;
|
||||
if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED))
|
||||
delta->flags |= GIT_DIFF_FLAG__TO_DELETE;
|
||||
break;
|
||||
|
||||
default: /* MODIFIED, RENAMED, COPIED */
|
||||
@ -1058,10 +1066,7 @@ find_best_matches:
|
||||
}
|
||||
}
|
||||
|
||||
else if (delta_is_new_only(tgt)) {
|
||||
if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES))
|
||||
continue;
|
||||
|
||||
else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) {
|
||||
if (tgt2src_copy[t].similarity < opts.copy_threshold)
|
||||
continue;
|
||||
|
||||
@ -1069,10 +1074,21 @@ find_best_matches:
|
||||
best_match = &tgt2src_copy[t];
|
||||
src = GIT_VECTOR_GET(&diff->deltas, best_match->idx);
|
||||
|
||||
if (delta_is_split(tgt)) {
|
||||
error = insert_delete_side_of_split(diff, &diff->deltas, tgt);
|
||||
if (error < 0)
|
||||
goto cleanup;
|
||||
num_rewrites--;
|
||||
}
|
||||
|
||||
if (!delta_is_split(tgt) && !delta_is_new_only(tgt))
|
||||
continue;
|
||||
|
||||
tgt->status = GIT_DELTA_COPIED;
|
||||
tgt->similarity = best_match->similarity;
|
||||
tgt->nfiles = 2;
|
||||
memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file));
|
||||
tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
|
||||
|
||||
num_updates++;
|
||||
}
|
||||
|
@ -1284,3 +1284,100 @@ void test_diff_rename__rewrite_on_single_file(void)
|
||||
git_diff_free(diff);
|
||||
git_index_free(index);
|
||||
}
|
||||
|
||||
void test_diff_rename__can_find_copy_to_split(void)
|
||||
{
|
||||
git_buf c1 = GIT_BUF_INIT;
|
||||
git_index *index;
|
||||
git_tree *tree;
|
||||
git_diff *diff;
|
||||
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
|
||||
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
|
||||
diff_expects exp;
|
||||
|
||||
cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
|
||||
cl_git_pass(git_futils_writebuffer(&c1, "renames/untimely.txt", 0, 0));
|
||||
|
||||
cl_git_pass(
|
||||
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
|
||||
|
||||
cl_git_pass(git_repository_index(&index, g_repo));
|
||||
cl_git_pass(git_index_read_tree(index, tree));
|
||||
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
|
||||
|
||||
diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
|
||||
|
||||
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
|
||||
|
||||
memset(&exp, 0, sizeof(exp));
|
||||
cl_git_pass(git_diff_foreach(
|
||||
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
|
||||
cl_assert_equal_i(4, exp.files);
|
||||
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
|
||||
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
|
||||
|
||||
opts.flags = GIT_DIFF_FIND_ALL;
|
||||
cl_git_pass(git_diff_find_similar(diff, &opts));
|
||||
|
||||
memset(&exp, 0, sizeof(exp));
|
||||
cl_git_pass(git_diff_foreach(
|
||||
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
|
||||
cl_assert_equal_i(5, exp.files);
|
||||
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
|
||||
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
|
||||
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
|
||||
|
||||
git_diff_free(diff);
|
||||
git_tree_free(tree);
|
||||
git_index_free(index);
|
||||
|
||||
git_buf_free(&c1);
|
||||
}
|
||||
|
||||
void test_diff_rename__can_delete_unmodified_deltas(void)
|
||||
{
|
||||
git_buf c1 = GIT_BUF_INIT;
|
||||
git_index *index;
|
||||
git_tree *tree;
|
||||
git_diff *diff;
|
||||
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
|
||||
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
|
||||
diff_expects exp;
|
||||
|
||||
cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
|
||||
cl_git_pass(git_futils_writebuffer(&c1, "renames/untimely.txt", 0, 0));
|
||||
|
||||
cl_git_pass(
|
||||
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
|
||||
|
||||
cl_git_pass(git_repository_index(&index, g_repo));
|
||||
cl_git_pass(git_index_read_tree(index, tree));
|
||||
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
|
||||
|
||||
diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
|
||||
|
||||
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
|
||||
|
||||
memset(&exp, 0, sizeof(exp));
|
||||
cl_git_pass(git_diff_foreach(
|
||||
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
|
||||
cl_assert_equal_i(4, exp.files);
|
||||
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
|
||||
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
|
||||
|
||||
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_REMOVE_UNMODIFIED;
|
||||
cl_git_pass(git_diff_find_similar(diff, &opts));
|
||||
|
||||
memset(&exp, 0, sizeof(exp));
|
||||
cl_git_pass(git_diff_foreach(
|
||||
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
|
||||
cl_assert_equal_i(2, exp.files);
|
||||
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
|
||||
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
|
||||
|
||||
git_diff_free(diff);
|
||||
git_tree_free(tree);
|
||||
git_index_free(index);
|
||||
|
||||
git_buf_free(&c1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user