libgit2/tests-clar/diff/rename.c
Russell Belfer d958e37a48 Fix issues with git_diff_find_similar
There are a number of bugs in the rename code that only were
obvious when I started testing it against large old repos with
more complex patterns.  (The code to do that testing is not ready
to merge with libgit2, but I do plan to add more thorough tests.)

This contains a significant number of changes and also tweaks the
public API slightly to make emulating core git easier.

Most notably, this separates the GIT_DIFF_FIND_AND_BREAK_REWRITES
flag into FIND_REWRITES (which adds a self-similarity score to
every modified file) and BREAK_REWRITES (which splits the modified
deltas into add/remove pairs in the diff list).  When you do a raw
output of core git, rewrites show up as M090 or such, not at A and
D output, so I wanted to be able to emulate that.

Publicly, this also changes the flags to be uint16_t since we
don't need values out of that range.

Internally, this contains significant changes from a number of
small bug fixes (like using the wrong side of the diff to decide
if the object could be found in the ODB vs the workdir) to larger
issues about which files can and should be compared and how the
various edge cases of similarity scores should be treated.

Honestly, I don't think this is the last update that will have to
be made to this code, but I think this moves us closer to correct
behavior and I tried to document the code so it would be easier
to follow..
2013-05-17 17:21:45 -07:00

450 lines
15 KiB
C

#include "clar_libgit2.h"
#include "diff_helpers.h"
static git_repository *g_repo = NULL;
void test_diff_rename__initialize(void)
{
g_repo = cl_git_sandbox_init("renames");
}
void test_diff_rename__cleanup(void)
{
cl_git_sandbox_cleanup();
}
/*
* Renames repo has:
*
* commit 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 -
* serving.txt (25 lines)
* sevencities.txt (50 lines)
* commit 2bc7f351d20b53f1c72c16c4b036e491c478c49a -
* serving.txt -> sixserving.txt (rename, no change, 100% match)
* sevencities.txt -> sevencities.txt (no change)
* sevencities.txt -> songofseven.txt (copy, no change, 100% match)
* commit 1c068dee5790ef1580cfc4cd670915b48d790084
* songofseven.txt -> songofseven.txt (major rewrite, <20% match - split)
* sixserving.txt -> sixserving.txt (indentation change)
* sixserving.txt -> ikeepsix.txt (copy, add title, >80% match)
* sevencities.txt (no change)
* commit 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13
* songofseven.txt -> untimely.txt (rename, convert to crlf)
* ikeepsix.txt -> ikeepsix.txt (reorder sections in file)
* sixserving.txt -> sixserving.txt (whitespace change - not just indent)
* sevencities.txt -> songof7cities.txt (rename, small text changes)
*/
void test_diff_rename__match_oid(void)
{
const char *old_sha = "31e47d8c1fa36d7f8d537b96158e3f024de0a9f2";
const char *new_sha = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
git_tree *old_tree, *new_tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
old_tree = resolve_commit_oid_to_tree(g_repo, old_sha);
new_tree = resolve_commit_oid_to_tree(g_repo, new_sha);
/* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate
* --find-copies-harder during rename transformion...
*/
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
/* git diff --no-renames \
* 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
*/
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
/* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
*/
cl_git_pass(git_diff_find_similar(diff, NULL));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(3, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
git_diff_list_free(diff);
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
/* git diff --find-copies-harder \
* 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
*/
opts.flags = GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(3, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
git_diff_list_free(diff);
git_tree_free(old_tree);
git_tree_free(new_tree);
}
void test_diff_rename__checks_options_version(void)
{
const char *old_sha = "31e47d8c1fa36d7f8d537b96158e3f024de0a9f2";
const char *new_sha = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
git_tree *old_tree, *new_tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
const git_error *err;
old_tree = resolve_commit_oid_to_tree(g_repo, old_sha);
new_tree = resolve_commit_oid_to_tree(g_repo, new_sha);
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
opts.version = 0;
cl_git_fail(git_diff_find_similar(diff, &opts));
err = giterr_last();
cl_assert_equal_i(GITERR_INVALID, err->klass);
giterr_clear();
opts.version = 1024;
cl_git_fail(git_diff_find_similar(diff, &opts));
err = giterr_last();
cl_assert_equal_i(GITERR_INVALID, err->klass);
git_diff_list_free(diff);
git_tree_free(old_tree);
git_tree_free(new_tree);
}
void test_diff_rename__not_exact_match(void)
{
const char *sha0 = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
const char *sha1 = "1c068dee5790ef1580cfc4cd670915b48d790084";
const char *sha2 = "19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13";
git_tree *old_tree, *new_tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
/* == Changes =====================================================
* songofseven.txt -> songofseven.txt (major rewrite, <20% match - split)
* sixserving.txt -> sixserving.txt (indentation change)
* sixserving.txt -> ikeepsix.txt (copy, add title, >80% match)
* sevencities.txt (no change)
*/
old_tree = resolve_commit_oid_to_tree(g_repo, sha0);
new_tree = resolve_commit_oid_to_tree(g_repo, sha1);
/* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate
* --find-copies-harder during rename transformion...
*/
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
/* git diff --no-renames \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a \
* 1c068dee5790ef1580cfc4cd670915b48d790084
*/
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
/* git diff -M 2bc7f351d20b53f1c72c16c4b036e491c478c49a \
* 1c068dee5790ef1580cfc4cd670915b48d790084
*
* must not pass NULL for opts because it will pick up environment
* values for "diff.renames" and test won't be consistent.
*/
opts.flags = GIT_DIFF_FIND_RENAMES;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
git_diff_list_free(diff);
/* git diff -M -C \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a \
* 1c068dee5790ef1580cfc4cd670915b48d790084
*/
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
git_diff_list_free(diff);
/* git diff -M -C --find-copies-harder --break-rewrites \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a \
* 1c068dee5790ef1580cfc4cd670915b48d790084
*/
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
git_diff_list_free(diff);
/* == Changes =====================================================
* songofseven.txt -> untimely.txt (rename, convert to crlf)
* ikeepsix.txt -> ikeepsix.txt (reorder sections in file)
* sixserving.txt -> sixserving.txt (whitespace - not just indent)
* sevencities.txt -> songof7cities.txt (rename, small text changes)
*/
git_tree_free(old_tree);
old_tree = new_tree;
new_tree = resolve_commit_oid_to_tree(g_repo, sha2);
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
/* git diff --no-renames \
* 1c068dee5790ef1580cfc4cd670915b48d790084 \
* 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13
*/
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
git_diff_list_free(diff);
/* git diff -M -C \
* 1c068dee5790ef1580cfc4cd670915b48d790084 \
* 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13
*/
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
git_diff_list_free(diff);
/* git diff -M -C --find-copies-harder --break-rewrites \
* 1c068dee5790ef1580cfc4cd670915b48d790084 \
* 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13
* with libgit2 default similarity comparison...
*/
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
/* the default match algorithm is going to find the internal
* whitespace differences in the lines of sixserving.txt to be
* significant enough that this will decide to split it into
* an ADD and a DELETE
*/
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
git_diff_list_free(diff);
/* git diff -M -C --find-copies-harder --break-rewrites \
* 1c068dee5790ef1580cfc4cd670915b48d790084 \
* 19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13
* with ignore_space whitespace comparision
*/
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_IGNORE_WHITESPACE;
cl_git_pass(git_diff_find_similar(diff, &opts));
/* Ignoring whitespace, this should no longer split sixserver.txt */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
git_diff_list_free(diff);
git_tree_free(old_tree);
git_tree_free(new_tree);
}
void test_diff_rename__handles_small_files(void)
{
const char *tree_sha = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
git_index *index;
git_tree *tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
cl_git_pass(git_repository_index(&index, g_repo));
tree = resolve_commit_oid_to_tree(g_repo, tree_sha);
cl_git_rewritefile("renames/songof7cities.txt", "single line\n");
cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
cl_git_rewritefile("renames/untimely.txt", "untimely\n");
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
/* Tests that we can invoke find_similar on small files
* and that the GIT_EBUFS (too small) error code is not
* propagated to the caller.
*/
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES |
GIT_DIFF_FIND_AND_BREAK_REWRITES;
cl_git_pass(git_diff_find_similar(diff, &opts));
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
}
void test_diff_rename__working_directory_changes(void)
{
/* let's rewrite some files in the working directory on demand */
/* and with / without CRLF changes */
}
void test_diff_rename__patch(void)
{
const char *sha0 = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
const char *sha1 = "1c068dee5790ef1580cfc4cd670915b48d790084";
git_tree *old_tree, *new_tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
git_diff_patch *patch;
const git_diff_delta *delta;
char *text;
const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n";
old_tree = resolve_commit_oid_to_tree(g_repo, sha0);
new_tree = resolve_commit_oid_to_tree(g_repo, sha1);
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_tree(
&diff, g_repo, old_tree, new_tree, &diffopts));
opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES;
cl_git_pass(git_diff_find_similar(diff, &opts));
/* == Changes =====================================================
* sixserving.txt -> ikeepsix.txt (copy, add title, >80% match)
* sevencities.txt (no change)
* sixserving.txt -> sixserving.txt (indentation change)
* songofseven.txt -> songofseven.txt (major rewrite, <20% match - split)
*/
cl_assert_equal_i(4, (int)git_diff_num_deltas(diff));
cl_git_pass(git_diff_get_patch(&patch, &delta, diff, 0));
cl_assert_equal_i(GIT_DELTA_COPIED, (int)delta->status);
cl_git_pass(git_diff_patch_to_str(&text, patch));
cl_assert_equal_s(expected, text);
git__free(text);
git_diff_patch_free(patch);
cl_git_pass(git_diff_get_patch(NULL, &delta, diff, 1));
cl_assert_equal_i(GIT_DELTA_UNMODIFIED, (int)delta->status);
cl_git_pass(git_diff_get_patch(NULL, &delta, diff, 2));
cl_assert_equal_i(GIT_DELTA_MODIFIED, (int)delta->status);
cl_git_pass(git_diff_get_patch(NULL, &delta, diff, 3));
cl_assert_equal_i(GIT_DELTA_MODIFIED, (int)delta->status);
git_diff_list_free(diff);
git_tree_free(old_tree);
git_tree_free(new_tree);
}