From b4f5bb074721823cc016b66a9984abe2c271cb1f Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 23 Oct 2012 16:40:51 -0700 Subject: [PATCH 1/2] Initial implementation of diff rename detection This implements the basis for diff rename and copy detection, although it is based on simple SHA comparison right now instead of using a matching algorithm. Just as `git_diff_merge` can be used as a post-pass on diffs to emulate certain command line behaviors, there is a new API `git_diff_detect` which will update a diff list in-place, adjusting some deltas to RENAMED or COPIED state (and also, eventually, splitting MODIFIED deltas where the change is too large into DELETED/ADDED pairs). This also adds a new test repo that will hold rename/copy/split scenarios. Right now, it just has exact-match rename and copy, but the tests are written to use tree diffs, so we should be able to add new test scenarios easily without breaking tests. --- include/git2/diff.h | 58 ++++++- src/diff.c | 147 ++++++++++++++++++ src/diff.h | 3 + tests-clar/diff/blob.c | 76 +++++---- tests-clar/diff/diff_helpers.c | 17 +- tests-clar/diff/diff_helpers.h | 9 +- tests-clar/diff/index.c | 12 +- tests-clar/diff/rename.c | 105 +++++++++++++ tests-clar/diff/tree.c | 50 +++--- tests-clar/diff/workdir.c | 134 ++++++++-------- tests-clar/repo/head.c | 2 - tests-clar/resources/renames/.gitted/HEAD | 1 + tests-clar/resources/renames/.gitted/config | 7 + .../resources/renames/.gitted/description | 1 + tests-clar/resources/renames/.gitted/index | Bin 0 -> 272 bytes .../resources/renames/.gitted/info/exclude | 6 + .../resources/renames/.gitted/logs/HEAD | 2 + .../renames/.gitted/logs/refs/heads/master | 2 + .../03/da7ad872536bd448da8d88eb7165338bf923a7 | Bin 0 -> 90 bytes .../2b/c7f351d20b53f1c72c16c4b036e491c478c49a | Bin 0 -> 173 bytes .../31/e47d8c1fa36d7f8d537b96158e3f024de0a9f2 | Bin 0 -> 131 bytes .../61/8c6f2f8740bd6049b2fb9eb93fc15726462745 | Bin 0 -> 106 bytes 
.../66/311f5cfbe7836c27510a3ba2f43e282e2c8bba | Bin 0 -> 1155 bytes .../ad/0a8e55a104ac54a8a29ed4b84b49e76837a113 | Bin 0 -> 415 bytes .../renames/.gitted/refs/heads/master | 1 + tests-clar/resources/renames/sevencities.txt | 49 ++++++ tests-clar/resources/renames/sixserving.txt | 24 +++ tests-clar/resources/renames/songofseven.txt | 49 ++++++ 28 files changed, 589 insertions(+), 166 deletions(-) create mode 100644 tests-clar/diff/rename.c create mode 100644 tests-clar/resources/renames/.gitted/HEAD create mode 100644 tests-clar/resources/renames/.gitted/config create mode 100644 tests-clar/resources/renames/.gitted/description create mode 100644 tests-clar/resources/renames/.gitted/index create mode 100644 tests-clar/resources/renames/.gitted/info/exclude create mode 100644 tests-clar/resources/renames/.gitted/logs/HEAD create mode 100644 tests-clar/resources/renames/.gitted/logs/refs/heads/master create mode 100644 tests-clar/resources/renames/.gitted/objects/03/da7ad872536bd448da8d88eb7165338bf923a7 create mode 100644 tests-clar/resources/renames/.gitted/objects/2b/c7f351d20b53f1c72c16c4b036e491c478c49a create mode 100644 tests-clar/resources/renames/.gitted/objects/31/e47d8c1fa36d7f8d537b96158e3f024de0a9f2 create mode 100644 tests-clar/resources/renames/.gitted/objects/61/8c6f2f8740bd6049b2fb9eb93fc15726462745 create mode 100644 tests-clar/resources/renames/.gitted/objects/66/311f5cfbe7836c27510a3ba2f43e282e2c8bba create mode 100644 tests-clar/resources/renames/.gitted/objects/ad/0a8e55a104ac54a8a29ed4b84b49e76837a113 create mode 100644 tests-clar/resources/renames/.gitted/refs/heads/master create mode 100644 tests-clar/resources/renames/sevencities.txt create mode 100644 tests-clar/resources/renames/sixserving.txt create mode 100644 tests-clar/resources/renames/songofseven.txt diff --git a/include/git2/diff.h b/include/git2/diff.h index 1932db029..f9dbb67e0 100644 --- a/include/git2/diff.h +++ b/include/git2/diff.h @@ -33,7 +33,7 @@ GIT_BEGIN_DECL * Flags for 
diff options. A combination of these flags can be passed * in via the `flags` value in the `git_diff_options`. */ -enum { +typedef enum { /** Normal diff, the default */ GIT_DIFF_NORMAL = 0, /** Reverse the sides of the diff */ @@ -86,7 +86,7 @@ enum { * mode set to tree. Note: the tree SHA will not be available. */ GIT_DIFF_INCLUDE_TYPECHANGE_TREES = (1 << 16), -}; +} git_diff_option_t; /** * Structure describing options about how the diff should be executed. @@ -95,7 +95,7 @@ enum { * values. Similarly, passing NULL for the options structure will * give the defaults. The default values are marked below. * - * - flags: a combination of the GIT_DIFF_... values above + * - flags: a combination of the git_diff_option_t values above * - context_lines: number of lines of context to show around diffs * - interhunk_lines: min lines between diff hunks to merge them * - old_prefix: "directory" to prefix to old file names (default "a") @@ -124,7 +124,7 @@ typedef struct git_diff_list git_diff_list; * Most of the flags are just for internal consumption by libgit2, * but some of them may be interesting to external users. */ -enum { +typedef enum { GIT_DIFF_FILE_VALID_OID = (1 << 0), /** `oid` value is known correct */ GIT_DIFF_FILE_FREE_PATH = (1 << 1), /** `path` is allocated memory */ GIT_DIFF_FILE_BINARY = (1 << 2), /** should be considered binary data */ @@ -132,7 +132,7 @@ enum { GIT_DIFF_FILE_FREE_DATA = (1 << 4), /** internal file data is allocated */ GIT_DIFF_FILE_UNMAP_DATA = (1 << 5), /** internal file data is mmap'ed */ GIT_DIFF_FILE_NO_DATA = (1 << 6), /** file data should not be loaded */ -}; +} git_diff_file_flag_t; /** * What type of change is described by a git_diff_delta? @@ -218,7 +218,7 @@ typedef int (*git_diff_hunk_fn)( * output callbacks to demarcate lines that are actually part of * the file or hunk headers. 
*/ -enum { +typedef enum { /* These values will be sent to `git_diff_data_fn` along with the line */ GIT_DIFF_LINE_CONTEXT = ' ', GIT_DIFF_LINE_ADDITION = '+', @@ -233,7 +233,7 @@ enum { GIT_DIFF_LINE_FILE_HDR = 'F', GIT_DIFF_LINE_HUNK_HDR = 'H', GIT_DIFF_LINE_BINARY = 'B' -}; +} git_diff_line_t; /** * When iterating over a diff, callback that will be made per text diff @@ -259,6 +259,36 @@ typedef int (*git_diff_data_fn)( */ typedef struct git_diff_patch git_diff_patch; +/** + * Flags to control the behavior of diff rename/copy detection. + */ +typedef enum { + /** should we look for renames */ + GIT_DIFF_DETECT_RENAMES = (1 << 0), + /** should we look for copies */ + GIT_DIFF_DETECT_COPIES = (1 << 1), + /** should we consider unmodified files as possible copy sources */ + GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED = (1 << 2), + /** should we split large rewrites into delete / add pairs */ + GIT_DIFF_DETECT_BREAK_REWRITES = (1 << 3), +} git_diff_detect_t; + +/** + * Control behavior of rename and copy detection + */ +typedef struct { + /** Combination of git_diff_detect_t values */ + unsigned int flags; + /** Threshold on similarity index to consider a file renamed. */ + unsigned int rename_threshold; + /** Threshold on similarity index to consider a file a copy. */ + unsigned int copy_threshold; + /** Threshold on change % to split modify into delete/add pair. */ + unsigned int break_rewrite_threshold; + /** Maximum rename/copy targets to check (diff.renameLimit) */ + unsigned int target_limit; +} git_diff_detect_options; + /** @name Diff List Generator Functions * @@ -374,6 +404,20 @@ GIT_EXTERN(int) git_diff_merge( git_diff_list *onto, const git_diff_list *from); +/** + * Update a diff list with file renames, copies, etc. + * + * This modifies a diff list in place, replacing old entries that look + * like renames or copies with new entries reflecting those changes. 
+ * + * @param diff Diff list to run detection algorithms on + * @param options Control how detection should be run, NULL for defaults + * @return 0 on success, -1 on failure + */ +GIT_EXTERN(int) git_diff_detect( + git_diff_list *diff, + git_diff_detect_options *options); + /**@}*/ diff --git a/src/diff.c b/src/diff.c index 9f693bebf..e2649ff3b 100644 --- a/src/diff.c +++ b/src/diff.c @@ -378,12 +378,23 @@ static git_diff_list *git_diff_list_alloc( diff->diffcaps = diff->diffcaps | GIT_DIFFCAPS_TRUST_CTIME; /* Don't set GIT_DIFFCAPS_USE_DEV - compile time option in core git */ + /* TODO: there are certain config settings where even if we were + * not given an options structure, we need the diff list to have one + * so that we can store the altered default values. + * + * - diff.ignoreSubmodules + * - diff.mnemonicprefix + * - diff.noprefix + */ + if (opts == NULL) return diff; memcpy(&diff->opts, opts, sizeof(git_diff_options)); memset(&diff->opts.pathspec, 0, sizeof(diff->opts.pathspec)); + /* TODO: handle config diff.mnemonicprefix, diff.noprefix */ + diff->opts.old_prefix = diff_strdup_prefix(&diff->pool, opts->old_prefix ? opts->old_prefix : DIFF_OLD_PREFIX_DEFAULT); diff->opts.new_prefix = diff_strdup_prefix(&diff->pool, @@ -1082,3 +1093,139 @@ int git_diff_merge( return error; } + +#define DEFAULT_THRESHOLD 50 +#define DEFAULT_TARGET_LIMIT 200 + +int git_diff_detect( + git_diff_list *diff, + git_diff_detect_options *opts) +{ + int error = 0; + unsigned int i, j; + git_diff_delta *from, *to; + bool check_unmodified = opts && + (opts->flags & GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED) != 0; + int max_targets = (opts && opts->target_limit > 0) ? + opts->target_limit : DEFAULT_TARGET_LIMIT; + unsigned int rename_threshold = (opts && opts->rename_threshold > 0) ? + opts->rename_threshold : DEFAULT_THRESHOLD; + unsigned int copy_threshold = (opts && opts->copy_threshold > 0) ? 
+ opts->copy_threshold : DEFAULT_THRESHOLD; + int num_deletes = 0, num_splits = 0; + + /* TODO: update opts from config diff.renameLimit / diff.renames */ + + git_vector_foreach(&diff->deltas, i, from) { + int tried_targets = 0; + + git_vector_foreach(&diff->deltas, j, to) { + unsigned int similarity = 0; + + if (i == j) + continue; + + switch (to->status) { + case GIT_DELTA_ADDED: + case GIT_DELTA_UNTRACKED: + case GIT_DELTA_RENAMED: + case GIT_DELTA_COPIED: + break; + default: + /* only those status values should be checked */ + continue; + } + + /* don't check UNMODIFIED files as source unless given option */ + if (from->status == GIT_DELTA_UNMODIFIED && !check_unmodified) + continue; + + /* cap on maximum files we'll examine */ + if (++tried_targets > max_targets) + break; + + /* calculate similarity and see if this pair beats the + * similarity score of the current best pair. + */ + if (git_oid_cmp(&from->old_file.oid, &to->new_file.oid) == 0) + similarity = 100; + /* TODO: insert actual similarity algo here */ + + if (similarity <= to->similarity) + continue; + + if (from->status == GIT_DELTA_DELETED) { + if (similarity < rename_threshold) + continue; + + /* merge "from" & "to" to a RENAMED record */ + to->status = GIT_DELTA_RENAMED; + memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); + + from->status = GIT_DELTA__TO_DELETE; + num_deletes++; + } else { + if (similarity < copy_threshold) + continue; + + /* convert "to" to a COPIED record */ + to->status = GIT_DELTA_COPIED; + memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); + } + } + + if (from->status == GIT_DELTA_MODIFIED && + opts && (opts->flags & GIT_DIFF_DETECT_BREAK_REWRITES) != 0) + { + /* TODO: calculate similarity and maybe mark for split */ + + /* from->status = GIT_DELTA__TO_SPLIT; */ + /* num_splits++; */ + } + } + + if (num_deletes > 0 || num_splits > 0) { + git_vector onto = GIT_VECTOR_INIT; + size_t new_size = diff->deltas.length + num_splits - num_deletes; + + if 
(git_vector_init(&onto, new_size, diff_delta__cmp) < 0) + return -1; + + /* build new delta list without TO_DELETE and splitting TO_SPLIT */ + git_vector_foreach(&diff->deltas, i, from) { + if (from->status == GIT_DELTA__TO_DELETE) { + git__free(from); + continue; + } + + if (from->status == GIT_DELTA__TO_SPLIT) { + git_diff_delta *deleted = diff_delta__dup(from, &diff->pool); + if (!deleted) + return -1; + + deleted->status = GIT_DELTA_DELETED; + memset(&deleted->new_file, 0, sizeof(deleted->new_file)); + deleted->new_file.path = deleted->old_file.path; + deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID; + + git_vector_insert(&onto, deleted); + + from->status = GIT_DELTA_ADDED; + memset(&from->old_file, 0, sizeof(from->old_file)); + from->old_file.path = from->new_file.path; + from->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + } + + git_vector_insert(&onto, from); + } + + /* swap new delta list into place */ + + git_vector_sort(&onto); + git_vector_swap(&diff->deltas, &onto); + git_vector_free(&onto); + } + + return error; +} + diff --git a/src/diff.h b/src/diff.h index c6a26aee7..61723bc9e 100644 --- a/src/diff.h +++ b/src/diff.h @@ -28,6 +28,9 @@ enum { GIT_DIFFCAPS_USE_DEV = (1 << 4), /* use st_dev? 
*/ }; +#define GIT_DELTA__TO_DELETE 10 +#define GIT_DELTA__TO_SPLIT 11 + struct git_diff_list { git_refcount rc; git_repository *repo; diff --git a/tests-clar/diff/blob.c b/tests-clar/diff/blob.c index d5cf41e99..0a37e829d 100644 --- a/tests-clar/diff/blob.c +++ b/tests-clar/diff/blob.c @@ -59,8 +59,8 @@ void test_diff_blob__can_compare_text_blobs(void) a, b, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_mods); - cl_assert(expected.at_least_one_of_them_is_binary == false); + cl_assert_equal_i(1, expected.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, expected.files_binary); cl_assert_equal_i(1, expected.hunks); cl_assert_equal_i(6, expected.lines); @@ -74,8 +74,8 @@ void test_diff_blob__can_compare_text_blobs(void) b, c, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_mods); - cl_assert(expected.at_least_one_of_them_is_binary == false); + cl_assert_equal_i(1, expected.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, expected.files_binary); cl_assert_equal_i(1, expected.hunks); cl_assert_equal_i(15, expected.lines); @@ -89,8 +89,8 @@ void test_diff_blob__can_compare_text_blobs(void) a, c, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_mods); - cl_assert(expected.at_least_one_of_them_is_binary == false); + cl_assert_equal_i(1, expected.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, expected.files_binary); cl_assert_equal_i(1, expected.hunks); cl_assert_equal_i(13, expected.lines); @@ -103,8 +103,8 @@ void test_diff_blob__can_compare_text_blobs(void) c, d, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_mods); - cl_assert(expected.at_least_one_of_them_is_binary == false); + 
cl_assert_equal_i(1, expected.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, expected.files_binary); cl_assert_equal_i(2, expected.hunks); cl_assert_equal_i(14, expected.lines); @@ -125,8 +125,8 @@ void test_diff_blob__can_compare_against_null_blobs(void) d, e, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_dels); - cl_assert(expected.at_least_one_of_them_is_binary == false); + cl_assert_equal_i(1, expected.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(0, expected.files_binary); cl_assert_equal_i(1, expected.hunks); cl_assert_equal_i(14, expected.hunk_old_lines); @@ -140,8 +140,8 @@ void test_diff_blob__can_compare_against_null_blobs(void) d, e, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_adds); - cl_assert(expected.at_least_one_of_them_is_binary == false); + cl_assert_equal_i(1, expected.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, expected.files_binary); cl_assert_equal_i(1, expected.hunks); cl_assert_equal_i(14, expected.hunk_new_lines); @@ -154,10 +154,9 @@ void test_diff_blob__can_compare_against_null_blobs(void) cl_git_pass(git_diff_blobs( alien, NULL, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - cl_assert(expected.at_least_one_of_them_is_binary == true); - cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_dels); + cl_assert_equal_i(1, expected.files_binary); + cl_assert_equal_i(1, expected.file_status[GIT_DELTA_DELETED]); cl_assert_equal_i(0, expected.hunks); cl_assert_equal_i(0, expected.lines); @@ -166,20 +165,19 @@ void test_diff_blob__can_compare_against_null_blobs(void) cl_git_pass(git_diff_blobs( NULL, alien, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - cl_assert(expected.at_least_one_of_them_is_binary == true); - cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, 
expected.file_adds); + cl_assert_equal_i(1, expected.files_binary); + cl_assert_equal_i(1, expected.file_status[GIT_DELTA_ADDED]); cl_assert_equal_i(0, expected.hunks); cl_assert_equal_i(0, expected.lines); } -static void assert_identical_blobs_comparison(diff_expects expected) +static void assert_identical_blobs_comparison(diff_expects *expected) { - cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_unmodified); - cl_assert_equal_i(0, expected.hunks); - cl_assert_equal_i(0, expected.lines); + cl_assert_equal_i(1, expected->files); + cl_assert_equal_i(1, expected->file_status[GIT_DELTA_UNMODIFIED]); + cl_assert_equal_i(0, expected->hunks); + cl_assert_equal_i(0, expected->lines); } void test_diff_blob__can_compare_identical_blobs(void) @@ -187,32 +185,32 @@ void test_diff_blob__can_compare_identical_blobs(void) cl_git_pass(git_diff_blobs( d, d, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - cl_assert(expected.at_least_one_of_them_is_binary == false); - assert_identical_blobs_comparison(expected); + cl_assert_equal_i(0, expected.files_binary); + assert_identical_blobs_comparison(&expected); memset(&expected, 0, sizeof(expected)); cl_git_pass(git_diff_blobs( NULL, NULL, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - cl_assert(expected.at_least_one_of_them_is_binary == false); - assert_identical_blobs_comparison(expected); + cl_assert_equal_i(0, expected.files_binary); + assert_identical_blobs_comparison(&expected); memset(&expected, 0, sizeof(expected)); cl_git_pass(git_diff_blobs( alien, alien, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - cl_assert(expected.at_least_one_of_them_is_binary == true); - assert_identical_blobs_comparison(expected); + cl_assert(expected.files_binary > 0); + assert_identical_blobs_comparison(&expected); } -static void assert_binary_blobs_comparison(diff_expects expected) +static void assert_binary_blobs_comparison(diff_expects *expected) { - 
cl_assert(expected.at_least_one_of_them_is_binary == true); + cl_assert(expected->files_binary > 0); - cl_assert_equal_i(1, expected.files); - cl_assert_equal_i(1, expected.file_mods); - cl_assert_equal_i(0, expected.hunks); - cl_assert_equal_i(0, expected.lines); + cl_assert_equal_i(1, expected->files); + cl_assert_equal_i(1, expected->file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, expected->hunks); + cl_assert_equal_i(0, expected->lines); } void test_diff_blob__can_compare_two_binary_blobs(void) @@ -227,14 +225,14 @@ void test_diff_blob__can_compare_two_binary_blobs(void) cl_git_pass(git_diff_blobs( alien, heart, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - assert_binary_blobs_comparison(expected); + assert_binary_blobs_comparison(&expected); memset(&expected, 0, sizeof(expected)); cl_git_pass(git_diff_blobs( heart, alien, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - assert_binary_blobs_comparison(expected); + assert_binary_blobs_comparison(&expected); git_blob_free(heart); } @@ -244,14 +242,14 @@ void test_diff_blob__can_compare_a_binary_blob_and_a_text_blob(void) cl_git_pass(git_diff_blobs( alien, d, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - assert_binary_blobs_comparison(expected); + assert_binary_blobs_comparison(&expected); memset(&expected, 0, sizeof(expected)); cl_git_pass(git_diff_blobs( d, alien, &opts, &expected, diff_file_fn, diff_hunk_fn, diff_line_fn)); - assert_binary_blobs_comparison(expected); + assert_binary_blobs_comparison(&expected); } /* diff --git a/tests-clar/diff/diff_helpers.c b/tests-clar/diff/diff_helpers.c index de0e7e074..992c87d4c 100644 --- a/tests-clar/diff/diff_helpers.c +++ b/tests-clar/diff/diff_helpers.c @@ -32,20 +32,13 @@ int diff_file_fn( e->files++; - if (delta->binary) { - e->at_least_one_of_them_is_binary = true; + if (delta->binary) e->files_binary++; - } - switch (delta->status) { - case GIT_DELTA_ADDED: e->file_adds++; break; - case 
GIT_DELTA_DELETED: e->file_dels++; break; - case GIT_DELTA_MODIFIED: e->file_mods++; break; - case GIT_DELTA_IGNORED: e->file_ignored++; break; - case GIT_DELTA_UNTRACKED: e->file_untracked++; break; - case GIT_DELTA_UNMODIFIED: e->file_unmodified++; break; - default: break; - } + cl_assert(delta->status <= GIT_DELTA_TYPECHANGE); + + e->file_status[delta->status] += 1; + return 0; } diff --git a/tests-clar/diff/diff_helpers.h b/tests-clar/diff/diff_helpers.h index 629130934..6ff493d49 100644 --- a/tests-clar/diff/diff_helpers.h +++ b/tests-clar/diff/diff_helpers.h @@ -8,12 +8,7 @@ typedef struct { int files; int files_binary; - int file_adds; - int file_dels; - int file_mods; - int file_ignored; - int file_untracked; - int file_unmodified; + int file_status[10]; /* indexed by git_delta_t value */ int hunks; int hunk_new_lines; @@ -23,8 +18,6 @@ typedef struct { int line_ctxt; int line_adds; int line_dels; - - bool at_least_one_of_them_is_binary; } diff_expects; extern int diff_file_fn( diff --git a/tests-clar/diff/index.c b/tests-clar/diff/index.c index 7c4bddb90..eda8f066a 100644 --- a/tests-clar/diff/index.c +++ b/tests-clar/diff/index.c @@ -45,9 +45,9 @@ void test_diff_index__0(void) * - mv .git .gitted */ cl_assert_equal_i(8, exp.files); - cl_assert_equal_i(3, exp.file_adds); - cl_assert_equal_i(2, exp.file_dels); - cl_assert_equal_i(3, exp.file_mods); + cl_assert_equal_i(3, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(3, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(8, exp.hunks); @@ -73,9 +73,9 @@ void test_diff_index__0(void) * - mv .git .gitted */ cl_assert_equal_i(12, exp.files); - cl_assert_equal_i(7, exp.file_adds); - cl_assert_equal_i(2, exp.file_dels); - cl_assert_equal_i(3, exp.file_mods); + cl_assert_equal_i(7, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(3, exp.file_status[GIT_DELTA_MODIFIED]); 
cl_assert_equal_i(12, exp.hunks); diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c new file mode 100644 index 000000000..8a50fd5ea --- /dev/null +++ b/tests-clar/diff/rename.c @@ -0,0 +1,105 @@ +#include "clar_libgit2.h" +#include "diff_helpers.h" + +static git_repository *g_repo = NULL; + +void test_diff_rename__initialize(void) +{ + g_repo = cl_git_sandbox_init("renames"); +} + +void test_diff_rename__cleanup(void) +{ + cl_git_sandbox_cleanup(); +} + +/* + * Renames repo has: + * + * commit 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 - + * serving.txt (25 lines) + * sevencities.txt (50 lines) + * commit 2bc7f351d20b53f1c72c16c4b036e491c478c49a - + * serving.txt -> sixserving.txt (rename, no change, 100% match) + * sevencities.txt -> sevencities.txt (no change) + * sevencities.txt -> songofseven.txt (copy, no change, 100% match) + * + * TODO: add commits with various % changes of copy / rename + */ + +void test_diff_rename__match_oid(void) +{ + const char *old_sha = "31e47d8c1fa36d7f8d537b96158e3f024de0a9f2"; + const char *new_sha = "2bc7f351d20b53f1c72c16c4b036e491c478c49a"; + git_tree *old_tree, *new_tree; + git_diff_list *diff; + git_diff_options diffopts = {0}; + git_diff_detect_options opts; + diff_expects exp; + + old_tree = resolve_commit_oid_to_tree(g_repo, old_sha); + new_tree = resolve_commit_oid_to_tree(g_repo, new_sha); + + /* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate + * --find-copies-harder during rename detection... 
+ */ + memset(&diffopts, 0, sizeof(diffopts)); + diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED; + + cl_git_pass(git_diff_tree_to_tree( + g_repo, &diffopts, old_tree, new_tree, &diff)); + + /* git diff --no-renames \ + * 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \ + * 2bc7f351d20b53f1c72c16c4b036e491c478c49a + */ + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); + + cl_assert_equal_i(4, exp.files); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]); + + /* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \ + * 2bc7f351d20b53f1c72c16c4b036e491c478c49a + */ + cl_git_pass(git_diff_detect(diff, NULL)); + + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); + + cl_assert_equal_i(3, exp.files); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]); + + git_diff_list_free(diff); + + cl_git_pass(git_diff_tree_to_tree( + g_repo, &diffopts, old_tree, new_tree, &diff)); + + /* git diff --find-copies-harder \ + * 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \ + * 2bc7f351d20b53f1c72c16c4b036e491c478c49a + */ + memset(&opts, 0, sizeof(opts)); + opts.flags = GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED; + cl_git_pass(git_diff_detect(diff, &opts)); + + memset(&exp, 0, sizeof(exp)); + cl_git_pass(git_diff_foreach( + diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); + + cl_assert_equal_i(3, exp.files); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]); + + git_diff_list_free(diff); + + git_tree_free(old_tree); + git_tree_free(new_tree); +} diff --git 
a/tests-clar/diff/tree.c b/tests-clar/diff/tree.c index c5a0e626e..f8b9a71a5 100644 --- a/tests-clar/diff/tree.c +++ b/tests-clar/diff/tree.c @@ -40,9 +40,9 @@ void test_diff_tree__0(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(5, exp.files); - cl_assert_equal_i(2, exp.file_adds); - cl_assert_equal_i(1, exp.file_dels); - cl_assert_equal_i(2, exp.file_mods); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(5, exp.hunks); @@ -62,9 +62,9 @@ void test_diff_tree__0(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(2, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(2, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(2, exp.hunks); @@ -111,22 +111,23 @@ void test_diff_tree__options(void) * - git diff [options] 6bab5c79cd5140d0 605812ab7fe421fdd * - mv .git .gitted */ +#define EXPECT_STATUS_ADM(ADDS,DELS,MODS) { 0, ADDS, DELS, MODS, 0, 0, 0, 0, 0 } diff_expects test_expects[] = { /* a vs b tests */ - { 5, 0, 3, 0, 2, 0, 0, 0, 4, 0, 0, 51, 2, 46, 3 }, - { 5, 0, 3, 0, 2, 0, 0, 0, 4, 0, 0, 53, 4, 46, 3 }, - { 5, 0, 0, 3, 2, 0, 0, 0, 4, 0, 0, 52, 3, 3, 46 }, - { 5, 0, 3, 0, 2, 0, 0, 0, 5, 0, 0, 54, 3, 47, 4 }, + { 5, 0, EXPECT_STATUS_ADM(3, 0, 2), 4, 0, 0, 51, 2, 46, 3 }, + { 5, 0, EXPECT_STATUS_ADM(3, 0, 2), 4, 0, 0, 53, 4, 46, 3 }, + { 5, 0, EXPECT_STATUS_ADM(0, 3, 2), 4, 0, 0, 52, 3, 3, 46 }, + { 5, 0, EXPECT_STATUS_ADM(3, 0, 2), 5, 0, 0, 54, 3, 47, 4 }, /* c vs d tests */ - { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 22, 9, 10, 3 }, - { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 19, 12, 7, 0 }, - { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 20, 11, 8, 1 }, - { 1, 0, 0, 
0, 1, 0, 0, 0, 1, 0, 0, 20, 11, 8, 1 }, - { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 18, 11, 0, 7 }, + { 1, 0, EXPECT_STATUS_ADM(0, 0, 1), 1, 0, 0, 22, 9, 10, 3 }, + { 1, 0, EXPECT_STATUS_ADM(0, 0, 1), 1, 0, 0, 19, 12, 7, 0 }, + { 1, 0, EXPECT_STATUS_ADM(0, 0, 1), 1, 0, 0, 20, 11, 8, 1 }, + { 1, 0, EXPECT_STATUS_ADM(0, 0, 1), 1, 0, 0, 20, 11, 8, 1 }, + { 1, 0, EXPECT_STATUS_ADM(0, 0, 1), 1, 0, 0, 18, 11, 0, 7 }, { 0 }, }; diff_expects *expected; - int i; + int i, j; g_repo = cl_git_sandbox_init("attr"); @@ -149,9 +150,8 @@ void test_diff_tree__options(void) expected = &test_expects[i]; cl_assert_equal_i(actual.files, expected->files); - cl_assert_equal_i(actual.file_adds, expected->file_adds); - cl_assert_equal_i(actual.file_dels, expected->file_dels); - cl_assert_equal_i(actual.file_mods, expected->file_mods); + for (j = GIT_DELTA_UNMODIFIED; j <= GIT_DELTA_TYPECHANGE; ++j) + cl_assert_equal_i(expected->file_status[j], actual.file_status[j]); cl_assert_equal_i(actual.hunks, expected->hunks); cl_assert_equal_i(actual.lines, expected->lines); cl_assert_equal_i(actual.line_ctxt, expected->line_ctxt); @@ -193,9 +193,9 @@ void test_diff_tree__bare(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(3, exp.files); - cl_assert_equal_i(2, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(1, exp.file_mods); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(3, exp.hunks); @@ -243,9 +243,9 @@ void test_diff_tree__merge(void) diff1, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(6, exp.files); - cl_assert_equal_i(2, exp.file_adds); - cl_assert_equal_i(1, exp.file_dels); - cl_assert_equal_i(3, exp.file_mods); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(3, 
exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(6, exp.hunks); diff --git a/tests-clar/diff/workdir.c b/tests-clar/diff/workdir.c index 3e388ea70..e617560f7 100644 --- a/tests-clar/diff/workdir.c +++ b/tests-clar/diff/workdir.c @@ -46,11 +46,11 @@ void test_diff_workdir__to_index(void) * - mv .git .gitted */ cl_assert_equal_i(13, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(4, exp.file_dels); - cl_assert_equal_i(4, exp.file_mods); - cl_assert_equal_i(1, exp.file_ignored); - cl_assert_equal_i(4, exp.file_untracked); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_UNTRACKED]); cl_assert_equal_i(8, exp.hunks); @@ -107,11 +107,11 @@ void test_diff_workdir__to_tree(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(14, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(4, exp.file_dels); - cl_assert_equal_i(4, exp.file_mods); - cl_assert_equal_i(1, exp.file_ignored); - cl_assert_equal_i(5, exp.file_untracked); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(5, exp.file_status[GIT_DELTA_UNTRACKED]); } /* Since there is no git diff equivalent, let's just assume that the @@ -143,11 +143,11 @@ void test_diff_workdir__to_tree(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(15, exp.files); - cl_assert_equal_i(2, exp.file_adds); - cl_assert_equal_i(5, exp.file_dels); - cl_assert_equal_i(4, exp.file_mods); - cl_assert_equal_i(1, exp.file_ignored); - cl_assert_equal_i(3, exp.file_untracked); + cl_assert_equal_i(2, 
exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(5, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]); cl_assert_equal_i(11, exp.hunks); @@ -180,11 +180,11 @@ void test_diff_workdir__to_tree(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(16, exp.files); - cl_assert_equal_i(5, exp.file_adds); - cl_assert_equal_i(4, exp.file_dels); - cl_assert_equal_i(3, exp.file_mods); - cl_assert_equal_i(1, exp.file_ignored); - cl_assert_equal_i(3, exp.file_untracked); + cl_assert_equal_i(5, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(3, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]); cl_assert_equal_i(12, exp.hunks); @@ -228,11 +228,11 @@ void test_diff_workdir__to_index_with_pathspec(void) cl_git_pass(git_diff_foreach(diff, &exp, diff_file_fn, NULL, NULL)); cl_assert_equal_i(13, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(4, exp.file_dels); - cl_assert_equal_i(4, exp.file_mods); - cl_assert_equal_i(1, exp.file_ignored); - cl_assert_equal_i(4, exp.file_untracked); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(4, exp.file_status[GIT_DELTA_UNTRACKED]); } git_diff_list_free(diff); @@ -251,11 +251,11 @@ void test_diff_workdir__to_index_with_pathspec(void) cl_git_pass(git_diff_foreach(diff, &exp, diff_file_fn, NULL, NULL)); cl_assert_equal_i(1, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(1, 
exp.file_mods); - cl_assert_equal_i(0, exp.file_ignored); - cl_assert_equal_i(0, exp.file_untracked); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_UNTRACKED]); } git_diff_list_free(diff); @@ -274,11 +274,11 @@ void test_diff_workdir__to_index_with_pathspec(void) cl_git_pass(git_diff_foreach(diff, &exp, diff_file_fn, NULL, NULL)); cl_assert_equal_i(3, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(1, exp.file_dels); - cl_assert_equal_i(1, exp.file_mods); - cl_assert_equal_i(0, exp.file_ignored); - cl_assert_equal_i(1, exp.file_untracked); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNTRACKED]); } git_diff_list_free(diff); @@ -297,11 +297,11 @@ void test_diff_workdir__to_index_with_pathspec(void) cl_git_pass(git_diff_foreach(diff, &exp, diff_file_fn, NULL, NULL)); cl_assert_equal_i(2, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(2, exp.file_dels); - cl_assert_equal_i(0, exp.file_mods); - cl_assert_equal_i(0, exp.file_ignored); - cl_assert_equal_i(0, exp.file_untracked); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_UNTRACKED]); } git_diff_list_free(diff); @@ -337,7 +337,7 @@ void test_diff_workdir__filemode_changes(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(0, 
exp.files); - cl_assert_equal_i(0, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(0, exp.hunks); } @@ -360,7 +360,7 @@ void test_diff_workdir__filemode_changes(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, exp.files); - cl_assert_equal_i(1, exp.file_mods); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(0, exp.hunks); } @@ -393,7 +393,7 @@ void test_diff_workdir__filemode_changes_with_filemode_false(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(0, exp.files); - cl_assert_equal_i(0, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(0, exp.hunks); git_diff_list_free(diff); @@ -409,7 +409,7 @@ void test_diff_workdir__filemode_changes_with_filemode_false(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(0, exp.files); - cl_assert_equal_i(0, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(0, exp.hunks); git_diff_list_free(diff); @@ -456,9 +456,9 @@ void test_diff_workdir__head_index_and_workdir_all_differ(void) diff_i2t, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(1, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(1, exp.hunks); cl_assert_equal_i(2, exp.lines); cl_assert_equal_i(1, exp.line_ctxt); @@ -477,9 +477,9 @@ void test_diff_workdir__head_index_and_workdir_all_differ(void) diff_w2i, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(1, exp.file_mods); + 
cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(1, exp.hunks); cl_assert_equal_i(3, exp.lines); cl_assert_equal_i(2, exp.line_ctxt); @@ -500,9 +500,9 @@ void test_diff_workdir__head_index_and_workdir_all_differ(void) diff_i2t, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(1, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(1, exp.hunks); cl_assert_equal_i(3, exp.lines); cl_assert_equal_i(1, exp.line_ctxt); @@ -542,9 +542,9 @@ void test_diff_workdir__eof_newline_changes(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(0, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(0, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(0, exp.hunks); cl_assert_equal_i(0, exp.lines); cl_assert_equal_i(0, exp.line_ctxt); @@ -569,9 +569,9 @@ void test_diff_workdir__eof_newline_changes(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(1, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(1, exp.hunks); cl_assert_equal_i(2, exp.lines); cl_assert_equal_i(1, exp.line_ctxt); @@ -596,9 +596,9 @@ void 
test_diff_workdir__eof_newline_changes(void) diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn)); cl_assert_equal_i(1, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(1, exp.file_mods); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(1, exp.hunks); cl_assert_equal_i(3, exp.lines); cl_assert_equal_i(0, exp.line_ctxt); @@ -801,11 +801,11 @@ void test_diff_workdir__submodules(void) cl_assert_equal_i(10, exp.files); - cl_assert_equal_i(0, exp.file_adds); - cl_assert_equal_i(0, exp.file_dels); - cl_assert_equal_i(1, exp.file_mods); - cl_assert_equal_i(0, exp.file_ignored); - cl_assert_equal_i(9, exp.file_untracked); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_DELETED]); + cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); + cl_assert_equal_i(0, exp.file_status[GIT_DELTA_IGNORED]); + cl_assert_equal_i(9, exp.file_status[GIT_DELTA_UNTRACKED]); /* the following numbers match "git diff 873585" exactly */ diff --git a/tests-clar/repo/head.c b/tests-clar/repo/head.c index 22947168e..58f525e2b 100644 --- a/tests-clar/repo/head.c +++ b/tests-clar/repo/head.c @@ -180,8 +180,6 @@ void test_repo_head__retrieving_an_orphaned_head_returns_GIT_EORPHANEDHEAD(void) void test_repo_head__can_tell_if_an_orphaned_head_is_detached(void) { - git_reference *head; - make_head_orphaned(repo, NON_EXISTING_HEAD); cl_assert_equal_i(false, git_repository_head_detached(repo)); diff --git a/tests-clar/resources/renames/.gitted/HEAD b/tests-clar/resources/renames/.gitted/HEAD new file mode 100644 index 000000000..cb089cd89 --- /dev/null +++ b/tests-clar/resources/renames/.gitted/HEAD @@ -0,0 +1 @@ +ref: refs/heads/master diff --git a/tests-clar/resources/renames/.gitted/config b/tests-clar/resources/renames/.gitted/config 
new file mode 100644 index 000000000..bb4d11c1f --- /dev/null +++ b/tests-clar/resources/renames/.gitted/config @@ -0,0 +1,7 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true + ignorecase = true + precomposeunicode = false diff --git a/tests-clar/resources/renames/.gitted/description b/tests-clar/resources/renames/.gitted/description new file mode 100644 index 000000000..498b267a8 --- /dev/null +++ b/tests-clar/resources/renames/.gitted/description @@ -0,0 +1 @@ +Unnamed repository; edit this file 'description' to name the repository. diff --git a/tests-clar/resources/renames/.gitted/index b/tests-clar/resources/renames/.gitted/index new file mode 100644 index 0000000000000000000000000000000000000000..1fc69fcbefd9ccf13194b86b1493359f0f5962e6 GIT binary patch literal 272 zcmZ?q402{*U|<4b=76?;n}9R}jOJrtU|}rq6J}s&T*AP>_!TH6!oa|>D$P(n=J)gF z9Q8mh>qTGeH1u@3cQNo6rB>Rurcem1X9oLycnyXlL*Tn+K(#?gLsyl>74Y((}{6 du7nzB78M<(+dawb5?_5t|BE2K6H6U-004~dPl*5k literal 0 HcmV?d00001 diff --git a/tests-clar/resources/renames/.gitted/info/exclude b/tests-clar/resources/renames/.gitted/info/exclude new file mode 100644 index 000000000..a5196d1be --- /dev/null +++ b/tests-clar/resources/renames/.gitted/info/exclude @@ -0,0 +1,6 @@ +# git ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. 
+# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ diff --git a/tests-clar/resources/renames/.gitted/logs/HEAD b/tests-clar/resources/renames/.gitted/logs/HEAD new file mode 100644 index 000000000..34222ed7d --- /dev/null +++ b/tests-clar/resources/renames/.gitted/logs/HEAD @@ -0,0 +1,2 @@ +0000000000000000000000000000000000000000 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 Russell Belfer 1351024687 -0700 commit (initial): Initial commit +31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 2bc7f351d20b53f1c72c16c4b036e491c478c49a Russell Belfer 1351024817 -0700 commit: copy and rename with no change diff --git a/tests-clar/resources/renames/.gitted/logs/refs/heads/master b/tests-clar/resources/renames/.gitted/logs/refs/heads/master new file mode 100644 index 000000000..34222ed7d --- /dev/null +++ b/tests-clar/resources/renames/.gitted/logs/refs/heads/master @@ -0,0 +1,2 @@ +0000000000000000000000000000000000000000 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 Russell Belfer 1351024687 -0700 commit (initial): Initial commit +31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 2bc7f351d20b53f1c72c16c4b036e491c478c49a Russell Belfer 1351024817 -0700 commit: copy and rename with no change diff --git a/tests-clar/resources/renames/.gitted/objects/03/da7ad872536bd448da8d88eb7165338bf923a7 b/tests-clar/resources/renames/.gitted/objects/03/da7ad872536bd448da8d88eb7165338bf923a7 new file mode 100644 index 0000000000000000000000000000000000000000..2ee86444d7e36c2f48ae2a5e8a2f2aed2084d7b1 GIT binary patch literal 90 zcmV-g0HyzU0V^p=O;xZkVlXr?Ff%bxC{8UZ%gjsHE2$`9Sj*KHx{zf}$cjbtuI%vk we4b&xP#CVPEHy7Vvm`UM7_2nSP(J4O^X44&KrZV=U+gsWbh>u|0Hs(VQKqseFaQ7m literal 0 HcmV?d00001 diff --git a/tests-clar/resources/renames/.gitted/objects/2b/c7f351d20b53f1c72c16c4b036e491c478c49a b/tests-clar/resources/renames/.gitted/objects/2b/c7f351d20b53f1c72c16c4b036e491c478c49a new file mode 100644 index 
0000000000000000000000000000000000000000..93f1ccb3fd4755e9a1eae764590ffd5f8143559d GIT binary patch literal 173 zcmV;e08;;W0hNx~3BxcD1@rAHwt)oQ2ZRt(LORf~Ya49Y$TA__M=j*{@rIeSt?S%C zNPlS+5e#?9kRWFji`g*cqoC+9dP#|ED~vM2Dn0C>Vrd{bDw|#66r{S~WVTF5*&v9um6%A b-MZp}YFsc*wy lmRjy0lo4ENO9*F7vS2VYFfcPQQ7BF=OU+BpEXhnQ)+?zfVMsHSkNN$)IY&K^ z%X-llI}JUZ?p-jYnH9yUMP-?J>0p&>x%xsEvaAVNv1s0v9p0YLGt3tX!&K(yrRS%C MjmBvx01B=$oADqoEC2ui literal 0 HcmV?d00001 diff --git a/tests-clar/resources/renames/.gitted/objects/66/311f5cfbe7836c27510a3ba2f43e282e2c8bba b/tests-clar/resources/renames/.gitted/objects/66/311f5cfbe7836c27510a3ba2f43e282e2c8bba new file mode 100644 index 0000000000000000000000000000000000000000..5ee28a76ae3cbdab2977e29f0431dcb3571715d2 GIT binary patch literal 1155 zcmV-}1bq8=0d-YPZ{tP`?ODHqz1so0rvgO}Zi4`BH;Y9aAn0)f5M za=Oq*39TBf*Xhb(p_s=hWy#SkRA=j2ZttqqYY-DG!fe+kV_O=TQm}PSOkI$XOlQJf zb-oH*rGafA$!U_9BF!B5pTGXbDfQWeHi(Q-!{cP6zJxkdC*M+ULa|R z`c>2N66h50{>p#gK7LFrljBGxZMh_|?f@%PiUtXhi%~P6E|SOwjRP-&_w^UScB4o1 zOyfLd2N%C*j>)ch0&Ig?4!lqsc-{iW2@#RyPsXDW2O5oah=C_Ujifh7x70mmkCHKE zp9DShk<$;=1!-w!^8l;WcQlQHwCS)uqcxH7*Xkp=;yB?u^n6$S#;BfFam4>D83hhV z05`|YJaDuzDYAlmG%XW4#i{ghvK*StbsXgt?>eeYNSI_yQqhIi)MqIj}Affyd(rSn4$3ab3QfWZ z6g{hj;!$Z8nLMxAI{Ap;;m0Tc?u3Xkk;1LkY` zMimr+9an2MfLFQ;moNqoLRNzAL1{uFY1l5INad0ax}=aF7YZuPw}v5H3T$FVW49jS zD0|4;(fIAofszj6ia=0Rt%F9dy(fPktM? 
z4kNM*!pWHhYCmI2$9MY(fpM&iEPDv4U@YDwUO06i44a}GX5*-W}Gbl#$*qB~A literal 0 HcmV?d00001 diff --git a/tests-clar/resources/renames/.gitted/objects/ad/0a8e55a104ac54a8a29ed4b84b49e76837a113 b/tests-clar/resources/renames/.gitted/objects/ad/0a8e55a104ac54a8a29ed4b84b49e76837a113 new file mode 100644 index 0000000000000000000000000000000000000000..440b7bec3f8521484fd7ca4e8b7e64e9898bbf07 GIT binary patch literal 415 zcmV;Q0bu@k0bP>8j?*v@hB^Bw{yhOwT8RrBdMeVQt@Z!{q@JD4B(~z%mF*B z8ZQy_F--8*v(~b%r;74V>~n&i(9Ve_81aodCeAOd651$w>`B$Y&Hl+Wl`dECBCTA- z7c=`IZbb6?irv}1mnfcdgB!2tx7cE3O$Q>5zL=}R;P7U%J{zcAgD{OYG=Uo1S<@=FHgBPFO4qxYP)3L*&y*R#v J>JME$ Date: Tue, 30 Oct 2012 09:40:50 -0700 Subject: [PATCH 2/2] Move rename detection into new file This improves the naming for the rename related functionality moving it to be called `git_diff_find_similar()` and renaming all the associated constants, etc. to make more sense. I also moved the new code (plus the existing `git_diff_merge`) into a new file `diff_tform.c` where I can put new functions related to manipulating git diff lists. This also updates the implementation significantly from the last revision fixing some ordering issues (where break-rewrite needs to be handled prior to copy and rename detection) and improving config option handling. --- include/git2/diff.h | 48 ++-- src/diff.c | 342 ++-------------------------- src/diff.h | 2 + src/diff_tform.c | 466 +++++++++++++++++++++++++++++++++++++++ src/vector.c | 30 +++ src/vector.h | 3 + tests-clar/diff/rename.c | 10 +- 7 files changed, 559 insertions(+), 342 deletions(-) create mode 100644 src/diff_tform.c diff --git a/include/git2/diff.h b/include/git2/diff.h index f9dbb67e0..439215575 100644 --- a/include/git2/diff.h +++ b/include/git2/diff.h @@ -263,31 +263,41 @@ typedef struct git_diff_patch git_diff_patch; * Flags to control the behavior of diff rename/copy detection. 
*/ typedef enum { - /** should we look for renames */ - GIT_DIFF_DETECT_RENAMES = (1 << 0), - /** should we look for copies */ - GIT_DIFF_DETECT_COPIES = (1 << 1), - /** should we consider unmodified files as possible copy sources */ - GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED = (1 << 2), - /** should we split large rewrites into delete / add pairs */ - GIT_DIFF_DETECT_BREAK_REWRITES = (1 << 3), -} git_diff_detect_t; + /** look for renames? (`--find-renames`) */ + GIT_DIFF_FIND_RENAMES = (1 << 0), + /** consider old size of modified for renames? (`--break-rewrites=N`) */ + GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1 << 1), + + /** look for copies? (a la `--find-copies`) */ + GIT_DIFF_FIND_COPIES = (1 << 2), + /** consider unmodified as copy sources? (`--find-copies-harder`) */ + GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3), + + /** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */ + GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4), +} git_diff_find_t; /** * Control behavior of rename and copy detection */ typedef struct { - /** Combination of git_diff_detect_t values */ + /** Combination of git_diff_find_t values (default FIND_RENAMES) */ unsigned int flags; - /** Threshold on similarity index to consider a file renamed. */ + + /** Similarity to consider a file renamed (default 50) */ unsigned int rename_threshold; - /** Threshold on similarity index to consider a file a copy. */ + /** Similarity of modified to be eligible rename source (default 50) */ + unsigned int rename_from_rewrite_threshold; + /** Similarity to consider a file a copy (default 50) */ unsigned int copy_threshold; - /** Threshold on change % to split modify into delete/add pair. 
*/ + /** Similarity to split modify into delete/add pair (default 60) */ unsigned int break_rewrite_threshold; - /** Maximum rename/copy targets to check (diff.renameLimit) */ + + /** Maximum similarity sources to examine (a la diff's `-l` option or + * the `diff.renameLimit` config) (default 200) + */ unsigned int target_limit; -} git_diff_detect_options; +} git_diff_find_options; /** @name Diff List Generator Functions @@ -405,18 +415,20 @@ GIT_EXTERN(int) git_diff_merge( const git_diff_list *from); /** - * Update a diff list with file renames, copies, etc. + * Transform a diff list marking file renames, copies, etc. * * This modifies a diff list in place, replacing old entries that look * like renames or copies with new entries reflecting those changes. + * This also will, if requested, break modified files into add/remove + * pairs if the amount of change is above a threshold. * * @param diff Diff list to run detection algorithms on * @param options Control how detection should be run, NULL for defaults * @return 0 on success, -1 on failure */ -GIT_EXTERN(int) git_diff_detect( +GIT_EXTERN(int) git_diff_find_similar( git_diff_list *diff, - git_diff_detect_options *options); + git_diff_find_options *options); /**@}*/ diff --git a/src/diff.c b/src/diff.c index e2649ff3b..55f6ee7d5 100644 --- a/src/diff.c +++ b/src/diff.c @@ -110,85 +110,6 @@ static git_diff_delta *diff_delta__alloc( return delta; } -static git_diff_delta *diff_delta__dup( - const git_diff_delta *d, git_pool *pool) -{ - git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); - if (!delta) - return NULL; - - memcpy(delta, d, sizeof(git_diff_delta)); - - delta->old_file.path = git_pool_strdup(pool, d->old_file.path); - if (delta->old_file.path == NULL) - goto fail; - - if (d->new_file.path != d->old_file.path) { - delta->new_file.path = git_pool_strdup(pool, d->new_file.path); - if (delta->new_file.path == NULL) - goto fail; - } else { - delta->new_file.path = delta->old_file.path; - } - - 
return delta; - -fail: - git__free(delta); - return NULL; -} - -static git_diff_delta *diff_delta__merge_like_cgit( - const git_diff_delta *a, const git_diff_delta *b, git_pool *pool) -{ - git_diff_delta *dup; - - /* Emulate C git for merging two diffs (a la 'git diff '). - * - * When C git does a diff between the work dir and a tree, it actually - * diffs with the index but uses the workdir contents. This emulates - * those choices so we can emulate the type of diff. - * - * We have three file descriptions here, let's call them: - * f1 = a->old_file - * f2 = a->new_file AND b->old_file - * f3 = b->new_file - */ - - /* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ - if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) - return diff_delta__dup(a, pool); - - /* otherwise, base this diff on the 'b' diff */ - if ((dup = diff_delta__dup(b, pool)) == NULL) - return NULL; - - /* If 'a' status is uninteresting, then we're done */ - if (a->status == GIT_DELTA_UNMODIFIED) - return dup; - - assert(a->status != GIT_DELTA_UNMODIFIED); - assert(b->status != GIT_DELTA_UNMODIFIED); - - /* A cgit exception is that the diff of a file that is only in the - * index (i.e. not in HEAD nor workdir) is given as empty. 
- */ - if (dup->status == GIT_DELTA_DELETED) { - if (a->status == GIT_DELTA_ADDED) - dup->status = GIT_DELTA_UNMODIFIED; - /* else don't overwrite DELETE status */ - } else { - dup->status = a->status; - } - - git_oid_cpy(&dup->old_file.oid, &a->old_file.oid); - dup->old_file.mode = a->old_file.mode; - dup->old_file.size = a->old_file.size; - dup->old_file.flags = a->old_file.flags; - - return dup; -} - static int diff_delta__from_one( git_diff_list *diff, git_delta_t status, @@ -332,13 +253,34 @@ static char *diff_strdup_prefix(git_pool *pool, const char *prefix) return git_pool_strndup(pool, prefix, len + 1); } -static int diff_delta__cmp(const void *a, const void *b) +int git_diff_delta__cmp(const void *a, const void *b) { const git_diff_delta *da = a, *db = b; int val = strcmp(da->old_file.path, db->old_file.path); return val ? val : ((int)da->status - (int)db->status); } +bool git_diff_delta__should_skip( + const git_diff_options *opts, const git_diff_delta *delta) +{ + uint32_t flags = opts ? opts->flags : 0; + + if (delta->status == GIT_DELTA_UNMODIFIED && + (flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) + return true; + + if (delta->status == GIT_DELTA_IGNORED && + (flags & GIT_DIFF_INCLUDE_IGNORED) == 0) + return true; + + if (delta->status == GIT_DELTA_UNTRACKED && + (flags & GIT_DIFF_INCLUDE_UNTRACKED) == 0) + return true; + + return false; +} + + static int config_bool(git_config *cfg, const char *name, int defvalue) { int val = defvalue; @@ -361,7 +303,7 @@ static git_diff_list *git_diff_list_alloc( GIT_REFCOUNT_INC(diff); diff->repo = repo; - if (git_vector_init(&diff->deltas, 0, diff_delta__cmp) < 0 || + if (git_vector_init(&diff->deltas, 0, git_diff_delta__cmp) < 0 || git_pool_init(&diff->pool, 1, 0) < 0) goto fail; @@ -991,241 +933,3 @@ on_error: git_iterator_free(a); return error; } - - -bool git_diff_delta__should_skip( - const git_diff_options *opts, const git_diff_delta *delta) -{ - uint32_t flags = opts ? 
opts->flags : 0; - - if (delta->status == GIT_DELTA_UNMODIFIED && - (flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) - return true; - - if (delta->status == GIT_DELTA_IGNORED && - (flags & GIT_DIFF_INCLUDE_IGNORED) == 0) - return true; - - if (delta->status == GIT_DELTA_UNTRACKED && - (flags & GIT_DIFF_INCLUDE_UNTRACKED) == 0) - return true; - - return false; -} - - -int git_diff_merge( - git_diff_list *onto, - const git_diff_list *from) -{ - int error = 0; - git_pool onto_pool; - git_vector onto_new; - git_diff_delta *delta; - bool ignore_case = false; - unsigned int i, j; - - assert(onto && from); - - if (!from->deltas.length) - return 0; - - if (git_vector_init(&onto_new, onto->deltas.length, diff_delta__cmp) < 0 || - git_pool_init(&onto_pool, 1, 0) < 0) - return -1; - - if ((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 || - (from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0) - { - ignore_case = true; - - /* This function currently only supports merging diff lists that - * are sorted identically. */ - assert((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 && - (from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0); - } - - for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { - git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i); - const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j); - int cmp = !f ? -1 : !o ? 1 : STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path); - - if (cmp < 0) { - delta = diff_delta__dup(o, &onto_pool); - i++; - } else if (cmp > 0) { - delta = diff_delta__dup(f, &onto_pool); - j++; - } else { - delta = diff_delta__merge_like_cgit(o, f, &onto_pool); - i++; - j++; - } - - /* the ignore rules for the target may not match the source - * or the result of a merged delta could be skippable... - */ - if (git_diff_delta__should_skip(&onto->opts, delta)) { - git__free(delta); - continue; - } - - if ((error = !delta ? 
-1 : git_vector_insert(&onto_new, delta)) < 0) - break; - } - - if (!error) { - git_vector_swap(&onto->deltas, &onto_new); - git_pool_swap(&onto->pool, &onto_pool); - onto->new_src = from->new_src; - - /* prefix strings also come from old pool, so recreate those.*/ - onto->opts.old_prefix = - git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix); - onto->opts.new_prefix = - git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix); - } - - git_vector_foreach(&onto_new, i, delta) - git__free(delta); - git_vector_free(&onto_new); - git_pool_clear(&onto_pool); - - return error; -} - -#define DEFAULT_THRESHOLD 50 -#define DEFAULT_TARGET_LIMIT 200 - -int git_diff_detect( - git_diff_list *diff, - git_diff_detect_options *opts) -{ - int error = 0; - unsigned int i, j; - git_diff_delta *from, *to; - bool check_unmodified = opts && - (opts->flags & GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED) != 0; - int max_targets = (opts && opts->target_limit > 0) ? - opts->target_limit : DEFAULT_TARGET_LIMIT; - unsigned int rename_threshold = (opts && opts->rename_threshold > 0) ? - opts->rename_threshold : DEFAULT_THRESHOLD; - unsigned int copy_threshold = (opts && opts->copy_threshold > 0) ? 
- opts->copy_threshold : DEFAULT_THRESHOLD; - int num_deletes = 0, num_splits = 0; - - /* TODO: update opts from config diff.renameLimit / diff.renames */ - - git_vector_foreach(&diff->deltas, i, from) { - int tried_targets = 0; - - git_vector_foreach(&diff->deltas, j, to) { - unsigned int similarity = 0; - - if (i == j) - continue; - - switch (to->status) { - case GIT_DELTA_ADDED: - case GIT_DELTA_UNTRACKED: - case GIT_DELTA_RENAMED: - case GIT_DELTA_COPIED: - break; - default: - /* only those status values should be checked */ - continue; - } - - /* don't check UNMODIFIED files as source unless given option */ - if (from->status == GIT_DELTA_UNMODIFIED && !check_unmodified) - continue; - - /* cap on maximum files we'll examine */ - if (++tried_targets > max_targets) - break; - - /* calculate similarity and see if this pair beats the - * similarity score of the current best pair. - */ - if (git_oid_cmp(&from->old_file.oid, &to->new_file.oid) == 0) - similarity = 100; - /* TODO: insert actual similarity algo here */ - - if (similarity <= to->similarity) - continue; - - if (from->status == GIT_DELTA_DELETED) { - if (similarity < rename_threshold) - continue; - - /* merge "from" & "to" to a RENAMED record */ - to->status = GIT_DELTA_RENAMED; - memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); - - from->status = GIT_DELTA__TO_DELETE; - num_deletes++; - } else { - if (similarity < copy_threshold) - continue; - - /* convert "to" to a COPIED record */ - to->status = GIT_DELTA_COPIED; - memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); - } - } - - if (from->status == GIT_DELTA_MODIFIED && - opts && (opts->flags & GIT_DIFF_DETECT_BREAK_REWRITES) != 0) - { - /* TODO: calculate similarity and maybe mark for split */ - - /* from->status = GIT_DELTA__TO_SPLIT; */ - /* num_splits++; */ - } - } - - if (num_deletes > 0 || num_splits > 0) { - git_vector onto = GIT_VECTOR_INIT; - size_t new_size = diff->deltas.length + num_splits - num_deletes; - - if 
(git_vector_init(&onto, new_size, diff_delta__cmp) < 0) - return -1; - - /* build new delta list without TO_DELETE and splitting TO_SPLIT */ - git_vector_foreach(&diff->deltas, i, from) { - if (from->status == GIT_DELTA__TO_DELETE) { - git__free(from); - continue; - } - - if (from->status == GIT_DELTA__TO_SPLIT) { - git_diff_delta *deleted = diff_delta__dup(from, &diff->pool); - if (!deleted) - return -1; - - deleted->status = GIT_DELTA_DELETED; - memset(&deleted->new_file, 0, sizeof(deleted->new_file)); - deleted->new_file.path = deleted->old_file.path; - deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID; - - git_vector_insert(&onto, deleted); - - from->status = GIT_DELTA_ADDED; - memset(&from->old_file, 0, sizeof(from->old_file)); - from->old_file.path = from->new_file.path; - from->old_file.flags |= GIT_DIFF_FILE_VALID_OID; - } - - git_vector_insert(&onto, from); - } - - /* swap new delta list into place */ - - git_vector_sort(&onto); - git_vector_swap(&diff->deltas, &onto); - git_vector_free(&onto); - } - - return error; -} - diff --git a/src/diff.h b/src/diff.h index 61723bc9e..ed66439bf 100644 --- a/src/diff.h +++ b/src/diff.h @@ -48,6 +48,8 @@ extern void git_diff__cleanup_modes( extern void git_diff_list_addref(git_diff_list *diff); +extern int git_diff_delta__cmp(const void *a, const void *b); + extern bool git_diff_delta__should_skip( const git_diff_options *opts, const git_diff_delta *delta); diff --git a/src/diff_tform.c b/src/diff_tform.c new file mode 100644 index 000000000..987d4b8e6 --- /dev/null +++ b/src/diff_tform.c @@ -0,0 +1,466 @@ +/* + * Copyright (C) 2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#include "common.h" +#include "diff.h" +#include "git2/config.h" + +static git_diff_delta *diff_delta__dup( + const git_diff_delta *d, git_pool *pool) +{ + git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); + if (!delta) + return NULL; + + memcpy(delta, d, sizeof(git_diff_delta)); + + delta->old_file.path = git_pool_strdup(pool, d->old_file.path); + if (delta->old_file.path == NULL) + goto fail; + + if (d->new_file.path != d->old_file.path) { + delta->new_file.path = git_pool_strdup(pool, d->new_file.path); + if (delta->new_file.path == NULL) + goto fail; + } else { + delta->new_file.path = delta->old_file.path; + } + + return delta; + +fail: + git__free(delta); + return NULL; +} + +static git_diff_delta *diff_delta__merge_like_cgit( + const git_diff_delta *a, const git_diff_delta *b, git_pool *pool) +{ + git_diff_delta *dup; + + /* Emulate C git for merging two diffs (a la 'git diff '). + * + * When C git does a diff between the work dir and a tree, it actually + * diffs with the index but uses the workdir contents. This emulates + * those choices so we can emulate the type of diff. + * + * We have three file descriptions here, let's call them: + * f1 = a->old_file + * f2 = a->new_file AND b->old_file + * f3 = b->new_file + */ + + /* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ + if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) + return diff_delta__dup(a, pool); + + /* otherwise, base this diff on the 'b' diff */ + if ((dup = diff_delta__dup(b, pool)) == NULL) + return NULL; + + /* If 'a' status is uninteresting, then we're done */ + if (a->status == GIT_DELTA_UNMODIFIED) + return dup; + + assert(a->status != GIT_DELTA_UNMODIFIED); + assert(b->status != GIT_DELTA_UNMODIFIED); + + /* A cgit exception is that the diff of a file that is only in the + * index (i.e. not in HEAD nor workdir) is given as empty. 
+ */ + if (dup->status == GIT_DELTA_DELETED) { + if (a->status == GIT_DELTA_ADDED) + dup->status = GIT_DELTA_UNMODIFIED; + /* else don't overwrite DELETE status */ + } else { + dup->status = a->status; + } + + git_oid_cpy(&dup->old_file.oid, &a->old_file.oid); + dup->old_file.mode = a->old_file.mode; + dup->old_file.size = a->old_file.size; + dup->old_file.flags = a->old_file.flags; + + return dup; +} + +int git_diff_merge( + git_diff_list *onto, + const git_diff_list *from) +{ + int error = 0; + git_pool onto_pool; + git_vector onto_new; + git_diff_delta *delta; + bool ignore_case = false; + unsigned int i, j; + + assert(onto && from); + + if (!from->deltas.length) + return 0; + + if (git_vector_init( + &onto_new, onto->deltas.length, git_diff_delta__cmp) < 0 || + git_pool_init(&onto_pool, 1, 0) < 0) + return -1; + + if ((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 || + (from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0) + { + ignore_case = true; + + /* This function currently only supports merging diff lists that + * are sorted identically. */ + assert((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 && + (from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0); + } + + for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { + git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i); + const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j); + int cmp = !f ? -1 : !o ? 1 : STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path); + + if (cmp < 0) { + delta = diff_delta__dup(o, &onto_pool); + i++; + } else if (cmp > 0) { + delta = diff_delta__dup(f, &onto_pool); + j++; + } else { + delta = diff_delta__merge_like_cgit(o, f, &onto_pool); + i++; + j++; + } + + /* the ignore rules for the target may not match the source + * or the result of a merged delta could be skippable... + */ + if (git_diff_delta__should_skip(&onto->opts, delta)) { + git__free(delta); + continue; + } + + if ((error = !delta ? 
-1 : git_vector_insert(&onto_new, delta)) < 0) + break; + } + + if (!error) { + git_vector_swap(&onto->deltas, &onto_new); + git_pool_swap(&onto->pool, &onto_pool); + onto->new_src = from->new_src; + + /* prefix strings also come from old pool, so recreate those.*/ + onto->opts.old_prefix = + git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix); + onto->opts.new_prefix = + git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix); + } + + git_vector_foreach(&onto_new, i, delta) + git__free(delta); + git_vector_free(&onto_new); + git_pool_clear(&onto_pool); + + return error; +} + +#define DEFAULT_THRESHOLD 50 +#define DEFAULT_BREAK_REWRITE_THRESHOLD 60 +#define DEFAULT_TARGET_LIMIT 200 + +static int normalize_find_opts( + git_diff_list *diff, + git_diff_find_options *opts, + git_diff_find_options *given) +{ + git_config *cfg = NULL; + const char *val; + + if (diff->repo != NULL && + git_repository_config__weakptr(&cfg, diff->repo) < 0) + return -1; + + if (given != NULL) + memcpy(opts, given, sizeof(*opts)); + else { + memset(opts, 0, sizeof(*opts)); + + opts->flags = GIT_DIFF_FIND_RENAMES; + + if (git_config_get_string(&val, cfg, "diff.renames") < 0) + giterr_clear(); + else if (val && + (!strcasecmp(val, "copies") || !strcasecmp(val, "copy"))) + opts->flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; + } + + /* some flags imply others */ + + if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) + opts->flags |= GIT_DIFF_FIND_RENAMES; + + if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) + opts->flags |= GIT_DIFF_FIND_COPIES; + +#define USE_DEFAULT(X) ((X) == 0 || (X) > 100) + + if (USE_DEFAULT(opts->rename_threshold)) + opts->rename_threshold = DEFAULT_THRESHOLD; + + if (USE_DEFAULT(opts->rename_from_rewrite_threshold)) + opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD; + + if (USE_DEFAULT(opts->copy_threshold)) + opts->copy_threshold = DEFAULT_THRESHOLD; + + if (USE_DEFAULT(opts->break_rewrite_threshold)) + opts->break_rewrite_threshold = 
DEFAULT_BREAK_REWRITE_THRESHOLD; + +#undef USE_DEFAULT + + if (!opts->target_limit) { + int32_t limit = 0; + + opts->target_limit = DEFAULT_TARGET_LIMIT; + + if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0) + giterr_clear(); + else if (limit > 0) + opts->target_limit = limit; + } + + return 0; +} + +static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size) +{ + git_vector onto = GIT_VECTOR_INIT; + size_t i; + git_diff_delta *delta; + + if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0) + return -1; + + /* build new delta list without TO_DELETE and splitting TO_SPLIT */ + git_vector_foreach(&diff->deltas, i, delta) { + if (delta->status == GIT_DELTA__TO_DELETE) { + git__free(delta); + continue; + } + + if (delta->status == GIT_DELTA__TO_SPLIT) { + git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool); + if (!deleted) + return -1; + + deleted->status = GIT_DELTA_DELETED; + memset(&deleted->new_file, 0, sizeof(deleted->new_file)); + deleted->new_file.path = deleted->old_file.path; + deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID; + + git_vector_insert(&onto, deleted); + + delta->status = GIT_DELTA_ADDED; + memset(&delta->old_file, 0, sizeof(delta->old_file)); + delta->old_file.path = delta->new_file.path; + delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + } + + git_vector_insert(&onto, delta); + } + + /* swap new delta list into place */ + git_vector_sort(&onto); + git_vector_swap(&diff->deltas, &onto); + git_vector_free(&onto); + + return 0; +} + +static unsigned int calc_similarity( + void *cache, git_diff_file *old_file, git_diff_file *new_file) +{ + GIT_UNUSED(cache); + + if (git_oid_cmp(&old_file->oid, &new_file->oid) == 0) + return 100; + + /* TODO: insert actual similarity algo here */ + + return 0; +} + +#define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0) + +int git_diff_find_similar( + git_diff_list *diff, + git_diff_find_options *given_opts) +{ + unsigned int i, j, similarity; + 
git_diff_delta *from, *to; + git_diff_find_options opts; + unsigned int tried_targets, num_changes = 0; + git_vector matches = GIT_VECTOR_INIT; + + if (normalize_find_opts(diff, &opts, given_opts) < 0) + return -1; + + /* first do splits if requested */ + + if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) { + git_vector_foreach(&diff->deltas, i, from) { + if (from->status != GIT_DELTA_MODIFIED) + continue; + + /* Right now, this doesn't work right because the similarity + * algorithm isn't actually implemented... + */ + similarity = 100; + /* calc_similarity(NULL, &from->old_file, from->new_file); */ + + if (similarity < opts.break_rewrite_threshold) { + from->status = GIT_DELTA__TO_SPLIT; + num_changes++; + } + } + + /* apply splits as needed */ + if (num_changes > 0 && + apply_splits_and_deletes( + diff, diff->deltas.length + num_changes) < 0) + return -1; + } + + /* next find the most similar delta for each rename / copy candidate */ + + if (git_vector_init(&matches, diff->deltas.length, git_diff_delta__cmp) < 0) + return -1; + + git_vector_foreach(&diff->deltas, i, from) { + tried_targets = 0; + + git_vector_foreach(&diff->deltas, j, to) { + if (i == j) + continue; + + switch (to->status) { + case GIT_DELTA_ADDED: + case GIT_DELTA_UNTRACKED: + case GIT_DELTA_RENAMED: + case GIT_DELTA_COPIED: + break; + default: + /* only the above status values should be checked */ + continue; + } + + /* skip all but DELETED files unless copy detection is on */ + if (from->status != GIT_DELTA_DELETED && + !FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) + continue; + + /* don't check UNMODIFIED files as source unless given option */ + if (from->status == GIT_DELTA_UNMODIFIED && + !FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) + continue; + + /* cap on maximum files we'll examine */ + if (++tried_targets > opts.target_limit) + break; + + /* calculate similarity and see if this pair beats the + * similarity score of the current best pair. 
+ */ + similarity = calc_similarity(NULL, &from->old_file, &to->new_file); + + if (to->similarity < similarity) { + to->similarity = similarity; + if (git_vector_set(NULL, &matches, j, from) < 0) + return -1; + } + } + } + + /* next rewrite the diffs with renames / copies */ + + num_changes = 0; + + git_vector_foreach(&diff->deltas, j, to) { + from = GIT_VECTOR_GET(&matches, j); + if (!from) { + assert(to->similarity == 0); + continue; + } + + /* three possible outcomes here: + * 1. old DELETED and if over rename threshold, + * new becomes RENAMED and old goes away + * 2. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and + * old is more similar to new than it is to itself, in which + * case, new becomes RENAMED and old becomes ADDED + * 3. otherwise if over copy threshold, new becomes COPIED + */ + + if (from->status == GIT_DELTA_DELETED) { + if (to->similarity < opts.rename_threshold) { + to->similarity = 0; + continue; + } + + to->status = GIT_DELTA_RENAMED; + memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); + + from->status = GIT_DELTA__TO_DELETE; + num_changes++; + + continue; + } + + if (from->status == GIT_DELTA_MODIFIED && + FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && + to->similarity > opts.rename_threshold) + { + similarity = 100; + /* calc_similarity(NULL, &from->old_file, from->new_file); */ + + if (similarity < opts.rename_from_rewrite_threshold) { + to->status = GIT_DELTA_RENAMED; + memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); + + from->status = GIT_DELTA_ADDED; + memset(&from->old_file, 0, sizeof(from->old_file)); + from->old_file.path = to->old_file.path; + from->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + + continue; + } + } + + if (to->similarity < opts.copy_threshold) { + to->similarity = 0; + continue; + } + + /* convert "to" to a COPIED record */ + to->status = GIT_DELTA_COPIED; + memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); + } + + git_vector_free(&matches); + + if (num_changes
> 0) { + assert(num_changes < diff->deltas.length); + + if (apply_splits_and_deletes( + diff, diff->deltas.length - num_changes) < 0) + return -1; + } + + return 0; +} + +#undef FLAG_SET diff --git a/src/vector.c b/src/vector.c index c6a644cc3..e56b97849 100644 --- a/src/vector.c +++ b/src/vector.c @@ -241,3 +241,33 @@ void git_vector_swap(git_vector *a, git_vector *b) memcpy(a, b, sizeof(t)); memcpy(b, &t, sizeof(t)); } + +int git_vector_resize_to(git_vector *v, size_t new_length) +{ + if (new_length <= v->length) + return 0; + + while (new_length >= v->_alloc_size) + if (resize_vector(v) < 0) + return -1; + + memset(&v->contents[v->length], 0, + sizeof(void *) * (new_length - v->length)); + + v->length = new_length; + + return 0; +} + +int git_vector_set(void **old, git_vector *v, size_t position, void *value) +{ + if (git_vector_resize_to(v, position + 1) < 0) + return -1; + + if (old != NULL) + *old = v->contents[position]; + + v->contents[position] = value; + + return 0; +} diff --git a/src/vector.h b/src/vector.h index 49ba754f0..8886371e8 100644 --- a/src/vector.h +++ b/src/vector.h @@ -76,4 +76,7 @@ int git_vector_remove(git_vector *v, unsigned int idx); void git_vector_pop(git_vector *v); void git_vector_uniq(git_vector *v); +int git_vector_resize_to(git_vector *v, size_t new_length); +int git_vector_set(void **old, git_vector *v, size_t position, void *value); + #endif diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c index 8a50fd5ea..0ee1db842 100644 --- a/tests-clar/diff/rename.c +++ b/tests-clar/diff/rename.c @@ -34,14 +34,14 @@ void test_diff_rename__match_oid(void) git_tree *old_tree, *new_tree; git_diff_list *diff; git_diff_options diffopts = {0}; - git_diff_detect_options opts; + git_diff_find_options opts; diff_expects exp; old_tree = resolve_commit_oid_to_tree(g_repo, old_sha); new_tree = resolve_commit_oid_to_tree(g_repo, new_sha); /* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate - * --find-copies-harder during 
rename detection... + * --find-copies-harder during rename transformation... */ memset(&diffopts, 0, sizeof(diffopts)); diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED; @@ -65,7 +65,7 @@ void test_diff_rename__match_oid(void) /* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \ * 2bc7f351d20b53f1c72c16c4b036e491c478c49a */ - cl_git_pass(git_diff_detect(diff, NULL)); + cl_git_pass(git_diff_find_similar(diff, NULL)); memset(&exp, 0, sizeof(exp)); cl_git_pass(git_diff_foreach( @@ -86,8 +86,8 @@ void test_diff_rename__match_oid(void) * 2bc7f351d20b53f1c72c16c4b036e491c478c49a */ memset(&opts, 0, sizeof(opts)); - opts.flags = GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED; - cl_git_pass(git_diff_detect(diff, &opts)); + opts.flags = GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED; + cl_git_pass(git_diff_find_similar(diff, &opts)); memset(&exp, 0, sizeof(exp)); cl_git_pass(git_diff_foreach(