mirror of
https://git.proxmox.com/git/libgit2
synced 2025-05-03 06:17:02 +00:00
Move rename detection into new file
This improves the naming of the rename-related functionality, moving it to be called `git_diff_find_similar()` and renaming all the associated constants, etc., to make more sense. I also moved the new code (plus the existing `git_diff_merge`) into a new file, `diff_tform.c`, where I can put new functions related to manipulating git diff lists. This also updates the implementation significantly from the last revision, fixing some ordering issues (break-rewrite needs to be handled prior to copy and rename detection) and improving config option handling.
This commit is contained in:
parent
b4f5bb0747
commit
db106d01f0
@ -263,31 +263,41 @@ typedef struct git_diff_patch git_diff_patch;
|
||||
* Flags to control the behavior of diff rename/copy detection.
|
||||
*/
|
||||
typedef enum {
|
||||
/** should we look for renames */
|
||||
GIT_DIFF_DETECT_RENAMES = (1 << 0),
|
||||
/** should we look for copies */
|
||||
GIT_DIFF_DETECT_COPIES = (1 << 1),
|
||||
/** should we consider unmodified files as possible copy sources */
|
||||
GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED = (1 << 2),
|
||||
/** should we split large rewrites into delete / add pairs */
|
||||
GIT_DIFF_DETECT_BREAK_REWRITES = (1 << 3),
|
||||
} git_diff_detect_t;
|
||||
/** look for renames? (`--find-renames`) */
|
||||
GIT_DIFF_FIND_RENAMES = (1 << 0),
|
||||
/** consider old size of modified for renames? (`--break-rewrites=N`) */
|
||||
GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1 << 1),
|
||||
|
||||
/** look for copies? (a la `--find-copies`) */
|
||||
GIT_DIFF_FIND_COPIES = (1 << 2),
|
||||
/** consider unmodified as copy sources? (`--find-copies-harder`) */
|
||||
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3),
|
||||
|
||||
/** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */
|
||||
GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4),
|
||||
} git_diff_find_t;
|
||||
|
||||
/**
|
||||
* Control behavior of rename and copy detection
|
||||
*/
|
||||
typedef struct {
|
||||
/** Combination of git_diff_detect_t values */
|
||||
/** Combination of git_diff_find_t values (default FIND_RENAMES) */
|
||||
unsigned int flags;
|
||||
/** Threshold on similarity index to consider a file renamed. */
|
||||
|
||||
/** Similarity to consider a file renamed (default 50) */
|
||||
unsigned int rename_threshold;
|
||||
/** Threshold on similarity index to consider a file a copy. */
|
||||
/** Similarity of modified to be eligible rename source (default 50) */
|
||||
unsigned int rename_from_rewrite_threshold;
|
||||
/** Similarity to consider a file a copy (default 50) */
|
||||
unsigned int copy_threshold;
|
||||
/** Threshold on change % to split modify into delete/add pair. */
|
||||
/** Similarity to split modify into delete/add pair (default 60) */
|
||||
unsigned int break_rewrite_threshold;
|
||||
/** Maximum rename/copy targets to check (diff.renameLimit) */
|
||||
|
||||
/** Maximum similarity sources to examine (a la diff's `-l` option or
|
||||
* the `diff.renameLimit` config) (default 200)
|
||||
*/
|
||||
unsigned int target_limit;
|
||||
} git_diff_detect_options;
|
||||
} git_diff_find_options;
|
||||
|
||||
|
||||
/** @name Diff List Generator Functions
|
||||
@ -405,18 +415,20 @@ GIT_EXTERN(int) git_diff_merge(
|
||||
const git_diff_list *from);
|
||||
|
||||
/**
|
||||
* Update a diff list with file renames, copies, etc.
|
||||
* Transform a diff list marking file renames, copies, etc.
|
||||
*
|
||||
* This modifies a diff list in place, replacing old entries that look
|
||||
* like renames or copies with new entries reflecting those changes.
|
||||
* This also will, if requested, break modified files into add/remove
|
||||
* pairs if the amount of change is above a threshold.
|
||||
*
|
||||
* @param diff Diff list to run detection algorithms on
|
||||
* @param options Control how detection should be run, NULL for defaults
|
||||
* @return 0 on success, -1 on failure
|
||||
*/
|
||||
GIT_EXTERN(int) git_diff_detect(
|
||||
GIT_EXTERN(int) git_diff_find_similar(
|
||||
git_diff_list *diff,
|
||||
git_diff_detect_options *options);
|
||||
git_diff_find_options *options);
|
||||
|
||||
/**@}*/
|
||||
|
||||
|
342
src/diff.c
342
src/diff.c
@ -110,85 +110,6 @@ static git_diff_delta *diff_delta__alloc(
|
||||
return delta;
|
||||
}
|
||||
|
||||
static git_diff_delta *diff_delta__dup(
|
||||
const git_diff_delta *d, git_pool *pool)
|
||||
{
|
||||
git_diff_delta *delta = git__malloc(sizeof(git_diff_delta));
|
||||
if (!delta)
|
||||
return NULL;
|
||||
|
||||
memcpy(delta, d, sizeof(git_diff_delta));
|
||||
|
||||
delta->old_file.path = git_pool_strdup(pool, d->old_file.path);
|
||||
if (delta->old_file.path == NULL)
|
||||
goto fail;
|
||||
|
||||
if (d->new_file.path != d->old_file.path) {
|
||||
delta->new_file.path = git_pool_strdup(pool, d->new_file.path);
|
||||
if (delta->new_file.path == NULL)
|
||||
goto fail;
|
||||
} else {
|
||||
delta->new_file.path = delta->old_file.path;
|
||||
}
|
||||
|
||||
return delta;
|
||||
|
||||
fail:
|
||||
git__free(delta);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static git_diff_delta *diff_delta__merge_like_cgit(
|
||||
const git_diff_delta *a, const git_diff_delta *b, git_pool *pool)
|
||||
{
|
||||
git_diff_delta *dup;
|
||||
|
||||
/* Emulate C git for merging two diffs (a la 'git diff <sha>').
|
||||
*
|
||||
* When C git does a diff between the work dir and a tree, it actually
|
||||
* diffs with the index but uses the workdir contents. This emulates
|
||||
* those choices so we can emulate the type of diff.
|
||||
*
|
||||
* We have three file descriptions here, let's call them:
|
||||
* f1 = a->old_file
|
||||
* f2 = a->new_file AND b->old_file
|
||||
* f3 = b->new_file
|
||||
*/
|
||||
|
||||
/* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */
|
||||
if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED)
|
||||
return diff_delta__dup(a, pool);
|
||||
|
||||
/* otherwise, base this diff on the 'b' diff */
|
||||
if ((dup = diff_delta__dup(b, pool)) == NULL)
|
||||
return NULL;
|
||||
|
||||
/* If 'a' status is uninteresting, then we're done */
|
||||
if (a->status == GIT_DELTA_UNMODIFIED)
|
||||
return dup;
|
||||
|
||||
assert(a->status != GIT_DELTA_UNMODIFIED);
|
||||
assert(b->status != GIT_DELTA_UNMODIFIED);
|
||||
|
||||
/* A cgit exception is that the diff of a file that is only in the
|
||||
* index (i.e. not in HEAD nor workdir) is given as empty.
|
||||
*/
|
||||
if (dup->status == GIT_DELTA_DELETED) {
|
||||
if (a->status == GIT_DELTA_ADDED)
|
||||
dup->status = GIT_DELTA_UNMODIFIED;
|
||||
/* else don't overwrite DELETE status */
|
||||
} else {
|
||||
dup->status = a->status;
|
||||
}
|
||||
|
||||
git_oid_cpy(&dup->old_file.oid, &a->old_file.oid);
|
||||
dup->old_file.mode = a->old_file.mode;
|
||||
dup->old_file.size = a->old_file.size;
|
||||
dup->old_file.flags = a->old_file.flags;
|
||||
|
||||
return dup;
|
||||
}
|
||||
|
||||
static int diff_delta__from_one(
|
||||
git_diff_list *diff,
|
||||
git_delta_t status,
|
||||
@ -332,13 +253,34 @@ static char *diff_strdup_prefix(git_pool *pool, const char *prefix)
|
||||
return git_pool_strndup(pool, prefix, len + 1);
|
||||
}
|
||||
|
||||
static int diff_delta__cmp(const void *a, const void *b)
|
||||
int git_diff_delta__cmp(const void *a, const void *b)
|
||||
{
|
||||
const git_diff_delta *da = a, *db = b;
|
||||
int val = strcmp(da->old_file.path, db->old_file.path);
|
||||
return val ? val : ((int)da->status - (int)db->status);
|
||||
}
|
||||
|
||||
bool git_diff_delta__should_skip(
|
||||
const git_diff_options *opts, const git_diff_delta *delta)
|
||||
{
|
||||
uint32_t flags = opts ? opts->flags : 0;
|
||||
|
||||
if (delta->status == GIT_DELTA_UNMODIFIED &&
|
||||
(flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0)
|
||||
return true;
|
||||
|
||||
if (delta->status == GIT_DELTA_IGNORED &&
|
||||
(flags & GIT_DIFF_INCLUDE_IGNORED) == 0)
|
||||
return true;
|
||||
|
||||
if (delta->status == GIT_DELTA_UNTRACKED &&
|
||||
(flags & GIT_DIFF_INCLUDE_UNTRACKED) == 0)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static int config_bool(git_config *cfg, const char *name, int defvalue)
|
||||
{
|
||||
int val = defvalue;
|
||||
@ -361,7 +303,7 @@ static git_diff_list *git_diff_list_alloc(
|
||||
GIT_REFCOUNT_INC(diff);
|
||||
diff->repo = repo;
|
||||
|
||||
if (git_vector_init(&diff->deltas, 0, diff_delta__cmp) < 0 ||
|
||||
if (git_vector_init(&diff->deltas, 0, git_diff_delta__cmp) < 0 ||
|
||||
git_pool_init(&diff->pool, 1, 0) < 0)
|
||||
goto fail;
|
||||
|
||||
@ -991,241 +933,3 @@ on_error:
|
||||
git_iterator_free(a);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
bool git_diff_delta__should_skip(
|
||||
const git_diff_options *opts, const git_diff_delta *delta)
|
||||
{
|
||||
uint32_t flags = opts ? opts->flags : 0;
|
||||
|
||||
if (delta->status == GIT_DELTA_UNMODIFIED &&
|
||||
(flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0)
|
||||
return true;
|
||||
|
||||
if (delta->status == GIT_DELTA_IGNORED &&
|
||||
(flags & GIT_DIFF_INCLUDE_IGNORED) == 0)
|
||||
return true;
|
||||
|
||||
if (delta->status == GIT_DELTA_UNTRACKED &&
|
||||
(flags & GIT_DIFF_INCLUDE_UNTRACKED) == 0)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
int git_diff_merge(
|
||||
git_diff_list *onto,
|
||||
const git_diff_list *from)
|
||||
{
|
||||
int error = 0;
|
||||
git_pool onto_pool;
|
||||
git_vector onto_new;
|
||||
git_diff_delta *delta;
|
||||
bool ignore_case = false;
|
||||
unsigned int i, j;
|
||||
|
||||
assert(onto && from);
|
||||
|
||||
if (!from->deltas.length)
|
||||
return 0;
|
||||
|
||||
if (git_vector_init(&onto_new, onto->deltas.length, diff_delta__cmp) < 0 ||
|
||||
git_pool_init(&onto_pool, 1, 0) < 0)
|
||||
return -1;
|
||||
|
||||
if ((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 ||
|
||||
(from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0)
|
||||
{
|
||||
ignore_case = true;
|
||||
|
||||
/* This function currently only supports merging diff lists that
|
||||
* are sorted identically. */
|
||||
assert((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 &&
|
||||
(from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0);
|
||||
}
|
||||
|
||||
for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) {
|
||||
git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i);
|
||||
const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j);
|
||||
int cmp = !f ? -1 : !o ? 1 : STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path);
|
||||
|
||||
if (cmp < 0) {
|
||||
delta = diff_delta__dup(o, &onto_pool);
|
||||
i++;
|
||||
} else if (cmp > 0) {
|
||||
delta = diff_delta__dup(f, &onto_pool);
|
||||
j++;
|
||||
} else {
|
||||
delta = diff_delta__merge_like_cgit(o, f, &onto_pool);
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
|
||||
/* the ignore rules for the target may not match the source
|
||||
* or the result of a merged delta could be skippable...
|
||||
*/
|
||||
if (git_diff_delta__should_skip(&onto->opts, delta)) {
|
||||
git__free(delta);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!error) {
|
||||
git_vector_swap(&onto->deltas, &onto_new);
|
||||
git_pool_swap(&onto->pool, &onto_pool);
|
||||
onto->new_src = from->new_src;
|
||||
|
||||
/* prefix strings also come from old pool, so recreate those.*/
|
||||
onto->opts.old_prefix =
|
||||
git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix);
|
||||
onto->opts.new_prefix =
|
||||
git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix);
|
||||
}
|
||||
|
||||
git_vector_foreach(&onto_new, i, delta)
|
||||
git__free(delta);
|
||||
git_vector_free(&onto_new);
|
||||
git_pool_clear(&onto_pool);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
#define DEFAULT_THRESHOLD 50
|
||||
#define DEFAULT_TARGET_LIMIT 200
|
||||
|
||||
int git_diff_detect(
|
||||
git_diff_list *diff,
|
||||
git_diff_detect_options *opts)
|
||||
{
|
||||
int error = 0;
|
||||
unsigned int i, j;
|
||||
git_diff_delta *from, *to;
|
||||
bool check_unmodified = opts &&
|
||||
(opts->flags & GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED) != 0;
|
||||
int max_targets = (opts && opts->target_limit > 0) ?
|
||||
opts->target_limit : DEFAULT_TARGET_LIMIT;
|
||||
unsigned int rename_threshold = (opts && opts->rename_threshold > 0) ?
|
||||
opts->rename_threshold : DEFAULT_THRESHOLD;
|
||||
unsigned int copy_threshold = (opts && opts->copy_threshold > 0) ?
|
||||
opts->copy_threshold : DEFAULT_THRESHOLD;
|
||||
int num_deletes = 0, num_splits = 0;
|
||||
|
||||
/* TODO: update opts from config diff.renameLimit / diff.renames */
|
||||
|
||||
git_vector_foreach(&diff->deltas, i, from) {
|
||||
int tried_targets = 0;
|
||||
|
||||
git_vector_foreach(&diff->deltas, j, to) {
|
||||
unsigned int similarity = 0;
|
||||
|
||||
if (i == j)
|
||||
continue;
|
||||
|
||||
switch (to->status) {
|
||||
case GIT_DELTA_ADDED:
|
||||
case GIT_DELTA_UNTRACKED:
|
||||
case GIT_DELTA_RENAMED:
|
||||
case GIT_DELTA_COPIED:
|
||||
break;
|
||||
default:
|
||||
/* only those status values should be checked */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* don't check UNMODIFIED files as source unless given option */
|
||||
if (from->status == GIT_DELTA_UNMODIFIED && !check_unmodified)
|
||||
continue;
|
||||
|
||||
/* cap on maximum files we'll examine */
|
||||
if (++tried_targets > max_targets)
|
||||
break;
|
||||
|
||||
/* calculate similarity and see if this pair beats the
|
||||
* similarity score of the current best pair.
|
||||
*/
|
||||
if (git_oid_cmp(&from->old_file.oid, &to->new_file.oid) == 0)
|
||||
similarity = 100;
|
||||
/* TODO: insert actual similarity algo here */
|
||||
|
||||
if (similarity <= to->similarity)
|
||||
continue;
|
||||
|
||||
if (from->status == GIT_DELTA_DELETED) {
|
||||
if (similarity < rename_threshold)
|
||||
continue;
|
||||
|
||||
/* merge "from" & "to" to a RENAMED record */
|
||||
to->status = GIT_DELTA_RENAMED;
|
||||
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
|
||||
|
||||
from->status = GIT_DELTA__TO_DELETE;
|
||||
num_deletes++;
|
||||
} else {
|
||||
if (similarity < copy_threshold)
|
||||
continue;
|
||||
|
||||
/* convert "to" to a COPIED record */
|
||||
to->status = GIT_DELTA_COPIED;
|
||||
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
|
||||
}
|
||||
}
|
||||
|
||||
if (from->status == GIT_DELTA_MODIFIED &&
|
||||
opts && (opts->flags & GIT_DIFF_DETECT_BREAK_REWRITES) != 0)
|
||||
{
|
||||
/* TODO: calculate similarity and maybe mark for split */
|
||||
|
||||
/* from->status = GIT_DELTA__TO_SPLIT; */
|
||||
/* num_splits++; */
|
||||
}
|
||||
}
|
||||
|
||||
if (num_deletes > 0 || num_splits > 0) {
|
||||
git_vector onto = GIT_VECTOR_INIT;
|
||||
size_t new_size = diff->deltas.length + num_splits - num_deletes;
|
||||
|
||||
if (git_vector_init(&onto, new_size, diff_delta__cmp) < 0)
|
||||
return -1;
|
||||
|
||||
/* build new delta list without TO_DELETE and splitting TO_SPLIT */
|
||||
git_vector_foreach(&diff->deltas, i, from) {
|
||||
if (from->status == GIT_DELTA__TO_DELETE) {
|
||||
git__free(from);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (from->status == GIT_DELTA__TO_SPLIT) {
|
||||
git_diff_delta *deleted = diff_delta__dup(from, &diff->pool);
|
||||
if (!deleted)
|
||||
return -1;
|
||||
|
||||
deleted->status = GIT_DELTA_DELETED;
|
||||
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
|
||||
deleted->new_file.path = deleted->old_file.path;
|
||||
deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
|
||||
|
||||
git_vector_insert(&onto, deleted);
|
||||
|
||||
from->status = GIT_DELTA_ADDED;
|
||||
memset(&from->old_file, 0, sizeof(from->old_file));
|
||||
from->old_file.path = from->new_file.path;
|
||||
from->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
|
||||
}
|
||||
|
||||
git_vector_insert(&onto, from);
|
||||
}
|
||||
|
||||
/* swap new delta list into place */
|
||||
|
||||
git_vector_sort(&onto);
|
||||
git_vector_swap(&diff->deltas, &onto);
|
||||
git_vector_free(&onto);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -48,6 +48,8 @@ extern void git_diff__cleanup_modes(
|
||||
|
||||
extern void git_diff_list_addref(git_diff_list *diff);
|
||||
|
||||
extern int git_diff_delta__cmp(const void *a, const void *b);
|
||||
|
||||
extern bool git_diff_delta__should_skip(
|
||||
const git_diff_options *opts, const git_diff_delta *delta);
|
||||
|
||||
|
466
src/diff_tform.c
Normal file
466
src/diff_tform.c
Normal file
@ -0,0 +1,466 @@
|
||||
/*
|
||||
* Copyright (C) 2012 the libgit2 contributors
|
||||
*
|
||||
* This file is part of libgit2, distributed under the GNU GPL v2 with
|
||||
* a Linking Exception. For full terms see the included COPYING file.
|
||||
*/
|
||||
#include "common.h"
|
||||
#include "diff.h"
|
||||
#include "git2/config.h"
|
||||
|
||||
/* Allocate a copy of delta `d`.
 *
 * The struct itself comes from git__malloc(); the path strings are
 * duplicated with git_pool_strdup() on `pool`.  If the source delta uses
 * the very same pointer for its old and new path, the copy shares one
 * duplicated string as well.
 *
 * Returns the new delta, or NULL if any allocation fails.
 */
static git_diff_delta *diff_delta__dup(
	const git_diff_delta *d, git_pool *pool)
{
	git_diff_delta *copy = git__malloc(sizeof(git_diff_delta));

	if (copy == NULL)
		return NULL;

	memcpy(copy, d, sizeof(git_diff_delta));

	copy->old_file.path = git_pool_strdup(pool, d->old_file.path);
	if (!copy->old_file.path) {
		git__free(copy);
		return NULL;
	}

	/* same pointer on both sides: keep sharing a single string */
	if (d->new_file.path == d->old_file.path) {
		copy->new_file.path = copy->old_file.path;
		return copy;
	}

	copy->new_file.path = git_pool_strdup(pool, d->new_file.path);
	if (!copy->new_file.path) {
		git__free(copy);
		return NULL;
	}

	return copy;
}
|
||||
|
||||
/* Combine two deltas for the same path the way C git does for
 * 'git diff <sha>'.
 *
 * When C git diffs the work dir against a tree, it actually diffs with
 * the index but uses the workdir contents.  With
 *    f1 = a->old_file
 *    f2 = a->new_file AND b->old_file
 *    f3 = b->new_file
 * the result describes f1 -> f3.
 *
 * Returns a newly allocated delta (strings on `pool`), or NULL on
 * allocation failure.
 */
static git_diff_delta *diff_delta__merge_like_cgit(
	const git_diff_delta *a, const git_diff_delta *b, git_pool *pool)
{
	git_diff_delta *merged;

	/* f2 == f3, or f2 is deleted: the 'a' delta already says it all */
	if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED)
		return diff_delta__dup(a, pool);

	/* everything else starts out as a copy of the 'b' delta */
	merged = diff_delta__dup(b, pool);
	if (merged == NULL)
		return NULL;

	/* nothing interesting happened in 'a', so 'b' stands as is */
	if (a->status == GIT_DELTA_UNMODIFIED)
		return merged;

	assert(a->status != GIT_DELTA_UNMODIFIED);
	assert(b->status != GIT_DELTA_UNMODIFIED);

	/* A cgit exception: a file that exists only in the index (i.e. not
	 * in HEAD nor workdir) is reported as an empty diff.  A DELETED 'b'
	 * otherwise keeps its DELETED status.
	 */
	if (merged->status != GIT_DELTA_DELETED)
		merged->status = a->status;
	else if (a->status == GIT_DELTA_ADDED)
		merged->status = GIT_DELTA_UNMODIFIED;

	/* the old side of the merged delta is always f1 */
	git_oid_cpy(&merged->old_file.oid, &a->old_file.oid);
	merged->old_file.mode  = a->old_file.mode;
	merged->old_file.size  = a->old_file.size;
	merged->old_file.flags = a->old_file.flags;

	return merged;
}
|
||||
|
||||
int git_diff_merge(
|
||||
git_diff_list *onto,
|
||||
const git_diff_list *from)
|
||||
{
|
||||
int error = 0;
|
||||
git_pool onto_pool;
|
||||
git_vector onto_new;
|
||||
git_diff_delta *delta;
|
||||
bool ignore_case = false;
|
||||
unsigned int i, j;
|
||||
|
||||
assert(onto && from);
|
||||
|
||||
if (!from->deltas.length)
|
||||
return 0;
|
||||
|
||||
if (git_vector_init(
|
||||
&onto_new, onto->deltas.length, git_diff_delta__cmp) < 0 ||
|
||||
git_pool_init(&onto_pool, 1, 0) < 0)
|
||||
return -1;
|
||||
|
||||
if ((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 ||
|
||||
(from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0)
|
||||
{
|
||||
ignore_case = true;
|
||||
|
||||
/* This function currently only supports merging diff lists that
|
||||
* are sorted identically. */
|
||||
assert((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 &&
|
||||
(from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0);
|
||||
}
|
||||
|
||||
for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) {
|
||||
git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i);
|
||||
const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j);
|
||||
int cmp = !f ? -1 : !o ? 1 : STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path);
|
||||
|
||||
if (cmp < 0) {
|
||||
delta = diff_delta__dup(o, &onto_pool);
|
||||
i++;
|
||||
} else if (cmp > 0) {
|
||||
delta = diff_delta__dup(f, &onto_pool);
|
||||
j++;
|
||||
} else {
|
||||
delta = diff_delta__merge_like_cgit(o, f, &onto_pool);
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
|
||||
/* the ignore rules for the target may not match the source
|
||||
* or the result of a merged delta could be skippable...
|
||||
*/
|
||||
if (git_diff_delta__should_skip(&onto->opts, delta)) {
|
||||
git__free(delta);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!error) {
|
||||
git_vector_swap(&onto->deltas, &onto_new);
|
||||
git_pool_swap(&onto->pool, &onto_pool);
|
||||
onto->new_src = from->new_src;
|
||||
|
||||
/* prefix strings also come from old pool, so recreate those.*/
|
||||
onto->opts.old_prefix =
|
||||
git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix);
|
||||
onto->opts.new_prefix =
|
||||
git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix);
|
||||
}
|
||||
|
||||
git_vector_foreach(&onto_new, i, delta)
|
||||
git__free(delta);
|
||||
git_vector_free(&onto_new);
|
||||
git_pool_clear(&onto_pool);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
#define DEFAULT_THRESHOLD 50
|
||||
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60
|
||||
#define DEFAULT_TARGET_LIMIT 200
|
||||
|
||||
static int normalize_find_opts(
|
||||
git_diff_list *diff,
|
||||
git_diff_find_options *opts,
|
||||
git_diff_find_options *given)
|
||||
{
|
||||
git_config *cfg = NULL;
|
||||
const char *val;
|
||||
|
||||
if (diff->repo != NULL &&
|
||||
git_repository_config__weakptr(&cfg, diff->repo) < 0)
|
||||
return -1;
|
||||
|
||||
if (given != NULL)
|
||||
memcpy(opts, given, sizeof(*opts));
|
||||
else {
|
||||
memset(opts, 0, sizeof(*opts));
|
||||
|
||||
opts->flags = GIT_DIFF_FIND_RENAMES;
|
||||
|
||||
if (git_config_get_string(&val, cfg, "diff.renames") < 0)
|
||||
giterr_clear();
|
||||
else if (val &&
|
||||
(!strcasecmp(val, "copies") || !strcasecmp(val, "copy")))
|
||||
opts->flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES;
|
||||
}
|
||||
|
||||
/* some flags imply others */
|
||||
|
||||
if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES)
|
||||
opts->flags |= GIT_DIFF_FIND_RENAMES;
|
||||
|
||||
if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)
|
||||
opts->flags |= GIT_DIFF_FIND_COPIES;
|
||||
|
||||
#define USE_DEFAULT(X) ((X) == 0 || (X) > 100)
|
||||
|
||||
if (USE_DEFAULT(opts->rename_threshold))
|
||||
opts->rename_threshold = DEFAULT_THRESHOLD;
|
||||
|
||||
if (USE_DEFAULT(opts->rename_from_rewrite_threshold))
|
||||
opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD;
|
||||
|
||||
if (USE_DEFAULT(opts->copy_threshold))
|
||||
opts->copy_threshold = DEFAULT_THRESHOLD;
|
||||
|
||||
if (USE_DEFAULT(opts->break_rewrite_threshold))
|
||||
opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD;
|
||||
|
||||
#undef USE_DEFAULT
|
||||
|
||||
if (!opts->target_limit) {
|
||||
int32_t limit = 0;
|
||||
|
||||
opts->target_limit = DEFAULT_TARGET_LIMIT;
|
||||
|
||||
if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0)
|
||||
giterr_clear();
|
||||
else if (limit > 0)
|
||||
opts->target_limit = limit;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Rebuild the delta list of `diff`, dropping every delta marked
 * GIT_DELTA__TO_DELETE and turning every delta marked GIT_DELTA__TO_SPLIT
 * into a DELETED + ADDED pair.
 *
 * `expected_size` is passed to git_vector_init() as the initial size of
 * the replacement vector.
 *
 * The work is done in two passes so that a failure leaves `diff`
 * untouched: the first pass only allocates and collects, the second pass
 * (which cannot fail) frees and rewrites the existing deltas.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
{
	git_vector onto = GIT_VECTOR_INIT;
	size_t i;
	git_diff_delta *delta, *deleted;

	if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0)
		return -1;

	/* pass 1: build new delta list without TO_DELETE, adding the
	 * "deleted" half for each TO_SPLIT.  Nothing in diff->deltas is
	 * modified or freed yet.
	 */
	git_vector_foreach(&diff->deltas, i, delta) {
		if (delta->status == GIT_DELTA__TO_DELETE)
			continue;

		if (delta->status == GIT_DELTA__TO_SPLIT) {
			deleted = diff_delta__dup(delta, &diff->pool);
			if (!deleted)
				goto on_error;

			/* TO_DELETE deltas never enter `onto`, so the status doubles
			 * as a marker for "half created by this function" until the
			 * final DELETED status is set in pass 2.
			 */
			deleted->status = GIT_DELTA__TO_DELETE;
			memset(&deleted->new_file, 0, sizeof(deleted->new_file));
			deleted->new_file.path = deleted->old_file.path;
			deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID;

			if (git_vector_insert(&onto, deleted) < 0) {
				git__free(deleted);
				goto on_error;
			}
		}

		if (git_vector_insert(&onto, delta) < 0)
			goto on_error;
	}

	/* pass 2: cannot fail past this point */

	git_vector_foreach(&onto, i, delta) {
		if (delta->status == GIT_DELTA__TO_DELETE)
			delta->status = GIT_DELTA_DELETED;
	}

	git_vector_foreach(&diff->deltas, i, delta) {
		if (delta->status == GIT_DELTA__TO_DELETE) {
			git__free(delta);
		} else if (delta->status == GIT_DELTA__TO_SPLIT) {
			/* the original delta becomes the "added" half */
			delta->status = GIT_DELTA_ADDED;
			memset(&delta->old_file, 0, sizeof(delta->old_file));
			delta->old_file.path = delta->new_file.path;
			delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
		}
	}

	/* swap new delta list into place */
	git_vector_sort(&onto);
	git_vector_swap(&diff->deltas, &onto);
	git_vector_free(&onto);

	return 0;

on_error:
	/* release only the halves allocated here; every other entry is still
	 * owned by diff->deltas
	 */
	git_vector_foreach(&onto, i, delta) {
		if (delta->status == GIT_DELTA__TO_DELETE)
			git__free(delta);
	}
	git_vector_free(&onto);

	return -1;
}
|
||||
|
||||
/* Score how similar `old_file` and `new_file` are.
 *
 * Currently only exact matches are recognized: files whose oids compare
 * equal score 100, everything else scores 0.  `cache` is unused.
 */
static unsigned int calc_similarity(
	void *cache, git_diff_file *old_file, git_diff_file *new_file)
{
	unsigned int score = 0;

	GIT_UNUSED(cache);

	if (!git_oid_cmp(&old_file->oid, &new_file->oid))
		score = 100;

	/* TODO: insert actual similarity algo here */

	return score;
}
|
||||
|
||||
#define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0)
|
||||
|
||||
int git_diff_find_similar(
|
||||
git_diff_list *diff,
|
||||
git_diff_find_options *given_opts)
|
||||
{
|
||||
unsigned int i, j, similarity;
|
||||
git_diff_delta *from, *to;
|
||||
git_diff_find_options opts;
|
||||
unsigned int tried_targets, num_changes = 0;
|
||||
git_vector matches = GIT_VECTOR_INIT;
|
||||
|
||||
if (normalize_find_opts(diff, &opts, given_opts) < 0)
|
||||
return -1;
|
||||
|
||||
/* first do splits if requested */
|
||||
|
||||
if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) {
|
||||
git_vector_foreach(&diff->deltas, i, from) {
|
||||
if (from->status != GIT_DELTA_MODIFIED)
|
||||
continue;
|
||||
|
||||
/* Right now, this doesn't work right because the similarity
|
||||
* algorithm isn't actually implemented...
|
||||
*/
|
||||
similarity = 100;
|
||||
/* calc_similarity(NULL, &from->old_file, from->new_file); */
|
||||
|
||||
if (similarity < opts.break_rewrite_threshold) {
|
||||
from->status = GIT_DELTA__TO_SPLIT;
|
||||
num_changes++;
|
||||
}
|
||||
}
|
||||
|
||||
/* apply splits as needed */
|
||||
if (num_changes > 0 &&
|
||||
apply_splits_and_deletes(
|
||||
diff, diff->deltas.length + num_changes) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* next find the most similar delta for each rename / copy candidate */
|
||||
|
||||
if (git_vector_init(&matches, diff->deltas.length, git_diff_delta__cmp) < 0)
|
||||
return -1;
|
||||
|
||||
git_vector_foreach(&diff->deltas, i, from) {
|
||||
tried_targets = 0;
|
||||
|
||||
git_vector_foreach(&diff->deltas, j, to) {
|
||||
if (i == j)
|
||||
continue;
|
||||
|
||||
switch (to->status) {
|
||||
case GIT_DELTA_ADDED:
|
||||
case GIT_DELTA_UNTRACKED:
|
||||
case GIT_DELTA_RENAMED:
|
||||
case GIT_DELTA_COPIED:
|
||||
break;
|
||||
default:
|
||||
/* only the above status values should be checked */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* skip all but DELETED files unless copy detection is on */
|
||||
if (from->status != GIT_DELTA_DELETED &&
|
||||
!FLAG_SET(opts, GIT_DIFF_FIND_COPIES))
|
||||
continue;
|
||||
|
||||
/* don't check UNMODIFIED files as source unless given option */
|
||||
if (from->status == GIT_DELTA_UNMODIFIED &&
|
||||
!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
|
||||
continue;
|
||||
|
||||
/* cap on maximum files we'll examine */
|
||||
if (++tried_targets > opts.target_limit)
|
||||
break;
|
||||
|
||||
/* calculate similarity and see if this pair beats the
|
||||
* similarity score of the current best pair.
|
||||
*/
|
||||
similarity = calc_similarity(NULL, &from->old_file, &to->new_file);
|
||||
|
||||
if (to->similarity < similarity) {
|
||||
to->similarity = similarity;
|
||||
if (git_vector_set(NULL, &matches, j, from) < 0)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* next rewrite the diffs with renames / copies */
|
||||
|
||||
num_changes = 0;
|
||||
|
||||
git_vector_foreach(&diff->deltas, j, to) {
|
||||
from = GIT_VECTOR_GET(&matches, j);
|
||||
if (!from) {
|
||||
assert(to->similarity == 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* three possible outcomes here:
|
||||
* 1. old DELETED and if over rename threshold,
|
||||
* new becomes RENAMED and old goes away
|
||||
* 2. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and
|
||||
* old is more similar to new than it is to itself, in which
|
||||
* case, new becomes RENAMED and old becomed ADDED
|
||||
* 3. otherwise if over copy threshold, new becomes COPIED
|
||||
*/
|
||||
|
||||
if (from->status == GIT_DELTA_DELETED) {
|
||||
if (to->similarity < opts.rename_threshold) {
|
||||
to->similarity = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
to->status = GIT_DELTA_RENAMED;
|
||||
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
|
||||
|
||||
from->status = GIT_DELTA__TO_DELETE;
|
||||
num_changes++;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (from->status == GIT_DELTA_MODIFIED &&
|
||||
FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
|
||||
to->similarity > opts.rename_threshold)
|
||||
{
|
||||
similarity = 100;
|
||||
/* calc_similarity(NULL, &from->old_file, from->new_file); */
|
||||
|
||||
if (similarity < opts.rename_from_rewrite_threshold) {
|
||||
to->status = GIT_DELTA_RENAMED;
|
||||
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
|
||||
|
||||
from->status = GIT_DELTA_ADDED;
|
||||
memset(&from->old_file, 0, sizeof(from->old_file));
|
||||
from->old_file.path = to->old_file.path;
|
||||
from->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (to->similarity < opts.copy_threshold) {
|
||||
to->similarity = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* convert "to" to a COPIED record */
|
||||
to->status = GIT_DELTA_COPIED;
|
||||
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
|
||||
}
|
||||
|
||||
git_vector_free(&matches);
|
||||
|
||||
if (num_changes > 0) {
|
||||
assert(num_changes < diff->deltas.length);
|
||||
|
||||
if (apply_splits_and_deletes(
|
||||
diff, diff->deltas.length - num_changes) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#undef FLAG_SET
|
30
src/vector.c
30
src/vector.c
@ -241,3 +241,33 @@ void git_vector_swap(git_vector *a, git_vector *b)
|
||||
memcpy(a, b, sizeof(t));
|
||||
memcpy(b, &t, sizeof(t));
|
||||
}
|
||||
|
||||
/* Grow `v` so that it holds `new_length` entries, filling the newly
 * exposed slots with NULL.  A `new_length` that is not larger than the
 * current length leaves the vector unchanged.
 *
 * Returns 0 on success, -1 if resize_vector() fails.
 */
int git_vector_resize_to(git_vector *v, size_t new_length)
{
	size_t added;

	if (v->length >= new_length)
		return 0;

	/* keep growing the backing store until it is strictly larger than
	 * the requested length
	 */
	while (v->_alloc_size <= new_length) {
		if (resize_vector(v) < 0)
			return -1;
	}

	added = new_length - v->length;
	memset(&v->contents[v->length], 0, sizeof(void *) * added);

	v->length = new_length;

	return 0;
}
|
||||
|
||||
/* Store `value` at index `position` of `v`, first growing the vector via
 * git_vector_resize_to() so that the index is in range.  If `old` is
 * non-NULL it receives the entry that was previously at that index.
 *
 * Returns 0 on success, -1 if the vector could not be grown.
 */
int git_vector_set(void **old, git_vector *v, size_t position, void *value)
{
	void **slot;

	if (git_vector_resize_to(v, position + 1) < 0)
		return -1;

	slot = &v->contents[position];

	if (old)
		*old = *slot;

	*slot = value;

	return 0;
}
|
||||
|
@ -76,4 +76,7 @@ int git_vector_remove(git_vector *v, unsigned int idx);
|
||||
void git_vector_pop(git_vector *v);
|
||||
void git_vector_uniq(git_vector *v);
|
||||
|
||||
int git_vector_resize_to(git_vector *v, size_t new_length);
|
||||
int git_vector_set(void **old, git_vector *v, size_t position, void *value);
|
||||
|
||||
#endif
|
||||
|
@ -34,14 +34,14 @@ void test_diff_rename__match_oid(void)
|
||||
git_tree *old_tree, *new_tree;
|
||||
git_diff_list *diff;
|
||||
git_diff_options diffopts = {0};
|
||||
git_diff_detect_options opts;
|
||||
git_diff_find_options opts;
|
||||
diff_expects exp;
|
||||
|
||||
old_tree = resolve_commit_oid_to_tree(g_repo, old_sha);
|
||||
new_tree = resolve_commit_oid_to_tree(g_repo, new_sha);
|
||||
|
||||
/* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate
|
||||
* --find-copies-harder during rename detection...
|
||||
* --find-copies-harder during rename transformation...
|
||||
*/
|
||||
memset(&diffopts, 0, sizeof(diffopts));
|
||||
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED;
|
||||
@ -65,7 +65,7 @@ void test_diff_rename__match_oid(void)
|
||||
/* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
|
||||
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
|
||||
*/
|
||||
cl_git_pass(git_diff_detect(diff, NULL));
|
||||
cl_git_pass(git_diff_find_similar(diff, NULL));
|
||||
|
||||
memset(&exp, 0, sizeof(exp));
|
||||
cl_git_pass(git_diff_foreach(
|
||||
@ -86,8 +86,8 @@ void test_diff_rename__match_oid(void)
|
||||
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
|
||||
*/
|
||||
memset(&opts, 0, sizeof(opts));
|
||||
opts.flags = GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED;
|
||||
cl_git_pass(git_diff_detect(diff, &opts));
|
||||
opts.flags = GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED;
|
||||
cl_git_pass(git_diff_find_similar(diff, &opts));
|
||||
|
||||
memset(&exp, 0, sizeof(exp));
|
||||
cl_git_pass(git_diff_foreach(
|
||||
|
Loading…
Reference in New Issue
Block a user