From 713e11e0b1ad72d6905641669655fe860767c310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 11 May 2015 13:24:53 +0200 Subject: [PATCH 1/5] index: use a diff to perform update_all We currently iterate over all the entries and re-add them to the index. While this provides correctness, it is wasteful as we try to re-insert files which have not changed. Instead, take a diff between the index and the worktree and only re-add those which we already know have changed. --- src/index.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/src/index.c b/src/index.c index 7c0c31052..d673f4921 100644 --- a/src/index.c +++ b/src/index.c @@ -2758,18 +2758,95 @@ int git_index_remove_all( return error; } +struct foreach_diff_data { + git_index *index; + const git_pathspec *pathspec; + git_index_matched_path_cb cb; + void *payload; +}; + +static int add_each_file(const git_diff_delta *delta, float progress, void *payload) +{ + struct foreach_diff_data *data = payload; + const char *match, *path; + int error = 0; + + GIT_UNUSED(progress); + + /* We only want those we already have in the index */ + if (delta->status != GIT_DELTA_MODIFIED && + delta->status != GIT_DELTA_TYPECHANGE && + delta->status != GIT_DELTA_DELETED) + return 0; + + path = delta->old_file.path; + + /* We only want those which match the pathspecs */ + if (!git_pathspec__match( + &data->pathspec->pathspec, path, false, (bool)data->index->ignore_case, + &match, NULL)) + return 0; + + if (data->cb) + error = data->cb(path, match, data->payload); + + if (error > 0) /* skip this entry */ + return 0; + if (error < 0) /* actual error */ + return error; + + if (delta->status == GIT_DELTA_DELETED) + error = git_index_remove_bypath(data->index, path); + else + error = git_index_add_bypath(data->index, path); + + return error; +} + int git_index_update_all( git_index *index, const git_strarray *pathspec, git_index_matched_path_cb cb, void *payload) { - int error = index_apply_to_all( - index, INDEX_ACTION_UPDATE, pathspec, cb, payload); + int error; + git_repository *repo; + git_diff *diff; + git_pathspec ps; + struct foreach_diff_data data = { + index, + NULL, + cb, + payload, + }; + + repo = INDEX_OWNER(index); + + if (!repo) { + return create_index_error(-1, + "cannot run update; the index is not backed up by a repository."); + } + + /* + * We do the matching ourselves intead of passing the list to + * diff because we want to tell the callback which one + * matched, which we do not know if we ask diff to filter for us. + */ + if ((error = git_pathspec__init(&ps, pathspec)) < 0) + return error; + + if ((error = git_diff_index_to_workdir(&diff, repo, index, NULL)) < 0) + goto cleanup; + + data.pathspec = &ps; + error = git_diff_foreach(diff, add_each_file, NULL, NULL, &data); + git_diff_free(diff); if (error) /* make sure error is set if callback stopped iteration */ giterr_set_after_callback(error); +cleanup: + git_pathspec__clear(&ps); return error; } From 197307f6b1b12c9cf00a0e6802001fd18171bea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 11 May 2015 15:19:15 +0200 Subject: [PATCH 2/5] index: refactor diff-based update_all to match other applies Refactor so we look like the code we're replacing, which should also allow us to more easily inplement add-all. --- src/index.c | 167 +++++++++++++++++++++++++++------------------------- 1 file changed, 87 insertions(+), 80 deletions(-) diff --git a/src/index.c b/src/index.c index d673f4921..d80ff0509 100644 --- a/src/index.c +++ b/src/index.c @@ -2668,6 +2668,92 @@ enum { INDEX_ACTION_REMOVE = 2, }; +struct foreach_diff_data { + git_index *index; + const git_pathspec *pathspec; + git_index_matched_path_cb cb; + void *payload; +}; + +static int apply_each_file(const git_diff_delta *delta, float progress, void *payload) +{ + struct foreach_diff_data *data = payload; + const char *match, *path; + int error = 0; + + GIT_UNUSED(progress); + + path = delta->old_file.path; + + /* We only want those which match the pathspecs */ + if (!git_pathspec__match( + &data->pathspec->pathspec, path, false, (bool)data->index->ignore_case, + &match, NULL)) + return 0; + + if (data->cb) + error = data->cb(path, match, data->payload); + + if (error > 0) /* skip this entry */ + return 0; + if (error < 0) /* actual error */ + return error; + + if (delta->status == GIT_DELTA_DELETED) + error = git_index_remove_bypath(data->index, path); + else + error = git_index_add_bypath(data->index, path); + + return error; +} + +static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths, + git_index_matched_path_cb cb, void *payload) +{ + int error; + git_diff *diff; + git_pathspec ps; + git_repository *repo; + struct foreach_diff_data data = { + index, + NULL, + cb, + payload, + }; + + assert(index); + assert(action == INDEX_ACTION_UPDATE); + + repo = INDEX_OWNER(index); + + if (!repo) { + return create_index_error(-1, + "cannot run update; the index is not backed up by a repository."); + } + + /* + * We do the matching ourselves intead of passing the list to + * diff because we want to tell the callback which one + * matched, which we do not know if we ask diff to filter for us. + */ + if ((error = git_pathspec__init(&ps, paths)) < 0) + return error; + + if ((error = git_diff_index_to_workdir(&diff, repo, index, NULL)) < 0) + goto cleanup; + + data.pathspec = &ps; + error = git_diff_foreach(diff, apply_each_file, NULL, NULL, &data); + git_diff_free(diff); + + if (error) /* make sure error is set if callback stopped iteration */ + giterr_set_after_callback(error); + +cleanup: + git_pathspec__clear(&ps); + return error; +} + static int index_apply_to_all( git_index *index, int action, @@ -2758,95 +2844,16 @@ int git_index_remove_all( return error; } -struct foreach_diff_data { - git_index *index; - const git_pathspec *pathspec; - git_index_matched_path_cb cb; - void *payload; -}; - -static int add_each_file(const git_diff_delta *delta, float progress, void *payload) -{ - struct foreach_diff_data *data = payload; - const char *match, *path; - int error = 0; - - GIT_UNUSED(progress); - - /* We only want those we already have in the index */ - if (delta->status != GIT_DELTA_MODIFIED && - delta->status != GIT_DELTA_TYPECHANGE && - delta->status != GIT_DELTA_DELETED) - return 0; - - path = delta->old_file.path; - - /* We only want those which match the pathspecs */ - if (!git_pathspec__match( - &data->pathspec->pathspec, path, false, (bool)data->index->ignore_case, - &match, NULL)) - return 0; - - if (data->cb) - error = data->cb(path, match, data->payload); - - if (error > 0) /* skip this entry */ - return 0; - if (error < 0) /* actual error */ - return error; - - if (delta->status == GIT_DELTA_DELETED) - error = git_index_remove_bypath(data->index, path); - else - error = git_index_add_bypath(data->index, path); - - return error; -} - int git_index_update_all( git_index *index, const git_strarray *pathspec, git_index_matched_path_cb cb, void *payload) { - int error; - git_repository *repo; - git_diff *diff; - git_pathspec ps; - struct foreach_diff_data data = { - index, - NULL, - cb, - payload, - }; - - repo = INDEX_OWNER(index); - - if (!repo) { - return create_index_error(-1, - "cannot run update; the index is not backed up by a repository."); - } - - /* - * We do the matching ourselves intead of passing the list to - * diff because we want to tell the callback which one - * matched, which we do not know if we ask diff to filter for us. - */ - if ((error = git_pathspec__init(&ps, pathspec)) < 0) - return error; - - if ((error = git_diff_index_to_workdir(&diff, repo, index, NULL)) < 0) - goto cleanup; - - data.pathspec = &ps; - error = git_diff_foreach(diff, add_each_file, NULL, NULL, &data); - git_diff_free(diff); - + int error = index_apply_to_wd_diff(index, INDEX_ACTION_UPDATE, pathspec, cb, payload); if (error) /* make sure error is set if callback stopped iteration */ giterr_set_after_callback(error); -cleanup: - git_pathspec__clear(&ps); return error; } From 0a78a52ed9c6945151536d0f19aa39b081a4a8ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 11 May 2015 16:09:09 +0200 Subject: [PATCH 3/5] index: make add_all to act on a diff Instead of going through each entry we have and re-adding, which may not even be correct for certain crlf options and has bad performance, use the function which performs a diff against the worktree and try to add and remove files from that list. --- src/index.c | 108 ++++++++++++++-------------------------------------- 1 file changed, 28 insertions(+), 80 deletions(-) diff --git a/src/index.c b/src/index.c index d80ff0509..3d0f4d14d 100644 --- a/src/index.c +++ b/src/index.c @@ -24,6 +24,10 @@ #include "git2/config.h" #include "git2/sys/index.h" +static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths, + unsigned int flags, + git_index_matched_path_cb cb, void *payload); + #define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7) #define short_entry_size(len) entry_size(struct entry_short, len) #define long_entry_size(len) entry_size(struct entry_long, len) @@ -2554,6 +2558,13 @@ git_repository *git_index_owner(const git_index *index) return INDEX_OWNER(index); } +enum { + INDEX_ACTION_NONE = 0, + INDEX_ACTION_UPDATE = 1, + INDEX_ACTION_REMOVE = 2, + INDEX_ACTION_ADDALL = 3, +}; + int git_index_add_all( git_index *index, const git_strarray *paths, @@ -2564,29 +2575,15 @@ int git_index_add_all( int error; git_repository *repo; git_iterator *wditer = NULL; - const git_index_entry *wd = NULL; - git_index_entry *entry; git_pathspec ps; - const char *match; - size_t existing; bool no_fnmatch = (flags & GIT_INDEX_ADD_DISABLE_PATHSPEC_MATCH) != 0; - int ignorecase; - git_oid blobid; assert(index); - if (INDEX_OWNER(index) == NULL) - return create_index_error(-1, - "Could not add paths to index. " - "Index is not backed up by an existing repository."); - repo = INDEX_OWNER(index); if ((error = git_repository__ensure_not_bare(repo, "index add all")) < 0) return error; - if (git_repository__cvar(&ignorecase, repo, GIT_CVAR_IGNORECASE) < 0) - return -1; - if ((error = git_pathspec__init(&ps, paths)) < 0) return error; @@ -2597,63 +2594,10 @@ int git_index_add_all( repo, &ps.pathspec, no_fnmatch)) < 0) goto cleanup; - if ((error = git_iterator_for_workdir( - &wditer, repo, NULL, NULL, 0, ps.prefix, ps.prefix)) < 0) - goto cleanup; + error = index_apply_to_wd_diff(index, INDEX_ACTION_ADDALL, paths, flags, cb, payload); - while (!(error = git_iterator_advance(&wd, wditer))) { - - /* check if path actually matches */ - if (!git_pathspec__match( - &ps.pathspec, wd->path, no_fnmatch, (bool)ignorecase, &match, NULL)) - continue; - - /* skip ignored items that are not already in the index */ - if ((flags & GIT_INDEX_ADD_FORCE) == 0 && - git_iterator_current_is_ignored(wditer) && - index_find(&existing, index, wd->path, 0, 0, true) < 0) - continue; - - /* issue notification callback if requested */ - if (cb && (error = cb(wd->path, match, payload)) != 0) { - if (error > 0) /* return > 0 means skip this one */ - continue; - if (error < 0) { /* return < 0 means abort */ - giterr_set_after_callback(error); - break; - } - } - - /* TODO: Should we check if the file on disk is already an exact - * match to the file in the index and skip this work if it is? - */ - - /* write the blob to disk and get the oid */ - if ((error = git_blob_create_fromworkdir(&blobid, repo, wd->path)) < 0) - break; - - /* make the new entry to insert */ - if ((error = index_entry_dup(&entry, INDEX_OWNER(index), wd)) < 0) - break; - - entry->id = blobid; - - /* add working directory item to index */ - if ((error = index_insert(index, &entry, 1, false)) < 0) - break; - - git_tree_cache_invalidate_path(index->tree, wd->path); - - /* add implies conflict resolved, move conflict entries to REUC */ - if ((error = index_conflict_to_reuc(index, wd->path)) < 0) { - if (error != GIT_ENOTFOUND) - break; - giterr_clear(); - } - } - - if (error == GIT_ITEROVER) - error = 0; + if (error) + giterr_set_after_callback(error); cleanup: git_iterator_free(wditer); @@ -2662,15 +2606,10 @@ cleanup: return error; } -enum { - INDEX_ACTION_NONE = 0, - INDEX_ACTION_UPDATE = 1, - INDEX_ACTION_REMOVE = 2, -}; - struct foreach_diff_data { git_index *index; const git_pathspec *pathspec; + unsigned int flags; git_index_matched_path_cb cb; void *payload; }; @@ -2702,27 +2641,30 @@ static int apply_each_file(const git_diff_delta *delta, float progress, void *pa if (delta->status == GIT_DELTA_DELETED) error = git_index_remove_bypath(data->index, path); else - error = git_index_add_bypath(data->index, path); + error = git_index_add_bypath(data->index, delta->new_file.path); return error; } static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths, + unsigned int flags, git_index_matched_path_cb cb, void *payload) { int error; git_diff *diff; git_pathspec ps; git_repository *repo; + git_diff_options opts = GIT_DIFF_OPTIONS_INIT; struct foreach_diff_data data = { index, NULL, + flags, cb, payload, }; assert(index); - assert(action == INDEX_ACTION_UPDATE); + assert(action == INDEX_ACTION_UPDATE || action == INDEX_ACTION_ADDALL); repo = INDEX_OWNER(index); @@ -2739,7 +2681,13 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr if ((error = git_pathspec__init(&ps, paths)) < 0) return error; - if ((error = git_diff_index_to_workdir(&diff, repo, index, NULL)) < 0) + if (action == INDEX_ACTION_ADDALL) { + opts.flags = GIT_DIFF_INCLUDE_UNTRACKED | GIT_DIFF_RECURSE_IGNORED_DIRS; + if (flags == GIT_INDEX_ADD_FORCE) + opts.flags |= GIT_DIFF_INCLUDE_IGNORED; + } + + if ((error = git_diff_index_to_workdir(&diff, repo, index, &opts)) < 0) goto cleanup; data.pathspec = &ps; @@ -2850,7 +2798,7 @@ int git_index_update_all( git_index_matched_path_cb cb, void *payload) { - int error = index_apply_to_wd_diff(index, INDEX_ACTION_UPDATE, pathspec, cb, payload); + int error = index_apply_to_wd_diff(index, INDEX_ACTION_UPDATE, pathspec, 0, cb, payload); if (error) /* make sure error is set if callback stopped iteration */ giterr_set_after_callback(error); From 2b2dfe80f065f284aa53351d7203e31cf001cf8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Tue, 12 May 2015 12:07:33 +0200 Subject: [PATCH 4/5] index: include TYPECHANGE in the diff Without this option, we would not be able to catch exec bit changes. --- src/index.c | 3 ++- tests/index/addall.c | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/index.c b/src/index.c index 3d0f4d14d..8f0976d13 100644 --- a/src/index.c +++ b/src/index.c @@ -2681,8 +2681,9 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr if ((error = git_pathspec__init(&ps, paths)) < 0) return error; + opts.flags = GIT_DIFF_INCLUDE_TYPECHANGE; if (action == INDEX_ACTION_ADDALL) { - opts.flags = GIT_DIFF_INCLUDE_UNTRACKED | GIT_DIFF_RECURSE_IGNORED_DIRS; + opts.flags |= GIT_DIFF_INCLUDE_UNTRACKED | GIT_DIFF_RECURSE_IGNORED_DIRS; if (flags == GIT_INDEX_ADD_FORCE) opts.flags |= GIT_DIFF_INCLUDE_IGNORED; } diff --git a/tests/index/addall.c b/tests/index/addall.c index a7e2583b2..f13b768ce 100644 --- a/tests/index/addall.c +++ b/tests/index/addall.c @@ -193,6 +193,19 @@ void test_index_addall__repo_lifecycle(void) cl_repo_commit_from_index(NULL, g_repo, NULL, 0, "first commit"); check_status(g_repo, 0, 0, 0, 3, 0, 0, 1); + if (cl_repo_get_bool(g_repo, "core.filemode")) { + cl_git_pass(git_index_update_all(index, NULL, NULL, NULL)); + cl_must_pass(p_chmod(TEST_DIR "/file.zzz", 0777)); + cl_git_pass(git_index_update_all(index, NULL, NULL, NULL)); + check_status(g_repo, 0, 0, 1, 3, 0, 0, 1); + + /* go back to what we had before */ + cl_must_pass(p_chmod(TEST_DIR "/file.zzz", 0666)); + cl_git_pass(git_index_update_all(index, NULL, NULL, NULL)); + check_status(g_repo, 0, 0, 0, 3, 0, 0, 1); + } + + /* attempt to add an ignored file - does nothing */ strs[0] = "file.foo"; cl_git_pass(git_index_add_all(index, &paths, 0, NULL, NULL)); From 874cc35a8d130735d3d8160191e697021e1ab8fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 14 May 2015 15:24:15 +0200 Subject: [PATCH 5/5] index: add a CHANGELOG entry for the diff usage --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69f02bba8..810538c64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,9 @@ support for HTTPS connections insead of OpenSSL. remote functions now take these options or the callbacks instead of setting them beforehand. +* The index now uses diffs for `add_all()` and `update_all()` which + gives it a speed boost and closer semantics to git. + ### API additions