From ca05857e71f8d11582b1ad82f63c6a61e96fe20e Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 23 Nov 2016 18:26:19 -0500 Subject: [PATCH 1/3] Fix formatting Signed-off-by: David Turner --- src/submodule.c | 126 ++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/src/submodule.c b/src/submodule.c index 6d6b314d6..46462f165 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -324,89 +324,89 @@ done: static int submodules_from_index(git_strmap *map, git_index *idx, git_config *cfg) { - int error; - git_iterator *i; - const git_index_entry *entry; - git_buf name = GIT_BUF_INIT; + int error; + git_iterator *i; + const git_index_entry *entry; + git_buf name = GIT_BUF_INIT; - if ((error = git_iterator_for_index(&i, git_index_owner(idx), idx, NULL)) < 0) - return error; + if ((error = git_iterator_for_index(&i, git_index_owner(idx), idx, NULL)) < 0) + return error; - while (!(error = git_iterator_advance(&entry, i))) { - khiter_t pos = git_strmap_lookup_index(map, entry->path); - git_submodule *sm; + while (!(error = git_iterator_advance(&entry, i))) { + khiter_t pos = git_strmap_lookup_index(map, entry->path); + git_submodule *sm; - git_buf_clear(&name); - if (!name_from_path(&name, cfg, entry->path)) { - git_strmap_lookup_index(map, name.ptr); - } + git_buf_clear(&name); + if (!name_from_path(&name, cfg, entry->path)) { + git_strmap_lookup_index(map, name.ptr); + } - if (git_strmap_valid_index(map, pos)) { - sm = git_strmap_value_at(map, pos); + if (git_strmap_valid_index(map, pos)) { + sm = git_strmap_value_at(map, pos); - if (S_ISGITLINK(entry->mode)) - submodule_update_from_index_entry(sm, entry); - else - sm->flags |= GIT_SUBMODULE_STATUS__INDEX_NOT_SUBMODULE; - } else if (S_ISGITLINK(entry->mode)) { - if (!submodule_get_or_create(&sm, git_index_owner(idx), map, name.ptr ? name.ptr : entry->path)) { - submodule_update_from_index_entry(sm, entry); - git_submodule_free(sm); - } - } - } + if (S_ISGITLINK(entry->mode)) + submodule_update_from_index_entry(sm, entry); + else + sm->flags |= GIT_SUBMODULE_STATUS__INDEX_NOT_SUBMODULE; + } else if (S_ISGITLINK(entry->mode)) { + if (!submodule_get_or_create(&sm, git_index_owner(idx), map, name.ptr ? name.ptr : entry->path)) { + submodule_update_from_index_entry(sm, entry); + git_submodule_free(sm); + } + } + } - if (error == GIT_ITEROVER) - error = 0; + if (error == GIT_ITEROVER) + error = 0; - git_buf_free(&name); - git_iterator_free(i); + git_buf_free(&name); + git_iterator_free(i); - return error; + return error; } static int submodules_from_head(git_strmap *map, git_tree *head, git_config *cfg) { - int error; - git_iterator *i; - const git_index_entry *entry; - git_buf name = GIT_BUF_INIT; + int error; + git_iterator *i; + const git_index_entry *entry; + git_buf name = GIT_BUF_INIT; - if ((error = git_iterator_for_tree(&i, head, NULL)) < 0) - return error; + if ((error = git_iterator_for_tree(&i, head, NULL)) < 0) + return error; - while (!(error = git_iterator_advance(&entry, i))) { - khiter_t pos = git_strmap_lookup_index(map, entry->path); - git_submodule *sm; + while (!(error = git_iterator_advance(&entry, i))) { + khiter_t pos = git_strmap_lookup_index(map, entry->path); + git_submodule *sm; - git_buf_clear(&name); - if (!name_from_path(&name, cfg, entry->path)) { - git_strmap_lookup_index(map, name.ptr); - } + git_buf_clear(&name); + if (!name_from_path(&name, cfg, entry->path)) { + git_strmap_lookup_index(map, name.ptr); + } - if (git_strmap_valid_index(map, pos)) { - sm = git_strmap_value_at(map, pos); + if (git_strmap_valid_index(map, pos)) { + sm = git_strmap_value_at(map, pos); - if (S_ISGITLINK(entry->mode)) - submodule_update_from_head_data(sm, entry->mode, &entry->id); - else - sm->flags |= GIT_SUBMODULE_STATUS__HEAD_NOT_SUBMODULE; - } else if (S_ISGITLINK(entry->mode)) { - if (!submodule_get_or_create(&sm, git_tree_owner(head), map, name.ptr ? name.ptr : entry->path)) { - submodule_update_from_head_data( - sm, entry->mode, &entry->id); - git_submodule_free(sm); - } - } - } + if (S_ISGITLINK(entry->mode)) + submodule_update_from_head_data(sm, entry->mode, &entry->id); + else + sm->flags |= GIT_SUBMODULE_STATUS__HEAD_NOT_SUBMODULE; + } else if (S_ISGITLINK(entry->mode)) { + if (!submodule_get_or_create(&sm, git_tree_owner(head), map, name.ptr ? name.ptr : entry->path)) { + submodule_update_from_head_data( + sm, entry->mode, &entry->id); + git_submodule_free(sm); + } + } + } - if (error == GIT_ITEROVER) - error = 0; + if (error == GIT_ITEROVER) + error = 0; - git_buf_free(&name); - git_iterator_free(i); + git_buf_free(&name); + git_iterator_free(i); - return error; + return error; } /* If have_sm is true, sm is populated, otherwise map an repo are. */ From 4d99c4cfc604bb141fd4e1423e934ebd3fb7e2a7 Mon Sep 17 00:00:00 2001 From: Brock Peabody Date: Wed, 23 Nov 2016 18:32:48 -0500 Subject: [PATCH 2/3] Allow for caching of submodules. Added `git_repository_submodule_cache_all` to initialze a cache of submodules on the repository so that operations looking up N submodules are O(N) and not O(N^2). Added a `git_repository_submodule_cache_clear` function to remove the cache. Also optimized the function that loads all submodules as it was itself O(N^2) w.r.t the number of submodules, having to loop through the `.gitmodules` file once per submodule. I changed it to process the `.gitmodules` file once, into a map. Signed-off-by: David Turner --- include/git2/sys/repository.h | 29 +++++++++ src/repository.c | 33 ++++++++++ src/repository.h | 1 + src/submodule.c | 119 +++++++++++++++++++++++----------- src/submodule.h | 3 + tests/submodule/lookup.c | 25 +++++++ 6 files changed, 171 insertions(+), 39 deletions(-) diff --git a/include/git2/sys/repository.h b/include/git2/sys/repository.h index 800396c86..0c9142143 100644 --- a/include/git2/sys/repository.h +++ b/include/git2/sys/repository.h @@ -135,6 +135,35 @@ GIT_EXTERN(void) git_repository_set_index(git_repository *repo, git_index *index */ GIT_EXTERN(int) git_repository_set_bare(git_repository *repo); +/** + * Load and cache all submodules. + * + * Because the `.gitmodules` file is unstructured, loading submodules is an + * O(N) operation. Any operation (such as `git_rebase_init`) that requires + * accessing all submodules is O(N^2) in the number of submodules, if it + * has to look each one up individually. This function loads all submodules + * and caches them so that subsequent calls to `git_submodule_lookup` are O(1). + * + * @param repo the repository whose submodules will be cached. + */ +GIT_EXTERN(int) git_repository_submodule_cache_all( + git_repository *repo); + +/** + * Clear the submodule cache. + * + * Clear the submodule cache populated by `git_repository_submodule_cache_all`. + * If there is no cache, do nothing. + * + * The cache incorporates data from the repository's configuration, as well + * as the state of the working tree, the index, and HEAD. So any time any + * of these has changed, the cache might become invalid. + * + * @param repo the repository whose submodule cache will be cleared + */ +GIT_EXTERN(int) git_repository_submodule_cache_clear( + git_repository *repo); + /** @} */ GIT_END_DECL #endif diff --git a/src/repository.c b/src/repository.c index 60299193f..2185632bf 100644 --- a/src/repository.c +++ b/src/repository.c @@ -27,6 +27,10 @@ #include "merge.h" #include "diff_driver.h" #include "annotated_commit.h" +#include "submodule.h" + +GIT__USE_STRMAP +#include "strmap.h" #ifdef GIT_WIN32 # include "win32/w32_util.h" @@ -109,6 +113,7 @@ void git_repository__cleanup(git_repository *repo) { assert(repo); + git_repository_submodule_cache_clear(repo); git_cache_clear(&repo->objects); git_attr_cache_flush(repo); @@ -2541,3 +2546,31 @@ int git_repository_set_ident(git_repository *repo, const char *name, const char return 0; } + +int git_repository_submodule_cache_all(git_repository *repo) +{ + int error; + + assert(repo); + + if ((error = git_strmap_alloc(&repo->submodule_cache))) + return error; + + error = git_submodule__map(repo, repo->submodule_cache); + return error; +} + +int git_repository_submodule_cache_clear(git_repository *repo) +{ + git_submodule *sm; + assert(repo); + if (repo->submodule_cache == NULL) { + return 0; + } + git_strmap_foreach_value(repo->submodule_cache, sm, { + git_submodule_free(sm); + }); + git_strmap_free(repo->submodule_cache); + repo->submodule_cache = 0; + return 0; +} diff --git a/src/repository.h b/src/repository.h index b259bea3b..9d276f376 100644 --- a/src/repository.h +++ b/src/repository.h @@ -143,6 +143,7 @@ struct git_repository { git_atomic attr_session_key; git_cvar_value cvar_cache[GIT_CVAR_CACHE_MAX]; + git_strmap *submodule_cache; }; GIT_INLINE(git_attr_cache *) git_repository_attr_cache(git_repository *repo) diff --git a/src/submodule.c b/src/submodule.c index 46462f165..6996006dc 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -149,40 +149,53 @@ static int find_by_path(const git_config_entry *entry, void *payload) } /** - * Find out the name of a submodule from its path + * Release the name map returned by 'load_submodule_names'. */ -static int name_from_path(git_buf *out, git_config *cfg, const char *path) +static void free_submodule_names(git_strmap *names) +{ + git_buf *name = 0; + if (names == NULL) + return; + git_strmap_foreach_value(names, name, { + git__free(name); + }); + git_strmap_free(names); + return; +} + +/** + * Map submodule paths to names. + * TODO: for some use-cases, this might need case-folding on a + * case-insensitive filesystem + */ +static int load_submodule_names(git_strmap *out, git_config *cfg) { const char *key = "submodule\\..*\\.path"; git_config_iterator *iter; git_config_entry *entry; - int error; + git_buf buf = GIT_BUF_INIT; + int rval; + int error = 0; if ((error = git_config_iterator_glob_new(&iter, cfg, key)) < 0) return error; - while ((error = git_config_next(&entry, iter)) == 0) { + while (git_config_next(&entry, iter) == 0) { const char *fdot, *ldot; - /* TODO: this should maybe be strcasecmp on a case-insensitive fs */ - if (strcmp(path, entry->value) != 0) - continue; - fdot = strchr(entry->name, '.'); ldot = strrchr(entry->name, '.'); - git_buf_clear(out); - git_buf_put(out, fdot + 1, ldot - fdot - 1); - goto cleanup; + git_buf_put(&buf, fdot + 1, ldot - fdot - 1); + git_strmap_insert(out, entry->value, git_buf_detach(&buf), rval); + if (rval < 0) { + giterr_set(GITERR_NOMEMORY, "Error inserting submodule into hash table"); + free_submodule_names(out); + return -1; + } } - if (error == GIT_ITEROVER) { - giterr_set(GITERR_SUBMODULE, "could not find a submodule name for '%s'", path); - error = GIT_ENOTFOUND; - } - -cleanup: git_config_iterator_free(iter); - return error; + return 0; } int git_submodule_lookup( @@ -196,6 +209,17 @@ int git_submodule_lookup( assert(repo && name); + if (repo->submodule_cache != NULL) { + khiter_t pos = git_strmap_lookup_index(repo->submodule_cache, name); + if (git_strmap_valid_index(repo->submodule_cache, pos)) { + if (out) { + *out = git_strmap_value_at(repo->submodule_cache, pos); + GIT_REFCOUNT_INC(*out); + } + return 0; + } + } + if ((error = submodule_alloc(&sm, repo, name)) < 0) return error; @@ -327,20 +351,18 @@ static int submodules_from_index(git_strmap *map, git_index *idx, git_config *cf int error; git_iterator *i; const git_index_entry *entry; - git_buf name = GIT_BUF_INIT; + git_strmap *names = 0; + git_strmap_alloc(&names); + if ((error = load_submodule_names(names, cfg))) + goto done; if ((error = git_iterator_for_index(&i, git_index_owner(idx), idx, NULL)) < 0) - return error; + goto done; while (!(error = git_iterator_advance(&entry, i))) { khiter_t pos = git_strmap_lookup_index(map, entry->path); git_submodule *sm; - git_buf_clear(&name); - if (!name_from_path(&name, cfg, entry->path)) { - git_strmap_lookup_index(map, name.ptr); - } - if (git_strmap_valid_index(map, pos)) { sm = git_strmap_value_at(map, pos); @@ -349,7 +371,17 @@ static int submodules_from_index(git_strmap *map, git_index *idx, git_config *cf else sm->flags |= GIT_SUBMODULE_STATUS__INDEX_NOT_SUBMODULE; } else if (S_ISGITLINK(entry->mode)) { - if (!submodule_get_or_create(&sm, git_index_owner(idx), map, name.ptr ? name.ptr : entry->path)) { + khiter_t name_pos; + const char *name; + + name_pos = git_strmap_lookup_index(names, entry->path); + if (git_strmap_valid_index(names, name_pos)) { + name = git_strmap_value_at(names, name_pos); + } else { + name = entry->path; + } + + if (!submodule_get_or_create(&sm, git_index_owner(idx), map, name)) { submodule_update_from_index_entry(sm, entry); git_submodule_free(sm); } @@ -359,8 +391,9 @@ static int submodules_from_index(git_strmap *map, git_index *idx, git_config *cf if (error == GIT_ITEROVER) error = 0; - git_buf_free(&name); +done: git_iterator_free(i); + free_submodule_names(names); return error; } @@ -370,20 +403,18 @@ static int submodules_from_head(git_strmap *map, git_tree *head, git_config *cfg int error; git_iterator *i; const git_index_entry *entry; - git_buf name = GIT_BUF_INIT; + git_strmap *names = 0; + git_strmap_alloc(&names); + if ((error = load_submodule_names(names, cfg))) + goto done; if ((error = git_iterator_for_tree(&i, head, NULL)) < 0) - return error; + goto done; while (!(error = git_iterator_advance(&entry, i))) { khiter_t pos = git_strmap_lookup_index(map, entry->path); git_submodule *sm; - git_buf_clear(&name); - if (!name_from_path(&name, cfg, entry->path)) { - git_strmap_lookup_index(map, name.ptr); - } - if (git_strmap_valid_index(map, pos)) { sm = git_strmap_value_at(map, pos); @@ -392,7 +423,17 @@ static int submodules_from_head(git_strmap *map, git_tree *head, git_config *cfg else sm->flags |= GIT_SUBMODULE_STATUS__HEAD_NOT_SUBMODULE; } else if (S_ISGITLINK(entry->mode)) { - if (!submodule_get_or_create(&sm, git_tree_owner(head), map, name.ptr ? name.ptr : entry->path)) { + khiter_t name_pos; + const char *name; + + name_pos = git_strmap_lookup_index(names, entry->path); + if (git_strmap_valid_index(names, name_pos)) { + name = git_strmap_value_at(names, name_pos); + } else { + name = entry->path; + } + + if (!submodule_get_or_create(&sm, git_tree_owner(head), map, name)) { submodule_update_from_head_data( sm, entry->mode, &entry->id); git_submodule_free(sm); @@ -403,8 +444,9 @@ static int submodules_from_head(git_strmap *map, git_tree *head, git_config *cfg if (error == GIT_ITEROVER) error = 0; - git_buf_free(&name); +done: git_iterator_free(i); + free_submodule_names(names); return error; } @@ -416,7 +458,7 @@ typedef struct { git_repository *repo; } lfc_data; -static int all_submodules(git_repository *repo, git_strmap *map) +int git_submodule__map(git_repository *repo, git_strmap *map) { int error = 0; git_index *idx = NULL; @@ -509,7 +551,7 @@ int git_submodule_foreach( if ((error = git_strmap_alloc(&submodules)) < 0) return error; - if ((error = all_submodules(repo, submodules)) < 0) + if ((error = git_submodule__map(repo, submodules)) < 0) goto done; if (!(error = git_vector_init( @@ -1566,7 +1608,6 @@ int git_submodule_location(unsigned int *location, git_submodule *sm) location, NULL, NULL, NULL, sm, GIT_SUBMODULE_IGNORE_ALL); } - /* * INTERNAL FUNCTIONS */ diff --git a/src/submodule.h b/src/submodule.h index 2ef2031b3..456a93979 100644 --- a/src/submodule.h +++ b/src/submodule.h @@ -143,4 +143,7 @@ extern int git_submodule_parse_ignore( extern int git_submodule_parse_update( git_submodule_update_t *out, const char *value); +extern int git_submodule__map( + git_repository *repo, + git_strmap *map); #endif diff --git a/tests/submodule/lookup.c b/tests/submodule/lookup.c index 148f9273e..e36fc44e0 100644 --- a/tests/submodule/lookup.c +++ b/tests/submodule/lookup.c @@ -388,3 +388,28 @@ void test_submodule_lookup__renamed(void) cl_git_pass(git_submodule_foreach(g_repo, sm_lookup_cb, &data)); cl_assert_equal_i(8, data.count); } + +void test_submodule_lookup_cached(void) { + git_submodule *sm; + git_submodule *sm2; + /* See that the simple tests still pass. */ + + git_repository_submodule_cache_all(g_repo); + test_submodule_lookup__simple_lookup(); + git_repository_submodule_cache_clear(g_repo); + test_submodule_lookup__simple_lookup(); + + /* Check that subsequent calls return different objects when cached. */ + git_repository_submodule_cache_all(g_repo); + cl_git_pass(git_submodule_lookup(&sm, g_repo, "sm_unchanged")); + cl_git_pass(git_submodule_lookup(&sm2, g_repo, "sm_unchanged")); + cl_assert_equal_p(sm, sm2); + git_submodule_free(sm2); + + /* and that we get new objects again after clearing the cache. */ + git_repository_submodule_cache_clear(g_repo); + cl_git_pass(git_submodule_lookup(&sm2, g_repo, "sm_unchanged")); + cl_assert(sm != sm2); + git_submodule_free(sm); + git_submodule_free(sm2); +} From 673dff88a2c78766609a7023167acb5a882fb84d Mon Sep 17 00:00:00 2001 From: Brock Peabody Date: Wed, 23 Nov 2016 18:32:55 -0500 Subject: [PATCH 3/3] Skip submodule head/index update when caching. `git_submodule_status` is very slow, bottlenecked on `git_repository_head_tree`, which it uses through `submodule_update_head`. If the user has requested submodule caching, assume that they want this status cached too and skip it. Signed-off-by: David Turner --- src/submodule.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/submodule.c b/src/submodule.c index 6996006dc..3099db6cf 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -1544,13 +1544,22 @@ int git_submodule__status( return 0; } - /* refresh the index OID */ - if (submodule_update_index(sm) < 0) - return -1; + /* If the user has requested caching submodule state, performing these + * expensive operations (especially `submodule_update_head`, which is + * bottlenecked on `git_repository_head_tree`) eliminates much of the + * advantage. We will, therefore, interpret the request for caching to + * apply here to and skip them. + */ - /* refresh the HEAD OID */ - if (submodule_update_head(sm) < 0) - return -1; + if (sm->repo->submodule_cache == NULL) { + /* refresh the index OID */ + if (submodule_update_index(sm) < 0) + return -1; + + /* refresh the HEAD OID */ + if (submodule_update_head(sm) < 0) + return -1; + } /* for ignore == dirty, don't scan the working directory */ if (ign == GIT_SUBMODULE_IGNORE_DIRTY) {