mirror of
				https://git.proxmox.com/git/libgit2
				synced 2025-10-31 09:15:51 +00:00 
			
		
		
		
	Merge pull request #4016 from novalis/submodule-optimization
Submodule optimization
This commit is contained in:
		
						commit
						98f5387233
					
				| @ -135,6 +135,35 @@ GIT_EXTERN(void) git_repository_set_index(git_repository *repo, git_index *index | ||||
|  */ | ||||
| GIT_EXTERN(int) git_repository_set_bare(git_repository *repo); | ||||
| 
 | ||||
| /**
 | ||||
|  * Load and cache all submodules. | ||||
|  * | ||||
|  * Because the `.gitmodules` file is unstructured, loading submodules is an | ||||
|  * O(N) operation.  Any operation (such as `git_rebase_init`) that requires | ||||
|  * accessing all submodules is O(N^2) in the number of submodules, if it | ||||
|  * has to look each one up individually.  This function loads all submodules | ||||
|  * and caches them so that subsequent calls to `git_submodule_lookup` are O(1). | ||||
|  * | ||||
|  * @param repo the repository whose submodules will be cached. | ||||
|  */ | ||||
| GIT_EXTERN(int) git_repository_submodule_cache_all( | ||||
| 	git_repository *repo); | ||||
| 
 | ||||
| /**
 | ||||
|  * Clear the submodule cache. | ||||
|  * | ||||
|  * Clear the submodule cache populated by `git_repository_submodule_cache_all`. | ||||
|  * If there is no cache, do nothing. | ||||
|  * | ||||
|  * The cache incorporates data from the repository's configuration, as well | ||||
|  * as the state of the working tree, the index, and HEAD.  So any time any | ||||
|  * of these has changed, the cache might become invalid. | ||||
|  * | ||||
|  * @param repo the repository whose submodule cache will be cleared | ||||
|  */ | ||||
| GIT_EXTERN(int) git_repository_submodule_cache_clear( | ||||
| 	git_repository *repo); | ||||
| 
 | ||||
| /** @} */ | ||||
| GIT_END_DECL | ||||
| #endif | ||||
|  | ||||
| @ -27,6 +27,10 @@ | ||||
| #include "merge.h" | ||||
| #include "diff_driver.h" | ||||
| #include "annotated_commit.h" | ||||
| #include "submodule.h" | ||||
| 
 | ||||
| GIT__USE_STRMAP | ||||
| #include "strmap.h" | ||||
| 
 | ||||
| #ifdef GIT_WIN32 | ||||
| # include "win32/w32_util.h" | ||||
| @ -109,6 +113,7 @@ void git_repository__cleanup(git_repository *repo) | ||||
| { | ||||
| 	assert(repo); | ||||
| 
 | ||||
| 	git_repository_submodule_cache_clear(repo); | ||||
| 	git_cache_clear(&repo->objects); | ||||
| 	git_attr_cache_flush(repo); | ||||
| 
 | ||||
| @ -2541,3 +2546,31 @@ int git_repository_set_ident(git_repository *repo, const char *name, const char | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int git_repository_submodule_cache_all(git_repository *repo) | ||||
| { | ||||
| 	int error; | ||||
| 
 | ||||
| 	assert(repo); | ||||
| 
 | ||||
| 	if ((error = git_strmap_alloc(&repo->submodule_cache))) | ||||
| 		return error; | ||||
| 
 | ||||
| 	error = git_submodule__map(repo, repo->submodule_cache); | ||||
| 	return error; | ||||
| } | ||||
| 
 | ||||
| int git_repository_submodule_cache_clear(git_repository *repo) | ||||
| { | ||||
| 	git_submodule *sm; | ||||
| 	assert(repo); | ||||
| 	if (repo->submodule_cache == NULL) { | ||||
| 		return 0; | ||||
| 	} | ||||
| 	git_strmap_foreach_value(repo->submodule_cache, sm, { | ||||
| 		git_submodule_free(sm); | ||||
| 	}); | ||||
| 	git_strmap_free(repo->submodule_cache); | ||||
| 	repo->submodule_cache = 0; | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| @ -143,6 +143,7 @@ struct git_repository { | ||||
| 	git_atomic attr_session_key; | ||||
| 
 | ||||
| 	git_cvar_value cvar_cache[GIT_CVAR_CACHE_MAX]; | ||||
| 	git_strmap *submodule_cache; | ||||
| }; | ||||
| 
 | ||||
| GIT_INLINE(git_attr_cache *) git_repository_attr_cache(git_repository *repo) | ||||
|  | ||||
							
								
								
									
										230
									
								
								src/submodule.c
									
									
									
									
									
								
							
							
						
						
									
										230
									
								
								src/submodule.c
									
									
									
									
									
								
							| @ -149,40 +149,53 @@ static int find_by_path(const git_config_entry *entry, void *payload) | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Find out the name of a submodule from its path | ||||
|  * Release the name map returned by 'load_submodule_names'. | ||||
|  */ | ||||
| static int name_from_path(git_buf *out, git_config *cfg, const char *path) | ||||
| static void free_submodule_names(git_strmap *names) | ||||
| { | ||||
| 	git_buf *name = 0; | ||||
| 	if (names == NULL) | ||||
| 		return; | ||||
| 	git_strmap_foreach_value(names, name, { | ||||
| 		git__free(name); | ||||
| 	}); | ||||
| 	git_strmap_free(names); | ||||
| 	return; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Map submodule paths to names. | ||||
|  * TODO: for some use-cases, this might need case-folding on a | ||||
|  * case-insensitive filesystem | ||||
|  */ | ||||
| static int load_submodule_names(git_strmap *out, git_config *cfg) | ||||
| { | ||||
| 	const char *key = "submodule\\..*\\.path"; | ||||
| 	git_config_iterator *iter; | ||||
| 	git_config_entry *entry; | ||||
| 	int error; | ||||
| 	git_buf buf = GIT_BUF_INIT; | ||||
| 	int rval; | ||||
| 	int error = 0; | ||||
| 
 | ||||
| 	if ((error = git_config_iterator_glob_new(&iter, cfg, key)) < 0) | ||||
| 		return error; | ||||
| 
 | ||||
| 	while ((error = git_config_next(&entry, iter)) == 0) { | ||||
| 	while (git_config_next(&entry, iter) == 0) { | ||||
| 		const char *fdot, *ldot; | ||||
| 		/* TODO: this should maybe be strcasecmp on a case-insensitive fs */ | ||||
| 		if (strcmp(path, entry->value) != 0) | ||||
| 			continue; | ||||
| 
 | ||||
| 		fdot = strchr(entry->name, '.'); | ||||
| 		ldot = strrchr(entry->name, '.'); | ||||
| 
 | ||||
| 		git_buf_clear(out); | ||||
| 		git_buf_put(out, fdot + 1, ldot - fdot - 1); | ||||
| 		goto cleanup; | ||||
| 		git_buf_put(&buf, fdot + 1, ldot - fdot - 1); | ||||
| 		git_strmap_insert(out, entry->value, git_buf_detach(&buf), rval); | ||||
| 		if (rval < 0) { | ||||
| 			giterr_set(GITERR_NOMEMORY, "Error inserting submodule into hash table"); | ||||
| 			free_submodule_names(out); | ||||
| 			return -1; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (error == GIT_ITEROVER) { | ||||
| 		giterr_set(GITERR_SUBMODULE, "could not find a submodule name for '%s'", path); | ||||
| 		error = GIT_ENOTFOUND; | ||||
| 	} | ||||
| 
 | ||||
| cleanup: | ||||
| 	git_config_iterator_free(iter); | ||||
| 	return error; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int git_submodule_lookup( | ||||
| @ -196,6 +209,17 @@ int git_submodule_lookup( | ||||
| 
 | ||||
| 	assert(repo && name); | ||||
| 
 | ||||
| 	if (repo->submodule_cache != NULL) { | ||||
| 		khiter_t pos = git_strmap_lookup_index(repo->submodule_cache, name); | ||||
| 		if (git_strmap_valid_index(repo->submodule_cache, pos)) { | ||||
| 			if (out) { | ||||
| 				*out = git_strmap_value_at(repo->submodule_cache, pos); | ||||
| 				GIT_REFCOUNT_INC(*out); | ||||
| 			} | ||||
| 			return 0; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if ((error = submodule_alloc(&sm, repo, name)) < 0) | ||||
| 		return error; | ||||
| 
 | ||||
| @ -324,89 +348,107 @@ done: | ||||
| 
 | ||||
| static int submodules_from_index(git_strmap *map, git_index *idx, git_config *cfg) | ||||
| { | ||||
|        int error; | ||||
|        git_iterator *i; | ||||
|        const git_index_entry *entry; | ||||
|        git_buf name = GIT_BUF_INIT; | ||||
| 	int error; | ||||
| 	git_iterator *i; | ||||
| 	const git_index_entry *entry; | ||||
| 	git_strmap *names = 0; | ||||
| 	git_strmap_alloc(&names); | ||||
| 	if ((error = load_submodule_names(names, cfg))) | ||||
| 		goto done; | ||||
| 
 | ||||
|        if ((error = git_iterator_for_index(&i, git_index_owner(idx), idx, NULL)) < 0) | ||||
|                return error; | ||||
| 	if ((error = git_iterator_for_index(&i, git_index_owner(idx), idx, NULL)) < 0) | ||||
| 		goto done; | ||||
| 
 | ||||
|        while (!(error = git_iterator_advance(&entry, i))) { | ||||
|                khiter_t pos = git_strmap_lookup_index(map, entry->path); | ||||
|                git_submodule *sm; | ||||
| 	while (!(error = git_iterator_advance(&entry, i))) { | ||||
| 		khiter_t pos = git_strmap_lookup_index(map, entry->path); | ||||
| 		git_submodule *sm; | ||||
| 
 | ||||
| 	       git_buf_clear(&name); | ||||
| 	       if (!name_from_path(&name, cfg, entry->path)) { | ||||
| 		       git_strmap_lookup_index(map, name.ptr); | ||||
| 	       } | ||||
| 		if (git_strmap_valid_index(map, pos)) { | ||||
| 			sm = git_strmap_value_at(map, pos); | ||||
| 
 | ||||
|                if (git_strmap_valid_index(map, pos)) { | ||||
|                        sm = git_strmap_value_at(map, pos); | ||||
| 			if (S_ISGITLINK(entry->mode)) | ||||
| 				submodule_update_from_index_entry(sm, entry); | ||||
| 			else | ||||
| 				sm->flags |= GIT_SUBMODULE_STATUS__INDEX_NOT_SUBMODULE; | ||||
| 		} else if (S_ISGITLINK(entry->mode)) { | ||||
| 			khiter_t name_pos; | ||||
| 			const char *name; | ||||
| 
 | ||||
|                        if (S_ISGITLINK(entry->mode)) | ||||
|                                submodule_update_from_index_entry(sm, entry); | ||||
|                        else | ||||
|                                sm->flags |= GIT_SUBMODULE_STATUS__INDEX_NOT_SUBMODULE; | ||||
|                } else if (S_ISGITLINK(entry->mode)) { | ||||
|                        if (!submodule_get_or_create(&sm, git_index_owner(idx), map, name.ptr ? name.ptr : entry->path)) { | ||||
|                                submodule_update_from_index_entry(sm, entry); | ||||
|                                git_submodule_free(sm); | ||||
|                        } | ||||
|                } | ||||
|        } | ||||
| 			name_pos = git_strmap_lookup_index(names, entry->path); | ||||
| 			if (git_strmap_valid_index(names, name_pos)) { | ||||
| 				name = git_strmap_value_at(names, name_pos); | ||||
| 			} else { | ||||
| 				name = entry->path; | ||||
| 			} | ||||
| 
 | ||||
|        if (error == GIT_ITEROVER) | ||||
|                error = 0; | ||||
| 			if (!submodule_get_or_create(&sm, git_index_owner(idx), map, name)) { | ||||
| 				submodule_update_from_index_entry(sm, entry); | ||||
| 				git_submodule_free(sm); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|        git_buf_free(&name); | ||||
|        git_iterator_free(i); | ||||
| 	if (error == GIT_ITEROVER) | ||||
| 		error = 0; | ||||
| 
 | ||||
|        return error; | ||||
| done: | ||||
| 	git_iterator_free(i); | ||||
| 	free_submodule_names(names); | ||||
| 
 | ||||
| 	return error; | ||||
| } | ||||
| 
 | ||||
| static int submodules_from_head(git_strmap *map, git_tree *head, git_config *cfg) | ||||
| { | ||||
|        int error; | ||||
|        git_iterator *i; | ||||
|        const git_index_entry *entry; | ||||
|        git_buf name = GIT_BUF_INIT; | ||||
| 	int error; | ||||
| 	git_iterator *i; | ||||
| 	const git_index_entry *entry; | ||||
| 	git_strmap *names = 0; | ||||
| 	git_strmap_alloc(&names); | ||||
| 	if ((error = load_submodule_names(names, cfg))) | ||||
| 		goto done; | ||||
| 
 | ||||
|        if ((error = git_iterator_for_tree(&i, head, NULL)) < 0) | ||||
|                return error; | ||||
| 	if ((error = git_iterator_for_tree(&i, head, NULL)) < 0) | ||||
| 		goto done; | ||||
| 
 | ||||
|        while (!(error = git_iterator_advance(&entry, i))) { | ||||
|                khiter_t pos = git_strmap_lookup_index(map, entry->path); | ||||
|                git_submodule *sm; | ||||
| 	while (!(error = git_iterator_advance(&entry, i))) { | ||||
| 		khiter_t pos = git_strmap_lookup_index(map, entry->path); | ||||
| 		git_submodule *sm; | ||||
| 
 | ||||
| 	       git_buf_clear(&name); | ||||
| 	       if (!name_from_path(&name, cfg, entry->path)) { | ||||
| 		       git_strmap_lookup_index(map, name.ptr); | ||||
| 	       } | ||||
| 		if (git_strmap_valid_index(map, pos)) { | ||||
| 			sm = git_strmap_value_at(map, pos); | ||||
| 
 | ||||
|                if (git_strmap_valid_index(map, pos)) { | ||||
|                        sm = git_strmap_value_at(map, pos); | ||||
| 			if (S_ISGITLINK(entry->mode)) | ||||
| 				submodule_update_from_head_data(sm, entry->mode, &entry->id); | ||||
| 			else | ||||
| 				sm->flags |= GIT_SUBMODULE_STATUS__HEAD_NOT_SUBMODULE; | ||||
| 		} else if (S_ISGITLINK(entry->mode)) { | ||||
| 			khiter_t name_pos; | ||||
| 			const char *name; | ||||
| 
 | ||||
|                        if (S_ISGITLINK(entry->mode)) | ||||
|                                submodule_update_from_head_data(sm, entry->mode, &entry->id); | ||||
|                        else | ||||
|                                sm->flags |= GIT_SUBMODULE_STATUS__HEAD_NOT_SUBMODULE; | ||||
|                } else if (S_ISGITLINK(entry->mode)) { | ||||
|                        if (!submodule_get_or_create(&sm, git_tree_owner(head), map, name.ptr ? name.ptr : entry->path)) { | ||||
|                                submodule_update_from_head_data( | ||||
|                                        sm, entry->mode, &entry->id); | ||||
|                                git_submodule_free(sm); | ||||
|                        } | ||||
|                } | ||||
|        } | ||||
| 			name_pos = git_strmap_lookup_index(names, entry->path); | ||||
| 			if (git_strmap_valid_index(names, name_pos)) { | ||||
| 				name = git_strmap_value_at(names, name_pos); | ||||
| 			} else { | ||||
| 				name = entry->path; | ||||
| 			} | ||||
| 
 | ||||
|        if (error == GIT_ITEROVER) | ||||
|                error = 0; | ||||
| 			if (!submodule_get_or_create(&sm, git_tree_owner(head), map, name)) { | ||||
| 				submodule_update_from_head_data( | ||||
| 					sm, entry->mode, &entry->id); | ||||
| 				git_submodule_free(sm); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|        git_buf_free(&name); | ||||
|        git_iterator_free(i); | ||||
| 	if (error == GIT_ITEROVER) | ||||
| 		error = 0; | ||||
| 
 | ||||
|        return error; | ||||
| done: | ||||
| 	git_iterator_free(i); | ||||
| 	free_submodule_names(names); | ||||
| 
 | ||||
| 	return error; | ||||
| } | ||||
| 
 | ||||
| /* If have_sm is true, sm is populated, otherwise map an repo are. */ | ||||
| @ -416,7 +458,7 @@ typedef struct { | ||||
| 	git_repository *repo; | ||||
| } lfc_data; | ||||
| 
 | ||||
| static int all_submodules(git_repository *repo, git_strmap *map) | ||||
| int git_submodule__map(git_repository *repo, git_strmap *map) | ||||
| { | ||||
| 	int error = 0; | ||||
| 	git_index *idx = NULL; | ||||
| @ -509,7 +551,7 @@ int git_submodule_foreach( | ||||
| 	if ((error = git_strmap_alloc(&submodules)) < 0) | ||||
| 		return error; | ||||
| 
 | ||||
| 	if ((error = all_submodules(repo, submodules)) < 0) | ||||
| 	if ((error = git_submodule__map(repo, submodules)) < 0) | ||||
| 		goto done; | ||||
| 
 | ||||
| 	if (!(error = git_vector_init( | ||||
| @ -1502,13 +1544,22 @@ int git_submodule__status( | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| 	/* refresh the index OID */ | ||||
| 	if (submodule_update_index(sm) < 0) | ||||
| 		return -1; | ||||
| 	/* If the user has requested caching submodule state, performing these
 | ||||
| 	 * expensive operations (especially `submodule_update_head`, which is | ||||
| 	 * bottlenecked on `git_repository_head_tree`) eliminates much of the | ||||
| 	 * advantage.  We will, therefore, interpret the request for caching to | ||||
| 	 * apply here to and skip them. | ||||
| 	 */ | ||||
| 
 | ||||
| 	/* refresh the HEAD OID */ | ||||
| 	if (submodule_update_head(sm) < 0) | ||||
| 		return -1; | ||||
| 	if (sm->repo->submodule_cache == NULL) { | ||||
| 		/* refresh the index OID */ | ||||
| 		if (submodule_update_index(sm) < 0) | ||||
| 			return -1; | ||||
| 
 | ||||
| 		/* refresh the HEAD OID */ | ||||
| 		if (submodule_update_head(sm) < 0) | ||||
| 			return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	/* for ignore == dirty, don't scan the working directory */ | ||||
| 	if (ign == GIT_SUBMODULE_IGNORE_DIRTY) { | ||||
| @ -1566,7 +1617,6 @@ int git_submodule_location(unsigned int *location, git_submodule *sm) | ||||
| 		location, NULL, NULL, NULL, sm, GIT_SUBMODULE_IGNORE_ALL); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * INTERNAL FUNCTIONS | ||||
|  */ | ||||
|  | ||||
| @ -143,4 +143,7 @@ extern int git_submodule_parse_ignore( | ||||
| extern int git_submodule_parse_update( | ||||
| 	git_submodule_update_t *out, const char *value); | ||||
| 
 | ||||
| extern int git_submodule__map( | ||||
| 	git_repository *repo, | ||||
| 	git_strmap *map); | ||||
| #endif | ||||
|  | ||||
| @ -388,3 +388,28 @@ void test_submodule_lookup__renamed(void) | ||||
| 	cl_git_pass(git_submodule_foreach(g_repo, sm_lookup_cb, &data)); | ||||
| 	cl_assert_equal_i(8, data.count); | ||||
| } | ||||
| 
 | ||||
| void test_submodule_lookup_cached(void) { | ||||
| 	git_submodule *sm; | ||||
| 	git_submodule *sm2; | ||||
| 	/* See that the simple tests still pass. */ | ||||
| 
 | ||||
| 	git_repository_submodule_cache_all(g_repo); | ||||
| 	test_submodule_lookup__simple_lookup(); | ||||
| 	git_repository_submodule_cache_clear(g_repo); | ||||
| 	test_submodule_lookup__simple_lookup(); | ||||
| 
 | ||||
| 	/* Check that subsequent calls return different objects when cached. */ | ||||
| 	git_repository_submodule_cache_all(g_repo); | ||||
| 	cl_git_pass(git_submodule_lookup(&sm, g_repo, "sm_unchanged")); | ||||
| 	cl_git_pass(git_submodule_lookup(&sm2, g_repo, "sm_unchanged")); | ||||
| 	cl_assert_equal_p(sm, sm2); | ||||
| 	git_submodule_free(sm2); | ||||
| 
 | ||||
| 	/* and that we get new objects again after clearing the cache. */ | ||||
| 	git_repository_submodule_cache_clear(g_repo); | ||||
| 	cl_git_pass(git_submodule_lookup(&sm2, g_repo, "sm_unchanged")); | ||||
| 	cl_assert(sm != sm2); | ||||
| 	git_submodule_free(sm); | ||||
| 	git_submodule_free(sm2); | ||||
| } | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Edward Thomson
						Edward Thomson