From 4d3f1f97404b01cd00ad5b2f47f64672d787e901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 9 Jun 2014 04:38:22 +0200 Subject: [PATCH 1/3] treebuilder: use a map instead of vector to store the entries Finding a filename in a vector means we need to resort it every time we want to read from it, which includes every time we want to write to it as well, as we want to find duplicate keys. A hash-map fits what we want to do much more accurately, as we do not care about sorting, but just the particular filename. We still keep removed entries around, as the interface let you assume they were going to be around until the treebuilder is cleared or freed, but in this case that involves an append to a vector in the filter case, which can now fail. The only time we care about sorting is when we write out the tree, so let's make that the only time we do any sorting. --- include/git2/tree.h | 2 +- src/tree.c | 123 +++++++++++++++++++++++--------------- src/tree.h | 5 +- tests/object/tree/write.c | 8 ++- 4 files changed, 84 insertions(+), 54 deletions(-) diff --git a/include/git2/tree.h b/include/git2/tree.h index 56922d40b..8f1d8a089 100644 --- a/include/git2/tree.h +++ b/include/git2/tree.h @@ -354,7 +354,7 @@ typedef int (*git_treebuilder_filter_cb)( * @param filter Callback to filter entries * @param payload Extra data to pass to filter callback */ -GIT_EXTERN(void) git_treebuilder_filter( +GIT_EXTERN(int) git_treebuilder_filter( git_treebuilder *bld, git_treebuilder_filter_cb filter, void *payload); diff --git a/src/tree.c b/src/tree.c index b64efe460..7d28c86ce 100644 --- a/src/tree.c +++ b/src/tree.c @@ -17,6 +17,8 @@ #define DEFAULT_TREE_SIZE 16 #define MAX_FILEMODE_BYTES 6 +GIT__USE_STRMAP; + static bool valid_filemode(const int filemode) { return (filemode == GIT_FILEMODE_TREE @@ -450,6 +452,7 @@ static int append_entry( git_filemode_t filemode) { git_tree_entry *entry; + int error = 0; if (!valid_entry_name(filename)) return tree_error("Failed to insert entry. Invalid name for a tree entry", filename); @@ -460,8 +463,9 @@ static int append_entry( git_oid_cpy(&entry->oid, id); entry->attr = (uint16_t)filemode; - if (git_vector_insert_sorted(&bld->entries, entry, NULL) < 0) { - git__free(entry); + git_strmap_insert(bld->map, entry->filename, entry, error); + if (error < 0) { + giterr_set(GITERR_TREE, "failed to append entry %s to the tree builder", filename); return -1; } @@ -610,17 +614,18 @@ int git_tree__write_index( int git_treebuilder_create(git_treebuilder **builder_p, const git_tree *source) { git_treebuilder *bld; - size_t i, source_entries = DEFAULT_TREE_SIZE; + size_t i; assert(builder_p); bld = git__calloc(1, sizeof(git_treebuilder)); GITERR_CHECK_ALLOC(bld); - if (source != NULL) - source_entries = source->entries.length; + if (git_strmap_alloc(&bld->map) < 0) { + return -1; + } - if (git_vector_init(&bld->entries, source_entries, entry_sort_cmp) < 0) + if (git_vector_init(&bld->removed, 0, NULL) < 0) goto on_error; if (source != NULL) { @@ -651,7 +656,8 @@ int git_treebuilder_insert( git_filemode_t filemode) { git_tree_entry *entry; - size_t pos; + int error; + git_strmap_iter iter; assert(bld && id && filename); @@ -661,24 +667,25 @@ int git_treebuilder_insert( if (!valid_entry_name(filename)) return tree_error("Failed to insert entry. Invalid name for a tree entry", filename); - if (!tree_key_search(&pos, &bld->entries, filename, strlen(filename))) { - entry = git_vector_get(&bld->entries, pos); - if (entry->removed) { - entry->removed = 0; - bld->entrycount++; - } - } else { - entry = alloc_entry(filename); - GITERR_CHECK_ALLOC(entry); + entry = alloc_entry(filename); + GITERR_CHECK_ALLOC(entry); - if (git_vector_insert_sorted(&bld->entries, entry, NULL) < 0) { - git__free(entry); + iter = git_strmap_lookup_index(bld->map, entry->filename); + if (git_strmap_valid_index(bld->map, iter)) { + git_tree_entry *old_val = git_strmap_value_at(bld->map, iter); + if (git_vector_insert(&bld->removed, old_val) < 0) return -1; - } - bld->entrycount++; + git_strmap_delete_at(bld->map, iter); } + git_strmap_insert(bld->map, entry->filename, entry, error); + if (error < 0) { + giterr_set(GITERR_TREE, "failed to insert %s", filename); + return -1; + } + + bld->entrycount++; git_oid_cpy(&entry->oid, id); entry->attr = filemode; @@ -690,17 +697,14 @@ int git_treebuilder_insert( static git_tree_entry *treebuilder_get(git_treebuilder *bld, const char *filename) { - size_t idx; - git_tree_entry *entry; + git_tree_entry *entry = NULL; + git_strmap_iter pos; assert(bld && filename); - if (tree_key_search(&idx, &bld->entries, filename, strlen(filename)) < 0) - return NULL; - - entry = git_vector_get(&bld->entries, idx); - if (entry->removed) - return NULL; + pos = git_strmap_lookup_index(bld->map, filename); + if (git_strmap_valid_index(bld->map, pos)) + entry = git_strmap_value_at(bld->map, pos); return entry; } @@ -712,12 +716,16 @@ const git_tree_entry *git_treebuilder_get(git_treebuilder *bld, const char *file int git_treebuilder_remove(git_treebuilder *bld, const char *filename) { - git_tree_entry *remove_ptr = treebuilder_get(bld, filename); + git_tree_entry *entry = treebuilder_get(bld, filename); - if (remove_ptr == NULL || remove_ptr->removed) + if (entry == NULL) return tree_error("Failed to remove entry. File isn't in the tree", filename); - remove_ptr->removed = 1; + if (git_vector_insert(&bld->removed, entry) < 0) + return -1; + + git_strmap_delete(bld->map, filename); + bld->entrycount--; return 0; } @@ -728,19 +736,26 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b size_t i; git_buf tree = GIT_BUF_INIT; git_odb *odb; + git_tree_entry *entry; + git_vector entries; assert(bld); - git_vector_sort(&bld->entries); + if (git_vector_init(&entries, bld->entrycount, entry_sort_cmp) < 0) + return -1; + + git_strmap_foreach_value(bld->map, entry, { + if (git_vector_insert(&entries, entry) < 0) + return -1; + }); + + git_vector_sort(&entries); /* Grow the buffer beforehand to an estimated size */ - error = git_buf_grow(&tree, bld->entries.length * 72); + error = git_buf_grow(&tree, bld->entrycount * 72); - for (i = 0; i < bld->entries.length && !error; ++i) { - git_tree_entry *entry = git_vector_get(&bld->entries, i); - - if (entry->removed) - continue; + for (i = 0; i < entries.length && !error; ++i) { + git_tree_entry *entry = git_vector_get(&entries, i); git_buf_printf(&tree, "%o ", entry->attr); git_buf_put(&tree, entry->filename, entry->filename_len + 1); @@ -750,6 +765,8 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b error = -1; } + git_vector_free(&entries); + if (!error && !(error = git_repository_odb__weakptr(&odb, repo))) error = git_odb_write(oid, odb, tree.ptr, tree.size, GIT_OBJ_TREE); @@ -758,22 +775,27 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b return error; } -void git_treebuilder_filter( +int git_treebuilder_filter( git_treebuilder *bld, git_treebuilder_filter_cb filter, void *payload) { - size_t i; + const char *filename; git_tree_entry *entry; assert(bld && filter); - git_vector_foreach(&bld->entries, i, entry) { - if (!entry->removed && filter(entry, payload)) { - entry->removed = 1; - bld->entrycount--; - } - } + git_strmap_foreach(bld->map, filename, entry, { + if (filter(entry, payload)) { + if (git_vector_insert(&bld->removed, entry) < 0) + return -1; + + git_strmap_delete(bld->map, filename); + bld->entrycount--; + } + }); + + return 0; } void git_treebuilder_clear(git_treebuilder *bld) @@ -783,10 +805,12 @@ void git_treebuilder_clear(git_treebuilder *bld) assert(bld); - git_vector_foreach(&bld->entries, i, e) + git_vector_foreach(&bld->removed, i, e) git_tree_entry_free(e); - git_vector_clear(&bld->entries); + git_strmap_foreach_value(bld->map, e, git_tree_entry_free(e)); + + git_vector_clear(&bld->removed); bld->entrycount = 0; } @@ -796,7 +820,8 @@ void git_treebuilder_free(git_treebuilder *bld) return; git_treebuilder_clear(bld); - git_vector_free(&bld->entries); + git_vector_free(&bld->removed); + git_strmap_free(bld->map); git__free(bld); } diff --git a/src/tree.h b/src/tree.h index f07039a07..38daf21d4 100644 --- a/src/tree.h +++ b/src/tree.h @@ -11,9 +11,9 @@ #include "repository.h" #include "odb.h" #include "vector.h" +#include "strmap.h" struct git_tree_entry { - uint16_t removed; uint16_t attr; git_oid oid; size_t filename_len; @@ -26,8 +26,9 @@ struct git_tree { }; struct git_treebuilder { - git_vector entries; + git_vector removed; size_t entrycount; /* vector may contain "removed" entries */ + git_strmap *map; }; GIT_INLINE(bool) git_tree_entry__is_tree(const struct git_tree_entry *e) diff --git a/tests/object/tree/write.c b/tests/object/tree/write.c index 45356e807..ddb62e278 100644 --- a/tests/object/tree/write.c +++ b/tests/object/tree/write.c @@ -104,6 +104,7 @@ void test_object_tree_write__subtree(void) void test_object_tree_write__sorted_subtrees(void) { git_treebuilder *builder; + git_tree *tree; unsigned int i; int position_c = -1, position_cake = -1, position_config = -1; @@ -143,8 +144,9 @@ void test_object_tree_write__sorted_subtrees(void) cl_git_pass(git_treebuilder_write(&tree_oid, g_repo, builder)); - for (i = 0; i < builder->entries.length; ++i) { - git_tree_entry *entry = git_vector_get(&builder->entries, i); + cl_git_pass(git_tree_lookup(&tree, g_repo, &tree_oid)); + for (i = 0; i < git_tree_entrycount(tree); i++) { + const git_tree_entry *entry = git_tree_entry_byindex(tree, i); if (strcmp(entry->filename, "c") == 0) position_c = i; @@ -156,6 +158,8 @@ void test_object_tree_write__sorted_subtrees(void) position_config = i; } + git_tree_free(tree); + cl_assert(position_c != -1); cl_assert(position_cake != -1); cl_assert(position_config != -1); From 978fbb4c345e944004e5a2aede17cdd17ab75356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 9 Jun 2014 22:45:23 +0200 Subject: [PATCH 2/3] treebuilder: don't keep removed entries around If the user wants to keep a copy for themselves, they should make a copy. It adds unnecessary complexity to make sure the returned entries are valid until the builder is cleared. --- include/git2/tree.h | 8 ++++--- src/tree.c | 51 +++++++++++++++------------------------------ src/tree.h | 1 - 3 files changed, 22 insertions(+), 38 deletions(-) diff --git a/include/git2/tree.h b/include/git2/tree.h index 8f1d8a089..42b68193e 100644 --- a/include/git2/tree.h +++ b/include/git2/tree.h @@ -301,8 +301,10 @@ GIT_EXTERN(const git_tree_entry *) git_treebuilder_get( * If an entry named `filename` already exists, its attributes * will be updated with the given ones. * - * The optional pointer `out` can be used to retrieve a pointer to - * the newly created/updated entry. Pass NULL if you do not need it. + * The optional pointer `out` can be used to retrieve a pointer to the + * newly created/updated entry. Pass NULL if you do not need it. The + * pointer may not be valid past the next operation in this + * builder. Duplicate the entry if you want to keep it. * * No attempt is being made to ensure that the provided oid points * to an existing git object in the object database, nor that the @@ -354,7 +356,7 @@ typedef int (*git_treebuilder_filter_cb)( * @param filter Callback to filter entries * @param payload Extra data to pass to filter callback */ -GIT_EXTERN(int) git_treebuilder_filter( +GIT_EXTERN(void) git_treebuilder_filter( git_treebuilder *bld, git_treebuilder_filter_cb filter, void *payload); diff --git a/src/tree.c b/src/tree.c index 7d28c86ce..e7357dd90 100644 --- a/src/tree.c +++ b/src/tree.c @@ -625,9 +625,6 @@ int git_treebuilder_create(git_treebuilder **builder_p, const git_tree *source) return -1; } - if (git_vector_init(&bld->removed, 0, NULL) < 0) - goto on_error; - if (source != NULL) { git_tree_entry *entry_src; @@ -657,7 +654,7 @@ int git_treebuilder_insert( { git_tree_entry *entry; int error; - git_strmap_iter iter; + git_strmap_iter pos; assert(bld && id && filename); @@ -667,22 +664,20 @@ int git_treebuilder_insert( if (!valid_entry_name(filename)) return tree_error("Failed to insert entry. Invalid name for a tree entry", filename); - entry = alloc_entry(filename); - GITERR_CHECK_ALLOC(entry); + pos = git_strmap_lookup_index(bld->map, filename); + if (git_strmap_valid_index(bld->map, pos)) { + entry = git_strmap_value_at(bld->map, pos); + } else { + entry = alloc_entry(filename); + GITERR_CHECK_ALLOC(entry); - iter = git_strmap_lookup_index(bld->map, entry->filename); - if (git_strmap_valid_index(bld->map, iter)) { - git_tree_entry *old_val = git_strmap_value_at(bld->map, iter); - if (git_vector_insert(&bld->removed, old_val) < 0) + git_strmap_insert(bld->map, entry->filename, entry, error); + + if (error < 0) { + git_tree_entry_free(entry); + giterr_set(GITERR_TREE, "failed to insert %s", filename); return -1; - - git_strmap_delete_at(bld->map, iter); - } - - git_strmap_insert(bld->map, entry->filename, entry, error); - if (error < 0) { - giterr_set(GITERR_TREE, "failed to insert %s", filename); - return -1; + } } bld->entrycount++; @@ -721,10 +716,8 @@ int git_treebuilder_remove(git_treebuilder *bld, const char *filename) if (entry == NULL) return tree_error("Failed to remove entry. File isn't in the tree", filename); - if (git_vector_insert(&bld->removed, entry) < 0) - return -1; - git_strmap_delete(bld->map, filename); + git_tree_entry_free(entry); bld->entrycount--; return 0; @@ -775,7 +768,7 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b return error; } -int git_treebuilder_filter( +void git_treebuilder_filter( git_treebuilder *bld, git_treebuilder_filter_cb filter, void *payload) @@ -787,30 +780,21 @@ int git_treebuilder_filter( git_strmap_foreach(bld->map, filename, entry, { if (filter(entry, payload)) { - if (git_vector_insert(&bld->removed, entry) < 0) - return -1; - git_strmap_delete(bld->map, filename); bld->entrycount--; + git_tree_entry_free(entry); } }); - - return 0; } void git_treebuilder_clear(git_treebuilder *bld) { - size_t i; git_tree_entry *e; assert(bld); - git_vector_foreach(&bld->removed, i, e) - git_tree_entry_free(e); - git_strmap_foreach_value(bld->map, e, git_tree_entry_free(e)); - - git_vector_clear(&bld->removed); + git_strmap_clear(bld->map); bld->entrycount = 0; } @@ -820,7 +804,6 @@ void git_treebuilder_free(git_treebuilder *bld) return; git_treebuilder_clear(bld); - git_vector_free(&bld->removed); git_strmap_free(bld->map); git__free(bld); } diff --git a/src/tree.h b/src/tree.h index 38daf21d4..81508a6bd 100644 --- a/src/tree.h +++ b/src/tree.h @@ -26,7 +26,6 @@ struct git_tree { }; struct git_treebuilder { - git_vector removed; size_t entrycount; /* vector may contain "removed" entries */ git_strmap *map; }; From fcc60066073b746332eb859c7fccdcece150bfcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 9 Jun 2014 22:59:32 +0200 Subject: [PATCH 3/3] treentry: no need for manual size book-keeping We can simply ask the hasmap. --- src/tree.c | 15 ++++++--------- src/tree.h | 1 - 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/tree.c b/src/tree.c index e7357dd90..e0e2dbebf 100644 --- a/src/tree.c +++ b/src/tree.c @@ -367,7 +367,8 @@ size_t git_tree_entrycount(const git_tree *tree) unsigned int git_treebuilder_entrycount(git_treebuilder *bld) { assert(bld); - return (unsigned int)bld->entrycount; + + return git_strmap_num_entries(bld->map); } static int tree_error(const char *str, const char *path) @@ -469,7 +470,6 @@ static int append_entry( return -1; } - bld->entrycount++; return 0; } @@ -680,7 +680,6 @@ int git_treebuilder_insert( } } - bld->entrycount++; git_oid_cpy(&entry->oid, id); entry->attr = filemode; @@ -719,14 +718,13 @@ int git_treebuilder_remove(git_treebuilder *bld, const char *filename) git_strmap_delete(bld->map, filename); git_tree_entry_free(entry); - bld->entrycount--; return 0; } int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *bld) { int error = 0; - size_t i; + size_t i, entrycount; git_buf tree = GIT_BUF_INIT; git_odb *odb; git_tree_entry *entry; @@ -734,7 +732,8 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b assert(bld); - if (git_vector_init(&entries, bld->entrycount, entry_sort_cmp) < 0) + entrycount = git_strmap_num_entries(bld->map); + if (git_vector_init(&entries, entrycount, entry_sort_cmp) < 0) return -1; git_strmap_foreach_value(bld->map, entry, { @@ -745,7 +744,7 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b git_vector_sort(&entries); /* Grow the buffer beforehand to an estimated size */ - error = git_buf_grow(&tree, bld->entrycount * 72); + error = git_buf_grow(&tree, entrycount * 72); for (i = 0; i < entries.length && !error; ++i) { git_tree_entry *entry = git_vector_get(&entries, i); @@ -781,7 +780,6 @@ void git_treebuilder_filter( git_strmap_foreach(bld->map, filename, entry, { if (filter(entry, payload)) { git_strmap_delete(bld->map, filename); - bld->entrycount--; git_tree_entry_free(entry); } }); @@ -795,7 +793,6 @@ void git_treebuilder_clear(git_treebuilder *bld) git_strmap_foreach_value(bld->map, e, git_tree_entry_free(e)); git_strmap_clear(bld->map); - bld->entrycount = 0; } void git_treebuilder_free(git_treebuilder *bld) diff --git a/src/tree.h b/src/tree.h index 81508a6bd..5d27eb7c9 100644 --- a/src/tree.h +++ b/src/tree.h @@ -26,7 +26,6 @@ struct git_tree { }; struct git_treebuilder { - size_t entrycount; /* vector may contain "removed" entries */ git_strmap *map; };