From d091a9dbdeaa246b120feef60ce1940d7d22ea3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 10 Jul 2014 12:21:28 +0200 Subject: [PATCH 1/9] tree-cache: extract the allocation --- src/tree-cache.c | 34 ++++++++++++++++++++++------------ src/tree-cache.h | 1 + 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/tree-cache.c b/src/tree-cache.c index 49afd6e49..aec94938d 100644 --- a/src/tree-cache.c +++ b/src/tree-cache.c @@ -74,7 +74,6 @@ static int read_tree_internal(git_tree_cache **out, git_tree_cache *tree = NULL; const char *name_start, *buffer; int count; - size_t name_len; buffer = name_start = *buffer_in; @@ -84,17 +83,8 @@ static int read_tree_internal(git_tree_cache **out, if (++buffer >= buffer_end) goto corrupted; - name_len = strlen(name_start); - tree = git__malloc(sizeof(git_tree_cache) + name_len + 1); - GITERR_CHECK_ALLOC(tree); - - memset(tree, 0x0, sizeof(git_tree_cache)); - tree->parent = parent; - - /* NUL-terminated tree name */ - tree->namelen = name_len; - memcpy(tree->name, name_start, name_len); - tree->name[name_len] = '\0'; + if (git_tree_cache_new(&tree, name_start, parent) < 0) + return -1; /* Blank-terminated ASCII decimal number of entries in this tree */ if (git__strtol32(&count, buffer, &buffer, 10) < 0) @@ -164,6 +154,26 @@ int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer return 0; } +int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent) +{ + size_t name_len; + git_tree_cache *tree; + + name_len = strlen(name); + tree = git__malloc(sizeof(git_tree_cache) + name_len + 1); + GITERR_CHECK_ALLOC(tree); + + memset(tree, 0x0, sizeof(git_tree_cache)); + tree->parent = parent; + /* NUL-terminated tree name */ + tree->namelen = name_len; + memcpy(tree->name, name, name_len); + tree->name[name_len] = '\0'; + + *out = tree; + return 0; +} + void git_tree_cache_free(git_tree_cache *tree) { unsigned int i; diff --git a/src/tree-cache.h b/src/tree-cache.h index 78017127c..592a352db 100644 --- a/src/tree-cache.h +++ b/src/tree-cache.h @@ -25,6 +25,7 @@ typedef struct { int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer_size); void git_tree_cache_invalidate_path(git_tree_cache *tree, const char *path); const git_tree_cache *git_tree_cache_get(const git_tree_cache *tree, const char *path); +int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent); void git_tree_cache_free(git_tree_cache *tree); #endif From 19c88310cb79153e19b93c59020b2f7c34794f6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 10 Jul 2014 13:48:13 +0200 Subject: [PATCH 2/9] tree-cache: move to use a pool allocator This simplifies freeing the entries quite a bit; though there aren't that many failure paths right now, introducing filling the cache from a tree will introduce more. This makes sure not to leak memory on errors. --- src/index.c | 10 ++++++++-- src/index.h | 1 + src/tree-cache.c | 36 ++++++++++-------------------------- src/tree-cache.h | 5 +++-- 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/index.c b/src/index.c index 8b757f269..8ea1fd49e 100644 --- a/src/index.c +++ b/src/index.c @@ -405,6 +405,8 @@ int git_index_open(git_index **index_out, const char *index_path) return -1; } + git_pool_init(&index->tree_pool, 1, 0); + if (index_path != NULL) { index->index_file_path = git__strdup(index_path); if (!index->index_file_path) @@ -435,6 +437,7 @@ int git_index_open(git_index **index_out, const char *index_path) return 0; fail: + git_pool_clear(&index->tree_pool); git_index_free(index); return error; } @@ -517,8 +520,8 @@ int git_index_clear(git_index *index) assert(index); - git_tree_cache_free(index->tree); index->tree = NULL; + git_pool_clear(&index->tree_pool); if (git_mutex_lock(&index->lock) < 0) { giterr_set(GITERR_OS, "Failed to lock index"); @@ -621,6 +624,9 @@ int git_index_read(git_index *index, int force) if (error < 0) return error; + index->tree = NULL; + git_pool_clear(&index->tree_pool); + error = git_index_clear(index); if (!error) @@ -1871,7 +1877,7 @@ static size_t read_extension(git_index *index, const char *buffer, size_t buffer if (dest.signature[0] >= 'A' && dest.signature[0] <= 'Z') { /* tree cache */ if (memcmp(dest.signature, INDEX_EXT_TREECACHE_SIG, 4) == 0) { - if (git_tree_cache_read(&index->tree, buffer + 8, dest.extension_size) < 0) + if (git_tree_cache_read(&index->tree, buffer + 8, dest.extension_size, &index->tree_pool) < 0) return 0; } else if (memcmp(dest.signature, INDEX_EXT_UNMERGED_SIG, 4) == 0) { if (read_reuc(index, buffer + 8, dest.extension_size) < 0) diff --git a/src/index.h b/src/index.h index 50a0b4b6c..2eb93fb17 100644 --- a/src/index.h +++ b/src/index.h @@ -35,6 +35,7 @@ struct git_index { unsigned int no_symlinks:1; git_tree_cache *tree; + git_pool tree_pool; git_vector names; git_vector reuc; diff --git a/src/tree-cache.c b/src/tree-cache.c index aec94938d..d76d9281b 100644 --- a/src/tree-cache.c +++ b/src/tree-cache.c @@ -6,6 +6,7 @@ */ #include "tree-cache.h" +#include "pool.h" static git_tree_cache *find_child( const git_tree_cache *tree, const char *path, const char *end) @@ -69,7 +70,8 @@ const git_tree_cache *git_tree_cache_get(const git_tree_cache *tree, const char } static int read_tree_internal(git_tree_cache **out, - const char **buffer_in, const char *buffer_end, git_tree_cache *parent) + const char **buffer_in, const char *buffer_end, + git_tree_cache *parent, git_pool *pool) { git_tree_cache *tree = NULL; const char *name_start, *buffer; @@ -83,7 +85,7 @@ static int read_tree_internal(git_tree_cache **out, if (++buffer >= buffer_end) goto corrupted; - if (git_tree_cache_new(&tree, name_start, parent) < 0) + if (git_tree_cache_new(&tree, name_start, parent, pool) < 0) return -1; /* Blank-terminated ASCII decimal number of entries in this tree */ @@ -118,13 +120,13 @@ static int read_tree_internal(git_tree_cache **out, if (tree->children_count > 0) { unsigned int i; - tree->children = git__malloc(tree->children_count * sizeof(git_tree_cache *)); + tree->children = git_pool_malloc(pool, tree->children_count * sizeof(git_tree_cache *)); GITERR_CHECK_ALLOC(tree->children); memset(tree->children, 0x0, tree->children_count * sizeof(git_tree_cache *)); for (i = 0; i < tree->children_count; ++i) { - if (read_tree_internal(&tree->children[i], &buffer, buffer_end, tree) < 0) + if (read_tree_internal(&tree->children[i], &buffer, buffer_end, tree, pool) < 0) goto corrupted; } } @@ -134,16 +136,15 @@ static int read_tree_internal(git_tree_cache **out, return 0; corrupted: - git_tree_cache_free(tree); giterr_set(GITERR_INDEX, "Corrupted TREE extension in index"); return -1; } -int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer_size) +int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer_size, git_pool *pool) { const char *buffer_end = buffer + buffer_size; - if (read_tree_internal(tree, &buffer, buffer_end, NULL) < 0) + if (read_tree_internal(tree, &buffer, buffer_end, NULL, pool) < 0) return -1; if (buffer < buffer_end) { @@ -154,13 +155,13 @@ int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer return 0; } -int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent) +int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent, git_pool *pool) { size_t name_len; git_tree_cache *tree; name_len = strlen(name); - tree = git__malloc(sizeof(git_tree_cache) + name_len + 1); + tree = git_pool_malloc(pool, sizeof(git_tree_cache) + name_len + 1); GITERR_CHECK_ALLOC(tree); memset(tree, 0x0, sizeof(git_tree_cache)); @@ -173,20 +174,3 @@ int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *p *out = tree; return 0; } - -void git_tree_cache_free(git_tree_cache *tree) -{ - unsigned int i; - - if (tree == NULL) - return; - - if (tree->children != NULL) { - for (i = 0; i < tree->children_count; ++i) - git_tree_cache_free(tree->children[i]); - - git__free(tree->children); - } - - git__free(tree); -} diff --git a/src/tree-cache.h b/src/tree-cache.h index 592a352db..d41a51f84 100644 --- a/src/tree-cache.h +++ b/src/tree-cache.h @@ -9,6 +9,7 @@ #define INCLUDE_tree_cache_h__ #include "common.h" +#include "pool.h" #include "git2/oid.h" typedef struct { @@ -22,10 +23,10 @@ typedef struct { char name[GIT_FLEX_ARRAY]; } git_tree_cache; -int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer_size); +int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer_size, git_pool *pool); void git_tree_cache_invalidate_path(git_tree_cache *tree, const char *path); const git_tree_cache *git_tree_cache_get(const git_tree_cache *tree, const char *path); -int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent); +int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent, git_pool *pool); void git_tree_cache_free(git_tree_cache *tree); #endif From 6843cebe17b7ce15eb9a6d1a88ac2e7e9c00d5b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 10 Jul 2014 14:10:39 +0200 Subject: [PATCH 3/9] index: fill the tree cache when reading from a tree When reading from a tree, we know what every tree is going to look like, so we can fill in the tree cache completely, making use of the index for modification of trees a lot quicker. --- src/index.c | 8 ++++++ src/tree-cache.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ src/tree-cache.h | 4 +++ 3 files changed, 83 insertions(+) diff --git a/src/index.c b/src/index.c index 8ea1fd49e..be043a9bf 100644 --- a/src/index.c +++ b/src/index.c @@ -2277,6 +2277,7 @@ typedef struct read_tree_data { git_vector *old_entries; git_vector *new_entries; git_vector_cmp entry_cmp; + git_tree_cache *tree; } read_tree_data; static int read_tree_cb( @@ -2338,6 +2339,9 @@ int git_index_read_tree(git_index *index, const git_tree *tree) data.new_entries = &entries; data.entry_cmp = index->entries_search; + index->tree = NULL; + git_pool_clear(&index->tree_pool); + if (index_sort_if_needed(index, true) < 0) return -1; @@ -2358,6 +2362,10 @@ int git_index_read_tree(git_index *index, const git_tree *tree) } git_vector_free(&entries); + if (error < 0) + return error; + + error = git_tree_cache_read_tree(&index->tree, tree, &index->tree_pool); return error; } diff --git a/src/tree-cache.c b/src/tree-cache.c index d76d9281b..86521ce4d 100644 --- a/src/tree-cache.c +++ b/src/tree-cache.c @@ -7,6 +7,7 @@ #include "tree-cache.h" #include "pool.h" +#include "tree.h" static git_tree_cache *find_child( const git_tree_cache *tree, const char *path, const char *end) @@ -155,6 +156,76 @@ int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer return 0; } +static int read_tree_recursive(git_tree_cache *cache, const git_tree *tree, git_pool *pool) +{ + git_repository *repo; + size_t i, j, nentries, ntrees; + int error; + + repo = git_tree_owner(tree); + + git_oid_cpy(&cache->oid, git_tree_id(tree)); + nentries = git_tree_entrycount(tree); + + /* + * We make sure we know how many trees we need to allocate for + * so we don't have to realloc and change the pointers for the + * parents. + */ + ntrees = 0; + for (i = 0; i < nentries; i++) { + const git_tree_entry *entry; + + entry = git_tree_entry_byindex(tree, i); + if (git_tree_entry_filemode(entry) == GIT_FILEMODE_TREE) + ntrees++; + } + + cache->children_count = ntrees; + cache->children = git_pool_mallocz(pool, ntrees * sizeof(git_tree_cache *)); + GITERR_CHECK_ALLOC(cache->children); + + j = 0; + for (i = 0; i < nentries; i++) { + const git_tree_entry *entry; + git_tree *subtree; + + entry = git_tree_entry_byindex(tree, i); + if (git_tree_entry_filemode(entry) != GIT_FILEMODE_TREE) + continue; + + if ((error = git_tree_cache_new(&cache->children[j], git_tree_entry_name(entry), cache, pool)) < 0) + return error; + + if ((error = git_tree_lookup(&subtree, repo, git_tree_entry_id(entry))) < 0) + return error; + + error = read_tree_recursive(cache->children[j], subtree, pool); + git_tree_free(subtree); + j++; + + if (error < 0) + return error; + } + + return 0; +} + +int git_tree_cache_read_tree(git_tree_cache **out, const git_tree *tree, git_pool *pool) +{ + int error; + git_tree_cache *cache; + + if ((error = git_tree_cache_new(&cache, "", NULL, pool)) < 0) + return error; + + if ((error = read_tree_recursive(cache, tree, pool)) < 0) + return error; + + *out = cache; + return 0; +} + int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent, git_pool *pool) { size_t name_len; diff --git a/src/tree-cache.h b/src/tree-cache.h index d41a51f84..d75049d8b 100644 --- a/src/tree-cache.h +++ b/src/tree-cache.h @@ -27,6 +27,10 @@ int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer void git_tree_cache_invalidate_path(git_tree_cache *tree, const char *path); const git_tree_cache *git_tree_cache_get(const git_tree_cache *tree, const char *path); int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent, git_pool *pool); +/** + * Read a tree as the root of the tree cache (like for `git read-tree`) + */ +int git_tree_cache_read_tree(git_tree_cache **out, const git_tree *tree, git_pool *pool); void git_tree_cache_free(git_tree_cache *tree); #endif From ee4db1c16eda223477d4723724471d26ed188831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 11 Jul 2014 11:48:51 +0200 Subject: [PATCH 4/9] index: add tests for the tree cache These test that we invalidate at the right levels and that we remove the tree cache when clearing the index. --- tests/index/cache.c | 110 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 tests/index/cache.c diff --git a/tests/index/cache.c b/tests/index/cache.c new file mode 100644 index 000000000..79b431be7 --- /dev/null +++ b/tests/index/cache.c @@ -0,0 +1,110 @@ +#include "clar_libgit2.h" +#include "git2.h" +#include "index.h" +#include "tree-cache.h" + +static git_repository *g_repo; + +void test_index_cache__initialize(void) +{ + g_repo = cl_git_sandbox_init("testrepo"); +} + +void test_index_cache__cleanup(void) +{ + cl_git_sandbox_cleanup(); + g_repo = NULL; +} + +void test_index_cache__read_tree_no_children(void) +{ + git_index *index; + git_index_entry entry; + git_tree *tree; + git_oid id; + + cl_git_pass(git_index_new(&index)); + cl_assert(index->tree == NULL); + cl_git_pass(git_oid_fromstr(&id, "45dd856fdd4d89b884c340ba0e047752d9b085d6")); + cl_git_pass(git_tree_lookup(&tree, g_repo, &id)); + cl_git_pass(git_index_read_tree(index, tree)); + git_tree_free(tree); + + cl_assert(index->tree); + cl_assert(git_oid_equal(&id, &index->tree->oid)); + cl_assert_equal_i(0, index->tree->children_count); + cl_assert_equal_i(0, index->tree->entries); /* 0 is a placeholder here */ + + memset(&entry, 0x0, sizeof(git_index_entry)); + entry.path = "new.txt"; + entry.mode = GIT_FILEMODE_BLOB; + git_oid_fromstr(&entry.id, "45b983be36b73c0788dc9cbcb76cbb80fc7bb057"); + + cl_git_pass(git_index_add(index, &entry)); + cl_assert_equal_i(-1, index->tree->entries); + + git_index_free(index); +} + +void test_index_cache__read_tree_children(void) +{ + git_index *index; + git_index_entry entry; + git_tree *tree; + const git_tree_cache *cache; + git_oid tree_id; + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_clear(index)); + cl_assert(index->tree == NULL); + + + /* add a bunch of entries at different levels */ + memset(&entry, 0x0, sizeof(git_index_entry)); + entry.path = "top-level"; + entry.mode = GIT_FILEMODE_BLOB; + git_oid_fromstr(&entry.id, "45b983be36b73c0788dc9cbcb76cbb80fc7bb057"); + cl_git_pass(git_index_add(index, &entry)); + + + entry.path = "subdir/some-file"; + cl_git_pass(git_index_add(index, &entry)); + + entry.path = "subdir/even-deeper/some-file"; + cl_git_pass(git_index_add(index, &entry)); + + entry.path = "subdir2/some-file"; + cl_git_pass(git_index_add(index, &entry)); + + cl_git_pass(git_index_write_tree(&tree_id, index)); + cl_git_pass(git_index_clear(index)); + cl_assert(index->tree == NULL); + + cl_git_pass(git_tree_lookup(&tree, g_repo, &tree_id)); + cl_git_pass(git_index_read_tree(index, tree)); + git_tree_free(tree); + + cl_assert(index->tree); + cl_assert_equal_i(2, index->tree->children_count); + + /* override with a slightly different id, also dummy */ + entry.path = "subdir/some-file"; + git_oid_fromstr(&entry.id, "45b983be36b73c0788dc9cbcb76cbb80fc7bb058"); + cl_git_pass(git_index_add(index, &entry)); + + cl_assert_equal_i(-1, index->tree->entries); + + cache = git_tree_cache_get(index->tree, "subdir"); + cl_assert(cache); + cl_assert_equal_i(-1, cache->entries); + + cache = git_tree_cache_get(index->tree, "subdir/even-deeper"); + cl_assert(cache); + cl_assert_equal_i(0, cache->entries); + + cache = git_tree_cache_get(index->tree, "subdir2"); + cl_assert(cache); + cl_assert_equal_i(0, cache->entries); + + git_index_free(index); +} From 46bb00673013a7dae1ee81c484ac05c533ed1c4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 11 Jul 2014 11:52:38 +0200 Subject: [PATCH 5/9] tree-cache: remove the parent pointer This wasn't used. We invalidate based on the full path, so we always go down the tree, never up. --- src/tree-cache.c | 15 +++++++-------- src/tree-cache.h | 5 ++--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/tree-cache.c b/src/tree-cache.c index 86521ce4d..1eb046d33 100644 --- a/src/tree-cache.c +++ b/src/tree-cache.c @@ -72,7 +72,7 @@ const git_tree_cache *git_tree_cache_get(const git_tree_cache *tree, const char static int read_tree_internal(git_tree_cache **out, const char **buffer_in, const char *buffer_end, - git_tree_cache *parent, git_pool *pool) + git_pool *pool) { git_tree_cache *tree = NULL; const char *name_start, *buffer; @@ -86,7 +86,7 @@ static int read_tree_internal(git_tree_cache **out, if (++buffer >= buffer_end) goto corrupted; - if (git_tree_cache_new(&tree, name_start, parent, pool) < 0) + if (git_tree_cache_new(&tree, name_start, pool) < 0) return -1; /* Blank-terminated ASCII decimal number of entries in this tree */ @@ -127,7 +127,7 @@ static int read_tree_internal(git_tree_cache **out, memset(tree->children, 0x0, tree->children_count * sizeof(git_tree_cache *)); for (i = 0; i < tree->children_count; ++i) { - if (read_tree_internal(&tree->children[i], &buffer, buffer_end, tree, pool) < 0) + if (read_tree_internal(&tree->children[i], &buffer, buffer_end, pool) < 0) goto corrupted; } } @@ -145,7 +145,7 @@ int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer { const char *buffer_end = buffer + buffer_size; - if (read_tree_internal(tree, &buffer, buffer_end, NULL, pool) < 0) + if (read_tree_internal(tree, &buffer, buffer_end, pool) < 0) return -1; if (buffer < buffer_end) { @@ -194,7 +194,7 @@ static int read_tree_recursive(git_tree_cache *cache, const git_tree *tree, git_ if (git_tree_entry_filemode(entry) != GIT_FILEMODE_TREE) continue; - if ((error = git_tree_cache_new(&cache->children[j], git_tree_entry_name(entry), cache, pool)) < 0) + if ((error = git_tree_cache_new(&cache->children[j], git_tree_entry_name(entry), pool)) < 0) return error; if ((error = git_tree_lookup(&subtree, repo, git_tree_entry_id(entry))) < 0) @@ -216,7 +216,7 @@ int git_tree_cache_read_tree(git_tree_cache **out, const git_tree *tree, git_poo int error; git_tree_cache *cache; - if ((error = git_tree_cache_new(&cache, "", NULL, pool)) < 0) + if ((error = git_tree_cache_new(&cache, "", pool)) < 0) return error; if ((error = read_tree_recursive(cache, tree, pool)) < 0) @@ -226,7 +226,7 @@ int git_tree_cache_read_tree(git_tree_cache **out, const git_tree *tree, git_poo return 0; } -int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent, git_pool *pool) +int git_tree_cache_new(git_tree_cache **out, const char *name, git_pool *pool) { size_t name_len; git_tree_cache *tree; @@ -236,7 +236,6 @@ int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *p GITERR_CHECK_ALLOC(tree); memset(tree, 0x0, sizeof(git_tree_cache)); - tree->parent = parent; /* NUL-terminated tree name */ tree->namelen = name_len; memcpy(tree->name, name, name_len); diff --git a/src/tree-cache.h b/src/tree-cache.h index d75049d8b..b92a401a6 100644 --- a/src/tree-cache.h +++ b/src/tree-cache.h @@ -12,8 +12,7 @@ #include "pool.h" #include "git2/oid.h" -typedef struct { - struct git_tree_cache *parent; +typedef struct git_tree_cache { struct git_tree_cache **children; size_t children_count; @@ -26,7 +25,7 @@ typedef struct { int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer_size, git_pool *pool); void git_tree_cache_invalidate_path(git_tree_cache *tree, const char *path); const git_tree_cache *git_tree_cache_get(const git_tree_cache *tree, const char *path); -int git_tree_cache_new(git_tree_cache **out, const char *name, git_tree_cache *parent, git_pool *pool); +int git_tree_cache_new(git_tree_cache **out, const char *name, git_pool *pool); /** * Read a tree as the root of the tree cache (like for `git read-tree`) */ From c2f8b215937ed6f17dce94a4bde26d5c83b42533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sun, 28 Sep 2014 07:00:49 +0200 Subject: [PATCH 6/9] index: write out the tree cache extension Keeping the cache around after read-tree is only one part of the optimisation opportunities. In order to share the cache between program instances, we need to write the TREE extension to the index. Do so, taking the opportunity to rename 'entries' to 'entry_count' to match the name given in the format description. The included test is rather trivial, but works as a sanity check. --- src/index.c | 34 ++++++++++++++---- src/tree-cache.c | 60 +++++++++++++++++++++++++++++--- src/tree-cache.h | 4 ++- src/tree.c | 4 +-- tests/index/cache.c | 85 +++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 168 insertions(+), 19 deletions(-) diff --git a/src/index.c b/src/index.c index be043a9bf..8a5bf61c1 100644 --- a/src/index.c +++ b/src/index.c @@ -2114,16 +2114,13 @@ static int write_entries(git_index *index, git_filebuf *file) static int write_extension(git_filebuf *file, struct index_extension *header, git_buf *data) { struct index_extension ondisk; - int error = 0; memset(&ondisk, 0x0, sizeof(struct index_extension)); memcpy(&ondisk, header, 4); ondisk.extension_size = htonl(header->extension_size); - if ((error = git_filebuf_write(file, &ondisk, sizeof(struct index_extension))) == 0) - error = git_filebuf_write(file, data->ptr, data->size); - - return error; + git_filebuf_write(file, &ondisk, sizeof(struct index_extension)); + return git_filebuf_write(file, data->ptr, data->size); } static int create_name_extension_data(git_buf *name_buf, git_index_name_entry *conflict_name) @@ -2229,6 +2226,29 @@ done: return error; } +static int write_tree_extension(git_index *index, git_filebuf *file) +{ + struct index_extension extension; + git_buf buf = GIT_BUF_INIT; + int error; + + if (index->tree == NULL) + return 0; + + if ((error = git_tree_cache_write(&buf, index->tree)) < 0) + return error; + + memset(&extension, 0x0, sizeof(struct index_extension)); + memcpy(&extension.signature, INDEX_EXT_TREECACHE_SIG, 4); + extension.extension_size = (uint32_t)buf.size; + + error = write_extension(file, &extension, &buf); + + git_buf_free(&buf); + + return error; +} + static int write_index(git_index *index, git_filebuf *file) { git_oid hash_final; @@ -2251,7 +2271,9 @@ static int write_index(git_index *index, git_filebuf *file) if (write_entries(index, file) < 0) return -1; - /* TODO: write tree cache extension */ + /* write the tree cache extension */ + if (index->tree != NULL && write_tree_extension(index, file) < 0) + return -1; /* write the rename conflict extension */ if (index->names.length > 0 && write_name_extension(index, file) < 0) diff --git a/src/tree-cache.c b/src/tree-cache.c index 1eb046d33..48dc46a40 100644 --- a/src/tree-cache.c +++ b/src/tree-cache.c @@ -31,7 +31,7 @@ void git_tree_cache_invalidate_path(git_tree_cache *tree, const char *path) if (tree == NULL) return; - tree->entries = -1; + tree->entry_count = -1; while (ptr != NULL) { end = strchr(ptr, '/'); @@ -43,7 +43,7 @@ void git_tree_cache_invalidate_path(git_tree_cache *tree, const char *path) if (tree == NULL) /* We don't have that tree */ return; - tree->entries = -1; + tree->entry_count = -1; ptr = end + 1; } } @@ -93,7 +93,7 @@ static int read_tree_internal(git_tree_cache **out, if (git__strtol32(&count, buffer, &buffer, 10) < 0) goto corrupted; - tree->entries = count; + tree->entry_count = count; if (*buffer != ' ' || ++buffer >= buffer_end) goto corrupted; @@ -108,7 +108,7 @@ static int read_tree_internal(git_tree_cache **out, goto corrupted; /* The SHA1 is only there if it's not invalidated */ - if (tree->entries >= 0) { + if (tree->entry_count >= 0) { /* 160-bit SHA-1 for this tree and it's children */ if (buffer + GIT_OID_RAWSZ > buffer_end) goto corrupted; @@ -244,3 +244,55 @@ int git_tree_cache_new(git_tree_cache **out, const char *name, git_pool *pool) *out = tree; return 0; } + +/** + * Recursively recalculate the total entry count, which we need to + * write out to the index + */ +static void recount_entries(git_tree_cache *tree) +{ + size_t i; + ssize_t entry_count; + git_tree_cache *child; + + for (i = 0; i < tree->children_count; i++) + recount_entries(tree->children[i]); + + if (tree->entry_count == -1) + return; + + entry_count = 0; + for (i = 0; i < tree->children_count; i++) { + child = tree->children[i]; + + if (child->entry_count == -1) + continue; + + entry_count += tree->children[i]->children_count; + } + + tree->entry_count = entry_count; +} + +static void write_tree(git_buf *out, git_tree_cache *tree) +{ + size_t i; + + git_buf_printf(out, "%s%c%zd %"PRIuZ"\n", tree->name, 0, tree->entry_count, tree->children_count); + + if (tree->entry_count == -1) + return; + + git_buf_put(out, (const char *) &tree->oid, GIT_OID_RAWSZ); + + for (i = 0; i < tree->children_count; i++) + write_tree(out, tree->children[i]); +} + +int git_tree_cache_write(git_buf *out, git_tree_cache *tree) +{ + recount_entries(tree); + write_tree(out, tree); + + return git_buf_oom(out) ? -1 : 0; +} diff --git a/src/tree-cache.h b/src/tree-cache.h index b92a401a6..c44ca7cf5 100644 --- a/src/tree-cache.h +++ b/src/tree-cache.h @@ -10,18 +10,20 @@ #include "common.h" #include "pool.h" +#include "buffer.h" #include "git2/oid.h" typedef struct git_tree_cache { struct git_tree_cache **children; size_t children_count; - ssize_t entries; + ssize_t entry_count; git_oid oid; size_t namelen; char name[GIT_FLEX_ARRAY]; } git_tree_cache; +int git_tree_cache_write(git_buf *out, git_tree_cache *tree); int git_tree_cache_read(git_tree_cache **tree, const char *buffer, size_t buffer_size, git_pool *pool); void git_tree_cache_invalidate_path(git_tree_cache *tree, const char *path); const git_tree_cache *git_tree_cache_get(const git_tree_cache *tree, const char *path); diff --git a/src/tree.c b/src/tree.c index 28190d6da..adc6fe5e4 100644 --- a/src/tree.c +++ b/src/tree.c @@ -488,7 +488,7 @@ static int write_tree( const git_tree_cache *cache; cache = git_tree_cache_get(index->tree, dirname); - if (cache != NULL && cache->entries >= 0){ + if (cache != NULL && cache->entry_count >= 0){ git_oid_cpy(oid, &cache->oid); return (int)find_next_dir(dirname, index, start); } @@ -589,7 +589,7 @@ int git_tree__write_index( return GIT_EUNMERGED; } - if (index->tree != NULL && index->tree->entries >= 0) { + if (index->tree != NULL && index->tree->entry_count >= 0) { git_oid_cpy(oid, &index->tree->oid); return 0; } diff --git a/tests/index/cache.c b/tests/index/cache.c index 79b431be7..ba2d51fa2 100644 --- a/tests/index/cache.c +++ b/tests/index/cache.c @@ -16,6 +16,79 @@ void test_index_cache__cleanup(void) g_repo = NULL; } +void test_index_cache__write_extension_at_root(void) +{ + git_index *index; + git_tree *tree; + git_oid id; + const char *tree_id_str = "45dd856fdd4d89b884c340ba0e047752d9b085d6"; + const char *index_file = "index-tree"; + + cl_git_pass(git_index_open(&index, index_file)); + cl_assert(index->tree == NULL); + cl_git_pass(git_oid_fromstr(&id, tree_id_str)); + cl_git_pass(git_tree_lookup(&tree, g_repo, &id)); + cl_git_pass(git_index_read_tree(index, tree)); + git_tree_free(tree); + + cl_assert(index->tree); + cl_git_pass(git_index_write(index)); + git_index_free(index); + + cl_git_pass(git_index_open(&index, index_file)); + cl_assert(index->tree); + + cl_assert_equal_i(0, index->tree->entry_count); + cl_assert_equal_i(0, index->tree->children_count); + + cl_assert(git_oid_equal(&id, &index->tree->oid)); + + cl_git_pass(p_unlink(index_file)); + git_index_free(index); +} + +void test_index_cache__write_extension_invalidated_root(void) +{ + git_index *index; + git_tree *tree; + git_oid id; + const char *tree_id_str = "45dd856fdd4d89b884c340ba0e047752d9b085d6"; + const char *index_file = "index-tree-invalidated"; + git_index_entry entry; + + cl_git_pass(git_index_open(&index, index_file)); + cl_assert(index->tree == NULL); + cl_git_pass(git_oid_fromstr(&id, tree_id_str)); + cl_git_pass(git_tree_lookup(&tree, g_repo, &id)); + cl_git_pass(git_index_read_tree(index, tree)); + git_tree_free(tree); + + cl_assert(index->tree); + + memset(&entry, 0x0, sizeof(git_index_entry)); + git_oid_cpy(&entry.id, &git_index_get_byindex(index, 0)->id); + entry.mode = GIT_FILEMODE_BLOB; + entry.path = "some-new-file.txt"; + + cl_git_pass(git_index_add(index, &entry)); + + cl_assert_equal_i(-1, index->tree->entry_count); + + cl_git_pass(git_index_write(index)); + git_index_free(index); + + cl_git_pass(git_index_open(&index, index_file)); + cl_assert(index->tree); + + cl_assert_equal_i(-1, index->tree->entry_count); + cl_assert_equal_i(0, index->tree->children_count); + + cl_assert(git_oid_cmp(&id, &index->tree->oid)); + + cl_git_pass(p_unlink(index_file)); + git_index_free(index); +} + void test_index_cache__read_tree_no_children(void) { git_index *index; @@ -33,7 +106,7 @@ void test_index_cache__read_tree_no_children(void) cl_assert(index->tree); cl_assert(git_oid_equal(&id, &index->tree->oid)); cl_assert_equal_i(0, index->tree->children_count); - cl_assert_equal_i(0, index->tree->entries); /* 0 is a placeholder here */ + cl_assert_equal_i(0, index->tree->entry_count); /* 0 is a placeholder here */ memset(&entry, 0x0, sizeof(git_index_entry)); entry.path = "new.txt"; @@ -41,7 +114,7 @@ void test_index_cache__read_tree_no_children(void) git_oid_fromstr(&entry.id, "45b983be36b73c0788dc9cbcb76cbb80fc7bb057"); cl_git_pass(git_index_add(index, &entry)); - cl_assert_equal_i(-1, index->tree->entries); + cl_assert_equal_i(-1, index->tree->entry_count); git_index_free(index); } @@ -92,19 +165,19 @@ void test_index_cache__read_tree_children(void) git_oid_fromstr(&entry.id, "45b983be36b73c0788dc9cbcb76cbb80fc7bb058"); cl_git_pass(git_index_add(index, &entry)); - cl_assert_equal_i(-1, index->tree->entries); + cl_assert_equal_i(-1, index->tree->entry_count); cache = git_tree_cache_get(index->tree, "subdir"); cl_assert(cache); - cl_assert_equal_i(-1, cache->entries); + cl_assert_equal_i(-1, cache->entry_count); cache = git_tree_cache_get(index->tree, "subdir/even-deeper"); cl_assert(cache); - cl_assert_equal_i(0, cache->entries); + cl_assert_equal_i(0, cache->entry_count); cache = git_tree_cache_get(index->tree, "subdir2"); cl_assert(cache); - cl_assert_equal_i(0, cache->entries); + cl_assert_equal_i(0, cache->entry_count); git_index_free(index); } From 795d8e932893b9944c6fe72191cb80c2ccdd5d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 29 Sep 2014 08:03:22 +0200 Subject: [PATCH 7/9] index: make sure to write cached subtrees if parent is invalidated If e.g. the root tree is invalidated, we still want to write out its children, since those may still have valid cache entries. --- src/tree-cache.c | 6 ++--- tests/index/cache.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/tree-cache.c b/src/tree-cache.c index 48dc46a40..bf52b5530 100644 --- a/src/tree-cache.c +++ b/src/tree-cache.c @@ -280,10 +280,8 @@ static void write_tree(git_buf *out, git_tree_cache *tree) git_buf_printf(out, "%s%c%zd %"PRIuZ"\n", tree->name, 0, tree->entry_count, tree->children_count); - if (tree->entry_count == -1) - return; - - git_buf_put(out, (const char *) &tree->oid, GIT_OID_RAWSZ); + if (tree->entry_count != -1) + git_buf_put(out, (const char *) &tree->oid, GIT_OID_RAWSZ); for (i = 0; i < tree->children_count; i++) write_tree(out, tree->children[i]); diff --git a/tests/index/cache.c b/tests/index/cache.c index ba2d51fa2..17a61a9f5 100644 --- a/tests/index/cache.c +++ b/tests/index/cache.c @@ -119,6 +119,59 @@ void test_index_cache__read_tree_no_children(void) git_index_free(index); } +void test_index_cache__two_levels(void) +{ + git_tree *tree; + git_oid tree_id; + git_index *index; + git_index_entry entry; + const git_tree_cache *tree_cache; + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_clear(index)); + + memset(&entry, 0x0, sizeof(entry)); + entry.mode = GIT_FILEMODE_BLOB; + cl_git_pass(git_oid_fromstr(&entry.id, "a8233120f6ad708f843d861ce2b7228ec4e3dec6")); + entry.path = "top-level.txt"; + cl_git_pass(git_index_add(index, &entry)); + + entry.path = "subdir/file.txt"; + cl_git_pass(git_index_add(index, &entry)); + + /* the read-tree fills the tree cache */ + cl_git_pass(git_index_write_tree(&tree_id, index)); + cl_git_pass(git_tree_lookup(&tree, g_repo, &tree_id)); + cl_git_pass(git_index_read_tree(index, tree)); + git_tree_free(tree); + cl_git_pass(git_index_write(index)); + + /* we now must have cache entries for "" and "subdir" */ + cl_assert(index->tree); + cl_assert(git_tree_cache_get(index->tree, "subdir")); + + cl_git_pass(git_index_read(index, true)); + /* we must still have cache entries for "" and "subdir", since we wrote it out */ + cl_assert(index->tree); + cl_assert(git_tree_cache_get(index->tree, "subdir")); + + entry.path = "top-level.txt"; + cl_git_pass(git_oid_fromstr(&entry.id, "3697d64be941a53d4ae8f6a271e4e3fa56b022cc")); + cl_git_pass(git_index_add(index, &entry)); + + /* writ out the index after we invalidate the root */ + cl_git_pass(git_index_write(index)); + cl_git_pass(git_index_read(index, true)); + + /* the cache for the subtree must still be valid, even if the root isn't */ + cl_assert(index->tree); + cl_assert_equal_i(-1, index->tree->entry_count); + cl_assert_equal_i(1, index->tree->children_count); + tree_cache = git_tree_cache_get(index->tree, "subdir"); + cl_assert(tree_cache); + cl_assert_equal_i(0, tree_cache->entry_count); +} + void test_index_cache__read_tree_children(void) { git_index *index; From 7465e873996c4157b8e10e353b803841c548e2dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 29 Sep 2014 09:07:41 +0200 Subject: [PATCH 8/9] index: fill the tree cache on write-tree An obvious place to fill the tree cache is on write-tree, as we're guaranteed to be able to fill in the whole tree cache. The way this commit does this is not the most efficient, as we read the root tree from the odb instead of filling in the cache as we go along, but it fills the cache such that successive operations (and persisting the index to disk) will be able to take advantage of the cache, and it reuses the code we already have for filling the cache. Filling in the cache as we create the trees would require some reallocation of the children vector, which is currently not possible with out pool implementation. A different data structure would likely allow us to perform this operation at a later date. --- src/tree.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/tree.c b/src/tree.c index adc6fe5e4..6b06dfd77 100644 --- a/src/tree.c +++ b/src/tree.c @@ -579,6 +579,7 @@ int git_tree__write_index( git_oid *oid, git_index *index, git_repository *repo) { int ret; + git_tree *tree; bool old_ignore_case = false; assert(oid && index && repo); @@ -609,7 +610,21 @@ int git_tree__write_index( if (old_ignore_case) git_index__set_ignore_case(index, true); - return ret < 0 ? ret : 0; + index->tree = NULL; + + if (ret < 0) + return ret; + + git_pool_clear(&index->tree_pool); + + if ((ret = git_tree_lookup(&tree, repo, oid)) < 0) + return ret; + + /* Read the tree cache into the index */ + ret = git_tree_cache_read_tree(&index->tree, tree, &index->tree_pool); + git_tree_free(tree); + + return ret; } int git_treebuilder_create(git_treebuilder **builder_p, const git_tree *source) From 1b63af513365ecca86e824225a937b9d8eaa9dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 29 Sep 2014 09:21:57 +0200 Subject: [PATCH 9/9] Update CHANGELOG and PROJECTS with the tree cache changes --- CHANGELOG.md | 3 +++ PROJECTS.md | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2c87316f..90b550240 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ v0.21 + 1 us to safely update a reflog with arbitrary contents, as we need to do for stash. +* The index' tree cache is now filled upon read-tree and write-tree + and the cache is written to disk. + * LF -> CRLF filter refuses to handle mixed-EOL files * LF -> CRLF filter now runs when * text = auto (with Git for Windows 1.9.4) diff --git a/PROJECTS.md b/PROJECTS.md index d69988759..a2da6598a 100644 --- a/PROJECTS.md +++ b/PROJECTS.md @@ -88,8 +88,6 @@ might make good smaller projects by themselves. * Upgrade internal libxdiff code to latest from core Git * Improve index internals with hashtable lookup for files instead of using binary search every time -* Make the index write the cache out to disk (with tests to gain - confidence that the caching invalidation works correctly) * Tree builder improvements: * Use a hash table when building instead of a list * Extend to allow building a tree hierarchy