From 7132150ddf7a883c1f12a89c2518c1a07c0dc94c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 14 Nov 2015 23:46:21 +0100 Subject: [PATCH 1/5] tree: avoid advancing over the filename multiple times We've already looked at the filename with `memchr()` and then used `strlen()` to allocate the entry. We already know how much we have to advance to get to the object id, so add the filename length instead of looking at each byte again. --- src/tree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tree.c b/src/tree.c index bdd17661b..3504452c2 100644 --- a/src/tree.c +++ b/src/tree.c @@ -414,10 +414,8 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj) entry->attr = attr; } - while (buffer < buffer_end && *buffer != 0) - buffer++; - - buffer++; + /* Advance to the ID just after the path */ + buffer += entry->filename_len + 1; git_oid_fromraw(&entry->oid, (const unsigned char *)buffer); buffer += GIT_OID_RAWSZ; From ed970748b600313306657de6e5447e5447790766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 14 Nov 2015 23:50:06 +0100 Subject: [PATCH 2/5] tree: pool the entry memory allocations These are rather small allocations, so we end up spending a non-trivial amount of time asking the OS for memory. Since these entries are tied to the lifetime of their tree, we can give the tree a pool so we speed up the allocations. --- src/tree.c | 34 +++++++++++++++++++++++++++++----- src/tree.h | 5 ++++- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/tree.c b/src/tree.c index 3504452c2..22505d23a 100644 --- a/src/tree.c +++ b/src/tree.c @@ -81,6 +81,30 @@ int git_tree_entry_icmp(const git_tree_entry *e1, const git_tree_entry *e2) git__strncasecmp); } +/** + * Allocate a tree entry, borrowing the filename from the tree which + * owns it. This is useful when reading trees, so we don't allocate a + * ton of small strings but can use the pool. + */ +static git_tree_entry *alloc_entry_pooled(git_pool *pool, const char *filename) +{ + git_tree_entry *entry = NULL; + size_t filename_len = strlen(filename), tree_len; + + if (GIT_ADD_SIZET_OVERFLOW(&tree_len, sizeof(git_tree_entry), filename_len) || + GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1) || + !(entry = git_pool_malloc(pool, tree_len))) + return NULL; + + memset(entry, 0x0, sizeof(git_tree_entry)); + memcpy(entry->filename, filename, filename_len); + entry->filename[filename_len] = 0; + entry->filename_len = filename_len; + entry->pooled = true; + + return entry; +} + static git_tree_entry *alloc_entry(const char *filename) { git_tree_entry *entry = NULL; @@ -198,7 +222,7 @@ static int tree_key_search( void git_tree_entry_free(git_tree_entry *entry) { - if (entry == NULL) + if (entry == NULL || entry->pooled) return; git__free(entry); @@ -233,6 +257,7 @@ void git_tree__free(void *_tree) git_tree_entry_free(e); git_vector_free(&tree->entries); + git_pool_clear(&tree->pool); git__free(tree); } @@ -385,6 +410,7 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj) const char *buffer = git_odb_object_data(odb_obj); const char *buffer_end = buffer + git_odb_object_size(odb_obj); + git_pool_init(&tree->pool, 1); if (git_vector_init(&tree->entries, DEFAULT_TREE_SIZE, entry_sort_cmp) < 0) return -1; @@ -403,13 +429,11 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj) /** Allocate the entry and store it in the entries vector */ { - entry = alloc_entry(buffer); + entry = alloc_entry_pooled(&tree->pool, buffer); GITERR_CHECK_ALLOC(entry); - if (git_vector_insert(&tree->entries, entry) < 0) { - git__free(entry); + if (git_vector_insert(&tree->entries, entry) < 0) return -1; - } entry->attr = attr; } diff --git a/src/tree.h b/src/tree.h index d01b6fd41..51f42ca32 100644 --- a/src/tree.h +++ b/src/tree.h @@ -12,17 +12,20 @@ #include "odb.h" #include "vector.h" #include "strmap.h" +#include "pool.h" struct git_tree_entry { uint16_t attr; git_oid oid; + bool pooled; size_t filename_len; - char filename[1]; + char filename[GIT_FLEX_ARRAY]; }; struct git_tree { git_object object; git_vector entries; + git_pool pool; }; struct git_treebuilder { From 2580077fc2cbb124409bbc6453f2923217ac7957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sun, 15 Nov 2015 00:44:02 +0100 Subject: [PATCH 3/5] tree: calculate the filename length once We already know the size due to the `memchr()` so use that information instead of calling `strlen()` on it. --- src/tree.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/tree.c b/src/tree.c index 22505d23a..ff0464f1b 100644 --- a/src/tree.c +++ b/src/tree.c @@ -86,10 +86,10 @@ int git_tree_entry_icmp(const git_tree_entry *e1, const git_tree_entry *e2) * owns it. This is useful when reading trees, so we don't allocate a * ton of small strings but can use the pool. */ -static git_tree_entry *alloc_entry_pooled(git_pool *pool, const char *filename) +static git_tree_entry *alloc_entry_pooled(git_pool *pool, const char *filename, size_t filename_len) { git_tree_entry *entry = NULL; - size_t filename_len = strlen(filename), tree_len; + size_t tree_len; if (GIT_ADD_SIZET_OVERFLOW(&tree_len, sizeof(git_tree_entry), filename_len) || GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1) || @@ -416,6 +416,8 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj) while (buffer < buffer_end) { git_tree_entry *entry; + size_t filename_len; + const char *nul; int attr; if (git__strtol32(&attr, buffer, &buffer, 8) < 0 || !buffer) @@ -424,12 +426,13 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj) if (*buffer++ != ' ') return tree_error("Failed to parse tree. Object is corrupted", NULL); - if (memchr(buffer, 0, buffer_end - buffer) == NULL) + if ((nul = memchr(buffer, 0, buffer_end - buffer)) == NULL) return tree_error("Failed to parse tree. Object is corrupted", NULL); + filename_len = nul - buffer; /** Allocate the entry and store it in the entries vector */ { - entry = alloc_entry_pooled(&tree->pool, buffer); + entry = alloc_entry_pooled(&tree->pool, buffer, filename_len); GITERR_CHECK_ALLOC(entry); if (git_vector_insert(&tree->entries, entry) < 0) @@ -439,7 +442,7 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj) } /* Advance to the ID just after the path */ - buffer += entry->filename_len + 1; + buffer += filename_len + 1; git_oid_fromraw(&entry->oid, (const unsigned char *)buffer); buffer += GIT_OID_RAWSZ; From ee42bb0e3d6534b8ac4d48df90b1bb85323972ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 28 Nov 2015 19:18:29 +0100 Subject: [PATCH 4/5] tree: make path len uint16_t and avoid holes This reduces the size of the struct from 32 to 26 bytes, and leaves a single padding byte at the end of the struct (which comes from the zero-length array). --- src/tree.c | 2 +- src/tree.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tree.c b/src/tree.c index ff0464f1b..d269a5333 100644 --- a/src/tree.c +++ b/src/tree.c @@ -125,7 +125,7 @@ static git_tree_entry *alloc_entry(const char *filename) struct tree_key_search { const char *filename; - size_t filename_len; + uint16_t filename_len; }; static int homing_search_cmp(const void *key, const void *array_member) diff --git a/src/tree.h b/src/tree.h index 51f42ca32..914d788c8 100644 --- a/src/tree.h +++ b/src/tree.h @@ -16,9 +16,9 @@ struct git_tree_entry { uint16_t attr; + uint16_t filename_len; git_oid oid; bool pooled; - size_t filename_len; char filename[GIT_FLEX_ARRAY]; }; From 95ae3520c5c9f76a435f63cc2d5e18d7ba0ba171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 30 Nov 2015 17:32:18 +0100 Subject: [PATCH 5/5] tree: ensure the entry filename fits in 16 bits Return an error in case the length is too big. Also take this opportunity to have a single allocating function for the size and overflow logic. --- src/tree.c | 50 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/src/tree.c b/src/tree.c index d269a5333..0a32868cd 100644 --- a/src/tree.c +++ b/src/tree.c @@ -82,24 +82,47 @@ int git_tree_entry_icmp(const git_tree_entry *e1, const git_tree_entry *e2) } /** - * Allocate a tree entry, borrowing the filename from the tree which - * owns it. This is useful when reading trees, so we don't allocate a - * ton of small strings but can use the pool. + * Allocate either from the pool or from the system allocator */ -static git_tree_entry *alloc_entry_pooled(git_pool *pool, const char *filename, size_t filename_len) +static git_tree_entry *alloc_entry_base(git_pool *pool, const char *filename, size_t filename_len) { git_tree_entry *entry = NULL; size_t tree_len; + if (filename_len > UINT16_MAX) { + giterr_set(GITERR_INVALID, "tree entry is over UINT16_MAX in length"); + return NULL; + } + if (GIT_ADD_SIZET_OVERFLOW(&tree_len, sizeof(git_tree_entry), filename_len) || - GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1) || - !(entry = git_pool_malloc(pool, tree_len))) + GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1)) + return NULL; + + entry = pool ? git_pool_malloc(pool, tree_len) : + git__malloc(tree_len); + if (!entry) return NULL; memset(entry, 0x0, sizeof(git_tree_entry)); memcpy(entry->filename, filename, filename_len); entry->filename[filename_len] = 0; entry->filename_len = filename_len; + + return entry; +} + +/** + * Allocate a tree entry, using the poolin the tree which owns + * it. This is useful when reading trees, so we don't allocate a ton + * of small strings but can use the pool. + */ +static git_tree_entry *alloc_entry_pooled(git_pool *pool, const char *filename, size_t filename_len) +{ + git_tree_entry *entry = NULL; + + if (!(entry = alloc_entry_base(pool, filename, filename_len))) + return NULL; + entry->pooled = true; return entry; @@ -107,20 +130,7 @@ static git_tree_entry *alloc_entry_pooled(git_pool *pool, const char *filename, static git_tree_entry *alloc_entry(const char *filename) { - git_tree_entry *entry = NULL; - size_t filename_len = strlen(filename), tree_len; - - if (GIT_ADD_SIZET_OVERFLOW(&tree_len, sizeof(git_tree_entry), filename_len) || - GIT_ADD_SIZET_OVERFLOW(&tree_len, tree_len, 1) || - !(entry = git__malloc(tree_len))) - return NULL; - - memset(entry, 0x0, sizeof(git_tree_entry)); - memcpy(entry->filename, filename, filename_len); - entry->filename[filename_len] = 0; - entry->filename_len = filename_len; - - return entry; + return alloc_entry_base(NULL, filename, strlen(filename)); } struct tree_key_search {