From 2bc8fa0227d549006a9870620ca1f2e08a0c305e Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 17 Apr 2012 10:14:24 -0700 Subject: [PATCH 1/8] Implement git_pool paged memory allocator This adds a `git_pool` object that can do simple paged memory allocation with free for the entire pool at once. Using this, you can replace many small allocations with large blocks that can then cheaply be doled out in small pieces. This is best used when you plan to free the small blocks all at once - for example, if they represent the parsed state from a file or data stream that are either all kept or all discarded. There are two real patterns of usage for `git_pools`: either for "string" allocation, where the item size is a single byte and you end up just packing the allocations in together, or for "fixed size" allocation where you are allocating a large object (e.g. a `git_oid`) and you generally just allocate single objects that can be tightly packed. Of course, you can use it for other things, but those two cases are the easiest. --- src/blob.c | 4 +- src/buffer.c | 2 +- src/config_file.c | 10 +- src/errors.c | 2 +- src/filter.c | 2 +- src/global.c | 6 +- src/odb.c | 2 +- src/pool.c | 249 +++++++++++++++++++++++++++++++++++++++++ src/pool.h | 108 ++++++++++++++++++ src/refs.c | 2 +- src/remote.c | 2 +- src/repository.c | 2 +- tests-clar/core/pool.c | 85 ++++++++++++++ 13 files changed, 459 insertions(+), 17 deletions(-) create mode 100644 src/pool.c create mode 100644 src/pool.h create mode 100644 tests-clar/core/pool.c diff --git a/src/blob.c b/src/blob.c index f553de888..36571c70a 100644 --- a/src/blob.c +++ b/src/blob.c @@ -139,12 +139,12 @@ static int write_symlink( read_len = p_readlink(path, link_data, link_size); if (read_len != (ssize_t)link_size) { giterr_set(GITERR_OS, "Failed to create blob.
Can't read symlink '%s'", path); - free(link_data); + git__free(link_data); return -1; } error = git_odb_write(oid, odb, (void *)link_data, link_size, GIT_OBJ_BLOB); - free(link_data); + git__free(link_data); return error; } diff --git a/src/buffer.c b/src/buffer.c index c23803564..24a0abdbe 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -159,7 +159,7 @@ int git_buf_printf(git_buf *buf, const char *format, ...) va_end(arglist); if (len < 0) { - free(buf->ptr); + git__free(buf->ptr); buf->ptr = &git_buf__oom; return -1; } diff --git a/src/config_file.c b/src/config_file.c index 5cc15d457..fd634fbca 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -370,7 +370,7 @@ static int config_set_multivar(git_config_file *cfg, const char *name, const cha result = regcomp(&preg, regexp, REG_EXTENDED); if (result < 0) { - free(key); + git__free(key); giterr_set_regex(&preg, result); return -1; } @@ -380,7 +380,7 @@ static int config_set_multivar(git_config_file *cfg, const char *name, const cha char *tmp = git__strdup(value); GITERR_CHECK_ALLOC(tmp); - free(var->value); + git__free(var->value); var->value = tmp; replaced = 1; } @@ -409,7 +409,7 @@ static int config_set_multivar(git_config_file *cfg, const char *name, const cha result = config_write(b, key, &preg, value); - free(key); + git__free(key); regfree(&preg); return result; @@ -426,7 +426,7 @@ static int config_delete(git_config_file *cfg, const char *name) return -1; var = git_hashtable_lookup(b->values, key); - free(key); + git__free(key); if (var == NULL) return GIT_ENOTFOUND; @@ -1275,7 +1275,7 @@ static int parse_variable(diskfile_backend *cfg, char **var_name, char **var_val char *proc_line = fixup_line(value_start, 0); GITERR_CHECK_ALLOC(proc_line); git_buf_puts(&multi_value, proc_line); - free(proc_line); + git__free(proc_line); if (parse_multiline_variable(cfg, &multi_value, quote_count) < 0 || git_buf_oom(&multi_value)) { git__free(*var_name); git__free(line); diff --git a/src/errors.c b/src/errors.c 
index aad6c4482..7a6bbd654 100644 --- a/src/errors.c +++ b/src/errors.c @@ -173,7 +173,7 @@ void giterr_set_str(int error_class, const char *string) { git_error *error = &GIT_GLOBAL->error_t; - free(error->message); + git__free(error->message); error->message = git__strdup(string); error->klass = error_class; diff --git a/src/filter.c b/src/filter.c index f0ee1ad39..d2d113409 100644 --- a/src/filter.c +++ b/src/filter.c @@ -111,7 +111,7 @@ void git_filters_free(git_vector *filters) if (filter->do_free != NULL) filter->do_free(filter); else - free(filter); + git__free(filter); } git_vector_free(filters); diff --git a/src/global.c b/src/global.c index b10fabc61..368c6c664 100644 --- a/src/global.c +++ b/src/global.c @@ -62,7 +62,7 @@ git_global_st *git__global_state(void) if ((ptr = TlsGetValue(_tls_index)) != NULL) return ptr; - ptr = malloc(sizeof(git_global_st)); + ptr = git__malloc(sizeof(git_global_st)); if (!ptr) return NULL; @@ -78,7 +78,7 @@ static int _tls_init = 0; static void cb__free_status(void *st) { - free(st); + git__free(st); } void git_threads_init(void) @@ -103,7 +103,7 @@ git_global_st *git__global_state(void) if ((ptr = pthread_getspecific(_tls_key)) != NULL) return ptr; - ptr = malloc(sizeof(git_global_st)); + ptr = git__malloc(sizeof(git_global_st)); if (!ptr) return NULL; diff --git a/src/odb.c b/src/odb.c index b615cc4f4..2538b8a77 100644 --- a/src/odb.c +++ b/src/odb.c @@ -169,7 +169,7 @@ int git_odb__hashlink(git_oid *out, const char *path) } result = git_odb_hash(out, link_data, (size_t)size, GIT_OBJ_BLOB); - free(link_data); + git__free(link_data); } else { int fd = git_futils_open_ro(path); if (fd < 0) diff --git a/src/pool.c b/src/pool.c new file mode 100644 index 000000000..8a611a2dc --- /dev/null +++ b/src/pool.c @@ -0,0 +1,249 @@ +#include "pool.h" +#ifndef GIT_WIN32 +#include <unistd.h> +#endif + +struct git_pool_page { + git_pool_page *next; + uint32_t size; + uint32_t avail; + char data[GIT_FLEX_ARRAY]; +}; + +#define GIT_POOL_MIN_USABLE 4
+#define GIT_POOL_MIN_PAGESZ 2 * sizeof(void*) + +static int pool_alloc_page(git_pool *pool, uint32_t size, void **ptr); +static void pool_insert_page(git_pool *pool, git_pool_page *page); + +int git_pool_init( + git_pool *pool, uint32_t item_size, uint32_t items_per_page) +{ + assert(pool); + + if (!item_size) + item_size = 1; + /* round up item_size for decent object alignment */ + if (item_size > 4) + item_size = (item_size + 7) & ~7; + else if (item_size == 3) + item_size = 4; + + if (!items_per_page) { + uint32_t page_bytes = + git_pool__system_page_size() - sizeof(git_pool_page); + items_per_page = page_bytes / item_size; + } + if (item_size * items_per_page < GIT_POOL_MIN_PAGESZ) + items_per_page = (GIT_POOL_MIN_PAGESZ + item_size - 1) / item_size; + + memset(pool, 0, sizeof(git_pool)); + pool->item_size = item_size; + pool->page_size = item_size * items_per_page; + + return 0; +} + +void git_pool_clear(git_pool *pool) +{ + git_pool_page *scan, *next; + + for (scan = pool->open; scan != NULL; scan = next) { + next = scan->next; + git__free(scan); + } + pool->open = NULL; + + for (scan = pool->full; scan != NULL; scan = next) { + next = scan->next; + git__free(scan); + } + pool->full = NULL; + + pool->free_list = NULL; + + pool->has_string_alloc = 0; + pool->has_multi_item_alloc = 0; + pool->has_large_page_alloc = 0; +} + +static void pool_insert_page(git_pool *pool, git_pool_page *page) +{ + git_pool_page *scan; + + /* If there are no open pages or this page has the most open space, + * insert it at the beginning of the list. This is the common case. + */ + if (pool->open == NULL || pool->open->avail < page->avail) { + page->next = pool->open; + pool->open = page; + return; + } + + /* Otherwise insert into sorted position. 
*/ + for (scan = pool->open; + scan->next && scan->next->avail > page->avail; + scan = scan->next); + page->next = scan->next; + scan->next = page; +} + +static int pool_alloc_page( + git_pool *pool, uint32_t size, void **ptr) +{ + git_pool_page *page; + uint32_t alloc_size; + + if (size <= pool->page_size) + alloc_size = pool->page_size; + else { + alloc_size = size; + pool->has_large_page_alloc = 1; + } + + page = git__calloc(1, alloc_size + sizeof(git_pool_page)); + if (!page) + return -1; + + page->size = alloc_size; + page->avail = alloc_size - size; + + if (page->avail > 0) + pool_insert_page(pool, page); + else { + page->next = pool->full; + pool->full = page; + } + + *ptr = page->data; + + return 0; +} + +GIT_INLINE(void) pool_remove_page( + git_pool *pool, git_pool_page *page, git_pool_page *prev) +{ + if (prev == NULL) + pool->open = page->next; + else + prev->next = page->next; +} + +int git_pool_malloc(git_pool *pool, uint32_t items, void **ptr) +{ + git_pool_page *scan = pool->open, *prev; + uint32_t size = items * pool->item_size; + + pool->has_string_alloc = 0; + if (items > 1) + pool->has_multi_item_alloc = 1; + else if (pool->free_list != NULL) { + *ptr = pool->free_list; + pool->free_list = *((void **)pool->free_list); + } + + /* just add a block if there is no open one to accommodate this */ + if (size >= pool->page_size || !scan || scan->avail < size) + return pool_alloc_page(pool, size, ptr); + + /* find smallest block in free list with space */ + for (scan = pool->open, prev = NULL; + scan->next && scan->next->avail >= size; + prev = scan, scan = scan->next); + + /* allocate space from the block */ + *ptr = &scan->data[scan->size - scan->avail]; + scan->avail -= size; + + /* move to full list if there is almost no space left */ + if (scan->avail < pool->item_size || scan->avail < GIT_POOL_MIN_USABLE) { + pool_remove_page(pool, scan, prev); + scan->next = pool->full; + pool->full = scan; + } + /* reorder list if block is now smaller than the one
after it */ + else if (scan->next != NULL && scan->next->avail > scan->avail) { + pool_remove_page(pool, scan, prev); + pool_insert_page(pool, scan); + } + + return 0; +} + +char *git_pool_strndup(git_pool *pool, const char *str, size_t n) +{ + void *ptr = NULL; + + assert(pool && str && pool->item_size == sizeof(char)); + + if (!git_pool_malloc(pool, n, &ptr)) + memcpy(ptr, str, n); + pool->has_string_alloc = 1; + + return ptr; +} + +char *git_pool_strdup(git_pool *pool, const char *str) +{ + assert(pool && str && pool->item_size == sizeof(char)); + + return git_pool_strndup(pool, str, strlen(str) + 1); +} + +void git_pool_free(git_pool *pool, void *ptr) +{ + assert(pool && ptr && pool->item_size >= sizeof(void*)); + + *((void **)ptr) = pool->free_list; + pool->free_list = ptr; +} + +uint32_t git_pool__open_pages(git_pool *pool) +{ + uint32_t ct = 0; + git_pool_page *scan; + for (scan = pool->open; scan != NULL; scan = scan->next) ct++; + return ct; +} + +uint32_t git_pool__full_pages(git_pool *pool) +{ + uint32_t ct = 0; + git_pool_page *scan; + for (scan = pool->full; scan != NULL; scan = scan->next) ct++; + return ct; +} + +bool git_pool__ptr_in_pool(git_pool *pool, void *ptr) +{ + git_pool_page *scan; + for (scan = pool->open; scan != NULL; scan = scan->next) + if ( ((void *)scan->data) <= ptr && + (((void *)scan->data) + scan->size) > ptr) + return true; + for (scan = pool->full; scan != NULL; scan = scan->next) + if ( ((void *)scan->data) <= ptr && + (((void *)scan->data) + scan->size) > ptr) + return true; + return false; +} + +uint32_t git_pool__system_page_size(void) +{ + static uint32_t size = 0; + + if (!size) { +#ifdef GIT_WIN32 + SYSTEM_INFO info; + GetSystemInfo(&info); + size = (uint32_t)info.dwPageSize; +#else + size = (uint32_t)sysconf(_SC_PAGE_SIZE); +#endif + + size -= 2 * sizeof(void *); /* allow space for malloc overhead */ + } + + return size; +} + diff --git a/src/pool.h b/src/pool.h new file mode 100644 index 000000000..5f65412a0 --- 
/dev/null +++ b/src/pool.h @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_pool_h__ +#define INCLUDE_pool_h__ + +#include "common.h" + +typedef struct git_pool_page git_pool_page; + +/** + * Chunked allocator. + * + * A `git_pool` can be used when you want to cheaply allocate + * multiple items of the same type and are willing to free them + * all together with a single call. The two most common cases + * are a set of fixed size items (such as lots of OIDs) or a + * bunch of strings. + * + * Internally, a `git_pool` allocates pages of memory and then + * deals out blocks from the trailing unused portion of each page. + * The pages guarantee that the number of actual allocations done + * will be much smaller than the number of items needed. + * + * For examples of how to set up a `git_pool` see `git_pool_init`. + */ +typedef struct { + git_pool_page *open; /* pages with space left */ + git_pool_page *full; /* pages with no space left */ + void *free_list; /* optional: list of freed blocks */ + uint32_t item_size; /* size of single alloc unit in bytes */ + uint32_t page_size; /* size of page in bytes */ + unsigned has_string_alloc : 1; /* was the strdup function used */ + unsigned has_multi_item_alloc : 1; /* was items ever > 1 in malloc */ + unsigned has_large_page_alloc : 1; /* are any pages > page_size */ +} git_pool; + +/** + * Initialize a pool. + * + * To allocate strings, use like this: + * + * git_pool_init(&string_pool, 1, 0); + * my_string = git_pool_strdup(&string_pool, your_string); + * + * To allocate items of fixed size, use like this: + * + * git_pool_init(&pool, sizeof(item), 0); + * git_pool_malloc(&pool, 1, &my_item_ptr); + * + * Of course, you can use this in other ways, but those are the + * two most common patterns.
+ */ +extern int git_pool_init( + git_pool *pool, uint32_t item_size, uint32_t items_per_page); + +/** + * Free all items in pool + */ +extern void git_pool_clear(git_pool *pool); + +/** + * Allocate space for one or more items from a pool. + */ +extern int git_pool_malloc(git_pool *pool, uint32_t items, void **ptr); + +/** + * Allocate space and duplicate string data into it. + * + * This is allowed only for pools with item_size == sizeof(char) + */ +extern char *git_pool_strndup(git_pool *pool, const char *str, size_t n); + +/** + * Allocate space and duplicate a string into it. + * + * This is allowed only for pools with item_size == sizeof(char) + */ +extern char *git_pool_strdup(git_pool *pool, const char *str); + +/** + * Push a block back onto the free list for the pool. + * + * This is allowed only if the item_size is >= sizeof(void*). + * + * In some cases, it is helpful to "release" an allocated block + * for reuse. Pools don't support a general purpose free, but + * they will keep a simple free blocks linked list provided the + * native block size is large enough to hold a void pointer + */ +extern void git_pool_free(git_pool *pool, void *ptr); + +/* + * Misc utilities + */ + +extern uint32_t git_pool__open_pages(git_pool *pool); + +extern uint32_t git_pool__full_pages(git_pool *pool); + +extern bool git_pool__ptr_in_pool(git_pool *pool, void *ptr); + +extern uint32_t git_pool__system_page_size(void); + +#endif diff --git a/src/refs.c b/src/refs.c index bea1f1724..447f3a7b6 100644 --- a/src/refs.c +++ b/src/refs.c @@ -268,7 +268,7 @@ static int loose_lookup_to_packfile( if (loose_parse_oid(&ref->oid, &ref_file) < 0) { git_buf_free(&ref_file); - free(ref); + git__free(ref); return -1; } diff --git a/src/remote.c b/src/remote.c index b48a23339..54e1146c7 100644 --- a/src/remote.c +++ b/src/remote.c @@ -436,7 +436,7 @@ int git_remote_list(git_strarray *remotes_list, git_repository *repo) size_t i; char *elem; git_vector_foreach(&list, i, elem) { - 
free(elem); + git__free(elem); } git_vector_free(&list); diff --git a/src/repository.c b/src/repository.c index 88e3a182c..affc0c4c1 100644 --- a/src/repository.c +++ b/src/repository.c @@ -850,7 +850,7 @@ int git_repository_set_workdir(git_repository *repo, const char *workdir) if (git_path_prettify_dir(&path, workdir, NULL) < 0) return -1; - free(repo->workdir); + git__free(repo->workdir); repo->workdir = git_buf_detach(&path); repo->is_bare = 0; diff --git a/tests-clar/core/pool.c b/tests-clar/core/pool.c new file mode 100644 index 000000000..3f1ed8a5a --- /dev/null +++ b/tests-clar/core/pool.c @@ -0,0 +1,85 @@ +#include "clar_libgit2.h" +#include "pool.h" +#include "git2/oid.h" + +void test_core_pool__0(void) +{ + int i; + git_pool p; + void *ptr; + + cl_git_pass(git_pool_init(&p, 1, 4000)); + + for (i = 1; i < 10000; i *= 2) { + cl_git_pass(git_pool_malloc(&p, i, &ptr)); + cl_assert(ptr != NULL); + + cl_assert(git_pool__ptr_in_pool(&p, ptr)); + cl_assert(!git_pool__ptr_in_pool(&p, &i)); + } + + /* 1+2+4+8+16+32+64+128+256+512+1024 -> original block */ + /* 2048 -> 1 block */ + /* 4096 -> 1 block */ + /* 8192 -> 1 block */ + + cl_assert(git_pool__open_pages(&p) + git_pool__full_pages(&p) == 4); + + git_pool_clear(&p); +} + +void test_core_pool__1(void) +{ + int i; + git_pool p; + void *ptr; + + cl_git_pass(git_pool_init(&p, 1, 4000)); + + for (i = 2010; i > 0; i--) + cl_git_pass(git_pool_malloc(&p, i, &ptr)); + + /* with fixed page size, allocation must end up with these values */ + cl_assert(git_pool__open_pages(&p) == 1); + cl_assert(git_pool__full_pages(&p) == 505); + + git_pool_clear(&p); + + cl_git_pass(git_pool_init(&p, 1, 4100)); + + for (i = 2010; i > 0; i--) + cl_git_pass(git_pool_malloc(&p, i, &ptr)); + + /* with fixed page size, allocation must end up with these values */ + cl_assert(git_pool__open_pages(&p) == 1); + cl_assert(git_pool__full_pages(&p) == 492); + + git_pool_clear(&p); +} + +static char to_hex[] = "0123456789abcdef"; + +void 
test_core_pool__2(void) +{ + git_pool p; + char oid_hex[GIT_OID_HEXSZ]; + git_oid *oid; + int i, j; + + memset(oid_hex, '0', sizeof(oid_hex)); + + cl_git_pass(git_pool_init(&p, sizeof(git_oid), 100)); + + for (i = 1000; i < 10000; i++) { + cl_git_pass(git_pool_malloc(&p, 1, (void **)&oid)); + for (j = 0; j < 8; j++) + oid_hex[j] = to_hex[(i >> (4 * j)) & 0x0f]; + cl_git_pass(git_oid_fromstr(oid, oid_hex)); + } + + /* with fixed page size, allocation must end up with these values */ + cl_assert(git_pool__open_pages(&p) == 0); + cl_assert(git_pool__full_pages(&p) == 90); + + git_pool_clear(&p); +} From 19fa2bc111d50dc2bafb1393b87b5ba119615ae2 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 17 Apr 2012 15:12:50 -0700 Subject: [PATCH 2/8] Convert attrs and diffs to use string pools This converts the git attr related code (including ignores) and the git diff related code (and implicitly the status code) to use `git_pools` for storing strings. This reduces the number of small blocks allocated dramatically. 
--- src/attr.c | 42 ++++++++------ src/attr.h | 1 + src/attr_file.c | 64 +++++++++++++-------- src/attr_file.h | 14 +++-- src/diff.c | 121 +++++++++++++++------------------------ src/diff.h | 2 + src/ignore.c | 4 +- src/pool.c | 67 +++++++++++++++++----- src/pool.h | 19 +++++- tests-clar/attr/file.c | 10 ++-- tests-clar/attr/lookup.c | 13 +++-- tests-clar/core/pool.c | 12 ++-- 12 files changed, 216 insertions(+), 153 deletions(-) diff --git a/src/attr.c b/src/attr.c index c02289363..f5d50bb42 100644 --- a/src/attr.c +++ b/src/attr.c @@ -167,6 +167,7 @@ int git_attr_add_macro( { int error; git_attr_rule *macro = NULL; + git_pool *pool; if (git_attr_cache__init(repo) < 0) return -1; @@ -174,13 +175,15 @@ int git_attr_add_macro( macro = git__calloc(1, sizeof(git_attr_rule)); GITERR_CHECK_ALLOC(macro); - macro->match.pattern = git__strdup(name); + pool = &git_repository_attr_cache(repo)->pool; + + macro->match.pattern = git_pool_strdup(pool, name); GITERR_CHECK_ALLOC(macro->match.pattern); macro->match.length = strlen(macro->match.pattern); macro->match.flags = GIT_ATTR_FNMATCH_MACRO; - error = git_attr_assignment__parse(repo, &macro->assigns, &values); + error = git_attr_assignment__parse(repo, pool, &macro->assigns, &values); if (!error) error = git_attr_cache__insert_macro(repo, macro); @@ -221,7 +224,7 @@ int git_attr_cache__lookup_or_create_file( return 0; } - if (git_attr_file__new(&file) < 0) + if (git_attr_file__new(&file, &cache->pool) < 0) return -1; if (loader) @@ -384,6 +387,10 @@ int git_attr_cache__init(git_repository *repo) return -1; } + /* allocate string pool */ + if (git_pool_init(&cache->pool, 1, 0) < 0) + return -1; + cache->initialized = 1; /* insert default macros */ @@ -393,30 +400,33 @@ int git_attr_cache__init(git_repository *repo) void git_attr_cache_flush( git_repository *repo) { - git_hashtable *table; + git_attr_cache *cache; if (!repo) return; - if ((table = git_repository_attr_cache(repo)->files) != NULL) { + cache =
git_repository_attr_cache(repo); + + if (cache->files != NULL) { git_attr_file *file; - - GIT_HASHTABLE_FOREACH_VALUE(table, file, git_attr_file__free(file)); - git_hashtable_free(table); - - git_repository_attr_cache(repo)->files = NULL; + GIT_HASHTABLE_FOREACH_VALUE( + cache->files, file, git_attr_file__free(file)); + git_hashtable_free(cache->files); + cache->files = NULL; } - if ((table = git_repository_attr_cache(repo)->macros) != NULL) { + if (cache->macros != NULL) { git_attr_rule *rule; - GIT_HASHTABLE_FOREACH_VALUE(table, rule, git_attr_rule__free(rule)); - git_hashtable_free(table); - - git_repository_attr_cache(repo)->macros = NULL; + GIT_HASHTABLE_FOREACH_VALUE( + cache->macros, rule, git_attr_rule__free(rule)); + git_hashtable_free(cache->macros); + cache->macros = NULL; } - git_repository_attr_cache(repo)->initialized = 0; + git_pool_clear(&cache->pool); + + cache->initialized = 0; } int git_attr_cache__insert_macro(git_repository *repo, git_attr_rule *macro) diff --git a/src/attr.h b/src/attr.h index 350c0ebad..825cbfe4e 100644 --- a/src/attr.h +++ b/src/attr.h @@ -14,6 +14,7 @@ typedef struct { int initialized; + git_pool pool; git_hashtable *files; /* hash path to git_attr_file of rules */ git_hashtable *macros; /* hash name to vector */ const char *cfg_attr_file; /* cached value of core.attributesfile */ diff --git a/src/attr_file.c b/src/attr_file.c index b2edce90e..7909c49b4 100644 --- a/src/attr_file.c +++ b/src/attr_file.c @@ -9,21 +9,32 @@ const char *git_attr__false = "[internal]__FALSE__"; static int sort_by_hash_and_name(const void *a_raw, const void *b_raw); static void git_attr_rule__clear(git_attr_rule *rule); -int git_attr_file__new(git_attr_file **attrs_ptr) +int git_attr_file__new(git_attr_file **attrs_ptr, git_pool *pool) { git_attr_file *attrs = NULL; attrs = git__calloc(1, sizeof(git_attr_file)); GITERR_CHECK_ALLOC(attrs); - if (git_vector_init(&attrs->rules, 4, NULL) < 0) { - git__free(attrs); - attrs = NULL; + if (pool) + 
attrs->pool = pool; + else { + attrs->pool = git__calloc(1, sizeof(git_pool)); + if (!attrs->pool || git_pool_init(attrs->pool, 1, 0) < 0) + goto fail; + attrs->pool_is_allocated = true; } - *attrs_ptr = attrs; + if (git_vector_init(&attrs->rules, 4, NULL) < 0) + goto fail; - return attrs ? 0 : -1; + *attrs_ptr = attrs; + return 0; + +fail: + git_attr_file__free(attrs); + attrs_ptr = NULL; + return -1; } int git_attr_file__set_path( @@ -76,8 +87,10 @@ int git_attr_file__from_buffer( } /* parse the next "pattern attr attr attr" line */ - if (!(error = git_attr_fnmatch__parse(&rule->match, context, &scan)) && - !(error = git_attr_assignment__parse(repo, &rule->assigns, &scan))) + if (!(error = git_attr_fnmatch__parse( + &rule->match, attrs->pool, context, &scan)) && + !(error = git_attr_assignment__parse( + repo, attrs->pool, &rule->assigns, &scan))) { if (rule->match.flags & GIT_ATTR_FNMATCH_MACRO) /* should generate error/warning if this is coming from any @@ -141,12 +154,18 @@ void git_attr_file__free(git_attr_file *file) git__free(file->path); file->path = NULL; + if (file->pool_is_allocated) { + git_pool_clear(file->pool); + git__free(file->pool); + } + file->pool = NULL; + git__free(file); } -unsigned long git_attr_file__name_hash(const char *name) +uint32_t git_attr_file__name_hash(const char *name) { - unsigned long h = 5381; + uint32_t h = 5381; int c; assert(name); while ((c = (int)*name++) != 0) @@ -293,6 +312,7 @@ int git_attr_path__init( */ int git_attr_fnmatch__parse( git_attr_fnmatch *spec, + git_pool *pool, const char *source, const char **base) { @@ -358,7 +378,7 @@ int git_attr_fnmatch__parse( /* given an unrooted fullpath match from a file inside a repo, * prefix the pattern with the relative directory of the source file */ - spec->pattern = git__malloc(sourcelen + spec->length + 1); + spec->pattern = git_pool_malloc(pool, sourcelen + spec->length + 1); if (spec->pattern) { memcpy(spec->pattern, source, sourcelen); memcpy(spec->pattern + sourcelen, 
pattern, spec->length); @@ -366,7 +386,7 @@ int git_attr_fnmatch__parse( spec->pattern[spec->length] = '\0'; } } else { - spec->pattern = git__strndup(pattern, spec->length); + spec->pattern = git_pool_strndup(pool, pattern, spec->length); } if (!spec->pattern) { @@ -405,14 +425,11 @@ static int sort_by_hash_and_name(const void *a_raw, const void *b_raw) static void git_attr_assignment__free(git_attr_assignment *assign) { - git__free(assign->name); + /* name and value are stored in a git_pool associated with the + * git_attr_file, so they do not need to be freed here + */ assign->name = NULL; - - if (assign->is_allocated) { - git__free((void *)assign->value); - assign->value = NULL; - } - + assign->value = NULL; git__free(assign); } @@ -428,6 +445,7 @@ static int merge_assignments(void **old_raw, void *new_raw) int git_attr_assignment__parse( git_repository *repo, + git_pool *pool, git_vector *assigns, const char **base) { @@ -454,7 +472,6 @@ int git_attr_assignment__parse( assign->name_hash = 5381; assign->value = git_attr__true; - assign->is_allocated = 0; /* look for magic name prefixes */ if (*scan == '-') { @@ -482,7 +499,7 @@ int git_attr_assignment__parse( } /* allocate permanent storage for name */ - assign->name = git__strndup(name_start, scan - name_start); + assign->name = git_pool_strndup(pool, name_start, scan - name_start); GITERR_CHECK_ALLOC(assign->name); /* if there is an equals sign, find the value */ @@ -491,9 +508,8 @@ int git_attr_assignment__parse( /* if we found a value, allocate permanent storage for it */ if (scan > value_start) { - assign->value = git__strndup(value_start, scan - value_start); + assign->value = git_pool_strndup(pool, value_start, scan - value_start); GITERR_CHECK_ALLOC(assign->value); - assign->is_allocated = 1; } } @@ -548,7 +564,7 @@ static void git_attr_rule__clear(git_attr_rule *rule) git_vector_free(&rule->assigns); } - git__free(rule->match.pattern); + /* match.pattern is stored in a git_pool, so no need to free */ 
rule->match.pattern = NULL; rule->match.length = 0; } diff --git a/src/attr_file.h b/src/attr_file.h index 294033d5e..9788a2295 100644 --- a/src/attr_file.h +++ b/src/attr_file.h @@ -10,6 +10,7 @@ #include "git2/attr.h" #include "vector.h" #include "hashtable.h" +#include "pool.h" #define GIT_ATTR_FILE ".gitattributes" #define GIT_ATTR_FILE_INREPO "info/attributes" @@ -36,20 +37,21 @@ typedef struct { typedef struct { git_refcount unused; const char *name; - unsigned long name_hash; + uint32_t name_hash; } git_attr_name; typedef struct { git_refcount rc; /* for macros */ char *name; - unsigned long name_hash; + uint32_t name_hash; const char *value; - int is_allocated; } git_attr_assignment; typedef struct { char *path; /* cache the path this was loaded from */ git_vector rules; /* vector of or */ + git_pool *pool; + bool pool_is_allocated; } git_attr_file; typedef struct { @@ -62,7 +64,7 @@ typedef struct { * git_attr_file API */ -extern int git_attr_file__new(git_attr_file **attrs_ptr); +extern int git_attr_file__new(git_attr_file **attrs_ptr, git_pool *pool); extern void git_attr_file__free(git_attr_file *file); extern int git_attr_file__from_buffer( @@ -84,7 +86,7 @@ extern int git_attr_file__lookup_one( git_vector_rforeach(&(file)->rules, (iter), (rule)) \ if (git_attr_rule__match((rule), (path))) -extern unsigned long git_attr_file__name_hash(const char *name); +extern uint32_t git_attr_file__name_hash(const char *name); /* @@ -93,6 +95,7 @@ extern unsigned long git_attr_file__name_hash(const char *name); extern int git_attr_fnmatch__parse( git_attr_fnmatch *spec, + git_pool *pool, const char *source, const char **base); @@ -114,6 +117,7 @@ extern int git_attr_path__init( extern int git_attr_assignment__parse( git_repository *repo, /* needed to expand macros */ + git_pool *pool, git_vector *assigns, const char **scan); diff --git a/src/diff.c b/src/diff.c index c6a0088ec..7d2ad59aa 100644 --- a/src/diff.c +++ b/src/diff.c @@ -54,24 +54,6 @@ static bool 
diff_path_matches_pathspec(git_diff_list *diff, const char *path) return false; } -static void diff_delta__free(git_diff_delta *delta) -{ - if (!delta) - return; - - if (delta->new.flags & GIT_DIFF_FILE_FREE_PATH) { - git__free((char *)delta->new.path); - delta->new.path = NULL; - } - - if (delta->old.flags & GIT_DIFF_FILE_FREE_PATH) { - git__free((char *)delta->old.path); - delta->old.path = NULL; - } - - git__free(delta); -} - static git_diff_delta *diff_delta__alloc( git_diff_list *diff, git_delta_t status, @@ -81,12 +63,11 @@ static git_diff_delta *diff_delta__alloc( if (!delta) return NULL; - delta->old.path = git__strdup(path); + delta->old.path = git_pool_strdup(&diff->pool, path); if (delta->old.path == NULL) { git__free(delta); return NULL; } - delta->old.flags |= GIT_DIFF_FILE_FREE_PATH; delta->new.path = delta->old.path; if (diff->opts.flags & GIT_DIFF_REVERSE) { @@ -101,7 +82,8 @@ static git_diff_delta *diff_delta__alloc( return delta; } -static git_diff_delta *diff_delta__dup(const git_diff_delta *d) +static git_diff_delta *diff_delta__dup( + const git_diff_delta *d, git_pool *pool) { git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); if (!delta) @@ -109,33 +91,29 @@ static git_diff_delta *diff_delta__dup(const git_diff_delta *d) memcpy(delta, d, sizeof(git_diff_delta)); - delta->old.path = git__strdup(d->old.path); - if (delta->old.path == NULL) { - git__free(delta); - return NULL; - } - delta->old.flags |= GIT_DIFF_FILE_FREE_PATH; + delta->old.path = git_pool_strdup(pool, d->old.path); + if (delta->old.path == NULL) + goto fail; if (d->new.path != d->old.path) { - delta->new.path = git__strdup(d->new.path); - if (delta->new.path == NULL) { - git__free(delta->old.path); - git__free(delta); - return NULL; - } - delta->new.flags |= GIT_DIFF_FILE_FREE_PATH; + delta->new.path = git_pool_strdup(pool, d->new.path); + if (delta->new.path == NULL) + goto fail; } else { delta->new.path = delta->old.path; - delta->new.flags &= 
~GIT_DIFF_FILE_FREE_PATH; } return delta; + +fail: + git__free(delta); + return NULL; } static git_diff_delta *diff_delta__merge_like_cgit( - const git_diff_delta *a, const git_diff_delta *b) + const git_diff_delta *a, const git_diff_delta *b, git_pool *pool) { - git_diff_delta *dup = diff_delta__dup(a); + git_diff_delta *dup = diff_delta__dup(a, pool); if (!dup) return NULL; @@ -146,9 +124,7 @@ static git_diff_delta *diff_delta__merge_like_cgit( dup->new.mode = b->new.mode; dup->new.size = b->new.size; - dup->new.flags = - (dup->new.flags & GIT_DIFF_FILE_FREE_PATH) | - (b->new.flags & ~GIT_DIFF_FILE_FREE_PATH); + dup->new.flags = b->new.flags; /* Emulate C git for merging two diffs (a la 'git diff '). * @@ -210,7 +186,7 @@ static int diff_delta__from_one( delta->new.flags |= GIT_DIFF_FILE_VALID_OID; if (git_vector_insert(&diff->deltas, delta) < 0) { - diff_delta__free(delta); + git__free(delta); return -1; } @@ -249,7 +225,7 @@ static int diff_delta__from_two( delta->new.flags |= GIT_DIFF_FILE_VALID_OID; if (git_vector_insert(&diff->deltas, delta) < 0) { - diff_delta__free(delta); + git__free(delta); return -1; } @@ -259,19 +235,15 @@ static int diff_delta__from_two( #define DIFF_SRC_PREFIX_DEFAULT "a/" #define DIFF_DST_PREFIX_DEFAULT "b/" -static char *diff_strdup_prefix(const char *prefix) +static char *diff_strdup_prefix(git_pool *pool, const char *prefix) { size_t len = strlen(prefix); - char *str = git__malloc(len + 2); - if (str != NULL) { - memcpy(str, prefix, len + 1); - /* append '/' at end if needed */ - if (len > 0 && str[len - 1] != '/') { - str[len] = '/'; - str[len + 1] = '\0'; - } - } - return str; + + /* append '/' at end if needed */ + if (len > 0 && prefix[len - 1] != '/') + return git_pool_strcat(pool, prefix, "/"); + else + return git_pool_strndup(pool, prefix, len + 1); } static int diff_delta__cmp(const void *a, const void *b) @@ -300,6 +272,10 @@ static git_diff_list *git_diff_list_alloc( diff->repo = repo; + if 
(git_vector_init(&diff->deltas, 0, diff_delta__cmp) < 0 || + git_pool_init(&diff->pool, 1, 0) < 0) + goto fail; + /* load config values that affect diff behavior */ if (git_repository_config__weakptr(&cfg, repo) < 0) goto fail; @@ -319,9 +295,9 @@ static git_diff_list *git_diff_list_alloc( memcpy(&diff->opts, opts, sizeof(git_diff_options)); memset(&diff->opts.pathspec, 0, sizeof(diff->opts.pathspec)); - diff->opts.src_prefix = diff_strdup_prefix( + diff->opts.src_prefix = diff_strdup_prefix(&diff->pool, opts->src_prefix ? opts->src_prefix : DIFF_SRC_PREFIX_DEFAULT); - diff->opts.dst_prefix = diff_strdup_prefix( + diff->opts.dst_prefix = diff_strdup_prefix(&diff->pool, opts->dst_prefix ? opts->dst_prefix : DIFF_DST_PREFIX_DEFAULT); if (!diff->opts.src_prefix || !diff->opts.dst_prefix) @@ -333,9 +309,6 @@ static git_diff_list *git_diff_list_alloc( diff->opts.dst_prefix = swap; } - if (git_vector_init(&diff->deltas, 0, diff_delta__cmp) < 0) - goto fail; - /* only copy pathspec if it is "interesting" so we can test * diff->pathspec.length > 0 to know if it is worth calling * fnmatch as we iterate. 
@@ -349,11 +322,10 @@ static git_diff_list *git_diff_list_alloc( for (i = 0; i < opts->pathspec.count; ++i) { int ret; const char *pattern = opts->pathspec.strings[i]; - git_attr_fnmatch *match = - git__calloc(1, sizeof(git_attr_fnmatch)); + git_attr_fnmatch *match = git__calloc(1, sizeof(git_attr_fnmatch)); if (!match) goto fail; - ret = git_attr_fnmatch__parse(match, NULL, &pattern); + ret = git_attr_fnmatch__parse(match, &diff->pool, NULL, &pattern); if (ret == GIT_ENOTFOUND) { git__free(match); continue; @@ -381,23 +353,18 @@ void git_diff_list_free(git_diff_list *diff) return; git_vector_foreach(&diff->deltas, i, delta) { - diff_delta__free(delta); + git__free(delta); diff->deltas.contents[i] = NULL; } git_vector_free(&diff->deltas); git_vector_foreach(&diff->pathspec, i, match) { - if (match != NULL) { - git__free(match->pattern); - match->pattern = NULL; - git__free(match); - diff->pathspec.contents[i] = NULL; - } + git__free(match); + diff->pathspec.contents[i] = NULL; } git_vector_free(&diff->pathspec); - git__free(diff->opts.src_prefix); - git__free(diff->opts.dst_prefix); + git_pool_clear(&diff->pool); git__free(diff); } @@ -709,6 +676,7 @@ int git_diff_merge( const git_diff_list *from) { int error = 0; + git_pool onto_pool; git_vector onto_new; git_diff_delta *delta; unsigned int i, j; @@ -718,7 +686,8 @@ int git_diff_merge( if (!from->deltas.length) return 0; - if (git_vector_init(&onto_new, onto->deltas.length, diff_delta__cmp) < 0) + if (git_vector_init(&onto_new, onto->deltas.length, diff_delta__cmp) < 0 || + git_pool_init(&onto_pool, 1, 0) < 0) return -1; for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { @@ -727,13 +696,13 @@ int git_diff_merge( int cmp = !f ? -1 : !o ? 
1 : strcmp(o->old.path, f->old.path); if (cmp < 0) { - delta = diff_delta__dup(o); + delta = diff_delta__dup(o, &onto_pool); i++; } else if (cmp > 0) { - delta = diff_delta__dup(f); + delta = diff_delta__dup(f, &onto_pool); j++; } else { - delta = diff_delta__merge_like_cgit(o, f); + delta = diff_delta__merge_like_cgit(o, f, &onto_pool); i++; j++; } @@ -744,12 +713,14 @@ int git_diff_merge( if (!error) { git_vector_swap(&onto->deltas, &onto_new); + git_pool_swap(&onto->pool, &onto_pool); onto->new_src = from->new_src; } git_vector_foreach(&onto_new, i, delta) - diff_delta__free(delta); + git__free(delta); git_vector_free(&onto_new); + git_pool_clear(&onto_pool); return error; } diff --git a/src/diff.h b/src/diff.h index 9da07c295..4de18beea 100644 --- a/src/diff.h +++ b/src/diff.h @@ -12,6 +12,7 @@ #include "buffer.h" #include "iterator.h" #include "repository.h" +#include "pool.h" enum { GIT_DIFFCAPS_HAS_SYMLINKS = (1 << 0), /* symlinks on platform? */ @@ -26,6 +27,7 @@ struct git_diff_list { git_diff_options opts; git_vector pathspec; git_vector deltas; /* vector of git_diff_file_delta */ + git_pool pool; git_iterator_type_t old_src; git_iterator_type_t new_src; uint32_t diffcaps; diff --git a/src/ignore.c b/src/ignore.c index 1827eda82..165754b4d 100644 --- a/src/ignore.c +++ b/src/ignore.c @@ -35,7 +35,9 @@ static int load_ignore_file( GITERR_CHECK_ALLOC(match); } - if (!(error = git_attr_fnmatch__parse(match, context, &scan))) { + if (!(error = git_attr_fnmatch__parse( + match, ignores->pool, context, &scan))) + { match->flags = match->flags | GIT_ATTR_FNMATCH_IGNORE; scan = git__next_line(scan); error = git_vector_insert(&ignores->rules, match); diff --git a/src/pool.c b/src/pool.c index 8a611a2dc..2e64bde4a 100644 --- a/src/pool.c +++ b/src/pool.c @@ -13,7 +13,7 @@ struct git_pool_page { #define GIT_POOL_MIN_USABLE 4 #define GIT_POOL_MIN_PAGESZ 2 * sizeof(void*) -static int pool_alloc_page(git_pool *pool, uint32_t size, void **ptr); +static void 
*pool_alloc_page(git_pool *pool, uint32_t size); static void pool_insert_page(git_pool *pool, git_pool_page *page); int git_pool_init( @@ -62,11 +62,25 @@ void git_pool_clear(git_pool *pool) pool->free_list = NULL; + pool->items = 0; + pool->has_string_alloc = 0; pool->has_multi_item_alloc = 0; pool->has_large_page_alloc = 0; } +void git_pool_swap(git_pool *a, git_pool *b) +{ + git_pool temp; + + if (a == b) + return; + + memcpy(&temp, a, sizeof(temp)); + memcpy(a, b, sizeof(temp)); + memcpy(b, &temp, sizeof(temp)); +} + static void pool_insert_page(git_pool *pool, git_pool_page *page) { git_pool_page *scan; @@ -88,8 +102,7 @@ static void pool_insert_page(git_pool *pool, git_pool_page *page) scan->next = page; } -static int pool_alloc_page( - git_pool *pool, uint32_t size, void **ptr) +static void *pool_alloc_page(git_pool *pool, uint32_t size) { git_pool_page *page; uint32_t alloc_size; @@ -103,7 +116,7 @@ static int pool_alloc_page( page = git__calloc(1, alloc_size + sizeof(git_pool_page)); if (!page) - return -1; + return NULL; page->size = alloc_size; page->avail = alloc_size - size; @@ -115,9 +128,9 @@ static int pool_alloc_page( pool->full = page; } - *ptr = page->data; + pool->items++; - return 0; + return page->data; } GIT_INLINE(void) pool_remove_page( @@ -129,22 +142,26 @@ GIT_INLINE(void) pool_remove_page( prev->next = page->next; } -int git_pool_malloc(git_pool *pool, uint32_t items, void **ptr) +void *git_pool_malloc(git_pool *pool, uint32_t items) { git_pool_page *scan = pool->open, *prev; uint32_t size = items * pool->item_size; + void *ptr = NULL; pool->has_string_alloc = 0; if (items > 1) pool->has_multi_item_alloc = 1; else if (pool->free_list != NULL) { - *ptr = pool->free_list; + ptr = pool->free_list; pool->free_list = *((void **)pool->free_list); + return ptr; } /* just add a block if there is no open one to accomodate this */ if (size >= pool->page_size || !scan || scan->avail < size) - return pool_alloc_page(pool, size, ptr); + return 
pool_alloc_page(pool, size); + + pool->items++; /* find smallest block in free list with space */ for (scan = pool->open, prev = NULL; @@ -152,7 +169,7 @@ int git_pool_malloc(git_pool *pool, uint32_t items, void **ptr) prev = scan, scan = scan->next); /* allocate space from the block */ - *ptr = &scan->data[scan->size - scan->avail]; + ptr = &scan->data[scan->size - scan->avail]; scan->avail -= size; /* move to full list if there is almost no space left */ @@ -167,7 +184,7 @@ int git_pool_malloc(git_pool *pool, uint32_t items, void **ptr) pool_insert_page(pool, scan); } - return 0; + return ptr; } char *git_pool_strndup(git_pool *pool, const char *str, size_t n) @@ -176,8 +193,10 @@ char *git_pool_strndup(git_pool *pool, const char *str, size_t n) assert(pool && str && pool->item_size == sizeof(char)); - if (!git_pool_malloc(pool, n, &ptr)) + if ((ptr = git_pool_malloc(pool, n + 1)) != NULL) { memcpy(ptr, str, n); + *(((char *)ptr) + n) = '\0'; + } pool->has_string_alloc = 1; return ptr; @@ -187,7 +206,29 @@ char *git_pool_strdup(git_pool *pool, const char *str) { assert(pool && str && pool->item_size == sizeof(char)); - return git_pool_strndup(pool, str, strlen(str) + 1); + return git_pool_strndup(pool, str, strlen(str)); +} + +char *git_pool_strcat(git_pool *pool, const char *a, const char *b) +{ + void *ptr; + size_t len_a, len_b; + + assert(pool && a && b && pool->item_size == sizeof(char)); + + len_a = a ? strlen(a) : 0; + len_b = b ? 
strlen(b) : 0; + + if ((ptr = git_pool_malloc(pool, len_a + len_b + 1)) != NULL) { + if (len_a) + memcpy(ptr, a, len_a); + if (len_b) + memcpy(((char *)ptr) + len_a, b, len_b); + *(((char *)ptr) + len_a + len_b) = '\0'; + } + pool->has_string_alloc = 1; + + return ptr; } void git_pool_free(git_pool *pool, void *ptr) diff --git a/src/pool.h b/src/pool.h index 5f65412a0..a92589087 100644 --- a/src/pool.h +++ b/src/pool.h @@ -33,11 +33,14 @@ typedef struct { void *free_list; /* optional: list of freed blocks */ uint32_t item_size; /* size of single alloc unit in bytes */ uint32_t page_size; /* size of page in bytes */ + uint32_t items; unsigned has_string_alloc : 1; /* was the strdup function used */ unsigned has_multi_item_alloc : 1; /* was items ever > 1 in malloc */ unsigned has_large_page_alloc : 1; /* are any pages > page_size */ } git_pool; +#define GIT_POOL_INIT_STRINGPOOL { 0, 0, 0, 1, 4000, 0, 0, 0, 0 } + /** * Initialize a pool. * @@ -49,7 +52,7 @@ typedef struct { * To allocate items of fixed size, use like this: * * git_pool_init(&pool, sizeof(item), 0); - * git_pool_malloc(&pool, 1, &my_item_ptr); + * my_item = git_pool_malloc(&pool, 1); * * Of course, you can use this in other ways, but those are the * two most common patterns. @@ -62,10 +65,15 @@ extern int git_pool_init( */ extern void git_pool_clear(git_pool *pool); +/** + * Swap two pools with one another + */ +extern void git_pool_swap(git_pool *a, git_pool *b); + /** * Allocate space for one or more items from a pool. */ -extern int git_pool_malloc(git_pool *pool, uint32_t items, void **ptr); +extern void *git_pool_malloc(git_pool *pool, uint32_t items); /** * Allocate space and duplicate string data into it. @@ -81,6 +89,13 @@ extern char *git_pool_strndup(git_pool *pool, const char *str, size_t n); */ extern char *git_pool_strdup(git_pool *pool, const char *str); +/** + * Allocate space for the concatenation of two strings. 
+ * + * This is allowed only for pools with item_size == sizeof(char) + */ +extern char *git_pool_strcat(git_pool *pool, const char *a, const char *b); + /** * Push a block back onto the free list for the pool. * diff --git a/tests-clar/attr/file.c b/tests-clar/attr/file.c index 7fede5025..4e1010230 100644 --- a/tests-clar/attr/file.c +++ b/tests-clar/attr/file.c @@ -11,7 +11,7 @@ void test_attr_file__simple_read(void) git_attr_assignment *assign; git_attr_rule *rule; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_file(NULL, cl_fixture("attr/attr0"), file)); cl_assert_equal_s(cl_fixture("attr/attr0"), file->path); cl_assert(file->rules.length == 1); @@ -27,7 +27,6 @@ void test_attr_file__simple_read(void) cl_assert(assign != NULL); cl_assert_equal_s("binary", assign->name); cl_assert(GIT_ATTR_TRUE(assign->value)); - cl_assert(!assign->is_allocated); git_attr_file__free(file); } @@ -38,7 +37,7 @@ void test_attr_file__match_variants(void) git_attr_rule *rule; git_attr_assignment *assign; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_file(NULL, cl_fixture("attr/attr1"), file)); cl_assert_equal_s(cl_fixture("attr/attr1"), file->path); cl_assert(file->rules.length == 10); @@ -56,7 +55,6 @@ void test_attr_file__match_variants(void) cl_assert_equal_s("attr0", assign->name); cl_assert(assign->name_hash == git_attr_file__name_hash(assign->name)); cl_assert(GIT_ATTR_TRUE(assign->value)); - cl_assert(!assign->is_allocated); rule = get_rule(1); cl_assert_equal_s("pat1", rule->match.pattern); @@ -125,7 +123,7 @@ void test_attr_file__assign_variants(void) git_attr_rule *rule; git_attr_assignment *assign; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_file(NULL, cl_fixture("attr/attr2"), file)); cl_assert_equal_s(cl_fixture("attr/attr2"), file->path); 
cl_assert(file->rules.length == 11); @@ -191,7 +189,7 @@ void test_attr_file__check_attr_examples(void) git_attr_rule *rule; git_attr_assignment *assign; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_file(NULL, cl_fixture("attr/attr3"), file)); cl_assert_equal_s(cl_fixture("attr/attr3"), file->path); cl_assert(file->rules.length == 3); diff --git a/tests-clar/attr/lookup.c b/tests-clar/attr/lookup.c index 4ce80e947..accd617e6 100644 --- a/tests-clar/attr/lookup.c +++ b/tests-clar/attr/lookup.c @@ -9,7 +9,7 @@ void test_attr_lookup__simple(void) git_attr_path path; const char *value = NULL; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_file(NULL, cl_fixture("attr/attr0"), file)); cl_assert_equal_s(cl_fixture("attr/attr0"), file->path); cl_assert(file->rules.length == 1); @@ -127,7 +127,7 @@ void test_attr_lookup__match_variants(void) { NULL, NULL, 0, NULL } }; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_file(NULL, cl_fixture("attr/attr1"), file)); cl_assert_equal_s(cl_fixture("attr/attr1"), file->path); cl_assert(file->rules.length == 10); @@ -144,6 +144,7 @@ void test_attr_lookup__match_variants(void) void test_attr_lookup__assign_variants(void) { git_attr_file *file; + struct attr_expected cases[] = { /* pat0 -> simple assign */ { "pat0", "simple", EXPECT_TRUE, NULL }, @@ -187,7 +188,7 @@ void test_attr_lookup__assign_variants(void) { NULL, NULL, 0, NULL } }; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_file(NULL, cl_fixture("attr/attr2"), file)); cl_assert(file->rules.length == 11); @@ -199,6 +200,7 @@ void test_attr_lookup__assign_variants(void) void test_attr_lookup__check_attr_examples(void) { git_attr_file *file; + struct attr_expected cases[] = { { 
"foo.java", "diff", EXPECT_STRING, "java" }, { "foo.java", "crlf", EXPECT_FALSE, NULL }, @@ -222,7 +224,7 @@ void test_attr_lookup__check_attr_examples(void) { NULL, NULL, 0, NULL } }; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_file(NULL, cl_fixture("attr/attr3"), file)); cl_assert(file->rules.length == 3); @@ -234,6 +236,7 @@ void test_attr_lookup__check_attr_examples(void) void test_attr_lookup__from_buffer(void) { git_attr_file *file; + struct attr_expected cases[] = { { "abc", "foo", EXPECT_TRUE, NULL }, { "abc", "bar", EXPECT_TRUE, NULL }, @@ -247,7 +250,7 @@ void test_attr_lookup__from_buffer(void) { NULL, NULL, 0, NULL } }; - cl_git_pass(git_attr_file__new(&file)); + cl_git_pass(git_attr_file__new(&file, NULL)); cl_git_pass(git_attr_file__from_buffer(NULL, "a* foo\nabc bar\n* baz", file)); cl_assert(file->rules.length == 3); diff --git a/tests-clar/core/pool.c b/tests-clar/core/pool.c index 3f1ed8a5a..5ed97366f 100644 --- a/tests-clar/core/pool.c +++ b/tests-clar/core/pool.c @@ -11,9 +11,8 @@ void test_core_pool__0(void) cl_git_pass(git_pool_init(&p, 1, 4000)); for (i = 1; i < 10000; i *= 2) { - cl_git_pass(git_pool_malloc(&p, i, &ptr)); + ptr = git_pool_malloc(&p, i); cl_assert(ptr != NULL); - cl_assert(git_pool__ptr_in_pool(&p, ptr)); cl_assert(!git_pool__ptr_in_pool(&p, &i)); } @@ -32,12 +31,11 @@ void test_core_pool__1(void) { int i; git_pool p; - void *ptr; cl_git_pass(git_pool_init(&p, 1, 4000)); for (i = 2010; i > 0; i--) - cl_git_pass(git_pool_malloc(&p, i, &ptr)); + cl_assert(git_pool_malloc(&p, i) != NULL); /* with fixed page size, allocation must end up with these values */ cl_assert(git_pool__open_pages(&p) == 1); @@ -48,7 +46,7 @@ void test_core_pool__1(void) cl_git_pass(git_pool_init(&p, 1, 4100)); for (i = 2010; i > 0; i--) - cl_git_pass(git_pool_malloc(&p, i, &ptr)); + cl_assert(git_pool_malloc(&p, i) != NULL); /* with fixed page size, allocation must end up with 
these values */ cl_assert(git_pool__open_pages(&p) == 1); @@ -71,7 +69,9 @@ void test_core_pool__2(void) cl_git_pass(git_pool_init(&p, sizeof(git_oid), 100)); for (i = 1000; i < 10000; i++) { - cl_git_pass(git_pool_malloc(&p, 1, (void **)&oid)); + oid = git_pool_malloc(&p, 1); + cl_assert(oid != NULL); + for (j = 0; j < 8; j++) oid_hex[j] = to_hex[(i >> (4 * j)) & 0x0f]; cl_git_pass(git_oid_fromstr(oid, oid_hex)); From da3b391c32b973d5c073951b6848eedd40434e5e Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 18 Apr 2012 10:57:08 -0700 Subject: [PATCH 3/8] Convert revwalk to use git_pool This removes the custom paged allocator from revwalk and replaces it with a `git_pool`. --- src/pool.c | 14 +++++++---- src/pool.h | 2 ++ src/revwalk.c | 69 ++++++++++++--------------------------------------- 3 files changed, 27 insertions(+), 58 deletions(-) diff --git a/src/pool.c b/src/pool.c index 2e64bde4a..8f5c7e75a 100644 --- a/src/pool.c +++ b/src/pool.c @@ -29,11 +29,8 @@ int git_pool_init( else if (item_size == 3) item_size = 4; - if (!items_per_page) { - uint32_t page_bytes = - git_pool__system_page_size() - sizeof(git_pool_page); - items_per_page = page_bytes / item_size; - } + if (!items_per_page) + items_per_page = git_pool__suggest_items_per_page(item_size); if (item_size * items_per_page < GIT_POOL_MIN_PAGESZ) items_per_page = (GIT_POOL_MIN_PAGESZ + item_size - 1) / item_size; @@ -288,3 +285,10 @@ uint32_t git_pool__system_page_size(void) return size; } +uint32_t git_pool__suggest_items_per_page(uint32_t item_size) +{ + uint32_t page_bytes = + git_pool__system_page_size() - sizeof(git_pool_page); + return page_bytes / item_size; +} + diff --git a/src/pool.h b/src/pool.h index a92589087..54a2861ed 100644 --- a/src/pool.h +++ b/src/pool.h @@ -120,4 +120,6 @@ extern bool git_pool__ptr_in_pool(git_pool *pool, void *ptr); extern uint32_t git_pool__system_page_size(void); +extern uint32_t git_pool__suggest_items_per_page(uint32_t item_size); + #endif diff --git 
a/src/revwalk.c b/src/revwalk.c index a62576038..557966b94 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ -10,6 +10,7 @@ #include "odb.h" #include "hashtable.h" #include "pqueue.h" +#include "pool.h" #include "git2/revwalk.h" #include "git2/merge.h" @@ -46,6 +47,7 @@ struct git_revwalk { git_odb *odb; git_hashtable *commits; + git_pool commit_pool; commit_list *iterator_topo; commit_list *iterator_rand; @@ -55,9 +57,6 @@ struct git_revwalk { int (*get_next)(commit_object **, git_revwalk *); int (*enqueue)(git_revwalk *, commit_object *); - git_vector memory_alloc; - size_t chunk_size; - unsigned walking:1; unsigned int sorting; @@ -133,42 +132,23 @@ static uint32_t object_table_hash(const void *key, int hash_id) return r; } -#define COMMITS_PER_CHUNK 128 -#define CHUNK_STEP 64 -#define PARENTS_PER_COMMIT ((CHUNK_STEP - sizeof(commit_object)) / sizeof(commit_object *)) - -static int alloc_chunk(git_revwalk *walk) -{ - void *chunk; - - chunk = git__calloc(COMMITS_PER_CHUNK, CHUNK_STEP); - GITERR_CHECK_ALLOC(chunk); - - walk->chunk_size = 0; - return git_vector_insert(&walk->memory_alloc, chunk); -} +#define PARENTS_PER_COMMIT 2 +#define COMMIT_ALLOC \ + (sizeof(commit_object) + PARENTS_PER_COMMIT * sizeof(commit_object *)) static commit_object *alloc_commit(git_revwalk *walk) { - unsigned char *chunk; - - if (walk->chunk_size == COMMITS_PER_CHUNK) - if (alloc_chunk(walk) < 0) - return NULL; - - chunk = git_vector_get(&walk->memory_alloc, walk->memory_alloc.length - 1); - chunk += (walk->chunk_size * CHUNK_STEP); - walk->chunk_size++; - - return (commit_object *)chunk; + return (commit_object *)git_pool_malloc(&walk->commit_pool, COMMIT_ALLOC); } -static commit_object **alloc_parents(commit_object *commit, size_t n_parents) +static commit_object **alloc_parents( + git_revwalk *walk, commit_object *commit, size_t n_parents) { if (n_parents <= PARENTS_PER_COMMIT) - return (commit_object **)((unsigned char *)commit + sizeof(commit_object)); + return (commit_object 
**)((char *)commit + sizeof(commit_object)); - return git__malloc(n_parents * sizeof(commit_object *)); + return (commit_object **)git_pool_malloc( + &walk->commit_pool, n_parents * sizeof(commit_object *)); } @@ -185,10 +165,8 @@ static commit_object *commit_lookup(git_revwalk *walk, const git_oid *oid) git_oid_cpy(&commit->oid, oid); - if (git_hashtable_insert(walk->commits, &commit->oid, commit) < 0) { - git__free(commit); + if (git_hashtable_insert(walk->commits, &commit->oid, commit) < 0) return NULL; - } return commit; } @@ -212,7 +190,7 @@ static int commit_quick_parse(git_revwalk *walk, commit_object *commit, git_rawo buffer += parent_len; } - commit->parents = alloc_parents(commit, parents); + commit->parents = alloc_parents(walk, commit, parents); GITERR_CHECK_ALLOC(commit->parents); buffer = parents_start; @@ -756,9 +734,9 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo) GITERR_CHECK_ALLOC(walk->commits); if (git_pqueue_init(&walk->iterator_time, 8, commit_time_cmp) < 0 || - git_vector_init(&walk->memory_alloc, 8, NULL) < 0 || git_vector_init(&walk->twos, 4, NULL) < 0 || - alloc_chunk(walk) < 0) + git_pool_init(&walk->commit_pool, 1, + git_pool__suggest_items_per_page(COMMIT_ALLOC) * COMMIT_ALLOC) < 0) return -1; walk->get_next = &revwalk_next_unsorted; @@ -777,30 +755,15 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo) void git_revwalk_free(git_revwalk *walk) { - unsigned int i; - commit_object *commit; - if (walk == NULL) return; git_revwalk_reset(walk); git_odb_free(walk->odb); - /* if the parent has more than PARENTS_PER_COMMIT parents, - * we had to allocate a separate array for those parents. 
- * make sure it's being free'd */ - GIT_HASHTABLE_FOREACH_VALUE(walk->commits, commit, { - if (commit->out_degree > PARENTS_PER_COMMIT) - git__free(commit->parents); - }); - git_hashtable_free(walk->commits); + git_pool_clear(&walk->commit_pool); git_pqueue_free(&walk->iterator_time); - - for (i = 0; i < walk->memory_alloc.length; ++i) - git__free(git_vector_get(&walk->memory_alloc, i)); - - git_vector_free(&walk->memory_alloc); git_vector_free(&walk->twos); git__free(walk); } From 25f258e735f707075dc1b5cdd804540fe1e43f37 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 23 Apr 2012 09:21:15 -0700 Subject: [PATCH 4/8] Moving power-of-two bit utilities into util.h --- src/cache.c | 11 ++--------- src/util.h | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/cache.c b/src/cache.c index f445e906d..31da3c36e 100644 --- a/src/cache.c +++ b/src/cache.c @@ -9,21 +9,14 @@ #include "repository.h" #include "commit.h" #include "thread-utils.h" +#include "util.h" #include "cache.h" int git_cache_init(git_cache *cache, size_t size, git_cached_obj_freeptr free_ptr) { if (size < 8) size = 8; - - /* round up size to closest power of 2 */ - size--; - size |= size >> 1; - size |= size >> 2; - size |= size >> 4; - size |= size >> 8; - size |= size >> 16; - size++; + size = git__size_t_powerof2(size); cache->size_mask = size - 1; cache->lru_count = 0; diff --git a/src/util.h b/src/util.h index afa3f7205..1fee9a70c 100644 --- a/src/util.h +++ b/src/util.h @@ -179,4 +179,21 @@ GIT_INLINE(int) git__ishex(const char *str) return 1; } +GIT_INLINE(size_t) git__size_t_bitmask(size_t v) +{ + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + return v; +} + +GIT_INLINE(size_t) git__size_t_powerof2(size_t v) +{ + return git__size_t_bitmask(v) + 1; +} + #endif /* INCLUDE_util_h__ */ From c16c8b9a7e7588f4ced41aa8f9787495f41fd918 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 23 Apr 2012 09:23:58 -0700 
Subject: [PATCH 5/8] Adding stash to hashtable implementation Adding a small stash of nodes with key conflicts has been demonstrated to greatly increase the efficiency of a cuckoo hashtable. See: http://research.microsoft.com/pubs/73856/stash-full.9-30.pdf for more details. --- src/hashtable.c | 143 ++++++++++++++++++++++++++++++++++-------------- src/hashtable.h | 13 ++++- 2 files changed, 112 insertions(+), 44 deletions(-) diff --git a/src/hashtable.c b/src/hashtable.c index 8e057d4b1..e2f131cf1 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -18,28 +18,37 @@ static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other); static int node_insert(git_hashtable *self, git_hashtable_node *new_node); static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size); +static void reinsert_stash(git_hashtable *self); static int resize_to(git_hashtable *self, size_t new_size) { git_hashtable_node *old_nodes = self->nodes; size_t old_size = self->size; + git_hashtable_node old_stash[GIT_HASHTABLE_STASH_SIZE]; + size_t old_stash_count = self->stash_count; self->is_resizing = 1; + if (old_stash_count > 0) + memcpy(old_stash, self->stash, + old_stash_count * sizeof(git_hashtable_node)); + do { self->size = new_size; self->size_mask = new_size - 1; self->key_count = 0; + self->stash_count = 0; self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size); GITERR_CHECK_ALLOC(self->nodes); - if (insert_nodes(self, old_nodes, old_size) == 0) + if (insert_nodes(self, old_nodes, old_size) == 0 && + insert_nodes(self, old_stash, old_stash_count) == 0) self->is_resizing = 0; else { new_size *= 2; git__free(self->nodes); } - } while(self->is_resizing); + } while (self->is_resizing); git__free(old_nodes); return 0; @@ -47,26 +56,28 @@ static int resize_to(git_hashtable *self, size_t new_size) static int set_size(git_hashtable *self, size_t new_size) 
{ - self->nodes = git__realloc(self->nodes, new_size * sizeof(git_hashtable_node)); + self->nodes = + git__realloc(self->nodes, new_size * sizeof(git_hashtable_node)); GITERR_CHECK_ALLOC(self->nodes); - if (new_size > self->size) { + if (new_size > self->size) memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(git_hashtable_node)); - } self->size = new_size; self->size_mask = new_size - 1; return 0; } -static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id) +GIT_INLINE(git_hashtable_node *)node_with_hash( + git_hashtable *self, const void *key, int hash_id) { size_t pos = self->hash(key, hash_id) & self->size_mask; return git_hashtable_node_at(self->nodes, pos); } -static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other) +GIT_INLINE(void) node_swap_with( + git_hashtable_node *self, git_hashtable_node *other) { git_hashtable_node tmp = *self; *self = *other; @@ -76,19 +87,26 @@ static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other) static int node_insert(git_hashtable *self, git_hashtable_node *new_node) { int iteration, hash_id; + git_hashtable_node *node; for (iteration = 0; iteration < MAX_LOOPS; iteration++) { for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { - git_hashtable_node *node; node = node_with_hash(self, new_node->key, hash_id); node_swap_with(new_node, node); - if (new_node->key == 0x0){ + if (new_node->key == 0x0) { self->key_count++; return 0; } } } + /* Insert into stash if there is space */ + if (self->stash_count < GIT_HASHTABLE_STASH_SIZE) { + node_swap_with(new_node, &self->stash[self->stash_count++]); + self->key_count++; + return 0; + } + /* Failed to insert node. 
Hashtable is currently resizing */ assert(!self->is_resizing); @@ -105,14 +123,29 @@ static int insert_nodes( for (i = 0; i < old_size; ++i) { git_hashtable_node *node = git_hashtable_node_at(old_nodes, i); - if (node->key && - git_hashtable_insert(self, node->key, node->value) < 0) + if (node->key && node_insert(self, node) < 0) return -1; } return 0; } +static void reinsert_stash(git_hashtable *self) +{ + int stash_count; + struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE]; + + if (self->stash_count <= 0) + return; + + memcpy(stash, self->stash, self->stash_count * sizeof(git_hashtable_node)); + stash_count = self->stash_count; + self->stash_count = 0; + + /* the node_insert() calls *cannot* fail because the stash is empty */ + insert_nodes(self, stash, stash_count); +} + git_hashtable *git_hashtable_alloc( size_t min_size, git_hash_ptr hash, @@ -127,21 +160,11 @@ git_hashtable *git_hashtable_alloc( memset(table, 0x0, sizeof(git_hashtable)); - if (min_size < 8) - min_size = 8; - - /* round up size to closest power of 2 */ - min_size--; - min_size |= min_size >> 1; - min_size |= min_size >> 2; - min_size |= min_size >> 4; - min_size |= min_size >> 8; - min_size |= min_size >> 16; - table->hash = hash; table->key_equal = key_eq; - set_size(table, min_size + 1); + min_size = git__size_t_powerof2(min_size < 8 ? 
8 : min_size); + set_size(table, min_size); return table; } @@ -151,6 +174,8 @@ void git_hashtable_clear(git_hashtable *self) assert(self); memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size); + + self->stash_count = 0; self->key_count = 0; } @@ -200,39 +225,70 @@ int git_hashtable_insert2( } } -void *git_hashtable_lookup(git_hashtable *self, const void *key) +static git_hashtable_node *find_node(git_hashtable *self, const void *key) { - int hash_id; + int hash_id, count = 0; git_hashtable_node *node; - assert(self && self->nodes); - for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { node = node_with_hash(self, key, hash_id); - if (node->key && self->key_equal(key, node->key) == 0) - return node->value; + if (node->key) { + ++count; + if (self->key_equal(key, node->key) == 0) + return node; + } + } + + /* check stash if not found but all slots were filled */ + if (count == GIT_HASHTABLE_HASHES) { + for (count = 0; count < self->stash_count; ++count) + if (self->key_equal(key, self->stash[count].key) == 0) + return &self->stash[count]; } return NULL; } +static void reset_stash(git_hashtable *self, git_hashtable_node *node) +{ + /* if node was in stash, then compact stash */ + ssize_t offset = node - self->stash; + + if (offset >= 0 && offset < self->stash_count) { + if (offset < self->stash_count - 1) + memmove(node, node + 1, (self->stash_count - offset) * + sizeof(git_hashtable_node)); + self->stash_count--; + } + + reinsert_stash(self); +} + +void *git_hashtable_lookup(git_hashtable *self, const void *key) +{ + git_hashtable_node *node; + assert(self && key); + node = find_node(self, key); + return node ? 
node->value : NULL; +} + int git_hashtable_remove2( git_hashtable *self, const void *key, void **old_value) { - int hash_id; git_hashtable_node *node; assert(self && self->nodes); - for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { - node = node_with_hash(self, key, hash_id); - if (node->key && self->key_equal(key, node->key) == 0) { - *old_value = node->value; - node->key = NULL; - node->value = NULL; - self->key_count--; - return 0; - } + node = find_node(self, key); + if (node) { + *old_value = node->value; + + node->key = NULL; + node->value = NULL; + self->key_count--; + + reset_stash(self, node); + return 0; } return GIT_ENOTFOUND; @@ -240,10 +296,15 @@ int git_hashtable_remove2( int git_hashtable_merge(git_hashtable *self, git_hashtable *other) { - if (resize_to(self, (self->size + other->size) * 2) < 0) + size_t new_size = git__size_t_powerof2(self->size + other->size); + + if (resize_to(self, new_size) < 0) return -1; - return insert_nodes(self, other->nodes, other->key_count); + if (insert_nodes(self, other->nodes, other->key_count) < 0) + return -1; + + return insert_nodes(self, other->stash, other->stash_count); } diff --git a/src/hashtable.h b/src/hashtable.h index 0bab84543..448487507 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -22,6 +22,8 @@ struct git_hashtable_node { void *value; }; +#define GIT_HASHTABLE_STASH_SIZE 3 + struct git_hashtable { struct git_hashtable_node *nodes; @@ -29,6 +31,9 @@ struct git_hashtable { size_t size; size_t key_count; + struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE]; + int stash_count; + int is_resizing; git_hash_ptr hash; @@ -38,9 +43,11 @@ struct git_hashtable { typedef struct git_hashtable_node git_hashtable_node; typedef struct git_hashtable git_hashtable; -git_hashtable *git_hashtable_alloc(size_t min_size, - git_hash_ptr hash, - git_hash_keyeq_ptr key_eq); +git_hashtable *git_hashtable_alloc( + size_t min_size, + git_hash_ptr hash, + git_hash_keyeq_ptr key_eq); + void 
*git_hashtable_lookup(git_hashtable *h, const void *key); int git_hashtable_remove2(git_hashtable *table, const void *key, void **old_value); From ada488bfe720d0df8187b5b58e326a13b7bdc678 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 24 Apr 2012 11:02:40 -0700 Subject: [PATCH 6/8] Import khash.h from attractivechaos/klib --- src/khash.h | 548 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 548 insertions(+) create mode 100644 src/khash.h diff --git a/src/khash.h b/src/khash.h new file mode 100644 index 000000000..1a28e1184 --- /dev/null +++ b/src/khash.h @@ -0,0 +1,548 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2011-12-29 (0.2.7): + + * Minor code clean up; no actual effect. + + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. + + 2009-09-26 (0.2.4): + + * Improve portability + + 2008-09-19 (0.2.3): + + * Corrected the example + * Improved interfaces + + 2008-09-11 (0.2.2): + + * Improved speed a little in kh_put() + + 2008-09-10 (0.2.1): + + * Added kh_clear() + * Fixed a compiling error + + 2008-09-02 (0.2.0): + + * Changed to token concatenation which increases flexibility. + + 2008-08-31 (0.1.2): + + * Fixed a bug in kh_get(), which has not been tested previously. + + 2008-08-31 (0.1.1): + + * Added destructor +*/ + + +#ifndef __AC_KHASH_H +#define __AC_KHASH_H + +/*! + @header + + Generic hash table library. 
+ */ + +#define AC_VERSION_KHASH_H "0.2.6" + +#include +#include +#include + +/* compiler specific configuration */ + +#if UINT_MAX == 0xffffffffu +typedef unsigned int khint32_t; +#elif ULONG_MAX == 0xffffffffu +typedef unsigned long khint32_t; +#endif + +#if ULONG_MAX == ULLONG_MAX +typedef unsigned long khint64_t; +#else +typedef unsigned long long khint64_t; +#endif + +#ifdef _MSC_VER +#define inline __inline +#endif + +typedef khint32_t khint_t; +typedef khint_t khiter_t; + +#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) +#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) +#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) +#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) +#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) +#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) +#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) + +#ifdef KHASH_LINEAR +#define __ac_inc(k, m) 1 +#else +#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m) +#endif + +#define __ac_fsize(m) ((m) < 16? 
1 : (m)>>4) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +static const double __ac_HASH_UPPER = 0.77; + +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + extern kh_##name##_t *kh_init_##name(); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khint_t x); + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + SCOPE kh_##name##_t *kh_init_##name() { \ + return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + free(h->keys); free(h->flags); \ + free(h->vals); \ + free(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khint_t inc, k, i, last, mask; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + inc) & mask; \ + if (i == last) return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i)? 
h->n_buckets : i; \ + } else return 0; \ + } \ + SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ + khint32_t *new_flags = 0; \ + khint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isdel_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t inc, k, i; \ + k = __hash_func(key); \ + i = k & new_mask; \ + inc = __ac_inc(k, new_mask); \ + while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ + __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) h->vals[i] = val; 
\ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + free(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + } \ + SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ + else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + { \ + khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + inc = __ac_inc(k, mask); last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + inc) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ 
+ { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +/* --- BEGIN OF HASH FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [khint32_t] + @return The hash value [khint_t] + */ +#define kh_int_hash_func(key) (khint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [khint64_t] + @return The hash value [khint_t] + */ +#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +static inline khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = *s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +static inline khint_t __ac_Wang_hash(khint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*! + @abstract Type of the hash table. 
+ @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name() + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Reset a hash table without deallocating memory. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! @function + @abstract Insert a key to the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @param r Extra return code: 0 if the key is present in the hash table; + 1 if the bucket is empty (never used); 2 if the element in + the bucket has been deleted [int*] + @return Iterator to the inserted element [khint_t] + */ +#define kh_put(name, h, k, r) kh_put_##name(h, k, r) + +/*! @function + @abstract Retrieve a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t] + */ +#define kh_get(name, h, k) kh_get_##name(h, k) + +/*! @function + @abstract Remove a key from the hash table. 
+ @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Iterator to the element to be deleted [khint_t] + */ +#define kh_del(name, h, k) kh_del_##name(h, k) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get value given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->vals[x]) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) ((h)->vals[x]) + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The end iterator [khint_t] + */ +#define kh_end(h) ((h)->n_buckets) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->size) + +/*! @function + @abstract Get the number of buckets in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of buckets in the hash table [khint_t] + */ +#define kh_n_buckets(h) ((h)->n_buckets) + +/* More conenient interfaces */ + +/*! 
@function + @abstract Instantiate a hash set containing integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*! 
@function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + +#endif /* __AC_KHASH_H */ From 01fed0a8f9b80e80c8f76cde29fc0d66cb77fff7 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 25 Apr 2012 10:36:01 -0700 Subject: [PATCH 7/8] Convert hashtable usage over to khash This updates khash.h with some extra features (like error checking on allocations, ability to use wrapped malloc, foreach calls, etc.), creates two high-level wrappers around khash: `git_khash_str` and `git_khash_oid` for string-to-void-ptr and oid-to-void-ptr tables, then converts all of the old usage of `git_hashtable` over to use these new hashtables. For `git_khash_str`, I've tried to create a set of macros that yield an API not too unlike the old `git_hashtable` API. Since the oid hashtable is only used in one file, I haven't bothered to set up all those macros and just use the khash APIs directly for now. 
--- src/attr.c | 85 +++++++++++++++++++----------- src/attr.h | 8 ++- src/attr_file.c | 4 +- src/config.c | 1 - src/config_file.c | 95 +++++++++++++++++++++------------- src/khash.h | 100 ++++++++++++++++++++++++++++-------- src/khash_oid.h | 42 +++++++++++++++ src/khash_str.h | 54 +++++++++++++++++++ src/refs.c | 72 ++++++++++++++++---------- src/refs.h | 4 +- src/repository.h | 4 +- src/revwalk.c | 38 ++++++-------- src/submodule.c | 128 ++++++++++++++++++++++++---------------------- 13 files changed, 433 insertions(+), 202 deletions(-) create mode 100644 src/khash_oid.h create mode 100644 src/khash_str.h diff --git a/src/attr.c b/src/attr.c index f5d50bb42..1d7f3aa22 100644 --- a/src/attr.c +++ b/src/attr.c @@ -3,6 +3,8 @@ #include "config.h" #include +GIT_KHASH_STR__IMPLEMENTATION; + static int collect_attr_files( git_repository *repo, const char *path, git_vector *files); @@ -124,14 +126,14 @@ int git_attr_foreach( git_attr_file *file; git_attr_rule *rule; git_attr_assignment *assign; - git_hashtable *seen = NULL; + git_khash_str *seen = NULL; if ((error = git_attr_path__init( &path, pathname, git_repository_workdir(repo))) < 0 || (error = collect_attr_files(repo, pathname, &files)) < 0) return error; - seen = git_hashtable_alloc(8, git_hash__strhash_cb, git_hash__strcmp_cb); + seen = git_khash_str_alloc(); GITERR_CHECK_ALLOC(seen); git_vector_foreach(&files, i, file) { @@ -140,10 +142,11 @@ int git_attr_foreach( git_vector_foreach(&rule->assigns, k, assign) { /* skip if higher priority assignment was already seen */ - if (git_hashtable_lookup(seen, assign->name)) + if (git_khash_str_exists(seen, assign->name)) continue; - if (!(error = git_hashtable_insert(seen, assign->name, assign))) + git_khash_str_insert(seen, assign->name, assign, error); + if (error >= 0) error = callback(assign->name, assign->value, payload); if (error != 0) @@ -153,7 +156,7 @@ int git_attr_foreach( } cleanup: - git_hashtable_free(seen); + git_khash_str_free(seen); 
git_vector_free(&files); return error; @@ -197,10 +200,12 @@ int git_attr_add_macro( bool git_attr_cache__is_cached(git_repository *repo, const char *path) { const char *cache_key = path; + git_khash_str *files = git_repository_attr_cache(repo)->files; + if (repo && git__prefixcmp(cache_key, git_repository_workdir(repo)) == 0) cache_key += strlen(git_repository_workdir(repo)); - return (git_hashtable_lookup( - git_repository_attr_cache(repo)->files, cache_key) != NULL); + + return git_khash_str_exists(files, cache_key); } int git_attr_cache__lookup_or_create_file( @@ -213,9 +218,11 @@ int git_attr_cache__lookup_or_create_file( int error; git_attr_cache *cache = git_repository_attr_cache(repo); git_attr_file *file = NULL; + khiter_t pos; - if ((file = git_hashtable_lookup(cache->files, key)) != NULL) { - *file_ptr = file; + pos = git_khash_str_lookup_index(cache->files, key); + if (git_khash_str_valid_index(cache->files, pos)) { + *file_ptr = git_khash_str_value_at(cache->files, pos); return 0; } @@ -232,8 +239,11 @@ int git_attr_cache__lookup_or_create_file( else error = git_attr_file__set_path(repo, key, file); - if (!error) - error = git_hashtable_insert(cache->files, file->path, file); + if (!error) { + git_khash_str_insert(cache->files, file->path, file, error); + if (error > 0) + error = 0; + } if (error < 0) { git_attr_file__free(file); @@ -373,18 +383,14 @@ int git_attr_cache__init(git_repository *repo) /* allocate hashtable for attribute and ignore file contents */ if (cache->files == NULL) { - cache->files = git_hashtable_alloc( - 8, git_hash__strhash_cb, git_hash__strcmp_cb); - if (!cache->files) - return -1; + cache->files = git_khash_str_alloc(); + GITERR_CHECK_ALLOC(cache->files); } /* allocate hashtable for attribute macros */ if (cache->macros == NULL) { - cache->macros = git_hashtable_alloc( - 8, git_hash__strhash_cb, git_hash__strcmp_cb); - if (!cache->macros) - return -1; + cache->macros = git_khash_str_alloc(); + 
GITERR_CHECK_ALLOC(cache->macros); } /* allocate string pool */ @@ -409,19 +415,22 @@ void git_attr_cache_flush( if (cache->files != NULL) { git_attr_file *file; - GIT_HASHTABLE_FOREACH_VALUE( - cache->files, file, git_attr_file__free(file)); - git_hashtable_free(cache->files); - cache->files = NULL; + + git_khash_str_foreach_value(cache->files, file, { + git_attr_file__free(file); + }); + + git_khash_str_free(cache->files); } if (cache->macros != NULL) { git_attr_rule *rule; - GIT_HASHTABLE_FOREACH_VALUE( - cache->macros, rule, git_attr_rule__free(rule)); - git_hashtable_free(cache->macros); - cache->macros = NULL; + git_khash_str_foreach_value(cache->macros, rule, { + git_attr_rule__free(rule); + }); + + git_khash_str_free(cache->macros); } git_pool_clear(&cache->pool); @@ -431,10 +440,28 @@ void git_attr_cache_flush( int git_attr_cache__insert_macro(git_repository *repo, git_attr_rule *macro) { + git_khash_str *macros = git_repository_attr_cache(repo)->macros; + int error; + /* TODO: generate warning log if (macro->assigns.length == 0) */ if (macro->assigns.length == 0) return 0; - return git_hashtable_insert( - git_repository_attr_cache(repo)->macros, macro->match.pattern, macro); + git_khash_str_insert(macros, macro->match.pattern, macro, error); + return (error < 0) ? 
-1 : 0; } + +git_attr_rule *git_attr_cache__lookup_macro( + git_repository *repo, const char *name) +{ + git_khash_str *macros = git_repository_attr_cache(repo)->macros; + khiter_t pos; + + pos = git_khash_str_lookup_index(macros, name); + + if (!git_khash_str_valid_index(macros, pos)) + return NULL; + + return (git_attr_rule *)git_khash_str_value_at(macros, pos); +} + diff --git a/src/attr.h b/src/attr.h index 825cbfe4e..75e98607f 100644 --- a/src/attr.h +++ b/src/attr.h @@ -8,6 +8,7 @@ #define INCLUDE_attr_h__ #include "attr_file.h" +#include "khash_str.h" #define GIT_ATTR_CONFIG "core.attributesfile" #define GIT_IGNORE_CONFIG "core.excludesfile" @@ -15,8 +16,8 @@ typedef struct { int initialized; git_pool pool; - git_hashtable *files; /* hash path to git_attr_file of rules */ - git_hashtable *macros; /* hash name to vector */ + git_khash_str *files; /* hash path to git_attr_file of rules */ + git_khash_str *macros; /* hash name to vector */ const char *cfg_attr_file; /* cached value of core.attributesfile */ const char *cfg_excl_file; /* cached value of core.excludesfile */ } git_attr_cache; @@ -26,6 +27,9 @@ extern int git_attr_cache__init(git_repository *repo); extern int git_attr_cache__insert_macro( git_repository *repo, git_attr_rule *macro); +extern git_attr_rule *git_attr_cache__lookup_macro( + git_repository *repo, const char *name); + extern int git_attr_cache__lookup_or_create_file( git_repository *repo, const char *key, diff --git a/src/attr_file.c b/src/attr_file.c index 7909c49b4..e34053fc3 100644 --- a/src/attr_file.c +++ b/src/attr_file.c @@ -515,8 +515,8 @@ int git_attr_assignment__parse( /* expand macros (if given a repo with a macro cache) */ if (repo != NULL && assign->value == git_attr__true) { - git_attr_rule *macro = git_hashtable_lookup( - git_repository_attr_cache(repo)->macros, assign->name); + git_attr_rule *macro = + git_attr_cache__lookup_macro(repo, assign->name); if (macro != NULL) { unsigned int i; diff --git a/src/config.c 
b/src/config.c index f5cfa9ec0..4c971924c 100644 --- a/src/config.c +++ b/src/config.c @@ -7,7 +7,6 @@ #include "common.h" #include "fileops.h" -#include "hashtable.h" #include "config.h" #include "git2/config.h" #include "vector.h" diff --git a/src/config_file.c b/src/config_file.c index fd634fbca..a0ce329fc 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -12,12 +12,14 @@ #include "buffer.h" #include "git2/config.h" #include "git2/types.h" - +#include "khash_str.h" #include #include #include +GIT_KHASH_STR__IMPLEMENTATION; + typedef struct cvar_t { struct cvar_t *next; char *key; /* TODO: we might be able to get rid of this */ @@ -70,7 +72,7 @@ typedef struct { typedef struct { git_config_file parent; - git_hashtable *values; + git_khash_str *values; struct { git_buf buffer; @@ -130,22 +132,21 @@ static int normalize_name(const char *in, char **out) return 0; } -static void free_vars(git_hashtable *values) +static void free_vars(git_khash_str *values) { cvar_t *var = NULL; if (values == NULL) return; - GIT_HASHTABLE_FOREACH_VALUE(values, var, - do { - cvar_t *next = CVAR_LIST_NEXT(var); - cvar_free(var); - var = next; - } while (var != NULL); - ) + git_khash_str_foreach_value(values, var, + while (var != NULL) { + cvar_t *next = CVAR_LIST_NEXT(var); + cvar_free(var); + var = next; + }); - git_hashtable_free(values); + git_khash_str_free(values); } static int config_open(git_config_file *cfg) @@ -153,7 +154,7 @@ static int config_open(git_config_file *cfg) int res; diskfile_backend *b = (diskfile_backend *)cfg; - b->values = git_hashtable_alloc (20, git_hash__strhash_cb, git_hash__strcmp_cb); + b->values = git_khash_str_alloc(); GITERR_CHECK_ALLOC(b->values); git_buf_init(&b->reader.buffer, 0); @@ -195,24 +196,25 @@ static int file_foreach(git_config_file *backend, int (*fn)(const char *, const if (!b->values) return 0; - GIT_HASHTABLE_FOREACH(b->values, key, var, + git_khash_str_foreach(b->values, key, var, do { if (fn(key, var->value, data) < 0) break; 
var = CVAR_LIST_NEXT(var); } while (var != NULL); - ) + ); return 0; } static int config_set(git_config_file *cfg, const char *name, const char *value) { - cvar_t *var = NULL; - cvar_t *existing = NULL, *old_value = NULL; + cvar_t *var = NULL, *old_var; diskfile_backend *b = (diskfile_backend *)cfg; char *key; + khiter_t pos; + int rval; if (normalize_name(name, &key) < 0) return -1; @@ -221,8 +223,9 @@ static int config_set(git_config_file *cfg, const char *name, const char *value) * Try to find it in the existing values and update it if it * only has one value. */ - existing = git_hashtable_lookup(b->values, key); - if (existing != NULL) { + pos = git_khash_str_lookup_index(b->values, key); + if (git_khash_str_valid_index(b->values, pos)) { + cvar_t *existing = git_khash_str_value_at(b->values, pos); char *tmp = NULL; git__free(key); @@ -255,10 +258,11 @@ static int config_set(git_config_file *cfg, const char *name, const char *value) GITERR_CHECK_ALLOC(var->value); } - if (git_hashtable_insert2(b->values, key, var, (void **)&old_value) < 0) + git_khash_str_insert2(b->values, key, var, old_var, rval); + if (rval < 0) return -1; - - cvar_free(old_value); + if (old_var != NULL) + cvar_free(old_var); if (config_write(b, key, NULL, value) < 0) { cvar_free(var); @@ -273,21 +277,22 @@ static int config_set(git_config_file *cfg, const char *name, const char *value) */ static int config_get(git_config_file *cfg, const char *name, const char **out) { - cvar_t *var; diskfile_backend *b = (diskfile_backend *)cfg; char *key; + khiter_t pos; if (normalize_name(name, &key) < 0) return -1; - var = git_hashtable_lookup(b->values, key); + pos = git_khash_str_lookup_index(b->values, key); git__free(key); /* no error message; the config system will write one */ - if (var == NULL) + if (!git_khash_str_valid_index(b->values, pos)) return GIT_ENOTFOUND; - *out = var->value; + *out = ((cvar_t *)git_khash_str_value_at(b->values, pos))->value; + return 0; } @@ -301,16 +306,19 @@ static 
int config_get_multivar( cvar_t *var; diskfile_backend *b = (diskfile_backend *)cfg; char *key; + khiter_t pos; if (normalize_name(name, &key) < 0) return -1; - var = git_hashtable_lookup(b->values, key); + pos = git_khash_str_lookup_index(b->values, key); git__free(key); - if (var == NULL) + if (!git_khash_str_valid_index(b->values, pos)) return GIT_ENOTFOUND; + var = git_khash_str_value_at(b->values, pos); + if (regex_str != NULL) { regex_t regex; int result; @@ -350,7 +358,8 @@ static int config_get_multivar( return 0; } -static int config_set_multivar(git_config_file *cfg, const char *name, const char *regexp, const char *value) +static int config_set_multivar( + git_config_file *cfg, const char *name, const char *regexp, const char *value) { int replaced = 0; cvar_t *var, *newvar; @@ -358,15 +367,20 @@ static int config_set_multivar(git_config_file *cfg, const char *name, const cha char *key; regex_t preg; int result; + khiter_t pos; assert(regexp); if (normalize_name(name, &key) < 0) return -1; - var = git_hashtable_lookup(b->values, key); - if (var == NULL) + pos = git_khash_str_lookup_index(b->values, key); + if (!git_khash_str_valid_index(b->values, pos)) { + git__free(key); return GIT_ENOTFOUND; + } + + var = git_khash_str_value_at(b->values, pos); result = regcomp(&preg, regexp, REG_EXTENDED); if (result < 0) { @@ -421,22 +435,26 @@ static int config_delete(git_config_file *cfg, const char *name) diskfile_backend *b = (diskfile_backend *)cfg; char *key; int result; + khiter_t pos; if (normalize_name(name, &key) < 0) return -1; - var = git_hashtable_lookup(b->values, key); + pos = git_khash_str_lookup_index(b->values, key); git__free(key); - if (var == NULL) + if (!git_khash_str_valid_index(b->values, pos)) return GIT_ENOTFOUND; + var = git_khash_str_value_at(b->values, pos); + if (var->next != NULL) { giterr_set(GITERR_CONFIG, "Cannot delete multivar with a single delete"); return -1; } - git_hashtable_remove(b->values, var->key); + 
git_khash_str_delete_at(b->values, pos); + result = config_write(b, var->key, NULL, NULL); cvar_free(var); @@ -843,6 +861,7 @@ static int config_parse(diskfile_backend *cfg_file) cvar_t *var, *existing; git_buf buf = GIT_BUF_INIT; int result = 0; + khiter_t pos; /* Initialize the reading position */ cfg_file->reader.read_ptr = cfg_file->reader.buffer.ptr; @@ -895,10 +914,14 @@ static int config_parse(diskfile_backend *cfg_file) var->value = var_value; /* Add or append the new config option */ - existing = git_hashtable_lookup(cfg_file->values, var->key); - if (existing == NULL) { - result = git_hashtable_insert(cfg_file->values, var->key, var); + pos = git_khash_str_lookup_index(cfg_file->values, var->key); + if (!git_khash_str_valid_index(cfg_file->values, pos)) { + git_khash_str_insert(cfg_file->values, var->key, var, result); + if (result < 0) + break; + result = 0; } else { + existing = git_khash_str_value_at(cfg_file->values, pos); while (existing->next != NULL) { existing = existing->next; } diff --git a/src/khash.h b/src/khash.h index 1a28e1184..f9d239336 100644 --- a/src/khash.h +++ b/src/khash.h @@ -157,6 +157,19 @@ typedef khint_t khiter_t; #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif +#ifndef kcalloc +#define kcalloc(N,Z) calloc(N,Z) +#endif +#ifndef kmalloc +#define kmalloc(Z) malloc(Z) +#endif +#ifndef krealloc +#define krealloc(P,Z) realloc(P,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + static const double __ac_HASH_UPPER = 0.77; #define __KHASH_TYPE(name, khkey_t, khval_t) \ @@ -167,27 +180,25 @@ static const double __ac_HASH_UPPER = 0.77; khval_t *vals; \ } kh_##name##_t; -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - extern kh_##name##_t *kh_init_##name(); \ +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t *kh_init_##name(void); \ extern void kh_destroy_##name(kh_##name##_t *h); \ extern void 
kh_clear_##name(kh_##name##_t *h); \ extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ extern void kh_del_##name(kh_##name##_t *h, khint_t x); -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - SCOPE kh_##name##_t *kh_init_##name() { \ - return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ +#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ } \ SCOPE void kh_destroy_##name(kh_##name##_t *h) \ { \ if (h) { \ - free(h->keys); free(h->flags); \ - free(h->vals); \ - free(h); \ + kfree(h->keys); kfree(h->flags); \ + kfree(h->vals); \ + kfree(h); \ } \ } \ SCOPE void kh_clear_##name(kh_##name##_t *h) \ @@ -211,7 +222,7 @@ static const double __ac_HASH_UPPER = 0.77; return __ac_iseither(h->flags, i)? h->n_buckets : i; \ } else return 0; \ } \ - SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. 
*/ \ khint32_t *new_flags = 0; \ khint_t j = 1; \ @@ -220,11 +231,18 @@ static const double __ac_HASH_UPPER = 0.77; if (new_n_buckets < 4) new_n_buckets = 4; \ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ else { /* hash table size to be changed (shrink or expand); rehash */ \ - new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) return -1; \ memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ if (h->n_buckets < new_n_buckets) { /* expand */ \ - h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + khkey_t *new_keys = (khkey_t*)krealloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { kfree(new_flags); return -1; } \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t *new_vals = (khval_t*)krealloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) { kfree(new_flags); return -1; } \ + h->vals = new_vals; \ + } \ } /* otherwise shrink */ \ } \ } \ @@ -257,22 +275,28 @@ static const double __ac_HASH_UPPER = 0.77; } \ } \ if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + h->keys = (khkey_t*)krealloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)krealloc(h->vals, new_n_buckets * sizeof(khval_t)); \ } \ - free(h->flags); /* free the working space */ \ + kfree(h->flags); /* free the working space */ \ h->flags = new_flags; \ h->n_buckets = new_n_buckets; \ h->n_occupied = h->size; \ h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ } \ + return 0; \ } \ SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ { \ 
khint_t x; \ if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ - else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ + if (h->n_buckets > (h->size<<1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ + *ret = -1; return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ + *ret = -1; return h->n_buckets; \ + } \ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ { \ khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ @@ -312,6 +336,14 @@ static const double __ac_HASH_UPPER = 0.77; } \ } +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) @@ -497,6 +529,34 @@ static inline khint_t __ac_Wang_hash(khint_t key) */ #define kh_n_buckets(h) ((h)->n_buckets) +/*! @function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! 
@function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + /* More conenient interfaces */ /*! @function diff --git a/src/khash_oid.h b/src/khash_oid.h new file mode 100644 index 000000000..96d82c759 --- /dev/null +++ b/src/khash_oid.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_khash_oid_h__ +#define INCLUDE_khash_oid_h__ + +#include "common.h" +#include "git2/oid.h" + +#define kmalloc git__malloc +#define kcalloc git__calloc +#define krealloc git__realloc +#define kfree git__free +#include "khash.h" + +__KHASH_TYPE(oid, const git_oid *, void *); +typedef khash_t(oid) git_khash_oid; + +GIT_INLINE(khint_t) hash_git_oid(const git_oid *oid) +{ + int i; + khint_t h = 0; + for (i = 0; i < 20; ++i) + h = (h << 5) - h + oid->id[i]; + return h; +} + +GIT_INLINE(int) hash_git_oid_equal(const git_oid *a, const git_oid *b) +{ + return (memcmp(a->id, b->id, sizeof(a->id)) == 0); +} + +#define GIT_KHASH_OID__IMPLEMENTATION \ + __KHASH_IMPL(oid, static inline, const git_oid *, void *, 1, hash_git_oid, hash_git_oid_equal) + +#define git_khash_oid_alloc() kh_init(oid) +#define git_khash_oid_free(h) kh_destroy(oid,h), h = NULL + +#endif diff --git a/src/khash_str.h b/src/khash_str.h new file mode 100644 index 000000000..0b840d836 --- /dev/null +++ b/src/khash_str.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. 
For full terms see the included COPYING file. + */ +#ifndef INCLUDE_khash_str_h__ +#define INCLUDE_khash_str_h__ + +#include "common.h" + +#define kmalloc git__malloc +#define kcalloc git__calloc +#define krealloc git__realloc +#define kfree git__free +#include "khash.h" + +__KHASH_TYPE(str, const char *, void *); +typedef khash_t(str) git_khash_str; + +#define GIT_KHASH_STR__IMPLEMENTATION \ + __KHASH_IMPL(str, static inline, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal) + +#define git_khash_str_alloc() kh_init(str) +#define git_khash_str_free(h) kh_destroy(str, h), h = NULL +#define git_khash_str_clear(h) kh_clear(str, h) + +#define git_khash_str_num_entries(h) kh_size(h) + +#define git_khash_str_lookup_index(h, k) kh_get(str, h, k) +#define git_khash_str_valid_index(h, idx) (idx != kh_end(h)) + +#define git_khash_str_exists(h, k) (kh_get(str, h, k) != kh_end(h)) + +#define git_khash_str_value_at(h, idx) kh_val(h, idx) +#define git_khash_str_set_value_at(h, idx, v) kh_val(h, idx) = v +#define git_khash_str_delete_at(h, idx) kh_del(str, h, idx) + +#define git_khash_str_insert(h, key, val, err) do { \ + khiter_t __pos = kh_put(str, h, key, &err); \ + if (err >= 0) kh_val(h, __pos) = val; \ + } while (0) + +#define git_khash_str_insert2(h, key, val, old, err) do { \ + khiter_t __pos = kh_put(str, h, key, &err); \ + if (err >= 0) { \ + old = (err == 0) ? 
kh_val(h, __pos) : NULL; \ + kh_val(h, __pos) = val; \ + } } while (0) + +#define git_khash_str_foreach kh_foreach +#define git_khash_str_foreach_value kh_foreach_value + +#endif diff --git a/src/refs.c b/src/refs.c index 447f3a7b6..7050b4af9 100644 --- a/src/refs.c +++ b/src/refs.c @@ -15,6 +15,8 @@ #include #include +GIT_KHASH_STR__IMPLEMENTATION; + #define DEFAULT_NESTING_LEVEL 5 #define MAX_NESTING_LEVEL 10 @@ -30,8 +32,6 @@ struct packref { char name[GIT_FLEX_ARRAY]; }; -static const int default_table_size = 32; - static int reference_read( git_buf *file_content, time_t *mtime, @@ -423,9 +423,7 @@ static int packed_load(git_repository *repo) /* First we make sure we have allocated the hash table */ if (ref_cache->packfile == NULL) { - ref_cache->packfile = git_hashtable_alloc( - default_table_size, git_hash__strhash_cb, git_hash__strcmp_cb); - + ref_cache->packfile = git_khash_str_alloc(); GITERR_CHECK_ALLOC(ref_cache->packfile); } @@ -440,7 +438,7 @@ static int packed_load(git_repository *repo) * refresh the packed refs. */ if (result == GIT_ENOTFOUND) { - git_hashtable_clear(ref_cache->packfile); + git_khash_str_clear(ref_cache->packfile); return 0; } @@ -454,7 +452,7 @@ static int packed_load(git_repository *repo) * At this point, we want to refresh the packed refs. We already * have the contents in our buffer. 
*/ - git_hashtable_clear(ref_cache->packfile); + git_khash_str_clear(ref_cache->packfile); buffer_start = (const char *)packfile.ptr; buffer_end = (const char *)(buffer_start) + packfile.size; @@ -468,6 +466,7 @@ static int packed_load(git_repository *repo) } while (buffer_start < buffer_end) { + int err; struct packref *ref = NULL; if (packed_parse_oid(&ref, &buffer_start, buffer_end) < 0) @@ -478,15 +477,16 @@ static int packed_load(git_repository *repo) goto parse_failed; } - if (git_hashtable_insert(ref_cache->packfile, ref->name, ref) < 0) - return -1; + git_khash_str_insert(ref_cache->packfile, ref->name, ref, err); + if (err < 0) + goto parse_failed; } git_buf_free(&packfile); return 0; parse_failed: - git_hashtable_free(ref_cache->packfile); + git_khash_str_free(ref_cache->packfile); ref_cache->packfile = NULL; git_buf_free(&packfile); return -1; @@ -512,7 +512,7 @@ static int _dirent_loose_listall(void *_data, git_buf *full_path) /* do not add twice a reference that exists already in the packfile */ if ((data->list_flags & GIT_REF_PACKED) != 0 && - git_hashtable_lookup(data->repo->references.packfile, file_path) != NULL) + git_khash_str_exists(data->repo->references.packfile, file_path)) return 0; if (data->list_flags != GIT_REF_LISTALL) { @@ -529,6 +529,7 @@ static int _dirent_loose_load(void *data, git_buf *full_path) void *old_ref = NULL; struct packref *ref; const char *file_path; + int err; if (git_path_isdir(full_path->ptr) == true) return git_path_direach(full_path, _dirent_loose_load, repository); @@ -538,8 +539,9 @@ static int _dirent_loose_load(void *data, git_buf *full_path) if (loose_lookup_to_packfile(&ref, repository, file_path) < 0) return -1; - if (git_hashtable_insert2(repository->references.packfile, - ref->name, ref, &old_ref) < 0) { + git_khash_str_insert2( + repository->references.packfile, ref->name, ref, old_ref, err); + if (err < 0) { git__free(ref); return -1; } @@ -734,7 +736,8 @@ static int packed_write(git_repository *repo) 
assert(repo && repo->references.packfile); - total_refs = (unsigned int)repo->references.packfile->key_count; + total_refs = + (unsigned int)git_khash_str_num_entries(repo->references.packfile); if (git_vector_init(&packing_list, total_refs, packed_sort) < 0) return -1; @@ -743,10 +746,10 @@ static int packed_write(git_repository *repo) { struct packref *reference; - GIT_HASHTABLE_FOREACH_VALUE(repo->references.packfile, reference, - /* cannot fail: vector already has the right size */ + /* cannot fail: vector already has the right size */ + git_khash_str_foreach_value(repo->references.packfile, reference, { git_vector_insert(&packing_list, reference); - ); + }); } /* sort the vector so the entries appear sorted on the packfile */ @@ -870,7 +873,8 @@ static int reference_exists(int *exists, git_repository *repo, const char *ref_n return -1; if (git_path_isfile(ref_path.ptr) == true || - git_hashtable_lookup(repo->references.packfile, ref_path.ptr) != NULL) { + git_khash_str_exists(repo->references.packfile, ref_path.ptr)) + { *exists = 1; } else { *exists = 0; @@ -936,6 +940,8 @@ static int reference_can_write( static int packed_lookup(git_reference *ref) { struct packref *pack_ref = NULL; + git_khash_str *packfile_refs; + khiter_t pos; if (packed_load(ref->owner) < 0) return -1; @@ -952,12 +958,15 @@ static int packed_lookup(git_reference *ref) } /* Look up on the packfile */ - pack_ref = git_hashtable_lookup(ref->owner->references.packfile, ref->name); - if (pack_ref == NULL) { + packfile_refs = ref->owner->references.packfile; + pos = git_khash_str_lookup_index(packfile_refs, ref->name); + if (!git_khash_str_valid_index(packfile_refs, pos)) { giterr_set(GITERR_REFERENCE, "Reference '%s' not found", ref->name); return GIT_ENOTFOUND; } + pack_ref = git_khash_str_value_at(packfile_refs, pos); + ref->flags = GIT_REF_OID | GIT_REF_PACKED; ref->mtime = ref->owner->references.packfile_time; git_oid_cpy(&ref->target.oid, &pack_ref->oid); @@ -1002,18 +1011,25 @@ static 
int reference_delete(git_reference *ref) * We need to reload the packfile, remove the reference from the * packing list, and repack */ if (ref->flags & GIT_REF_PACKED) { + git_khash_str *packfile_refs; struct packref *packref; + khiter_t pos; + /* load the existing packfile */ if (packed_load(ref->owner) < 0) return -1; - if (git_hashtable_remove2(ref->owner->references.packfile, - ref->name, (void **) &packref) < 0) { + packfile_refs = ref->owner->references.packfile; + pos = git_khash_str_lookup_index(packfile_refs, ref->name); + if (!git_khash_str_valid_index(packfile_refs, pos)) { giterr_set(GITERR_REFERENCE, "Reference %s stopped existing in the packfile", ref->name); return -1; } + packref = git_khash_str_value_at(packfile_refs, pos); + git_khash_str_delete_at(packfile_refs, pos); + git__free(packref); if (packed_write(ref->owner) < 0) return -1; @@ -1467,14 +1483,15 @@ int git_reference_foreach( /* list all the packed references first */ if (list_flags & GIT_REF_PACKED) { const char *ref_name; + void *ref; if (packed_load(repo) < 0) return -1; - GIT_HASHTABLE_FOREACH_KEY(repo->references.packfile, ref_name, + git_khash_str_foreach(repo->references.packfile, ref_name, ref, { if (callback(ref_name, payload) < 0) return 0; - ); + }); } /* now list the loose references, trying not to @@ -1538,10 +1555,11 @@ void git_repository__refcache_free(git_refcache *refs) if (refs->packfile) { struct packref *reference; - GIT_HASHTABLE_FOREACH_VALUE( - refs->packfile, reference, git__free(reference)); + git_khash_str_foreach_value(refs->packfile, reference, { + git__free(reference); + }); - git_hashtable_free(refs->packfile); + git_khash_str_free(refs->packfile); } } diff --git a/src/refs.h b/src/refs.h index e4a225ca3..39648e6d9 100644 --- a/src/refs.h +++ b/src/refs.h @@ -10,7 +10,7 @@ #include "common.h" #include "git2/oid.h" #include "git2/refs.h" -#include "hashtable.h" +#include "khash_str.h" #define GIT_REFS_DIR "refs/" #define GIT_REFS_HEADS_DIR GIT_REFS_DIR 
"heads/" @@ -46,7 +46,7 @@ struct git_reference { }; typedef struct { - git_hashtable *packfile; + git_khash_str *packfile; time_t packfile_time; } git_refcache; diff --git a/src/repository.h b/src/repository.h index 178f29742..f53fa697e 100644 --- a/src/repository.h +++ b/src/repository.h @@ -13,13 +13,13 @@ #include "git2/repository.h" #include "git2/object.h" -#include "hashtable.h" #include "index.h" #include "cache.h" #include "refs.h" #include "buffer.h" #include "odb.h" #include "attr.h" +#include "khash_str.h" #define DOT_GIT ".git" #define GIT_DIR DOT_GIT "/" @@ -83,7 +83,7 @@ struct git_repository { git_cache objects; git_refcache references; git_attr_cache attrcache; - git_hashtable *submodules; + git_khash_str *submodules; char *path_repository; char *workdir; diff --git a/src/revwalk.c b/src/revwalk.c index 557966b94..5867e133e 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ -8,15 +8,17 @@ #include "common.h" #include "commit.h" #include "odb.h" -#include "hashtable.h" #include "pqueue.h" #include "pool.h" +#include "khash_oid.h" #include "git2/revwalk.h" #include "git2/merge.h" #include +GIT_KHASH_OID__IMPLEMENTATION; + #define PARENT1 (1 << 0) #define PARENT2 (1 << 1) #define RESULT (1 << 2) @@ -46,7 +48,7 @@ struct git_revwalk { git_repository *repo; git_odb *odb; - git_hashtable *commits; + git_khash_oid *commits; git_pool commit_pool; commit_list *iterator_topo; @@ -123,15 +125,6 @@ static commit_object *commit_list_pop(commit_list **stack) return item; } -static uint32_t object_table_hash(const void *key, int hash_id) -{ - uint32_t r; - const git_oid *id = key; - - memcpy(&r, id->id + (hash_id * sizeof(uint32_t)), sizeof(r)); - return r; -} - #define PARENTS_PER_COMMIT 2 #define COMMIT_ALLOC \ (sizeof(commit_object) + PARENTS_PER_COMMIT * sizeof(commit_object *)) @@ -155,9 +148,13 @@ static commit_object **alloc_parents( static commit_object *commit_lookup(git_revwalk *walk, const git_oid *oid) { commit_object *commit; + khiter_t pos; + int 
ret; - if ((commit = git_hashtable_lookup(walk->commits, oid)) != NULL) - return commit; + /* lookup and reserve space if not already present */ + pos = kh_get(oid, walk->commits, oid); + if (pos != kh_end(walk->commits)) + return kh_value(walk->commits, pos); commit = alloc_commit(walk); if (commit == NULL) @@ -165,8 +162,9 @@ static commit_object *commit_lookup(git_revwalk *walk, const git_oid *oid) git_oid_cpy(&commit->oid, oid); - if (git_hashtable_insert(walk->commits, &commit->oid, commit) < 0) - return NULL; + pos = kh_put(oid, walk->commits, &commit->oid, &ret); + assert(ret != 0); + kh_value(walk->commits, pos) = commit; return commit; } @@ -728,9 +726,7 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo) memset(walk, 0x0, sizeof(git_revwalk)); - walk->commits = git_hashtable_alloc(64, - object_table_hash, - (git_hash_keyeq_ptr)git_oid_cmp); + walk->commits = git_khash_oid_alloc(); GITERR_CHECK_ALLOC(walk->commits); if (git_pqueue_init(&walk->iterator_time, 8, commit_time_cmp) < 0 || @@ -761,7 +757,7 @@ void git_revwalk_free(git_revwalk *walk) git_revwalk_reset(walk); git_odb_free(walk->odb); - git_hashtable_free(walk->commits); + git_khash_oid_free(walk->commits); git_pool_clear(&walk->commit_pool); git_pqueue_free(&walk->iterator_time); git_vector_free(&walk->twos); @@ -823,12 +819,12 @@ void git_revwalk_reset(git_revwalk *walk) assert(walk); - GIT_HASHTABLE_FOREACH_VALUE(walk->commits, commit, + kh_foreach_value(walk->commits, commit, { commit->seen = 0; commit->in_degree = 0; commit->topo_delay = 0; commit->uninteresting = 0; - ); + }); git_pqueue_clear(&walk->iterator_time); commit_list_free(&walk->iterator_topo); diff --git a/src/submodule.c b/src/submodule.c index 907e43e88..8072053af 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -12,7 +12,6 @@ #include "git2/index.h" #include "git2/submodule.h" #include "buffer.h" -#include "hashtable.h" #include "vector.h" #include "posix.h" #include "config_file.h" @@ -32,41 +31,32 @@ 
static git_cvar_map _sm_ignore_map[] = { {GIT_CVAR_STRING, "none", GIT_SUBMODULE_IGNORE_NONE} }; -static uint32_t strhash_no_trailing_slash(const void *key, int hash_id) +static inline khint_t str_hash_no_trailing_slash(const char *s) { - static uint32_t hash_seeds[GIT_HASHTABLE_HASHES] = { - 0x01010101, - 0x12345678, - 0xFEDCBA98 - }; + khint_t h; - size_t key_len = key ? strlen((const char *)key) : 0; - if (key_len > 0 && ((const char *)key)[key_len - 1] == '/') - key_len--; + for (h = 0; *s; ++s) + if (s[1] || *s != '/') + h = (h << 5) - h + *s; - return git__hash(key, (int)key_len, hash_seeds[hash_id]); + return h; } -static int strcmp_no_trailing_slash(const void *a, const void *b) +static inline int str_equal_no_trailing_slash(const char *a, const char *b) { - const char *astr = (const char *)a; - const char *bstr = (const char *)b; - size_t alen = a ? strlen(astr) : 0; - size_t blen = b ? strlen(bstr) : 0; - int cmp; + size_t alen = a ? strlen(a) : 0; + size_t blen = b ? strlen(b) : 0; - if (alen > 0 && astr[alen - 1] == '/') + if (alen && a[alen - 1] == '/') alen--; - if (blen > 0 && bstr[blen - 1] == '/') + if (blen && b[blen - 1] == '/') blen--; - cmp = strncmp(astr, bstr, min(alen, blen)); - if (cmp == 0) - cmp = (alen < blen) ? -1 : (alen > blen) ? 
1 : 0; - - return cmp; + return (alen == blen && strncmp(a, b, alen) == 0); } +__KHASH_IMPL(str, static inline, const char *, void *, 1, str_hash_no_trailing_slash, str_equal_no_trailing_slash); + static git_submodule *submodule_alloc(const char *name) { git_submodule *sm = git__calloc(1, sizeof(git_submodule)); @@ -99,13 +89,18 @@ static void submodule_release(git_submodule *sm, int decr) } static int submodule_from_entry( - git_hashtable *smcfg, git_index_entry *entry) + git_khash_str *smcfg, git_index_entry *entry) { git_submodule *sm; void *old_sm; + khiter_t pos; + int error; - sm = git_hashtable_lookup(smcfg, entry->path); - if (!sm) + pos = git_khash_str_lookup_index(smcfg, entry->path); + + if (git_khash_str_valid_index(smcfg, pos)) + sm = git_khash_str_value_at(smcfg, pos); + else sm = submodule_alloc(entry->path); git_oid_cpy(&sm->oid, &entry->oid); @@ -120,7 +115,8 @@ static int submodule_from_entry( goto fail; } - if (git_hashtable_insert2(smcfg, sm->path, sm, &old_sm) < 0) + git_khash_str_insert2(smcfg, sm->path, sm, old_sm, error); + if (error < 0) goto fail; sm->refcount++; @@ -139,13 +135,15 @@ fail: static int submodule_from_config( const char *key, const char *value, void *data) { - git_hashtable *smcfg = data; + git_khash_str *smcfg = data; const char *namestart; const char *property; git_buf name = GIT_BUF_INIT; git_submodule *sm; void *old_sm = NULL; bool is_path; + khiter_t pos; + int error; if (git__prefixcmp(key, "submodule.") != 0) return 0; @@ -160,32 +158,40 @@ static int submodule_from_config( if (git_buf_set(&name, namestart, property - namestart - 1) < 0) return -1; - sm = git_hashtable_lookup(smcfg, name.ptr); - if (!sm && is_path) - sm = git_hashtable_lookup(smcfg, value); - if (!sm) + pos = git_khash_str_lookup_index(smcfg, name.ptr); + if (!git_khash_str_valid_index(smcfg, pos) && is_path) + pos = git_khash_str_lookup_index(smcfg, value); + if (!git_khash_str_valid_index(smcfg, pos)) sm = submodule_alloc(name.ptr); + else + sm = 
git_khash_str_value_at(smcfg, pos); if (!sm) goto fail; if (strcmp(sm->name, name.ptr) != 0) { assert(sm->path == sm->name); sm->name = git_buf_detach(&name); - if (git_hashtable_insert2(smcfg, sm->name, sm, &old_sm) < 0) + + git_khash_str_insert2(smcfg, sm->name, sm, old_sm, error); + if (error < 0) goto fail; sm->refcount++; } else if (is_path && strcmp(sm->path, value) != 0) { assert(sm->path == sm->name); - if ((sm->path = git__strdup(value)) == NULL || - git_hashtable_insert2(smcfg, sm->path, sm, &old_sm) < 0) + sm->path = git__strdup(value); + if (sm->path == NULL) + goto fail; + + git_khash_str_insert2(smcfg, sm->path, sm, old_sm, error); + if (error < 0) goto fail; sm->refcount++; } git_buf_free(&name); if (old_sm && ((git_submodule *)old_sm) != sm) { - /* TODO: log entry about multiple submodules with same path */ + /* TODO: log warning about multiple submodules with same path */ submodule_release(old_sm, 1); } @@ -241,7 +247,7 @@ static int load_submodule_config(git_repository *repo) git_index *index; unsigned int i, max_i; git_oid gitmodules_oid; - git_hashtable *smcfg; + git_khash_str *smcfg; struct git_config_file *mods = NULL; if (repo->submodules) @@ -251,8 +257,7 @@ static int load_submodule_config(git_repository *repo) * under both its name and its path. These are usually the same, but * that is not guaranteed. 
*/ - smcfg = git_hashtable_alloc( - 4, strhash_no_trailing_slash, strcmp_no_trailing_slash); + smcfg = git_khash_str_alloc(); GITERR_CHECK_ALLOC(smcfg); /* scan index for gitmodules (and .gitmodules entry) */ @@ -302,13 +307,13 @@ cleanup: if (mods != NULL) git_config_file_free(mods); if (error) - git_hashtable_free(smcfg); + git_khash_str_free(smcfg); return error; } void git_submodule_config_free(git_repository *repo) { - git_hashtable *smcfg = repo->submodules; + git_khash_str *smcfg = repo->submodules; git_submodule *sm; repo->submodules = NULL; @@ -316,8 +321,10 @@ void git_submodule_config_free(git_repository *repo) if (smcfg == NULL) return; - GIT_HASHTABLE_FOREACH_VALUE(smcfg, sm, { submodule_release(sm,1); }); - git_hashtable_free(smcfg); + git_khash_str_foreach_value(smcfg, sm, { + submodule_release(sm,1); + }); + git_khash_str_free(smcfg); } static int submodule_cmp(const void *a, const void *b) @@ -338,19 +345,18 @@ int git_submodule_foreach( if ((error = load_submodule_config(repo)) < 0) return error; - GIT_HASHTABLE_FOREACH_VALUE( - repo->submodules, sm, { - /* usually the following will not come into play */ - if (sm->refcount > 1) { - if (git_vector_bsearch(&seen, sm) != GIT_ENOTFOUND) - continue; - if ((error = git_vector_insert(&seen, sm)) < 0) - break; - } - - if ((error = callback(sm->name, payload)) < 0) + git_khash_str_foreach_value(repo->submodules, sm, { + /* usually the following will not come into play */ + if (sm->refcount > 1) { + if (git_vector_bsearch(&seen, sm) != GIT_ENOTFOUND) + continue; + if ((error = git_vector_insert(&seen, sm)) < 0) break; - }); + } + + if ((error = callback(sm->name, payload)) < 0) + break; + }); git_vector_free(&seen); @@ -362,15 +368,17 @@ int git_submodule_lookup( git_repository *repo, const char *name) /* trailing slash is allowed */ { - git_submodule *sm; + khiter_t pos; if (load_submodule_config(repo) < 0) return -1; - sm = git_hashtable_lookup(repo->submodules, name); + pos = 
git_khash_str_lookup_index(repo->submodules, name); + if (!git_khash_str_valid_index(repo->submodules, pos)) + return GIT_ENOTFOUND; if (sm_ptr) - *sm_ptr = sm; + *sm_ptr = git_khash_str_value_at(repo->submodules, pos); - return sm ? 0 : GIT_ENOTFOUND; + return 0; } From c2b670436f4cc8901811ae0348f3c56150d1ccd5 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 25 Apr 2012 15:20:28 -0700 Subject: [PATCH 8/8] Rename git_khash_str to git_strmap, etc. This renames `git_khash_str` to `git_strmap`, `git_khash_oid` to `git_oidmap`, and deletes `git_hashtable` from the tree, plus adds unit tests for `git_strmap`. --- src/attr.c | 48 ++--- src/attr.h | 6 +- src/attr_file.h | 1 - src/config_cache.c | 1 - src/config_file.c | 58 +++--- src/hashtable.c | 328 ---------------------------------- src/hashtable.h | 102 ----------- src/khash_str.h | 54 ------ src/{khash_oid.h => oidmap.h} | 12 +- src/refs.c | 46 ++--- src/refs.h | 4 +- src/repository.h | 4 +- src/revwalk.c | 10 +- src/strmap.h | 54 ++++++ src/submodule.c | 46 ++--- tests-clar/core/strmap.c | 102 +++++++++++ tests/t07-hashtable.c | 189 -------------------- tests/test_main.c | 2 - 18 files changed, 273 insertions(+), 794 deletions(-) delete mode 100644 src/hashtable.c delete mode 100644 src/hashtable.h delete mode 100644 src/khash_str.h rename src/{khash_oid.h => oidmap.h} (77%) create mode 100644 src/strmap.h create mode 100644 tests-clar/core/strmap.c delete mode 100644 tests/t07-hashtable.c diff --git a/src/attr.c b/src/attr.c index 1d7f3aa22..3e3a7e749 100644 --- a/src/attr.c +++ b/src/attr.c @@ -3,7 +3,7 @@ #include "config.h" #include -GIT_KHASH_STR__IMPLEMENTATION; +GIT__USE_STRMAP; static int collect_attr_files( git_repository *repo, const char *path, git_vector *files); @@ -126,14 +126,14 @@ int git_attr_foreach( git_attr_file *file; git_attr_rule *rule; git_attr_assignment *assign; - git_khash_str *seen = NULL; + git_strmap *seen = NULL; if ((error = git_attr_path__init( &path, pathname, 
git_repository_workdir(repo))) < 0 || (error = collect_attr_files(repo, pathname, &files)) < 0) return error; - seen = git_khash_str_alloc(); + seen = git_strmap_alloc(); GITERR_CHECK_ALLOC(seen); git_vector_foreach(&files, i, file) { @@ -142,10 +142,10 @@ int git_attr_foreach( git_vector_foreach(&rule->assigns, k, assign) { /* skip if higher priority assignment was already seen */ - if (git_khash_str_exists(seen, assign->name)) + if (git_strmap_exists(seen, assign->name)) continue; - git_khash_str_insert(seen, assign->name, assign, error); + git_strmap_insert(seen, assign->name, assign, error); if (error >= 0) error = callback(assign->name, assign->value, payload); @@ -156,7 +156,7 @@ int git_attr_foreach( } cleanup: - git_khash_str_free(seen); + git_strmap_free(seen); git_vector_free(&files); return error; @@ -200,12 +200,12 @@ int git_attr_add_macro( bool git_attr_cache__is_cached(git_repository *repo, const char *path) { const char *cache_key = path; - git_khash_str *files = git_repository_attr_cache(repo)->files; + git_strmap *files = git_repository_attr_cache(repo)->files; if (repo && git__prefixcmp(cache_key, git_repository_workdir(repo)) == 0) cache_key += strlen(git_repository_workdir(repo)); - return git_khash_str_exists(files, cache_key); + return git_strmap_exists(files, cache_key); } int git_attr_cache__lookup_or_create_file( @@ -220,9 +220,9 @@ int git_attr_cache__lookup_or_create_file( git_attr_file *file = NULL; khiter_t pos; - pos = git_khash_str_lookup_index(cache->files, key); - if (git_khash_str_valid_index(cache->files, pos)) { - *file_ptr = git_khash_str_value_at(cache->files, pos); + pos = git_strmap_lookup_index(cache->files, key); + if (git_strmap_valid_index(cache->files, pos)) { + *file_ptr = git_strmap_value_at(cache->files, pos); return 0; } @@ -240,7 +240,7 @@ int git_attr_cache__lookup_or_create_file( error = git_attr_file__set_path(repo, key, file); if (!error) { - git_khash_str_insert(cache->files, file->path, file, error); + 
git_strmap_insert(cache->files, file->path, file, error); if (error > 0) error = 0; } @@ -383,13 +383,13 @@ int git_attr_cache__init(git_repository *repo) /* allocate hashtable for attribute and ignore file contents */ if (cache->files == NULL) { - cache->files = git_khash_str_alloc(); + cache->files = git_strmap_alloc(); GITERR_CHECK_ALLOC(cache->files); } /* allocate hashtable for attribute macros */ if (cache->macros == NULL) { - cache->macros = git_khash_str_alloc(); + cache->macros = git_strmap_alloc(); GITERR_CHECK_ALLOC(cache->macros); } @@ -416,21 +416,21 @@ void git_attr_cache_flush( if (cache->files != NULL) { git_attr_file *file; - git_khash_str_foreach_value(cache->files, file, { + git_strmap_foreach_value(cache->files, file, { git_attr_file__free(file); }); - git_khash_str_free(cache->files); + git_strmap_free(cache->files); } if (cache->macros != NULL) { git_attr_rule *rule; - git_khash_str_foreach_value(cache->macros, rule, { + git_strmap_foreach_value(cache->macros, rule, { git_attr_rule__free(rule); }); - git_khash_str_free(cache->macros); + git_strmap_free(cache->macros); } git_pool_clear(&cache->pool); @@ -440,28 +440,28 @@ void git_attr_cache_flush( int git_attr_cache__insert_macro(git_repository *repo, git_attr_rule *macro) { - git_khash_str *macros = git_repository_attr_cache(repo)->macros; + git_strmap *macros = git_repository_attr_cache(repo)->macros; int error; /* TODO: generate warning log if (macro->assigns.length == 0) */ if (macro->assigns.length == 0) return 0; - git_khash_str_insert(macros, macro->match.pattern, macro, error); + git_strmap_insert(macros, macro->match.pattern, macro, error); return (error < 0) ? 
-1 : 0; } git_attr_rule *git_attr_cache__lookup_macro( git_repository *repo, const char *name) { - git_khash_str *macros = git_repository_attr_cache(repo)->macros; + git_strmap *macros = git_repository_attr_cache(repo)->macros; khiter_t pos; - pos = git_khash_str_lookup_index(macros, name); + pos = git_strmap_lookup_index(macros, name); - if (!git_khash_str_valid_index(macros, pos)) + if (!git_strmap_valid_index(macros, pos)) return NULL; - return (git_attr_rule *)git_khash_str_value_at(macros, pos); + return (git_attr_rule *)git_strmap_value_at(macros, pos); } diff --git a/src/attr.h b/src/attr.h index 75e98607f..43caf1b81 100644 --- a/src/attr.h +++ b/src/attr.h @@ -8,7 +8,7 @@ #define INCLUDE_attr_h__ #include "attr_file.h" -#include "khash_str.h" +#include "strmap.h" #define GIT_ATTR_CONFIG "core.attributesfile" #define GIT_IGNORE_CONFIG "core.excludesfile" @@ -16,8 +16,8 @@ typedef struct { int initialized; git_pool pool; - git_khash_str *files; /* hash path to git_attr_file of rules */ - git_khash_str *macros; /* hash name to vector */ + git_strmap *files; /* hash path to git_attr_file of rules */ + git_strmap *macros; /* hash name to vector */ const char *cfg_attr_file; /* cached value of core.attributesfile */ const char *cfg_excl_file; /* cached value of core.excludesfile */ } git_attr_cache; diff --git a/src/attr_file.h b/src/attr_file.h index 9788a2295..677534158 100644 --- a/src/attr_file.h +++ b/src/attr_file.h @@ -9,7 +9,6 @@ #include "git2/attr.h" #include "vector.h" -#include "hashtable.h" #include "pool.h" #define GIT_ATTR_FILE ".gitattributes" diff --git a/src/config_cache.c b/src/config_cache.c index 5e20847f5..3679a9646 100644 --- a/src/config_cache.c +++ b/src/config_cache.c @@ -7,7 +7,6 @@ #include "common.h" #include "fileops.h" -#include "hashtable.h" #include "config.h" #include "git2/config.h" #include "vector.h" diff --git a/src/config_file.c b/src/config_file.c index a0ce329fc..7841ea00f 100644 --- a/src/config_file.c +++ 
b/src/config_file.c @@ -12,13 +12,13 @@ #include "buffer.h" #include "git2/config.h" #include "git2/types.h" -#include "khash_str.h" +#include "strmap.h" #include #include #include -GIT_KHASH_STR__IMPLEMENTATION; +GIT__USE_STRMAP; typedef struct cvar_t { struct cvar_t *next; @@ -72,7 +72,7 @@ typedef struct { typedef struct { git_config_file parent; - git_khash_str *values; + git_strmap *values; struct { git_buf buffer; @@ -132,21 +132,21 @@ static int normalize_name(const char *in, char **out) return 0; } -static void free_vars(git_khash_str *values) +static void free_vars(git_strmap *values) { cvar_t *var = NULL; if (values == NULL) return; - git_khash_str_foreach_value(values, var, + git_strmap_foreach_value(values, var, while (var != NULL) { cvar_t *next = CVAR_LIST_NEXT(var); cvar_free(var); var = next; }); - git_khash_str_free(values); + git_strmap_free(values); } static int config_open(git_config_file *cfg) @@ -154,7 +154,7 @@ static int config_open(git_config_file *cfg) int res; diskfile_backend *b = (diskfile_backend *)cfg; - b->values = git_khash_str_alloc(); + b->values = git_strmap_alloc(); GITERR_CHECK_ALLOC(b->values); git_buf_init(&b->reader.buffer, 0); @@ -196,7 +196,7 @@ static int file_foreach(git_config_file *backend, int (*fn)(const char *, const if (!b->values) return 0; - git_khash_str_foreach(b->values, key, var, + git_strmap_foreach(b->values, key, var, do { if (fn(key, var->value, data) < 0) break; @@ -223,9 +223,9 @@ static int config_set(git_config_file *cfg, const char *name, const char *value) * Try to find it in the existing values and update it if it * only has one value. 
*/ - pos = git_khash_str_lookup_index(b->values, key); - if (git_khash_str_valid_index(b->values, pos)) { - cvar_t *existing = git_khash_str_value_at(b->values, pos); + pos = git_strmap_lookup_index(b->values, key); + if (git_strmap_valid_index(b->values, pos)) { + cvar_t *existing = git_strmap_value_at(b->values, pos); char *tmp = NULL; git__free(key); @@ -258,7 +258,7 @@ static int config_set(git_config_file *cfg, const char *name, const char *value) GITERR_CHECK_ALLOC(var->value); } - git_khash_str_insert2(b->values, key, var, old_var, rval); + git_strmap_insert2(b->values, key, var, old_var, rval); if (rval < 0) return -1; if (old_var != NULL) @@ -284,14 +284,14 @@ static int config_get(git_config_file *cfg, const char *name, const char **out) if (normalize_name(name, &key) < 0) return -1; - pos = git_khash_str_lookup_index(b->values, key); + pos = git_strmap_lookup_index(b->values, key); git__free(key); /* no error message; the config system will write one */ - if (!git_khash_str_valid_index(b->values, pos)) + if (!git_strmap_valid_index(b->values, pos)) return GIT_ENOTFOUND; - *out = ((cvar_t *)git_khash_str_value_at(b->values, pos))->value; + *out = ((cvar_t *)git_strmap_value_at(b->values, pos))->value; return 0; } @@ -311,13 +311,13 @@ static int config_get_multivar( if (normalize_name(name, &key) < 0) return -1; - pos = git_khash_str_lookup_index(b->values, key); + pos = git_strmap_lookup_index(b->values, key); git__free(key); - if (!git_khash_str_valid_index(b->values, pos)) + if (!git_strmap_valid_index(b->values, pos)) return GIT_ENOTFOUND; - var = git_khash_str_value_at(b->values, pos); + var = git_strmap_value_at(b->values, pos); if (regex_str != NULL) { regex_t regex; @@ -374,13 +374,13 @@ static int config_set_multivar( if (normalize_name(name, &key) < 0) return -1; - pos = git_khash_str_lookup_index(b->values, key); - if (!git_khash_str_valid_index(b->values, pos)) { + pos = git_strmap_lookup_index(b->values, key); + if 
(!git_strmap_valid_index(b->values, pos)) { git__free(key); return GIT_ENOTFOUND; } - var = git_khash_str_value_at(b->values, pos); + var = git_strmap_value_at(b->values, pos); result = regcomp(&preg, regexp, REG_EXTENDED); if (result < 0) { @@ -440,20 +440,20 @@ static int config_delete(git_config_file *cfg, const char *name) if (normalize_name(name, &key) < 0) return -1; - pos = git_khash_str_lookup_index(b->values, key); + pos = git_strmap_lookup_index(b->values, key); git__free(key); - if (!git_khash_str_valid_index(b->values, pos)) + if (!git_strmap_valid_index(b->values, pos)) return GIT_ENOTFOUND; - var = git_khash_str_value_at(b->values, pos); + var = git_strmap_value_at(b->values, pos); if (var->next != NULL) { giterr_set(GITERR_CONFIG, "Cannot delete multivar with a single delete"); return -1; } - git_khash_str_delete_at(b->values, pos); + git_strmap_delete_at(b->values, pos); result = config_write(b, var->key, NULL, NULL); @@ -914,14 +914,14 @@ static int config_parse(diskfile_backend *cfg_file) var->value = var_value; /* Add or append the new config option */ - pos = git_khash_str_lookup_index(cfg_file->values, var->key); - if (!git_khash_str_valid_index(cfg_file->values, pos)) { - git_khash_str_insert(cfg_file->values, var->key, var, result); + pos = git_strmap_lookup_index(cfg_file->values, var->key); + if (!git_strmap_valid_index(cfg_file->values, pos)) { + git_strmap_insert(cfg_file->values, var->key, var, result); if (result < 0) break; result = 0; } else { - existing = git_khash_str_value_at(cfg_file->values, pos); + existing = git_strmap_value_at(cfg_file->values, pos); while (existing->next != NULL) { existing = existing->next; } diff --git a/src/hashtable.c b/src/hashtable.c deleted file mode 100644 index e2f131cf1..000000000 --- a/src/hashtable.c +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (C) 2009-2012 the libgit2 contributors - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. 
For full terms see the included COPYING file. - */ - -#include "common.h" -#include "repository.h" -#include "commit.h" - -#define MAX_LOOPS 5 -static const double max_load_factor = 0.65; - -static int resize_to(git_hashtable *self, size_t new_size); -static int set_size(git_hashtable *self, size_t new_size); -static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id); -static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other); -static int node_insert(git_hashtable *self, git_hashtable_node *new_node); -static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size); -static void reinsert_stash(git_hashtable *self); - -static int resize_to(git_hashtable *self, size_t new_size) -{ - git_hashtable_node *old_nodes = self->nodes; - size_t old_size = self->size; - git_hashtable_node old_stash[GIT_HASHTABLE_STASH_SIZE]; - size_t old_stash_count = self->stash_count; - - self->is_resizing = 1; - - if (old_stash_count > 0) - memcpy(old_stash, self->stash, - old_stash_count * sizeof(git_hashtable_node)); - - do { - self->size = new_size; - self->size_mask = new_size - 1; - self->key_count = 0; - self->stash_count = 0; - self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size); - GITERR_CHECK_ALLOC(self->nodes); - - if (insert_nodes(self, old_nodes, old_size) == 0 && - insert_nodes(self, old_stash, old_stash_count) == 0) - self->is_resizing = 0; - else { - new_size *= 2; - git__free(self->nodes); - } - } while (self->is_resizing); - - git__free(old_nodes); - return 0; -} - -static int set_size(git_hashtable *self, size_t new_size) -{ - self->nodes = - git__realloc(self->nodes, new_size * sizeof(git_hashtable_node)); - GITERR_CHECK_ALLOC(self->nodes); - - if (new_size > self->size) - memset(&self->nodes[self->size], 0x0, - (new_size - self->size) * sizeof(git_hashtable_node)); - - self->size = new_size; - self->size_mask = new_size - 1; - return 0; -} - 
-GIT_INLINE(git_hashtable_node *)node_with_hash( - git_hashtable *self, const void *key, int hash_id) -{ - size_t pos = self->hash(key, hash_id) & self->size_mask; - return git_hashtable_node_at(self->nodes, pos); -} - -GIT_INLINE(void) node_swap_with( - git_hashtable_node *self, git_hashtable_node *other) -{ - git_hashtable_node tmp = *self; - *self = *other; - *other = tmp; -} - -static int node_insert(git_hashtable *self, git_hashtable_node *new_node) -{ - int iteration, hash_id; - git_hashtable_node *node; - - for (iteration = 0; iteration < MAX_LOOPS; iteration++) { - for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { - node = node_with_hash(self, new_node->key, hash_id); - node_swap_with(new_node, node); - if (new_node->key == 0x0) { - self->key_count++; - return 0; - } - } - } - - /* Insert into stash if there is space */ - if (self->stash_count < GIT_HASHTABLE_STASH_SIZE) { - node_swap_with(new_node, &self->stash[self->stash_count++]); - self->key_count++; - return 0; - } - - /* Failed to insert node. 
Hashtable is currently resizing */ - assert(!self->is_resizing); - - if (resize_to(self, self->size * 2) < 0) - return -1; - - return git_hashtable_insert(self, new_node->key, new_node->value); -} - -static int insert_nodes( - git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size) -{ - size_t i; - - for (i = 0; i < old_size; ++i) { - git_hashtable_node *node = git_hashtable_node_at(old_nodes, i); - if (node->key && node_insert(self, node) < 0) - return -1; - } - - return 0; -} - -static void reinsert_stash(git_hashtable *self) -{ - int stash_count; - struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE]; - - if (self->stash_count <= 0) - return; - - memcpy(stash, self->stash, self->stash_count * sizeof(git_hashtable_node)); - stash_count = self->stash_count; - self->stash_count = 0; - - /* the node_insert() calls *cannot* fail because the stash is empty */ - insert_nodes(self, stash, stash_count); -} - -git_hashtable *git_hashtable_alloc( - size_t min_size, - git_hash_ptr hash, - git_hash_keyeq_ptr key_eq) -{ - git_hashtable *table; - - assert(hash && key_eq); - - if ((table = git__malloc(sizeof(*table))) == NULL) - return NULL; - - memset(table, 0x0, sizeof(git_hashtable)); - - table->hash = hash; - table->key_equal = key_eq; - - min_size = git__size_t_powerof2(min_size < 8 ? 
8 : min_size); - set_size(table, min_size); - - return table; -} - -void git_hashtable_clear(git_hashtable *self) -{ - assert(self); - - memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size); - - self->stash_count = 0; - self->key_count = 0; -} - -void git_hashtable_free(git_hashtable *self) -{ - assert(self); - - git__free(self->nodes); - git__free(self); -} - - -int git_hashtable_insert2( - git_hashtable *self, const void *key, void *value, void **old_value) -{ - int hash_id; - git_hashtable_node *node; - - assert(self && self->nodes); - - *old_value = NULL; - - for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { - node = node_with_hash(self, key, hash_id); - - if (!node->key) { - node->key = key; - node->value = value; - self->key_count++; - return 0; - } - - if (key == node->key || self->key_equal(key, node->key) == 0) { - *old_value = node->value; - node->key = key; - node->value = value; - return 0; - } - } - - /* no space in table; must do cuckoo dance */ - { - git_hashtable_node x; - x.key = key; - x.value = value; - return node_insert(self, &x); - } -} - -static git_hashtable_node *find_node(git_hashtable *self, const void *key) -{ - int hash_id, count = 0; - git_hashtable_node *node; - - for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { - node = node_with_hash(self, key, hash_id); - if (node->key) { - ++count; - if (self->key_equal(key, node->key) == 0) - return node; - } - } - - /* check stash if not found but all slots were filled */ - if (count == GIT_HASHTABLE_HASHES) { - for (count = 0; count < self->stash_count; ++count) - if (self->key_equal(key, self->stash[count].key) == 0) - return &self->stash[count]; - } - - return NULL; -} - -static void reset_stash(git_hashtable *self, git_hashtable_node *node) -{ - /* if node was in stash, then compact stash */ - ssize_t offset = node - self->stash; - - if (offset >= 0 && offset < self->stash_count) { - if (offset < self->stash_count - 1) - memmove(node, node + 1, 
(self->stash_count - offset) * - sizeof(git_hashtable_node)); - self->stash_count--; - } - - reinsert_stash(self); -} - -void *git_hashtable_lookup(git_hashtable *self, const void *key) -{ - git_hashtable_node *node; - assert(self && key); - node = find_node(self, key); - return node ? node->value : NULL; -} - -int git_hashtable_remove2( - git_hashtable *self, const void *key, void **old_value) -{ - git_hashtable_node *node; - - assert(self && self->nodes); - - node = find_node(self, key); - if (node) { - *old_value = node->value; - - node->key = NULL; - node->value = NULL; - self->key_count--; - - reset_stash(self, node); - return 0; - } - - return GIT_ENOTFOUND; -} - -int git_hashtable_merge(git_hashtable *self, git_hashtable *other) -{ - size_t new_size = git__size_t_powerof2(self->size + other->size); - - if (resize_to(self, new_size) < 0) - return -1; - - if (insert_nodes(self, other->nodes, other->key_count) < 0) - return -1; - - return insert_nodes(self, other->stash, other->stash_count); -} - - -/** - * Standard string - */ -uint32_t git_hash__strhash_cb(const void *key, int hash_id) -{ - static uint32_t hash_seeds[GIT_HASHTABLE_HASHES] = { - 2147483647, - 0x5d20bb23, - 0x7daaab3c - }; - - size_t key_len = strlen((const char *)key); - - /* won't take hash of strings longer than 2^31 right now */ - assert(key_len == (size_t)((int)key_len)); - - return git__hash(key, (int)key_len, hash_seeds[hash_id]); -} diff --git a/src/hashtable.h b/src/hashtable.h deleted file mode 100644 index 448487507..000000000 --- a/src/hashtable.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (C) 2009-2012 the libgit2 contributors - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. 
- */ -#ifndef INCLUDE_hashtable_h__ -#define INCLUDE_hashtable_h__ - -#include "git2/common.h" -#include "git2/oid.h" -#include "git2/odb.h" -#include "common.h" - -#define GIT_HASHTABLE_HASHES 3 - -typedef uint32_t (*git_hash_ptr)(const void *, int hash_id); -typedef int (*git_hash_keyeq_ptr)(const void *key_a, const void *key_b); - -struct git_hashtable_node { - const void *key; - void *value; -}; - -#define GIT_HASHTABLE_STASH_SIZE 3 - -struct git_hashtable { - struct git_hashtable_node *nodes; - - size_t size_mask; - size_t size; - size_t key_count; - - struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE]; - int stash_count; - - int is_resizing; - - git_hash_ptr hash; - git_hash_keyeq_ptr key_equal; -}; - -typedef struct git_hashtable_node git_hashtable_node; -typedef struct git_hashtable git_hashtable; - -git_hashtable *git_hashtable_alloc( - size_t min_size, - git_hash_ptr hash, - git_hash_keyeq_ptr key_eq); - -void *git_hashtable_lookup(git_hashtable *h, const void *key); -int git_hashtable_remove2(git_hashtable *table, const void *key, void **old_value); - -GIT_INLINE(int) git_hashtable_remove(git_hashtable *table, const void *key) -{ - void *_unused; - return git_hashtable_remove2(table, key, &_unused); -} - - -void git_hashtable_free(git_hashtable *h); -void git_hashtable_clear(git_hashtable *h); -int git_hashtable_merge(git_hashtable *self, git_hashtable *other); - -int git_hashtable_insert2(git_hashtable *h, const void *key, void *value, void **old_value); - -GIT_INLINE(int) git_hashtable_insert(git_hashtable *h, const void *key, void *value) -{ - void *_unused; - return git_hashtable_insert2(h, key, value, &_unused); -} - -#define git_hashtable_node_at(nodes, pos) ((git_hashtable_node *)(&nodes[pos])) - -#define GIT_HASHTABLE__FOREACH(self,block) { \ - size_t _c; \ - git_hashtable_node *_n = (self)->nodes; \ - for (_c = (self)->size; _c > 0; _c--, _n++) { \ - if (!_n->key) continue; block } } - -#define GIT_HASHTABLE_FOREACH(self, pkey, pvalue, 
code)\ - GIT_HASHTABLE__FOREACH(self,{(pkey)=_n->key;(pvalue)=_n->value;code;}) - -#define GIT_HASHTABLE_FOREACH_KEY(self, pkey, code)\ - GIT_HASHTABLE__FOREACH(self,{(pkey)=_n->key;code;}) - -#define GIT_HASHTABLE_FOREACH_VALUE(self, pvalue, code)\ - GIT_HASHTABLE__FOREACH(self,{(pvalue)=_n->value;code;}) - -#define GIT_HASHTABLE_FOREACH_DELETE() {\ - _node->key = NULL; _node->value = NULL; _self->key_count--;\ -} - -/* - * If you want a hashtable with standard string keys, you can - * just pass git_hash__strcmp_cb and git_hash__strhash_cb to - * git_hashtable_alloc. - */ -#define git_hash__strcmp_cb git__strcmp_cb -extern uint32_t git_hash__strhash_cb(const void *key, int hash_id); - -#endif diff --git a/src/khash_str.h b/src/khash_str.h deleted file mode 100644 index 0b840d836..000000000 --- a/src/khash_str.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2012 the libgit2 contributors - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. 
- */ -#ifndef INCLUDE_khash_str_h__ -#define INCLUDE_khash_str_h__ - -#include "common.h" - -#define kmalloc git__malloc -#define kcalloc git__calloc -#define krealloc git__realloc -#define kfree git__free -#include "khash.h" - -__KHASH_TYPE(str, const char *, void *); -typedef khash_t(str) git_khash_str; - -#define GIT_KHASH_STR__IMPLEMENTATION \ - __KHASH_IMPL(str, static inline, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal) - -#define git_khash_str_alloc() kh_init(str) -#define git_khash_str_free(h) kh_destroy(str, h), h = NULL -#define git_khash_str_clear(h) kh_clear(str, h) - -#define git_khash_str_num_entries(h) kh_size(h) - -#define git_khash_str_lookup_index(h, k) kh_get(str, h, k) -#define git_khash_str_valid_index(h, idx) (idx != kh_end(h)) - -#define git_khash_str_exists(h, k) (kh_get(str, h, k) != kh_end(h)) - -#define git_khash_str_value_at(h, idx) kh_val(h, idx) -#define git_khash_str_set_value_at(h, idx, v) kh_val(h, idx) = v -#define git_khash_str_delete_at(h, idx) kh_del(str, h, idx) - -#define git_khash_str_insert(h, key, val, err) do { \ - khiter_t __pos = kh_put(str, h, key, &err); \ - if (err >= 0) kh_val(h, __pos) = val; \ - } while (0) - -#define git_khash_str_insert2(h, key, val, old, err) do { \ - khiter_t __pos = kh_put(str, h, key, &err); \ - if (err >= 0) { \ - old = (err == 0) ? kh_val(h, __pos) : NULL; \ - kh_val(h, __pos) = val; \ - } } while (0) - -#define git_khash_str_foreach kh_foreach -#define git_khash_str_foreach_value kh_foreach_value - -#endif diff --git a/src/khash_oid.h b/src/oidmap.h similarity index 77% rename from src/khash_oid.h rename to src/oidmap.h index 96d82c759..858268c92 100644 --- a/src/khash_oid.h +++ b/src/oidmap.h @@ -4,8 +4,8 @@ * This file is part of libgit2, distributed under the GNU GPL v2 with * a Linking Exception. For full terms see the included COPYING file. 
*/ -#ifndef INCLUDE_khash_oid_h__ -#define INCLUDE_khash_oid_h__ +#ifndef INCLUDE_oidmap_h__ +#define INCLUDE_oidmap_h__ #include "common.h" #include "git2/oid.h" @@ -17,7 +17,7 @@ #include "khash.h" __KHASH_TYPE(oid, const git_oid *, void *); -typedef khash_t(oid) git_khash_oid; +typedef khash_t(oid) git_oidmap; GIT_INLINE(khint_t) hash_git_oid(const git_oid *oid) { @@ -33,10 +33,10 @@ GIT_INLINE(int) hash_git_oid_equal(const git_oid *a, const git_oid *b) return (memcmp(a->id, b->id, sizeof(a->id)) == 0); } -#define GIT_KHASH_OID__IMPLEMENTATION \ +#define GIT__USE_OIDMAP \ __KHASH_IMPL(oid, static inline, const git_oid *, void *, 1, hash_git_oid, hash_git_oid_equal) -#define git_khash_oid_alloc() kh_init(oid) -#define git_khash_oid_free(h) kh_destroy(oid,h), h = NULL +#define git_oidmap_alloc() kh_init(oid) +#define git_oidmap_free(h) kh_destroy(oid,h), h = NULL #endif diff --git a/src/refs.c b/src/refs.c index 7050b4af9..7685d560c 100644 --- a/src/refs.c +++ b/src/refs.c @@ -15,7 +15,7 @@ #include #include -GIT_KHASH_STR__IMPLEMENTATION; +GIT__USE_STRMAP; #define DEFAULT_NESTING_LEVEL 5 #define MAX_NESTING_LEVEL 10 @@ -423,7 +423,7 @@ static int packed_load(git_repository *repo) /* First we make sure we have allocated the hash table */ if (ref_cache->packfile == NULL) { - ref_cache->packfile = git_khash_str_alloc(); + ref_cache->packfile = git_strmap_alloc(); GITERR_CHECK_ALLOC(ref_cache->packfile); } @@ -438,7 +438,7 @@ static int packed_load(git_repository *repo) * refresh the packed refs. */ if (result == GIT_ENOTFOUND) { - git_khash_str_clear(ref_cache->packfile); + git_strmap_clear(ref_cache->packfile); return 0; } @@ -452,7 +452,7 @@ static int packed_load(git_repository *repo) * At this point, we want to refresh the packed refs. We already * have the contents in our buffer. 
*/ - git_khash_str_clear(ref_cache->packfile); + git_strmap_clear(ref_cache->packfile); buffer_start = (const char *)packfile.ptr; buffer_end = (const char *)(buffer_start) + packfile.size; @@ -477,7 +477,7 @@ static int packed_load(git_repository *repo) goto parse_failed; } - git_khash_str_insert(ref_cache->packfile, ref->name, ref, err); + git_strmap_insert(ref_cache->packfile, ref->name, ref, err); if (err < 0) goto parse_failed; } @@ -486,7 +486,7 @@ static int packed_load(git_repository *repo) return 0; parse_failed: - git_khash_str_free(ref_cache->packfile); + git_strmap_free(ref_cache->packfile); ref_cache->packfile = NULL; git_buf_free(&packfile); return -1; @@ -512,7 +512,7 @@ static int _dirent_loose_listall(void *_data, git_buf *full_path) /* do not add twice a reference that exists already in the packfile */ if ((data->list_flags & GIT_REF_PACKED) != 0 && - git_khash_str_exists(data->repo->references.packfile, file_path)) + git_strmap_exists(data->repo->references.packfile, file_path)) return 0; if (data->list_flags != GIT_REF_LISTALL) { @@ -539,7 +539,7 @@ static int _dirent_loose_load(void *data, git_buf *full_path) if (loose_lookup_to_packfile(&ref, repository, file_path) < 0) return -1; - git_khash_str_insert2( + git_strmap_insert2( repository->references.packfile, ref->name, ref, old_ref, err); if (err < 0) { git__free(ref); @@ -737,7 +737,7 @@ static int packed_write(git_repository *repo) assert(repo && repo->references.packfile); total_refs = - (unsigned int)git_khash_str_num_entries(repo->references.packfile); + (unsigned int)git_strmap_num_entries(repo->references.packfile); if (git_vector_init(&packing_list, total_refs, packed_sort) < 0) return -1; @@ -747,7 +747,7 @@ static int packed_write(git_repository *repo) struct packref *reference; /* cannot fail: vector already has the right size */ - git_khash_str_foreach_value(repo->references.packfile, reference, { + git_strmap_foreach_value(repo->references.packfile, reference, { 
git_vector_insert(&packing_list, reference); }); } @@ -873,7 +873,7 @@ static int reference_exists(int *exists, git_repository *repo, const char *ref_n return -1; if (git_path_isfile(ref_path.ptr) == true || - git_khash_str_exists(repo->references.packfile, ref_path.ptr)) + git_strmap_exists(repo->references.packfile, ref_path.ptr)) { *exists = 1; } else { @@ -940,7 +940,7 @@ static int reference_can_write( static int packed_lookup(git_reference *ref) { struct packref *pack_ref = NULL; - git_khash_str *packfile_refs; + git_strmap *packfile_refs; khiter_t pos; if (packed_load(ref->owner) < 0) @@ -959,13 +959,13 @@ static int packed_lookup(git_reference *ref) /* Look up on the packfile */ packfile_refs = ref->owner->references.packfile; - pos = git_khash_str_lookup_index(packfile_refs, ref->name); - if (!git_khash_str_valid_index(packfile_refs, pos)) { + pos = git_strmap_lookup_index(packfile_refs, ref->name); + if (!git_strmap_valid_index(packfile_refs, pos)) { giterr_set(GITERR_REFERENCE, "Reference '%s' not found", ref->name); return GIT_ENOTFOUND; } - pack_ref = git_khash_str_value_at(packfile_refs, pos); + pack_ref = git_strmap_value_at(packfile_refs, pos); ref->flags = GIT_REF_OID | GIT_REF_PACKED; ref->mtime = ref->owner->references.packfile_time; @@ -1011,7 +1011,7 @@ static int reference_delete(git_reference *ref) * We need to reload the packfile, remove the reference from the * packing list, and repack */ if (ref->flags & GIT_REF_PACKED) { - git_khash_str *packfile_refs; + git_strmap *packfile_refs; struct packref *packref; khiter_t pos; @@ -1020,15 +1020,15 @@ static int reference_delete(git_reference *ref) return -1; packfile_refs = ref->owner->references.packfile; - pos = git_khash_str_lookup_index(packfile_refs, ref->name); - if (!git_khash_str_valid_index(packfile_refs, pos)) { + pos = git_strmap_lookup_index(packfile_refs, ref->name); + if (!git_strmap_valid_index(packfile_refs, pos)) { giterr_set(GITERR_REFERENCE, "Reference %s stopped existing in 
the packfile", ref->name); return -1; } - packref = git_khash_str_value_at(packfile_refs, pos); - git_khash_str_delete_at(packfile_refs, pos); + packref = git_strmap_value_at(packfile_refs, pos); + git_strmap_delete_at(packfile_refs, pos); git__free(packref); if (packed_write(ref->owner) < 0) @@ -1488,7 +1488,7 @@ int git_reference_foreach( if (packed_load(repo) < 0) return -1; - git_khash_str_foreach(repo->references.packfile, ref_name, ref, { + git_strmap_foreach(repo->references.packfile, ref_name, ref, { if (callback(ref_name, payload) < 0) return 0; }); @@ -1555,11 +1555,11 @@ void git_repository__refcache_free(git_refcache *refs) if (refs->packfile) { struct packref *reference; - git_khash_str_foreach_value(refs->packfile, reference, { + git_strmap_foreach_value(refs->packfile, reference, { git__free(reference); }); - git_khash_str_free(refs->packfile); + git_strmap_free(refs->packfile); } } diff --git a/src/refs.h b/src/refs.h index 39648e6d9..369e91e1c 100644 --- a/src/refs.h +++ b/src/refs.h @@ -10,7 +10,7 @@ #include "common.h" #include "git2/oid.h" #include "git2/refs.h" -#include "khash_str.h" +#include "strmap.h" #define GIT_REFS_DIR "refs/" #define GIT_REFS_HEADS_DIR GIT_REFS_DIR "heads/" @@ -46,7 +46,7 @@ struct git_reference { }; typedef struct { - git_khash_str *packfile; + git_strmap *packfile; time_t packfile_time; } git_refcache; diff --git a/src/repository.h b/src/repository.h index f53fa697e..1ffac58f1 100644 --- a/src/repository.h +++ b/src/repository.h @@ -19,7 +19,7 @@ #include "buffer.h" #include "odb.h" #include "attr.h" -#include "khash_str.h" +#include "strmap.h" #define DOT_GIT ".git" #define GIT_DIR DOT_GIT "/" @@ -83,7 +83,7 @@ struct git_repository { git_cache objects; git_refcache references; git_attr_cache attrcache; - git_khash_str *submodules; + git_strmap *submodules; char *path_repository; char *workdir; diff --git a/src/revwalk.c b/src/revwalk.c index 5867e133e..1cfff3674 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ 
-10,14 +10,14 @@ #include "odb.h" #include "pqueue.h" #include "pool.h" -#include "khash_oid.h" +#include "oidmap.h" #include "git2/revwalk.h" #include "git2/merge.h" #include -GIT_KHASH_OID__IMPLEMENTATION; +GIT__USE_OIDMAP; #define PARENT1 (1 << 0) #define PARENT2 (1 << 1) @@ -48,7 +48,7 @@ struct git_revwalk { git_repository *repo; git_odb *odb; - git_khash_oid *commits; + git_oidmap *commits; git_pool commit_pool; commit_list *iterator_topo; @@ -726,7 +726,7 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo) memset(walk, 0x0, sizeof(git_revwalk)); - walk->commits = git_khash_oid_alloc(); + walk->commits = git_oidmap_alloc(); GITERR_CHECK_ALLOC(walk->commits); if (git_pqueue_init(&walk->iterator_time, 8, commit_time_cmp) < 0 || @@ -757,7 +757,7 @@ void git_revwalk_free(git_revwalk *walk) git_revwalk_reset(walk); git_odb_free(walk->odb); - git_khash_oid_free(walk->commits); + git_oidmap_free(walk->commits); git_pool_clear(&walk->commit_pool); git_pqueue_free(&walk->iterator_time); git_vector_free(&walk->twos); diff --git a/src/strmap.h b/src/strmap.h new file mode 100644 index 000000000..55fbd7c6e --- /dev/null +++ b/src/strmap.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#ifndef INCLUDE_strmap_h__ +#define INCLUDE_strmap_h__ + +#include "common.h" + +#define kmalloc git__malloc +#define kcalloc git__calloc +#define krealloc git__realloc +#define kfree git__free +#include "khash.h" + +__KHASH_TYPE(str, const char *, void *); +typedef khash_t(str) git_strmap; + +#define GIT__USE_STRMAP \ + __KHASH_IMPL(str, static inline, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal) + +#define git_strmap_alloc() kh_init(str) +#define git_strmap_free(h) kh_destroy(str, h), h = NULL +#define git_strmap_clear(h) kh_clear(str, h) + +#define git_strmap_num_entries(h) kh_size(h) + +#define git_strmap_lookup_index(h, k) kh_get(str, h, k) +#define git_strmap_valid_index(h, idx) (idx != kh_end(h)) + +#define git_strmap_exists(h, k) (kh_get(str, h, k) != kh_end(h)) + +#define git_strmap_value_at(h, idx) kh_val(h, idx) +#define git_strmap_set_value_at(h, idx, v) kh_val(h, idx) = v +#define git_strmap_delete_at(h, idx) kh_del(str, h, idx) + +#define git_strmap_insert(h, key, val, err) do { \ + khiter_t __pos = kh_put(str, h, key, &err); \ + if (err >= 0) kh_val(h, __pos) = val; \ + } while (0) + +#define git_strmap_insert2(h, key, val, old, err) do { \ + khiter_t __pos = kh_put(str, h, key, &err); \ + if (err >= 0) { \ + old = (err == 0) ? 
kh_val(h, __pos) : NULL; \ + kh_val(h, __pos) = val; \ + } } while (0) + +#define git_strmap_foreach kh_foreach +#define git_strmap_foreach_value kh_foreach_value + +#endif diff --git a/src/submodule.c b/src/submodule.c index 8072053af..1b5b59f45 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -89,17 +89,17 @@ static void submodule_release(git_submodule *sm, int decr) } static int submodule_from_entry( - git_khash_str *smcfg, git_index_entry *entry) + git_strmap *smcfg, git_index_entry *entry) { git_submodule *sm; void *old_sm; khiter_t pos; int error; - pos = git_khash_str_lookup_index(smcfg, entry->path); + pos = git_strmap_lookup_index(smcfg, entry->path); - if (git_khash_str_valid_index(smcfg, pos)) - sm = git_khash_str_value_at(smcfg, pos); + if (git_strmap_valid_index(smcfg, pos)) + sm = git_strmap_value_at(smcfg, pos); else sm = submodule_alloc(entry->path); @@ -115,7 +115,7 @@ static int submodule_from_entry( goto fail; } - git_khash_str_insert2(smcfg, sm->path, sm, old_sm, error); + git_strmap_insert2(smcfg, sm->path, sm, old_sm, error); if (error < 0) goto fail; sm->refcount++; @@ -135,7 +135,7 @@ fail: static int submodule_from_config( const char *key, const char *value, void *data) { - git_khash_str *smcfg = data; + git_strmap *smcfg = data; const char *namestart; const char *property; git_buf name = GIT_BUF_INIT; @@ -158,13 +158,13 @@ static int submodule_from_config( if (git_buf_set(&name, namestart, property - namestart - 1) < 0) return -1; - pos = git_khash_str_lookup_index(smcfg, name.ptr); - if (!git_khash_str_valid_index(smcfg, pos) && is_path) - pos = git_khash_str_lookup_index(smcfg, value); - if (!git_khash_str_valid_index(smcfg, pos)) + pos = git_strmap_lookup_index(smcfg, name.ptr); + if (!git_strmap_valid_index(smcfg, pos) && is_path) + pos = git_strmap_lookup_index(smcfg, value); + if (!git_strmap_valid_index(smcfg, pos)) sm = submodule_alloc(name.ptr); else - sm = git_khash_str_value_at(smcfg, pos); + sm = git_strmap_value_at(smcfg, 
pos); if (!sm) goto fail; @@ -172,7 +172,7 @@ static int submodule_from_config( assert(sm->path == sm->name); sm->name = git_buf_detach(&name); - git_khash_str_insert2(smcfg, sm->name, sm, old_sm, error); + git_strmap_insert2(smcfg, sm->name, sm, old_sm, error); if (error < 0) goto fail; sm->refcount++; @@ -183,7 +183,7 @@ static int submodule_from_config( if (sm->path == NULL) goto fail; - git_khash_str_insert2(smcfg, sm->path, sm, old_sm, error); + git_strmap_insert2(smcfg, sm->path, sm, old_sm, error); if (error < 0) goto fail; sm->refcount++; @@ -247,7 +247,7 @@ static int load_submodule_config(git_repository *repo) git_index *index; unsigned int i, max_i; git_oid gitmodules_oid; - git_khash_str *smcfg; + git_strmap *smcfg; struct git_config_file *mods = NULL; if (repo->submodules) @@ -257,7 +257,7 @@ static int load_submodule_config(git_repository *repo) * under both its name and its path. These are usually the same, but * that is not guaranteed. */ - smcfg = git_khash_str_alloc(); + smcfg = git_strmap_alloc(); GITERR_CHECK_ALLOC(smcfg); /* scan index for gitmodules (and .gitmodules entry) */ @@ -307,13 +307,13 @@ cleanup: if (mods != NULL) git_config_file_free(mods); if (error) - git_khash_str_free(smcfg); + git_strmap_free(smcfg); return error; } void git_submodule_config_free(git_repository *repo) { - git_khash_str *smcfg = repo->submodules; + git_strmap *smcfg = repo->submodules; git_submodule *sm; repo->submodules = NULL; @@ -321,10 +321,10 @@ void git_submodule_config_free(git_repository *repo) if (smcfg == NULL) return; - git_khash_str_foreach_value(smcfg, sm, { + git_strmap_foreach_value(smcfg, sm, { submodule_release(sm,1); }); - git_khash_str_free(smcfg); + git_strmap_free(smcfg); } static int submodule_cmp(const void *a, const void *b) @@ -345,7 +345,7 @@ int git_submodule_foreach( if ((error = load_submodule_config(repo)) < 0) return error; - git_khash_str_foreach_value(repo->submodules, sm, { + git_strmap_foreach_value(repo->submodules, sm, { /* 
usually the following will not come into play */ if (sm->refcount > 1) { if (git_vector_bsearch(&seen, sm) != GIT_ENOTFOUND) @@ -373,12 +373,12 @@ int git_submodule_lookup( if (load_submodule_config(repo) < 0) return -1; - pos = git_khash_str_lookup_index(repo->submodules, name); - if (!git_khash_str_valid_index(repo->submodules, pos)) + pos = git_strmap_lookup_index(repo->submodules, name); + if (!git_strmap_valid_index(repo->submodules, pos)) return GIT_ENOTFOUND; if (sm_ptr) - *sm_ptr = git_khash_str_value_at(repo->submodules, pos); + *sm_ptr = git_strmap_value_at(repo->submodules, pos); return 0; } diff --git a/tests-clar/core/strmap.c b/tests-clar/core/strmap.c new file mode 100644 index 000000000..f34a4f89f --- /dev/null +++ b/tests-clar/core/strmap.c @@ -0,0 +1,102 @@ +#include "clar_libgit2.h" +#include "strmap.h" + +GIT__USE_STRMAP; + +void test_core_strmap__0(void) +{ + git_strmap *table = git_strmap_alloc(); + cl_assert(table != NULL); + cl_assert(git_strmap_num_entries(table) == 0); + git_strmap_free(table); +} + +static void insert_strings(git_strmap *table, int count) +{ + int i, j, over, err; + char *str; + + for (i = 0; i < count; ++i) { + str = malloc(10); + for (j = 0; j < 10; ++j) + str[j] = 'a' + (i % 26); + str[9] = '\0'; + + /* if > 26, then encode larger value in first letters */ + for (j = 0, over = i / 26; over > 0; j++, over = over / 26) + str[j] = 'A' + (over % 26); + + git_strmap_insert(table, str, str, err); + cl_assert(err >= 0); + } + + cl_assert((int)git_strmap_num_entries(table) == count); +} + +void test_core_strmap__1(void) +{ + int i; + char *str; + git_strmap *table = git_strmap_alloc(); + cl_assert(table != NULL); + + insert_strings(table, 20); + + cl_assert(git_strmap_exists(table, "aaaaaaaaa")); + cl_assert(git_strmap_exists(table, "ggggggggg")); + cl_assert(!git_strmap_exists(table, "aaaaaaaab")); + cl_assert(!git_strmap_exists(table, "abcdefghi")); + + i = 0; + git_strmap_foreach_value(table, str, { i++; free(str); }); + 
cl_assert(i == 20); + + git_strmap_free(table); +} + +void test_core_strmap__2(void) +{ + khiter_t pos; + int i; + char *str; + git_strmap *table = git_strmap_alloc(); + cl_assert(table != NULL); + + insert_strings(table, 20); + + cl_assert(git_strmap_exists(table, "aaaaaaaaa")); + cl_assert(git_strmap_exists(table, "ggggggggg")); + cl_assert(!git_strmap_exists(table, "aaaaaaaab")); + cl_assert(!git_strmap_exists(table, "abcdefghi")); + + cl_assert(git_strmap_exists(table, "bbbbbbbbb")); + pos = git_strmap_lookup_index(table, "bbbbbbbbb"); + cl_assert(git_strmap_valid_index(table, pos)); + cl_assert_equal_s(git_strmap_value_at(table, pos), "bbbbbbbbb"); + free(git_strmap_value_at(table, pos)); + git_strmap_delete_at(table, pos); + + cl_assert(!git_strmap_exists(table, "bbbbbbbbb")); + + i = 0; + git_strmap_foreach_value(table, str, { i++; free(str); }); + cl_assert(i == 19); + + git_strmap_free(table); +} + +void test_core_strmap__3(void) +{ + int i; + char *str; + git_strmap *table = git_strmap_alloc(); + cl_assert(table != NULL); + + insert_strings(table, 10000); + + i = 0; + git_strmap_foreach_value(table, str, { i++; free(str); }); + cl_assert(i == 10000); + + git_strmap_free(table); +} diff --git a/tests/t07-hashtable.c b/tests/t07-hashtable.c deleted file mode 100644 index 4d45c7fc1..000000000 --- a/tests/t07-hashtable.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * This file is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, - * as published by the Free Software Foundation. - * - * In addition to the permissions in the GNU General Public License, - * the authors give you unlimited permission to link the compiled - * version of this file into combinations with other programs, - * and to distribute those combinations without any restriction - * coming from the use of this file. 
(The General Public License - * restrictions do apply in other respects; for example, they cover - * modification of the file, and distribution when not linked into - * a combined executable.) - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ -#include "test_lib.h" -#include "test_helpers.h" - -#include "hashtable.h" -#include "hash.h" - -typedef struct _aux_object { - int __bulk; - git_oid id; - int visited; -} table_item; - -static uint32_t hash_func(const void *key, int hash_id) -{ - uint32_t r; - const git_oid *id = key; - - memcpy(&r, id->id + (hash_id * sizeof(uint32_t)), sizeof(r)); - return r; -} - -static int hash_cmpkey(const void *a, const void *b) -{ - return git_oid_cmp(a, b); -} - -BEGIN_TEST(table0, "create a new hashtable") - - git_hashtable *table = NULL; - - table = git_hashtable_alloc(55, hash_func, hash_cmpkey); - must_be_true(table != NULL); - must_be_true(table->size_mask + 1 == 64); - - git_hashtable_free(table); - -END_TEST - -BEGIN_TEST(table1, "fill the hashtable with random entries") - - const int objects_n = 32; - int i; - - table_item *objects; - git_hashtable *table = NULL; - - table = git_hashtable_alloc(objects_n * 2, hash_func, hash_cmpkey); - must_be_true(table != NULL); - - objects = git__malloc(objects_n * sizeof(table_item)); - memset(objects, 0x0, objects_n * sizeof(table_item)); - - /* populate the hash table */ - for (i = 0; i < objects_n; ++i) { - git_hash_buf(&(objects[i].id), &i, sizeof(int)); - must_pass(git_hashtable_insert(table, &(objects[i].id), &(objects[i]))); - } - - /* make 
sure all the inserted objects can be found */ - for (i = 0; i < objects_n; ++i) { - git_oid id; - table_item *ob; - - git_hash_buf(&id, &i, sizeof(int)); - ob = (table_item *)git_hashtable_lookup(table, &id); - - must_be_true(ob != NULL); - must_be_true(ob == &(objects[i])); - } - - /* make sure we cannot find inexisting objects */ - for (i = 0; i < 50; ++i) { - int hash_id; - git_oid id; - - hash_id = (rand() % 50000) + objects_n; - git_hash_buf(&id, &hash_id, sizeof(int)); - must_be_true(git_hashtable_lookup(table, &id) == NULL); - } - - git_hashtable_free(table); - git__free(objects); - -END_TEST - - -BEGIN_TEST(table2, "make sure the table resizes automatically") - - const int objects_n = 64; - int i; - size_t old_size; - table_item *objects; - git_hashtable *table = NULL; - - table = git_hashtable_alloc(objects_n, hash_func, hash_cmpkey); - must_be_true(table != NULL); - - objects = git__malloc(objects_n * sizeof(table_item)); - memset(objects, 0x0, objects_n * sizeof(table_item)); - - old_size = table->size_mask + 1; - - /* populate the hash table -- should be automatically resized */ - for (i = 0; i < objects_n; ++i) { - git_hash_buf(&(objects[i].id), &i, sizeof(int)); - must_pass(git_hashtable_insert(table, &(objects[i].id), &(objects[i]))); - } - - must_be_true(table->size_mask > old_size); - - /* make sure all the inserted objects can be found */ - for (i = 0; i < objects_n; ++i) { - git_oid id; - table_item *ob; - - git_hash_buf(&id, &i, sizeof(int)); - ob = (table_item *)git_hashtable_lookup(table, &id); - - must_be_true(ob != NULL); - must_be_true(ob == &(objects[i])); - } - - git_hashtable_free(table); - git__free(objects); - -END_TEST - -BEGIN_TEST(tableit0, "iterate through all the contents of the table") - - const int objects_n = 32; - int i; - table_item *objects, *ob; - - git_hashtable *table = NULL; - - table = git_hashtable_alloc(objects_n * 2, hash_func, hash_cmpkey); - must_be_true(table != NULL); - - objects = git__malloc(objects_n * 
sizeof(table_item)); - memset(objects, 0x0, objects_n * sizeof(table_item)); - - /* populate the hash table */ - for (i = 0; i < objects_n; ++i) { - git_hash_buf(&(objects[i].id), &i, sizeof(int)); - must_pass(git_hashtable_insert(table, &(objects[i].id), &(objects[i]))); - } - - GIT_HASHTABLE_FOREACH_VALUE(table, ob, ob->visited = 1); - - /* make sure all nodes have been visited */ - for (i = 0; i < objects_n; ++i) - must_be_true(objects[i].visited); - - git_hashtable_free(table); - git__free(objects); -END_TEST - - -BEGIN_SUITE(hashtable) - ADD_TEST(table0); - ADD_TEST(table1); - ADD_TEST(table2); - ADD_TEST(tableit0); -END_SUITE - diff --git a/tests/test_main.c b/tests/test_main.c index 50256e97c..bc07f1ff1 100644 --- a/tests/test_main.c +++ b/tests/test_main.c @@ -37,7 +37,6 @@ DECLARE_SUITE(objwrite); DECLARE_SUITE(commit); DECLARE_SUITE(revwalk); DECLARE_SUITE(index); -DECLARE_SUITE(hashtable); DECLARE_SUITE(tag); DECLARE_SUITE(tree); DECLARE_SUITE(refs); @@ -53,7 +52,6 @@ static libgit2_suite suite_methods[]= { SUITE_NAME(commit), SUITE_NAME(revwalk), SUITE_NAME(index), - SUITE_NAME(hashtable), SUITE_NAME(tag), SUITE_NAME(tree), SUITE_NAME(refs),