diff --git a/src/hashtable.c b/src/hashtable.c index 67fd49a46..e226c8191 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -27,46 +27,113 @@ #include "repository.h" #include "commit.h" +#define MAX_LOOPS 5 static const double max_load_factor = 0.65; -static void hashtable_resize(git_hashtable *table) +static int resize_to(git_hashtable *self, size_t new_size); +static int set_size(git_hashtable *self, size_t new_size); +static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id); +static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other); +static int node_insert(git_hashtable *self, git_hashtable_node *new_node); +static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size); + +static int resize_to(git_hashtable *self, size_t new_size) { - git_hashtable_node **new_nodes; - unsigned int new_size, i; + git_hashtable_node *old_nodes = self->nodes; + size_t old_size = self->size; - assert(table); + self->is_resizing = 1; - new_size = (table->size_mask + 1) * 2; + do { + self->size = new_size; + self->size_mask = new_size - 1; + self->key_count = 0; + self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size); - new_nodes = git__malloc(new_size * sizeof(git_hashtable_node *)); - if (new_nodes == NULL) - return; + if (self->nodes == NULL) + return GIT_ENOMEM; - memset(new_nodes, 0x0, new_size * sizeof(git_hashtable_node *)); + if (insert_nodes(self, old_nodes, old_size) == 0) + self->is_resizing = 0; + else { + new_size *= 2; + free(self->nodes); + } + } while(self->is_resizing); - for (i = 0; i <= table->size_mask; ++i) { - git_hashtable_node *n; - unsigned int index; + free(old_nodes); + return GIT_SUCCESS; +} - while ((n = table->nodes[i]) != NULL) { - table->nodes[i] = n->next; - index = n->hash & (new_size - 1); - n->next = new_nodes[index]; - new_nodes[index] = n; +static int set_size(git_hashtable *self, size_t new_size) +{ + self->nodes = realloc(self->nodes, new_size * sizeof(git_hashtable_node)); + if (self->nodes == NULL) + return GIT_ENOMEM; + + if (new_size > self->size) { + memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(git_hashtable_node)); + } + + self->size = new_size; + self->size_mask = new_size - 1; + return GIT_SUCCESS; +} + +static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id) +{ + size_t pos = self->hash(key, hash_id) & self->size_mask; + return git_hashtable_node_at(self->nodes, pos); +} + +static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other) +{ + git_hashtable_node tmp = *self; + *self = *other; + *other = tmp; +} + +static int node_insert(git_hashtable *self, git_hashtable_node *new_node) +{ + int iteration, hash_id; + + for (iteration = 0; iteration < MAX_LOOPS; iteration++) { + for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { + git_hashtable_node *node; + node = node_with_hash(self, new_node->key, hash_id); + node_swap_with(new_node, node); + if(new_node->key == 0x0){ + self->key_count++; + return GIT_SUCCESS; + } } } - free(table->nodes); - table->nodes = new_nodes; - table->size_mask = (new_size - 1); - table->max_count = (unsigned int)(new_size * max_load_factor); + if (self->is_resizing) + return GIT_EBUSY; + + resize_to(self, self->size * 2); + git_hashtable_insert(self, new_node->key, new_node->value); + return GIT_SUCCESS; } -git_hashtable *git_hashtable_alloc(unsigned int min_size, +static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size) +{ + size_t i; + + for (i = 0; i < old_size; ++i) { + git_hashtable_node *node = git_hashtable_node_at(old_nodes, i); + if (node->key && git_hashtable_insert(self, node->key, node->value) < GIT_SUCCESS) + return GIT_ENOMEM; + } + + return GIT_SUCCESS; +} + +git_hashtable *git_hashtable_alloc(size_t min_size, git_hash_ptr hash, git_hash_keyeq_ptr key_eq) { - unsigned int i; git_hashtable *table; assert(hash && key_eq); @@ -74,6 +141,11 @@ git_hashtable *git_hashtable_alloc(unsigned int min_size, if ((table = git__malloc(sizeof(git_hashtable))) == NULL) return NULL; + memset(table, 0x0, sizeof(git_hashtable)); + + if (min_size < 8) + min_size = 8; + /* round up size to closest power of 2 */ min_size--; min_size |= min_size >> 1; @@ -82,167 +154,90 @@ git_hashtable *git_hashtable_alloc(unsigned int min_size, min_size |= min_size >> 8; min_size |= min_size >> 16; - table->size_mask = min_size; - table->count = 0; - table->max_count = (unsigned int)((min_size + 1) * max_load_factor); - table->hash = hash; table->key_equal = key_eq; - table->nodes = git__malloc((min_size + 1) * sizeof(git_hashtable_node *)); - - if (table->nodes == NULL) { - free(table); - return NULL; - } - - for (i = 0; i <= min_size; ++i) - table->nodes[i] = NULL; + set_size(table, min_size + 1); return table; } -void git_hashtable_clear(git_hashtable *table) +void git_hashtable_clear(git_hashtable *self) { - unsigned int index; + assert(self); - assert(table); + memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size); + self->key_count = 0; +} - for (index = 0; index <= table->size_mask; ++index) { - git_hashtable_node *node, *next_node; +void git_hashtable_free(git_hashtable *self) +{ + assert(self); - node = table->nodes[index]; - while (node != NULL) { - next_node = node->next; - free(node); - node = next_node; + free(self->nodes); + free(self); +} + + +int git_hashtable_insert(git_hashtable *self, const void *key, void *value) +{ + int hash_id; + git_hashtable_node *node; + + for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { + node = node_with_hash(self, key, hash_id); + + if (!node->key) { + node->key = key; + node->value = value; + self->key_count++; + return GIT_SUCCESS; } - table->nodes[index] = NULL; + if (key == node->key || self->key_equal(key, node->key) == 0) { + node->value = value; + return GIT_SUCCESS; + } } - table->count = 0; + /* no space in table; must do cuckoo dance */ + { + git_hashtable_node x; + x.key = key; + x.value = value; + return node_insert(self, &x); + } } -void git_hashtable_free(git_hashtable *table) -{ - assert(table); - - git_hashtable_clear(table); - free(table->nodes); - free(table); -} - - -int git_hashtable_insert(git_hashtable *table, const void *key, void *value) +void *git_hashtable_lookup(git_hashtable *self, const void *key) { + int hash_id; git_hashtable_node *node; - uint32_t index, hash; - assert(table); - - if (table->count + 1 > table->max_count) - hashtable_resize(table); - - node = git__malloc(sizeof(git_hashtable_node)); - if (node == NULL) - return GIT_ENOMEM; - - hash = table->hash(key); - index = (hash & table->size_mask); - - node->object = value; - node->hash = hash; - node->next = table->nodes[index]; - - table->nodes[index] = node; - table->count++; - - return GIT_SUCCESS; -} - -void *git_hashtable_lookup(git_hashtable *table, const void *key) -{ - git_hashtable_node *node; - uint32_t index, hash; - - assert(table); - - hash = table->hash(key); - index = (hash & table->size_mask); - node = table->nodes[index]; - - while (node != NULL) { - if (node->hash == hash && table->key_equal(node->object, key)) - return node->object; - - node = node->next; + for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { + node = node_with_hash(self, key, hash_id); + if (node->key && self->key_equal(key, node->key) == 0) + return node->value; } return NULL; } -int git_hashtable_remove(git_hashtable *table, const void *key) +int git_hashtable_remove(git_hashtable *self, const void *key) { - git_hashtable_node *node, *prev_node; - uint32_t index, hash; + int hash_id; + git_hashtable_node *node; - assert(table); - - hash = table->hash(key); - index = (hash & table->size_mask); - node = table->nodes[index]; - - prev_node = NULL; - - while (node != NULL) { - if (node->hash == hash && table->key_equal(node->object, key)) { - if (prev_node == NULL) - table->nodes[index] = node->next; - else - prev_node->next = node->next; - - free(node); + for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { + node = node_with_hash(self, key, hash_id); + if (node->key && self->key_equal(key, node->key) == 0) { + node->key = NULL; + node->value = NULL; + self->key_count--; return GIT_SUCCESS; } - - prev_node = node; - node = node->next; } return GIT_ENOTFOUND; } - - -void git_hashtable_iterator_init(git_hashtable *table, git_hashtable_iterator *it) -{ - assert(table && it); - - memset(it, 0x0, sizeof(git_hashtable_iterator)); - - it->nodes = table->nodes; - it->current_node = NULL; - it->current_pos = 0; - it->size = table->size_mask + 1; -} - -void *git_hashtable_iterator_next(git_hashtable_iterator *it) -{ - git_hashtable_node *next = NULL; - - assert(it); - - while (it->current_node == NULL) { - if (it->current_pos >= it->size) - return NULL; - - it->current_node = it->nodes[it->current_pos++]; - } - - next = it->current_node; - it->current_node = it->current_node->next; - - return next->object; -} - diff --git a/src/hashtable.h b/src/hashtable.h index 69535040d..74da580ef 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -5,39 +5,33 @@ #include "git2/oid.h" #include "git2/odb.h" +#define GIT_HASHTABLE_HASHES 3 -typedef uint32_t (*git_hash_ptr)(const void *); -typedef int (*git_hash_keyeq_ptr)(void *obj, const void *obj_key); +typedef uint32_t (*git_hash_ptr)(const void *, int hash_id); +typedef int (*git_hash_keyeq_ptr)(const void *key_a, const void *key_b); struct git_hashtable_node { - void *object; - uint32_t hash; - struct git_hashtable_node *next; + const void *key; + void *value; }; struct git_hashtable { - struct git_hashtable_node **nodes; + struct git_hashtable_node *nodes; - unsigned int size_mask; - unsigned int count; - unsigned int max_count; + size_t size_mask; + size_t size; + size_t key_count; + + int is_resizing; git_hash_ptr hash; git_hash_keyeq_ptr key_equal; }; -struct git_hashtable_iterator { - struct git_hashtable_node **nodes; - struct git_hashtable_node *current_node; - unsigned int current_pos; - unsigned int size; -}; - typedef struct git_hashtable_node git_hashtable_node; typedef struct git_hashtable git_hashtable; -typedef struct git_hashtable_iterator git_hashtable_iterator; -git_hashtable *git_hashtable_alloc(unsigned int min_size, +git_hashtable *git_hashtable_alloc(size_t min_size, git_hash_ptr hash, git_hash_keyeq_ptr key_eq); int git_hashtable_insert(git_hashtable *h, const void *key, void *value); @@ -46,7 +40,22 @@ int git_hashtable_remove(git_hashtable *table, const void *key); void git_hashtable_free(git_hashtable *h); void git_hashtable_clear(git_hashtable *h); -void *git_hashtable_iterator_next(git_hashtable_iterator *it); -void git_hashtable_iterator_init(git_hashtable *h, git_hashtable_iterator *it); +#define git_hashtable_node_at(nodes, pos) ((git_hashtable_node *)(&nodes[pos])) + +#define GIT_HASHTABLE_FOREACH(self, pkey, pvalue, code) {\ + git_hashtable *_self = (self);\ + git_hashtable_node *_nodes = _self->nodes;\ + unsigned int _i, _size = _self->size;\ + for (_i = 0; _i < _size; _i ++) {\ + git_hashtable_node *_node = git_hashtable_node_at(_nodes, _i);\ + if (_node->key)\ + {\ + pkey = _node->key;\ + pvalue = _node->value;\ + code;\ + }\ + }\ +} + #endif diff --git a/src/refs.c b/src/refs.c index 1f434ea03..b95ec70cf 100644 --- a/src/refs.c +++ b/src/refs.c @@ -28,28 +28,21 @@ #include "repository.h" #include "fileops.h" -#define HASH_SEED 2147483647 #define MAX_NESTING_LEVEL 5 static const int default_table_size = 32; -static uint32_t reftable_hash(const void *key) +static uint32_t reftable_hash(const void *key, int hash_id) { - return git__hash(key, strlen((const char *)key), HASH_SEED); + static uint32_t hash_seeds[GIT_HASHTABLE_HASHES] = { + 2147483647, + 0x5d20bb23, + 0x7daaab3c + }; + + return git__hash(key, strlen((const char *)key), hash_seeds[hash_id]); } -static int reftable_haskey(void *reference, const void *key) -{ - git_reference *ref; - char *name; - - ref = (git_reference *)reference; - name = (char *)key; - - return strcmp(name, ref->name) == 0; -} - - static int check_refname(const char *name) { /* @@ -641,24 +634,21 @@ int git_repository__refcache_init(git_refcache *refs) refs->cache = git_hashtable_alloc( default_table_size, reftable_hash, - reftable_haskey); + (git_hash_keyeq_ptr)strcmp); return refs->cache ? GIT_SUCCESS : GIT_ENOMEM; } void git_repository__refcache_free(git_refcache *refs) { - git_hashtable_iterator it; + const char *ref_name; git_reference *reference; assert(refs); - git_hashtable_iterator_init(refs->cache, &it); - - while ((reference = (git_reference *)git_hashtable_iterator_next(&it)) != NULL) { - git_hashtable_remove(refs->cache, reference->name); - reference_free(reference); - } + GIT_HASHTABLE_FOREACH(refs->cache, ref_name, reference, + reference_free(reference) + ); git_hashtable_free(refs->cache); } diff --git a/src/repository.c b/src/repository.c index 2b17ba455..a1d2798b3 100644 --- a/src/repository.c +++ b/src/repository.c @@ -58,28 +58,16 @@ typedef struct { * Callbacks for the ODB cache, implemented * as a hash table */ -uint32_t object_table_hash(const void *key) +uint32_t object_table_hash(const void *key, int hash_id) { uint32_t r; git_oid *id; id = (git_oid *)key; - memcpy(&r, id->id, sizeof(r)); + memcpy(&r, id->id + (hash_id * sizeof(uint32_t)), sizeof(r)); return r; } -int object_table_hashkey(void *object, const void *key) -{ - git_object *obj; - git_oid *oid; - - obj = (git_object *)object; - oid = (git_oid *)key; - - return (git_oid_cmp(oid, &obj->id) == 0); -} - - /* * Git repository open methods * @@ -245,9 +233,9 @@ static git_repository *repository_alloc() repo->objects = git_hashtable_alloc( OBJECT_TABLE_SIZE, object_table_hash, - object_table_hashkey); + (git_hash_keyeq_ptr)git_oid_cmp); - if (repo->objects == NULL) { + if (repo->objects == NULL) { free(repo); return NULL; } @@ -369,8 +357,8 @@ cleanup: void git_repository_free(git_repository *repo) { - git_hashtable_iterator it; git_object *object; + const git_oid *oid; if (repo == NULL) return; @@ -380,11 +368,9 @@ void git_repository_free(git_repository *repo) free(repo->path_repository); free(repo->path_odb); - git_hashtable_iterator_init(repo->objects, &it); - - while ((object = (git_object *) - git_hashtable_iterator_next(&it)) != NULL) + GIT_HASHTABLE_FOREACH(repo->objects, oid, object, { git_object_free(object); + }); git_hashtable_free(repo->objects); diff --git a/src/revwalk.c b/src/revwalk.c index e30e543a8..c073be13f 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ -28,28 +28,23 @@ #include "revwalk.h" #include "hashtable.h" -uint32_t git_revwalk__commit_hash(const void *key) +uint32_t git_revwalk__commit_hash(const void *key, int hash_id) { uint32_t r; git_commit *commit; commit = (git_commit *)key; - memcpy(&r, commit->object.id.id, sizeof(r)); + memcpy(&r, commit->object.id.id + (hash_id * sizeof(uint32_t)), sizeof(r)); return r; } -int git_revwalk__commit_haskey(void *object, const void *key) +int git_revwalk__commit_keycmp(const void *key_a, const void *key_b) { - git_revwalk_commit *walk_commit; - git_commit *commit_object; - - walk_commit = (git_revwalk_commit *)object; - commit_object = (git_commit *)key; - - return (walk_commit->commit_object == commit_object); + git_commit *a = (git_commit *)key_a; + git_commit *b = (git_commit *)key_b; + return git_oid_cmp(&a->object.id, &b->object.id); } - int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo) { git_revwalk *walk; @@ -62,7 +57,7 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo) walk->commits = git_hashtable_alloc(64, git_revwalk__commit_hash, - git_revwalk__commit_haskey); + git_revwalk__commit_keycmp); if (walk->commits == NULL) { free(walk); @@ -245,18 +240,15 @@ int git_revwalk_next(git_commit **commit, git_revwalk *walk) void git_revwalk_reset(git_revwalk *walk) { - git_hashtable_iterator it; + const void *_unused; git_revwalk_commit *commit; assert(walk); - git_hashtable_iterator_init(walk->commits, &it); - - while ((commit = (git_revwalk_commit *) - git_hashtable_iterator_next(&it)) != NULL) { + GIT_HASHTABLE_FOREACH(walk->commits, _unused, commit, { git_revwalk_list_clear(&commit->parents); free(commit); - } + }); git_hashtable_clear(walk->commits); git_revwalk_list_clear(&walk->iterator); diff --git a/tests/t07-hashtable.c b/tests/t07-hashtable.c index a0f32194c..76eb0d1a1 100644 --- a/tests/t07-hashtable.c +++ b/tests/t07-hashtable.c @@ -34,32 +34,26 @@ typedef struct _aux_object { int visited; } table_item; -uint32_t hash_func(const void *key) +uint32_t hash_func(const void *key, int hash_id) { uint32_t r; git_oid *id; id = (git_oid *)key; - memcpy(&r, id->id, sizeof(r)); + memcpy(&r, id->id + (hash_id * sizeof(uint32_t)), sizeof(r)); return r; } -int hash_haskey(void *item, const void *key) +int hash_cmpkey(const void *a, const void *b) { - table_item *obj; - git_oid *oid; - - obj = (table_item *)item; - oid = (git_oid *)key; - - return (git_oid_cmp(oid, &obj->id) == 0); + return git_oid_cmp(a, b); } BEGIN_TEST("table", table_create) git_hashtable *table = NULL; - table = git_hashtable_alloc(55, hash_func, hash_haskey); + table = git_hashtable_alloc(55, hash_func, hash_cmpkey); must_be_true(table != NULL); must_be_true(table->size_mask + 1 == 64); @@ -75,7 +69,7 @@ BEGIN_TEST("table", table_populate) table_item *objects; git_hashtable *table = NULL; - table = git_hashtable_alloc(objects_n * 2, hash_func, hash_haskey); + table = git_hashtable_alloc(objects_n * 2, hash_func, hash_cmpkey); must_be_true(table != NULL); objects = git__malloc(objects_n * sizeof(table_item)); @@ -123,7 +117,7 @@ BEGIN_TEST("table", table_resize) table_item *objects; git_hashtable *table = NULL; - table = git_hashtable_alloc(objects_n, hash_func, hash_haskey); + table = git_hashtable_alloc(objects_n, hash_func, hash_cmpkey); must_be_true(table != NULL); objects = git__malloc(objects_n * sizeof(table_item)); @@ -161,11 +155,11 @@ BEGIN_TEST("tableit", table_iterator) const int objects_n = 32; int i; table_item *objects, *ob; + const void *_unused; git_hashtable *table = NULL; - git_hashtable_iterator iterator; - table = git_hashtable_alloc(objects_n * 2, hash_func, hash_haskey); + table = git_hashtable_alloc(objects_n * 2, hash_func, hash_cmpkey); must_be_true(table != NULL); objects = git__malloc(objects_n * sizeof(table_item)); @@ -177,11 +171,9 @@ BEGIN_TEST("tableit", table_iterator) must_pass(git_hashtable_insert(table, &(objects[i].id), &(objects[i]))); } - git_hashtable_iterator_init(table, &iterator); - - /* iterate through all nodes, mark as visited */ - while ((ob = (table_item *)git_hashtable_iterator_next(&iterator)) != NULL) + GIT_HASHTABLE_FOREACH(table, _unused, ob, ob->visited = 1; + ); /* make sure all nodes have been visited */ for (i = 0; i < objects_n; ++i) @@ -189,7 +181,6 @@ BEGIN_TEST("tableit", table_iterator) git_hashtable_free(table); free(objects); - END_TEST