mirror of
https://git.proxmox.com/git/libgit2
synced 2025-05-21 08:24:34 +00:00
Adding stash to hashtable implementation
Adding a small stash of nodes with key conflicts has been demonstrated to greatly increase the efficiency of a cuckoo hashtable. See: http://research.microsoft.com/pubs/73856/stash-full.9-30.pdf for more details.
This commit is contained in:
parent
25f258e735
commit
c16c8b9a7e
143
src/hashtable.c
143
src/hashtable.c
@ -18,28 +18,37 @@ static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key,
|
||||
static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other);
|
||||
static int node_insert(git_hashtable *self, git_hashtable_node *new_node);
|
||||
static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size);
|
||||
static void reinsert_stash(git_hashtable *self);
|
||||
|
||||
static int resize_to(git_hashtable *self, size_t new_size)
|
||||
{
|
||||
git_hashtable_node *old_nodes = self->nodes;
|
||||
size_t old_size = self->size;
|
||||
git_hashtable_node old_stash[GIT_HASHTABLE_STASH_SIZE];
|
||||
size_t old_stash_count = self->stash_count;
|
||||
|
||||
self->is_resizing = 1;
|
||||
|
||||
if (old_stash_count > 0)
|
||||
memcpy(old_stash, self->stash,
|
||||
old_stash_count * sizeof(git_hashtable_node));
|
||||
|
||||
do {
|
||||
self->size = new_size;
|
||||
self->size_mask = new_size - 1;
|
||||
self->key_count = 0;
|
||||
self->stash_count = 0;
|
||||
self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size);
|
||||
GITERR_CHECK_ALLOC(self->nodes);
|
||||
|
||||
if (insert_nodes(self, old_nodes, old_size) == 0)
|
||||
if (insert_nodes(self, old_nodes, old_size) == 0 &&
|
||||
insert_nodes(self, old_stash, old_stash_count) == 0)
|
||||
self->is_resizing = 0;
|
||||
else {
|
||||
new_size *= 2;
|
||||
git__free(self->nodes);
|
||||
}
|
||||
} while(self->is_resizing);
|
||||
} while (self->is_resizing);
|
||||
|
||||
git__free(old_nodes);
|
||||
return 0;
|
||||
@ -47,26 +56,28 @@ static int resize_to(git_hashtable *self, size_t new_size)
|
||||
|
||||
static int set_size(git_hashtable *self, size_t new_size)
|
||||
{
|
||||
self->nodes = git__realloc(self->nodes, new_size * sizeof(git_hashtable_node));
|
||||
self->nodes =
|
||||
git__realloc(self->nodes, new_size * sizeof(git_hashtable_node));
|
||||
GITERR_CHECK_ALLOC(self->nodes);
|
||||
|
||||
if (new_size > self->size) {
|
||||
if (new_size > self->size)
|
||||
memset(&self->nodes[self->size], 0x0,
|
||||
(new_size - self->size) * sizeof(git_hashtable_node));
|
||||
}
|
||||
|
||||
self->size = new_size;
|
||||
self->size_mask = new_size - 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id)
|
||||
GIT_INLINE(git_hashtable_node *)node_with_hash(
|
||||
git_hashtable *self, const void *key, int hash_id)
|
||||
{
|
||||
size_t pos = self->hash(key, hash_id) & self->size_mask;
|
||||
return git_hashtable_node_at(self->nodes, pos);
|
||||
}
|
||||
|
||||
static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other)
|
||||
GIT_INLINE(void) node_swap_with(
|
||||
git_hashtable_node *self, git_hashtable_node *other)
|
||||
{
|
||||
git_hashtable_node tmp = *self;
|
||||
*self = *other;
|
||||
@ -76,19 +87,26 @@ static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other)
|
||||
static int node_insert(git_hashtable *self, git_hashtable_node *new_node)
|
||||
{
|
||||
int iteration, hash_id;
|
||||
git_hashtable_node *node;
|
||||
|
||||
for (iteration = 0; iteration < MAX_LOOPS; iteration++) {
|
||||
for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
|
||||
git_hashtable_node *node;
|
||||
node = node_with_hash(self, new_node->key, hash_id);
|
||||
node_swap_with(new_node, node);
|
||||
if (new_node->key == 0x0){
|
||||
if (new_node->key == 0x0) {
|
||||
self->key_count++;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert into stash if there is space */
|
||||
if (self->stash_count < GIT_HASHTABLE_STASH_SIZE) {
|
||||
node_swap_with(new_node, &self->stash[self->stash_count++]);
|
||||
self->key_count++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Failed to insert node. Hashtable is currently resizing */
|
||||
assert(!self->is_resizing);
|
||||
|
||||
@ -105,14 +123,29 @@ static int insert_nodes(
|
||||
|
||||
for (i = 0; i < old_size; ++i) {
|
||||
git_hashtable_node *node = git_hashtable_node_at(old_nodes, i);
|
||||
if (node->key &&
|
||||
git_hashtable_insert(self, node->key, node->value) < 0)
|
||||
if (node->key && node_insert(self, node) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void reinsert_stash(git_hashtable *self)
|
||||
{
|
||||
int stash_count;
|
||||
struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE];
|
||||
|
||||
if (self->stash_count <= 0)
|
||||
return;
|
||||
|
||||
memcpy(stash, self->stash, self->stash_count * sizeof(git_hashtable_node));
|
||||
stash_count = self->stash_count;
|
||||
self->stash_count = 0;
|
||||
|
||||
/* the node_insert() calls *cannot* fail because the stash is empty */
|
||||
insert_nodes(self, stash, stash_count);
|
||||
}
|
||||
|
||||
git_hashtable *git_hashtable_alloc(
|
||||
size_t min_size,
|
||||
git_hash_ptr hash,
|
||||
@ -127,21 +160,11 @@ git_hashtable *git_hashtable_alloc(
|
||||
|
||||
memset(table, 0x0, sizeof(git_hashtable));
|
||||
|
||||
if (min_size < 8)
|
||||
min_size = 8;
|
||||
|
||||
/* round up size to closest power of 2 */
|
||||
min_size--;
|
||||
min_size |= min_size >> 1;
|
||||
min_size |= min_size >> 2;
|
||||
min_size |= min_size >> 4;
|
||||
min_size |= min_size >> 8;
|
||||
min_size |= min_size >> 16;
|
||||
|
||||
table->hash = hash;
|
||||
table->key_equal = key_eq;
|
||||
|
||||
set_size(table, min_size + 1);
|
||||
min_size = git__size_t_powerof2(min_size < 8 ? 8 : min_size);
|
||||
set_size(table, min_size);
|
||||
|
||||
return table;
|
||||
}
|
||||
@ -151,6 +174,8 @@ void git_hashtable_clear(git_hashtable *self)
|
||||
assert(self);
|
||||
|
||||
memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size);
|
||||
|
||||
self->stash_count = 0;
|
||||
self->key_count = 0;
|
||||
}
|
||||
|
||||
@ -200,39 +225,70 @@ int git_hashtable_insert2(
|
||||
}
|
||||
}
|
||||
|
||||
void *git_hashtable_lookup(git_hashtable *self, const void *key)
|
||||
static git_hashtable_node *find_node(git_hashtable *self, const void *key)
|
||||
{
|
||||
int hash_id;
|
||||
int hash_id, count = 0;
|
||||
git_hashtable_node *node;
|
||||
|
||||
assert(self && self->nodes);
|
||||
|
||||
for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
|
||||
node = node_with_hash(self, key, hash_id);
|
||||
if (node->key && self->key_equal(key, node->key) == 0)
|
||||
return node->value;
|
||||
if (node->key) {
|
||||
++count;
|
||||
if (self->key_equal(key, node->key) == 0)
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
/* check stash if not found but all slots were filled */
|
||||
if (count == GIT_HASHTABLE_HASHES) {
|
||||
for (count = 0; count < self->stash_count; ++count)
|
||||
if (self->key_equal(key, self->stash[count].key) == 0)
|
||||
return &self->stash[count];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void reset_stash(git_hashtable *self, git_hashtable_node *node)
|
||||
{
|
||||
/* if node was in stash, then compact stash */
|
||||
ssize_t offset = node - self->stash;
|
||||
|
||||
if (offset >= 0 && offset < self->stash_count) {
|
||||
if (offset < self->stash_count - 1)
|
||||
memmove(node, node + 1, (self->stash_count - offset) *
|
||||
sizeof(git_hashtable_node));
|
||||
self->stash_count--;
|
||||
}
|
||||
|
||||
reinsert_stash(self);
|
||||
}
|
||||
|
||||
void *git_hashtable_lookup(git_hashtable *self, const void *key)
|
||||
{
|
||||
git_hashtable_node *node;
|
||||
assert(self && key);
|
||||
node = find_node(self, key);
|
||||
return node ? node->value : NULL;
|
||||
}
|
||||
|
||||
int git_hashtable_remove2(
|
||||
git_hashtable *self, const void *key, void **old_value)
|
||||
{
|
||||
int hash_id;
|
||||
git_hashtable_node *node;
|
||||
|
||||
assert(self && self->nodes);
|
||||
|
||||
for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
|
||||
node = node_with_hash(self, key, hash_id);
|
||||
if (node->key && self->key_equal(key, node->key) == 0) {
|
||||
*old_value = node->value;
|
||||
node->key = NULL;
|
||||
node->value = NULL;
|
||||
self->key_count--;
|
||||
return 0;
|
||||
}
|
||||
node = find_node(self, key);
|
||||
if (node) {
|
||||
*old_value = node->value;
|
||||
|
||||
node->key = NULL;
|
||||
node->value = NULL;
|
||||
self->key_count--;
|
||||
|
||||
reset_stash(self, node);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return GIT_ENOTFOUND;
|
||||
@ -240,10 +296,15 @@ int git_hashtable_remove2(
|
||||
|
||||
int git_hashtable_merge(git_hashtable *self, git_hashtable *other)
|
||||
{
|
||||
if (resize_to(self, (self->size + other->size) * 2) < 0)
|
||||
size_t new_size = git__size_t_powerof2(self->size + other->size);
|
||||
|
||||
if (resize_to(self, new_size) < 0)
|
||||
return -1;
|
||||
|
||||
return insert_nodes(self, other->nodes, other->key_count);
|
||||
if (insert_nodes(self, other->nodes, other->key_count) < 0)
|
||||
return -1;
|
||||
|
||||
return insert_nodes(self, other->stash, other->stash_count);
|
||||
}
|
||||
|
||||
|
||||
|
@ -22,6 +22,8 @@ struct git_hashtable_node {
|
||||
void *value;
|
||||
};
|
||||
|
||||
#define GIT_HASHTABLE_STASH_SIZE 3
|
||||
|
||||
struct git_hashtable {
|
||||
struct git_hashtable_node *nodes;
|
||||
|
||||
@ -29,6 +31,9 @@ struct git_hashtable {
|
||||
size_t size;
|
||||
size_t key_count;
|
||||
|
||||
struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE];
|
||||
int stash_count;
|
||||
|
||||
int is_resizing;
|
||||
|
||||
git_hash_ptr hash;
|
||||
@ -38,9 +43,11 @@ struct git_hashtable {
|
||||
typedef struct git_hashtable_node git_hashtable_node;
|
||||
typedef struct git_hashtable git_hashtable;
|
||||
|
||||
git_hashtable *git_hashtable_alloc(size_t min_size,
|
||||
git_hash_ptr hash,
|
||||
git_hash_keyeq_ptr key_eq);
|
||||
git_hashtable *git_hashtable_alloc(
|
||||
size_t min_size,
|
||||
git_hash_ptr hash,
|
||||
git_hash_keyeq_ptr key_eq);
|
||||
|
||||
void *git_hashtable_lookup(git_hashtable *h, const void *key);
|
||||
int git_hashtable_remove2(git_hashtable *table, const void *key, void **old_value);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user