mirror of
https://git.proxmox.com/git/libgit2
synced 2025-05-16 08:53:54 +00:00

The old hash table with chained buckets has been replaced by a new one using Cuckoo hashing, which offers guaranteed constant lookup times. This should improve speeds on most use cases, since hash tables in libgit2 are usually used as caches where the objects are stored once and queried several times. The Cuckoo hash implementation is based off the one in the Basekit library [1] for the IO language, but rewritten to support an arbitrary number of hashes. We currently use 3 to maximize the usage of the nodes pool. [1]: https://github.com/stevedekorte/basekit/blob/master/source/CHash.c Signed-off-by: Vicent Marti <tanoku@gmail.com>
459 lines
9.2 KiB
C
459 lines
9.2 KiB
C
/*
|
|
* This file is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License, version 2,
|
|
* as published by the Free Software Foundation.
|
|
*
|
|
* In addition to the permissions in the GNU General Public License,
|
|
* the authors give you unlimited permission to link the compiled
|
|
* version of this file into combinations with other programs,
|
|
* and to distribute those combinations without any restriction
|
|
* coming from the use of this file. (The General Public License
|
|
* restrictions do apply in other respects; for example, they cover
|
|
* modification of the file, and distribution when not linked into
|
|
* a combined executable.)
|
|
*
|
|
* This file is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
|
* Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include "common.h"
|
|
#include "commit.h"
|
|
#include "revwalk.h"
|
|
#include "hashtable.h"
|
|
|
|
uint32_t git_revwalk__commit_hash(const void *key, int hash_id)
|
|
{
|
|
uint32_t r;
|
|
git_commit *commit;
|
|
|
|
commit = (git_commit *)key;
|
|
memcpy(&r, commit->object.id.id + (hash_id * sizeof(uint32_t)), sizeof(r));
|
|
return r;
|
|
}
|
|
|
|
int git_revwalk__commit_keycmp(const void *key_a, const void *key_b)
|
|
{
|
|
git_commit *a = (git_commit *)key_a;
|
|
git_commit *b = (git_commit *)key_b;
|
|
return git_oid_cmp(&a->object.id, &b->object.id);
|
|
}
|
|
|
|
/*
 * Allocate a zero-initialised revision walker bound to `repo`.
 *
 * On success the walker is stored in `*revwalk_out` and GIT_SUCCESS is
 * returned. If either the walker or its commit table cannot be
 * allocated, GIT_ENOMEM is returned and `*revwalk_out` is not written.
 */
int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo)
{
	git_revwalk *w = git__malloc(sizeof(git_revwalk));

	if (!w)
		return GIT_ENOMEM;

	memset(w, 0x0, sizeof(git_revwalk));
	w->repo = repo;

	/* 64 is the size argument given to the table allocator */
	w->commits = git_hashtable_alloc(64,
			git_revwalk__commit_hash,
			git_revwalk__commit_keycmp);

	if (!w->commits) {
		free(w);
		return GIT_ENOMEM;
	}

	*revwalk_out = w;
	return GIT_SUCCESS;
}
|
|
|
|
/*
 * Release a walker: reset it (which frees the per-commit entries),
 * free the commit table, then free the walker itself.
 * Passing NULL is a no-op.
 */
void git_revwalk_free(git_revwalk *walk)
{
	if (walk != NULL) {
		git_revwalk_reset(walk);
		git_hashtable_free(walk->commits);
		free(walk);
	}
}
|
|
|
|
/*
 * Return the repository this walker was created for.
 * `walk` must not be NULL (asserted).
 */
git_repository *git_revwalk_repository(git_revwalk *walk)
{
	assert(walk != NULL);

	return walk->repo;
}
|
|
|
|
/*
 * Change the sorting mode of the walker.
 *
 * Refused with GIT_EBUSY while a walk is in progress. Otherwise the
 * new mode is stored, the walker is reset, and GIT_SUCCESS is returned.
 */
int git_revwalk_sorting(git_revwalk *walk, unsigned int sort_mode)
{
	assert(walk != NULL);

	if (!walk->walking) {
		walk->sorting = sort_mode;
		git_revwalk_reset(walk);
		return GIT_SUCCESS;
	}

	return GIT_EBUSY;
}
|
|
|
|
/*
 * Find the walker-side entry for `commit_object`, creating it on first use.
 *
 * The walker's commit table is consulted first; on a miss a zeroed
 * git_revwalk_commit is allocated, pointed at `commit_object` and
 * inserted into the table.
 *
 * Returns the entry, or NULL if the entry could not be allocated or
 * could not be stored in the table.
 */
static git_revwalk_commit *commit_to_walkcommit(git_revwalk *walk, git_commit *commit_object)
{
	git_revwalk_commit *commit;

	commit = (git_revwalk_commit *)git_hashtable_lookup(walk->commits, commit_object);
	if (commit != NULL)
		return commit;

	commit = git__malloc(sizeof(git_revwalk_commit));
	if (commit == NULL)
		return NULL;

	memset(commit, 0x0, sizeof(git_revwalk_commit));
	commit->commit_object = commit_object;

	/*
	 * An entry that is not in the table would never be released by
	 * git_revwalk_reset(), so do not hand it out if the insert fails.
	 * NOTE(review): assumes a failed insert leaves no reference to the
	 * new entry inside the table -- confirm against hashtable.c.
	 */
	if (git_hashtable_insert(walk->commits, commit_object, commit) != GIT_SUCCESS) {
		free(commit);
		return NULL;
	}

	return commit;
}
|
|
|
|
/*
 * Add `commit_object` and, recursively, all of its parents to the walk.
 *
 * For each commit seen for the first time, every parent is inserted
 * first, linked into the commit's `parents` list and has its
 * `in_degree` bumped; the commit itself is then appended to the
 * walker's iterator list. A commit already marked `seen` is returned
 * untouched.
 *
 * Returns the walker-side entry, or NULL when the commit belongs to a
 * different repository, the walker is already walking, or an
 * allocation fails.
 */
static git_revwalk_commit *insert_commit(git_revwalk *walk, git_commit *commit_object)
{
	git_revwalk_commit *commit;
	unsigned int i;

	assert(walk && commit_object);

	if (commit_object->object.repo != walk->repo || walk->walking)
		return NULL;

	commit = commit_to_walkcommit(walk, commit_object);
	if (commit == NULL)
		return NULL;

	if (commit->seen)
		return commit;

	commit->seen = 1;

	for (i = 0; i < commit->commit_object->parents.length; ++i) {
		git_commit *parent_object;
		git_revwalk_commit *parent;

		parent_object = git_vector_get(&commit->commit_object->parents, i);

		/* the recursive call looks up / creates the parent's entry itself */
		parent = insert_commit(walk, parent_object);
		if (parent == NULL)
			return NULL;

		/*
		 * Record the edge before counting it, so that a failed
		 * allocation cannot leave in_degree larger than the number
		 * of edges actually stored.
		 */
		if (git_revwalk_list_push_back(&commit->parents, parent))
			return NULL;

		parent->in_degree++;
	}

	if (git_revwalk_list_push_back(&walk->iterator, commit))
		return NULL;

	return commit;
}
|
|
|
|
/*
 * Queue `commit` (and its ancestry) for walking.
 *
 * Returns GIT_SUCCESS when the commit was inserted, GIT_ENOMEM for any
 * failure reported by insert_commit().
 */
int git_revwalk_push(git_revwalk *walk, git_commit *commit)
{
	assert(walk && commit);

	if (insert_commit(walk, commit) == NULL)
		return GIT_ENOMEM;

	return GIT_SUCCESS;
}
|
|
|
|
/*
 * Flag `commit` and every commit reachable through its `parents` list
 * as uninteresting, so git_revwalk_next() skips them.
 *
 * A commit that already carries the flag is not descended into again:
 * within this file the flag is only ever set here, immediately before
 * the parents are visited, so its ancestors have been handled already.
 * Without this check, ancestors shared by several paths (merges) are
 * re-walked once per path.
 */
static void mark_uninteresting(git_revwalk_commit *commit)
{
	git_revwalk_listnode *parent;

	assert(commit);

	if (commit->uninteresting)
		return;

	commit->uninteresting = 1;

	for (parent = commit->parents.head; parent != NULL; parent = parent->next)
		mark_uninteresting(parent->walk_commit);
}
|
|
|
|
/*
 * Exclude `commit` and its ancestry from the walk.
 *
 * The commit is inserted like a pushed one and then flagged, together
 * with its ancestors, as uninteresting. Returns GIT_SUCCESS, or
 * GIT_ENOMEM for any failure reported by insert_commit().
 */
int git_revwalk_hide(git_revwalk *walk, git_commit *commit)
{
	git_revwalk_commit *hidden;

	assert(walk && commit);

	hidden = insert_commit(walk, commit);
	if (hidden != NULL) {
		mark_uninteresting(hidden);
		return GIT_SUCCESS;
	}

	return GIT_ENOMEM;
}
|
|
|
|
|
|
/*
 * Get the iterator list ready for walking according to walk->sorting:
 * time sort first, then topological sort, then choose which end of the
 * list git_revwalk_next() pops from (the back when GIT_SORT_REVERSE is
 * set, the front otherwise). Finally mark the walker as walking.
 */
static void prepare_walk(git_revwalk *walk)
{
	if (walk->sorting & GIT_SORT_TIME)
		git_revwalk_list_timesort(&walk->iterator);

	if (walk->sorting & GIT_SORT_TOPOLOGICAL)
		git_revwalk_list_toposort(&walk->iterator);

	walk->next = (walk->sorting & GIT_SORT_REVERSE)
		? &git_revwalk_list_pop_back
		: &git_revwalk_list_pop_front;

	walk->walking = 1;
}
|
|
|
|
/*
 * Fetch the next interesting commit of the walk.
 *
 * The first call prepares (sorts) the walk. Entries flagged as
 * uninteresting are popped and skipped. On success `*commit` receives
 * the commit object and GIT_SUCCESS is returned. Once the list is
 * exhausted, `*commit` is left NULL, the walker is reset and
 * GIT_EREVWALKOVER is returned.
 */
int git_revwalk_next(git_commit **commit, git_revwalk *walk)
{
	git_revwalk_commit *candidate;

	assert(walk && commit);

	if (!walk->walking)
		prepare_walk(walk);

	*commit = NULL;

	for (;;) {
		candidate = walk->next(&walk->iterator);
		if (candidate == NULL)
			break;

		if (candidate->uninteresting)
			continue;

		*commit = candidate->commit_object;
		return GIT_SUCCESS;
	}

	/* No commits left to iterate */
	git_revwalk_reset(walk);
	return GIT_EREVWALKOVER;
}
|
|
|
|
/*
 * Drop all walk state: free every per-commit entry stored in the
 * commit table (together with its parents list), empty the table,
 * empty the iterator list and clear the `walking` flag.
 */
void git_revwalk_reset(git_revwalk *walk)
{
	git_revwalk_commit *commit;
	const void *_unused;

	assert(walk != NULL);

	/* the table only stores pointers; the entries are owned here */
	GIT_HASHTABLE_FOREACH(walk->commits, _unused, commit, {
		git_revwalk_list_clear(&commit->parents);
		free(commit);
	});

	git_hashtable_clear(walk->commits);
	git_revwalk_list_clear(&walk->iterator);

	walk->walking = 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Append `commit` to the tail of `list`.
 * Returns 0 on success, GIT_ENOMEM if the list node cannot be allocated.
 */
int git_revwalk_list_push_back(git_revwalk_list *list, git_revwalk_commit *commit)
{
	git_revwalk_listnode *fresh = git__malloc(sizeof(git_revwalk_listnode));

	if (!fresh)
		return GIT_ENOMEM;

	fresh->walk_commit = commit;
	fresh->prev = list->tail;
	fresh->next = NULL;

	/* an empty list gets the node as head too */
	if (list->tail != NULL)
		list->tail->next = fresh;
	else
		list->head = fresh;

	list->tail = fresh;
	list->size++;

	return 0;
}
|
|
|
|
/*
 * Prepend `commit` to the head of `list`.
 * Returns 0 on success, GIT_ENOMEM if the list node cannot be allocated.
 */
int git_revwalk_list_push_front(git_revwalk_list *list, git_revwalk_commit *commit)
{
	git_revwalk_listnode *fresh = git__malloc(sizeof(git_revwalk_listnode));

	if (!fresh)
		return GIT_ENOMEM;

	fresh->walk_commit = commit;
	fresh->prev = NULL;
	fresh->next = list->head;

	/* an empty list gets the node as tail too */
	if (list->head != NULL)
		list->head->prev = fresh;
	else
		list->tail = fresh;

	list->head = fresh;
	list->size++;

	return 0;
}
|
|
|
|
|
|
/*
 * Remove the last node of `list` and return the commit it carried.
 * Returns NULL when the list is empty. The node itself is freed.
 */
git_revwalk_commit *git_revwalk_list_pop_back(git_revwalk_list *list)
{
	git_revwalk_listnode *last = list->tail;
	git_revwalk_commit *payload;

	if (last == NULL)
		return NULL;

	list->tail = last->prev;

	if (list->tail != NULL)
		list->tail->next = NULL;
	else
		list->head = NULL;	/* list became empty */

	payload = last->walk_commit;
	free(last);
	list->size--;

	return payload;
}
|
|
|
|
/*
 * Remove the first node of `list` and return the commit it carried.
 * Returns NULL when the list is empty. The node itself is freed.
 */
git_revwalk_commit *git_revwalk_list_pop_front(git_revwalk_list *list)
{
	git_revwalk_listnode *first = list->head;
	git_revwalk_commit *payload;

	if (first == NULL)
		return NULL;

	list->head = first->next;

	if (list->head != NULL)
		list->head->prev = NULL;
	else
		list->tail = NULL;	/* list became empty */

	payload = first->walk_commit;
	free(first);
	list->size--;

	return payload;
}
|
|
|
|
/*
 * Free every node of `list` and leave it empty.
 * Only the nodes are freed; the commits they point to are untouched.
 */
void git_revwalk_list_clear(git_revwalk_list *list)
{
	git_revwalk_listnode *cur = list->head;

	while (cur != NULL) {
		git_revwalk_listnode *doomed = cur;

		cur = cur->next;
		free(doomed);
	}

	list->size = 0;
	list->head = list->tail = NULL;
}
|
|
|
|
/*
 * Sort `list` in place by committer time, newest first, using a
 * bottom-up merge sort on the linked list: runs of `run_len` nodes are
 * merged pairwise, and `run_len` doubles each pass until a pass needs
 * at most one merge. When two commits carry the same time the one from
 * the left run is taken first, so the sort is stable.
 */
void git_revwalk_list_timesort(git_revwalk_list *list)
{
	git_revwalk_listnode *left, *right, *picked;
	int run_len, left_n, right_n, merges, k;

	if (list->head == NULL)
		return;

	run_len = 1;

	do {
		left = list->head;
		list->tail = NULL;
		merges = 0;

		while (left != NULL) {
			merges++;

			/* step `right` past the left run, counting its real length */
			right = left;
			left_n = 0;
			for (k = 0; k < run_len && right != NULL; ++k) {
				left_n++;
				right = right->next;
			}

			right_n = run_len;

			/* merge the two runs onto the tail of the rebuilt list */
			while (left_n > 0 || (right_n > 0 && right != NULL)) {
				int take_left = 0;

				if (left_n > 0) {
					take_left = (right_n == 0 || right == NULL ||
						left->walk_commit->commit_object->committer->when.time >=
						right->walk_commit->commit_object->committer->when.time);
				}

				if (take_left) {
					picked = left;
					left = left->next;
					left_n--;
				} else {
					picked = right;
					right = right->next;
					right_n--;
				}

				if (list->tail == NULL)
					list->head = picked;
				else
					list->tail->next = picked;

				picked->prev = list->tail;
				list->tail = picked;
			}

			/* the next pair of runs starts where the right run ended */
			left = right;
		}

		list->tail->next = NULL;
		run_len *= 2;

	} while (merges > 1);
}
|
|
|
|
/*
 * Rebuild `list` in topological order.
 *
 * Commits are popped from the back of `list`. One whose in_degree is
 * still positive is set aside (topo_delay) until the commits counted
 * in its in_degree have been emitted. Emitting a commit decrements the
 * in_degree of each of its parents and re-queues any delayed parent
 * that reaches zero. The emitted sequence then replaces the contents
 * of `list`. Note that the in_degree counters are consumed by this
 * process.
 */
void git_revwalk_list_toposort(git_revwalk_list *list)
{
	git_revwalk_list sorted;
	git_revwalk_commit *current;

	memset(&sorted, 0x0, sizeof(git_revwalk_list));

	for (;;) {
		git_revwalk_listnode *edge;

		current = git_revwalk_list_pop_back(list);
		if (current == NULL)
			break;

		if (current->in_degree > 0) {
			current->topo_delay = 1;
			continue;
		}

		edge = current->parents.head;
		while (edge != NULL) {
			git_revwalk_commit *parent = edge->walk_commit;

			parent->in_degree--;

			if (parent->in_degree == 0 && parent->topo_delay) {
				parent->topo_delay = 0;
				git_revwalk_list_push_back(list, parent);
			}

			edge = edge->next;
		}

		git_revwalk_list_push_back(&sorted, current);
	}

	list->head = sorted.head;
	list->tail = sorted.tail;
	list->size = sorted.size;
}
|
|
|