diff --git a/src/sortedcache.c b/src/sortedcache.c
new file mode 100644
index 000000000..6015d616d
--- /dev/null
+++ b/src/sortedcache.c
@@ -0,0 +1,307 @@
+#include "sortedcache.h"
+
+GIT__USE_STRMAP;
+
+/* create a new sortedcache holding one reference; path may be NULL */
+int git_sortedcache_new(
+	git_sortedcache **out,
+	size_t item_path_offset,
+	git_sortedcache_free_item_fn free_item,
+	void *free_item_payload,
+	git_vector_cmp item_cmp,
+	const char *path)
+{
+	git_sortedcache *sc;
+	size_t pathlen;
+
+	pathlen = path ? strlen(path) : 0;
+
+	sc = git__calloc(sizeof(git_sortedcache) + pathlen + 1, 1);
+	GITERR_CHECK_ALLOC(sc);
+
+	if (git_pool_init(&sc->pool, 1, 0) < 0 ||
+		git_vector_init(&sc->items, 4, item_cmp) < 0 ||
+		(sc->map = git_strmap_alloc()) == NULL)
+		goto fail;
+
+	if (git_mutex_init(&sc->lock)) {
+		giterr_set(GITERR_OS, "Failed to initialize mutex");
+		goto fail;
+	}
+
+	sc->item_path_offset = item_path_offset;
+	sc->free_item = free_item;
+	sc->free_item_payload = free_item_payload;
+	GIT_REFCOUNT_INC(sc);
+	if (pathlen)
+		memcpy(sc->path, path, pathlen);
+
+	*out = sc;
+	return 0;
+
+fail:
+	if (sc->map)
+		git_strmap_free(sc->map);
+	git_vector_free(&sc->items);
+	git_pool_clear(&sc->pool);
+	git__free(sc);
+	return -1;
+}
+
+/* take an additional reference on the cache */
+void git_sortedcache_incref(git_sortedcache *sc)
+{
+	GIT_REFCOUNT_INC(sc);
+}
+
+/* drop every item, calling free_item on each; does no locking itself */
+static void sortedcache_clear(git_sortedcache *sc)
+{
+	git_strmap_clear(sc->map);
+
+	if (sc->free_item) {
+		size_t i;
+		void *item;
+
+		git_vector_foreach(&sc->items, i, item) {
+			sc->free_item(sc->free_item_payload, item);
+		}
+	}
+
+	git_vector_clear(&sc->items);
+
+	git_pool_clear(&sc->pool);
+}
+
+/* teardown callback handed to GIT_REFCOUNT_DEC; locks the cache itself */
+static void sortedcache_free(git_sortedcache *sc)
+{
+	if (git_mutex_lock(&sc->lock) < 0) {
+		giterr_set(GITERR_OS, "Unable to acquire mutex lock for free");
+		return;
+	}
+
+	sortedcache_clear(sc);
+
+	git_vector_free(&sc->items);
+	git_strmap_free(sc->map);
+
+	git_mutex_unlock(&sc->lock);
+	git_mutex_free(&sc->lock);
+
+	git__free(sc);
+}
+
+/* release one reference to the cache; a NULL cache is ignored */
+void git_sortedcache_free(git_sortedcache *sc)
+{
+	if (!sc)
+		return;
+	GIT_REFCOUNT_DEC(sc, sortedcache_free);
+}
+
+/* default copy_item: memcpy the fixed-size fields that precede the path */
+static int sortedcache_copy_item(void *payload, void *tgt_item, void *src_item)
+{
+	git_sortedcache *sc = payload;
+	/* path will already have been copied by upsert */
+	memcpy(tgt_item, src_item, sc->item_path_offset);
+	return 0;
+}
+
+/* copy a sorted cache; src is locked for the duration of the copy */
+int git_sortedcache_copy(
+	git_sortedcache **out,
+	git_sortedcache *src,
+	int (*copy_item)(void *payload, void *tgt_item, void *src_item),
+	void *payload)
+{
+	git_sortedcache *tgt;
+	size_t i;
+	void *src_item, *tgt_item;
+
+	if (!copy_item) {
+		copy_item = sortedcache_copy_item;
+		payload = src;
+	}
+
+	if (git_sortedcache_new(
+			&tgt, src->item_path_offset,
+			src->free_item, src->free_item_payload,
+			src->items._cmp, src->path) < 0)
+		return -1;
+
+	if (git_sortedcache_lock(src) < 0) {
+		git_sortedcache_free(tgt);
+		return -1;
+	}
+
+	if (git_sortedcache_lock(tgt) < 0)
+		goto fail;
+
+	git_vector_foreach(&src->items, i, src_item) {
+		if (git_sortedcache_upsert(
+				&tgt_item, tgt, ((char *)src_item) + src->item_path_offset) < 0)
+			goto fail_locked;
+		if (copy_item(payload, tgt_item, src_item) < 0)
+			goto fail_locked;
+	}
+
+	git_sortedcache_unlock(tgt);
+	git_sortedcache_unlock(src);
+
+	*out = tgt;
+	return 0;
+
+fail_locked:
+	/* sortedcache_free() acquires tgt->lock itself, so drop it first */
+	git_sortedcache_unlock(tgt);
+fail:
+	git_sortedcache_unlock(src);
+	git_sortedcache_free(tgt);
+	return -1;
+}
+
+/* release all items in sorted cache */
+void git_sortedcache_clear(git_sortedcache *sc, bool lock)
+{
+	if (lock && git_mutex_lock(&sc->lock) < 0) {
+		giterr_set(GITERR_OS, "Unable to acquire mutex lock for clear");
+		return;
+	}
+
+	sortedcache_clear(sc);
+
+	if (lock)
+		git_mutex_unlock(&sc->lock);
+}
+
+/* check file stamp to see if reload is required */
+bool git_sortedcache_out_of_date(git_sortedcache *sc)
+{
+	return (git_futils_filestamp_check(&sc->stamp, sc->path) != 0);
+}
+
+/* lock sortedcache while making modifications */
+int git_sortedcache_lock(git_sortedcache *sc)
+{
+	if (git_mutex_lock(&sc->lock) < 0) {
+		giterr_set(GITERR_OS, "Unable to acquire mutex lock");
+		return -1;
+	}
+	return 0;
+}
+
+/* unlock sorted cache when done with modifications */
+int git_sortedcache_unlock(git_sortedcache *sc)
+{
+	git_vector_sort(&sc->items);
+	git_mutex_unlock(&sc->lock);
+	return 0;
+}
+
+/* if the file has changed, lock cache and load file contents into buf;
+ * returns <0 on error, 0 if file has not changed (cache is left unlocked),
+ * >0 if file has changed and was loaded (cache is left locked for caller)
+ */
+int git_sortedcache_lockandload(git_sortedcache *sc, git_buf *buf)
+{
+	int error, fd;
+
+	if ((error = git_sortedcache_lock(sc)) < 0)
+		return error;
+
+	if ((error = git_futils_filestamp_check(&sc->stamp, sc->path)) <= 0)
+		goto unlock;
+
+	if (!git__is_sizet(sc->stamp.size)) {
+		giterr_set(GITERR_INVALID, "Unable to load file larger than size_t");
+		error = -1;
+		goto unlock;
+	}
+
+	if ((fd = git_futils_open_ro(sc->path)) < 0) {
+		error = fd;
+		goto unlock;
+	}
+
+	if (buf)
+		error = git_futils_readbuffer_fd(buf, fd, (size_t)sc->stamp.size);
+
+	(void)p_close(fd);
+
+	if (error < 0)
+		goto unlock;
+
+	return 1; /* return 1 -> file needs reload and was successfully loaded */
+
+unlock:
+	git_sortedcache_unlock(sc);
+	return error;
+}
+
+/* find and/or insert item, returning pointer to item data */
+int git_sortedcache_upsert(
+	void **out, git_sortedcache *sc, const char *key)
+{
+	int error = 0;
+	khiter_t pos;
+	void *item;
+	size_t keylen;
+	char *item_key;
+
+	pos = git_strmap_lookup_index(sc->map, key);
+	if (git_strmap_valid_index(sc->map, pos)) {
+		item = git_strmap_value_at(sc->map, pos);
+		goto done;
+	}
+
+	keylen = strlen(key);
+	item = git_pool_mallocz(&sc->pool, sc->item_path_offset + keylen + 1);
+	GITERR_CHECK_ALLOC(item);
+
+	/* one strange thing is that even if the vector or hash table insert
+	 * fail, there is no way to free the pool item so we just abandon it
+	 */
+
+	item_key = ((char *)item) + sc->item_path_offset;
+	memcpy(item_key, key, keylen);
+
+	pos = kh_put(str, sc->map, item_key, &error);
+	if (error < 0)
+		goto done;
+
+	if (!error)
+		kh_key(sc->map, pos) = item_key;
+	kh_val(sc->map, pos) = item;
+
+	error = git_vector_insert(&sc->items, item);
+	if (error < 0)
+		git_strmap_delete_at(sc->map, pos);
+
+done:
+	if (out)
+		*out = !error ? item : NULL;
+	return error;
+}
+
+/* lookup item by key */
+void *git_sortedcache_lookup(const git_sortedcache *sc, const char *key)
+{
+	khiter_t pos = git_strmap_lookup_index(sc->map, key);
+	if (git_strmap_valid_index(sc->map, pos))
+		return git_strmap_value_at(sc->map, pos);
+	return NULL;
+}
+
+/* find out how many items are in the cache */
+size_t git_sortedcache_entrycount(const git_sortedcache *sc)
+{
+	return git_vector_length(&sc->items);
+}
+
+/* lookup item by index */
+void *git_sortedcache_entry(const git_sortedcache *sc, size_t pos)
+{
+	return git_vector_get(&sc->items, pos);
+}
diff --git a/src/sortedcache.h b/src/sortedcache.h
new file mode 100644
index 000000000..5d0d8f5a7
--- /dev/null
+++ b/src/sortedcache.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_sorted_cache_h__
+#define INCLUDE_sorted_cache_h__
+
+#include "util.h"
+#include "fileops.h"
+#include "vector.h"
+#include "thread-utils.h"
+#include "pool.h"
+#include "strmap.h"
+
+/*
+ * The purpose of this data structure is to cache the parsed contents of a
+ * file where each item in the file can be identified by a key string and
+ * you want to both look them up by name and traverse them in sorted
+ * order. Each item is assumed to itself end in a GIT_FLEX_ARRAY.
+ */
+
+typedef void (*git_sortedcache_free_item_fn)(void *payload, void *item);
+
+typedef struct {
+	git_refcount rc;
+	git_mutex lock;
+	size_t item_path_offset;
+	git_sortedcache_free_item_fn free_item;
+	void *free_item_payload;
+	git_pool pool;
+	git_vector items;
+	git_strmap *map;
+	git_futils_filestamp stamp;
+	char path[GIT_FLEX_ARRAY];
+} git_sortedcache;
+
+/* create a new sortedcache
+ *
+ * even though every sortedcache stores items with a GIT_FLEX_ARRAY at
+ * the end containing their key string, you have to provide the item_cmp
+ * sorting function because the sorting function doesn't get a payload
+ * and therefore can't know the offset to the item key string. :-(
+ */
+int git_sortedcache_new(
+	git_sortedcache **out,
+	size_t item_path_offset, /* use offsetof() macro */
+	git_sortedcache_free_item_fn free_item,
+	void *free_item_payload,
+	git_vector_cmp item_cmp,
+	const char *path);
+
+/* copy a sorted cache
+ *
+ * - copy_item can be NULL to memcpy
+ * - locks src while copying
+ */
+int git_sortedcache_copy(
+	git_sortedcache **out,
+	git_sortedcache *src,
+	int (*copy_item)(void *payload, void *tgt_item, void *src_item),
+	void *payload);
+
+/* free sorted cache (first calling free_item callbacks) */
+void git_sortedcache_free(git_sortedcache *sc);
+
+/* increment reference count */
+void git_sortedcache_incref(git_sortedcache *sc);
+
+/* release all items in sorted cache - lock during clear if lock is true */
+void git_sortedcache_clear(git_sortedcache *sc, bool lock);
+
+/* check file stamp to see if reload is required */
+bool git_sortedcache_out_of_date(git_sortedcache *sc);
+
+/* lock sortedcache while making modifications */
+int git_sortedcache_lock(git_sortedcache *sc);
+
+/* unlock sorted cache when done with modifications */
+int git_sortedcache_unlock(git_sortedcache *sc);
+
+/* if the file has changed, lock cache and load file contents into buf;
+ * the cache is left locked only when 1 is returned - unlock it when done
+ * @return 0 if up-to-date, 1 if out-of-date, <0 on error
+ */
+int git_sortedcache_lockandload(git_sortedcache *sc, git_buf *buf);
+
+/* find and/or insert item, returning pointer to item data - lock first */
+int git_sortedcache_upsert(
+	void **out, git_sortedcache *sc, const char *key);
+
+/* lookup item by key */
+void *git_sortedcache_lookup(const git_sortedcache *sc, const char *key);
+
+/* find out how many items are in the cache */
+size_t git_sortedcache_entrycount(const git_sortedcache *sc);
+
+/* lookup item by index */
+void *git_sortedcache_entry(const git_sortedcache *sc, size_t pos);
+
+#endif
diff --git a/src/vector.h b/src/vector.h
index 1bda9c93d..ad1c34ea1 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -55,6 +55,11 @@ GIT_INLINE(void *) git_vector_get(const git_vector *v, size_t position)
 
 #define GIT_VECTOR_GET(V,I) ((I) < (V)->length ? (V)->contents[(I)] : NULL)
 
+GIT_INLINE(size_t) git_vector_length(const git_vector *v)
+{
+	return v->length;
+}
+
 GIT_INLINE(void *) git_vector_last(const git_vector *v)
 {
 	return (v->length > 0) ? git_vector_get(v, v->length - 1) : NULL;
diff --git a/tests-clar/core/sortedcache.c b/tests-clar/core/sortedcache.c
new file mode 100644
index 000000000..f192af31d
--- /dev/null
+++ b/tests-clar/core/sortedcache.c
@@ -0,0 +1,298 @@
+#include "clar_libgit2.h"
+#include "sortedcache.h"
+
+static int name_only_cmp(const void *a, const void *b)
+{
+	return strcmp(a, b);
+}
+
+void test_core_sortedcache__name_only(void)
+{
+	git_sortedcache *sc;
+	void *item;
+
+	cl_git_pass(git_sortedcache_new(
+		&sc, 0, NULL, NULL, name_only_cmp, NULL));
+
+	cl_git_pass(git_sortedcache_lock(sc));
+	cl_git_pass(git_sortedcache_upsert(&item, sc, "aaa"));
+	cl_git_pass(git_sortedcache_upsert(&item, sc, "bbb"));
+	cl_git_pass(git_sortedcache_upsert(&item, sc, "zzz"));
+	cl_git_pass(git_sortedcache_upsert(&item, sc, "mmm"));
+	cl_git_pass(git_sortedcache_upsert(&item, sc, "iii"));
+	cl_git_pass(git_sortedcache_unlock(sc));
+
+	cl_assert_equal_sz(5, git_sortedcache_entrycount(sc));
+
+	cl_assert((item = git_sortedcache_lookup(sc, "aaa")) != NULL);
+	cl_assert_equal_s("aaa", item);
+	cl_assert((item = git_sortedcache_lookup(sc, "mmm")) != NULL);
+	cl_assert_equal_s("mmm", item);
+	cl_assert((item = git_sortedcache_lookup(sc, "zzz")) != NULL);
+	cl_assert_equal_s("zzz", item);
+	cl_assert(git_sortedcache_lookup(sc, "qqq") == NULL);
+
+	cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
+	cl_assert_equal_s("aaa", item);
+	cl_assert((item = git_sortedcache_entry(sc, 1)) != NULL);
+	cl_assert_equal_s("bbb", item);
+	cl_assert((item = git_sortedcache_entry(sc, 2)) != NULL);
+	cl_assert_equal_s("iii", item);
+	cl_assert((item = git_sortedcache_entry(sc, 3)) != NULL);
+	cl_assert_equal_s("mmm", item);
+	cl_assert((item = git_sortedcache_entry(sc, 4)) != NULL);
+	cl_assert_equal_s("zzz", item);
+	cl_assert(git_sortedcache_entry(sc, 5) == NULL);
+
+	git_sortedcache_clear(sc, true);
+
+	cl_assert_equal_sz(0, git_sortedcache_entrycount(sc));
+	cl_assert(git_sortedcache_entry(sc, 0) == NULL);
+	cl_assert(git_sortedcache_lookup(sc, "aaa") == NULL);
+	cl_assert(git_sortedcache_entry(sc, 0) == NULL);
+
+	git_sortedcache_free(sc);
+}
+
+typedef struct {
+	int value;
+	char smaller_value;
+	char path[GIT_FLEX_ARRAY];
+} sortedcache_test_struct;
+
+static int sortedcache_test_struct_cmp(const void *a_, const void *b_)
+{
+	const sortedcache_test_struct *a = a_, *b = b_;
+	return strcmp(a->path, b->path);
+}
+
+static void sortedcache_test_struct_free(void *payload, void *item_)
+{
+	sortedcache_test_struct *item = item_;
+	int *count = payload;
+	(*count)++;
+	item->smaller_value = 0;
+}
+
+void test_core_sortedcache__in_memory(void)
+{
+	git_sortedcache *sc;
+	sortedcache_test_struct *item;
+	int free_count = 0;
+
+	cl_git_pass(git_sortedcache_new(
+		&sc, offsetof(sortedcache_test_struct, path),
+		sortedcache_test_struct_free, &free_count,
+		sortedcache_test_struct_cmp, NULL));
+
+	cl_git_pass(git_sortedcache_lock(sc));
+	cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "aaa"));
+	item->value = 10;
+	item->smaller_value = 1;
+	cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "bbb"));
+	item->value = 20;
+	item->smaller_value = 2;
+	cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "zzz"));
+	item->value = 30;
+	item->smaller_value = 26;
+	cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "mmm"));
+	item->value = 40;
+	item->smaller_value = 14;
+	cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "iii"));
+	item->value = 50;
+	item->smaller_value = 9;
+	cl_git_pass(git_sortedcache_unlock(sc));
+
+	cl_assert_equal_sz(5, git_sortedcache_entrycount(sc));
+
+	cl_assert((item = git_sortedcache_lookup(sc, "aaa")) != NULL);
+	cl_assert_equal_s("aaa", item->path);
+	cl_assert_equal_i(10, item->value);
+	cl_assert((item = git_sortedcache_lookup(sc, "mmm")) != NULL);
+	cl_assert_equal_s("mmm", item->path);
+	cl_assert_equal_i(40, item->value);
+	cl_assert((item = git_sortedcache_lookup(sc, "zzz")) != NULL);
+	cl_assert_equal_s("zzz", item->path);
+	cl_assert_equal_i(30, item->value);
+	cl_assert(git_sortedcache_lookup(sc, "abc") == NULL);
+
+	cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
+	cl_assert_equal_s("aaa", item->path);
+	cl_assert_equal_i(10, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 1)) != NULL);
+	cl_assert_equal_s("bbb", item->path);
+	cl_assert_equal_i(20, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 2)) != NULL);
+	cl_assert_equal_s("iii", item->path);
+	cl_assert_equal_i(50, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 3)) != NULL);
+	cl_assert_equal_s("mmm", item->path);
+	cl_assert_equal_i(40, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 4)) != NULL);
+	cl_assert_equal_s("zzz", item->path);
+	cl_assert_equal_i(30, item->value);
+	cl_assert(git_sortedcache_entry(sc, 5) == NULL);
+
+	cl_assert_equal_i(0, free_count);
+
+	git_sortedcache_clear(sc, true);
+
+	cl_assert_equal_i(5, free_count);
+
+	cl_assert_equal_sz(0, git_sortedcache_entrycount(sc));
+	cl_assert(git_sortedcache_entry(sc, 0) == NULL);
+	cl_assert(git_sortedcache_lookup(sc, "aaa") == NULL);
+	cl_assert(git_sortedcache_entry(sc, 0) == NULL);
+
+	free_count = 0;
+
+	cl_git_pass(git_sortedcache_lock(sc));
+	cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "testing"));
+	item->value = 10;
+	item->smaller_value = 3;
+	cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "again"));
+	item->value = 20;
+	item->smaller_value = 1;
+	cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "final"));
+	item->value = 30;
+	item->smaller_value = 2;
+	cl_git_pass(git_sortedcache_unlock(sc));
+
+	cl_assert_equal_sz(3, git_sortedcache_entrycount(sc));
+
+	cl_assert((item = git_sortedcache_lookup(sc, "testing")) != NULL);
+	cl_assert_equal_s("testing", item->path);
+	cl_assert_equal_i(10, item->value);
+	cl_assert((item = git_sortedcache_lookup(sc, "again")) != NULL);
+	cl_assert_equal_s("again", item->path);
+	cl_assert_equal_i(20, item->value);
+	cl_assert((item = git_sortedcache_lookup(sc, "final")) != NULL);
+	cl_assert_equal_s("final", item->path);
+	cl_assert_equal_i(30, item->value);
+	cl_assert(git_sortedcache_lookup(sc, "zzz") == NULL);
+
+	cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
+	cl_assert_equal_s("again", item->path);
+	cl_assert_equal_i(20, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 1)) != NULL);
+	cl_assert_equal_s("final", item->path);
+	cl_assert_equal_i(30, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 2)) != NULL);
+	cl_assert_equal_s("testing", item->path);
+	cl_assert_equal_i(10, item->value);
+	cl_assert(git_sortedcache_entry(sc, 3) == NULL);
+
+	git_sortedcache_free(sc);
+
+	cl_assert_equal_i(3, free_count);
+}
+
+static void sortedcache_test_reload(git_sortedcache *sc)
+{
+	int count = 0;
+	git_buf buf = GIT_BUF_INIT;
+	char *scan, *after;
+	sortedcache_test_struct *item;
+
+	cl_assert(git_sortedcache_lockandload(sc, &buf) > 0);
+
+	git_sortedcache_clear(sc, false); /* clear once we already have lock */
+
+	for (scan = buf.ptr; *scan; scan = after + 1) {
+		int val = strtol(scan, &after, 0);
+		cl_assert(after > scan);
+		scan = after;
+
+		for (scan = after; git__isspace(*scan); ++scan) /* find start */;
+		for (after = scan; *after && *after != '\n'; ++after) /* find eol */;
+		*after = '\0';
+
+		cl_git_pass(git_sortedcache_upsert((void **)&item, sc, scan));
+
+		item->value = val;
+		item->smaller_value = (char)(count++);
+	}
+
+	cl_git_pass(git_sortedcache_unlock(sc));
+
+	git_buf_free(&buf);
+}
+
+void test_core_sortedcache__on_disk(void)
+{
+	git_sortedcache *sc;
+	sortedcache_test_struct *item;
+	int free_count = 0;
+
+	cl_git_mkfile("cacheitems.txt", "10 abc\n20 bcd\n30 cde\n");
+
+	cl_git_pass(git_sortedcache_new(
+		&sc, offsetof(sortedcache_test_struct, path),
+		sortedcache_test_struct_free, &free_count,
+		sortedcache_test_struct_cmp, "cacheitems.txt"));
+
+	/* should need to reload the first time */
+
+	sortedcache_test_reload(sc);
+
+	/* test what we loaded */
+
+	cl_assert_equal_sz(3, git_sortedcache_entrycount(sc));
+
+	cl_assert((item = git_sortedcache_lookup(sc, "abc")) != NULL);
+	cl_assert_equal_s("abc", item->path);
+	cl_assert_equal_i(10, item->value);
+	cl_assert((item = git_sortedcache_lookup(sc, "cde")) != NULL);
+	cl_assert_equal_s("cde", item->path);
+	cl_assert_equal_i(30, item->value);
+	cl_assert(git_sortedcache_lookup(sc, "aaa") == NULL);
+
+	cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
+	cl_assert_equal_s("abc", item->path);
+	cl_assert_equal_i(10, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 1)) != NULL);
+	cl_assert_equal_s("bcd", item->path);
+	cl_assert_equal_i(20, item->value);
+	cl_assert(git_sortedcache_entry(sc, 3) == NULL);
+
+	/* should not need to reload this time */
+
+	cl_assert_equal_i(0, git_sortedcache_lockandload(sc, NULL));
+
+	/* rewrite ondisk file and reload */
+
+	cl_assert_equal_i(0, free_count);
+
+	cl_git_rewritefile(
+		"cacheitems.txt", "100 abc\n200 zzz\n500 aaa\n10 final\n");
+	sortedcache_test_reload(sc);
+
+	cl_assert_equal_i(3, free_count);
+
+	/* test what we loaded */
+
+	cl_assert_equal_sz(4, git_sortedcache_entrycount(sc));
+
+	cl_assert((item = git_sortedcache_lookup(sc, "abc")) != NULL);
+	cl_assert_equal_s("abc", item->path);
+	cl_assert_equal_i(100, item->value);
+	cl_assert((item = git_sortedcache_lookup(sc, "final")) != NULL);
+	cl_assert_equal_s("final", item->path);
+	cl_assert_equal_i(10, item->value);
+	cl_assert(git_sortedcache_lookup(sc, "cde") == NULL);
+
+	cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
+	cl_assert_equal_s("aaa", item->path);
+	cl_assert_equal_i(500, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 2)) != NULL);
+	cl_assert_equal_s("final", item->path);
+	cl_assert_equal_i(10, item->value);
+	cl_assert((item = git_sortedcache_entry(sc, 3)) != NULL);
+	cl_assert_equal_s("zzz", item->path);
+	cl_assert_equal_i(200, item->value);
+
+	git_sortedcache_free(sc);
+
+	cl_assert_equal_i(7, free_count);
+}
+