From ac02a69470f149be5c556eb607d8cb3dcc21771f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 14 Aug 2015 21:06:09 +0200 Subject: [PATCH 1/4] Add a hashmap for index entries They are hashed case-insensitively and take the stage into account. --- src/idxmap.h | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 src/idxmap.h diff --git a/src/idxmap.h b/src/idxmap.h new file mode 100644 index 000000000..74304bb97 --- /dev/null +++ b/src/idxmap.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_idxmap_h__ +#define INCLUDE_idxmap_h__ + +#include +#include "common.h" +#include "git2/index.h" + +#define kmalloc git__malloc +#define kcalloc git__calloc +#define krealloc git__realloc +#define kreallocarray git__reallocarray +#define kfree git__free +#include "khash.h" + +__KHASH_TYPE(idx, const git_index_entry *, git_index_entry *) +__KHASH_TYPE(idxicase, const git_index_entry *, git_index_entry *) + +typedef khash_t(idx) git_idxmap; +typedef khash_t(idxicase) git_idxmap_icase; + +typedef khiter_t git_idxmap_iter; + +/* This is __ac_X31_hash_string but with tolower and it takes the entry's stage into account */ +static kh_inline khint_t idxentry_hash(const git_index_entry *e) +{ + const char *s = e->path; + khint_t h = (khint_t)git__tolower(*s); + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)git__tolower(*s); + return h + GIT_IDXENTRY_STAGE(e); +} + +#define idxentry_equal(a, b) (GIT_IDXENTRY_STAGE(a) == GIT_IDXENTRY_STAGE(b) && strcmp(a->path, b->path) == 0) +#define idxentry_icase_equal(a, b) (GIT_IDXENTRY_STAGE(a) == GIT_IDXENTRY_STAGE(b) && strcasecmp(a->path, b->path) == 0) + +#define GIT__USE_IDXMAP \ + __KHASH_IMPL(idx, static kh_inline, const git_index_entry *, git_index_entry *, 1, idxentry_hash, idxentry_equal) + +#define GIT__USE_IDXMAP_ICASE \ + __KHASH_IMPL(idxicase, static kh_inline, const git_index_entry *, git_index_entry *, 1, idxentry_hash, idxentry_icase_equal) + +#define git_idxmap_alloc(hp) \ + ((*(hp) = kh_init(idx)) == NULL) ? giterr_set_oom(), -1 : 0 + +#define git_idxmap_icase_alloc(hp) \ + ((*(hp) = kh_init(idxicase)) == NULL) ? giterr_set_oom(), -1 : 0 + +#define git_idxmap_insert(h, key, val, rval) do { \ + khiter_t __pos = kh_put(idx, h, key, &rval); \ + if (rval >= 0) { \ + if (rval == 0) kh_key(h, __pos) = key; \ + kh_val(h, __pos) = val; \ + } } while (0) + +#define git_idxmap_icase_insert(h, key, val, rval) do { \ + khiter_t __pos = kh_put(idxicase, h, key, &rval); \ + if (rval >= 0) { \ + if (rval == 0) kh_key(h, __pos) = key; \ + kh_val(h, __pos) = val; \ + } } while (0) + +#define git_idxmap_lookup_index(h, k) kh_get(idx, h, k) +#define git_idxmap_icase_lookup_index(h, k) kh_get(idxicase, h, k) +#define git_idxmap_value_at(h, idx) kh_val(h, idx) +#define git_idxmap_valid_index(h, idx) (idx != kh_end(h)) +#define git_idxmap_has_data(h, idx) kh_exist(h, idx) + +#define git_idxmap_free(h) kh_destroy(idx, h), h = NULL +#define git_idxmap_clear(h) kh_clear(idx, h) + +#define git_idxmap_delete_at(h, id) kh_del(idx, h, id) +#define git_idxmap_icase_delete_at(h, id) kh_del(idxicase, h, id) + +#define git_idxmap_delete(h, key) do { \ + khiter_t __pos = git_idxmap_lookup_index(h, key); \ + if (git_idxmap_valid_index(h, __pos)) \ + git_idxmap_delete_at(h, __pos); } while (0) + +#define git_idxmap_icase_delete(h, key) do { \ + khiter_t __pos = git_idxmap_icase_lookup_index(h, key); \ + if (git_idxmap_valid_index(h, __pos)) \ + git_idxmap_icase_delete_at(h, __pos); } while (0) + +#define git_idxmap_begin kh_begin +#define git_idxmap_end kh_end + +#endif From c232d6c32df90e009dcb8d79309d2c9b9f51b77a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 14 Aug 2015 21:06:51 +0200 Subject: [PATCH 2/4] index: add tests around case switching We were missing tests for switching the case-sensitivity of an index in-memory and then looking up entries in it. --- tests/index/filemodes.c | 1 + tests/index/tests.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/tests/index/filemodes.c b/tests/index/filemodes.c index b3907996b..6442d7755 100644 --- a/tests/index/filemodes.c +++ b/tests/index/filemodes.c @@ -239,6 +239,7 @@ void test_index_filemodes__invalid(void) cl_git_pass(git_repository_index(&index, g_repo)); + GIT_IDXENTRY_STAGE_SET(&entry, 0); entry.path = "foo"; entry.mode = GIT_OBJ_BLOB; cl_git_fail(git_index_add(index, &entry)); diff --git a/tests/index/tests.c b/tests/index/tests.c index e1ff12ad0..f1a057853 100644 --- a/tests/index/tests.c +++ b/tests/index/tests.c @@ -792,10 +792,43 @@ void test_index_tests__reload_while_ignoring_case(void) cl_git_pass(git_index_set_caps(index, caps &= ~GIT_INDEXCAP_IGNORE_CASE)); cl_git_pass(git_index_read(index, true)); cl_git_pass(git_vector_verify_sorted(&index->entries)); + cl_assert(git_index_get_bypath(index, ".HEADER", 0)); + cl_assert_equal_p(NULL, git_index_get_bypath(index, ".header", 0)); cl_git_pass(git_index_set_caps(index, caps | GIT_INDEXCAP_IGNORE_CASE)); cl_git_pass(git_index_read(index, true)); cl_git_pass(git_vector_verify_sorted(&index->entries)); + cl_assert(git_index_get_bypath(index, ".HEADER", 0)); + cl_assert(git_index_get_bypath(index, ".header", 0)); + + git_index_free(index); +} + +void test_index_tests__change_icase_on_instance(void) +{ + git_index *index; + unsigned int caps; + const git_index_entry *e; + + cl_git_pass(git_index_open(&index, TEST_INDEX_PATH)); + cl_git_pass(git_vector_verify_sorted(&index->entries)); + + caps = git_index_caps(index); + cl_git_pass(git_index_set_caps(index, caps &= ~GIT_INDEXCAP_IGNORE_CASE)); + cl_assert_equal_i(false, index->ignore_case); + cl_git_pass(git_vector_verify_sorted(&index->entries)); + cl_assert(e = git_index_get_bypath(index, "src/common.h", 0)); + cl_assert_equal_p(NULL, e = git_index_get_bypath(index, "SRC/Common.h", 0)); + cl_assert(e = git_index_get_bypath(index, "COPYING", 0)); + cl_assert_equal_p(NULL, e = git_index_get_bypath(index, "copying", 0)); + + cl_git_pass(git_index_set_caps(index, caps | GIT_INDEXCAP_IGNORE_CASE)); + cl_assert_equal_i(true, index->ignore_case); + cl_git_pass(git_vector_verify_sorted(&index->entries)); + cl_assert(e = git_index_get_bypath(index, "COPYING", 0)); + cl_assert_equal_s("COPYING", e->path); + cl_assert(e = git_index_get_bypath(index, "copying", 0)); + cl_assert_equal_s("COPYING", e->path); git_index_free(index); } From af1d5239a16976bd1b8d0a9358497f043bdfed14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 14 Aug 2015 21:10:12 +0200 Subject: [PATCH 3/4] index: keep a hash table as well as a vector of entries The hash table allows quick lookup of specific paths, while we use the vector for enumeration. --- src/index.c | 111 ++++++++++++++++++++++++++++++++++++++++++++-------- src/index.h | 2 + 2 files changed, 96 insertions(+), 17 deletions(-) diff --git a/src/index.c b/src/index.c index e424698bb..e904ffcfe 100644 --- a/src/index.c +++ b/src/index.c @@ -17,6 +17,7 @@ #include "pathspec.h" #include "ignore.h" #include "blob.h" +#include "idxmap.h" #include "git2/odb.h" #include "git2/oid.h" @@ -24,6 +25,9 @@ #include "git2/config.h" #include "git2/sys/index.h" +GIT__USE_IDXMAP +GIT__USE_IDXMAP_ICASE + static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths, unsigned int flags, git_index_matched_path_cb cb, void *payload); @@ -425,6 +429,7 @@ int git_index_open(git_index **index_out, const char *index_path) } if (git_vector_init(&index->entries, 32, git_index_entry_cmp) < 0 || + git_idxmap_alloc(&index->entries_map) < 0 || git_vector_init(&index->names, 8, conflict_name_cmp) < 0 || git_vector_init(&index->reuc, 8, reuc_cmp) < 0 || git_vector_init(&index->deleted, 8, git_index_entry_cmp) < 0) @@ -462,6 +467,7 @@ static void index_free(git_index *index) assert(!git_atomic_get(&index->readers)); git_index_clear(index); + git_idxmap_free(index->entries_map); git_vector_free(&index->entries); git_vector_free(&index->names); git_vector_free(&index->reuc); @@ -508,6 +514,11 @@ static int index_remove_entry(git_index *index, size_t pos) if (entry != NULL) git_tree_cache_invalidate_path(index->tree, entry->path); + if (index->ignore_case) + git_idxmap_icase_delete((khash_t(idxicase) *) index->entries_map, entry); + else + git_idxmap_delete(index->entries_map, entry); + error = git_vector_remove(&index->entries, pos); if (!error) { @@ -535,6 +546,7 @@ int git_index_clear(git_index *index) return -1; } + git_idxmap_clear(index->entries_map); while (!error && index->entries.length > 0) error = index_remove_entry(index, index->entries.length - 1); index_free_deleted(index); @@ -804,16 +816,24 @@ const git_index_entry *git_index_get_byindex( const git_index_entry *git_index_get_bypath( git_index *index, const char *path, int stage) { - size_t pos; + khiter_t pos; + git_index_entry key = {{ 0 }}; assert(index); - if (index_find(&pos, index, path, 0, stage, true) < 0) { - giterr_set(GITERR_INDEX, "Index does not contain %s", path); - return NULL; - } + key.path = path; + GIT_IDXENTRY_STAGE_SET(&key, stage); - return git_index_get_byindex(index, pos); + if (index->ignore_case) + pos = git_idxmap_icase_lookup_index((khash_t(idxicase) *) index->entries_map, &key); + else + pos = git_idxmap_lookup_index(index->entries_map, &key); + + if (git_idxmap_valid_index(index->entries_map, pos)) + return git_idxmap_value_at(index->entries_map, pos); + + giterr_set(GITERR_INDEX, "Index does not contain %s", path); + return NULL; } void git_index_entry__init_from_stat( @@ -1139,6 +1159,13 @@ static int index_insert( * check for dups, this is actually cheaper in the long run.) */ error = git_vector_insert_sorted(&index->entries, entry, index_no_dups); + + if (error == 0) { + if (index->ignore_case) + git_idxmap_icase_insert((khash_t(idxicase) *) index->entries_map, entry, entry, error); else + git_idxmap_insert(index->entries_map, entry, entry, error); + + } } if (error < 0) { @@ -1364,12 +1391,20 @@ int git_index_remove(git_index *index, const char *path, int stage) { int error; size_t position; + git_index_entry remove_key = {{ 0 }}; if (git_mutex_lock(&index->lock) < 0) { giterr_set(GITERR_OS, "Failed to lock index"); return -1; } + remove_key.path = path; + GIT_IDXENTRY_STAGE_SET(&remove_key, stage); + if (index->ignore_case) + git_idxmap_icase_delete((khash_t(idxicase) *) index->entries_map, &remove_key); + else + git_idxmap_delete(index->entries_map, &remove_key); + if (index_find(&position, index, path, 0, stage, false) < 0) { giterr_set( GITERR_INDEX, "Index does not contain %s at stage %d", path, stage); @@ -2180,6 +2215,11 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) assert(!index->entries.length); + if (index->ignore_case) + kh_resize(idxicase, (khash_t(idxicase) *) index->entries_map, header.entry_count); + else + kh_resize(idx, index->entries_map, header.entry_count); + /* Parse all the entries */ for (i = 0; i < header.entry_count && buffer_size > INDEX_FOOTER_SIZE; ++i) { git_index_entry *entry; @@ -2196,6 +2236,16 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) goto done; } + if (index->ignore_case) + git_idxmap_icase_insert((khash_t(idxicase) *) index->entries_map, entry, entry, error); + else + git_idxmap_insert(index->entries_map, entry, entry, error); + + if (error < 0) { + index_entry_free(entry); + goto done; + } + seek_forward(entry_size); } @@ -2610,7 +2660,13 @@ int git_index_read_tree(git_index *index, const git_tree *tree) { int error = 0; git_vector entries = GIT_VECTOR_INIT; + git_idxmap *entries_map; read_tree_data data; + size_t i; + git_index_entry *e; + + if (git_idxmap_alloc(&entries_map) < 0) + return -1; git_vector_set_cmp(&entries, index->entries._cmp); /* match sort */ @@ -2625,23 +2681,44 @@ int git_index_read_tree(git_index *index, const git_tree *tree) if (index_sort_if_needed(index, true) < 0) return -1; - error = git_tree_walk(tree, GIT_TREEWALK_POST, read_tree_cb, &data); + if ((error = git_tree_walk(tree, GIT_TREEWALK_POST, read_tree_cb, &data)) < 0) + goto cleanup; - if (!error) { - git_vector_sort(&entries); + if (index->ignore_case) + kh_resize(idxicase, (khash_t(idxicase) *) entries_map, entries.length); + else + kh_resize(idx, entries_map, entries.length); - if ((error = git_index_clear(index)) < 0) - /* well, this isn't good */; - else if (git_mutex_lock(&index->lock) < 0) { - giterr_set(GITERR_OS, "Unable to acquire index lock"); - error = -1; - } else { - git_vector_swap(&entries, &index->entries); - git_mutex_unlock(&index->lock); + git_vector_foreach(&entries, i, e) { + if (index->ignore_case) + git_idxmap_icase_insert((git_idxmap_icase *) entries_map, e, e, error); + else + git_idxmap_insert(entries_map, e, e, error); + + if (error < 0) { + giterr_set(GITERR_INDEX, "failed to insert entry into map"); + return error; } } + error = 0; + + git_vector_sort(&entries); + + if ((error = git_index_clear(index)) < 0) + /* well, this isn't good */; + else if (git_mutex_lock(&index->lock) < 0) { + giterr_set(GITERR_OS, "Unable to acquire index lock"); + error = -1; + } else { + git_vector_swap(&entries, &index->entries); + entries_map = git__swap(index->entries_map, entries_map); + git_mutex_unlock(&index->lock); + } + +cleanup: git_vector_free(&entries); + git_idxmap_free(entries_map); if (error < 0) return error; diff --git a/src/index.h b/src/index.h index 9c60b015c..546e677be 100644 --- a/src/index.h +++ b/src/index.h @@ -10,6 +10,7 @@ #include "fileops.h" #include "filebuf.h" #include "vector.h" +#include "idxmap.h" #include "tree-cache.h" #include "git2/odb.h" #include "git2/index.h" @@ -25,6 +26,7 @@ struct git_index { git_oid checksum; /* checksum at the end of the file */ git_vector entries; + git_idxmap *entries_map; git_mutex lock; /* lock held while entries is being changed */ git_vector deleted; /* deleted entries if readers > 0 */ From 81b76367571010aa83a3de38aecfee3c301e888d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 4 Sep 2015 13:30:49 +0200 Subject: [PATCH 4/4] index: put the icase insert choice in macros This should let us see more clearly what we're doing and avoid the ugly 'if' we need every time we want to interact with the map. --- src/index.c | 55 +++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/index.c b/src/index.c index e904ffcfe..b9a78b21f 100644 --- a/src/index.c +++ b/src/index.c @@ -28,6 +28,29 @@ GIT__USE_IDXMAP GIT__USE_IDXMAP_ICASE +#define INSERT_IN_MAP_EX(idx, map, e, err) do { \ + if ((idx)->ignore_case) \ + git_idxmap_icase_insert((khash_t(idxicase) *) (map), (e), (e), (err)); \ + else \ + git_idxmap_insert((map), (e), (e), (err)); \ + } while (0) + +#define INSERT_IN_MAP(idx, e, err) INSERT_IN_MAP_EX(idx, (idx)->entries_map, e, err) + +#define LOOKUP_IN_MAP(p, idx, k) do { \ + if ((idx)->ignore_case) \ + (p) = git_idxmap_icase_lookup_index((khash_t(idxicase) *) index->entries_map, (k)); \ + else \ + (p) = git_idxmap_lookup_index(index->entries_map, (k)); \ + } while (0) + +#define DELETE_IN_MAP(idx, e) do { \ + if ((idx)->ignore_case) \ + git_idxmap_icase_delete((khash_t(idxicase) *) (idx)->entries_map, (e)); \ + else \ + git_idxmap_delete((idx)->entries_map, (e)); \ + } while (0) + static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths, unsigned int flags, git_index_matched_path_cb cb, void *payload); @@ -514,11 +537,7 @@ static int index_remove_entry(git_index *index, size_t pos) if (entry != NULL) git_tree_cache_invalidate_path(index->tree, entry->path); - if (index->ignore_case) - git_idxmap_icase_delete((khash_t(idxicase) *) index->entries_map, entry); - else - git_idxmap_delete(index->entries_map, entry); - + DELETE_IN_MAP(index, entry); error = git_vector_remove(&index->entries, pos); if (!error) { @@ -824,10 +843,7 @@ const git_index_entry *git_index_get_bypath( key.path = path; GIT_IDXENTRY_STAGE_SET(&key, stage); - if (index->ignore_case) - pos = git_idxmap_icase_lookup_index((khash_t(idxicase) *) index->entries_map, &key); - else - pos = git_idxmap_lookup_index(index->entries_map, &key); + LOOKUP_IN_MAP(pos, index, &key); if (git_idxmap_valid_index(index->entries_map, pos)) return git_idxmap_value_at(index->entries_map, pos); @@ -1161,10 +1177,7 @@ static int index_insert( error = git_vector_insert_sorted(&index->entries, entry, index_no_dups); if (error == 0) { - if (index->ignore_case) - git_idxmap_icase_insert((khash_t(idxicase) *) index->entries_map, entry, entry, error); else - git_idxmap_insert(index->entries_map, entry, entry, error); - + INSERT_IN_MAP(index, entry, error); } } @@ -1400,10 +1413,8 @@ int git_index_remove(git_index *index, const char *path, int stage) remove_key.path = path; GIT_IDXENTRY_STAGE_SET(&remove_key, stage); - if (index->ignore_case) - git_idxmap_icase_delete((khash_t(idxicase) *) index->entries_map, &remove_key); - else - git_idxmap_delete(index->entries_map, &remove_key); + + DELETE_IN_MAP(index, &remove_key); if (index_find(&position, index, path, 0, stage, false) < 0) { giterr_set( @@ -2236,10 +2247,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) goto done; } - if (index->ignore_case) - git_idxmap_icase_insert((khash_t(idxicase) *) index->entries_map, entry, entry, error); - else - git_idxmap_insert(index->entries_map, entry, entry, error); + INSERT_IN_MAP(index, entry, error); if (error < 0) { index_entry_free(entry); @@ -2690,10 +2698,7 @@ int git_index_read_tree(git_index *index, const git_tree *tree) kh_resize(idx, entries_map, entries.length); git_vector_foreach(&entries, i, e) { - if (index->ignore_case) - git_idxmap_icase_insert((git_idxmap_icase *) entries_map, e, e, error); - else - git_idxmap_insert(entries_map, e, e, error); + INSERT_IN_MAP_EX(index, entries_map, e, error); if (error < 0) { giterr_set(GITERR_INDEX, "failed to insert entry into map");