diff --git a/include/git2/index.h b/include/git2/index.h index 83a18e160..d34ed0a92 100644 --- a/include/git2/index.h +++ b/include/git2/index.h @@ -172,6 +172,13 @@ GIT_EXTERN(int) git_index_write(git_index *index); */ GIT_EXTERN(int) git_index_find(git_index *index, const char *path); +/** + * Remove all entries with equal path except last added + * + * @param index an existing index object + */ +GIT_EXTERN(void) git_index_uniq(git_index *index); + /** * Add or update an index entry from a file in disk * diff --git a/src/index.c b/src/index.c index 06fa95c7b..dd33db92a 100644 --- a/src/index.c +++ b/src/index.c @@ -348,6 +348,7 @@ static int index_insert(git_index *index, const git_index_entry *source_entry, i git_index_entry *entry; size_t path_length; int position; + git_index_entry **entry_array; assert(index && source_entry); @@ -375,27 +376,28 @@ static int index_insert(git_index *index, const git_index_entry *source_entry, i else entry->flags |= GIT_IDXENTRY_NAMEMASK;; + /* + * replacing is not requested: just insert entry at the end; + * the index is no longer sorted + */ + if (!replace) + return git_vector_insert(&index->entries, entry); /* look if an entry with this path already exists */ position = git_index_find(index, source_entry->path); - /* if no entry exists and replace is not set, - * add the entry at the end; - * the index is no longer sorted */ - if (!replace || position == GIT_ENOTFOUND) { - if (git_vector_insert(&index->entries, entry) < GIT_SUCCESS) - return GIT_ENOMEM; + /* + * if no entry exists add the entry at the end; + * the index is no longer sorted + */ + if (position == GIT_ENOTFOUND) + return git_vector_insert(&index->entries, entry); - /* if a previous entry exists and replace is set, - * replace it */ - } else { - git_index_entry **entry_array = (git_index_entry **)index->entries.contents; - - free((char *)entry_array[position]->path); - free(entry_array[position]); - - entry_array[position] = entry; - } + /* exists, replace it */ + entry_array = (git_index_entry **) index->entries.contents; + free((char *)entry_array[position]->path); + free(entry_array[position]); + entry_array[position] = entry; return GIT_SUCCESS; } @@ -485,6 +487,11 @@ int git_index_find(git_index *index, const char *path) return git_vector_bsearch2(&index->entries, index_srch, path); } +void git_index_uniq(git_index *index) +{ + git_vector_uniq(&index->entries); +} + const git_index_entry_unmerged *git_index_get_unmerged_bypath(git_index *index, const char *path) { int pos; diff --git a/src/util.c b/src/util.c index 7bfc08be4..03e911241 100644 --- a/src/util.c +++ b/src/util.c @@ -330,3 +330,67 @@ uint32_t git__hash(const void *key, int len, uint32_t seed) return h1; } #endif + +/* + * A merge sort implementation, simplified from the qsort implementation + * by Mike Haertel, which is a part of the GNU C Library. + */ +static void msort_with_tmp(void *b, size_t n, size_t s, + int (*cmp)(const void *, const void *), + char *t) +{ + char *tmp; + char *b1, *b2; + size_t n1, n2; + + if (n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = (char *)b + (n1 * s); + + msort_with_tmp(b1, n1, s, cmp, t); + msort_with_tmp(b2, n2, s, cmp, t); + + tmp = t; + + while (n1 > 0 && n2 > 0) { + if (cmp(b1, b2) <= 0) { + memcpy(tmp, b1, s); + tmp += s; + b1 += s; + --n1; + } else { + memcpy(tmp, b2, s); + tmp += s; + b2 += s; + --n2; + } + } + if (n1 > 0) + memcpy(tmp, b1, n1 * s); + memcpy(b, t, (n - n2) * s); +} + +int git__msort(void *b, size_t n, size_t s, + int (*cmp)(const void *, const void *)) +{ + const size_t size = n * s; + char buf[1024]; + + if (size < sizeof(buf)) { + /* The temporary array fits on the small on-stack buffer. */ + msort_with_tmp(b, n, s, cmp, buf); + } else { + /* It's somewhat large, so malloc it. */ + char *tmp = git__malloc(size); + if (tmp == NULL) + return GIT_ENOMEM; + msort_with_tmp(b, n, s, cmp, tmp); + free(tmp); + } + + return GIT_SUCCESS; +} diff --git a/src/util.h b/src/util.h index 0faf7f69c..c9ca4dec0 100644 --- a/src/util.h +++ b/src/util.h @@ -118,4 +118,7 @@ extern int git__fnmatch(const char *pattern, const char *name, int flags); } \ } while (0) +extern int git__msort(void *b, size_t n, size_t s, + int (*cmp)(const void *, const void *)); + #endif /* INCLUDE_util_h__ */ diff --git a/src/vector.c b/src/vector.c index 0451eb082..2fc051f0c 100644 --- a/src/vector.c +++ b/src/vector.c @@ -94,7 +94,7 @@ void git_vector_sort(git_vector *v) if (v->sorted || v->_cmp == NULL) return; - qsort(v->contents, v->length, sizeof(void *), v->_cmp); + git__msort(v->contents, v->length, sizeof(void *), v->_cmp); v->sorted = 1; } @@ -162,6 +162,26 @@ int git_vector_remove(git_vector *v, unsigned int idx) return GIT_SUCCESS; } +void git_vector_uniq(git_vector *v) +{ + git_vector_cmp cmp; + unsigned int i, j; + + if (v->length <= 1) + return; + + git_vector_sort(v); + cmp = v->_cmp ? v->_cmp : strict_comparison; + + for (i = 0, j = 1 ; j < v->length; ++j) + if (!cmp(v->contents + i, v->contents + j)) + v->contents[i] = v->contents[j]; + else + v->contents[++i] = v->contents[j]; + + v->length -= j - i - 1; +} + void git_vector_clear(git_vector *v) { assert(v); diff --git a/src/vector.h b/src/vector.h index 256452ee5..76778ba4e 100644 --- a/src/vector.h +++ b/src/vector.h @@ -32,5 +32,5 @@ GIT_INLINE(void *) git_vector_get(git_vector *v, unsigned int position) int git_vector_insert(git_vector *v, void *element); int git_vector_remove(git_vector *v, unsigned int idx); - +void git_vector_uniq(git_vector *v); #endif diff --git a/tests/t00-core.c b/tests/t00-core.c index c01518b28..ee1eb6df2 100644 --- a/tests/t00-core.c +++ b/tests/t00-core.c @@ -73,6 +73,28 @@ BEGIN_TEST(vector1, "don't read past array bounds on remove()") git_vector_free(&x); END_TEST +static int test_cmp(const void *a, const void *b) +{ + int n1 = *(int *)a; + int n2 = *(int *)b; + + return n1 - n2; +} + +BEGIN_TEST(vector2, "remove duplicates") + git_vector x; + must_pass(git_vector_init(&x, 5, test_cmp)); + must_pass(git_vector_insert(&x, (void *) 0xdeadbeef)); + must_pass(git_vector_insert(&x, (void *) 0xcafebabe)); + must_pass(git_vector_insert(&x, (void *) 0xcafebabe)); + must_pass(git_vector_insert(&x, (void *) 0xdeadbeef)); + must_pass(git_vector_insert(&x, (void *) 0xcafebabe)); + must_be_true(x.length == 5); + git_vector_uniq(&x); + must_be_true(x.length == 2); + git_vector_free(&x); +END_TEST + BEGIN_TEST(path0, "get the dirname of a path") char dir[64], *dir2; @@ -531,6 +553,7 @@ BEGIN_SUITE(core) ADD_TEST(vector0); ADD_TEST(vector1); + ADD_TEST(vector2); ADD_TEST(path0); ADD_TEST(path1);