From 4075e060b45a73834a24684ed835d52f7176d58b Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 3 Feb 2014 21:02:08 -0800 Subject: [PATCH 1/5] Replace pqueue with code from hashsig heap I accidentally wrote a separate priority queue implementation when I was working on file rename detection as part of the file hash signature calculation code. To simplify licensing terms, I just adapted that to a general purpose priority queue and replace the old priority queue implementation that was borrowed from elsewhere. This also removes parts of the COPYING document that no longer apply to libgit2. --- COPYING | 75 +----------------- src/commit_list.c | 6 +- src/commit_list.h | 2 +- src/graph.c | 35 ++++----- src/merge.c | 15 ++-- src/pqueue.c | 194 +++++++++++++++------------------------------- src/pqueue.h | 133 +++++++++++++------------------ src/revwalk.c | 5 +- src/vector.h | 10 +++ 9 files changed, 159 insertions(+), 316 deletions(-) diff --git a/COPYING b/COPYING index f7e9f3af7..181737284 100644 --- a/COPYING +++ b/COPYING @@ -388,19 +388,7 @@ Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler ---------------------------------------------------------------------- -The priority queue implementation is based on code licensed under the -Apache 2.0 license: - - Copyright 2010 Volkan Yazıcı - Copyright 2006-2010 The Apache Software Foundation - -The full text of the Apache 2.0 license is available at: - - http://www.apache.org/licenses/LICENSE-2.0 - ----------------------------------------------------------------------- - -The Clay framework is licensed under the MIT license: +The Clar framework is licensed under the MIT license: Copyright (C) 2011 by Vicent Marti @@ -930,64 +918,3 @@ necessary. Here is a sample; alter the names: That's all there is to it! 
---------------------------------------------------------------------- - -Portions of src/win32/posix_w32.c are derrived from link_win32.c in PHP: - --------------------------------------------------------------------- - The PHP License, version 3.01 -Copyright (c) 1999 - 2012 The PHP Group. All rights reserved. --------------------------------------------------------------------- - -Redistribution and use in source and binary forms, with or without -modification, is permitted provided that the following conditions -are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - 3. The name "PHP" must not be used to endorse or promote products - derived from this software without prior written permission. For - written permission, please contact group@php.net. - - 4. Products derived from this software may not be called "PHP", nor - may "PHP" appear in their name, without prior written permission - from group@php.net. You may indicate that your software works in - conjunction with PHP by saying "Foo for PHP" instead of calling - it "PHP Foo" or "phpfoo" - - 5. The PHP Group may publish revised and/or new versions of the - license from time to time. Each version will be given a - distinguishing version number. - Once covered code has been published under a particular version - of the license, you may always continue to use it under the terms - of that version. You may also choose to use such covered code - under the terms of any subsequent version of the license - published by the PHP Group. No one other than the PHP Group has - the right to modify the terms applicable to covered code created - under this License. - - 6. 
Redistributions of any form whatsoever must retain the following - acknowledgment: - "This product includes PHP software, freely available from - ". - -THIS SOFTWARE IS PROVIDED BY THE PHP DEVELOPMENT TEAM ``AS IS'' AND -ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PHP -DEVELOPMENT TEAM OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------- - diff --git a/src/commit_list.c b/src/commit_list.c index 64416e54d..9db3f5633 100644 --- a/src/commit_list.c +++ b/src/commit_list.c @@ -11,10 +11,10 @@ #include "pool.h" #include "odb.h" -int git_commit_list_time_cmp(void *a, void *b) +int git_commit_list_time_cmp(const void *a, const void *b) { - git_commit_list_node *commit_a = (git_commit_list_node *)a; - git_commit_list_node *commit_b = (git_commit_list_node *)b; + const git_commit_list_node *commit_a = a; + const git_commit_list_node *commit_b = b; return (commit_a->time < commit_b->time); } diff --git a/src/commit_list.h b/src/commit_list.h index d2f54b3ca..490d841be 100644 --- a/src/commit_list.h +++ b/src/commit_list.h @@ -39,7 +39,7 @@ typedef struct git_commit_list { } git_commit_list; git_commit_list_node *git_commit_list_alloc_node(git_revwalk *walk); -int git_commit_list_time_cmp(void *a, void *b); +int git_commit_list_time_cmp(const void *a, const void *b); void git_commit_list_free(git_commit_list **list_p); git_commit_list 
*git_commit_list_insert(git_commit_list_node *item, git_commit_list **list_p); git_commit_list *git_commit_list_insert_by_date(git_commit_list_node *item, git_commit_list **list_p); diff --git a/src/graph.c b/src/graph.c index f39af5ed5..96fda7add 100644 --- a/src/graph.c +++ b/src/graph.c @@ -1,4 +1,3 @@ - /* * Copyright (C) the libgit2 contributors. All rights reserved. * @@ -13,9 +12,9 @@ static int interesting(git_pqueue *list, git_commit_list *roots) { unsigned int i; - /* element 0 isn't used - we need to start at 1 */ - for (i = 1; i < list->size; i++) { - git_commit_list_node *commit = list->d[i]; + + for (i = 0; i < git_pqueue_size(list); i++) { + git_commit_list_node *commit = git_pqueue_get(list, i); if ((commit->flags & STALE) == 0) return 1; } @@ -42,7 +41,7 @@ static int mark_parents(git_revwalk *walk, git_commit_list_node *one, return 0; } - if (git_pqueue_init(&list, 2, git_commit_list_time_cmp) < 0) + if (git_pqueue_init(&list, 0, 2, git_commit_list_time_cmp) < 0) return -1; if (git_commit_list_parse(walk, one) < 0) @@ -59,10 +58,9 @@ static int mark_parents(git_revwalk *walk, git_commit_list_node *one, /* as long as there are non-STALE commits */ while (interesting(&list, roots)) { - git_commit_list_node *commit; + git_commit_list_node *commit = git_pqueue_pop(&list); int flags; - commit = git_pqueue_pop(&list); if (commit == NULL) break; @@ -110,16 +108,16 @@ static int ahead_behind(git_commit_list_node *one, git_commit_list_node *two, { git_commit_list_node *commit; git_pqueue pq; - int i; + int error = 0, i; *ahead = 0; *behind = 0; - if (git_pqueue_init(&pq, 2, git_commit_list_time_cmp) < 0) + if (git_pqueue_init(&pq, 0, 2, git_commit_list_time_cmp) < 0) return -1; - if (git_pqueue_insert(&pq, one) < 0) - goto on_error; - if (git_pqueue_insert(&pq, two) < 0) - goto on_error; + + if ((error = git_pqueue_insert(&pq, one)) < 0 || + (error = git_pqueue_insert(&pq, two)) < 0) + goto done; while ((commit = git_pqueue_pop(&pq)) != NULL) { if 
(commit->flags & RESULT || @@ -132,18 +130,15 @@ static int ahead_behind(git_commit_list_node *one, git_commit_list_node *two, for (i = 0; i < commit->out_degree; i++) { git_commit_list_node *p = commit->parents[i]; - if (git_pqueue_insert(&pq, p) < 0) - return -1; + if ((error = git_pqueue_insert(&pq, p)) < 0) + goto done; } commit->flags |= RESULT; } +done: git_pqueue_free(&pq); - return 0; - -on_error: - git_pqueue_free(&pq); - return -1; + return error; } int git_graph_ahead_behind(size_t *ahead, size_t *behind, git_repository *repo, diff --git a/src/merge.c b/src/merge.c index 20cfc0e23..d004554cf 100644 --- a/src/merge.c +++ b/src/merge.c @@ -161,10 +161,10 @@ on_error: static int interesting(git_pqueue *list) { - unsigned int i; - /* element 0 isn't used - we need to start at 1 */ - for (i = 1; i < list->size; i++) { - git_commit_list_node *commit = list->d[i]; + size_t i; + + for (i = 0; i < git_pqueue_size(list); i++) { + git_commit_list_node *commit = git_pqueue_get(list, i); if ((commit->flags & STALE) == 0) return 1; } @@ -186,7 +186,7 @@ int git_merge__bases_many(git_commit_list **out, git_revwalk *walk, git_commit_l return git_commit_list_insert(one, out) ? 
0 : -1; } - if (git_pqueue_init(&list, twos->length * 2, git_commit_list_time_cmp) < 0) + if (git_pqueue_init(&list, 0, twos->length * 2, git_commit_list_time_cmp) < 0) return -1; if (git_commit_list_parse(walk, one) < 0) @@ -205,10 +205,11 @@ int git_merge__bases_many(git_commit_list **out, git_revwalk *walk, git_commit_l /* as long as there are non-STALE commits */ while (interesting(&list)) { - git_commit_list_node *commit; + git_commit_list_node *commit = git_pqueue_pop(&list); int flags; - commit = git_pqueue_pop(&list); + if (commit == NULL) + break; flags = commit->flags & (PARENT1 | PARENT2 | STALE); if (flags == (PARENT1 | PARENT2)) { diff --git a/src/pqueue.c b/src/pqueue.c index 7819ed41e..ddbad7a54 100644 --- a/src/pqueue.c +++ b/src/pqueue.c @@ -3,161 +3,95 @@ * * This file is part of libgit2, distributed under the GNU GPL v2 with * a Linking Exception. For full terms see the included COPYING file. - * - * This file is based on a modified version of the priority queue found - * in the Apache project and libpqueue library. - * - * https://github.com/vy/libpqueue - * - * Original file notice: - * - * Copyright 2010 Volkan Yazici - * Copyright 2006-2010 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
*/ -#include "common.h" #include "pqueue.h" +#include "util.h" -#define left(i) ((i) << 1) -#define right(i) (((i) << 1) + 1) -#define parent(i) ((i) >> 1) +#define PQUEUE_LCHILD_OF(I) (((I)<<1)+1) +#define PQUEUE_RCHILD_OF(I) (((I)<<1)+2) +#define PQUEUE_PARENT_OF(I) (((I)-1)>>1) -int git_pqueue_init(git_pqueue *q, size_t n, git_pqueue_cmp cmppri) +int git_pqueue_init( + git_pqueue *pq, + uint32_t flags, + size_t est_size, + git_vector_cmp cmp) { - assert(q); - - /* Need to allocate n+1 elements since element 0 isn't used. */ - q->d = git__malloc((n + 1) * sizeof(void *)); - GITERR_CHECK_ALLOC(q->d); - - q->size = 1; - q->avail = q->step = (n + 1); /* see comment above about n+1 */ - q->cmppri = cmppri; - - return 0; + pq->flags = flags; + pq->initial_size = est_size; + return git_vector_init(&pq->values, est_size, cmp); } - -void git_pqueue_free(git_pqueue *q) +void git_pqueue_free(git_pqueue *pq) { - git__free(q->d); - q->d = NULL; + git_vector_free(&pq->values); } -void git_pqueue_clear(git_pqueue *q) +static void pqueue_up(git_pqueue *pq, size_t el) { - q->size = 1; + size_t parent_el = PQUEUE_PARENT_OF(el); + + while (el > 0 && git_vector_cmp_elements(&pq->values, parent_el, el) > 0) { + git_vector_swap_elements(&pq->values, el, parent_el); + + el = parent_el; + parent_el = PQUEUE_PARENT_OF(el); + } } -size_t git_pqueue_size(git_pqueue *q) +static void pqueue_down(git_pqueue *pq, size_t el) { - /* queue element 0 exists but doesn't count since it isn't used. 
*/ - return (q->size - 1); + size_t last = git_vector_length(&pq->values); + + while (1) { + size_t kid = PQUEUE_LCHILD_OF(el), rkid = PQUEUE_RCHILD_OF(el); + if (kid >= last) + break; + if (rkid < last && git_vector_cmp_elements(&pq->values, kid, rkid) > 0) + kid = rkid; + + if (git_vector_cmp_elements(&pq->values, el, kid) < 0) + break; + + git_vector_swap_elements(&pq->values, el, kid); + el = kid; + } } - -static void bubble_up(git_pqueue *q, size_t i) +int git_pqueue_insert(git_pqueue *pq, void *item) { - size_t parent_node; - void *moving_node = q->d[i]; + int error = 0; - for (parent_node = parent(i); - ((i > 1) && q->cmppri(q->d[parent_node], moving_node)); - i = parent_node, parent_node = parent(i)) { - q->d[i] = q->d[parent_node]; + /* if heap is full, pop the top element if new one should replace it */ + if ((pq->flags & GIT_PQUEUE_FIXED_SIZE) != 0 && + pq->values.length >= pq->initial_size) + { + /* skip item if below min item in heap */ + if (pq->values._cmp(item, git_vector_get(&pq->values, 0)) <= 0) + return 0; + (void)git_pqueue_pop(pq); } - q->d[i] = moving_node; + error = git_vector_insert(&pq->values, item); + + if (!error) + pqueue_up(pq, pq->values.length - 1); + + return error; } - -static size_t maxchild(git_pqueue *q, size_t i) +void *git_pqueue_pop(git_pqueue *pq) { - size_t child_node = left(i); + void *rval = git_vector_get(&pq->values, 0); - if (child_node >= q->size) - return 0; - - if ((child_node + 1) < q->size && - q->cmppri(q->d[child_node], q->d[child_node + 1])) - child_node++; /* use right child instead of left */ - - return child_node; -} - - -static void percolate_down(git_pqueue *q, size_t i) -{ - size_t child_node; - void *moving_node = q->d[i]; - - while ((child_node = maxchild(q, i)) != 0 && - q->cmppri(moving_node, q->d[child_node])) { - q->d[i] = q->d[child_node]; - i = child_node; + if (git_vector_length(&pq->values) > 1) { + pq->values.contents[0] = git_vector_last(&pq->values); + git_vector_pop(&pq->values); + 
pqueue_down(pq, 0); + } else { + git_vector_pop(&pq->values); } - q->d[i] = moving_node; -} - - -int git_pqueue_insert(git_pqueue *q, void *d) -{ - void *tmp; - size_t i; - size_t newsize; - - if (!q) return 1; - - /* allocate more memory if necessary */ - if (q->size >= q->avail) { - newsize = q->size + q->step; - tmp = git__realloc(q->d, sizeof(void *) * newsize); - GITERR_CHECK_ALLOC(tmp); - - q->d = tmp; - q->avail = newsize; - } - - /* insert item */ - i = q->size++; - q->d[i] = d; - bubble_up(q, i); - - return 0; -} - - -void *git_pqueue_pop(git_pqueue *q) -{ - void *head; - - if (!q || q->size == 1) - return NULL; - - head = q->d[1]; - q->d[1] = q->d[--q->size]; - percolate_down(q, 1); - - return head; -} - - -void *git_pqueue_peek(git_pqueue *q) -{ - if (!q || q->size == 1) - return NULL; - return q->d[1]; + return rval; } diff --git a/src/pqueue.h b/src/pqueue.h index 9061f8279..3c977e178 100644 --- a/src/pqueue.h +++ b/src/pqueue.h @@ -3,99 +3,74 @@ * * This file is part of libgit2, distributed under the GNU GPL v2 with * a Linking Exception. For full terms see the included COPYING file. - * - * This file is based on a modified version of the priority queue found - * in the Apache project and libpqueue library. - * - * https://github.com/vy/libpqueue - * - * Original file notice: - * - * Copyright 2010 Volkan Yazici - * Copyright 2006-2010 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
*/ - #ifndef INCLUDE_pqueue_h__ #define INCLUDE_pqueue_h__ -/** callback functions to get/set/compare the priority of an element */ -typedef int (*git_pqueue_cmp)(void *a, void *b); +#include "vector.h" -/** the priority queue handle */ typedef struct { - size_t size, avail, step; - git_pqueue_cmp cmppri; - void **d; + git_vector values; + size_t initial_size; + uint32_t flags; } git_pqueue; +enum { + GIT_PQUEUE_DEFAULT = 0, + GIT_PQUEUE_FIXED_SIZE = (1 << 0), /* don't grow heap, keep highest */ +}; /** - * initialize the queue + * Initialize priority queue * - * @param n the initial estimate of the number of queue items for which memory - * should be preallocated - * @param cmppri the callback function to compare two nodes of the queue + * @param pq The priority queue struct to initialize + * @param flags Flags (see above) to control queue behavior + * @param est_size The estimated/initial queue size + * @param cmp The entry priority comparison function + * @return 0 on success, <0 on error + */ +extern int git_pqueue_init( + git_pqueue *pq, + uint32_t flags, + size_t est_size, + git_vector_cmp cmp); + +/** + * Free the queue memory + */ +extern void git_pqueue_free(git_pqueue *pq); + +/** + * Get the number of items in the queue + */ +GIT_INLINE(size_t) git_pqueue_size(const git_pqueue *pq) +{ + return git_vector_length(&pq->values); +} + +/** + * Get an item in the queue + */ +GIT_INLINE(void *) git_pqueue_get(const git_pqueue *pq, size_t pos) +{ + return git_vector_get(&pq->values, pos); +} + +/** + * Insert a new item into the queue * - * @return the handle or NULL for insufficent memory + * @param pq The priority queue + * @param item Pointer to the item data + * @return 0 on success, <0 on failure */ -int git_pqueue_init(git_pqueue *q, size_t n, git_pqueue_cmp cmppri); - +extern int git_pqueue_insert(git_pqueue *pq, void *item); /** - * free all memory used by the queue - * @param q the queue + * Remove the top item in the priority queue + * + * @param pq 
The priority queue + * @return item from heap on success, NULL if queue is empty */ -void git_pqueue_free(git_pqueue *q); - -/** - * clear all the elements in the queue - * @param q the queue - */ -void git_pqueue_clear(git_pqueue *q); - -/** - * return the size of the queue. - * @param q the queue - */ -size_t git_pqueue_size(git_pqueue *q); - - -/** - * insert an item into the queue. - * @param q the queue - * @param d the item - * @return 0 on success - */ -int git_pqueue_insert(git_pqueue *q, void *d); - - -/** - * pop the highest-ranking item from the queue. - * @param q the queue - * @return NULL on error, otherwise the entry - */ -void *git_pqueue_pop(git_pqueue *q); - - -/** - * access highest-ranking item without removing it. - * @param q the queue - * @return NULL on error, otherwise the entry - */ -void *git_pqueue_peek(git_pqueue *q); - -#endif /* PQUEUE_H */ -/** @} */ +extern void *git_pqueue_pop(git_pqueue *pq); +#endif diff --git a/src/revwalk.c b/src/revwalk.c index c0a053211..d6dc10652 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ -439,7 +439,8 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo) walk->commits = git_oidmap_alloc(); GITERR_CHECK_ALLOC(walk->commits); - if (git_pqueue_init(&walk->iterator_time, 8, git_commit_list_time_cmp) < 0 || + if (git_pqueue_init( + &walk->iterator_time, 0, 8, git_commit_list_time_cmp) < 0 || git_vector_init(&walk->twos, 4, NULL) < 0 || git_pool_init(&walk->commit_pool, 1, git_pool__suggest_items_per_page(COMMIT_ALLOC) * COMMIT_ALLOC) < 0) @@ -542,7 +543,7 @@ void git_revwalk_reset(git_revwalk *walk) commit->uninteresting = 0; }); - git_pqueue_clear(&walk->iterator_time); + git_pqueue_free(&walk->iterator_time); git_commit_list_free(&walk->iterator_topo); git_commit_list_free(&walk->iterator_rand); git_commit_list_free(&walk->iterator_reverse); diff --git a/src/vector.h b/src/vector.h index d318463c6..e8a967813 100644 --- a/src/vector.h +++ b/src/vector.h @@ -95,4 +95,14 @@ 
GIT_INLINE(void) git_vector_set_cmp(git_vector *v, git_vector_cmp cmp) } } +/** Swap two elements */ +#define git_vector_swap_elements(V, P1, P2) do { \ + void *__t = (V)->contents[P1]; \ + (V)->contents[P1] = (V)->contents[P2]; \ + (V)->contents[P2] = __t; } while (0) + +/** Compare two elements */ +#define git_vector_cmp_elements(V, P1, P2) \ + (V)->_cmp(git_vector_get(V,P1), git_vector_get(V,P2)) + #endif From af4bc6615d9fe0ebcc4abb939273913bcf9aee60 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 3 Feb 2014 21:04:40 -0800 Subject: [PATCH 2/5] Add some priority queue tests I forgot that I wrote some tests for the new priority queue code. --- tests/core/pqueue.c | 97 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 tests/core/pqueue.c diff --git a/tests/core/pqueue.c b/tests/core/pqueue.c new file mode 100644 index 000000000..d91dbb0cd --- /dev/null +++ b/tests/core/pqueue.c @@ -0,0 +1,97 @@ +#include "clar_libgit2.h" +#include "pqueue.h" + +static int cmp_ints(const void *v1, const void *v2) +{ + int i1 = *(int *)v1, i2 = *(int *)v2; + return (i1 < i2) ? -1 : (i1 > i2) ? 
1 : 0; +} + +void test_core_pqueue__items_are_put_in_order(void) +{ + git_pqueue pq; + int i, vals[20]; + + cl_git_pass(git_pqueue_init(&pq, 0, 20, cmp_ints)); + + for (i = 0; i < 20; ++i) { + if (i < 10) + vals[i] = 10 - i; /* 10 down to 1 */ + else + vals[i] = i + 1; /* 11 up to 20 */ + + cl_git_pass(git_pqueue_insert(&pq, &vals[i])); + } + + cl_assert_equal_i(20, git_pqueue_size(&pq)); + + for (i = 1; i <= 20; ++i) { + void *p = git_pqueue_pop(&pq); + cl_assert(p); + cl_assert_equal_i(i, *(int *)p); + } + + cl_assert_equal_i(0, git_pqueue_size(&pq)); + + git_pqueue_free(&pq); +} + +void test_core_pqueue__interleave_inserts_and_pops(void) +{ + git_pqueue pq; + int chunk, v, i, vals[200]; + + cl_git_pass(git_pqueue_init(&pq, 0, 20, cmp_ints)); + + for (v = 0, chunk = 20; chunk <= 200; chunk += 20) { + /* push the next 20 */ + for (; v < chunk; ++v) { + vals[v] = (v & 1) ? 200 - v : v; + cl_git_pass(git_pqueue_insert(&pq, &vals[v])); + } + + /* pop the lowest 10 */ + for (i = 0; i < 10; ++i) + (void)git_pqueue_pop(&pq); + } + + cl_assert_equal_i(100, git_pqueue_size(&pq)); + + /* at this point, we've popped 0-99 */ + + for (v = 100; v < 200; ++v) { + void *p = git_pqueue_pop(&pq); + cl_assert(p); + cl_assert_equal_i(v, *(int *)p); + } + + cl_assert_equal_i(0, git_pqueue_size(&pq)); + + git_pqueue_free(&pq); +} + +void test_core_pqueue__max_heap_size(void) +{ + git_pqueue pq; + int i, vals[100]; + + cl_git_pass(git_pqueue_init(&pq, GIT_PQUEUE_FIXED_SIZE, 50, cmp_ints)); + + for (i = 0; i < 100; ++i) { + vals[i] = (i & 1) ? 
100 - i : i; + cl_git_pass(git_pqueue_insert(&pq, &vals[i])); + } + + cl_assert_equal_i(50, git_pqueue_size(&pq)); + + for (i = 50; i < 100; ++i) { + void *p = git_pqueue_pop(&pq); + cl_assert(p); + cl_assert_equal_i(i, *(int *)p); + } + + cl_assert_equal_i(0, git_pqueue_size(&pq)); + + git_pqueue_free(&pq); + +} From 882c7742711199f757305687c257ac97262a3a30 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 4 Feb 2014 10:01:37 -0800 Subject: [PATCH 3/5] Convert pqueue to just be a git_vector This updates the git_pqueue to simply be a set of specialized init/insert/pop functions on a git_vector. To preserve the pqueue feature of having a fixed size heap, I converted the "sorted" field in git_vectors to a more general "flags" field so that pqueue could mix in its own flag. This had a bunch of ramifications because a number of places were directly looking at the vector "sorted" field - I added a couple new git_vector helpers (is_sorted, set_sorted) so the specific representation of this information could be abstracted.
--- src/index.c | 6 ++--- src/pqueue.c | 57 ++++++++++++++++++++++++--------------------- src/pqueue.h | 38 +++++++----------------------- src/revwalk.c | 2 +- src/sortedcache.c | 2 +- src/tree.c | 6 +++-- src/vector.c | 17 ++++++++------ src/vector.h | 17 ++++++++++++-- tests/index/tests.c | 8 +++---- 9 files changed, 78 insertions(+), 75 deletions(-) diff --git a/src/index.c b/src/index.c index 7bc5d5b24..1ab126c87 100644 --- a/src/index.c +++ b/src/index.c @@ -1564,7 +1564,7 @@ static int read_reuc(git_index *index, const char *buffer, size_t size) } /* entries are guaranteed to be sorted on-disk */ - index->reuc.sorted = 1; + git_vector_set_sorted(&index->reuc, true); return 0; } @@ -1610,7 +1610,7 @@ static int read_conflict_names(git_index *index, const char *buffer, size_t size #undef read_conflict_name /* entries are guaranteed to be sorted on-disk */ - index->names.sorted = 1; + git_vector_set_sorted(&index->names, true); return 0; } @@ -1812,7 +1812,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) #undef seek_forward /* Entries are stored case-sensitively on disk. 
*/ - index->entries.sorted = !index->ignore_case; + git_vector_set_sorted(&index->entries, index->ignore_case); git_vector_sort(&index->entries); return 0; diff --git a/src/pqueue.c b/src/pqueue.c index ddbad7a54..cc31a8f95 100644 --- a/src/pqueue.c +++ b/src/pqueue.c @@ -15,25 +15,29 @@ int git_pqueue_init( git_pqueue *pq, uint32_t flags, - size_t est_size, + size_t init_size, git_vector_cmp cmp) { - pq->flags = flags; - pq->initial_size = est_size; - return git_vector_init(&pq->values, est_size, cmp); -} + int error = git_vector_init(pq, init_size, cmp); -void git_pqueue_free(git_pqueue *pq) -{ - git_vector_free(&pq->values); + if (!error) { + /* mix in our flags */ + pq->flags |= flags; + + /* if fixed size heap, pretend vector is exactly init_size elements */ + if ((flags & GIT_PQUEUE_FIXED_SIZE) && init_size > 0) + pq->_alloc_size = init_size; + } + + return error; } static void pqueue_up(git_pqueue *pq, size_t el) { size_t parent_el = PQUEUE_PARENT_OF(el); - while (el > 0 && git_vector_cmp_elements(&pq->values, parent_el, el) > 0) { - git_vector_swap_elements(&pq->values, el, parent_el); + while (el > 0 && git_vector_cmp_elements(pq, parent_el, el) > 0) { + git_vector_swap_elements(pq, el, parent_el); el = parent_el; parent_el = PQUEUE_PARENT_OF(el); @@ -42,19 +46,19 @@ static void pqueue_up(git_pqueue *pq, size_t el) static void pqueue_down(git_pqueue *pq, size_t el) { - size_t last = git_vector_length(&pq->values); + size_t last = git_pqueue_size(pq); while (1) { size_t kid = PQUEUE_LCHILD_OF(el), rkid = PQUEUE_RCHILD_OF(el); if (kid >= last) break; - if (rkid < last && git_vector_cmp_elements(&pq->values, kid, rkid) > 0) + if (rkid < last && git_vector_cmp_elements(pq, kid, rkid) > 0) kid = rkid; - if (git_vector_cmp_elements(&pq->values, el, kid) < 0) + if (git_vector_cmp_elements(pq, el, kid) < 0) break; - git_vector_swap_elements(&pq->values, el, kid); + git_vector_swap_elements(pq, el, kid); el = kid; } } @@ -65,32 +69,33 @@ int 
git_pqueue_insert(git_pqueue *pq, void *item) /* if heap is full, pop the top element if new one should replace it */ if ((pq->flags & GIT_PQUEUE_FIXED_SIZE) != 0 && - pq->values.length >= pq->initial_size) + pq->length >= pq->_alloc_size) { - /* skip item if below min item in heap */ - if (pq->values._cmp(item, git_vector_get(&pq->values, 0)) <= 0) + /* skip this item if below min item in heap */ + if (pq->_cmp(item, git_vector_get(pq, 0)) <= 0) return 0; + /* otherwise remove the min item before inserting new */ (void)git_pqueue_pop(pq); } - error = git_vector_insert(&pq->values, item); - - if (!error) - pqueue_up(pq, pq->values.length - 1); + if (!(error = git_vector_insert(pq, item))) + pqueue_up(pq, pq->length - 1); return error; } void *git_pqueue_pop(git_pqueue *pq) { - void *rval = git_vector_get(&pq->values, 0); + void *rval = git_pqueue_get(pq, 0); - if (git_vector_length(&pq->values) > 1) { - pq->values.contents[0] = git_vector_last(&pq->values); - git_vector_pop(&pq->values); + if (git_pqueue_size(pq) > 1) { + /* move last item to top of heap, shrink, and push item down */ + pq->contents[0] = git_vector_last(pq); + git_vector_pop(pq); pqueue_down(pq, 0); } else { - git_vector_pop(&pq->values); + /* all we need to do is shrink the heap in this case */ + git_vector_pop(pq); } return rval; diff --git a/src/pqueue.h b/src/pqueue.h index 3c977e178..da7b74edf 100644 --- a/src/pqueue.h +++ b/src/pqueue.h @@ -9,15 +9,11 @@ #include "vector.h" -typedef struct { - git_vector values; - size_t initial_size; - uint32_t flags; -} git_pqueue; +typedef git_vector git_pqueue; enum { - GIT_PQUEUE_DEFAULT = 0, - GIT_PQUEUE_FIXED_SIZE = (1 << 0), /* don't grow heap, keep highest */ + /* flag meaning: don't grow heap, keep highest values only */ + GIT_PQUEUE_FIXED_SIZE = (GIT_VECTOR_FLAG_MAX << 1), }; /** @@ -25,36 +21,20 @@ enum { * * @param pq The priority queue struct to initialize * @param flags Flags (see above) to control queue behavior - * @param est_size The 
estimated/initial queue size + * @param init_size The initial queue size * @param cmp The entry priority comparison function * @return 0 on success, <0 on error */ extern int git_pqueue_init( git_pqueue *pq, uint32_t flags, - size_t est_size, + size_t init_size, git_vector_cmp cmp); -/** - * Free the queue memory - */ -extern void git_pqueue_free(git_pqueue *pq); - -/** - * Get the number of items in the queue - */ -GIT_INLINE(size_t) git_pqueue_size(const git_pqueue *pq) -{ - return git_vector_length(&pq->values); -} - -/** - * Get an item in the queue - */ -GIT_INLINE(void *) git_pqueue_get(const git_pqueue *pq, size_t pos) -{ - return git_vector_get(&pq->values, pos); -} +#define git_pqueue_free git_vector_free +#define git_pqueue_clear git_vector_clear +#define git_pqueue_size git_vector_length +#define git_pqueue_get git_vector_get /** * Insert a new item into the queue diff --git a/src/revwalk.c b/src/revwalk.c index d6dc10652..3bfc4d1aa 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ -543,7 +543,7 @@ void git_revwalk_reset(git_revwalk *walk) commit->uninteresting = 0; }); - git_pqueue_free(&walk->iterator_time); + git_pqueue_clear(&walk->iterator_time); git_commit_list_free(&walk->iterator_topo); git_commit_list_free(&walk->iterator_rand); git_commit_list_free(&walk->iterator_reverse); diff --git a/src/sortedcache.c b/src/sortedcache.c index 466e55dbe..13f0921f1 100644 --- a/src/sortedcache.c +++ b/src/sortedcache.c @@ -321,7 +321,7 @@ size_t git_sortedcache_entrycount(const git_sortedcache *sc) void *git_sortedcache_entry(git_sortedcache *sc, size_t pos) { /* make sure the items are sorted so this gets the correct item */ - if (!sc->items.sorted) + if (!git_vector_is_sorted(&sc->items)) git_vector_sort(&sc->items); return git_vector_get(&sc->items, pos); diff --git a/src/tree.c b/src/tree.c index 877a3fcee..94f779eca 100644 --- a/src/tree.c +++ b/src/tree.c @@ -283,7 +283,8 @@ static const git_tree_entry *entry_fromname( { size_t idx; - 
assert(tree->entries.sorted); /* be safe when we cast away constness */ + /* be safe when we cast away constness - i.e. don't trigger a sort */ + assert(git_vector_is_sorted(&tree->entries)); if (tree_key_search(&idx, (git_vector *)&tree->entries, name, name_len) < 0) return NULL; @@ -333,7 +334,8 @@ int git_tree__prefix_position(const git_tree *tree, const char *path) ksearch.filename = path; ksearch.filename_len = strlen(path); - assert(tree->entries.sorted); /* be safe when we cast away constness */ + /* be safe when we cast away constness - i.e. don't trigger a sort */ + assert(git_vector_is_sorted(&tree->entries)); /* Find tree entry with appropriate prefix */ git_vector_bsearch2( diff --git a/src/vector.c b/src/vector.c index f0c2f06c2..e5d8919d3 100644 --- a/src/vector.c +++ b/src/vector.c @@ -56,7 +56,9 @@ int git_vector_dup(git_vector *v, const git_vector *src, git_vector_cmp cmp) v->_alloc_size = src->length; v->_cmp = cmp; v->length = src->length; - v->sorted = src->sorted && cmp == src->_cmp; + v->flags = src->flags; + if (cmp != src->_cmp) + git_vector_set_sorted(v, 0); v->contents = git__malloc(bytes); GITERR_CHECK_ALLOC(v->contents); @@ -97,7 +99,7 @@ int git_vector_init(git_vector *v, size_t initial_size, git_vector_cmp cmp) v->_alloc_size = 0; v->_cmp = cmp; v->length = 0; - v->sorted = 1; + v->flags = GIT_VECTOR_SORTED; v->contents = NULL; return resize_vector(v, max(initial_size, MIN_ALLOCSIZE)); @@ -128,7 +130,8 @@ int git_vector_insert(git_vector *v, void *element) return -1; v->contents[v->length++] = element; - v->sorted = 0; + + git_vector_set_sorted(v, v->length <= 1); return 0; } @@ -141,7 +144,7 @@ int git_vector_insert_sorted( assert(v && v->_cmp); - if (!v->sorted) + if (!git_vector_is_sorted(v)) git_vector_sort(v); if (v->length >= v->_alloc_size && @@ -171,11 +174,11 @@ void git_vector_sort(git_vector *v) { assert(v); - if (v->sorted || !v->_cmp) + if (git_vector_is_sorted(v) || !v->_cmp) return; git__tsort(v->contents, v->length, 
v->_cmp); - v->sorted = 1; + git_vector_set_sorted(v, 1); } int git_vector_bsearch2( @@ -291,7 +294,7 @@ void git_vector_clear(git_vector *v) { assert(v); v->length = 0; - v->sorted = 1; + git_vector_set_sorted(v, 1); } void git_vector_swap(git_vector *a, git_vector *b) diff --git a/src/vector.h b/src/vector.h index e8a967813..f983c55d5 100644 --- a/src/vector.h +++ b/src/vector.h @@ -11,12 +11,17 @@ typedef int (*git_vector_cmp)(const void *, const void *); +enum { + GIT_VECTOR_SORTED = (1u << 0), + GIT_VECTOR_FLAG_MAX = (1u << 1), +}; + typedef struct git_vector { size_t _alloc_size; git_vector_cmp _cmp; void **contents; size_t length; - int sorted; + uint32_t flags; } git_vector; #define GIT_VECTOR_INIT {0} @@ -86,12 +91,20 @@ void git_vector_remove_matching( int git_vector_resize_to(git_vector *v, size_t new_length); int git_vector_set(void **old, git_vector *v, size_t position, void *value); +/** Check if vector is sorted */ +#define git_vector_is_sorted(V) (((V)->flags & GIT_VECTOR_SORTED) != 0) + +/** Directly set sorted state of vector */ +#define git_vector_set_sorted(V,S) do { \ + (V)->flags = (S) ? 
((V)->flags | GIT_VECTOR_SORTED) : \ + ((V)->flags & ~GIT_VECTOR_SORTED); } while (0) + /** Set the comparison function used for sorting the vector */ GIT_INLINE(void) git_vector_set_cmp(git_vector *v, git_vector_cmp cmp) { if (cmp != v->_cmp) { v->_cmp = cmp; - v->sorted = 0; + git_vector_set_sorted(v, 0); } } diff --git a/tests/index/tests.c b/tests/index/tests.c index bd90bc557..55a2f2c51 100644 --- a/tests/index/tests.c +++ b/tests/index/tests.c @@ -80,7 +80,7 @@ void test_index_tests__empty_index(void) cl_assert(index->on_disk == 0); cl_assert(git_index_entrycount(index) == 0); - cl_assert(index->entries.sorted); + cl_assert(git_vector_is_sorted(&index->entries)); git_index_free(index); } @@ -95,7 +95,7 @@ void test_index_tests__default_test_index(void) cl_assert(index->on_disk); cl_assert(git_index_entrycount(index) == index_entry_count); - cl_assert(index->entries.sorted); + cl_assert(git_vector_is_sorted(&index->entries)); entries = (git_index_entry **)index->entries.contents; @@ -118,7 +118,7 @@ void test_index_tests__gitgit_index(void) cl_assert(index->on_disk); cl_assert(git_index_entrycount(index) == index_entry_count_2); - cl_assert(index->entries.sorted); + cl_assert(git_vector_is_sorted(&index->entries)); cl_assert(index->tree != NULL); git_index_free(index); @@ -195,7 +195,7 @@ void test_index_tests__sort1(void) cl_git_pass(git_index_open(&index, "fake-index")); /* FIXME: this test is slightly dumb */ - cl_assert(index->entries.sorted); + cl_assert(git_vector_is_sorted(&index->entries)); git_index_free(index); } From 43709ca87811efc3c237eb719611f025502f3928 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 4 Feb 2014 10:33:30 -0800 Subject: [PATCH 4/5] Fix typo setting sorted flag when reloading index This fixes a typo I made for setting the sorted flag on the index after a reload. 
That typo didn't actually cause any test failures so I'm also adding a test that explicitly checks that the index is correctly sorted after a reload when ignoring case and when not. --- src/index.c | 6 ++++-- tests/index/tests.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/index.c b/src/index.c index 1ab126c87..42eb5fd49 100644 --- a/src/index.c +++ b/src/index.c @@ -1811,8 +1811,10 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) #undef seek_forward - /* Entries are stored case-sensitively on disk. */ - git_vector_set_sorted(&index->entries, index->ignore_case); + /* Entries are stored case-sensitively on disk, so re-sort now if + * in-memory index is supposed to be case-insensitive + */ + git_vector_set_sorted(&index->entries, !index->ignore_case); git_vector_sort(&index->entries); return 0; diff --git a/tests/index/tests.c b/tests/index/tests.c index 55a2f2c51..6e28af1f7 100644 --- a/tests/index/tests.c +++ b/tests/index/tests.c @@ -543,3 +543,37 @@ void test_index_tests__corrupted_extension(void) cl_git_fail_with(git_index_open(&index, TEST_INDEXBAD_PATH), GIT_ERROR); } + +static void assert_index_is_sorted(git_index *index) +{ + git_vector *entries = &index->entries; + size_t i; + + cl_assert(git_vector_is_sorted(entries)); + + for (i = 1; i < git_vector_length(entries); ++i) { + git_index_entry *prev = git_vector_get(entries, i - 1); + git_index_entry *curr = git_vector_get(entries, i); + cl_assert(index->entries._cmp(prev, curr) <= 0); + } +} + +void test_index_tests__reload_while_ignoring_case(void) +{ + git_index *index; + unsigned int caps; + + cl_git_pass(git_index_open(&index, TEST_INDEX_PATH)); + assert_index_is_sorted(index); + + caps = git_index_caps(index); + cl_git_pass(git_index_set_caps(index, caps &= ~GIT_INDEXCAP_IGNORE_CASE)); + cl_git_pass(git_index_read(index, true)); + assert_index_is_sorted(index); + + cl_git_pass(git_index_set_caps(index, caps 
| GIT_INDEXCAP_IGNORE_CASE)); + cl_git_pass(git_index_read(index, true)); + assert_index_is_sorted(index); + + git_index_free(index); +} From 1bbacc9ff617b67831dbfce5e1b04e1bd8571aa0 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 4 Feb 2014 16:46:43 -0800 Subject: [PATCH 5/5] Avoid extra copying in pqueue operations This tweaks the pqueue_up and pqueue_down routines so that they will not do full element swaps but instead carry over the state of the previous loop iteration and only assign elements for which we know the final position. This will avoid a little bit of data assignment which should improve performance in theory. Also got rid of some vector helpers that I'm no longer using. --- src/pqueue.c | 37 ++++++++++++++++++++++++++----------- src/vector.h | 10 ---------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/pqueue.c b/src/pqueue.c index cc31a8f95..172cf43d5 100644 --- a/src/pqueue.c +++ b/src/pqueue.c @@ -35,32 +35,47 @@ int git_pqueue_init( static void pqueue_up(git_pqueue *pq, size_t el) { size_t parent_el = PQUEUE_PARENT_OF(el); + void *kid = git_vector_get(pq, el); - while (el > 0 && git_vector_cmp_elements(pq, parent_el, el) > 0) { - git_vector_swap_elements(pq, el, parent_el); + while (el > 0) { + void *parent = pq->contents[parent_el]; + + if (pq->_cmp(parent, kid) <= 0) + break; + + pq->contents[el] = parent; el = parent_el; parent_el = PQUEUE_PARENT_OF(el); } + + pq->contents[el] = kid; } static void pqueue_down(git_pqueue *pq, size_t el) { - size_t last = git_pqueue_size(pq); + void *parent = git_vector_get(pq, el), *kid, *rkid; while (1) { - size_t kid = PQUEUE_LCHILD_OF(el), rkid = PQUEUE_RCHILD_OF(el); - if (kid >= last) - break; - if (rkid < last && git_vector_cmp_elements(pq, kid, rkid) > 0) - kid = rkid; + size_t kid_el = PQUEUE_LCHILD_OF(el); - if (git_vector_cmp_elements(pq, el, kid) < 0) + if ((kid = git_vector_get(pq, kid_el)) == NULL) break; - git_vector_swap_elements(pq, el, kid); - el = kid; + if 
((rkid = git_vector_get(pq, kid_el + 1)) != NULL && + pq->_cmp(kid, rkid) > 0) { + kid = rkid; + kid_el += 1; + } + + if (pq->_cmp(parent, kid) < 0) + break; + + pq->contents[el] = kid; + el = kid_el; } + + pq->contents[el] = parent; } int git_pqueue_insert(git_pqueue *pq, void *item) diff --git a/src/vector.h b/src/vector.h index f983c55d5..f8256853b 100644 --- a/src/vector.h +++ b/src/vector.h @@ -108,14 +108,4 @@ GIT_INLINE(void) git_vector_set_cmp(git_vector *v, git_vector_cmp cmp) } } -/** Swap two elements */ -#define git_vector_swap_elements(V, P1, P2) do { \ - void *__t = (V)->contents[P1]; \ - (V)->contents[P1] = (V)->contents[P2]; \ - (V)->contents[P2] = __t; } while (0) - -/** Compare two elements */ -#define git_vector_cmp_elements(V, P1, P2) \ - (V)->_cmp(git_vector_get(V,P1), git_vector_get(V,P2)) - #endif