From 43820f204ea32503b4083e3b6b83f30a0a0031c9 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 14 Oct 2015 19:24:07 +0200 Subject: [PATCH 1/2] odb: Be smarter when refreshing backends In the current implementation of ODB backends, each backend is tasked with refreshing itself after a failed lookup. This is standard Git behavior: we want to e.g. reload the packfiles on disk in case they have changed and that's the reason we can't find the object we're looking for. This behavior, however, becomes pathological in repositories where multiple alternates have been loaded. Given that each alternate counts as a separate backend, a miss in the main repository (which can potentially be very frequent in cases where object storage comes from the alternate) will result in refreshing all its packfiles before we move on to the alternate backend where the object will most likely be found. To fix this, the code in `odb.c` has been refactored as to perform the refresh of all the backends externally, once we've verified that the object is nowhere to be found. If the refresh is successful, we then perform the lookup sequentially through all the backends, skipping the ones that we know for sure weren't refreshed (because they have no refresh API). The on-disk pack backend has been adjusted accordingly: it no longer performs refreshes internally. --- src/odb.c | 237 +++++++++++++++++++++++++++++++++---------------- src/odb_pack.c | 82 +---------------- 2 files changed, 162 insertions(+), 157 deletions(-) diff --git a/src/odb.c b/src/odb.c index 2b2c35fe8..af1ff7fa2 100644 --- a/src/odb.c +++ b/src/odb.c @@ -620,23 +620,18 @@ void git_odb_free(git_odb *db) GIT_REFCOUNT_DEC(db, odb_free); } -int git_odb_exists(git_odb *db, const git_oid *id) +static int odb_exists_1(git_odb *db, const git_oid *id, bool only_refreshed) { - git_odb_object *object; size_t i; bool found = false; - assert(db && id); - - if ((object = git_cache_get_raw(odb_cache(db), id)) != NULL) { - git_odb_object_free(object); - return (int)true; - } - for (i = 0; i < db->backends.length && !found; ++i) { backend_internal *internal = git_vector_get(&db->backends, i); git_odb_backend *b = internal->backend; + if (only_refreshed && !b->refresh) + continue; + if (b->exists != NULL) found = (bool)b->exists(b, id); } @@ -644,12 +639,74 @@ int git_odb_exists(git_odb *db, const git_oid *id) return (int)found; } +int git_odb_exists(git_odb *db, const git_oid *id) +{ + git_odb_object *object; + + assert(db && id); + + if ((object = git_cache_get_raw(odb_cache(db), id)) != NULL) { + git_odb_object_free(object); + return (int)true; + } + + if (odb_exists_1(db, id, false)) + return 1; + + if (!git_odb_refresh(db)) + return odb_exists_1(db, id, true); + + /* Failed to refresh, hence not found */ + return 0; +} + +static int odb_exists_prefix_1(git_oid *out, git_odb *db, + const git_oid *key, size_t len, bool only_refreshed) +{ + size_t i; + int error = GIT_ENOTFOUND, num_found = 0; + git_oid last_found = {{0}}, found; + + for (i = 0; i < db->backends.length; ++i) { + backend_internal *internal = git_vector_get(&db->backends, i); + git_odb_backend *b = internal->backend; + + if (only_refreshed && !b->refresh) + continue; + + if (!b->exists_prefix) + continue; + + error = b->exists_prefix(&found, b, key, len); + if (error == GIT_ENOTFOUND || error == GIT_PASSTHROUGH) + continue; + if (error) + return error; + + /* make sure found item doesn't introduce ambiguity */ + if (num_found) { + if (git_oid__cmp(&last_found, &found)) + return git_odb__error_ambiguous("multiple matches for prefix"); + } else { + git_oid_cpy(&last_found, &found); + num_found++; + } + } + + if (!num_found) + return GIT_ENOTFOUND; + + if (out) + git_oid_cpy(out, &last_found); + + return 0; +} + int git_odb_exists_prefix( git_oid *out, git_odb *db, const git_oid *short_id, size_t len) { - int error = GIT_ENOTFOUND, num_found = 0; - size_t i; - git_oid key = {{0}}, last_found = {{0}}, found; + int error; + git_oid key = {{0}}; assert(db && short_id); @@ -673,35 +730,15 @@ int git_odb_exists_prefix( if (len & 1) key.id[len / 2] &= 0xF0; - for (i = 0; i < db->backends.length; ++i) { - backend_internal *internal = git_vector_get(&db->backends, i); - git_odb_backend *b = internal->backend; + error = odb_exists_prefix_1(out, db, &key, len, false); - if (!b->exists_prefix) - continue; + if (error == GIT_ENOTFOUND && !git_odb_refresh(db)) + error = odb_exists_prefix_1(out, db, &key, len, true); - error = b->exists_prefix(&found, b, &key, len); - if (error == GIT_ENOTFOUND || error == GIT_PASSTHROUGH) - continue; - if (error) - return error; - - /* make sure found item doesn't introduce ambiguity */ - if (num_found) { - if (git_oid__cmp(&last_found, &found)) - return git_odb__error_ambiguous("multiple matches for prefix"); - } else { - git_oid_cpy(&last_found, &found); - num_found++; - } - } - - if (!num_found) + if (error == GIT_ENOTFOUND) return git_odb__error_notfound("no match for id prefix", &key); - if (out) - git_oid_cpy(out, &last_found); - return 0; + return error; } int git_odb_read_header(size_t *len_p, git_otype *type_p, git_odb *db, const git_oid *id) @@ -783,36 +820,38 @@ static int hardcoded_objects(git_rawobj *raw, const git_oid *id) } } -int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id) +static int odb_read_1(git_odb_object **out, git_odb *db, const git_oid *id, + bool only_refreshed) { - size_t i, reads = 0; - int error; + size_t i; git_rawobj raw; git_odb_object *object; + bool found = false; - assert(out && db && id); + if (!hardcoded_objects(&raw, id)) + found = true; - *out = git_cache_get_raw(odb_cache(db), id); - if (*out != NULL) - return 0; - - error = hardcoded_objects(&raw, id); - - for (i = 0; i < db->backends.length && error < 0; ++i) { + for (i = 0; i < db->backends.length && !found; ++i) { backend_internal *internal = git_vector_get(&db->backends, i); git_odb_backend *b = internal->backend; + if (only_refreshed && !b->refresh) + continue; + if (b->read != NULL) { - ++reads; - error = b->read(&raw.data, &raw.len, &raw.type, b, id); + int error = b->read(&raw.data, &raw.len, &raw.type, b, id); + if (error == GIT_PASSTHROUGH || error == GIT_ENOTFOUND) + continue; + + if (error < 0) + return error; + + found = true; } } - if (error && error != GIT_PASSTHROUGH) { - if (!reads) - return git_odb__error_notfound("no match for id", id); - return error; - } + if (!found) + return GIT_ENOTFOUND; giterr_clear(); if ((object = odb_object__alloc(id, &raw)) == NULL) @@ -822,42 +861,48 @@ int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id) return 0; } -int git_odb_read_prefix( - git_odb_object **out, git_odb *db, const git_oid *short_id, size_t len) +int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id) +{ + int error; + + assert(out && db && id); + + *out = git_cache_get_raw(odb_cache(db), id); + if (*out != NULL) + return 0; + + error = odb_read_1(out, db, id, false); + + if (error == GIT_ENOTFOUND && !git_odb_refresh(db)) + error = odb_read_1(out, db, id, true); + + if (error == GIT_ENOTFOUND) + return git_odb__error_notfound("no match for id", id); + + return error; +} + +static int read_prefix_1(git_odb_object **out, git_odb *db, + const git_oid *key, size_t len, bool only_refreshed) { size_t i; int error = GIT_ENOTFOUND; - git_oid key = {{0}}, found_full_oid = {{0}}; + git_oid found_full_oid = {{0}}; git_rawobj raw; void *data = NULL; bool found = false; git_odb_object *object; - assert(out && db); - - if (len < GIT_OID_MINPREFIXLEN) - return git_odb__error_ambiguous("prefix length too short"); - if (len > GIT_OID_HEXSZ) - len = GIT_OID_HEXSZ; - - if (len == GIT_OID_HEXSZ) { - *out = git_cache_get_raw(odb_cache(db), short_id); - if (*out != NULL) - return 0; - } - - /* just copy valid part of short_id */ - memcpy(&key.id, short_id->id, (len + 1) / 2); - if (len & 1) - key.id[len / 2] &= 0xF0; - for (i = 0; i < db->backends.length; ++i) { backend_internal *internal = git_vector_get(&db->backends, i); git_odb_backend *b = internal->backend; + if (only_refreshed && !b->refresh) + continue; + if (b->read_prefix != NULL) { git_oid full_oid; - error = b->read_prefix(&full_oid, &raw.data, &raw.len, &raw.type, b, &key, len); + error = b->read_prefix(&full_oid, &raw.data, &raw.len, &raw.type, b, key, len); if (error == GIT_ENOTFOUND || error == GIT_PASSTHROUGH) continue; @@ -878,7 +923,7 @@ int git_odb_read_prefix( } if (!found) - return git_odb__error_notfound("no match for prefix", &key); + return GIT_ENOTFOUND; if ((object = odb_object__alloc(&found_full_oid, &raw)) == NULL) return -1; @@ -887,6 +932,42 @@ int git_odb_read_prefix( return 0; } +int git_odb_read_prefix( + git_odb_object **out, git_odb *db, const git_oid *short_id, size_t len) +{ + git_oid key = {{0}}; + int error; + + assert(out && db); + + if (len < GIT_OID_MINPREFIXLEN) + return git_odb__error_ambiguous("prefix length too short"); + + if (len > GIT_OID_HEXSZ) + len = GIT_OID_HEXSZ; + + if (len == GIT_OID_HEXSZ) { + *out = git_cache_get_raw(odb_cache(db), short_id); + if (*out != NULL) + return 0; + } + + /* just copy valid part of short_id */ + memcpy(&key.id, short_id->id, (len + 1) / 2); + if (len & 1) + key.id[len / 2] &= 0xF0; + + error = read_prefix_1(out, db, &key, len, false); + + if (error == GIT_ENOTFOUND && !git_odb_refresh(db)) + error = read_prefix_1(out, db, &key, len, true); + + if (error == GIT_ENOTFOUND) + return git_odb__error_notfound("no match for prefix", &key); + + return error; +} + int git_odb_foreach(git_odb *db, git_odb_foreach_cb cb, void *payload) { unsigned int i; diff --git a/src/odb_pack.c b/src/odb_pack.c index 735158d96..77d2c75b9 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -346,7 +346,7 @@ static int pack_backend__refresh(git_odb_backend *backend_) return error; } -static int pack_backend__read_header_internal( +static int pack_backend__read_header( size_t *len_p, git_otype *type_p, struct git_odb_backend *backend, const git_oid *oid) { @@ -361,24 +361,7 @@ static int pack_backend__read_header_internal( return git_packfile_resolve_header(len_p, type_p, e.p, e.offset); } -static int pack_backend__read_header( - size_t *len_p, git_otype *type_p, - struct git_odb_backend *backend, const git_oid *oid) -{ - int error; - - error = pack_backend__read_header_internal(len_p, type_p, backend, oid); - - if (error != GIT_ENOTFOUND) - return error; - - if ((error = pack_backend__refresh(backend)) < 0) - return error; - - return pack_backend__read_header_internal(len_p, type_p, backend, oid); -} - -static int pack_backend__read_internal( +static int pack_backend__read( void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid) { @@ -397,24 +380,7 @@ static int pack_backend__read_internal( return 0; } -static int pack_backend__read( - void **buffer_p, size_t *len_p, git_otype *type_p, - git_odb_backend *backend, const git_oid *oid) -{ - int error; - - error = pack_backend__read_internal(buffer_p, len_p, type_p, backend, oid); - - if (error != GIT_ENOTFOUND) - return error; - - if ((error = pack_backend__refresh(backend)) < 0) - return error; - - return pack_backend__read_internal(buffer_p, len_p, type_p, backend, oid); -} - -static int pack_backend__read_prefix_internal( +static int pack_backend__read_prefix( git_oid *out_oid, void **buffer_p, size_t *len_p, @@ -451,45 +417,9 @@ static int pack_backend__read_prefix_internal( return error; } -static int pack_backend__read_prefix( - git_oid *out_oid, - void **buffer_p, - size_t *len_p, - git_otype *type_p, - git_odb_backend *backend, - const git_oid *short_oid, - size_t len) -{ - int error; - - error = pack_backend__read_prefix_internal( - out_oid, buffer_p, len_p, type_p, backend, short_oid, len); - - if (error != GIT_ENOTFOUND) - return error; - - if ((error = pack_backend__refresh(backend)) < 0) - return error; - - return pack_backend__read_prefix_internal( - out_oid, buffer_p, len_p, type_p, backend, short_oid, len); -} - static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid) { struct git_pack_entry e; - int error; - - error = pack_entry_find(&e, (struct pack_backend *)backend, oid); - - if (error != GIT_ENOTFOUND) - return error == 0; - - if ((error = pack_backend__refresh(backend)) < 0) { - giterr_clear(); - return (int)false; - } - return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0; } @@ -501,12 +431,7 @@ static int pack_backend__exists_prefix( struct git_pack_entry e = {0}; error = pack_entry_find_prefix(&e, pb, short_id, len); - - if (error == GIT_ENOTFOUND && !(error = pack_backend__refresh(backend))) - error = pack_entry_find_prefix(&e, pb, short_id, len); - git_oid_cpy(out, &e.sha1); - return error; } @@ -674,7 +599,6 @@ int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir) git_path_isdir(git_buf_cstr(&path))) { backend->pack_folder = git_buf_detach(&path); - error = pack_backend__refresh((git_odb_backend *)backend); } From a0a1b19ab043f3579aabfb7602b4c4ac4dd69e72 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 14 Oct 2015 19:31:54 +0200 Subject: [PATCH 2/2] odb: Prioritize alternate backends For most real use cases, repositories with alternates use them as main object storage. Checking the alternate for objects before the main repository should result in measurable speedups. Because of this, we're changing the sorting algorithm to prioritize alternates *in cases where two backends have the same priority*. This means that the pack backend for the alternate will be checked before the pack backend for the main repository *but* both of them will be checked before any loose backends. --- src/odb.c | 12 ++++++++---- tests/odb/sorting.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/odb.c b/src/odb.c index af1ff7fa2..1c877c9fc 100644 --- a/src/odb.c +++ b/src/odb.c @@ -374,10 +374,14 @@ static int backend_sort_cmp(const void *a, const void *b) const backend_internal *backend_a = (const backend_internal *)(a); const backend_internal *backend_b = (const backend_internal *)(b); - if (backend_a->is_alternate == backend_b->is_alternate) - return (backend_b->priority - backend_a->priority); - - return backend_a->is_alternate ? 1 : -1; + if (backend_b->priority == backend_a->priority) { + if (backend_a->is_alternate) + return -1; + if (backend_b->is_alternate) + return 1; + return 0; + } + return (backend_b->priority - backend_a->priority); } int git_odb_new(git_odb **out) diff --git a/tests/odb/sorting.c b/tests/odb/sorting.c index d24c49c69..6af8b0d1b 100644 --- a/tests/odb/sorting.c +++ b/tests/odb/sorting.c @@ -57,14 +57,14 @@ void test_odb_sorting__basic_backends_sorting(void) void test_odb_sorting__alternate_backends_sorting(void) { - cl_git_pass(git_odb_add_backend(_odb, new_backend(0), 5)); - cl_git_pass(git_odb_add_backend(_odb, new_backend(2), 3)); - cl_git_pass(git_odb_add_backend(_odb, new_backend(1), 4)); - cl_git_pass(git_odb_add_backend(_odb, new_backend(3), 1)); - cl_git_pass(git_odb_add_alternate(_odb, new_backend(4), 5)); - cl_git_pass(git_odb_add_alternate(_odb, new_backend(6), 3)); - cl_git_pass(git_odb_add_alternate(_odb, new_backend(5), 4)); - cl_git_pass(git_odb_add_alternate(_odb, new_backend(7), 1)); + cl_git_pass(git_odb_add_backend(_odb, new_backend(1), 5)); + cl_git_pass(git_odb_add_backend(_odb, new_backend(5), 3)); + cl_git_pass(git_odb_add_backend(_odb, new_backend(3), 4)); + cl_git_pass(git_odb_add_backend(_odb, new_backend(7), 1)); + cl_git_pass(git_odb_add_alternate(_odb, new_backend(0), 5)); + cl_git_pass(git_odb_add_alternate(_odb, new_backend(4), 3)); + cl_git_pass(git_odb_add_alternate(_odb, new_backend(2), 4)); + cl_git_pass(git_odb_add_alternate(_odb, new_backend(6), 1)); check_backend_sorting(_odb); }