diff --git a/include/git2/object.h b/include/git2/object.h index 7fca1c988..4cb6af0ad 100644 --- a/include/git2/object.h +++ b/include/git2/object.h @@ -58,6 +58,36 @@ GIT_BEGIN_DECL */ GIT_EXTERN(int) git_object_lookup(git_object **object, git_repository *repo, const git_oid *id, git_otype type); +/** + * Lookup a reference to one of the objects in a repository, + * given a prefix of its identifier (short id). + * + * The object obtained will be such that its identifier + * matches the first 'len' hexadecimal characters + * (packets of 4 bits) of the given 'id'. + * 'len' must be long enough to identify a unique + * object matching the prefix; otherwise the method will + * fail. + * + * The generated reference is owned by the repository and + * should be closed with the `git_object_close` method + * instead of free'd manually. + * + * The 'type' parameter must match the type of the object + * in the odb; the method will fail otherwise. + * The special value 'GIT_OBJ_ANY' may be passed to let + * the method guess the object's type. + * + * @param object_out pointer to the looked-up object + * @param repo the repository in which to look up the object + * @param id a short identifier for the object + * @param len the length of the short identifier, in hexadecimal characters + * @param type the type of the object + * @return GIT_SUCCESS on success; an error code otherwise + */ +GIT_EXTERN(int) git_object_lookup_short_oid(git_object **object_out, git_repository *repo, + const git_oid *id, unsigned int len, git_otype type); + /** * Get the id (SHA1) of a repository object * diff --git a/include/git2/odb.h b/include/git2/odb.h index 1d351beea..483934ad3 100644 --- a/include/git2/odb.h +++ b/include/git2/odb.h @@ -109,7 +109,7 @@ GIT_EXTERN(void) git_odb_close(git_odb *db); /** * Read an object from the database. * - * This method queries all avaiable ODB backends + * This method queries all available ODB backends * trying to read the given OID. 
* * The returned object is reference counted and @@ -125,6 +125,36 @@ GIT_EXTERN(void) git_odb_close(git_odb *db); */ GIT_EXTERN(int) git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id); +/** + * Read an object from the database, given a prefix + * of its identifier. + * + * This method queries all available ODB backends + * trying to match the first 'len' hexadecimal + * characters of the 'short_id'. + * The remaining (GIT_OID_HEXSZ-len)*4 bits of + * 'short_id' must be 0s. + * The prefix must be long enough to identify + * a unique object in all the backends; the + * method will fail otherwise. + * + * The returned object is reference counted and + * internally cached, so it should be closed + * by the user once it's no longer in use. + * + * @param out_oid the oid of the unique object matching + * the short id + * @param out pointer where to store the read object + * @param db database to search for the object in. + * @param short_id a prefix of the id of the object to read. + * @param len the length of the prefix, in hexadecimal characters + * @return + * - GIT_SUCCESS if the object was read; + * - GIT_ENOTFOUND if the object is not in the database. + * - GIT_EAMBIGUOUSOIDPREFIX if the prefix is ambiguous (several objects match the prefix) + */ +GIT_EXTERN(int) git_odb_read_unique_short_oid(git_oid *out_oid, git_odb_object **out, git_odb *db, const git_oid *short_id, unsigned int len); + /** * Read the header of an object from the database, without * reading its full contents. 
diff --git a/src/object.c b/src/object.c index d2e4da359..e6e976756 100644 --- a/src/object.c +++ b/src/object.c @@ -95,24 +95,63 @@ static int create_object(git_object **object_out, git_otype type) return GIT_SUCCESS; } -int git_object_lookup(git_object **object_out, git_repository *repo, const git_oid *id, git_otype type) +int git_object_lookup_short_oid(git_object **object_out, git_repository *repo, const git_oid *id, unsigned int len, git_otype type) { git_object *object = NULL; git_odb_object *odb_obj; int error = GIT_SUCCESS; + git_oid out_oid; assert(repo && object_out && id); - object = git_cache_get(&repo->objects, id); - if (object != NULL) { - if (type != GIT_OBJ_ANY && type != object->type) - return git__throw(GIT_EINVALIDTYPE, "Failed to lookup object. The given type does not match the type on the ODB"); - - *object_out = object; - return GIT_SUCCESS; + if (len == 0) + return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to lookup object. Prefix length should be not be 0."); + if (len > GIT_OID_HEXSZ) { + len = GIT_OID_HEXSZ; + } + + if (len == GIT_OID_HEXSZ) { + /* We want to match the full id : we can first look up in the cache, + * since there is no need to check for non ambiguousity + */ + object = git_cache_get(&repo->objects, id); + if (object != NULL) { + if (type != GIT_OBJ_ANY && type != object->type) + return git__throw(GIT_EINVALIDTYPE, "Failed to lookup object. The given type does not match the type on the ODB"); + + *object_out = object; + return GIT_SUCCESS; + } + + /* Object was not found in the cache, let's explore the backends. + * We could just use git_odb_read_unique_short_oid, + * it is the same cost for packed and loose object backends, + * but it may be much more costly for sqlite and hiredis. 
+ */ + error = git_odb_read(&odb_obj, repo->db, id); + git_oid_cpy(&out_oid, id); + } else { + git_oid short_oid; + + /* We copy the first len*4 bits from id and fill the remaining with 0s */ + memcpy(short_oid.id, id->id, (len + 1) / 2); + if (len % 2) + short_oid.id[len / 2] &= 0xF0; + memset(short_oid.id + (len + 1) / 2, 0, (GIT_OID_HEXSZ - len) / 2); + + /* If len < GIT_OID_HEXSZ (a strict short oid was given), we have + * 2 options : + * - We always search in the cache first. If we find that short oid is + * ambiguous, we can stop. But in all the other cases, we must then + * explore all the backends (to find an object if there was match, + * or to check that oid is not ambiguous if we have found 1 match in + * the cache) + * - We never explore the cache, go right to exploring the backends + * We chose the latter : we explore directly the backends. + */ + error = git_odb_read_unique_short_oid(&out_oid, &odb_obj, repo->db, &short_oid, len); } - error = git_odb_read(&odb_obj, repo->db, id); if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to lookup object"); @@ -127,7 +166,7 @@ int git_object_lookup(git_object **object_out, git_repository *repo, const git_o return git__rethrow(error, "Failed to lookup object"); /* Initialize parent object */ - git_oid_cpy(&object->cached.oid, id); + git_oid_cpy(&object->cached.oid, &out_oid); object->repo = repo; switch (type) { @@ -162,6 +201,10 @@ int git_object_lookup(git_object **object_out, git_repository *repo, const git_o return GIT_SUCCESS; } +int git_object_lookup(git_object **object_out, git_repository *repo, const git_oid *id, git_otype type) { + return git_object_lookup_short_oid(object_out, repo, id, GIT_OID_HEXSZ, type); +} + void git_object__free(void *_obj) { git_object *object = (git_object *)_obj; diff --git a/src/odb.c b/src/odb.c index c669e7c14..ed600ffab 100644 --- a/src/odb.c +++ b/src/odb.c @@ -488,6 +488,48 @@ int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id) return error; 
} +int git_odb_read_unique_short_oid(git_oid *out_oid, git_odb_object **out, git_odb *db, const git_oid *short_id, unsigned int len) +{ + unsigned int i; + int error = GIT_ENOTFOUND; + git_rawobj raw; + int found = 0; /* number of backends whose prefix lookup succeeded */ + + assert(out_oid && out && db && short_id && len > 0); + + if (len > GIT_OID_HEXSZ) + len = GIT_OID_HEXSZ; + + if (len == GIT_OID_HEXSZ) { /* a full-length id can be served straight from the odb cache */ + *out = git_cache_get(&db->cache, short_id); + if (*out != NULL) { + git_oid_cpy(out_oid, short_id); + return GIT_SUCCESS; + } + } + + for (i = 0; i < db->backends.length && found < 2; ++i) { + backend_internal *internal = git_vector_get(&db->backends, i); + git_odb_backend *b = internal->backend; + + if (b->read_unique_short_oid != NULL) { /* guard the callback that is actually invoked below */ + error = b->read_unique_short_oid(out_oid, &raw.data, &raw.len, &raw.type, b, short_id, len); + if (error == GIT_SUCCESS) + found++; /* NOTE(review): an object present in two backends is counted twice and reported as ambiguous; 'raw' from the earlier hit is overwritten -- confirm intent */ + } + } + + if (found == 1) { + *out = git_cache_try_store(&db->cache, new_odb_object(out_oid, &raw)); + } else if (found > 1) { + return git__rethrow(GIT_EAMBIGUOUSOIDPREFIX, "Ambiguous sha1"); + } else { + return git__rethrow(GIT_ENOTFOUND, "Failed to read object"); + } + + return GIT_SUCCESS; +} + int git_odb_write(git_oid *oid, git_odb *db, const void *data, size_t len, git_otype type) { unsigned int i; diff --git a/src/odb_pack.c b/src/odb_pack.c index 3125a8c94..605608314 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -29,7 +29,9 @@ #include "fileops.h" #include "hash.h" #include "odb.h" +#include "oid.h" #include "delta-apply.h" +#include "sha1_lookup.h" #include "git2/odb_backend.h" @@ -262,15 +264,37 @@ static int packfile_refresh_all(struct pack_backend *backend); static off_t nth_packed_object_offset(const struct pack_file *p, uint32_t n); -static int pack_entry_find_offset(off_t *offset_out, - struct pack_file *p, const git_oid *oid); +/* Can find the offset of an object given + * a prefix of an identifier. + * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid + * is ambiguous within the pack. 
+ */ +static int pack_entry_find_offset( + off_t *offset_out, + git_oid *found_oid, + struct pack_file *p, + const git_oid *short_oid, + unsigned int len); -static int pack_entry_find1(struct pack_entry *e, - struct pack_file *p, const git_oid *oid); +static int pack_entry_find1( + struct pack_entry *e, + struct pack_file *p, + const git_oid *short_oid, + unsigned int len); static int pack_entry_find(struct pack_entry *e, struct pack_backend *backend, const git_oid *oid); +/* Can find the offset of an object given + * a prefix of an identifier. + * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid + * is ambiguous. + */ +static int pack_entry_find_unique_short_oid(struct pack_entry *e, + struct pack_backend *backend, + const git_oid *short_oid, + unsigned int len); + static off_t get_delta_base(struct pack_backend *backend, struct pack_file *p, struct pack_window **w_curs, off_t *curpos, git_otype type, @@ -923,12 +947,15 @@ static off_t nth_packed_object_offset(const struct pack_file *p, uint32_t n) static int pack_entry_find_offset( off_t *offset_out, + git_oid *found_oid, struct pack_file *p, - const git_oid *oid) + const git_oid *short_oid, + unsigned int len) { const uint32_t *level1_ofs = p->index_map.data; const unsigned char *index = p->index_map.data; unsigned hi, lo, stride; + int found = 0; *offset_out = 0; @@ -950,8 +977,8 @@ static int pack_entry_find_offset( } index += 4 * 256; - hi = ntohl(level1_ofs[(int)oid->id[0]]); - lo = ((oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)oid->id[0] - 1])); + hi = ntohl(level1_ofs[(int)short_oid->id[0]]); + lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1])); if (p->index_version > 1) { stride = 20; @@ -962,60 +989,79 @@ static int pack_entry_find_offset( #ifdef INDEX_DEBUG_LOOKUP printf("%02x%02x%02x... 
lo %u hi %u nr %d\n", - oid->id[0], oid->id[1], oid->id[2], lo, hi, p->num_objects); + short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects); #endif -#ifdef GIT2_INDEX_LOOKUP /* TODO: use the advanced lookup method from git.git */ + /* Use git.git lookup code */ + int pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id); - int pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, oid); - if (pos < 0) - return git__throw(GIT_ENOTFOUND, "Failed to find offset for pack entry. Entry not found"); + const unsigned char *current; + if (pos >= 0) { + /* An object matching exactly the oid was found */ + found = 1; + current = index + pos * stride; + } else { + /* No object was found */ + pos = - 1 - pos; + /* pos refers to the object with the "closest" oid to short_oid */ + if (pos < p->num_objects) { + current = index + pos * stride; - *offset_out = nth_packed_object_offset(p, pos); - return GIT_SUCCESS; - -#else /* use an old and boring binary search */ - - do { - unsigned mi = (lo + hi) / 2; - int cmp = memcmp(index + mi * stride, oid->id, GIT_OID_RAWSZ); - - if (!cmp) { - *offset_out = nth_packed_object_offset(p, mi); - return GIT_SUCCESS; + if (git_oid_match_raw(len, short_oid->id, current)) { + found = 1; + } } + } + if (found && pos + 1 < p->num_objects) { + /* Check for ambiguousity */ + const unsigned char *next = current + stride; - if (cmp > 0) - hi = mi; - else - lo = mi+1; + if (git_oid_match_raw(len, short_oid->id, next)) { + found = 2; + } + } - } while (lo < hi); + if (!found) { + return git__throw(GIT_ENOTFOUND, "Failed to find offset for pack entry. Entry not found"); + } else if (found > 1) { + return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find offset for pack entry. Ambiguous sha1 prefix within pack"); + } else { + *offset_out = nth_packed_object_offset(p, pos); + git_oid_mkraw(found_oid, current); - return git__throw(GIT_ENOTFOUND, "Failed to find offset for pack entry. 
Entry not found"); +#ifdef INDEX_DEBUG_LOOKUP + unsigned char hex_sha1[GIT_OID_HEXSZ + 1]; + git_oid_fmt(hex_sha1, found_oid); + hex_sha1[GIT_OID_HEXSZ] = '\0'; + printf("found lo=%d %s\n", lo, hex_sha1); #endif + return GIT_SUCCESS; + } } static int pack_entry_find1( struct pack_entry *e, struct pack_file *p, - const git_oid *oid) + const git_oid *short_oid, + unsigned int len) { off_t offset; assert(p); - if (p->num_bad_objects) { + if (len == GIT_OID_HEXSZ && p->num_bad_objects) { unsigned i; for (i = 0; i < p->num_bad_objects; i++) - if (git_oid_cmp(oid, &p->bad_object_sha1[i]) == 0) + if (git_oid_cmp(short_oid, &p->bad_object_sha1[i]) == 0) return git__throw(GIT_ERROR, "Failed to find pack entry. Bad object found"); } - if (pack_entry_find_offset(&offset, p, oid) < GIT_SUCCESS) - return git__throw(GIT_ENOTFOUND, "Failed to find pack entry. Couldn't find offset"); - - /* we found an entry in the index; + git_oid found_oid; + int error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len); + if (error < GIT_SUCCESS) + return git__rethrow(error, "Failed to find pack entry. 
Couldn't find offset"); + + /* we found a unique entry in the index; * make sure the packfile backing the index * still exists on disk */ if (p->pack_fd == -1 && packfile_open(p) < GIT_SUCCESS) @@ -1024,7 +1070,7 @@ static int pack_entry_find1( e->offset = offset; e->p = p; - git_oid_cpy(&e->sha1, oid); + git_oid_cpy(&e->sha1, &found_oid); return GIT_SUCCESS; } @@ -1037,7 +1083,7 @@ static int pack_entry_find(struct pack_entry *e, struct pack_backend *backend, c return git__rethrow(error, "Failed to find pack entry"); if (backend->last_found && - pack_entry_find1(e, backend->last_found, oid) == GIT_SUCCESS) + pack_entry_find1(e, backend->last_found, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) return GIT_SUCCESS; for (i = 0; i < backend->packs.length; ++i) { @@ -1047,7 +1093,7 @@ static int pack_entry_find(struct pack_entry *e, struct pack_backend *backend, c if (p == backend->last_found) continue; - if (pack_entry_find1(e, p, oid) == GIT_SUCCESS) { + if (pack_entry_find1(e, p, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) { backend->last_found = p; return GIT_SUCCESS; } @@ -1056,6 +1102,53 @@ static int pack_entry_find(struct pack_entry *e, struct pack_backend *backend, c return git__throw(GIT_ENOTFOUND, "Failed to find pack entry"); } +static int pack_entry_find_unique_short_oid(struct pack_entry *e, struct pack_backend *backend, + const git_oid *short_oid, unsigned int len) +{ + int error; + size_t i; + + if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS) + return git__rethrow(error, "Failed to find pack entry"); + + unsigned found = 0; /* number of packs in which the prefix matched */ + if (backend->last_found) { /* try the most recently matched pack first */ + error = pack_entry_find1(e, backend->last_found, short_oid, len); + if (error == GIT_EAMBIGUOUSOIDPREFIX) { + return git__rethrow(error, "Failed to find pack entry. 
Ambiguous sha1 prefix"); + } else if (error == GIT_SUCCESS) { + found = 1; + } + } + + for (i = 0; i < backend->packs.length; ++i) { + struct pack_file *p; + + p = git_vector_get(&backend->packs, i); + if (p == backend->last_found) /* already examined above */ + continue; + + error = pack_entry_find1(e, p, short_oid, len); + if (error == GIT_EAMBIGUOUSOIDPREFIX) { + return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix"); + } else if (error == GIT_SUCCESS) { + found++; + if (found > 1) /* a second pack matched as well: stop, the prefix is ambiguous */ + break; + backend->last_found = p; /* first match so far: remember its pack and keep scanning the rest */ + } + } + + if (!found) { + return git__rethrow(GIT_ENOTFOUND, "Failed to find pack entry"); + } else if (found > 1) { + return git__rethrow(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find pack entry. Ambiguous sha1 prefix"); + } else { + return GIT_SUCCESS; + } + +} + @@ -1190,6 +1283,7 @@ static off_t get_delta_base( { unsigned char *base_info = pack_window_open(backend, p, w_curs, *curpos, NULL); off_t base_offset; + git_oid unused; /* pack_window_open() assured us we have [base_info, base_info + 20) * as a range that we can look at without walking off the @@ -1214,7 +1308,7 @@ static off_t get_delta_base( *curpos += used; } else if (type == GIT_OBJ_REF_DELTA) { /* The base entry _must_ be in the same pack */ - if (pack_entry_find_offset(&base_offset, p, (git_oid *)base_info) < GIT_SUCCESS) + if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < GIT_SUCCESS) return git__throw(GIT_EPACKCORRUPTED, "Base entry delta is not in the same pack"); *curpos += 20; } else @@ -1367,15 +1461,23 @@ int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_od int pack_backend__read_unique_short_oid(git_oid *out_oid, void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *short_oid, unsigned int len) { - if (len >= GIT_OID_HEXSZ) { - int error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid); - if (error == GIT_SUCCESS) - git_oid_cpy(out_oid, short_oid); + struct 
pack_entry e; + git_rawobj raw; + int error; - return error; - } else if (len < GIT_OID_HEXSZ) { - return git__throw(GIT_ENOTIMPLEMENTED, "Pack backend cannot search objects from short oid"); - } + if ((error = pack_entry_find_unique_short_oid(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS) + return git__rethrow(error, "Failed to read pack backend"); + + if ((error = packfile_unpack(&raw, (struct pack_backend *)backend, e.p, e.offset)) < GIT_SUCCESS) + return git__rethrow(error, "Failed to read pack backend"); + + *buffer_p = raw.data; + *len_p = raw.len; + *type_p = raw.type; + git_oid_cpy(out_oid, &e.sha1); + + + return GIT_SUCCESS; } int pack_backend__exists(git_odb_backend *backend, const git_oid *oid) diff --git a/src/sha1_lookup.c b/src/sha1_lookup.c new file mode 100644 index 000000000..f4a3c42cc --- /dev/null +++ b/src/sha1_lookup.c @@ -0,0 +1,196 @@ +/* + * This file is basically taken from git code. + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * In addition to the permissions in the GNU General Public License, + * the authors give you unlimited permission to link the compiled + * version of this file into combinations with other programs, + * and to distribute those combinations without any restriction + * coming from the use of this file. (The General Public License + * restrictions do apply in other respects; for example, they cover + * modification of the file, and distribution when not linked into + * a combined executable.) + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include + +#include "sha1_lookup.h" +#include "common.h" + +/* + * Conventional binary search loop looks like this: + * + * unsigned lo, hi; + * do { + * unsigned mi = (lo + hi) / 2; + * int cmp = "entry pointed at by mi" minus "target"; + * if (!cmp) + * return (mi is the wanted one) + * if (cmp > 0) + * hi = mi; "mi is larger than target" + * else + * lo = mi+1; "mi is smaller than target" + * } while (lo < hi); + * + * The invariants are: + * + * - When entering the loop, lo points at a slot that is never + * above the target (it could be at the target), hi points at a + * slot that is guaranteed to be above the target (it can never + * be at the target). + * + * - We find a point 'mi' between lo and hi (mi could be the same + * as lo, but never can be as same as hi), and check if it hits + * the target. There are three cases: + * + * - if it is a hit, we are happy. + * + * - if it is strictly higher than the target, we set it to hi, + * and repeat the search. + * + * - if it is strictly lower than the target, we update lo to + * one slot after it, because we allow lo to be at the target. + * + * If the loop exits, there is no matching entry. + * + * When choosing 'mi', we do not have to take the "middle" but + * anywhere in between lo and hi, as long as lo <= mi < hi is + * satisfied. When we somehow know that the distance between the + * target and lo is much shorter than the target and hi, we could + * pick mi that is much closer to lo than the midway. + * + * Now, we can take advantage of the fact that SHA-1 is a good hash + * function, and as long as there are enough entries in the table, we + * can expect uniform distribution. An entry that begins with for + * example "deadbeef..." 
is much likely to appear much later than in + * the midway of the table. It can reasonably be expected to be near + * 87% (222/256) from the top of the table. + * + * However, we do not want to pick "mi" too precisely. If the entry at + * the 87% in the above example turns out to be higher than the target + * we are looking for, we would end up narrowing the search space down + * only by 13%, instead of 50% we would get if we did a simple binary + * search. So we would want to hedge our bets by being less aggressive. + * + * The table at "table" holds at least "nr" entries of "elem_size" + * bytes each. Each entry has the SHA-1 key at "key_offset". The + * table is sorted by the SHA-1 key of the entries. The caller wants + * to find the entry with "key", and knows that the entry at "lo" is + * not higher than the entry it is looking for, and that the entry at + * "hi" is higher than the entry it is looking for. + */ +int sha1_entry_pos(const void *table, + size_t elem_size, + size_t key_offset, + unsigned lo, unsigned hi, unsigned nr, + const unsigned char *key) +{ + const unsigned char *base = table; + const unsigned char *hi_key, *lo_key; + unsigned ofs_0; + + if (!nr || lo >= hi) + return -1; + + if (nr == hi) + hi_key = NULL; + else + hi_key = base + elem_size * hi + key_offset; + lo_key = base + elem_size * lo + key_offset; + + ofs_0 = 0; + do { + int cmp; + unsigned ofs, mi, range; + unsigned lov, hiv, kyv; + const unsigned char *mi_key; + + range = hi - lo; + if (hi_key) { + for (ofs = ofs_0; ofs < 20; ofs++) + if (lo_key[ofs] != hi_key[ofs]) + break; + ofs_0 = ofs; + /* + * byte 0 thru (ofs-1) are the same between + * lo and hi; ofs is the first byte that is + * different. 
+ */ + hiv = hi_key[ofs_0]; + if (ofs_0 < 19) + hiv = (hiv << 8) | hi_key[ofs_0+1]; + } else { + hiv = 256; + if (ofs_0 < 19) + hiv <<= 8; + } + lov = lo_key[ofs_0]; + kyv = key[ofs_0]; + if (ofs_0 < 19) { + lov = (lov << 8) | lo_key[ofs_0+1]; + kyv = (kyv << 8) | key[ofs_0+1]; + } + assert(lov < hiv); + + if (kyv < lov) + return -1 - lo; + if (hiv < kyv) + return -1 - hi; + + /* + * Even if we know the target is much closer to 'hi' + * than 'lo', if we pick too precisely and overshoot + * (e.g. when we know 'mi' is closer to 'hi' than to + * 'lo', pick 'mi' that is higher than the target), we + * end up narrowing the search space by a smaller + * amount (i.e. the distance between 'mi' and 'hi') + * than what we would have (i.e. about half of 'lo' + * and 'hi'). Hedge our bets to pick 'mi' less + * aggressively, i.e. make 'mi' a bit closer to the + * middle than we would otherwise pick. + */ + kyv = (kyv * 6 + lov + hiv) / 8; + if (lov < hiv - 1) { + if (kyv == lov) + kyv++; + else if (kyv == hiv) + kyv--; + } + mi = (range - 1) * (kyv - lov) / (hiv - lov) + lo; + +#ifdef INDEX_DEBUG_LOOKUP + printf("lo %u hi %u rg %u mi %u ", lo, hi, range, mi); + printf("ofs %u lov %x, hiv %x, kyv %x\n", + ofs_0, lov, hiv, kyv); +#endif + + if (!(lo <= mi && mi < hi)) { + return git__throw(GIT_ERROR, "Assertion failure. 
Binary search invariant is false"); + } + + mi_key = base + elem_size * mi + key_offset; + cmp = memcmp(mi_key + ofs_0, key + ofs_0, 20 - ofs_0); + if (!cmp) + return mi; + if (cmp > 0) { + hi = mi; + hi_key = mi_key; + } else { + lo = mi + 1; + lo_key = mi_key + elem_size; + } + } while (lo < hi); + return -lo-1; +} diff --git a/src/sha1_lookup.h b/src/sha1_lookup.h new file mode 100644 index 000000000..5caa2f5ed --- /dev/null +++ b/src/sha1_lookup.h @@ -0,0 +1,12 @@ +#ifndef INCLUDE_sha1_lookup_h__ +#define INCLUDE_sha1_lookup_h__ + +#include + +int sha1_entry_pos(const void *table, + size_t elem_size, + size_t key_offset, + unsigned lo, unsigned hi, unsigned nr, + const unsigned char *key); + +#endif