From cf0582b43ce591e7923637d2c8925028aaa5977b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Wed, 2 Oct 2013 12:22:54 +0200 Subject: [PATCH 1/8] indexer: do multiple passes over the delta list Though unusual, a packfile may contain a delta whose base is a delta that comes later. In order to index such a packfile, we must not give up on the first failure to resolve a delta, but keep it around. If there is a pass which makes no progress, this indicates that the packfile is broken, so fail accordingly. --- src/indexer.c | 40 ++++++++++++++++++++++++++++----------- tests-clar/pack/indexer.c | 40 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 tests-clar/pack/indexer.c diff --git a/src/indexer.c b/src/indexer.c index 3b160df5d..10b6929ee 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -594,20 +594,38 @@ static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats) { unsigned int i; struct delta_info *delta; + int progressed = 0; - git_vector_foreach(&idx->deltas, i, delta) { - git_rawobj obj; + while (idx->deltas.length > 0) { + progressed = 0; + git_vector_foreach(&idx->deltas, i, delta) { + git_rawobj obj; - idx->off = delta->delta_off; - if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0) + idx->off = delta->delta_off; + if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0) + continue; + + if (hash_and_save(idx, &obj, delta->delta_off) < 0) + continue; + + git__free(obj.data); + stats->indexed_objects++; + progressed = 1; + do_progress_callback(idx, stats); + + /* + * Remove this delta from the list and + * decrease i so we don't skip over the next + * delta. 
+ */ + git_vector_remove(&idx->deltas, i); + i--; + } + + if (!progressed) { + giterr_set(GITERR_INDEXER, "the packfile is missing bases"); return -1; - - if (hash_and_save(idx, &obj, delta->delta_off) < 0) - return -1; - - git__free(obj.data); - stats->indexed_objects++; - do_progress_callback(idx, stats); + } } return 0; diff --git a/tests-clar/pack/indexer.c b/tests-clar/pack/indexer.c new file mode 100644 index 000000000..5394d32fd --- /dev/null +++ b/tests-clar/pack/indexer.c @@ -0,0 +1,40 @@ +#include "clar_libgit2.h" +#include "fileops.h" +#include "hash.h" +#include "iterator.h" +#include "vector.h" +#include "posix.h" + +/* + * This is a packfile with three objects. The second is a delta which + * depends on the third, which is also a delta. + */ +unsigned char out_of_order_pack[] = { + 0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, + 0x32, 0x78, 0x9c, 0x63, 0x67, 0x00, 0x00, 0x00, 0x10, 0x00, 0x08, 0x76, + 0xe6, 0x8f, 0xe8, 0x12, 0x9b, 0x54, 0x6b, 0x10, 0x1a, 0xee, 0x95, 0x10, + 0xc5, 0x32, 0x8e, 0x7f, 0x21, 0xca, 0x1d, 0x18, 0x78, 0x9c, 0x63, 0x62, + 0x66, 0x4e, 0xcb, 0xcf, 0x07, 0x00, 0x02, 0xac, 0x01, 0x4d, 0x75, 0x01, + 0xd7, 0x71, 0x36, 0x66, 0xf4, 0xde, 0x82, 0x27, 0x76, 0xc7, 0x62, 0x2c, + 0x10, 0xf1, 0xb0, 0x7d, 0xe2, 0x80, 0xdc, 0x78, 0x9c, 0x63, 0x62, 0x62, + 0x62, 0xb7, 0x03, 0x00, 0x00, 0x69, 0x00, 0x4c, 0xde, 0x7d, 0xaa, 0xe4, + 0x19, 0x87, 0x58, 0x80, 0x61, 0x09, 0x9a, 0x33, 0xca, 0x7a, 0x31, 0x92, + 0x6f, 0xae, 0x66, 0x75 +}; +unsigned int out_of_order_pack_len = 112; + +void test_pack_indexer__out_of_order(void) +{ + git_indexer_stream *idx; + git_transfer_progress stats; + + cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL, NULL)); + cl_git_pass(git_indexer_stream_add(idx, out_of_order_pack, out_of_order_pack_len, &stats)); + cl_git_pass(git_indexer_stream_finalize(idx, &stats)); + + cl_assert_equal_i(stats.total_objects, 3); + cl_assert_equal_i(stats.received_objects, 3); + 
cl_assert_equal_i(stats.indexed_objects, 3); + + git_indexer_stream_free(idx); +} From 51e82492ef5206767e176952733914275d0e3bdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 3 Oct 2013 16:54:25 +0200 Subject: [PATCH 2/8] pack: move the object header function here --- src/pack-objects.c | 36 +----------------------------------- src/pack.c | 32 ++++++++++++++++++++++++++++++++ src/pack.h | 2 ++ 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/pack-objects.c b/src/pack-objects.c index 2a2f36223..4d79ad95b 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -228,40 +228,6 @@ int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid, return 0; } -/* - * The per-object header is a pretty dense thing, which is - * - first byte: low four bits are "size", - * then three bits of "type", - * with the high bit being "size continues". - * - each byte afterwards: low seven bits are size continuation, - * with the high bit being "size continues" - */ -static int gen_pack_object_header( - unsigned char *hdr, - unsigned long size, - git_otype type) -{ - unsigned char *hdr_base; - unsigned char c; - - assert(type >= GIT_OBJ_COMMIT && type <= GIT_OBJ_REF_DELTA); - - /* TODO: add support for chunked objects; see git.git 6c0d19b1 */ - - c = (unsigned char)((type << 4) | (size & 15)); - size >>= 4; - hdr_base = hdr; - - while (size) { - *hdr++ = c | 0x80; - c = size & 0x7f; - size >>= 7; - } - *hdr++ = c; - - return (int)(hdr - hdr_base); -} - static int get_delta(void **out, git_odb *odb, git_pobject *po) { git_odb_object *src = NULL, *trg = NULL; @@ -323,7 +289,7 @@ static int write_object(git_buf *buf, git_packbuilder *pb, git_pobject *po) } /* Write header */ - hdr_len = gen_pack_object_header(hdr, size, type); + hdr_len = git_packfile__object_header(hdr, size, type); if (git_buf_put(buf, (char *)hdr, hdr_len) < 0) goto on_error; diff --git a/src/pack.c b/src/pack.c index e7fb9f1ae..5df0f50b9 100644 --- 
a/src/pack.c +++ b/src/pack.c @@ -364,6 +364,38 @@ static unsigned char *pack_window_open( return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left); } +/* + * The per-object header is a pretty dense thing, which is + * - first byte: low four bits are "size", + * then three bits of "type", + * with the high bit being "size continues". + * - each byte afterwards: low seven bits are size continuation, + * with the high bit being "size continues" + */ +int git_packfile__object_header(unsigned char *hdr, unsigned long size, git_otype type) +{ + unsigned char *hdr_base; + unsigned char c; + + assert(type >= GIT_OBJ_COMMIT && type <= GIT_OBJ_REF_DELTA); + + /* TODO: add support for chunked objects; see git.git 6c0d19b1 */ + + c = (unsigned char)((type << 4) | (size & 15)); + size >>= 4; + hdr_base = hdr; + + while (size) { + *hdr++ = c | 0x80; + c = size & 0x7f; + size >>= 7; + } + *hdr++ = c; + + return (int)(hdr - hdr_base); +} + + static int packfile_unpack_header1( unsigned long *usedp, size_t *sizep, diff --git a/src/pack.h b/src/pack.h index aeeac9ce1..ddeefea1d 100644 --- a/src/pack.h +++ b/src/pack.h @@ -112,6 +112,8 @@ typedef struct git_packfile_stream { git_mwindow *mw; } git_packfile_stream; +int git_packfile__object_header(unsigned char *hdr, unsigned long size, git_otype type); + int git_packfile_unpack_header( size_t *size_p, git_otype *type_p, From 0b33fca03e030c7e807f0c75d7332e7fe2d3c0bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Wed, 2 Oct 2013 13:39:35 +0200 Subject: [PATCH 3/8] indexer: fix thin packs When given an ODB from which to read objects, the indexer will attempt to inject the missing bases at the end of the pack and update the header and trailer to reflect the new contents. 
--- examples/network/index-pack.c | 2 +- include/git2/indexer.h | 4 + include/git2/sys/odb_backend.h | 2 +- include/git2/types.h | 1 + src/indexer.c | 276 ++++++++++++++++++++++++++++----- src/odb.c | 2 +- src/odb_pack.c | 3 +- src/pack-objects.c | 2 +- tests-clar/pack/indexer.c | 88 +++++++++++ tests-clar/pack/packbuilder.c | 4 +- 10 files changed, 340 insertions(+), 44 deletions(-) diff --git a/examples/network/index-pack.c b/examples/network/index-pack.c index 889305da8..08b45c58c 100644 --- a/examples/network/index-pack.c +++ b/examples/network/index-pack.c @@ -46,7 +46,7 @@ int index_pack(git_repository *repo, int argc, char **argv) return EXIT_FAILURE; } - if (git_indexer_stream_new(&idx, ".", NULL, NULL) < 0) { + if (git_indexer_stream_new(&idx, ".", NULL, NULL, NULL) < 0) { puts("bad idx"); return -1; } diff --git a/include/git2/indexer.h b/include/git2/indexer.h index 4db072c9b..0858b6ea1 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -20,12 +20,16 @@ typedef struct git_indexer_stream git_indexer_stream; * * @param out where to store the indexer instance * @param path to the directory where the packfile should be stored + * @param odb object database from which to read base objects when + * fixing thin packs. 
Pass NULL if no thin pack is expected (an error + * will be returned if there are bases missing) * @param progress_cb function to call with progress information * @param progress_cb_payload payload for the progress callback */ GIT_EXTERN(int) git_indexer_stream_new( git_indexer_stream **out, const char *path, + git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_cb_payload); diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h index 4365906d4..8039a5b82 100644 --- a/include/git2/sys/odb_backend.h +++ b/include/git2/sys/odb_backend.h @@ -80,7 +80,7 @@ struct git_odb_backend { git_odb_backend *, git_odb_foreach_cb cb, void *payload); int (* writepack)( - git_odb_writepack **, git_odb_backend *, + git_odb_writepack **, git_odb_backend *, git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_payload); void (* free)(git_odb_backend *); diff --git a/include/git2/types.h b/include/git2/types.h index b500c986d..3939353ee 100644 --- a/include/git2/types.h +++ b/include/git2/types.h @@ -217,6 +217,7 @@ typedef struct git_transfer_progress { unsigned int total_objects; unsigned int indexed_objects; unsigned int received_objects; + unsigned int local_objects; size_t received_bytes; } git_transfer_progress; diff --git a/src/indexer.c b/src/indexer.c index 10b6929ee..21b993a28 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -18,6 +18,7 @@ #include "filebuf.h" #include "oid.h" #include "oidmap.h" +#include "compress.h" #define UINT31_MAX (0x7FFFFFFF) @@ -33,6 +34,7 @@ struct git_indexer_stream { opened_pack :1, have_stream :1, have_delta :1; + struct git_pack_header hdr; struct git_pack_file *pack; git_filebuf pack_file; git_off_t off; @@ -48,6 +50,9 @@ struct git_indexer_stream { void *progress_payload; char objbuf[8*1024]; + /* Needed to look up objects which we want to inject to fix a thin pack */ + git_odb *odb; + /* Fields for calculating the packfile trailer (hash of everything before it) */ char 
inbuf[GIT_OID_RAWSZ]; int inbuf_len; @@ -114,6 +119,7 @@ static int objects_cmp(const void *a, const void *b) int git_indexer_stream_new( git_indexer_stream **out, const char *prefix, + git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_payload) { @@ -124,6 +130,7 @@ int git_indexer_stream_new( idx = git__calloc(1, sizeof(git_indexer_stream)); GITERR_CHECK_ALLOC(idx); + idx->odb = odb; idx->progress_cb = progress_cb; idx->progress_payload = progress_payload; git_hash_ctx_init(&idx->trailer); @@ -309,17 +316,10 @@ on_error: return -1; } -static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start) +static int save_entry(git_indexer_stream *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start) { int i, error; khiter_t k; - git_oid oid; - size_t entry_size; - struct entry *entry; - struct git_pack_entry *pentry; - - entry = git__calloc(1, sizeof(*entry)); - GITERR_CHECK_ALLOC(entry); if (entry_start > UINT31_MAX) { entry->offset = UINT32_MAX; @@ -328,6 +328,34 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent entry->offset = (uint32_t)entry_start; } + pentry->offset = entry_start; + k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error); + if (!error) + return -1; + + kh_value(idx->pack->idx_cache, k) = pentry; + + /* Add the object to the list */ + if (git_vector_insert(&idx->objects, entry) < 0) + return -1; + + for (i = entry->oid.id[0]; i < 256; ++i) { + idx->fanout[i]++; + } + + return 0; +} + +static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start) +{ + git_oid oid; + size_t entry_size; + struct entry *entry; + struct git_pack_entry *pentry; + + entry = git__calloc(1, sizeof(*entry)); + GITERR_CHECK_ALLOC(entry); + if (git_odb__hashobj(&oid, obj) < 0) { giterr_set(GITERR_INDEXER, "Failed to hash object"); goto on_error; @@ -337,15 +365,6 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, 
git_off_t ent GITERR_CHECK_ALLOC(pentry); git_oid_cpy(&pentry->sha1, &oid); - pentry->offset = entry_start; - k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error); - if (!error) { - git__free(pentry); - goto on_error; - } - - kh_value(idx->pack->idx_cache, k) = pentry; - git_oid_cpy(&entry->oid, &oid); entry->crc = crc32(0L, Z_NULL, 0); @@ -353,15 +372,7 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) goto on_error; - /* Add the object to the list */ - if (git_vector_insert(&idx->objects, entry) < 0) - goto on_error; - - for (i = oid.id[0]; i < 256; ++i) { - idx->fanout[i]++; - } - - return 0; + return save_entry(idx, entry, pentry, entry_start); on_error: git__free(entry); @@ -415,8 +426,8 @@ static void hash_partially(git_indexer_stream *idx, const uint8_t *data, size_t int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats) { int error = -1; - struct git_pack_header hdr; size_t processed; + struct git_pack_header *hdr = &idx->hdr; git_mwindow_file *mwf = &idx->pack->mwf; assert(idx && data && stats); @@ -443,14 +454,14 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz if (!idx->parsed_header) { unsigned int total_objects; - if ((unsigned)idx->pack->mwf.size < sizeof(hdr)) + if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header)) return 0; - if (parse_header(&hdr, idx->pack) < 0) + if (parse_header(&idx->hdr, idx->pack) < 0) return -1; idx->parsed_header = 1; - idx->nr_objects = ntohl(hdr.hdr_entries); + idx->nr_objects = ntohl(hdr->hdr_entries); idx->off = sizeof(struct git_pack_header); /* for now, limit to 2^32 objects */ @@ -471,6 +482,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz return -1; stats->received_objects = 0; + stats->local_objects = 0; processed = stats->indexed_objects = 0; 
stats->total_objects = total_objects; do_progress_callback(idx, stats); @@ -590,6 +602,135 @@ static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char return git_buf_oom(path) ? -1 : 0; } +/** + * Rewind the packfile by the trailer, as we might need to fix the + * packfile by injecting objects at the tail and must overwrite it. + */ +static git_off_t seek_back_trailer(git_indexer_stream *idx) +{ + git_off_t off; + + if ((off = p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_CUR)) < 0) + return -1; + + idx->pack->mwf.size -= GIT_OID_RAWSZ; + git_mwindow_free_all(&idx->pack->mwf); + + return off; +} + +static int inject_object(git_indexer_stream *idx, git_oid *id) +{ + git_odb_object *obj; + struct entry *entry; + struct git_pack_entry *pentry; + git_oid foo = {{0}}; + unsigned char hdr[64]; + git_buf buf = GIT_BUF_INIT; + git_off_t entry_start; + const void *data; + size_t len, hdr_len; + int error; + + entry = git__calloc(1, sizeof(*entry)); + GITERR_CHECK_ALLOC(entry); + + entry_start = seek_back_trailer(idx); + + if (git_odb_read(&obj, idx->odb, id) < 0) + return -1; + + data = git_odb_object_data(obj); + len = git_odb_object_size(obj); + + entry->crc = crc32(0L, Z_NULL, 0); + + /* Write out the object header */ + hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj)); + git_filebuf_write(&idx->pack_file, hdr, hdr_len); + idx->pack->mwf.size += hdr_len; + entry->crc = crc32(entry->crc, hdr, hdr_len); + + if ((error = git__compress(&buf, data, len)) < 0) + goto cleanup; + + /* And then the compressed object */ + git_filebuf_write(&idx->pack_file, buf.ptr, buf.size); + idx->pack->mwf.size += buf.size; + entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, buf.size)); + git_buf_free(&buf); + + /* Write a fake trailer so the pack functions play ball */ + if ((error = git_filebuf_write(&idx->pack_file, &foo, GIT_OID_RAWSZ)) < 0) + goto cleanup; + + idx->pack->mwf.size += GIT_OID_RAWSZ; + + pentry = git__calloc(1, 
sizeof(struct git_pack_entry)); + GITERR_CHECK_ALLOC(pentry); + + git_oid_cpy(&pentry->sha1, id); + git_oid_cpy(&entry->oid, id); + idx->off = entry_start + hdr_len + len; + + if ((error = save_entry(idx, entry, pentry, entry_start)) < 0) + git__free(pentry); + +cleanup: + git_odb_object_free(obj); + return error; +} + +static int fix_thin_pack(git_indexer_stream *idx, git_transfer_progress *stats) +{ + int error; + unsigned int i; + struct delta_info *delta; + + if (idx->odb == NULL) { + giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB"); + return -1; + } + + git_vector_foreach(&idx->deltas, i, delta) { + size_t size; + git_otype type; + git_mwindow *w = NULL; + git_off_t curpos = delta->delta_off; + unsigned char *base_info; + unsigned int left = 0; + git_oid base; + + error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos); + git_mwindow_close(&w); + if (error < 0) + return error; + + if (type != GIT_OBJ_REF_DELTA) { + giterr_set(GITERR_INDEXER, "delta with missing base is not REF_DELTA"); + return -1; + } + + /* curpos now points to the base information, which is an OID */ + base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left); + if (base_info == NULL) { + giterr_set(GITERR_INDEXER, "failed to map delta information"); + return -1; + } + + git_oid_fromraw(&base, base_info); + git_mwindow_close(&w); + + if (inject_object(idx, &base) < 0) + return -1; + + stats->total_objects++; + stats->local_objects++; + } + + return 0; +} + static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats) { unsigned int i; @@ -619,13 +760,61 @@ static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats) * delta. 
*/ git_vector_remove(&idx->deltas, i); + git__free(delta); i--; } - if (!progressed) { - giterr_set(GITERR_INDEXER, "the packfile is missing bases"); + if (!progressed && (fix_thin_pack(idx, stats) < 0)) + return -1; + } + + return 0; +} + +static int update_header_and_rehash(git_indexer_stream *idx, git_transfer_progress *stats) +{ + void *ptr; + size_t chunk = 1024*1024; + git_off_t hashed = 0; + git_mwindow *w = NULL; + git_mwindow_file *mwf; + unsigned int left; + git_hash_ctx *ctx; + + mwf = &idx->pack->mwf; + ctx = &idx->trailer; + + git_hash_ctx_init(ctx); + git_mwindow_free_all(mwf); + + /* Update the header to include the numer of local objects we injected */ + idx->hdr.hdr_entries = htonl(stats->total_objects); + if (p_lseek(idx->pack_file.fd, 0, SEEK_SET) < 0) { + giterr_set(GITERR_OS, "failed to seek to the beginning of the pack"); + return -1; + } + + if (p_write(idx->pack_file.fd, &idx->hdr, sizeof(struct git_pack_header)) < 0) { + giterr_set(GITERR_OS, "failed to update the pack header"); + return -1; + } + + /* + * We now use the same technique as before to determine the + * hash. We keep reading up to the end and let + * hash_partially() keep the existing trailer out of the + * calculation. 
+ */ + idx->inbuf_len = 0; + while (hashed < mwf->size) { + ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left); + if (ptr == NULL) return -1; - } + + hash_partially(idx, ptr, left); + hashed += left; + + git_mwindow_close(&w); } return 0; @@ -668,15 +857,28 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress * return -1; } - if (idx->deltas.length > 0) - if (resolve_deltas(idx, stats) < 0) - return -1; + if (resolve_deltas(idx, stats) < 0) + return -1; - if (stats->indexed_objects != stats->total_objects) { + if (stats->indexed_objects + stats->local_objects != stats->total_objects) { giterr_set(GITERR_INDEXER, "early EOF"); return -1; } + if (stats->local_objects > 0) { + if (update_header_and_rehash(idx, stats) < 0) + return -1; + + git_hash_final(&trailer_hash, &idx->trailer); + if (p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_END) < 0) + return -1; + + if (p_write(idx->pack_file.fd, &trailer_hash, GIT_OID_RAWSZ) < 0) { + giterr_set(GITERR_OS, "failed to update pack trailer"); + return -1; + } + } + git_vector_sort(&idx->objects); git_buf_sets(&filename, idx->pack->pack_name); diff --git a/src/odb.c b/src/odb.c index b2c138aae..0137641de 100644 --- a/src/odb.c +++ b/src/odb.c @@ -988,7 +988,7 @@ int git_odb_write_pack(struct git_odb_writepack **out, git_odb *db, git_transfer if (b->writepack != NULL) { ++writes; - error = b->writepack(out, b, progress_cb, progress_payload); + error = b->writepack(out, b, db, progress_cb, progress_payload); } } diff --git a/src/odb_pack.c b/src/odb_pack.c index cadc93a65..edf94c967 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -541,6 +541,7 @@ static void pack_backend__writepack_free(struct git_odb_writepack *_writepack) static int pack_backend__writepack(struct git_odb_writepack **out, git_odb_backend *_backend, + git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_payload) { @@ -557,7 +558,7 @@ static int pack_backend__writepack(struct git_odb_writepack **out, 
GITERR_CHECK_ALLOC(writepack); if (git_indexer_stream_new(&writepack->indexer_stream, - backend->pack_folder, progress_cb, progress_payload) < 0) { + backend->pack_folder, odb, progress_cb, progress_payload) < 0) { git__free(writepack); return -1; } diff --git a/src/pack-objects.c b/src/pack-objects.c index 4d79ad95b..6a18d4be4 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -1253,7 +1253,7 @@ int git_packbuilder_write( PREPARE_PACK; if (git_indexer_stream_new( - &indexer, path, progress_cb, progress_cb_payload) < 0) + &indexer, path, pb->odb, progress_cb, progress_cb_payload) < 0) return -1; ctx.indexer = indexer; diff --git a/tests-clar/pack/indexer.c b/tests-clar/pack/indexer.c index 5394d32fd..fd1616034 100644 --- a/tests-clar/pack/indexer.c +++ b/tests-clar/pack/indexer.c @@ -1,10 +1,12 @@ #include "clar_libgit2.h" +#include #include "fileops.h" #include "hash.h" #include "iterator.h" #include "vector.h" #include "posix.h" + /* * This is a packfile with three objects. The second is a delta which * depends on the third, which is also a delta. @@ -23,6 +25,24 @@ unsigned char out_of_order_pack[] = { }; unsigned int out_of_order_pack_len = 112; +/* + * Packfile with two objects. 
The second is a delta against an object + * which is not in the packfile + */ +unsigned char thin_pack[] = { + 0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, + 0x32, 0x78, 0x9c, 0x63, 0x67, 0x00, 0x00, 0x00, 0x10, 0x00, 0x08, 0x76, + 0xe6, 0x8f, 0xe8, 0x12, 0x9b, 0x54, 0x6b, 0x10, 0x1a, 0xee, 0x95, 0x10, + 0xc5, 0x32, 0x8e, 0x7f, 0x21, 0xca, 0x1d, 0x18, 0x78, 0x9c, 0x63, 0x62, + 0x66, 0x4e, 0xcb, 0xcf, 0x07, 0x00, 0x02, 0xac, 0x01, 0x4d, 0x42, 0x52, + 0x3a, 0x6f, 0x39, 0xd1, 0xfe, 0x66, 0x68, 0x6b, 0xa5, 0xe5, 0xe2, 0x97, + 0xac, 0x94, 0x6c, 0x76, 0x0b, 0x04 +}; +unsigned int thin_pack_len = 78; + +unsigned char base_obj[] = { 07, 076 }; +unsigned int base_obj_len = 2; + void test_pack_indexer__out_of_order(void) { git_indexer_stream *idx; @@ -38,3 +58,71 @@ void test_pack_indexer__out_of_order(void) git_indexer_stream_free(idx); } + +void test_pack_indexer__fix_thin(void) +{ + git_indexer_stream *idx; + git_transfer_progress stats; + git_repository *repo; + git_odb *odb; + git_oid id, should_id; + + cl_git_pass(git_repository_init(&repo, "thin.git", true)); + cl_git_pass(git_repository_odb(&odb, repo)); + + /* Store the missing base into your ODB so the indexer can fix the pack */ + cl_git_pass(git_odb_write(&id, odb, base_obj, base_obj_len, GIT_OBJ_BLOB)); + git_oid_fromstr(&should_id, "e68fe8129b546b101aee9510c5328e7f21ca1d18"); + cl_assert(!git_oid_cmp(&id, &should_id)); + + cl_git_pass(git_indexer_stream_new(&idx, ".", odb, NULL, NULL)); + cl_git_pass(git_indexer_stream_add(idx, thin_pack, thin_pack_len, &stats)); + cl_git_pass(git_indexer_stream_finalize(idx, &stats)); + + cl_assert_equal_i(stats.total_objects, 3); + cl_assert_equal_i(stats.received_objects, 2); + cl_assert_equal_i(stats.indexed_objects, 2); + cl_assert_equal_i(stats.local_objects, 1); + + git_oid_fromstr(&should_id, "11f0f69b334728fdd8bc86b80499f22f29d85b15"); + cl_assert(!git_oid_cmp(git_indexer_stream_hash(idx), &should_id)); + + git_indexer_stream_free(idx); + 
git_odb_free(odb); + git_repository_free(repo); + + /* + * The pack's name/hash only tells us what objects there are, + * so we need to go through the packfile again in order to + * figure out whether we calculated the trailer correctly. + */ + { + unsigned char buffer[128]; + int fd; + ssize_t read; + git_off_t left; + struct stat st; + const char *name = "pack-11f0f69b334728fdd8bc86b80499f22f29d85b15.pack"; + + fd = p_open(name, O_RDONLY); + cl_assert(fd != -1); + + cl_git_pass(p_stat(name, &st)); + left = st.st_size; + + cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL, NULL)); + read = p_read(fd, buffer, sizeof(buffer)); + cl_assert(read != -1); + p_close(fd); + + cl_git_pass(git_indexer_stream_add(idx, buffer, read, &stats)); + cl_git_pass(git_indexer_stream_finalize(idx, &stats)); + + cl_assert_equal_i(stats.total_objects, 3); + cl_assert_equal_i(stats.received_objects, 3); + cl_assert_equal_i(stats.indexed_objects, 3); + cl_assert_equal_i(stats.local_objects, 0); + + git_indexer_stream_free(idx); + } +} diff --git a/tests-clar/pack/packbuilder.c b/tests-clar/pack/packbuilder.c index 764fba213..69292567e 100644 --- a/tests-clar/pack/packbuilder.c +++ b/tests-clar/pack/packbuilder.c @@ -92,7 +92,7 @@ void test_pack_packbuilder__create_pack(void) seed_packbuilder(); - cl_git_pass(git_indexer_stream_new(&_indexer, ".", NULL, NULL)); + cl_git_pass(git_indexer_stream_new(&_indexer, ".", NULL, NULL, NULL)); cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats)); cl_git_pass(git_indexer_stream_finalize(_indexer, &stats)); @@ -141,7 +141,7 @@ void test_pack_packbuilder__foreach(void) git_indexer_stream *idx; seed_packbuilder(); - cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL)); + cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL, NULL)); cl_git_pass(git_packbuilder_foreach(_packbuilder, foreach_cb, idx)); cl_git_pass(git_indexer_stream_finalize(idx, &stats)); git_indexer_stream_free(idx); From 
b4342b116d98ba439d958c374eb4d49b06488f4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 4 Oct 2013 10:27:45 +0200 Subject: [PATCH 4/8] net: advertise our support for fixing thin packs --- src/transports/smart.h | 4 +++- src/transports/smart_pkt.c | 3 +++ src/transports/smart_protocol.c | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/transports/smart.h b/src/transports/smart.h index c52401a3c..5e31ebd92 100644 --- a/src/transports/smart.h +++ b/src/transports/smart.h @@ -21,6 +21,7 @@ #define GIT_CAP_INCLUDE_TAG "include-tag" #define GIT_CAP_DELETE_REFS "delete-refs" #define GIT_CAP_REPORT_STATUS "report-status" +#define GIT_CAP_THIN_PACK "thin-pack" enum git_pkt_type { GIT_PKT_CMD, @@ -116,7 +117,8 @@ typedef struct transport_smart_caps { side_band_64k:1, include_tag:1, delete_refs:1, - report_status:1; + report_status:1, + thin_pack:1; } transport_smart_caps; typedef void (*packetsize_cb)(size_t received, void *payload); diff --git a/src/transports/smart_pkt.c b/src/transports/smart_pkt.c index 99da37567..a1f623c78 100644 --- a/src/transports/smart_pkt.c +++ b/src/transports/smart_pkt.c @@ -472,6 +472,9 @@ static int buffer_want_with_caps(const git_remote_head *head, transport_smart_ca if (caps->include_tag) git_buf_puts(&str, GIT_CAP_INCLUDE_TAG " "); + if (caps->thin_pack) + git_buf_puts(&str, GIT_CAP_THIN_PACK " "); + if (git_buf_oom(&str)) return -1; diff --git a/src/transports/smart_protocol.c b/src/transports/smart_protocol.c index 156b69e1f..407101a81 100644 --- a/src/transports/smart_protocol.c +++ b/src/transports/smart_protocol.c @@ -128,6 +128,12 @@ int git_smart__detect_caps(git_pkt_ref *pkt, transport_smart_caps *caps) continue; } + if (!git__prefixcmp(ptr, GIT_CAP_THIN_PACK)) { + caps->common = caps->thin_pack = 1; + ptr += strlen(GIT_CAP_THIN_PACK); + continue; + } + /* We don't know this capability, so skip it */ ptr = strchr(ptr, ' '); } From 7fb6eb278b350f9f4caab5a3f72bfb70353fc40d 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Tue, 8 Oct 2013 11:54:50 +0200 Subject: [PATCH 5/8] indexer: inject one base at a time There may be multiple deltas referencing the same base as well as OFS deltas which rely on a thin delta. Deal with both at the same time by injecting a single object and going back up to the main delta-resolving loop. --- src/indexer.c | 70 +++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/src/indexer.c b/src/indexer.c index 21b993a28..2cda1a629 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -683,51 +683,59 @@ cleanup: static int fix_thin_pack(git_indexer_stream *idx, git_transfer_progress *stats) { - int error; + int error, found_ref_delta = 0; unsigned int i; struct delta_info *delta; + size_t size; + git_otype type; + git_mwindow *w = NULL; + git_off_t curpos; + unsigned char *base_info; + unsigned int left = 0; + git_oid base; + + assert(git_vector_length(&idx->deltas) > 0); if (idx->odb == NULL) { giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB"); return -1; } + /* Loop until we find the first REF delta */ git_vector_foreach(&idx->deltas, i, delta) { - size_t size; - git_otype type; - git_mwindow *w = NULL; - git_off_t curpos = delta->delta_off; - unsigned char *base_info; - unsigned int left = 0; - git_oid base; - + curpos = delta->delta_off; error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos); git_mwindow_close(&w); if (error < 0) return error; - if (type != GIT_OBJ_REF_DELTA) { - giterr_set(GITERR_INDEXER, "delta with missing base is not REF_DELTA"); - return -1; + if (type == GIT_OBJ_REF_DELTA) { + found_ref_delta = 1; + break; } - - /* curpos now points to the base information, which is an OID */ - base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left); - if (base_info == NULL) { - giterr_set(GITERR_INDEXER, "failed to map delta information"); - return -1; - 
} - - git_oid_fromraw(&base, base_info); - git_mwindow_close(&w); - - if (inject_object(idx, &base) < 0) - return -1; - - stats->total_objects++; - stats->local_objects++; } + if (!found_ref_delta) { + giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object"); + return -1; + } + + /* curpos now points to the base information, which is an OID */ + base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left); + if (base_info == NULL) { + giterr_set(GITERR_INDEXER, "failed to map delta information"); + return -1; + } + + git_oid_fromraw(&base, base_info); + git_mwindow_close(&w); + + if (inject_object(idx, &base) < 0) + return -1; + + stats->total_objects++; + stats->local_objects++; + return 0; } @@ -764,8 +772,10 @@ static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats) i--; } - if (!progressed && (fix_thin_pack(idx, stats) < 0)) - return -1; + if (!progressed && (fix_thin_pack(idx, stats) < 0)) { + giterr_set(GITERR_INDEXER, "missing delta bases"); + return -1; + } } return 0; From 893055f22e5395ec95231b4cc8a19046a3eb5b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 11 Oct 2013 17:24:29 +0200 Subject: [PATCH 6/8] indexer: clearer stats for thin packs Don't increase the number of total objects, as it can produce surprising progress output. The only addition compared to pre-thin is the addition of local_objects to allow an output similar to git's "completed with %d local objects". --- include/git2/types.h | 7 +++++++ src/indexer.c | 5 ++--- tests-clar/pack/indexer.c | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/git2/types.h b/include/git2/types.h index 3939353ee..4ff2ba4c4 100644 --- a/include/git2/types.h +++ b/include/git2/types.h @@ -212,6 +212,13 @@ typedef struct git_remote_callbacks git_remote_callbacks; /** * This is passed as the first argument to the callback to allow the * user to see the progress. 
+ * - total_objects: number of objects in the packfile being downloaded + * - indexed_objects: received objects that have been hashed + * - received_objects: objects which have been downloaded + * - local_objects: locally-available objects that have been injected + * in order to fix a thin pack. + * - received_bytes: size of the packfile received up to now */ typedef struct git_transfer_progress { unsigned int total_objects; diff --git a/src/indexer.c b/src/indexer.c index 2cda1a629..93ad116fe 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -733,7 +733,6 @@ static int fix_thin_pack(git_indexer_stream *idx, git_transfer_progress *stats) if (inject_object(idx, &base) < 0) return -1; - stats->total_objects++; stats->local_objects++; return 0; @@ -798,7 +797,7 @@ static int update_header_and_rehash(git_indexer_stream *idx, git_transfer_progre git_mwindow_free_all(mwf); /* Update the header to include the numer of local objects we injected */ - idx->hdr.hdr_entries = htonl(stats->total_objects); + idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects); if (p_lseek(idx->pack_file.fd, 0, SEEK_SET) < 0) { giterr_set(GITERR_OS, "failed to seek to the beginning of the pack"); return -1; @@ -870,7 +869,7 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress * if (resolve_deltas(idx, stats) < 0) return -1; - if (stats->indexed_objects + stats->local_objects != stats->total_objects) { + if (stats->indexed_objects != stats->total_objects) { giterr_set(GITERR_INDEXER, "early EOF"); return -1; } diff --git a/tests-clar/pack/indexer.c b/tests-clar/pack/indexer.c index fd1616034..17ec7b3f8 100644 --- a/tests-clar/pack/indexer.c +++ b/tests-clar/pack/indexer.c @@ -79,7 +79,7 @@ void test_pack_indexer__fix_thin(void) cl_git_pass(git_indexer_stream_add(idx, thin_pack, thin_pack_len, &stats)); cl_git_pass(git_indexer_stream_finalize(idx, &stats)); - cl_assert_equal_i(stats.total_objects, 3); + cl_assert_equal_i(stats.total_objects,
2); cl_assert_equal_i(stats.received_objects, 2); cl_assert_equal_i(stats.indexed_objects, 2); cl_assert_equal_i(stats.local_objects, 1); From ebbd48f00e87eab699862f03a457571ead3c53b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Wed, 23 Oct 2013 14:22:44 +0200 Subject: [PATCH 7/8] examples: show used local objects in fetch Show how many local objects were used to fix the thin pack in our fetch example. --- examples/network/fetch.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/network/fetch.c b/examples/network/fetch.c index ce016ce0b..50ec0033a 100644 --- a/examples/network/fetch.c +++ b/examples/network/fetch.c @@ -125,8 +125,13 @@ int fetch(git_repository *repo, int argc, char **argv) pthread_join(worker, NULL); #endif - printf("\rReceived %d/%d objects in %zu bytes\n", + if (stats->local_objects > 0) { + printf("\rReceived %d/%d objects in %zu bytes (used %d local objects)\n", + stats->indexed_objects, stats->total_objects, stats->received_bytes, stats->local_objects); + } else{ + printf("\rReceived %d/%d objects in %zu bytes\n", stats->indexed_objects, stats->total_objects, stats->received_bytes); + } // Disconnect the underlying connection to prevent from idling. git_remote_disconnect(remote); From ab46b1d8ebcdc820aefe2c1391d4be73939bce95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Wed, 23 Oct 2013 15:08:18 +0200 Subject: [PATCH 8/8] indexer: include the delta stats The user is unable to derive the number of deltas in the pack, as that would require them to capture the stats exactly in the moment between download and final processing, which is abstracted away in the fetch. Capture these numbers for the user and expose them in the progress struct. The clone and fetch examples now also present this information to the user. 
--- examples/network/clone.c | 8 +++++++- examples/network/fetch.c | 7 ++++++- include/git2/types.h | 2 ++ src/indexer.c | 6 ++++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/examples/network/clone.c b/examples/network/clone.c index a09a94728..54c90aff8 100644 --- a/examples/network/clone.c +++ b/examples/network/clone.c @@ -25,13 +25,19 @@ static void print_progress(const progress_data *pd) : 0.f; int kbytes = pd->fetch_progress.received_bytes / 1024; - printf("net %3d%% (%4d kb, %5d/%5d) / idx %3d%% (%5d/%5d) / chk %3d%% (%4" PRIuZ "/%4" PRIuZ ") %s\n", + if (pd->fetch_progress.received_objects == pd->fetch_progress.total_objects) { + printf("Resolving deltas %d/%d\r", + pd->fetch_progress.indexed_deltas, + pd->fetch_progress.total_deltas); + } else { + printf("net %3d%% (%4d kb, %5d/%5d) / idx %3d%% (%5d/%5d) / chk %3d%% (%4" PRIuZ "/%4" PRIuZ ") %s\n", network_percent, kbytes, pd->fetch_progress.received_objects, pd->fetch_progress.total_objects, index_percent, pd->fetch_progress.indexed_objects, pd->fetch_progress.total_objects, checkout_percent, pd->completed_steps, pd->total_steps, pd->path); + } } static int fetch_progress(const git_transfer_progress *stats, void *payload) diff --git a/examples/network/fetch.c b/examples/network/fetch.c index 50ec0033a..b4f6a6ad6 100644 --- a/examples/network/fetch.c +++ b/examples/network/fetch.c @@ -72,6 +72,7 @@ int fetch(git_repository *repo, int argc, char **argv) const git_transfer_progress *stats; struct dl_data data; git_remote_callbacks callbacks = GIT_REMOTE_CALLBACKS_INIT; + int resolve_deltas_ln = 0; #ifndef _WIN32 pthread_t worker; #endif @@ -113,10 +114,14 @@ int fetch(git_repository *repo, int argc, char **argv) do { usleep(10000); - if (stats->total_objects > 0) + if (stats->received_objects == stats->total_objects) { + printf("Resolving deltas %d/%d\r", + stats->indexed_deltas, stats->total_deltas); + } else if (stats->total_objects > 0) { printf("Received %d/%d objects (%d) in %" PRIuZ 
" bytes\r", stats->received_objects, stats->total_objects, stats->indexed_objects, stats->received_bytes); + } } while (!data.finished); if (data.ret < 0) diff --git a/include/git2/types.h b/include/git2/types.h index 4ff2ba4c4..2d18d385a 100644 --- a/include/git2/types.h +++ b/include/git2/types.h @@ -225,6 +225,8 @@ typedef struct git_transfer_progress { unsigned int indexed_objects; unsigned int received_objects; unsigned int local_objects; + unsigned int total_deltas; + unsigned int indexed_deltas; size_t received_bytes; } git_transfer_progress; diff --git a/src/indexer.c b/src/indexer.c index 93ad116fe..f6e9ad902 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -483,6 +483,8 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz stats->received_objects = 0; stats->local_objects = 0; + stats->total_deltas = 0; + stats->indexed_deltas = 0; processed = stats->indexed_objects = 0; stats->total_objects = total_objects; do_progress_callback(idx, stats); @@ -758,6 +760,7 @@ static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats) git__free(obj.data); stats->indexed_objects++; + stats->indexed_deltas++; progressed = 1; do_progress_callback(idx, stats); @@ -866,6 +869,9 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress * return -1; } + /* Freeze the number of deltas */ + stats->total_deltas = stats->total_objects - stats->indexed_objects; + if (resolve_deltas(idx, stats) < 0) return -1;