diff --git a/examples/network/index-pack.c b/examples/network/index-pack.c index 671035fb7..881c1493f 100644 --- a/examples/network/index-pack.c +++ b/examples/network/index-pack.c @@ -12,6 +12,61 @@ int index_cb(const git_indexer_stats *stats, void *data) } int index_pack(git_repository *repo, int argc, char **argv) +{ + git_indexer_stream *idx; + git_indexer_stats stats = {0, 0}; + int error, fd; + char hash[GIT_OID_HEXSZ + 1] = {0}; + ssize_t read_bytes; + char buf[512]; + + if (argc < 2) { + fprintf(stderr, "I need a packfile\n"); + return EXIT_FAILURE; + } + + if (git_indexer_stream_new(&idx, ".git") < 0) { + puts("bad idx"); + return -1; + } + + if ((fd = open(argv[1], 0)) < 0) { + perror("open"); + return -1; + } + + do { + read_bytes = read(fd, buf, sizeof(buf)); + if (read_bytes < 0) + break; + + if ((error = git_indexer_stream_add(idx, buf, read_bytes, &stats)) < 0) + goto cleanup; + + printf("\rIndexing %d of %d", stats.processed, stats.total); + } while (read_bytes > 0); + + if (read_bytes < 0) { + error = -1; + perror("failed reading"); + goto cleanup; + } + + if ((error = git_indexer_stream_finalize(idx, &stats)) < 0) + goto cleanup; + + printf("\rIndexing %d of %d\n", stats.processed, stats.total); + + git_oid_fmt(hash, git_indexer_stream_hash(idx)); + puts(hash); + +cleanup: + close(fd); + git_indexer_stream_free(idx); + return error; +} + +int index_pack_old(git_repository *repo, int argc, char **argv) { git_indexer *indexer; git_indexer_stats stats; diff --git a/include/git2/indexer.h b/include/git2/indexer.h index 7f336f8e6..a70fab214 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -23,6 +23,51 @@ typedef struct git_indexer_stats { typedef struct git_indexer git_indexer; +typedef struct git_indexer_stream git_indexer_stream; + +/** + * Create a new streaming indexer instance + * + * @param out where to store the inexer instance + * @param path to the gitdir (metadata directory) + */ +GIT_EXTERN(int) git_indexer_stream_new(git_indexer_stream **out, const char *gitdir); + +/** + * Add data to the indexer + * + * @param idx the indexer + * @param data the data to add + * @param size the size of the data + * @param stats stat storage + */ +GIT_EXTERN(int) git_indexer_stream_add(git_indexer_stream *idx, void *data, size_t size, git_indexer_stats *stats); + +/** + * Finalize the pack and index + * + * Resolve any pending deltas and write out the index file + * + * @param idx the indexer + */ +GIT_EXTERN(int) git_indexer_stream_finalize(git_indexer_stream *idx, git_indexer_stats *stats); + +/** + * Get the packfile's hash + * + * A packfile's name is derived from the sorted hashing of all object + * names. This is only correct after the index has been finalized. + * + * @param idx the indexer instance + */ +GIT_EXTERN(const git_oid *) git_indexer_stream_hash(git_indexer_stream *idx); + +/** + * Free the indexer and its resources + * + * @param idx the indexer to free + */ +GIT_EXTERN(void) git_indexer_stream_free(git_indexer_stream *idx); /** * Create a new indexer instance diff --git a/src/filebuf.c b/src/filebuf.c index a9de165d5..6538aea66 100644 --- a/src/filebuf.c +++ b/src/filebuf.c @@ -194,14 +194,19 @@ int git_filebuf_open(git_filebuf *file, const char *path, int flags) memset(file, 0x0, sizeof(git_filebuf)); + if (flags & GIT_FILEBUF_DO_NOT_BUFFER) + file->do_not_buffer = true; + file->buf_size = WRITE_BUFFER_SIZE; file->buf_pos = 0; file->fd = -1; file->last_error = BUFERR_OK; /* Allocate the main cache buffer */ - file->buffer = git__malloc(file->buf_size); - GITERR_CHECK_ALLOC(file->buffer); + if (!file->do_not_buffer) { + file->buffer = git__malloc(file->buf_size); + GITERR_CHECK_ALLOC(file->buffer); + } /* If we are hashing on-write, allocate a new hash context */ if (flags & GIT_FILEBUF_HASH_CONTENTS) { @@ -345,6 +350,9 @@ int git_filebuf_write(git_filebuf *file, const void *buff, size_t len) ENSURE_BUF_OK(file); + if (file->do_not_buffer) + return file->write(file, (void *)buff, len); + for (;;) { size_t space_left = file->buf_size - file->buf_pos; diff --git a/src/filebuf.h b/src/filebuf.h index 19e17975b..72563b57a 100644 --- a/src/filebuf.h +++ b/src/filebuf.h @@ -19,7 +19,8 @@ #define GIT_FILEBUF_APPEND (1 << 2) #define GIT_FILEBUF_FORCE (1 << 3) #define GIT_FILEBUF_TEMPORARY (1 << 4) -#define GIT_FILEBUF_DEFLATE_SHIFT (5) +#define GIT_FILEBUF_DO_NOT_BUFFER (1 << 5) +#define GIT_FILEBUF_DEFLATE_SHIFT (6) #define GIT_FILELOCK_EXTENSION ".lock\0" #define GIT_FILELOCK_EXTLENGTH 6 @@ -41,6 +42,7 @@ struct git_filebuf { size_t buf_size, buf_pos; git_file fd; bool fd_is_open; + bool do_not_buffer; int last_error; }; diff --git a/src/indexer.c b/src/indexer.c index b5d639702..1834d9884 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -30,8 +30,6 @@ struct entry { struct git_indexer { struct git_pack_file *pack; - struct stat st; - struct git_pack_header hdr; size_t nr_objects; git_vector objects; git_filebuf file; @@ -39,27 +37,84 @@ struct git_indexer { git_oid hash; }; +struct git_indexer_stream { + unsigned int parsed_header :1, + opened_pack; + struct git_pack_file *pack; + git_filebuf pack_file; + git_filebuf index_file; + git_off_t off; + size_t nr_objects; + git_vector objects; + git_vector deltas; + unsigned int fanout[256]; + git_oid hash; +}; + +struct delta_info { + git_off_t delta_off; +}; + const git_oid *git_indexer_hash(git_indexer *idx) { return &idx->hash; } -static int parse_header(git_indexer *idx) +const git_oid *git_indexer_stream_hash(git_indexer_stream *idx) +{ + return &idx->hash; +} + +static int open_pack(struct git_pack_file **out, const char *filename) +{ + size_t namelen; + struct git_pack_file *pack; + struct stat st; + int fd; + + namelen = strlen(filename); + pack = git__calloc(1, sizeof(struct git_pack_file) + namelen + 1); + GITERR_CHECK_ALLOC(pack); + + memcpy(pack->pack_name, filename, namelen + 1); + + if (p_stat(filename, &st) < 0) { + giterr_set(GITERR_OS, "Failed to stat packfile."); + goto cleanup; + } + + if ((fd = p_open(pack->pack_name, O_RDONLY)) < 0) { + giterr_set(GITERR_OS, "Failed to open packfile."); + goto cleanup; + } + + pack->mwf.fd = fd; + pack->mwf.size = (git_off_t)st.st_size; + + *out = pack; + return 0; + +cleanup: + git__free(pack); + return -1; +} + +static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack) { int error; /* Verify we recognize this pack file format. */ - if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < 0) { + if ((error = p_read(pack->mwf.fd, hdr, sizeof(*hdr))) < 0) { giterr_set(GITERR_OS, "Failed to read in pack header"); return error; } - if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE)) { + if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) { giterr_set(GITERR_INVALID, "Wrong pack signature"); return -1; } - if (!pack_version_ok(idx->hdr.hdr_version)) { + if (!pack_version_ok(hdr->hdr_version)) { giterr_set(GITERR_INVALID, "Wrong pack version"); return -1; } @@ -83,12 +138,446 @@ static int cache_cmp(const void *a, const void *b) return git_oid_cmp(&ea->sha1, &eb->sha1); } +int git_indexer_stream_new(git_indexer_stream **out, const char *prefix) +{ + git_indexer_stream *idx; + git_buf path = GIT_BUF_INIT; + static const char suff[] = "/objects/pack/pack-received"; + int error; + + idx = git__calloc(1, sizeof(git_indexer_stream)); + GITERR_CHECK_ALLOC(idx); + + error = git_buf_joinpath(&path, prefix, suff); + if (error < 0) + goto cleanup; + + error = git_filebuf_open(&idx->pack_file, path.ptr, + GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER); + git_buf_free(&path); + if (error < 0) + goto cleanup; + + *out = idx; + return 0; + +cleanup: + git_buf_free(&path); + git_filebuf_cleanup(&idx->pack_file); + git__free(idx); + return -1; +} + +/* Try to store the delta so we can try to resolve it later */ +static int store_delta(git_indexer_stream *idx) +{ + git_otype type; + git_mwindow *w = NULL; + git_mwindow_file *mwf = &idx->pack->mwf; + git_off_t entry_start = idx->off; + struct delta_info *delta; + size_t entry_size; + git_rawobj obj; + int error; + + /* + * ref-delta objects can refer to object that we haven't + * found yet, so give it another opportunity + */ + if (git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off) < 0) + return -1; + + git_mwindow_close(&w); + + /* If it's not a delta, mark it as failure, we can't do anything with it */ + if (type != GIT_OBJ_REF_DELTA && type != GIT_OBJ_OFS_DELTA) + return -1; + + if (type == GIT_OBJ_REF_DELTA) { + idx->off += GIT_OID_RAWSZ; + } else { + git_off_t base_off; + + base_off = get_delta_base(idx->pack, &w, &idx->off, type, entry_start); + git_mwindow_close(&w); + if (base_off < 0) + return (int)base_off; + } + + error = packfile_unpack_compressed(&obj, idx->pack, &w, &idx->off, entry_size, type); + if (error == GIT_ESHORTBUFFER) { + idx->off = entry_start; + return GIT_ESHORTBUFFER; + } else if (error < 0){ + return -1; + } + + delta = git__calloc(1, sizeof(struct delta_info)); + GITERR_CHECK_ALLOC(delta); + delta->delta_off = entry_start; + + git__free(obj.data); + + if (git_vector_insert(&idx->deltas, delta) < 0) + return -1; + + return 0; +} + +static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start) +{ + int i; + git_oid oid; + void *packed; + size_t entry_size; + unsigned int left; + struct entry *entry; + git_mwindow *w = NULL; + git_mwindow_file *mwf = &idx->pack->mwf; + struct git_pack_entry *pentry; + + entry = git__calloc(1, sizeof(*entry)); + GITERR_CHECK_ALLOC(entry); + + if (entry_start > UINT31_MAX) { + entry->offset = UINT32_MAX; + entry->offset_long = entry_start; + } else { + entry->offset = (uint32_t)entry_start; + } + + /* FIXME: Parse the object instead of hashing it */ + if (git_odb__hashobj(&oid, obj) < 0) { + giterr_set(GITERR_INVALID, "Failed to hash object"); + return -1; + } + + pentry = git__malloc(sizeof(struct git_pack_entry)); + GITERR_CHECK_ALLOC(pentry); + + git_oid_cpy(&pentry->sha1, &oid); + pentry->offset = entry_start; + if (git_vector_insert(&idx->pack->cache, pentry) < 0) + goto on_error; + + git_oid_cpy(&entry->oid, &oid); + entry->crc = crc32(0L, Z_NULL, 0); + + entry_size = (size_t)(idx->off - entry_start); + packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); + if (packed == NULL) + goto on_error; + + entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size)); + git_mwindow_close(&w); + + /* Add the object to the list */ + if (git_vector_insert(&idx->objects, entry) < 0) + goto on_error; + + for (i = oid.id[0]; i < 256; ++i) { + idx->fanout[i]++; + } + + return 0; + +on_error: + git__free(entry); + git__free(pentry); + git__free(obj->data); + return -1; +} + +int git_indexer_stream_add(git_indexer_stream *idx, void *data, size_t size, git_indexer_stats *stats) +{ + int error; + struct git_pack_header hdr; + size_t processed = stats->processed; + git_mwindow_file *mwf = &idx->pack->mwf; + + assert(idx && data && stats); + + if (git_filebuf_write(&idx->pack_file, data, size) < 0) + return -1; + + /* Make sure we set the new size of the pack */ + if (idx->opened_pack) { + idx->pack->mwf.size += size; + //printf("\nadding %zu for %zu\n", size, idx->pack->mwf.size); + } else { + if (open_pack(&idx->pack, idx->pack_file.path_lock) < 0) + return -1; + idx->opened_pack = 1; + mwf = &idx->pack->mwf; + if (git_mwindow_file_register(&idx->pack->mwf) < 0) + return -1; + + return 0; + } + + if (!idx->parsed_header) { + if ((unsigned)idx->pack->mwf.size < sizeof(hdr)) + return 0; + + if (parse_header(&hdr, idx->pack) < 0) + return -1; + + idx->parsed_header = 1; + idx->nr_objects = ntohl(hdr.hdr_entries); + idx->off = sizeof(struct git_pack_header); + + /* for now, limit to 2^32 objects */ + assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects)); + + if (git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp) < 0) + return -1; + + idx->pack->has_cache = 1; + if (git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp) < 0) + return -1; + + if (git_vector_init(&idx->deltas, (unsigned int)(idx->nr_objects / 2), NULL) < 0) + return -1; + + stats->total = (unsigned int)idx->nr_objects; + stats->processed = 0; + } + + /* Now that we have data in the pack, let's try to parse it */ + + /* As the file grows any windows we try to use will be out of date */ + git_mwindow_free_all(mwf); + while (processed < idx->nr_objects) { + git_rawobj obj; + git_off_t entry_start = idx->off; + + if (idx->pack->mwf.size <= idx->off + 20) + return 0; + + error = git_packfile_unpack(&obj, idx->pack, &idx->off); + if (error == GIT_ESHORTBUFFER) { + idx->off = entry_start; + return 0; + } + + if (error < 0) { + idx->off = entry_start; + error = store_delta(idx); + if (error == GIT_ESHORTBUFFER) + return 0; + if (error < 0) + return error; + + continue; + } + + if (hash_and_save(idx, &obj, entry_start) < 0) + goto on_error; + + git__free(obj.data); + + stats->processed = ++processed; + } + + return 0; + +on_error: + git_mwindow_free_all(mwf); + return -1; +} + +static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char *suffix) +{ + const char prefix[] = "pack-"; + size_t slash = (size_t)path->size; + + /* search backwards for '/' */ + while (slash > 0 && path->ptr[slash - 1] != '/') + slash--; + + if (git_buf_grow(path, slash + 1 + strlen(prefix) + + GIT_OID_HEXSZ + strlen(suffix) + 1) < 0) + return -1; + + git_buf_truncate(path, slash); + git_buf_puts(path, prefix); + git_oid_fmt(path->ptr + path->size, &idx->hash); + path->size += GIT_OID_HEXSZ; + git_buf_puts(path, suffix); + + return git_buf_oom(path) ? -1 : 0; +} + +static int resolve_deltas(git_indexer_stream *idx, git_indexer_stats *stats) +{ + unsigned int i; + struct delta_info *delta; + + git_vector_foreach(&idx->deltas, i, delta) { + git_rawobj obj; + + idx->off = delta->delta_off; + if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0) + return -1; + + if (hash_and_save(idx, &obj, delta->delta_off) < 0) + return -1; + + git__free(obj.data); + stats->processed++; + } + + return 0; +} + +int git_indexer_stream_finalize(git_indexer_stream *idx, git_indexer_stats *stats) +{ + git_mwindow *w = NULL; + unsigned int i, long_offsets = 0, left; + struct git_pack_idx_header hdr; + git_buf filename = GIT_BUF_INIT; + struct entry *entry; + void *packfile_hash; + git_oid file_hash; + SHA_CTX ctx; + + if (idx->deltas.length > 0) + if (resolve_deltas(idx, stats) < 0) + return -1; + + git_vector_sort(&idx->objects); + + git_buf_sets(&filename, idx->pack->pack_name); + git_buf_truncate(&filename, filename.size - strlen("pack")); + git_buf_puts(&filename, "idx"); + if (git_buf_oom(&filename)) + return -1; + + if (git_filebuf_open(&idx->index_file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS) < 0) + goto on_error; + + /* Write out the header */ + hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); + hdr.idx_version = htonl(2); + git_filebuf_write(&idx->index_file, &hdr, sizeof(hdr)); + + /* Write out the fanout table */ + for (i = 0; i < 256; ++i) { + uint32_t n = htonl(idx->fanout[i]); + git_filebuf_write(&idx->index_file, &n, sizeof(n)); + } + + /* Write out the object names (SHA-1 hashes) */ + SHA1_Init(&ctx); + git_vector_foreach(&idx->objects, i, entry) { + git_filebuf_write(&idx->index_file, &entry->oid, sizeof(git_oid)); + SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ); + } + SHA1_Final(idx->hash.id, &ctx); + + /* Write out the CRC32 values */ + git_vector_foreach(&idx->objects, i, entry) { + git_filebuf_write(&idx->index_file, &entry->crc, sizeof(uint32_t)); + } + + /* Write out the offsets */ + git_vector_foreach(&idx->objects, i, entry) { + uint32_t n; + + if (entry->offset == UINT32_MAX) + n = htonl(0x80000000 | long_offsets++); + else + n = htonl(entry->offset); + + git_filebuf_write(&idx->index_file, &n, sizeof(uint32_t)); + } + + /* Write out the long offsets */ + git_vector_foreach(&idx->objects, i, entry) { + uint32_t split[2]; + + if (entry->offset != UINT32_MAX) + continue; + + split[0] = htonl(entry->offset_long >> 32); + split[1] = htonl(entry->offset_long & 0xffffffff); + + git_filebuf_write(&idx->index_file, &split, sizeof(uint32_t) * 2); + } + + /* Write out the packfile trailer */ + packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); + if (packfile_hash == NULL) { + git_mwindow_close(&w); + goto on_error; + } + + memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ); + git_mwindow_close(&w); + + git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid)); + + /* Write out the packfile trailer to the idx file as well */ + if (git_filebuf_hash(&file_hash, &idx->index_file) < 0) + goto on_error; + + git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid)); + + /* Figure out what the final name should be */ + if (index_path_stream(&filename, idx, ".idx") < 0) + goto on_error; + + /* Commit file */ + if (git_filebuf_commit_at(&idx->index_file, filename.ptr, GIT_PACK_FILE_MODE) < 0) + goto on_error; + + git_mwindow_free_all(&idx->pack->mwf); + + if (index_path_stream(&filename, idx, ".pack") < 0) + goto on_error; + /* And don't forget to rename the packfile to its new place. */ + if (git_filebuf_commit_at(&idx->pack_file, filename.ptr, GIT_PACK_FILE_MODE) < 0) + return -1; + + git_buf_free(&filename); + return 0; + +on_error: + git_mwindow_free_all(&idx->pack->mwf); + git_filebuf_cleanup(&idx->index_file); + git_buf_free(&filename); + return -1; +} + +void git_indexer_stream_free(git_indexer_stream *idx) +{ + unsigned int i; + struct entry *e; + struct git_pack_entry *pe; + struct delta_info *delta; + + if (idx == NULL) + return; + + p_close(idx->pack->mwf.fd); + git_vector_foreach(&idx->objects, i, e) + git__free(e); + git_vector_free(&idx->objects); + git_vector_foreach(&idx->pack->cache, i, pe) + git__free(pe); + git_vector_free(&idx->pack->cache); + git_vector_foreach(&idx->deltas, i, delta) + git__free(delta); + git_vector_free(&idx->deltas); + git__free(idx->pack); + git__free(idx); +} int git_indexer_new(git_indexer **out, const char *packname) { git_indexer *idx; - size_t namelen; - int ret, error; + struct git_pack_header hdr; + int error; assert(out && packname); @@ -100,37 +589,12 @@ int git_indexer_new(git_indexer **out, const char *packname) idx = git__calloc(1, sizeof(git_indexer)); GITERR_CHECK_ALLOC(idx); - namelen = strlen(packname); - idx->pack = git__calloc(1, sizeof(struct git_pack_file) + namelen + 1); - GITERR_CHECK_ALLOC(idx->pack); + open_pack(&idx->pack, packname); - memcpy(idx->pack->pack_name, packname, namelen + 1); - - if ((ret = p_stat(packname, &idx->st)) < 0) { - if (errno == ENOENT) { - giterr_set(GITERR_OS, "Failed to stat packfile. File not found"); - error = GIT_ENOTFOUND; - } else { - giterr_set(GITERR_OS, "Failed to stat packfile."); - error = -1; - } - - goto cleanup; - } - - if ((ret = p_open(idx->pack->pack_name, O_RDONLY)) < 0) { - giterr_set(GITERR_OS, "Failed to open packfile."); - error = -1; - goto cleanup; - } - - idx->pack->mwf.fd = ret; - idx->pack->mwf.size = (git_off_t)idx->st.st_size; - - if ((error = parse_header(idx)) < 0) + if ((error = parse_header(&hdr, idx->pack)) < 0) goto cleanup; - idx->nr_objects = ntohl(idx->hdr.hdr_entries); + idx->nr_objects = ntohl(hdr.hdr_entries); /* for now, limit to 2^32 objects */ assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects)); @@ -151,7 +615,7 @@ int git_indexer_new(git_indexer **out, const char *packname) cleanup: git_indexer_free(idx); - return error; + return -1; } static int index_path(git_buf *path, git_indexer *idx) @@ -263,7 +727,7 @@ int git_indexer_write(git_indexer *idx) /* Write out the packfile trailer */ - packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); + packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); git_mwindow_close(&w); if (packfile_hash == NULL) { error = -1; @@ -331,6 +795,7 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) git_off_t entry_start = off; void *packed; size_t entry_size; + char fmt[GIT_OID_HEXSZ] = {0}; entry = git__calloc(1, sizeof(*entry)); GITERR_CHECK_ALLOC(entry); @@ -361,6 +826,8 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) git_oid_cpy(&pentry->sha1, &oid); pentry->offset = entry_start; + git_oid_fmt(fmt, &oid); + printf("adding %s to cache\n", fmt); error = git_vector_insert(&idx->pack->cache, pentry); if (error < 0) goto cleanup; diff --git a/src/mwindow.c b/src/mwindow.c index 7fe02b9ce..31b98fb92 100644 --- a/src/mwindow.c +++ b/src/mwindow.c @@ -89,6 +89,7 @@ void git_mwindow_scan_lru( { git_mwindow *w, *w_l; + puts("LRU"); for (w_l = NULL, w = mwf->windows; w; w = w->next) { if (!w->inuse_cnt) { /* @@ -210,14 +211,16 @@ unsigned char *git_mwindow_open( git_mwindow_ctl *ctl = &GIT_GLOBAL->mem_ctl; git_mwindow *w = *cursor; + if (!w || !git_mwindow_contains(w, offset + extra)) { if (w) { w->inuse_cnt--; } for (w = mwf->windows; w; w = w->next) { - if (git_mwindow_contains(w, offset + extra)) + if (git_mwindow_contains(w, offset + extra)) { break; + } } /* @@ -246,6 +249,7 @@ unsigned char *git_mwindow_open( if (left) *left = (unsigned int)(w->window_map.len - offset); + fflush(stdout); return (unsigned char *) w->window_map.data + offset; } diff --git a/src/pack.c b/src/pack.c index a4e506945..b79ecf417 100644 --- a/src/pack.c +++ b/src/pack.c @@ -200,7 +200,8 @@ static unsigned char *pack_window_open( return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left); } -static unsigned long packfile_unpack_header1( +static int packfile_unpack_header1( + unsigned long *usedp, size_t *sizep, git_otype *type, const unsigned char *buf, @@ -215,8 +216,13 @@ static unsigned long packfile_unpack_header1( size = c & 15; shift = 4; while (c & 0x80) { - if (len <= used || bitsizeof(long) <= shift) - return 0; + if (len <= used) + return GIT_ESHORTBUFFER; + + if (bitsizeof(long) <= shift) { + *usedp = 0; + return -1; + } c = buf[used++]; size += (c & 0x7f) << shift; @@ -224,7 +230,8 @@ static unsigned long packfile_unpack_header1( } *sizep = (size_t)size; - return used; + *usedp = used; + return 0; } int git_packfile_unpack_header( @@ -237,6 +244,7 @@ int git_packfile_unpack_header( unsigned char *base; unsigned int left; unsigned long used; + int ret; /* pack_window_open() assures us we have [base, base + 20) available * as a range that we can look at at. (Its actually the hash @@ -247,10 +255,13 @@ int git_packfile_unpack_header( // base = pack_window_open(p, w_curs, *curpos, &left); base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left); if (base == NULL) - return -1; + return GIT_ESHORTBUFFER; - used = packfile_unpack_header1(size_p, type_p, base, left); - if (used == 0) + ret = packfile_unpack_header1(&used, size_p, type_p, base, left); + git_mwindow_close(w_curs); + if (ret == GIT_ESHORTBUFFER) + return ret; + else if (ret < 0) return packfile_error("header length is zero"); *curpos += used; @@ -271,12 +282,12 @@ static int packfile_unpack_delta( int error; base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset); + git_mwindow_close(w_curs); if (base_offset == 0) return packfile_error("delta offset is zero"); if (base_offset < 0) /* must actually be an error code */ return (int)base_offset; - git_mwindow_close(w_curs); error = git_packfile_unpack(&base, p, &base_offset); /* @@ -289,6 +300,7 @@ static int packfile_unpack_delta( return error; error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type); + git_mwindow_close(w_curs); if (error < 0) { git__free(base.data); return error; @@ -327,6 +339,8 @@ int git_packfile_unpack( obj->type = GIT_OBJ_BAD; error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos); + git_mwindow_close(&w_curs); + if (error < 0) return error; @@ -352,8 +366,6 @@ int git_packfile_unpack( break; } - git_mwindow_close(&w_curs); - *obj_offset = curpos; return error; } @@ -381,6 +393,7 @@ int packfile_unpack_compressed( if (st != Z_OK) { git__free(buffer); giterr_set(GITERR_ZLIB, "Failed to inflate packfile"); + return -1; } @@ -388,10 +401,17 @@ int packfile_unpack_compressed( in = pack_window_open(p, w_curs, *curpos, &stream.avail_in); stream.next_in = in; st = inflate(&stream, Z_FINISH); + git_mwindow_close(w_curs); if (!stream.avail_out) break; /* the payload is larger than it should be */ + if (st == Z_BUF_ERROR && in == NULL) { + inflateEnd(&stream); + git__free(buffer); + return GIT_ESHORTBUFFER; + } + *curpos += stream.next_in - in; } while (st == Z_OK || st == Z_BUF_ERROR); @@ -420,10 +440,15 @@ git_off_t get_delta_base( git_otype type, git_off_t delta_obj_offset) { - unsigned char *base_info = pack_window_open(p, w_curs, *curpos, NULL); + unsigned int left = 0; + unsigned char *base_info; git_off_t base_offset; git_oid unused; + base_info = pack_window_open(p, w_curs, *curpos, &left); + /* Assumption: the only reason this would fail is because the file is too small */ + if (base_info == NULL) + return GIT_ESHORTBUFFER; /* pack_window_open() assured us we have [base_info, base_info + 20) * as a range that we can look at without walking off the * end of the mapped window. Its actually the hash size @@ -435,6 +460,8 @@ git_off_t get_delta_base( unsigned char c = base_info[used++]; base_offset = c & 127; while (c & 128) { + if (left <= used) + return GIT_ESHORTBUFFER; base_offset += 1; if (!base_offset || MSB(base_offset, 7)) return 0; /* overflow */ diff --git a/src/pack.h b/src/pack.h index 7cf41c183..cd7a4d2e1 100644 --- a/src/pack.h +++ b/src/pack.h @@ -83,6 +83,13 @@ int git_packfile_unpack_header( git_off_t *curpos); int git_packfile_unpack(git_rawobj *obj, struct git_pack_file *p, git_off_t *obj_offset); +int packfile_unpack_compressed( + git_rawobj *obj, + struct git_pack_file *p, + git_mwindow **w_curs, + git_off_t *curpos, + size_t size, + git_otype type); git_off_t get_delta_base(struct git_pack_file *p, git_mwindow **w_curs, git_off_t *curpos, git_otype type,