mirror of
https://git.proxmox.com/git/libgit2
synced 2025-05-07 21:56:44 +00:00
Merge pull request #1387 from carlosmn/kill-indexer
indexer: kill git_indexer
This commit is contained in:
commit
29ab8774e5
@ -33,7 +33,6 @@ typedef struct git_transfer_progress {
|
||||
*/
|
||||
typedef int (*git_transfer_progress_callback)(const git_transfer_progress *stats, void *payload);
|
||||
|
||||
typedef struct git_indexer git_indexer;
|
||||
typedef struct git_indexer_stream git_indexer_stream;
|
||||
|
||||
/**
|
||||
@ -86,53 +85,6 @@ GIT_EXTERN(const git_oid *) git_indexer_stream_hash(const git_indexer_stream *id
|
||||
*/
|
||||
GIT_EXTERN(void) git_indexer_stream_free(git_indexer_stream *idx);
|
||||
|
||||
/**
|
||||
* Create a new indexer instance
|
||||
*
|
||||
* @param out where to store the indexer instance
|
||||
* @param packname the absolute filename of the packfile to index
|
||||
*/
|
||||
GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname);
|
||||
|
||||
/**
|
||||
* Iterate over the objects in the packfile and extract the information
|
||||
*
|
||||
* Indexing a packfile can be very expensive so this function is
|
||||
* expected to be run in a worker thread and the stats used to provide
|
||||
* feedback to the user.
|
||||
*
|
||||
* @param idx the indexer instance
|
||||
* @param stats storage for the running state
|
||||
*/
|
||||
GIT_EXTERN(int) git_indexer_run(git_indexer *idx, git_transfer_progress *stats);
|
||||
|
||||
/**
|
||||
* Write the index file to disk.
|
||||
*
|
||||
* The file will be stored as pack-$hash.idx in the same directory as
|
||||
* the packfile.
|
||||
*
|
||||
* @param idx the indexer instance
|
||||
*/
|
||||
GIT_EXTERN(int) git_indexer_write(git_indexer *idx);
|
||||
|
||||
/**
|
||||
* Get the packfile's hash
|
||||
*
|
||||
* A packfile's name is derived from the sorted hashing of all object
|
||||
* names. This is only correct after the index has been written to disk.
|
||||
*
|
||||
* @param idx the indexer instance
|
||||
*/
|
||||
GIT_EXTERN(const git_oid *) git_indexer_hash(const git_indexer *idx);
|
||||
|
||||
/**
|
||||
* Free the indexer and its resources
|
||||
*
|
||||
* @param idx the indexer to free
|
||||
*/
|
||||
GIT_EXTERN(void) git_indexer_free(git_indexer *idx);
|
||||
|
||||
GIT_END_DECL
|
||||
|
||||
#endif
|
||||
|
328
src/indexer.c
328
src/indexer.c
@ -27,15 +27,6 @@ struct entry {
|
||||
uint64_t offset_long;
|
||||
};
|
||||
|
||||
struct git_indexer {
|
||||
struct git_pack_file *pack;
|
||||
size_t nr_objects;
|
||||
git_vector objects;
|
||||
git_filebuf file;
|
||||
unsigned int fanout[256];
|
||||
git_oid hash;
|
||||
};
|
||||
|
||||
struct git_indexer_stream {
|
||||
unsigned int parsed_header :1,
|
||||
opened_pack :1,
|
||||
@ -61,11 +52,6 @@ struct delta_info {
|
||||
git_off_t delta_off;
|
||||
};
|
||||
|
||||
const git_oid *git_indexer_hash(const git_indexer *idx)
|
||||
{
|
||||
return &idx->hash;
|
||||
}
|
||||
|
||||
const git_oid *git_indexer_stream_hash(const git_indexer_stream *idx)
|
||||
{
|
||||
return &idx->hash;
|
||||
@ -451,7 +437,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
|
||||
return -1;
|
||||
|
||||
stats->received_objects = 0;
|
||||
stats->indexed_objects = 0;
|
||||
processed = stats->indexed_objects = 0;
|
||||
stats->total_objects = (unsigned int)idx->nr_objects;
|
||||
do_progress_callback(idx, stats);
|
||||
}
|
||||
@ -755,315 +741,3 @@ void git_indexer_stream_free(git_indexer_stream *idx)
|
||||
git_filebuf_cleanup(&idx->pack_file);
|
||||
git__free(idx);
|
||||
}
|
||||
|
||||
/**
 * Create an indexer for a packfile that already exists on disk.
 *
 * Opens the pack, parses its header to learn the number of objects and
 * pre-sizes both the pack's object cache and the indexer's own object list.
 *
 * @param out where to store the new indexer; only written on success
 * @param packname path of the packfile to index
 * @return 0 on success, -1 on failure
 */
int git_indexer_new(git_indexer **out, const char *packname)
{
	git_indexer *idx;
	struct git_pack_header hdr;
	int error;

	assert(out && packname);

	idx = git__calloc(1, sizeof(git_indexer));
	GITERR_CHECK_ALLOC(idx);

	/*
	 * The result of opening the pack must be checked: everything below,
	 * including git_indexer_free(), dereferences idx->pack. At this point
	 * the indexer holds nothing but its own allocation, so release just that.
	 */
	if (open_pack(&idx->pack, packname) < 0 || idx->pack == NULL) {
		git__free(idx);
		return -1;
	}

	if ((error = parse_header(&hdr, idx->pack)) < 0)
		goto cleanup;

	idx->nr_objects = ntohl(hdr.hdr_entries);

	/* for now, limit to 2^32 objects */
	assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));

	error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp);
	if (error < 0)
		goto cleanup;

	idx->pack->has_cache = 1;

	error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp);
	if (error < 0)
		goto cleanup;

	*out = idx;

	return 0;

cleanup:
	git_indexer_free(idx);

	return -1;
}
|
||||
|
||||
/*
 * Rewrite `path` in place so that it names the index file of this pack:
 * whatever follows the last '/' is replaced by "pack-<hex hash>.idx",
 * where the hash is idx->hash.
 *
 * Returns 0 on success, -1 if growing the buffer fails or the buffer is
 * flagged as out of memory afterwards.
 */
static int index_path(git_buf *path, git_indexer *idx)
{
	const char prefix[] = "pack-", suffix[] = ".idx";
	size_t dir_len;

	/* Length of the directory part including its trailing '/', or 0 if there is none */
	for (dir_len = (size_t)path->size; dir_len > 0; dir_len--) {
		if (path->ptr[dir_len - 1] == '/')
			break;
	}

	if (git_buf_grow(path, dir_len + 1 + strlen(prefix) +
			GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
		return -1;

	git_buf_truncate(path, dir_len);
	git_buf_puts(path, prefix);

	/* The hex digits are written straight into the buffer, so the size is bumped by hand */
	git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
	path->size += GIT_OID_HEXSZ;

	git_buf_puts(path, suffix);

	if (git_buf_oom(path))
		return -1;

	return 0;
}
|
||||
|
||||
/**
 * Write the pack index (version 2) for the objects collected by
 * git_indexer_run().
 *
 * The index is first written next to the packfile under the pack's own
 * name with "pack" replaced by "idx". It is then committed under the name
 * produced by index_path(), i.e. "pack-<hash>.idx", where the hash is
 * computed over the sorted object ids and stored in idx->hash.
 *
 * Layout written, in order: header, 256-entry fanout table, object ids,
 * CRC32 values, 32-bit offsets, 64-bit offsets for entries flagged as
 * long, the packfile's trailing hash, and the hash of the index contents.
 *
 * Past the early setup failures, the pack's memory windows are released
 * and its window file deregistered whether or not the write succeeded.
 *
 * @param idx the indexer instance
 * @return a negative value on failure; otherwise the result of committing
 *         the index file
 */
int git_indexer_write(git_indexer *idx)
{
	git_mwindow *w = NULL;
	int error;
	unsigned int i, long_offsets = 0, left;
	struct git_pack_idx_header hdr;
	git_buf filename = GIT_BUF_INIT;
	struct entry *entry;
	void *packfile_hash;
	git_oid file_hash;
	git_hash_ctx ctx;

	if (git_hash_ctx_init(&ctx) < 0)
		return -1;

	git_vector_sort(&idx->objects);

	/* Temporary name: the pack's name with the trailing "pack" swapped for "idx" */
	git_buf_sets(&filename, idx->pack->pack_name);
	git_buf_truncate(&filename, filename.size - strlen("pack"));
	git_buf_puts(&filename, "idx");
	if (git_buf_oom(&filename)) {
		/*
		 * Nothing has been opened yet, so only release what this
		 * function set up itself instead of leaking the hash context.
		 */
		git_buf_free(&filename);
		git_hash_ctx_cleanup(&ctx);
		return -1;
	}

	error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS);
	if (error < 0)
		goto cleanup;

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
	if (error < 0)
		goto cleanup;

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		error = git_filebuf_write(&idx->file, &n, sizeof(n));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the object names (SHA-1 hashes), hashing them as we go */
	git_vector_foreach(&idx->objects, i, entry) {
		if ((error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid))) < 0 ||
			(error = git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ)) < 0)
			goto cleanup;
	}

	/* The hash over the sorted object names is what names the pack */
	if ((error = git_hash_final(&idx->hash, &ctx)) < 0)
		goto cleanup;

	/* Write out the CRC32 values (stored in network order by git_indexer_run) */
	git_vector_foreach(&idx->objects, i, entry) {
		error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/* UINT32_MAX marks an entry whose real offset lives in offset_long */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the long offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
		if (error < 0)
			goto cleanup;
	}

	/* Write out the packfile trailer */
	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w,
		idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_hash == NULL) {
		git_mwindow_close(&w);
		error = -1;
		goto cleanup;
	}

	/* Copy the hash out while the window is still held, then release it */
	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
	git_mwindow_close(&w);

	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
	if (error < 0)
		goto cleanup;

	/* Write out the index sha */
	error = git_filebuf_hash(&file_hash, &idx->file);
	if (error < 0)
		goto cleanup;

	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
	if (error < 0)
		goto cleanup;

	/* Figure out what the final name should be */
	error = index_path(&filename, idx);
	if (error < 0)
		goto cleanup;

	/* Commit file */
	error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE);

cleanup:
	git_mwindow_free_all(&idx->pack->mwf);
	git_mwindow_file_deregister(&idx->pack->mwf);
	if (error < 0)
		git_filebuf_cleanup(&idx->file);
	git_buf_free(&filename);
	git_hash_ctx_cleanup(&ctx);

	return error;
}
|
||||
|
||||
/**
 * Walk every object in the packfile and record what the index needs.
 *
 * For each object this unpacks and hashes it, adds a git_pack_entry to the
 * pack's cache, computes the CRC32 of the packed bytes, appends a
 * `struct entry` to idx->objects and updates the fanout table.
 * stats->indexed_objects is updated after every object.
 *
 * Ownership: an entry belongs to idx->objects and a pack entry to the
 * pack's cache once inserted; git_indexer_free() releases both. Anything
 * not yet handed over when an error occurs is released here.
 *
 * @param idx the indexer instance
 * @param stats storage for the running state
 * @return a negative value on failure; otherwise the non-negative result
 *         of the last operation
 */
int git_indexer_run(git_indexer *idx, git_transfer_progress *stats)
{
	git_mwindow_file *mwf;
	git_off_t off = sizeof(struct git_pack_header);
	int error;
	struct entry *entry = NULL;           /* non-NULL only while owned by this function */
	struct git_pack_entry *pentry = NULL; /* likewise */
	void *obj_data = NULL;                /* unpacked object contents, likewise */
	unsigned int left, processed;

	assert(idx && stats);

	mwf = &idx->pack->mwf;
	error = git_mwindow_file_register(mwf);
	if (error < 0)
		return error;

	stats->total_objects = (unsigned int)idx->nr_objects;
	stats->indexed_objects = processed = 0;

	while (processed < idx->nr_objects) {
		git_rawobj obj;
		git_oid oid;
		git_mwindow *w = NULL;
		int i;
		git_off_t entry_start = off;
		void *packed;
		size_t entry_size;

		entry = git__calloc(1, sizeof(*entry));
		if (entry == NULL) {
			error = -1;
			goto cleanup;
		}

		/* Offsets too large for the 31-bit field are flagged and kept in offset_long */
		if (off > UINT31_MAX) {
			entry->offset = UINT32_MAX;
			entry->offset_long = off;
		} else {
			entry->offset = (uint32_t)off;
		}

		/* Advances `off` past the object that was just read */
		error = git_packfile_unpack(&obj, idx->pack, &off);
		if (error < 0)
			goto cleanup;
		obj_data = obj.data;

		/* FIXME: Parse the object instead of hashing it */
		error = git_odb__hashobj(&oid, &obj);
		if (error < 0) {
			giterr_set(GITERR_INDEXER, "Failed to hash object");
			goto cleanup;
		}

		pentry = git__malloc(sizeof(struct git_pack_entry));
		if (pentry == NULL) {
			error = -1;
			goto cleanup;
		}

		git_oid_cpy(&pentry->sha1, &oid);
		pentry->offset = entry_start;
		error = git_vector_insert(&idx->pack->cache, pentry);
		if (error < 0)
			goto cleanup;
		pentry = NULL; /* now owned by the pack cache */

		git_oid_cpy(&entry->oid, &oid);
		entry->crc = crc32(0L, Z_NULL, 0);

		/* The CRC covers the object exactly as stored in the pack */
		entry_size = (size_t)(off - entry_start);
		packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
		if (packed == NULL) {
			error = -1;
			goto cleanup;
		}
		entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
		git_mwindow_close(&w);

		/* Add the object to the list */
		error = git_vector_insert(&idx->objects, entry);
		if (error < 0)
			goto cleanup;
		entry = NULL; /* now owned by idx->objects */

		/* Every fanout slot from the first oid byte upwards counts this object */
		for (i = oid.id[0]; i < 256; ++i)
			idx->fanout[i]++;

		git__free(obj_data);
		obj_data = NULL;

		stats->indexed_objects = ++processed;
	}

cleanup:
	/* All three are NULL unless an error interrupted an iteration */
	git__free(obj_data);
	git__free(pentry);
	git__free(entry);

	git_mwindow_free_all(mwf);

	return error;
}
|
||||
|
||||
/**
 * Free the indexer and everything it owns.
 *
 * Releases every entry in idx->objects, every entry in the pack's cache,
 * the packfile itself and finally the indexer. A NULL indexer is ignored,
 * and so is an indexer whose packfile was never opened, so the function
 * can be used on a partially constructed instance.
 *
 * @param idx the indexer to free; may be NULL
 */
void git_indexer_free(git_indexer *idx)
{
	unsigned int i;
	struct entry *e;
	struct git_pack_entry *pe;

	if (idx == NULL)
		return;

	git_vector_foreach(&idx->objects, i, e)
		git__free(e);
	git_vector_free(&idx->objects);

	/* idx->pack stays NULL when construction failed before the pack was opened */
	if (idx->pack != NULL) {
		git_mwindow_file_deregister(&idx->pack->mwf);

		git_vector_foreach(&idx->pack->cache, i, pe)
			git__free(pe);
		git_vector_free(&idx->pack->cache);

		git_packfile_free(idx->pack);
	}

	git__free(idx);
}
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
static git_repository *_repo;
|
||||
static git_revwalk *_revwalker;
|
||||
static git_packbuilder *_packbuilder;
|
||||
static git_indexer *_indexer;
|
||||
static git_indexer_stream *_indexer;
|
||||
static git_vector _commits;
|
||||
static int _commits_is_initialized;
|
||||
|
||||
@ -40,7 +40,7 @@ void test_pack_packbuilder__cleanup(void)
|
||||
git_revwalk_free(_revwalker);
|
||||
_revwalker = NULL;
|
||||
|
||||
git_indexer_free(_indexer);
|
||||
git_indexer_stream_free(_indexer);
|
||||
_indexer = NULL;
|
||||
|
||||
cl_git_sandbox_cleanup();
|
||||
@ -75,20 +75,29 @@ static void seed_packbuilder(void)
|
||||
}
|
||||
}
|
||||
|
||||
static int feed_indexer(void *ptr, size_t len, void *payload)
|
||||
{
|
||||
git_transfer_progress *stats = (git_transfer_progress *)payload;
|
||||
|
||||
return git_indexer_stream_add(_indexer, ptr, len, stats);
|
||||
}
|
||||
|
||||
void test_pack_packbuilder__create_pack(void)
|
||||
{
|
||||
git_transfer_progress stats;
|
||||
git_buf buf = GIT_BUF_INIT;
|
||||
git_buf buf = GIT_BUF_INIT, path = GIT_BUF_INIT;
|
||||
git_hash_ctx ctx;
|
||||
git_oid hash;
|
||||
char hex[41]; hex[40] = '\0';
|
||||
|
||||
seed_packbuilder();
|
||||
cl_git_pass(git_packbuilder_write(_packbuilder, "testpack.pack"));
|
||||
|
||||
cl_git_pass(git_indexer_new(&_indexer, "testpack.pack"));
|
||||
cl_git_pass(git_indexer_run(_indexer, &stats));
|
||||
cl_git_pass(git_indexer_write(_indexer));
|
||||
cl_git_pass(git_indexer_stream_new(&_indexer, ".", NULL, NULL));
|
||||
cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats));
|
||||
cl_git_pass(git_indexer_stream_finalize(_indexer, &stats));
|
||||
|
||||
git_oid_fmt(hex, git_indexer_stream_hash(_indexer));
|
||||
git_buf_printf(&path, "pack-%s.pack", hex);
|
||||
|
||||
/*
|
||||
* By default, packfiles are created with only one thread.
|
||||
@ -104,13 +113,14 @@ void test_pack_packbuilder__create_pack(void)
|
||||
*
|
||||
*/
|
||||
|
||||
cl_git_pass(git_futils_readbuffer(&buf, "testpack.pack"));
|
||||
cl_git_pass(git_futils_readbuffer(&buf, git_buf_cstr(&path)));
|
||||
|
||||
cl_git_pass(git_hash_ctx_init(&ctx));
|
||||
cl_git_pass(git_hash_update(&ctx, buf.ptr, buf.size));
|
||||
cl_git_pass(git_hash_final(&hash, &ctx));
|
||||
git_hash_ctx_cleanup(&ctx);
|
||||
|
||||
git_buf_free(&path);
|
||||
git_buf_free(&buf);
|
||||
|
||||
git_oid_fmt(hex, &hash);
|
||||
|
Loading…
Reference in New Issue
Block a user