mirror of
https://git.proxmox.com/git/libgit2
synced 2025-08-03 16:19:41 +00:00
Merge pull request #2972 from libgit2/cmn/pack-objects-walk
[WIP] Smarter pack-building
This commit is contained in:
commit
7800048afb
@ -114,6 +114,19 @@ GIT_EXTERN(int) git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *
|
||||
*/
|
||||
GIT_EXTERN(int) git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *id);
|
||||
|
||||
/**
|
||||
* Insert objects as given by the walk
|
||||
*
|
||||
* The commits yielded by the walk and all objects they reference will be
|
||||
* inserted into the packbuilder.
|
||||
*
|
||||
* @param pb the packbuilder
|
||||
* @param walk the revwalk to use to fill the packbuilder
|
||||
*
|
||||
* @return 0 or an error code
|
||||
*/
|
||||
GIT_EXTERN(int) git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk);
|
||||
|
||||
/**
|
||||
* Write the contents of the packfile to an in-memory buffer
|
||||
*
|
||||
|
@ -19,6 +19,8 @@
|
||||
#include "vector.h"
|
||||
#include "repository.h"
|
||||
|
||||
GIT__USE_OIDMAP;
|
||||
|
||||
/* Ported from https://github.com/git/git/blob/89dde7882f71f846ccd0359756d27bebc31108de/builtin/describe.c */
|
||||
|
||||
struct commit_name {
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include "oidmap.h"
|
||||
#include "zstream.h"
|
||||
|
||||
GIT__USE_OIDMAP;
|
||||
|
||||
extern git_mutex git__mwindow_mutex;
|
||||
|
||||
#define UINT31_MAX (0x7FFFFFFF)
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include "thread-utils.h"
|
||||
#include "tree.h"
|
||||
#include "util.h"
|
||||
#include "revwalk.h"
|
||||
#include "commit_list.h"
|
||||
|
||||
#include "git2/pack.h"
|
||||
#include "git2/commit.h"
|
||||
@ -39,6 +41,8 @@ struct pack_write_context {
|
||||
git_transfer_progress *stats;
|
||||
};
|
||||
|
||||
GIT__USE_OIDMAP;
|
||||
|
||||
#ifdef GIT_THREADS
|
||||
|
||||
#define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
|
||||
@ -124,10 +128,16 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
|
||||
GITERR_CHECK_ALLOC(pb);
|
||||
|
||||
pb->object_ix = git_oidmap_alloc();
|
||||
|
||||
if (!pb->object_ix)
|
||||
goto on_error;
|
||||
|
||||
pb->walk_objects = git_oidmap_alloc();
|
||||
if (!pb->walk_objects)
|
||||
goto on_error;
|
||||
|
||||
if (git_pool_init(&pb->object_pool, sizeof(git_walk_object), 0) < 0)
|
||||
goto on_error;
|
||||
|
||||
pb->repo = repo;
|
||||
pb->nr_threads = 1; /* do not spawn any thread by default */
|
||||
|
||||
@ -1345,6 +1355,7 @@ const git_oid *git_packbuilder_hash(git_packbuilder *pb)
|
||||
return &pb->pack_oid;
|
||||
}
|
||||
|
||||
|
||||
static int cb_tree_walk(
|
||||
const char *root, const git_tree_entry *entry, void *payload)
|
||||
{
|
||||
@ -1403,6 +1414,235 @@ uint32_t git_packbuilder_written(git_packbuilder *pb)
|
||||
return pb->nr_written;
|
||||
}
|
||||
|
||||
/*
 * Allocate a fresh walk object from the packbuilder's object pool and
 * stamp it with `id`.
 *
 * Note that this only allocates: it does not consult or update
 * pb->walk_objects. Callers are responsible for registering the result.
 *
 * @param out receives the newly allocated object
 * @param pb  packbuilder whose object_pool backs the allocation
 * @param id  object id to copy into the new walk object
 * @return 0 on success, -1 (with the OOM error set) if allocation fails
 */
int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *entry = git_pool_mallocz(&pb->object_pool, 1);

	if (entry == NULL) {
		giterr_set_oom();
		return -1;
	}

	git_oid_cpy(&entry->id, id);
	*out = entry;

	return 0;
}
|
||||
|
||||
/*
 * Fetch the walk object for `id`, creating and registering it in
 * pb->walk_objects if it has not been seen before.
 *
 * @param out receives the (existing or newly created) walk object
 * @param pb  packbuilder owning the walk_objects map
 * @param id  object id to look up
 * @return 0 on success, a negative error code if the object could not
 *         be allocated or stored in the map
 */
static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
{
	int error;
	khiter_t pos;
	git_walk_object *obj;

	pos = git_oidmap_lookup_index(pb->walk_objects, id);
	if (git_oidmap_valid_index(pb->walk_objects, pos)) {
		obj = git_oidmap_value_at(pb->walk_objects, pos);
	} else {
		if ((error = lookup_walk_object(&obj, pb, id)) < 0)
			return error;

		/*
		 * The map keys on the id embedded in the object itself, so the
		 * key stays valid for as long as the pool allocation does.
		 */
		git_oidmap_insert(pb->walk_objects, &obj->id, obj, error);

		/*
		 * git_oidmap_insert reports its status through `error`; a
		 * negative value means the map could not store the entry
		 * (presumably the khash kh_put convention for allocation
		 * failure). Do not hand back an object the map does not
		 * know about, or flags set on it would be lost on the next
		 * lookup.
		 */
		if (error < 0) {
			giterr_set_oom();
			return -1;
		}
	}

	*out = obj;
	return 0;
}
|
||||
|
||||
/*
 * Flag the blob `id` as uninteresting in the packbuilder's walk-object
 * table, creating its entry if necessary.
 *
 * @return 0 on success, or the error from retrieve_object()
 */
static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *blob;
	int error = retrieve_object(&blob, pb, id);

	if (error < 0)
		return error;

	blob->uninteresting = 1;
	return 0;
}
|
||||
|
||||
/*
 * Flag the tree `id`, and recursively every tree and blob below it, as
 * uninteresting.
 *
 * A tree that already carries the flag is not descended into again, so
 * shared subtrees are only walked once.
 *
 * @return 0 on success, or a negative error code from the object
 *         table or the tree lookup
 */
static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *walk_obj;
	git_tree *tree;
	size_t idx;
	int error;

	error = retrieve_object(&walk_obj, pb, id);
	if (error < 0)
		return error;

	/* already handled on an earlier visit */
	if (walk_obj->uninteresting)
		return 0;

	walk_obj->uninteresting = 1;

	error = git_tree_lookup(&tree, pb->repo, id);
	if (error < 0)
		return error;

	for (idx = 0; idx < git_tree_entrycount(tree); idx++) {
		const git_tree_entry *entry = git_tree_entry_byindex(tree, idx);
		const git_oid *entry_id = git_tree_entry_id(entry);

		switch (git_tree_entry_type(entry)) {
		case GIT_OBJ_TREE:
			error = mark_tree_uninteresting(pb, entry_id);
			break;
		case GIT_OBJ_BLOB:
			error = mark_blob_uninteresting(pb, entry_id);
			break;
		default:
			/* it's a submodule or something unknown, we don't want it */
			break;
		}

		/* stop at the first failure; the tree is released below */
		if (error < 0)
			break;
	}

	git_tree_free(tree);
	return error;
}
|
||||
|
||||
/*
|
||||
* Mark the edges of the graph uninteresting. Since we start from a
|
||||
* git_revwalk, the commits are already uninteresting, but we need to
|
||||
* mark the trees and blobs.
|
||||
*/
|
||||
static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
|
||||
{
|
||||
int error;
|
||||
git_commit_list *list;
|
||||
git_commit *commit;
|
||||
|
||||
for (list = commits; list; list = list->next) {
|
||||
if (!list->item->uninteresting)
|
||||
continue;
|
||||
|
||||
if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
|
||||
return error;
|
||||
|
||||
error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
|
||||
git_commit_free(commit);
|
||||
|
||||
if (error < 0)
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Insert `tree`, and recursively every subtree and blob it references,
 * into the packbuilder.
 *
 * Trees that were already visited (`seen`) or that have been flagged
 * `uninteresting` are skipped together with everything below them, and
 * blobs flagged `uninteresting` are skipped individually. Without these
 * checks the flags set by mark_edges_uninteresting() would have no
 * effect and objects reachable from hidden commits would still be
 * packed.
 *
 * @param pb   packbuilder to insert into
 * @param tree tree to insert; ownership stays with the caller
 * @return 0 on success, or a negative error code
 */
int insert_tree(git_packbuilder *pb, git_tree *tree)
{
	size_t i;
	int error;
	git_tree *subtree;
	git_walk_object *obj;
	const char *name;

	if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
		return error;

	/* nothing to do for trees we have handled or must not send */
	if (obj->seen || obj->uninteresting)
		return 0;

	obj->seen = 1;

	if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
		return error;

	for (i = 0; i < git_tree_entrycount(tree); i++) {
		const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
		const git_oid *entry_id = git_tree_entry_id(entry);

		switch (git_tree_entry_type(entry)) {
		case GIT_OBJ_TREE:
			if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
				return error;

			/* the recursive call performs the seen/uninteresting checks */
			error = insert_tree(pb, subtree);
			git_tree_free(subtree);

			if (error < 0)
				return error;

			break;
		case GIT_OBJ_BLOB:
			if ((error = retrieve_object(&obj, pb, entry_id)) < 0)
				return error;

			/* the blob is reachable from an uninteresting tree; skip it */
			if (obj->uninteresting)
				continue;

			name = git_tree_entry_name(entry);
			if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
				return error;

			break;
		default:
			/* it's a submodule or something unknown, we don't want it */
			break;
		}
	}

	return error;
}
|
||||
|
||||
/*
 * Insert the commit described by `obj`, followed by its root tree and
 * everything that tree references, into the packbuilder.
 *
 * The object is marked `seen` up front, before any insertion is
 * attempted.
 *
 * @param pb  packbuilder to insert into
 * @param obj walk object whose id names the commit
 * @return 0 on success, or a negative error code
 */
int insert_commit(git_packbuilder *pb, git_walk_object *obj)
{
	git_commit *commit = NULL;
	git_tree *root = NULL;
	int error;

	obj->seen = 1;

	error = git_packbuilder_insert(pb, &obj->id, NULL);
	if (error < 0)
		return error;

	error = git_commit_lookup(&commit, pb->repo, &obj->id);
	if (error < 0)
		return error;

	error = git_tree_lookup(&root, pb->repo, git_commit_tree_id(commit));
	if (error >= 0)
		error = insert_tree(pb, root);

	/* root is still NULL here if the tree lookup failed */
	git_commit_free(commit);
	git_tree_free(root);

	return error;
}
|
||||
|
||||
/*
 * Insert every commit produced by `walk`, together with the trees and
 * blobs those commits reference, into the packbuilder.
 *
 * The trees of the uninteresting commits in the walk's user input are
 * flagged first, then each commit yielded by the walk is inserted
 * unless it was already seen or is flagged uninteresting.
 *
 * @param pb   the packbuilder
 * @param walk the revwalk to use to fill the packbuilder
 * @return 0 on success, or a negative error code; running off the end
 *         of the walk (GIT_ITEROVER) counts as success, any other
 *         revwalk failure is propagated to the caller
 */
int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
{
	int error;
	git_oid id;
	git_walk_object *obj;

	assert(pb && walk);

	if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
		return error;

	/*
	 * TODO: git marks the parents of the edges
	 * uninteresting. This may provide a speed advantage, but does
	 * seem to assume the remote does not have a single-commit
	 * history on the other end.
	 */

	/* walk down each tree up to the blobs and insert them, stopping when uninteresting */
	while ((error = git_revwalk_next(&id, walk)) == 0) {
		if ((error = retrieve_object(&obj, pb, &id)) < 0)
			return error;

		if (obj->seen || obj->uninteresting)
			continue;

		if ((error = insert_commit(pb, obj)) < 0)
			return error;
	}

	/* exhausting the walk is the normal way out of the loop */
	if (error == GIT_ITEROVER)
		error = 0;

	/* anything else is a genuine revwalk failure and must not be hidden */
	return error;
}
|
||||
|
||||
int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
|
||||
{
|
||||
if (!pb)
|
||||
@ -1436,6 +1676,9 @@ void git_packbuilder_free(git_packbuilder *pb)
|
||||
if (pb->object_list)
|
||||
git__free(pb->object_list);
|
||||
|
||||
git_oidmap_free(pb->walk_objects);
|
||||
git_pool_clear(&pb->object_pool);
|
||||
|
||||
git_hash_ctx_cleanup(&pb->ctx);
|
||||
git_zstream_free(&pb->zstream);
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "oidmap.h"
|
||||
#include "netops.h"
|
||||
#include "zstream.h"
|
||||
#include "pool.h"
|
||||
|
||||
#include "git2/oid.h"
|
||||
#include "git2/pack.h"
|
||||
@ -50,6 +51,12 @@ typedef struct git_pobject {
|
||||
filled:1;
|
||||
} git_pobject;
|
||||
|
||||
typedef struct {
|
||||
git_oid id;
|
||||
unsigned int uninteresting:1,
|
||||
seen:1;
|
||||
} git_walk_object;
|
||||
|
||||
struct git_packbuilder {
|
||||
git_repository *repo; /* associated repository */
|
||||
git_odb *odb; /* associated object database */
|
||||
@ -66,6 +73,9 @@ struct git_packbuilder {
|
||||
|
||||
git_oidmap *object_ix;
|
||||
|
||||
git_oidmap *walk_objects;
|
||||
git_pool object_pool;
|
||||
|
||||
git_oid pack_oid; /* hash of written pack */
|
||||
|
||||
/* synchronization objects */
|
||||
|
@ -16,6 +16,9 @@
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
GIT__USE_OFFMAP;
|
||||
GIT__USE_OIDMAP;
|
||||
|
||||
static int packfile_open(struct git_pack_file *p);
|
||||
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
|
||||
int packfile_unpack_compressed(
|
||||
|
@ -71,9 +71,7 @@ struct pack_chain_elem {
|
||||
typedef git_array_t(struct pack_chain_elem) git_dependency_chain;
|
||||
|
||||
#include "offmap.h"
|
||||
|
||||
GIT__USE_OFFMAP
|
||||
GIT__USE_OIDMAP
|
||||
#include "oidmap.h"
|
||||
|
||||
#define GIT_PACK_CACHE_MEMORY_LIMIT 16 * 1024 * 1024
|
||||
#define GIT_PACK_CACHE_SIZE_LIMIT 1024 * 1024 /* don't bother caching anything over 1MB */
|
||||
|
@ -14,6 +14,8 @@
|
||||
#include "git2/revparse.h"
|
||||
#include "merge.h"
|
||||
|
||||
GIT__USE_OIDMAP;
|
||||
|
||||
git_commit_list_node *git_revwalk__commit_lookup(
|
||||
git_revwalk *walk, const git_oid *oid)
|
||||
{
|
||||
|
@ -14,7 +14,7 @@
|
||||
#include "pool.h"
|
||||
#include "vector.h"
|
||||
|
||||
GIT__USE_OIDMAP
|
||||
#include "oidmap.h"
|
||||
|
||||
struct git_revwalk {
|
||||
git_repository *repo;
|
||||
|
@ -513,7 +513,6 @@ static int local_download_pack(
|
||||
git_remote_head *rhead;
|
||||
unsigned int i;
|
||||
int error = -1;
|
||||
git_oid oid;
|
||||
git_packbuilder *pack = NULL;
|
||||
git_odb_writepack *writepack = NULL;
|
||||
git_odb *odb = NULL;
|
||||
@ -539,15 +538,22 @@ static int local_download_pack(
|
||||
if (git_object_type(obj) == GIT_OBJ_COMMIT) {
|
||||
/* Revwalker includes only wanted commits */
|
||||
error = git_revwalk_push(walk, &rhead->oid);
|
||||
if (!git_oid_iszero(&rhead->loid))
|
||||
if (!error && !git_oid_iszero(&rhead->loid)) {
|
||||
error = git_revwalk_hide(walk, &rhead->loid);
|
||||
if (error == GIT_ENOTFOUND)
|
||||
error = 0;
|
||||
}
|
||||
} else {
|
||||
/* Tag or some other wanted object. Add it on its own */
|
||||
error = git_packbuilder_insert(pack, &rhead->oid, rhead->name);
|
||||
}
|
||||
git_object_free(obj);
|
||||
if (error < 0)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if ((error = git_packbuilder_insert_walk(pack, walk)))
|
||||
goto cleanup;
|
||||
|
||||
if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0)
|
||||
goto cleanup;
|
||||
|
||||
@ -559,35 +565,6 @@ static int local_download_pack(
|
||||
if ((error = git_repository_odb__weakptr(&odb, repo)) < 0)
|
||||
goto cleanup;
|
||||
|
||||
while ((error = git_revwalk_next(&oid, walk)) == 0) {
|
||||
git_commit *commit;
|
||||
|
||||
/* Skip commits we already have */
|
||||
if (git_odb_exists(odb, &oid)) continue;
|
||||
|
||||
if (!git_object_lookup((git_object**)&commit, t->repo, &oid, GIT_OBJ_COMMIT)) {
|
||||
const git_oid *tree_oid = git_commit_tree_id(commit);
|
||||
|
||||
/* Add the commit and its tree */
|
||||
if ((error = git_packbuilder_insert(pack, &oid, NULL)) < 0 ||
|
||||
(error = git_packbuilder_insert_tree(pack, tree_oid)) < 0) {
|
||||
git_commit_free(commit);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
git_commit_free(commit);
|
||||
|
||||
git_buf_clear(&progress_info);
|
||||
if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0)
|
||||
goto cleanup;
|
||||
|
||||
if (t->progress_cb &&
|
||||
(error = t->progress_cb(git_buf_cstr(&progress_info), git_buf_len(&progress_info), t->message_cb_payload)) < 0)
|
||||
goto cleanup;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* One last one with the newline */
|
||||
git_buf_clear(&progress_info);
|
||||
git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack));
|
||||
@ -615,6 +592,7 @@ static int local_download_pack(
|
||||
if ((error = git_packbuilder_foreach(pack, foreach_cb, &data)) != 0)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
error = writepack->commit(writepack, stats);
|
||||
|
||||
cleanup:
|
||||
|
Loading…
Reference in New Issue
Block a user