From c1af5a3935025f486156cdfe3b006700e73f0a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 6 Aug 2011 00:35:20 +0200 Subject: [PATCH] Implement cooperative caching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When indexing a file with ref deltas, a temporary cache for the offsets has to be built, as we don't have an index file yet. If the user takes the responsibility for filling the cache, the packing code will look there first when it finds a ref delta. Signed-off-by: Carlos Martín Nieto --- src/indexer.c | 40 ++++++++++++++++++++++++++++++++++++---- src/pack.c | 12 ++++++++++++ src/pack.h | 3 ++- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/indexer.c b/src/indexer.c index b874a35f0..0ab54f742 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -79,7 +79,7 @@ static int parse_header(git_indexer *idx) return GIT_SUCCESS; } -int objects_cmp(const void *a, const void *b) +static int objects_cmp(const void *a, const void *b) { const struct entry *entrya = a; const struct entry *entryb = b; @@ -87,6 +87,15 @@ int objects_cmp(const void *a, const void *b) return git_oid_cmp(&entrya->oid, &entryb->oid); } +static int cache_cmp(const void *a, const void *b) +{ + const struct git_pack_entry *ea = a; + const struct git_pack_entry *eb = b; + + return git_oid_cmp(&ea->sha1, &eb->sha1); +} + + int git_indexer_new(git_indexer **out, const char *packname) { git_indexer *idx; @@ -139,10 +148,14 @@ int git_indexer_new(git_indexer **out, const char *packname) idx->nr_objects = ntohl(idx->hdr.hdr_entries); - error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp); - if (error < GIT_SUCCESS) { + error = git_vector_init(&idx->pack->cache, idx->nr_objects, cache_cmp); + if (error < GIT_SUCCESS) + goto cleanup; + + idx->pack->has_cache = 1; + error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp); + if (error < GIT_SUCCESS) goto cleanup; - } *out = idx; @@ 
-250,6 +263,7 @@ int git_indexer_write(git_indexer *idx) /* Write out the packfile trailer */ packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); + git_mwindow_close(&w); if (packfile_hash == NULL) { error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash"); goto cleanup; @@ -276,6 +290,7 @@ int git_indexer_write(git_indexer *idx) error = git_filebuf_commit_at(&idx->file, filename); cleanup: + git_mwindow_free_all(&idx->pack->mwf); if (error < GIT_SUCCESS) git_filebuf_cleanup(&idx->file); @@ -303,6 +318,7 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) while (processed < idx->nr_objects) { git_rawobj obj; git_oid oid; + struct git_pack_entry *pentry; git_mwindow *w = NULL; char hdr[512] = {0}; /* FIXME: How long should this be? */ int i, hdr_len; @@ -326,12 +342,24 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) goto cleanup; } + /* FIXME: Parse the object instead of hashing it */ error = git_odb__hash_obj(&oid, hdr, sizeof(hdr), &hdr_len, &obj); if (error < GIT_SUCCESS) { error = git__rethrow(error, "Failed to hash object"); goto cleanup; } + pentry = git__malloc(sizeof(struct git_pack_entry)); + if (pentry == NULL) { + error = GIT_ENOMEM; + goto cleanup; + } + git_oid_cpy(&pentry->sha1, &oid); + pentry->offset = entry_start; + error = git_vector_insert(&idx->pack->cache, pentry); + if (error < GIT_SUCCESS) + goto cleanup; + git_oid_cpy(&entry->oid, &oid); entry->crc = crc32(0L, Z_NULL, 0); @@ -371,11 +399,15 @@ void git_indexer_free(git_indexer *idx) { unsigned int i; struct entry *e; + struct git_pack_entry *pe; p_close(idx->pack->mwf.fd); git_vector_foreach(&idx->objects, i, e) free(e); git_vector_free(&idx->objects); + git_vector_foreach(&idx->pack->cache, i, pe) + free(pe); + git_vector_free(&idx->pack->cache); free(idx->pack); free(idx); } diff --git a/src/pack.c b/src/pack.c index f0ebf9d37..4b43e7cf1 100644 --- a/src/pack.c +++ b/src/pack.c @@ 
-473,6 +473,18 @@ off_t get_delta_base( return 0; /* out of bound */ *curpos += used; } else if (type == GIT_OBJ_REF_DELTA) { + /* If we have the cooperative cache, search in it first */ + if (p->has_cache) { + int pos; + struct git_pack_entry key; + + git_oid_fromraw(&key.sha1, base_info); + pos = git_vector_bsearch(&p->cache, &key); + if (pos >= 0) { + *curpos += 20; + return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset; + } + } /* The base entry _must_ be in the same pack */ if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < GIT_SUCCESS) return git__rethrow(GIT_EPACKCORRUPTED, "Base entry delta is not in the same pack"); diff --git a/src/pack.h b/src/pack.h index a7112a6aa..164086fdf 100644 --- a/src/pack.h +++ b/src/pack.h @@ -77,8 +77,9 @@ struct git_pack_file { int index_version; git_time_t mtime; - unsigned pack_local:1, pack_keep:1; + unsigned pack_local:1, pack_keep:1, has_cache:1; git_oid sha1; + git_vector cache; /* something like ".git/objects/pack/xxxxx.pack" */ char pack_name[GIT_FLEX_ARRAY]; /* more */