libgit2/src/indexer.c
Russell Belfer 4aa7de1515 Convert indexer, notes, sha1_lookup, and signature
More files moved to new error handling style.
2012-03-19 17:49:46 -07:00

421 lines
9.2 KiB
C

/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include <zlib.h>
#include "git2/indexer.h"
#include "git2/object.h"
#include "git2/oid.h"
#include "common.h"
#include "pack.h"
#include "mwindow.h"
#include "posix.h"
#include "pack.h"
#include "filebuf.h"
#include "sha1.h"
/*
 * Largest pack offset that git_indexer_run() stores directly in the 32-bit
 * `offset` field of an entry; anything above it is recorded as a 64-bit
 * "long" offset instead.
 */
#define UINT31_MAX (0x7FFFFFFF)
/* One object found in the packfile, in the form the index writer consumes. */
struct entry {
/* Object id, obtained in git_indexer_run() by hashing the unpacked object. */
git_oid oid;
/* CRC32 of the object's packed bytes, already converted with htonl(). */
uint32_t crc;
/* Offset in the pack, or UINT32_MAX when the real value is in offset_long. */
uint32_t offset;
/* Full offset; only filled in when the offset exceeds UINT31_MAX. */
uint64_t offset_long;
};
/* State carried between git_indexer_new(), _run(), _write() and _free(). */
struct git_indexer {
/* The packfile being indexed; allocated with room for its name. */
struct git_pack_file *pack;
/* Result of p_stat() on the packfile; st_size gives the pack length. */
struct stat st;
/* Pack header as read from the start of the file by parse_header(). */
struct git_pack_header hdr;
/* Object count taken from the header (hdr_entries, host byte order). */
size_t nr_objects;
/* Vector of `struct entry *`, compared by object id. */
git_vector objects;
/* File buffer through which git_indexer_write() emits the index. */
git_filebuf file;
/* fanout[n] counts the objects whose first id byte is <= n. */
unsigned int fanout[256];
/* SHA-1 over the sorted object ids, set by git_indexer_write(). */
git_oid hash;
};
/*
 * Give the caller access to the hash stored in the indexer.
 *
 * The returned pointer refers to storage inside `idx`. The value is filled
 * in by git_indexer_write(), which hashes the sorted object ids.
 */
const git_oid *git_indexer_hash(git_indexer *idx)
{
	const git_oid *stored = &idx->hash;

	return stored;
}
/*
 * Read the pack header from the already opened packfile into idx->hdr and
 * make sure both its signature and its version are ones we understand.
 *
 * Returns 0 when the header is acceptable, the negative result of p_read()
 * when reading fails, and -1 when the signature or version is wrong.
 *
 * NOTE(review): only a negative p_read() result is treated as a failure;
 * confirm whether a short read of a truncated file needs handling here.
 */
static int parse_header(git_indexer *idx)
{
	struct git_pack_header *hdr = &idx->hdr;
	int rc;

	rc = p_read(idx->pack->mwf.fd, hdr, sizeof(*hdr));
	if (rc < 0) {
		giterr_set(GITERR_OS, "Failed to read in pack header");
		return rc;
	}

	/* The header is kept as read from disk, so compare in that byte order. */
	if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
		giterr_set(GITERR_INVALID, "Wrong pack signature");
		return -1;
	}

	if (!pack_version_ok(hdr->hdr_version)) {
		giterr_set(GITERR_INVALID, "Wrong pack version");
		return -1;
	}

	return 0;
}
static int objects_cmp(const void *a, const void *b)
{
const struct entry *entrya = a;
const struct entry *entryb = b;
return git_oid_cmp(&entrya->oid, &entryb->oid);
}
static int cache_cmp(const void *a, const void *b)
{
const struct git_pack_entry *ea = a;
const struct git_pack_entry *eb = b;
return git_oid_cmp(&ea->sha1, &eb->sha1);
}
/*
 * Create an indexer for the packfile at `packname`.
 *
 * The path must be absolute. The file is stat'ed and opened read-only, its
 * header is validated, and the object and cache vectors are sized from the
 * object count found in the header.
 *
 * On success `*out` receives the new indexer and 0 is returned. On failure
 * nothing is stored in `*out`; GIT_ENOTFOUND is returned when the packfile
 * does not exist and another negative value otherwise.
 */
int git_indexer_new(git_indexer **out, const char *packname)
{
	git_indexer *idx;
	size_t namelen;
	int fd, error;

	assert(out && packname);

	if (git_path_root(packname) < 0) {
		giterr_set(GITERR_INVALID, "Path is not absolute");
		return -1;
	}

	idx = git__calloc(1, sizeof(git_indexer));
	GITERR_CHECK_ALLOC(idx);

	namelen = strlen(packname);
	idx->pack = git__calloc(1, sizeof(struct git_pack_file) + namelen + 1);
	if (idx->pack == NULL) {
		/*
		 * git_indexer_free() dereferences idx->pack, so it cannot be
		 * used here; release the half-built indexer directly instead
		 * of leaking it.
		 */
		git__free(idx);
		return -1;
	}

	/*
	 * calloc left the descriptor at 0, which is a valid fd. Mark it as
	 * "not open" so that the cleanup path below does not close
	 * descriptor 0 when we fail before p_open() succeeds.
	 */
	idx->pack->mwf.fd = -1;

	memcpy(idx->pack->pack_name, packname, namelen + 1);

	if (p_stat(packname, &idx->st) < 0) {
		if (errno == ENOENT) {
			giterr_set(GITERR_OS, "Failed to stat packfile. File not found");
			error = GIT_ENOTFOUND;
		} else {
			giterr_set(GITERR_OS, "Failed to stat packfile.");
			error = -1;
		}

		goto cleanup;
	}

	if ((fd = p_open(idx->pack->pack_name, O_RDONLY)) < 0) {
		giterr_set(GITERR_OS, "Failed to open packfile.");
		error = -1;
		goto cleanup;
	}

	idx->pack->mwf.fd = fd;
	idx->pack->mwf.size = (git_off_t)idx->st.st_size;

	if ((error = parse_header(idx)) < 0)
		goto cleanup;

	idx->nr_objects = ntohl(idx->hdr.hdr_entries);

	/* for now, limit to 2^32 objects */
	assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));

	error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp);
	if (error < 0)
		goto cleanup;

	idx->pack->has_cache = 1;

	error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp);
	if (error < 0)
		goto cleanup;

	*out = idx;

	return 0;

cleanup:
	git_indexer_free(idx);

	return error;
}
/*
 * Rewrite `path` in place so that its last component becomes
 * "pack-<hex of idx->hash>.idx", keeping everything up to and including
 * the final '/'.
 *
 * Returns 0 on success and -1 when the buffer could not be grown.
 */
static int index_path(git_buf *path, git_indexer *idx)
{
	static const char prefix[] = "pack-";
	static const char suffix[] = ".idx";
	size_t dirlen, needed;

	/* Walk back from the end until just past the last '/' (or to 0). */
	for (dirlen = (size_t)path->size; dirlen > 0; dirlen--) {
		if (path->ptr[dirlen - 1] == '/')
			break;
	}

	/* Reserve everything up front so the direct write below is in bounds. */
	needed = dirlen + 1 + (sizeof(prefix) - 1) + GIT_OID_HEXSZ +
		(sizeof(suffix) - 1) + 1;
	if (git_buf_grow(path, needed) < 0)
		return -1;

	git_buf_truncate(path, dirlen);
	git_buf_puts(path, prefix);

	/* The hex id is formatted straight into the buffer's storage. */
	git_oid_fmt(path->ptr + path->size, &idx->hash);
	path->size += GIT_OID_HEXSZ;

	git_buf_puts(path, suffix);

	if (git_buf_oom(path))
		return -1;

	return 0;
}
/*
 * Write the version 2 pack index for the objects collected in `idx`.
 *
 * Sections are emitted in this order: index header, the 256-entry fanout
 * table, the sorted object ids, the CRC32 values, the 32-bit offsets, the
 * 64-bit offsets for entries flagged with UINT32_MAX, the trailing hash
 * copied from the end of the packfile, and the hash of the index contents.
 *
 * As a side effect the objects vector is sorted and idx->hash is set to
 * the SHA-1 of the sorted object ids; that hash names the final file
 * ("pack-<hash>.idx", next to the packfile).
 *
 * Returns a negative value on failure, in which case the partially
 * written file buffer is cleaned up; otherwise returns the result of
 * committing the file.
 */
int git_indexer_write(git_indexer *idx)
{
	git_mwindow *w = NULL;
	int error;
	unsigned int i, long_offsets = 0, left;
	struct git_pack_idx_header hdr;
	git_buf filename = GIT_BUF_INIT;
	struct entry *entry;
	void *packfile_hash;
	git_oid file_hash;
	SHA_CTX ctx;

	git_vector_sort(&idx->objects);

	/* Start from the pack's own name with the "pack" suffix swapped for "idx" */
	git_buf_sets(&filename, idx->pack->pack_name);
	git_buf_truncate(&filename, filename.size - strlen("pack"));
	git_buf_puts(&filename, "idx");
	if (git_buf_oom(&filename))
		return -1;

	error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS);
	if (error < 0)
		goto cleanup;

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
	if (error < 0)
		goto cleanup;

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		error = git_filebuf_write(&idx->file, &n, sizeof(n));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the object names (SHA-1 hashes) */
	SHA1_Init(&ctx);
	git_vector_foreach(&idx->objects, i, entry) {
		error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid));
		if (error < 0)
			goto cleanup;
		SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ);
	}
	SHA1_Final(idx->hash.id, &ctx);

	/* Write out the CRC32 values */
	git_vector_foreach(&idx->objects, i, entry) {
		error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/*
		 * Large offsets are replaced by the high bit plus the position
		 * of the entry in the long-offset table written below.
		 */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the long offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		/* Two big-endian 32-bit words: high half first, then low half */
		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
		if (error < 0)
			goto cleanup;
	}

	/* Write out the packfile trailer */
	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_hash == NULL) {
		git_mwindow_close(&w);
		error = -1;
		goto cleanup;
	}

	/*
	 * Copy the trailer out while the window is still held; it is only
	 * released once we no longer read through the mapped pointer.
	 */
	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
	git_mwindow_close(&w);

	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
	if (error < 0)
		goto cleanup;

	/* Write out the index sha */
	error = git_filebuf_hash(&file_hash, &idx->file);
	if (error < 0)
		goto cleanup;

	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
	if (error < 0)
		goto cleanup;

	/* Figure out what the final name should be */
	error = index_path(&filename, idx);
	if (error < 0)
		goto cleanup;

	/* Commit file */
	error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE);

cleanup:
	git_mwindow_free_all(&idx->pack->mwf);
	if (error < 0)
		git_filebuf_cleanup(&idx->file);
	git_buf_free(&filename);

	return error;
}
/*
 * Walk every object in the packfile and collect what the index needs.
 *
 * For each object this unpacks it, hashes it to obtain its id, records a
 * (id, offset) pair in the pack's cache, computes the CRC32 of the packed
 * bytes, appends a `struct entry` to idx->objects and updates the fanout
 * counts. `stats->total` is set up front and `stats->processed` is
 * advanced after each object.
 *
 * Returns a negative value on failure. Entries already inserted into the
 * vectors stay there and are released by git_indexer_free(); anything
 * allocated for the object that was in flight is released here.
 */
int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
{
	git_mwindow_file *mwf;
	git_off_t off = sizeof(struct git_pack_header);
	int error;
	/*
	 * These three are non-NULL only while this function still owns the
	 * allocation, so the cleanup path can free them unconditionally.
	 */
	struct entry *entry = NULL;
	struct git_pack_entry *pentry = NULL;
	void *obj_data = NULL;
	unsigned int left, processed;

	assert(idx && stats);

	mwf = &idx->pack->mwf;
	error = git_mwindow_file_register(mwf);
	if (error < 0)
		return error;

	stats->total = (unsigned int)idx->nr_objects;
	stats->processed = processed = 0;

	while (processed < idx->nr_objects) {
		git_rawobj obj;
		git_oid oid;
		git_mwindow *w = NULL;
		int i;
		git_off_t entry_start = off;
		void *packed;
		size_t entry_size;

		entry = git__calloc(1, sizeof(*entry));
		if (entry == NULL) {
			error = -1;
			goto cleanup;
		}

		if (off > UINT31_MAX) {
			entry->offset = UINT32_MAX;
			entry->offset_long = off;
		} else {
			entry->offset = (uint32_t)off;
		}

		/* Unpacking advances `off` past the object just read */
		error = git_packfile_unpack(&obj, idx->pack, &off);
		if (error < 0)
			goto cleanup;
		obj_data = obj.data;

		/* FIXME: Parse the object instead of hashing it */
		error = git_odb__hashobj(&oid, &obj);
		if (error < 0) {
			giterr_set(GITERR_INVALID, "Failed to hash object");
			goto cleanup;
		}

		pentry = git__malloc(sizeof(struct git_pack_entry));
		if (pentry == NULL) {
			error = -1;
			goto cleanup;
		}

		git_oid_cpy(&pentry->sha1, &oid);
		pentry->offset = entry_start;
		error = git_vector_insert(&idx->pack->cache, pentry);
		if (error < 0)
			goto cleanup;
		pentry = NULL; /* now owned by the cache vector */

		git_oid_cpy(&entry->oid, &oid);
		entry->crc = crc32(0L, Z_NULL, 0);

		/* The CRC covers the packed bytes between the old and new offset */
		entry_size = (size_t)(off - entry_start);
		packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
		if (packed == NULL) {
			git_mwindow_close(&w);
			error = -1;
			goto cleanup;
		}
		entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
		git_mwindow_close(&w);

		/* Add the object to the list */
		error = git_vector_insert(&idx->objects, entry);
		if (error < 0)
			goto cleanup;
		entry = NULL; /* now owned by the objects vector */

		/*
		 * Bump every bucket from the id's first byte upwards, so that
		 * fanout[n] counts the objects whose first byte is <= n.
		 */
		for (i = oid.id[0]; i < 256; ++i) {
			idx->fanout[i]++;
		}

		git__free(obj_data);
		obj_data = NULL;

		stats->processed = ++processed;
	}

cleanup:
	/* All NULL on the success path; on failure, whatever was in flight */
	git__free(pentry);
	git__free(entry);
	git__free(obj_data);

	git_mwindow_free_all(mwf);

	return error;
}
/*
 * Release an indexer: close the packfile descriptor, free every element
 * of the objects vector and of the pack's cache vector, then the vectors,
 * the pack structure and the indexer itself. A NULL `idx` is ignored.
 *
 * NOTE(review): the descriptor is closed unconditionally and idx->pack is
 * dereferenced without a check; confirm callers never pass an indexer
 * whose pack is missing or whose file was never opened.
 */
void git_indexer_free(git_indexer *idx)
{
	unsigned int pos;
	struct entry *obj;
	struct git_pack_entry *cached;

	if (!idx)
		return;

	p_close(idx->pack->mwf.fd);

	/* The vectors hold owning pointers, so free the elements first. */
	git_vector_foreach(&idx->objects, pos, obj) {
		git__free(obj);
	}
	git_vector_free(&idx->objects);

	git_vector_foreach(&idx->pack->cache, pos, cached) {
		git__free(cached);
	}
	git_vector_free(&idx->pack->cache);

	git__free(idx->pack);
	git__free(idx);
}