libgit2/src/odb_pack.c
2011-08-08 16:56:28 -07:00

501 lines
15 KiB
C

/*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2,
* as published by the Free Software Foundation.
*
* In addition to the permissions in the GNU General Public License,
* the authors give you unlimited permission to link the compiled
* version of this file into combinations with other programs,
* and to distribute those combinations without any restriction
* coming from the use of this file. (The General Public License
* restrictions do apply in other respects; for example, they cover
* modification of the file, and distribution when not linked into
* a combined executable.)
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "common.h"
#include "git2/zlib.h"
#include "git2/repository.h"
#include "git2/oid.h"
#include "fileops.h"
#include "hash.h"
#include "odb.h"
#include "delta-apply.h"
#include "sha1_lookup.h"
#include "mwindow.h"
#include "pack.h"
#include "git2/odb_backend.h"
struct pack_backend {
git_odb_backend parent;
git_vector packs;
struct git_pack_file *last_found;
char *pack_folder;
time_t pack_folder_mtime;
};
/**
* The wonderful tale of a Packed Object lookup query
* ===================================================
* A riveting and epic story of epicness and ASCII
* art, presented by yours truly,
* Sir Vicent of Marti
*
*
* Chapter 1: Once upon a time...
* Initialization of the Pack Backend
* --------------------------------------------------
*
* # git_odb_backend_pack
* | Creates the pack backend structure, initializes the
* | callback pointers to our default read() and exist() methods,
* | and tries to preload all the known packfiles in the ODB.
* |
* |-# packfile_load_all
* | Tries to find the `pack` folder, if it exists. ODBs without
* | a pack folder are ignored altogether. If there's a `pack` folder
* | we run a `dirent` callback through every file in the pack folder
* | to find our packfiles. The packfiles are then sorted according
* | to a sorting callback.
* |
* |-# packfile_load__cb
* | | This callback is called from `dirent` with every single file
* | | inside the pack folder. We find the packs by actually locating
* | | their index (ends in ".idx"). From that index, we verify that
* | | the corresponding packfile exists and is valid, and if so, we
* | | add it to the pack list.
* | |
* | |-# packfile_check
* | Make sure that there's a packfile to back this index, and store
* | some very basic information regarding the packfile itself,
* | such as the full path, the size, and the modification time.
* | We don't actually open the packfile to check for internal consistency.
* |
* |-# packfile_sort__cb
* Sort all the preloaded packs according to some specific criteria:
* we prioritize the "newer" packs because it's more likely they
* contain the objects we are looking for, and we prioritize local
* packs over remote ones.
*
*
*
* Chapter 2: To be, or not to be...
* A standard packed `exist` query for an OID
* --------------------------------------------------
*
* # pack_backend__exists
* | Check if the given SHA1 oid exists in any of the packs
* | that have been loaded for our ODB.
* |
* |-# pack_entry_find
* | Iterate through all the packs that have been preloaded
* | (starting by the pack where the latest object was found)
* | to try to find the OID in one of them.
* |
* |-# pack_entry_find1
* | Check the index of an individual pack to see if the SHA1
* | OID can be found. If we can find the offset to that SHA1
* | inside of the index, that means the object is contained
* | inside of the packfile and we can stop searching.
* | Before returning, we verify that the packfile behing the
* | index we are searching still exists on disk.
* |
* |-# pack_entry_find_offset
* | | Mmap the actual index file to disk if it hasn't been opened
* | | yet, and run a binary search through it to find the OID.
* | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
* | | on the Packfile Index format and how do we find entries in it.
* | |
* | |-# pack_index_open
* | | Guess the name of the index based on the full path to the
* | | packfile, open it and verify its contents. Only if the index
* | | has not been opened already.
* | |
* | |-# pack_index_check
* | Mmap the index file and do a quick run through the header
* | to guess the index version (right now we support v1 and v2),
* | and to verify that the size of the index makes sense.
* |
* |-# packfile_open
* See `packfile_open` in Chapter 3
*
*
*
* Chapter 3: The neverending story...
* A standard packed `lookup` query for an OID
* --------------------------------------------------
* TODO
*
*/
/***********************************************************
*
* FORWARD DECLARATIONS
*
***********************************************************/
static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
static int pack_window_contains(git_mwindow *win, off_t offset);
static int packfile_sort__cb(const void *a_, const void *b_);
static int packfile_load__cb(void *_data, char *path);
static int packfile_refresh_all(struct pack_backend *backend);
static int pack_entry_find(struct git_pack_entry *e,
struct pack_backend *backend, const git_oid *oid);
/* Can find the offset of an object given
* a prefix of an identifier.
* Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
* is ambiguous.
* This method assumes that len is between
* GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
*/
static int pack_entry_find_prefix(struct git_pack_entry *e,
struct pack_backend *backend,
const git_oid *short_oid,
unsigned int len);
/***********************************************************
*
* PACK WINDOW MANAGEMENT
*
***********************************************************/
GIT_INLINE(void) pack_window_free_all(struct pack_backend *GIT_UNUSED(backend), struct git_pack_file *p)
{
GIT_UNUSED_ARG(backend);
git_mwindow_free_all(&p->mwf);
}
GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
{
/* We must promise at least 20 bytes (one hash) after the
* offset is available from this window, otherwise the offset
* is not actually in this window and a different window (which
* has that one hash excess) must be used. This is to support
* the object header and delta base parsing routines below.
*/
return git_mwindow_contains(win, offset + 20);
}
static int packfile_sort__cb(const void *a_, const void *b_)
{
const struct git_pack_file *a = a_;
const struct git_pack_file *b = b_;
int st;
/*
* Local packs tend to contain objects specific to our
* variant of the project than remote ones. In addition,
* remote ones could be on a network mounted filesystem.
* Favor local ones for these reasons.
*/
st = a->pack_local - b->pack_local;
if (st)
return -st;
/*
* Younger packs tend to contain more recent objects,
* and more recent objects tend to get accessed more
* often.
*/
if (a->mtime < b->mtime)
return 1;
else if (a->mtime == b->mtime)
return 0;
return -1;
}
static int packfile_load__cb(void *_data, char *path)
{
struct pack_backend *backend = (struct pack_backend *)_data;
struct git_pack_file *pack;
int error;
size_t i;
if (git__suffixcmp(path, ".idx") != 0)
return GIT_SUCCESS; /* not an index */
for (i = 0; i < backend->packs.length; ++i) {
struct git_pack_file *p = git_vector_get(&backend->packs, i);
if (memcmp(p->pack_name, path, strlen(path) - STRLEN(".idx")) == 0)
return GIT_SUCCESS;
}
error = git_packfile_check(&pack, path);
if (error < GIT_SUCCESS)
return git__rethrow(error, "Failed to load packfile");
if (git_vector_insert(&backend->packs, pack) < GIT_SUCCESS) {
free(pack);
return GIT_ENOMEM;
}
return GIT_SUCCESS;
}
static int packfile_refresh_all(struct pack_backend *backend)
{
int error;
struct stat st;
if (backend->pack_folder == NULL)
return GIT_SUCCESS;
if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
return git__throw(GIT_ENOTFOUND, "Failed to refresh packfiles. Backend not found");
if (st.st_mtime != backend->pack_folder_mtime) {
char path[GIT_PATH_MAX];
strcpy(path, backend->pack_folder);
/* reload all packs */
error = git_futils_direach(path, GIT_PATH_MAX, packfile_load__cb, (void *)backend);
if (error < GIT_SUCCESS)
return git__rethrow(error, "Failed to refresh packfiles");
git_vector_sort(&backend->packs);
backend->pack_folder_mtime = st.st_mtime;
}
return GIT_SUCCESS;
}
static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
{
int error;
size_t i;
if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
return git__rethrow(error, "Failed to find pack entry");
if (backend->last_found &&
git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == GIT_SUCCESS)
return GIT_SUCCESS;
for (i = 0; i < backend->packs.length; ++i) {
struct git_pack_file *p;
p = git_vector_get(&backend->packs, i);
if (p == backend->last_found)
continue;
if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) {
backend->last_found = p;
return GIT_SUCCESS;
}
}
return git__throw(GIT_ENOTFOUND, "Failed to find pack entry");
}
static int pack_entry_find_prefix(
struct git_pack_entry *e,
struct pack_backend *backend,
const git_oid *short_oid,
unsigned int len)
{
int error;
size_t i;
unsigned found = 0;
if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
return git__rethrow(error, "Failed to find pack entry");
if (backend->last_found) {
error = git_pack_entry_find(e, backend->last_found, short_oid, len);
if (error == GIT_EAMBIGUOUSOIDPREFIX) {
return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");
} else if (error == GIT_SUCCESS) {
found = 1;
}
}
for (i = 0; i < backend->packs.length; ++i) {
struct git_pack_file *p;
p = git_vector_get(&backend->packs, i);
if (p == backend->last_found)
continue;
error = git_pack_entry_find(e, p, short_oid, len);
if (error == GIT_EAMBIGUOUSOIDPREFIX) {
return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");
} else if (error == GIT_SUCCESS) {
found++;
if (found > 1)
break;
backend->last_found = p;
}
}
if (!found) {
return git__rethrow(GIT_ENOTFOUND, "Failed to find pack entry");
} else if (found > 1) {
return git__rethrow(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find pack entry. Ambiguous sha1 prefix");
} else {
return GIT_SUCCESS;
}
}
/***********************************************************
*
* PACKED BACKEND PUBLIC API
*
* Implement the git_odb_backend API calls
*
***********************************************************/
/*
int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
{
pack_location location;
assert(obj && backend && oid);
if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
return GIT_ENOTFOUND;
return read_header_packed(obj, &location);
}
*/
int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
{
struct git_pack_entry e;
git_rawobj raw;
int error;
if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < GIT_SUCCESS)
return git__rethrow(error, "Failed to read pack backend");
if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
return git__rethrow(error, "Failed to read pack backend");
*buffer_p = raw.data;
*len_p = raw.len;
*type_p = raw.type;
return GIT_SUCCESS;
}
int pack_backend__read_prefix(
git_oid *out_oid,
void **buffer_p,
size_t *len_p,
git_otype *type_p,
git_odb_backend *backend,
const git_oid *short_oid,
unsigned int len)
{
if (len < GIT_OID_MINPREFIXLEN)
return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to read pack backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN);
if (len >= GIT_OID_HEXSZ) {
/* We can fall back to regular read method */
int error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
if (error == GIT_SUCCESS)
git_oid_cpy(out_oid, short_oid);
return error;
} else {
struct git_pack_entry e;
git_rawobj raw;
int error;
if ((error = pack_entry_find_prefix(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS)
return git__rethrow(error, "Failed to read pack backend");
if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
return git__rethrow(error, "Failed to read pack backend");
*buffer_p = raw.data;
*len_p = raw.len;
*type_p = raw.type;
git_oid_cpy(out_oid, &e.sha1);
}
return GIT_SUCCESS;
}
int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
{
struct git_pack_entry e;
return pack_entry_find(&e, (struct pack_backend *)backend, oid) == GIT_SUCCESS;
}
void pack_backend__free(git_odb_backend *_backend)
{
struct pack_backend *backend;
size_t i;
assert(_backend);
backend = (struct pack_backend *)_backend;
for (i = 0; i < backend->packs.length; ++i) {
struct git_pack_file *p = git_vector_get(&backend->packs, i);
packfile_free(p);
}
git_vector_free(&backend->packs);
free(backend->pack_folder);
free(backend);
}
int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
{
struct pack_backend *backend;
char path[GIT_PATH_MAX];
backend = git__calloc(1, sizeof(struct pack_backend));
if (backend == NULL)
return GIT_ENOMEM;
if (git_vector_init(&backend->packs, 8, packfile_sort__cb) < GIT_SUCCESS) {
free(backend);
return GIT_ENOMEM;
}
git_path_join(path, objects_dir, "pack");
if (git_futils_isdir(path) == GIT_SUCCESS) {
backend->pack_folder = git__strdup(path);
backend->pack_folder_mtime = 0;
if (backend->pack_folder == NULL) {
free(backend);
return GIT_ENOMEM;
}
}
backend->parent.read = &pack_backend__read;
backend->parent.read_prefix = &pack_backend__read_prefix;
backend->parent.read_header = NULL;
backend->parent.exists = &pack_backend__exists;
backend->parent.free = &pack_backend__free;
*backend_out = (git_odb_backend *)backend;
return GIT_SUCCESS;
}