From 7bfdb3d22bee39273e5861eeb4b77e4c8b5225b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Tue, 28 Jun 2011 20:39:30 +0200 Subject: [PATCH 01/12] Factor out the mmap window code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This code is useful for more things than just the packfile handling code. Factor it out so it can be reused. Signed-off-by: Carlos Martín Nieto --- src/mwindow.c | 271 +++++++++++++++++++++++++++++++++++++++++++++++++ src/mwindow.h | 64 ++++++++++++ src/odb_pack.c | 264 +++++++---------------------------------------- 3 files changed, 372 insertions(+), 227 deletions(-) create mode 100644 src/mwindow.c create mode 100644 src/mwindow.h diff --git a/src/mwindow.c b/src/mwindow.c new file mode 100644 index 000000000..3ac585720 --- /dev/null +++ b/src/mwindow.c @@ -0,0 +1,271 @@ +/* + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * In addition to the permissions in the GNU General Public License, + * the authors give you unlimited permission to link the compiled + * version of this file into combinations with other programs, + * and to distribute those combinations without any restriction + * coming from the use of this file. (The General Public License + * restrictions do apply in other respects; for example, they cover + * modification of the file, and distribution when not linked into + * a combined executable.) + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. 
If not, write to + * the Free Software Foundation, 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "common.h" +#include "mwindow.h" +#include "vector.h" +#include "fileops.h" +#include "map.h" + +#define DEFAULT_WINDOW_SIZE \ + (sizeof(void*) >= 8 \ + ? 1 * 1024 * 1024 * 1024 \ + : 32 * 1024 * 1024) + +#define DEFAULT_MAPPED_LIMIT \ + ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) + +/* + * We need this because each process is only allowed a specific amount + * of memory. Making it writable should generate one instance per + * process, but we still need to set a couple of variables. + */ + +static git_mwindow_ctl ctl = { + .window_size = DEFAULT_WINDOW_SIZE, + .mapped_limit = DEFAULT_MAPPED_LIMIT +}; + +/* + * Free all the windows in a sequence, typically because we're done + * with the file + */ +void git_mwindow_free_all(git_mwindow_file *mwf) +{ + unsigned int i; + /* + * Remove these windows from the global list + */ + for (i = 0; i < ctl.windowfiles.length; ++i){ + if (git_vector_get(&ctl.windowfiles, i) == mwf) { + git_vector_remove(&ctl.windowfiles, i); + break; + } + } + + if (ctl.windowfiles.length == 0) { + git_vector_free(&ctl.windowfiles); + ctl.windowfiles.contents = NULL; + } + + while (mwf->windows) { + git_mwindow *w = mwf->windows; + assert(w->inuse_cnt == 0); + + ctl.mapped -= w->window_map.len; + ctl.open_windows--; + + git_futils_mmap_free(&w->window_map); + + mwf->windows = w->next; + free(w); + } +} + +/* + * Check if a window 'win' contains the address 'offset' + */ +int git_mwindow_contains(git_mwindow *win, off_t offset) +{ + off_t win_off = win->offset; + return win_off <= offset + && offset <= (off_t)(win_off + win->window_map.len); +} + +/* + * Find the least-recently-used window in a file + */ +void git_mwindow_scan_lru( + git_mwindow_file *mwf, + git_mwindow **lru_w, + git_mwindow **lru_l) +{ + git_mwindow *w, *w_l; + + for (w_l = NULL, w = mwf->windows; w; w = w->next) { + if (!w->inuse_cnt) { 
+ /* + * If the current one is more recent than the last one, + * store it in the output parameter. If lru_w is NULL, + * it's the first loop, so store it as well. + */ + if (!*lru_w || w->last_used < (*lru_w)->last_used) { + *lru_w = w; + *lru_l = w_l; + } + } + w_l = w; + } +} + +/* + * Close the least recently used window. You should check to see if + * the file descriptors need closing from time to time. + */ +int git_mwindow_close_lru(git_mwindow_file *mwf) +{ + unsigned int i; + git_mwindow *lru_w = NULL, *lru_l = NULL; + + /* FIMXE: Does this give us any advantage? */ + if(mwf->windows) + git_mwindow_scan_lru(mwf, &lru_w, &lru_l); + + for (i = 0; i < ctl.windowfiles.length; ++i) { + git_mwindow_scan_lru(git_vector_get(&ctl.windowfiles, i), &lru_w, &lru_l); + } + + if (lru_w) { + git_mwindow_close(&lru_w); + ctl.mapped -= lru_w->window_map.len; + git_futils_mmap_free(&lru_w->window_map); + + if (lru_l) + lru_l->next = lru_w->next; + else + mwf->windows = lru_w->next; + + free(lru_w); + ctl.open_windows--; + + return GIT_SUCCESS; + } + + return git__throw(GIT_ERROR, "Failed to close memory window. Couln't find LRU"); +} + +static git_mwindow *new_window(git_mwindow_file *mwf, git_file fd, size_t size, off_t offset) +{ + size_t walign = ctl.window_size / 2; + size_t len; + git_mwindow *w; + + w = git__malloc(sizeof(*w)); + if (w == NULL) + return w; + + memset(w, 0x0, sizeof(*w)); + w->offset = (offset / walign) * walign; + + len = size - w->offset; + if (len > ctl.window_size) + len = ctl.window_size; + + ctl.mapped += len; + + while(ctl.mapped_limit < ctl.mapped && + git_mwindow_close_lru(mwf) == GIT_SUCCESS) {} + + /* FIXME: Shouldn't we error out if there's an error in closing lru? 
*/ + + if (git_futils_mmap_ro(&w->window_map, fd, w->offset, len) < GIT_SUCCESS) + goto cleanup; + + ctl.mmap_calls++; + ctl.open_windows++; + + if (ctl.mapped > ctl.peak_mapped) + ctl.peak_mapped = ctl.mapped; + + if (ctl.open_windows > ctl.peak_open_windows) + ctl.peak_open_windows = ctl.open_windows; + + return w; + +cleanup: + free(w); + return NULL; +} + +/* + * Open a new window, closing the least recenty used until we have + * enough space. Don't forget to add it to your list + */ +unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, git_file fd, + size_t size, off_t offset, int extra, unsigned int *left) +{ + git_mwindow *w = *cursor; + + if (!w || !git_mwindow_contains(w, offset + extra)) { + if (w) { + w->inuse_cnt--; + } + + for (w = mwf->windows; w; w = w->next) { + if (git_mwindow_contains(w, offset + extra)) + break; + } + + /* + * If there isn't a suitable window, we need to create a new + * one. + */ + if (!w) { + w = new_window(mwf, fd, size, offset); + if (w == NULL) + return NULL; + w->next = mwf->windows; + mwf->windows = w; + } + } + + /* If we changed w, store it in the cursor */ + if (w != *cursor) { + w->last_used = ctl.used_ctr++; + w->inuse_cnt++; + *cursor = w; + } + + offset -= w->offset; + assert(git__is_sizet(offset)); + + if (left) + *left = w->window_map.len - offset; + + return (unsigned char *) w->window_map.data + offset; + + free(w); + return NULL; +} + +int git_mwindow_file_register(git_mwindow_file *mwf) +{ + int error; + + if (ctl.windowfiles.length == 0 && + (error = git_vector_init(&ctl.windowfiles, 8, NULL)) < GIT_SUCCESS) + return error; + + return git_vector_insert(&ctl.windowfiles, mwf); +} + +void git_mwindow_close(git_mwindow **window) +{ + git_mwindow *w = *window; + if (w) { + w->inuse_cnt--; + *window = NULL; + } +} diff --git a/src/mwindow.h b/src/mwindow.h new file mode 100644 index 000000000..971d1eee8 --- /dev/null +++ b/src/mwindow.h @@ -0,0 +1,64 @@ +/* + * This file is free software; 
you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * In addition to the permissions in the GNU General Public License, + * the authors give you unlimited permission to link the compiled + * version of this file into combinations with other programs, + * and to distribute those combinations without any restriction + * coming from the use of this file. (The General Public License + * restrictions do apply in other respects; for example, they cover + * modification of the file, and distribution when not linked into + * a combined executable.) + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifndef INCLUDE_mwindow__ +#define INCLUDE_mwindow__ + +#include "map.h" +#include "vector.h" +#include "fileops.h" + +typedef struct git_mwindow { + struct git_mwindow *next; + git_map window_map; + off_t offset; + unsigned int last_used; + unsigned int inuse_cnt; +} git_mwindow; + +typedef struct git_mwindow_file { + git_mwindow *windows; +} git_mwindow_file; + +typedef struct git_mwindow_ctl { + size_t mapped; + unsigned int open_windows; + size_t window_size; /* needs default value */ + size_t mapped_limit; /* needs default value */ + unsigned int mmap_calls; + unsigned int peak_open_windows; + size_t peak_mapped; + size_t used_ctr; + git_vector windowfiles; +} git_mwindow_ctl; + +int git_mwindow_contains(git_mwindow *win, off_t offset); +void git_mwindow_free_all(git_mwindow_file *mwf); +unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, git_file fd, size_t size, off_t offset, int extra, unsigned int *left); +void git_mwindow_scan_lru(git_mwindow_file *mwf, git_mwindow **lru_w, git_mwindow **lru_l); +int git_mwindow_file_register(git_mwindow_file *mwf); +void git_mwindow_close(git_mwindow **w_cursor); + +#endif diff --git a/src/odb_pack.c b/src/odb_pack.c index 67b983da0..6cc2c329b 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -32,17 +32,10 @@ #include "odb.h" #include "delta-apply.h" #include "sha1_lookup.h" +#include "mwindow.h" #include "git2/odb_backend.h" -#define DEFAULT_WINDOW_SIZE \ - (sizeof(void*) >= 8 \ - ? 1 * 1024 * 1024 * 1024 \ - : 32 * 1024 * 1024) - -#define DEFAULT_MAPPED_LIMIT \ - ((1024L * 1024L) * (sizeof(void*) >= 8 ? 
8192 : 256)) - #define PACK_SIGNATURE 0x5041434b /* "PACK" */ #define PACK_VERSION 2 #define pack_version_ok(v) ((v) == htonl(2) || (v) == htonl(3)) @@ -76,18 +69,11 @@ struct pack_idx_header { uint32_t idx_version; }; -struct pack_window { - struct pack_window *next; - git_map window_map; - off_t offset; - unsigned int last_used; - unsigned int inuse_cnt; -}; - struct pack_file { - struct pack_window *windows; + int pack_fd; + git_mwindow_file mwf; + //git_mwindow *windows; off_t pack_size; - git_map index_map; uint32_t num_objects; @@ -96,7 +82,6 @@ struct pack_file { int index_version; git_time_t mtime; - int pack_fd; unsigned pack_local:1, pack_keep:1; git_oid sha1; @@ -116,19 +101,6 @@ struct pack_backend { struct pack_file *last_found; char *pack_folder; time_t pack_folder_mtime; - - size_t window_size; /* needs default value */ - - size_t mapped_limit; /* needs default value */ - size_t peak_mapped; - size_t mapped; - - size_t used_ctr; - - unsigned int peak_open_windows; - unsigned int open_windows; - - unsigned int mmap_calls; }; /** @@ -226,8 +198,6 @@ struct pack_backend { */ - - /*********************************************************** * * FORWARD DECLARATIONS @@ -235,19 +205,10 @@ struct pack_backend { ***********************************************************/ static void pack_window_free_all(struct pack_backend *backend, struct pack_file *p); -static int pack_window_contains(struct pack_window *win, off_t offset); +static int pack_window_contains(git_mwindow *win, off_t offset); -static void pack_window_scan_lru(struct pack_file *p, struct pack_file **lru_p, - struct pack_window **lru_w, struct pack_window **lru_l); - -static int pack_window_close_lru( struct pack_backend *backend, - struct pack_file *current, git_file keep_fd); - -static void pack_window_close(struct pack_window **w_cursor); - -static unsigned char *pack_window_open( struct pack_backend *backend, - struct pack_file *p, struct pack_window **w_cursor, off_t offset, - unsigned int 
*left); +static unsigned char *pack_window_open(struct pack_file *p, + git_mwindow **w_cursor, off_t offset, unsigned int *left); static int packfile_sort__cb(const void *a_, const void *b_); @@ -299,8 +260,7 @@ static int pack_entry_find_prefix(struct pack_entry *e, const git_oid *short_oid, unsigned int len); -static off_t get_delta_base(struct pack_backend *backend, - struct pack_file *p, struct pack_window **w_curs, +static off_t get_delta_base(struct pack_file *p, git_mwindow **w_curs, off_t *curpos, git_otype type, off_t delta_obj_offset); @@ -313,16 +273,14 @@ static unsigned long packfile_unpack_header1( static int packfile_unpack_header( size_t *size_p, git_otype *type_p, - struct pack_backend *backend, struct pack_file *p, - struct pack_window **w_curs, + git_mwindow **w_curs, off_t *curpos); static int packfile_unpack_compressed( git_rawobj *obj, - struct pack_backend *backend, struct pack_file *p, - struct pack_window **w_curs, + git_mwindow **w_curs, off_t curpos, size_t size, git_otype type); @@ -331,7 +289,7 @@ static int packfile_unpack_delta( git_rawobj *obj, struct pack_backend *backend, struct pack_file *p, - struct pack_window **w_curs, + git_mwindow **w_curs, off_t curpos, size_t delta_size, git_otype delta_type, @@ -350,23 +308,12 @@ static int packfile_unpack(git_rawobj *obj, struct pack_backend *backend, * ***********************************************************/ -void pack_window_free_all(struct pack_backend *backend, struct pack_file *p) +GIT_INLINE(void) pack_window_free_all(struct pack_backend *GIT_UNUSED(backend), struct pack_file *p) { - while (p->windows) { - struct pack_window *w = p->windows; - assert(w->inuse_cnt == 0); - - backend->mapped -= w->window_map.len; - backend->open_windows--; - - git_futils_mmap_free(&w->window_map); - - p->windows = w->next; - free(w); - } + git_mwindow_free_all(&p->mwf); } -GIT_INLINE(int) pack_window_contains(struct pack_window *win, off_t offset) +GIT_INLINE(int) pack_window_contains(git_mwindow 
*win, off_t offset) { /* We must promise at least 20 bytes (one hash) after the * offset is available from this window, otherwise the offset @@ -374,86 +321,15 @@ GIT_INLINE(int) pack_window_contains(struct pack_window *win, off_t offset) * has that one hash excess) must be used. This is to support * the object header and delta base parsing routines below. */ - off_t win_off = win->offset; - return win_off <= offset - && (offset + 20) <= (off_t)(win_off + win->window_map.len); -} - -static void pack_window_scan_lru( - struct pack_file *p, - struct pack_file **lru_p, - struct pack_window **lru_w, - struct pack_window **lru_l) -{ - struct pack_window *w, *w_l; - - for (w_l = NULL, w = p->windows; w; w = w->next) { - if (!w->inuse_cnt) { - if (!*lru_w || w->last_used < (*lru_w)->last_used) { - *lru_p = p; - *lru_w = w; - *lru_l = w_l; - } - } - w_l = w; - } -} - -static int pack_window_close_lru( - struct pack_backend *backend, - struct pack_file *current, - git_file keep_fd) -{ - struct pack_file *lru_p = NULL; - struct pack_window *lru_w = NULL, *lru_l = NULL; - size_t i; - - if (current) - pack_window_scan_lru(current, &lru_p, &lru_w, &lru_l); - - for (i = 0; i < backend->packs.length; ++i) - pack_window_scan_lru(git_vector_get(&backend->packs, i), &lru_p, &lru_w, &lru_l); - - if (lru_p) { - backend->mapped -= lru_w->window_map.len; - git_futils_mmap_free(&lru_w->window_map); - - if (lru_l) - lru_l->next = lru_w->next; - else { - lru_p->windows = lru_w->next; - if (!lru_p->windows && lru_p->pack_fd != keep_fd) { - p_close(lru_p->pack_fd); - lru_p->pack_fd = -1; - } - } - - free(lru_w); - backend->open_windows--; - return GIT_SUCCESS; - } - - return git__throw(GIT_ERROR, "Failed to close pack window"); -} - -static void pack_window_close(struct pack_window **w_cursor) -{ - struct pack_window *w = *w_cursor; - if (w) { - w->inuse_cnt--; - *w_cursor = NULL; - } + return git_mwindow_contains(win, offset + 20); } static unsigned char *pack_window_open( - struct 
pack_backend *backend, struct pack_file *p, - struct pack_window **w_cursor, + git_mwindow **w_cursor, off_t offset, unsigned int *left) { - struct pack_window *win = *w_cursor; - if (p->pack_fd == -1 && packfile_open(p) < GIT_SUCCESS) return NULL; @@ -465,73 +341,8 @@ static unsigned char *pack_window_open( if (offset > (p->pack_size - 20)) return NULL; - if (!win || !pack_window_contains(win, offset)) { - - if (win) - win->inuse_cnt--; - - for (win = p->windows; win; win = win->next) { - if (pack_window_contains(win, offset)) - break; - } - - if (!win) { - size_t window_align = backend->window_size / 2; - size_t len; - - win = git__calloc(1, sizeof(*win)); - if (win == NULL) - return NULL; - - win->offset = (offset / window_align) * window_align; - - len = (size_t)(p->pack_size - win->offset); - if (len > backend->window_size) - len = backend->window_size; - - backend->mapped += len; - - while (backend->mapped_limit < backend->mapped && - pack_window_close_lru(backend, p, p->pack_fd) == GIT_SUCCESS) {} - - if (git_futils_mmap_ro(&win->window_map, p->pack_fd, - win->offset, len) < GIT_SUCCESS) { - free(win); - return NULL; - } - - backend->mmap_calls++; - backend->open_windows++; - - if (backend->mapped > backend->peak_mapped) - backend->peak_mapped = backend->mapped; - - if (backend->open_windows > backend->peak_open_windows) - backend->peak_open_windows = backend->open_windows; - - win->next = p->windows; - p->windows = win; - } - } - - if (win != *w_cursor) { - win->last_used = backend->used_ctr++; - win->inuse_cnt++; - *w_cursor = win; - } - - offset -= win->offset; - assert(git__is_sizet(offset)); - - if (left) - *left = win->window_map.len - (size_t)offset; - - return (unsigned char *)win->window_map.data + offset; -} - - - - + return git_mwindow_open(&p->mwf, w_cursor, p->pack_fd, p->pack_size, offset, 20, left); + } @@ -766,6 +577,11 @@ static int packfile_open(struct pack_file *p) if (p->pack_fd < 0 || p_fstat(p->pack_fd, &st) < GIT_SUCCESS) return 
git__throw(GIT_EOSERR, "Failed to open packfile. File appears to be corrupted"); + if (git_mwindow_file_register(&p->mwf) < GIT_SUCCESS) { + p_close(p->pack_fd); + return git__throw(GIT_ERROR, "Failed to register packfile windows"); + } + /* If we created the struct before we had the pack we lack size. */ if (!p->pack_size) { if (!S_ISREG(st.st_mode)) @@ -1210,9 +1026,8 @@ static unsigned long packfile_unpack_header1( static int packfile_unpack_header( size_t *size_p, git_otype *type_p, - struct pack_backend *backend, struct pack_file *p, - struct pack_window **w_curs, + git_mwindow **w_curs, off_t *curpos) { unsigned char *base; @@ -1225,7 +1040,7 @@ static int packfile_unpack_header( * the maximum deflated object size is 2^137, which is just * insane, so we know won't exceed what we have been given. */ - base = pack_window_open(backend, p, w_curs, *curpos, &left); + base = pack_window_open(p, w_curs, *curpos, &left); if (base == NULL) return GIT_ENOMEM; @@ -1240,9 +1055,8 @@ static int packfile_unpack_header( static int packfile_unpack_compressed( git_rawobj *obj, - struct pack_backend *backend, struct pack_file *p, - struct pack_window **w_curs, + git_mwindow **w_curs, off_t curpos, size_t size, git_otype type) @@ -1265,7 +1079,7 @@ static int packfile_unpack_compressed( } do { - in = pack_window_open(backend, p, w_curs, curpos, &stream.avail_in); + in = pack_window_open(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; st = inflate(&stream, Z_FINISH); @@ -1289,14 +1103,13 @@ static int packfile_unpack_compressed( } static off_t get_delta_base( - struct pack_backend *backend, struct pack_file *p, - struct pack_window **w_curs, + git_mwindow **w_curs, off_t *curpos, git_otype type, off_t delta_obj_offset) { - unsigned char *base_info = pack_window_open(backend, p, w_curs, *curpos, NULL); + unsigned char *base_info = pack_window_open(p, w_curs, *curpos, NULL); off_t base_offset; git_oid unused; @@ -1336,7 +1149,7 @@ static int packfile_unpack_delta( 
git_rawobj *obj, struct pack_backend *backend, struct pack_file *p, - struct pack_window **w_curs, + git_mwindow **w_curs, off_t curpos, size_t delta_size, git_otype delta_type, @@ -1346,11 +1159,11 @@ static int packfile_unpack_delta( git_rawobj base, delta; int error; - base_offset = get_delta_base(backend, p, w_curs, &curpos, delta_type, obj_offset); + base_offset = get_delta_base(p, w_curs, &curpos, delta_type, obj_offset); if (base_offset == 0) return git__throw(GIT_EOBJCORRUPTED, "Delta offset is zero"); - pack_window_close(w_curs); + git_mwindow_close(w_curs); error = packfile_unpack(&base, backend, p, base_offset); /* TODO: git.git tries to load the base from other packfiles @@ -1358,7 +1171,7 @@ static int packfile_unpack_delta( if (error < GIT_SUCCESS) return git__rethrow(error, "Corrupted delta"); - error = packfile_unpack_compressed(&delta, backend, p, w_curs, curpos, delta_size, delta_type); + error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type); if (error < GIT_SUCCESS) { free(base.data); return git__rethrow(error, "Corrupted delta"); @@ -1383,7 +1196,7 @@ static int packfile_unpack( struct pack_file *p, off_t obj_offset) { - struct pack_window *w_curs = NULL; + git_mwindow *w_curs = NULL; off_t curpos = obj_offset; int error; @@ -1398,7 +1211,7 @@ static int packfile_unpack( obj->len = 0; obj->type = GIT_OBJ_BAD; - error = packfile_unpack_header(&size, &type, backend, p, &w_curs, &curpos); + error = packfile_unpack_header(&size, &type, p, &w_curs, &curpos); if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to unpack packfile"); @@ -1415,7 +1228,7 @@ static int packfile_unpack( case GIT_OBJ_BLOB: case GIT_OBJ_TAG: error = packfile_unpack_compressed( - obj, backend, p, &w_curs, curpos, + obj, p, &w_curs, curpos, size, type); break; @@ -1424,7 +1237,7 @@ static int packfile_unpack( break; } - pack_window_close(&w_curs); + git_mwindow_close(&w_curs); return error == GIT_SUCCESS ? 
GIT_SUCCESS : git__rethrow(error, "Failed to unpack packfile"); } @@ -1551,9 +1364,6 @@ int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir) return GIT_ENOMEM; } - backend->window_size = DEFAULT_WINDOW_SIZE; - backend->mapped_limit = DEFAULT_MAPPED_LIMIT; - git_path_join(path, objects_dir, "pack"); if (git_futils_isdir(path) == GIT_SUCCESS) { backend->pack_folder = git__strdup(path); From c7c9e18388b6e11265c867766d4c001197852134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 7 Jul 2011 10:17:40 +0200 Subject: [PATCH 02/12] Move the pack structs to an internal header --- src/odb_pack.c | 60 +------------------------------- src/pack.h | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 59 deletions(-) create mode 100644 src/pack.h diff --git a/src/odb_pack.c b/src/odb_pack.c index 6cc2c329b..80293359e 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -33,68 +33,10 @@ #include "delta-apply.h" #include "sha1_lookup.h" #include "mwindow.h" +#include "pack.h" #include "git2/odb_backend.h" -#define PACK_SIGNATURE 0x5041434b /* "PACK" */ -#define PACK_VERSION 2 -#define pack_version_ok(v) ((v) == htonl(2) || (v) == htonl(3)) -struct pack_header { - uint32_t hdr_signature; - uint32_t hdr_version; - uint32_t hdr_entries; -}; - -/* - * The first four bytes of index formats later than version 1 should - * start with this signature, as all older git binaries would find this - * value illegal and abort reading the file. - * - * This is the case because the number of objects in a packfile - * cannot exceed 1,431,660,000 as every object would need at least - * 3 bytes of data and the overall packfile cannot exceed 4 GiB with - * version 1 of the index file due to the offsets limited to 32 bits. - * Clearly the signature exceeds this maximum. 
- * - * Very old git binaries will also compare the first 4 bytes to the - * next 4 bytes in the index and abort with a "non-monotonic index" - * error if the second 4 byte word is smaller than the first 4 - * byte word. This would be true in the proposed future index - * format as idx_signature would be greater than idx_version. - */ -#define PACK_IDX_SIGNATURE 0xff744f63 /* "\377tOc" */ - -struct pack_idx_header { - uint32_t idx_signature; - uint32_t idx_version; -}; - -struct pack_file { - int pack_fd; - git_mwindow_file mwf; - //git_mwindow *windows; - off_t pack_size; - git_map index_map; - - uint32_t num_objects; - uint32_t num_bad_objects; - git_oid *bad_object_sha1; /* array of git_oid */ - - int index_version; - git_time_t mtime; - unsigned pack_local:1, pack_keep:1; - git_oid sha1; - - /* something like ".git/objects/pack/xxxxx.pack" */ - char pack_name[GIT_FLEX_ARRAY]; /* more */ -}; - -struct pack_entry { - off_t offset; - git_oid sha1; - struct pack_file *p; -}; - struct pack_backend { git_odb_backend parent; git_vector packs; diff --git a/src/pack.h b/src/pack.h new file mode 100644 index 000000000..a5525459e --- /dev/null +++ b/src/pack.h @@ -0,0 +1,94 @@ +/* + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * In addition to the permissions in the GNU General Public License, + * the authors give you unlimited permission to link the compiled + * version of this file into combinations with other programs, + * and to distribute those combinations without any restriction + * coming from the use of this file. (The General Public License + * restrictions do apply in other respects; for example, they cover + * modification of the file, and distribution when not linked into + * a combined executable.) 
+ * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDE_pack_h__ +#define INCLUDE_pack_h__ + +#include "git2/oid.h" + +#include "common.h" +#include "map.h" +#include "mwindow.h" + +#define PACK_SIGNATURE 0x5041434b /* "PACK" */ +#define PACK_VERSION 2 +#define pack_version_ok(v) ((v) == htonl(2) || (v) == htonl(3)) +struct pack_header { + uint32_t hdr_signature; + uint32_t hdr_version; + uint32_t hdr_entries; +}; + +/* + * The first four bytes of index formats later than version 1 should + * start with this signature, as all older git binaries would find this + * value illegal and abort reading the file. + * + * This is the case because the number of objects in a packfile + * cannot exceed 1,431,660,000 as every object would need at least + * 3 bytes of data and the overall packfile cannot exceed 4 GiB with + * version 1 of the index file due to the offsets limited to 32 bits. + * Clearly the signature exceeds this maximum. + * + * Very old git binaries will also compare the first 4 bytes to the + * next 4 bytes in the index and abort with a "non-monotonic index" + * error if the second 4 byte word is smaller than the first 4 + * byte word. This would be true in the proposed future index + * format as idx_signature would be greater than idx_version. 
+ */ + +#define PACK_IDX_SIGNATURE 0xff744f63 /* "\377tOc" */ + +struct pack_idx_header { + uint32_t idx_signature; + uint32_t idx_version; +}; + +struct pack_file { + int pack_fd; + git_mwindow_file mwf; + off_t pack_size; + git_map index_map; + + uint32_t num_objects; + uint32_t num_bad_objects; + git_oid *bad_object_sha1; /* array of git_oid */ + + int index_version; + git_time_t mtime; + unsigned pack_local:1, pack_keep:1; + git_oid sha1; + + /* something like ".git/objects/pack/xxxxx.pack" */ + char pack_name[GIT_FLEX_ARRAY]; /* more */ +}; + +struct pack_entry { + off_t offset; + git_oid sha1; + struct pack_file *p; +}; + +#endif From 3412391d4ccf6435b981c46e796cc6988a676fad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 7 Jul 2011 11:47:31 +0200 Subject: [PATCH 03/12] Initial indexer code --- include/git2.h | 1 + include/git2/indexer.h | 15 +++++ src/indexer.c | 133 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 include/git2/indexer.h create mode 100644 src/indexer.c diff --git a/include/git2.h b/include/git2.h index 35e48b240..96de524e7 100644 --- a/include/git2.h +++ b/include/git2.h @@ -60,5 +60,6 @@ #include "git2/net.h" #include "git2/transport.h" #include "git2/status.h" +#include "git2/indexer.h" #endif diff --git a/include/git2/indexer.h b/include/git2/indexer.h new file mode 100644 index 000000000..be1752027 --- /dev/null +++ b/include/git2/indexer.h @@ -0,0 +1,15 @@ +#ifndef _INCLUDE_git_indexer_h__ +#define _INCLUDE_git_indexer_h__ + +typedef struct git_pack_indexer { + struct pack_file *pack; + git_vector objects; + git_vector deltas; + struct stat st; +} git_pack_indexer; + +GIT_EXTERN(int) git_pack_indexer_new(git_pack_indexer **out, const char *packname); +GIT_EXTERN(void) git_pack_indexer_free(git_pack_indexer *idx) + + +#endif diff --git a/src/indexer.c b/src/indexer.c new file mode 100644 index 000000000..b63efc088 --- /dev/null +++ b/src/indexer.c @@ -0,0 
+1,133 @@ +/* + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * In addition to the permissions in the GNU General Public License, + * the authors give you unlimited permission to link the compiled + * version of this file into combinations with other programs, + * and to distribute those combinations without any restriction + * coming from the use of this file. (The General Public License + * restrictions do apply in other respects; for example, they cover + * modification of the file, and distribution when not linked into + * a combined executable.) + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "common.h" +#include "pack.h" +#include "posix.h" + +static int parse_header(git_pack_indexer *idx) +{ + struct pack_header hdr; + int error; + + /* Verify we recognize this pack file format. 
*/ + if ((error = p_read(idx->pack->pack_fd, &hdr, sizeof(hdr))) < GIT_SUCCESS) + goto cleanup; + + if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) { + error = git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature"); + goto cleanup; + } + + if (!pack_version_ok(hdr.hdr_version)) { + error = git__throw(GIT_EOBJCORRUPTED, "Wrong pack version"); + goto cleanup; + } + + /* + * FIXME: At this point we have no idea how many of the are + * deltas, so assume all objects are both until we get a better + * idea + */ + error = git_vector_init(&idx->objects, hdr.hdr_entries, NULL /* FIXME: probably need something */); + if (error < GIT_SUCCESS) + goto cleanup; + + error = git_vector_init(&idx->deltas, hdr.hdr_entries, NULL /* FIXME: probably need something */); + if (error < GIT_SUCCESS) + goto cleanup; + + return GIT_SUCCESS; + +cleanup: + git_vector_free(&idx->objects); + git_vector_free(&idx->deltas); + + return error; +} + +int git_pack_indexer_new(git_pack_indexer **out, const char *packname) +{ + struct git_pack_indexer *idx; + unsigned int namelen; + int ret, error; + + idx = git__malloc(sizeof(struct git_pack_indexer)); + if (idx == NULL) + return GIT_ENOMEM; + + memset(idx, 0x0, sizeof(*idx)); + + namelen = strlen(packname); + idx->pack = git__malloc(sizeof(struct pack_file) + namelen + 1); + if (idx->pack == NULL) + goto cleanup; + + memset(idx->pack, 0x0, sizeof(struct pack_file)); + memcpy(idx->pack->pack_name, packname, namelen); + + ret = p_stat(packname, &idx->st); + if (ret < 0) { + if (errno == ENOENT) + error = git__throw(GIT_ENOTFOUND, "Failed to stat packfile. 
File not found"); + else + error = git__throw(GIT_EOSERR, "Failed to stat packfile."); + + goto cleanup; + } + + ret = p_open(idx->pack->pack_name, O_RDONLY); + if (ret < 0) { + error = git__throw(GIT_EOSERR, "Failed to open packfile"); + goto cleanup; + } + + idx->pack->pack_fd = ret; + + error = parse_header(idx); + if (error < GIT_SUCCESS) { + error = git__rethrow(error, "Failed to parse packfile header"); + goto cleanup; + } + + *out = idx; + + return GIT_SUCCESS; + +cleanup: + free(idx->pack); + free(idx); + + return error; +} + +void git_pack_indexer_free(git_pack_indexer *idx) +{ + p_close(idx->pack->pack_fd); + git_vector_free(&idx->objects); + git_vector_free(&idx->deltas); + free(idx->pack); + free(idx); +} From f23c4a66bde5738a574416db4617ca749ca34f7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 7 Jul 2011 19:08:45 +0200 Subject: [PATCH 04/12] Start the runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Carlos Martín Nieto --- include/git2/indexer.h | 18 +++++++++++------- src/indexer.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/include/git2/indexer.h b/include/git2/indexer.h index be1752027..34f25b97c 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -1,15 +1,19 @@ #ifndef _INCLUDE_git_indexer_h__ #define _INCLUDE_git_indexer_h__ -typedef struct git_pack_indexer { - struct pack_file *pack; - git_vector objects; - git_vector deltas; - struct stat st; -} git_pack_indexer; +#include "git2/common.h" + +typedef struct git_indexer_stats { + unsigned int total; + unsigned int parsed; +} git_indexer_stats; + + +typedef struct git_pack_indexer git_pack_indexer; GIT_EXTERN(int) git_pack_indexer_new(git_pack_indexer **out, const char *packname); -GIT_EXTERN(void) git_pack_indexer_free(git_pack_indexer *idx) +GIT_EXTERN(int) git_pack_indexer_run(git_pack_indexer *idx, int (*cb)(const 
git_indexer_stats *, void *), void *data); +GIT_EXTERN(void) git_pack_indexer_free(git_pack_indexer *idx); #endif diff --git a/src/indexer.c b/src/indexer.c index b63efc088..97f08dae1 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -23,10 +23,21 @@ * Boston, MA 02110-1301, USA. */ +#include "git2/indexer.h" + #include "common.h" #include "pack.h" +#include "mwindow.h" #include "posix.h" +typedef struct git_pack_indexer { + struct pack_file *pack; + git_vector objects; + git_vector deltas; + struct stat st; + git_indexer_stats stats; +} git_pack_indexer; + static int parse_header(git_pack_indexer *idx) { struct pack_header hdr; @@ -59,6 +70,8 @@ static int parse_header(git_pack_indexer *idx) if (error < GIT_SUCCESS) goto cleanup; + idx->stats.total = hdr.hdr_entries; + return GIT_SUCCESS; cleanup: @@ -123,6 +136,27 @@ cleanup: return error; } +/* + * Create the index. Every time something interesting happens + * (something has been parse or resolved), the callback gets called + * with some stats so it can tell the user how hard we're working + */ +int git_pack_indexer_run(git_pack_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data) +{ + git_mwindow_file *mwf = &idx->pack->mwf; + int error; + + error = git_mwindow_file_register(mwf); + if (error < GIT_SUCCESS) + return git__rethrow(error, "Failed to register mwindow file"); + + /* notify early */ + if (cb) + cb(&idx->stats, data); + + return error; +} + void git_pack_indexer_free(git_pack_indexer *idx) { p_close(idx->pack->pack_fd); From ab525a7463492aa64c936b59165ab33f6264f1a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 7 Jul 2011 19:20:13 +0200 Subject: [PATCH 05/12] Rename stuff to git_indexer_ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Carlos Martín Nieto --- include/git2/indexer.h | 10 ++-- src/indexer.c | 107 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 14 
deletions(-) diff --git a/include/git2/indexer.h b/include/git2/indexer.h index 34f25b97c..f32b1ef6b 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -5,15 +5,15 @@ typedef struct git_indexer_stats { unsigned int total; - unsigned int parsed; + unsigned int processed; } git_indexer_stats; -typedef struct git_pack_indexer git_pack_indexer; +typedef struct git_indexer git_indexer; -GIT_EXTERN(int) git_pack_indexer_new(git_pack_indexer **out, const char *packname); -GIT_EXTERN(int) git_pack_indexer_run(git_pack_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data); -GIT_EXTERN(void) git_pack_indexer_free(git_pack_indexer *idx); +GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname); +GIT_EXTERN(int) git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data); +GIT_EXTERN(void) git_indexer_free(git_indexer *idx); #endif diff --git a/src/indexer.c b/src/indexer.c index 97f08dae1..241813724 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -24,21 +24,22 @@ */ #include "git2/indexer.h" +#include "git2/zlib.h" #include "common.h" #include "pack.h" #include "mwindow.h" #include "posix.h" -typedef struct git_pack_indexer { +typedef struct git_indexer { struct pack_file *pack; git_vector objects; git_vector deltas; struct stat st; git_indexer_stats stats; -} git_pack_indexer; +} git_indexer; -static int parse_header(git_pack_indexer *idx) +static int parse_header(git_indexer *idx) { struct pack_header hdr; int error; @@ -81,13 +82,13 @@ cleanup: return error; } -int git_pack_indexer_new(git_pack_indexer **out, const char *packname) +int git_indexer_new(git_indexer **out, const char *packname) { - struct git_pack_indexer *idx; + git_indexer *idx; unsigned int namelen; int ret, error; - idx = git__malloc(sizeof(struct git_pack_indexer)); + idx = git__malloc(sizeof(git_indexer)); if (idx == NULL) return GIT_ENOMEM; @@ -136,28 +137,115 @@ cleanup: return error; } +/* + * Parse the 
variable-width length and return it. Assumes that the + * whole number exists inside the buffer. As this is the git format, + * the first byte only contains length information in the lower nibble + * because the higher one is used for type and continuation. The + * output parameter is necessary because we don't know how long the + * entry is actually going to be. + */ +static unsigned long entry_len(const char **bufout, const char *buf) +{ + unsigned long size, c; + const char *p = buf; + unsigned shift; + + c = *p; + size = c & 0xf; + shift = 4; + + /* As long as the MSB is set, we need to continue */ + while (c & 0x80) { + p++; + c = *p; + size += (c & 0x7f) << shift; + shift += 7; + } + + *bufout = p; + return size; +} + +static git_otype entry_type(const char *buf) +{ + return (*buf >> 4) & 7; +} + /* * Create the index. Every time something interesting happens * (something has been parse or resolved), the callback gets called * with some stats so it can tell the user how hard we're working */ -int git_pack_indexer_run(git_pack_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data) +int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data) { git_mwindow_file *mwf = &idx->pack->mwf; + git_mwindow *w = NULL; + off_t off = 0; int error; + const char *ptr; + unsigned int fanout[256] = {0}; error = git_mwindow_file_register(mwf); if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to register mwindow file"); - /* notify early */ + /* Notify before the first one */ if (cb) cb(&idx->stats, data); + while (idx->stats.processed < idx->stats.total) { + unsigned long size; + git_otype type; + + /* 4k is a bit magic for the moment */ + ptr = git_mwindow_open(mwf, &w, idx->pack->pack_fd, 4096, off, 0, NULL); + if (ptr == NULL) { + error = GIT_ENOMEM; + goto cleanup; + } + + /* + * The size is when expanded, so we need to inflate the object + * so we know where the next one ist. 
+ */ + type = entry_type(ptr); + size = entry_len(&data, ptr); + + switch (type) { + case GIT_OBJ_COMMIT: + case GIT_OBJ_TREE: + case GIT_OBJ_BLOB: + case GIT_OBJ_TAG: + break; + default: + error = git__throw(GIT_EOBJCORRUPTED, "Invalid object type"); + goto cleanup; + } + + /* + * Do we need to uncompress everything if we're not running in + * strict mode? Or at least can't we free the data? + */ + + /* Get a window for the compressed data */ + //ptr = git_mwindow_open(mwf, &w, idx->pack->pack_fd, size, data - ptr, 0, NULL); + + idx->stats.processed++; + + if (cb) + cb(&idx->stats, data); + + } + +cleanup: + git_mwindow_free_all(mwf); + return error; + } -void git_pack_indexer_free(git_pack_indexer *idx) +void git_indexer_free(git_indexer *idx) { p_close(idx->pack->pack_fd); git_vector_free(&idx->objects); @@ -165,3 +253,4 @@ void git_pack_indexer_free(git_pack_indexer *idx) free(idx->pack); free(idx); } + From 7d0cdf82be73ea8c0dce07e5e0ed3c7fdcd4707e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 9 Jul 2011 02:25:01 +0200 Subject: [PATCH 06/12] Make packfile_unpack_header more generic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the way, store the fd and the size in the mwindow file. Signed-off-by: Carlos Martín Nieto --- src/indexer.c | 29 ++-- src/mwindow.c | 6 +- src/mwindow.h | 4 +- src/odb_pack.c | 367 +++---------------------------------------------- src/pack.c | 300 ++++++++++++++++++++++++++++++++++++++++ src/pack.h | 28 +++- 6 files changed, 360 insertions(+), 374 deletions(-) create mode 100644 src/pack.c diff --git a/src/indexer.c b/src/indexer.c index 241813724..929eb3194 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -24,6 +24,7 @@ */ #include "git2/indexer.h" +#include "git2/object.h" #include "git2/zlib.h" #include "common.h" @@ -45,7 +46,7 @@ static int parse_header(git_indexer *idx) int error; /* Verify we recognize this pack file format. 
*/ - if ((error = p_read(idx->pack->pack_fd, &hdr, sizeof(hdr))) < GIT_SUCCESS) + if ((error = p_read(idx->pack->mwf.fd, &hdr, sizeof(hdr))) < GIT_SUCCESS) goto cleanup; if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) { @@ -118,7 +119,7 @@ int git_indexer_new(git_indexer **out, const char *packname) goto cleanup; } - idx->pack->pack_fd = ret; + idx->pack->mwf.fd = ret; error = parse_header(idx); if (error < GIT_SUCCESS) { @@ -177,7 +178,7 @@ static git_otype entry_type(const char *buf) * (something has been parse or resolved), the callback gets called * with some stats so it can tell the user how hard we're working */ -int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data) +int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *cb_data) { git_mwindow_file *mwf = &idx->pack->mwf; git_mwindow *w = NULL; @@ -192,25 +193,13 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void /* Notify before the first one */ if (cb) - cb(&idx->stats, data); + cb(&idx->stats, cb_data); while (idx->stats.processed < idx->stats.total) { - unsigned long size; + size_t size; git_otype type; - /* 4k is a bit magic for the moment */ - ptr = git_mwindow_open(mwf, &w, idx->pack->pack_fd, 4096, off, 0, NULL); - if (ptr == NULL) { - error = GIT_ENOMEM; - goto cleanup; - } - - /* - * The size is when expanded, so we need to inflate the object - * so we know where the next one ist. 
- */ - type = entry_type(ptr); - size = entry_len(&data, ptr); + error = git_packfile_unpack_header(&size, &type, mwf, &w, &off); switch (type) { case GIT_OBJ_COMMIT: @@ -234,7 +223,7 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void idx->stats.processed++; if (cb) - cb(&idx->stats, data); + cb(&idx->stats, cb_data); } @@ -247,7 +236,7 @@ cleanup: void git_indexer_free(git_indexer *idx) { - p_close(idx->pack->pack_fd); + p_close(idx->pack->mwf.fd); git_vector_free(&idx->objects); git_vector_free(&idx->deltas); free(idx->pack); diff --git a/src/mwindow.c b/src/mwindow.c index 3ac585720..2f7fc7f7d 100644 --- a/src/mwindow.c +++ b/src/mwindow.c @@ -203,8 +203,8 @@ cleanup: * Open a new window, closing the least recenty used until we have * enough space. Don't forget to add it to your list */ -unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, git_file fd, - size_t size, off_t offset, int extra, unsigned int *left) +unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, + off_t offset, int extra, unsigned int *left) { git_mwindow *w = *cursor; @@ -223,7 +223,7 @@ unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, git * one. 
*/ if (!w) { - w = new_window(mwf, fd, size, offset); + w = new_window(mwf, mwf->fd, mwf->size, offset); if (w == NULL) return NULL; w->next = mwf->windows; diff --git a/src/mwindow.h b/src/mwindow.h index 971d1eee8..6c29307a7 100644 --- a/src/mwindow.h +++ b/src/mwindow.h @@ -40,6 +40,8 @@ typedef struct git_mwindow { typedef struct git_mwindow_file { git_mwindow *windows; + int fd; + off_t size; } git_mwindow_file; typedef struct git_mwindow_ctl { @@ -56,7 +58,7 @@ typedef struct git_mwindow_ctl { int git_mwindow_contains(git_mwindow *win, off_t offset); void git_mwindow_free_all(git_mwindow_file *mwf); -unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, git_file fd, size_t size, off_t offset, int extra, unsigned int *left); +unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, off_t offset, int extra, unsigned int *left); void git_mwindow_scan_lru(git_mwindow_file *mwf, git_mwindow **lru_w, git_mwindow **lru_l); int git_mwindow_file_register(git_mwindow_file *mwf); void git_mwindow_close(git_mwindow **w_cursor); diff --git a/src/odb_pack.c b/src/odb_pack.c index 80293359e..9c92ea3c2 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -149,9 +149,6 @@ struct pack_backend { static void pack_window_free_all(struct pack_backend *backend, struct pack_file *p); static int pack_window_contains(git_mwindow *win, off_t offset); -static unsigned char *pack_window_open(struct pack_file *p, - git_mwindow **w_cursor, off_t offset, unsigned int *left); - static int packfile_sort__cb(const void *a_, const void *b_); static void pack_index_free(struct pack_file *p); @@ -202,46 +199,6 @@ static int pack_entry_find_prefix(struct pack_entry *e, const git_oid *short_oid, unsigned int len); -static off_t get_delta_base(struct pack_file *p, git_mwindow **w_curs, - off_t *curpos, git_otype type, - off_t delta_obj_offset); - -static unsigned long packfile_unpack_header1( - size_t *sizep, - git_otype *type, - const unsigned char *buf, - 
unsigned long len); - -static int packfile_unpack_header( - size_t *size_p, - git_otype *type_p, - struct pack_file *p, - git_mwindow **w_curs, - off_t *curpos); - -static int packfile_unpack_compressed( - git_rawobj *obj, - struct pack_file *p, - git_mwindow **w_curs, - off_t curpos, - size_t size, - git_otype type); - -static int packfile_unpack_delta( - git_rawobj *obj, - struct pack_backend *backend, - struct pack_file *p, - git_mwindow **w_curs, - off_t curpos, - size_t delta_size, - git_otype delta_type, - off_t obj_offset); - -static int packfile_unpack(git_rawobj *obj, struct pack_backend *backend, - struct pack_file *p, off_t obj_offset); - - - /*********************************************************** @@ -266,27 +223,6 @@ GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset) return git_mwindow_contains(win, offset + 20); } -static unsigned char *pack_window_open( - struct pack_file *p, - git_mwindow **w_cursor, - off_t offset, - unsigned int *left) -{ - if (p->pack_fd == -1 && packfile_open(p) < GIT_SUCCESS) - return NULL; - - /* Since packfiles end in a hash of their content and it's - * pointless to ask for an offset into the middle of that - * hash, and the pack_window_contains function above wouldn't match - * don't allow an offset too close to the end of the file. 
- */ - if (offset > (p->pack_size - 20)) - return NULL; - - return git_mwindow_open(&p->mwf, w_cursor, p->pack_fd, p->pack_size, offset, 20, left); - } - - /*********************************************************** * @@ -483,7 +419,7 @@ static struct pack_file *packfile_alloc(int extra) { struct pack_file *p = git__malloc(sizeof(*p) + extra); memset(p, 0, sizeof(*p)); - p->pack_fd = -1; + p->mwf.fd = -1; return p; } @@ -495,8 +431,8 @@ static void packfile_free(struct pack_backend *backend, struct pack_file *p) /* clear_delta_base_cache(); */ pack_window_free_all(backend, p); - if (p->pack_fd != -1) - p_close(p->pack_fd); + if (p->mwf.fd != -1) + p_close(p->mwf.fd); pack_index_free(p); @@ -515,28 +451,28 @@ static int packfile_open(struct pack_file *p) return git__throw(GIT_ENOTFOUND, "Failed to open packfile. File not found"); /* TODO: open with noatime */ - p->pack_fd = p_open(p->pack_name, O_RDONLY); - if (p->pack_fd < 0 || p_fstat(p->pack_fd, &st) < GIT_SUCCESS) + p->mwf.fd = p_open(p->pack_name, O_RDONLY); + if (p->mwf.fd < 0 || p_fstat(p->mwf.fd, &st) < GIT_SUCCESS) return git__throw(GIT_EOSERR, "Failed to open packfile. File appears to be corrupted"); if (git_mwindow_file_register(&p->mwf) < GIT_SUCCESS) { - p_close(p->pack_fd); + p_close(p->mwf.fd); return git__throw(GIT_ERROR, "Failed to register packfile windows"); } /* If we created the struct before we had the pack we lack size. */ - if (!p->pack_size) { + if (!p->mwf.size) { if (!S_ISREG(st.st_mode)) goto cleanup; - p->pack_size = (off_t)st.st_size; - } else if (p->pack_size != st.st_size) + p->mwf.size = (off_t)st.st_size; + } else if (p->mwf.size != st.st_size) goto cleanup; #if 0 /* We leave these file descriptors open with sliding mmap; * there is no point keeping them open across exec(), though. 
*/ - fd_flag = fcntl(p->pack_fd, F_GETFD, 0); + fd_flag = fcntl(p->mwf.fd, F_GETFD, 0); if (fd_flag < 0) return error("cannot determine file descriptor flags"); @@ -546,7 +482,7 @@ static int packfile_open(struct pack_file *p) #endif /* Verify we recognize this pack file format. */ - if (p_read(p->pack_fd, &hdr, sizeof(hdr)) < GIT_SUCCESS) + if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < GIT_SUCCESS) goto cleanup; if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) @@ -559,10 +495,10 @@ static int packfile_open(struct pack_file *p) if (p->num_objects != ntohl(hdr.hdr_entries)) goto cleanup; - if (p_lseek(p->pack_fd, p->pack_size - GIT_OID_RAWSZ, SEEK_SET) == -1) + if (p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1) goto cleanup; - if (p_read(p->pack_fd, sha1.id, GIT_OID_RAWSZ) < GIT_SUCCESS) + if (p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < GIT_SUCCESS) goto cleanup; idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40; @@ -573,8 +509,8 @@ static int packfile_open(struct pack_file *p) return GIT_SUCCESS; cleanup: - p_close(p->pack_fd); - p->pack_fd = -1; + p_close(p->mwf.fd); + p->mwf.fd = -1; return git__throw(GIT_EPACKCORRUPTED, "Failed to packfile. Pack is corrupted"); } @@ -613,7 +549,7 @@ static int packfile_check(struct pack_file **pack_out, const char *path) /* ok, it looks sane as far as we can check without * actually mapping the pack file. */ - p->pack_size = (off_t)st.st_size; + p->mwf.size = (off_t)st.st_size; p->pack_local = 1; p->mtime = (git_time_t)st.st_mtime; @@ -833,7 +769,7 @@ static int pack_entry_find1( /* we found a unique entry in the index; * make sure the packfile backing the index * still exists on disk */ - if (p->pack_fd == -1 && packfile_open(p) < GIT_SUCCESS) + if (p->mwf.fd == -1 && packfile_open(p) < GIT_SUCCESS) return git__throw(GIT_EOSERR, "Failed to find pack entry. 
Packfile doesn't exist on disk"); e->offset = offset; @@ -922,271 +858,6 @@ static int pack_entry_find_prefix( } - - - - - - - - - - -/*********************************************************** - * - * PACKFILE ENTRY UNPACK INTERNALS - * - ***********************************************************/ - -static unsigned long packfile_unpack_header1( - size_t *sizep, - git_otype *type, - const unsigned char *buf, - unsigned long len) -{ - unsigned shift; - unsigned long size, c; - unsigned long used = 0; - - c = buf[used++]; - *type = (c >> 4) & 7; - size = c & 15; - shift = 4; - while (c & 0x80) { - if (len <= used || bitsizeof(long) <= shift) - return 0; - - c = buf[used++]; - size += (c & 0x7f) << shift; - shift += 7; - } - - *sizep = (size_t)size; - return used; -} - -static int packfile_unpack_header( - size_t *size_p, - git_otype *type_p, - struct pack_file *p, - git_mwindow **w_curs, - off_t *curpos) -{ - unsigned char *base; - unsigned int left; - unsigned long used; - - /* pack_window_open() assures us we have [base, base + 20) available - * as a range that we can look at at. (Its actually the hash - * size that is assured.) With our object header encoding - * the maximum deflated object size is 2^137, which is just - * insane, so we know won't exceed what we have been given. 
- */ - base = pack_window_open(p, w_curs, *curpos, &left); - if (base == NULL) - return GIT_ENOMEM; - - used = packfile_unpack_header1(size_p, type_p, base, left); - - if (used == 0) - return git__throw(GIT_EOBJCORRUPTED, "Header length is zero"); - - *curpos += used; - return GIT_SUCCESS; -} - -static int packfile_unpack_compressed( - git_rawobj *obj, - struct pack_file *p, - git_mwindow **w_curs, - off_t curpos, - size_t size, - git_otype type) -{ - int st; - z_stream stream; - unsigned char *buffer, *in; - - buffer = git__malloc(size + 1); - memset(buffer, 0x0, size + 1); - - memset(&stream, 0, sizeof(stream)); - stream.next_out = buffer; - stream.avail_out = size + 1; - - st = inflateInit(&stream); - if (st != Z_OK) { - free(buffer); - return git__throw(GIT_EZLIB, "Error in zlib"); - } - - do { - in = pack_window_open(p, w_curs, curpos, &stream.avail_in); - stream.next_in = in; - st = inflate(&stream, Z_FINISH); - - if (!stream.avail_out) - break; /* the payload is larger than it should be */ - - curpos += stream.next_in - in; - } while (st == Z_OK || st == Z_BUF_ERROR); - - inflateEnd(&stream); - - if ((st != Z_STREAM_END) || stream.total_out != size) { - free(buffer); - return git__throw(GIT_EZLIB, "Error in zlib"); - } - - obj->type = type; - obj->len = size; - obj->data = buffer; - return GIT_SUCCESS; -} - -static off_t get_delta_base( - struct pack_file *p, - git_mwindow **w_curs, - off_t *curpos, - git_otype type, - off_t delta_obj_offset) -{ - unsigned char *base_info = pack_window_open(p, w_curs, *curpos, NULL); - off_t base_offset; - git_oid unused; - - /* pack_window_open() assured us we have [base_info, base_info + 20) - * as a range that we can look at without walking off the - * end of the mapped window. Its actually the hash size - * that is assured. An OFS_DELTA longer than the hash size - * is stupid, as then a REF_DELTA would be smaller to store. 
- */ - if (type == GIT_OBJ_OFS_DELTA) { - unsigned used = 0; - unsigned char c = base_info[used++]; - base_offset = c & 127; - while (c & 128) { - base_offset += 1; - if (!base_offset || MSB(base_offset, 7)) - return 0; /* overflow */ - c = base_info[used++]; - base_offset = (base_offset << 7) + (c & 127); - } - base_offset = delta_obj_offset - base_offset; - if (base_offset <= 0 || base_offset >= delta_obj_offset) - return 0; /* out of bound */ - *curpos += used; - } else if (type == GIT_OBJ_REF_DELTA) { - /* The base entry _must_ be in the same pack */ - if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < GIT_SUCCESS) - return git__throw(GIT_EPACKCORRUPTED, "Base entry delta is not in the same pack"); - *curpos += 20; - } else - return 0; - - return base_offset; -} - -static int packfile_unpack_delta( - git_rawobj *obj, - struct pack_backend *backend, - struct pack_file *p, - git_mwindow **w_curs, - off_t curpos, - size_t delta_size, - git_otype delta_type, - off_t obj_offset) -{ - off_t base_offset; - git_rawobj base, delta; - int error; - - base_offset = get_delta_base(p, w_curs, &curpos, delta_type, obj_offset); - if (base_offset == 0) - return git__throw(GIT_EOBJCORRUPTED, "Delta offset is zero"); - - git_mwindow_close(w_curs); - error = packfile_unpack(&base, backend, p, base_offset); - - /* TODO: git.git tries to load the base from other packfiles - * or loose objects */ - if (error < GIT_SUCCESS) - return git__rethrow(error, "Corrupted delta"); - - error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type); - if (error < GIT_SUCCESS) { - free(base.data); - return git__rethrow(error, "Corrupted delta"); - } - - obj->type = base.type; - error = git__delta_apply(obj, - base.data, base.len, - delta.data, delta.len); - - free(base.data); - free(delta.data); - - /* TODO: we might want to cache this shit. 
eventually */ - //add_delta_base_cache(p, base_offset, base, base_size, *type); - return error; /* error set by git__delta_apply */ -} - -static int packfile_unpack( - git_rawobj *obj, - struct pack_backend *backend, - struct pack_file *p, - off_t obj_offset) -{ - git_mwindow *w_curs = NULL; - off_t curpos = obj_offset; - int error; - - size_t size = 0; - git_otype type; - - /* - * TODO: optionally check the CRC on the packfile - */ - - obj->data = NULL; - obj->len = 0; - obj->type = GIT_OBJ_BAD; - - error = packfile_unpack_header(&size, &type, p, &w_curs, &curpos); - if (error < GIT_SUCCESS) - return git__rethrow(error, "Failed to unpack packfile"); - - switch (type) { - case GIT_OBJ_OFS_DELTA: - case GIT_OBJ_REF_DELTA: - error = packfile_unpack_delta( - obj, backend, p, &w_curs, curpos, - size, type, obj_offset); - break; - - case GIT_OBJ_COMMIT: - case GIT_OBJ_TREE: - case GIT_OBJ_BLOB: - case GIT_OBJ_TAG: - error = packfile_unpack_compressed( - obj, p, &w_curs, curpos, - size, type); - break; - - default: - error = GIT_EOBJCORRUPTED; - break; - } - - git_mwindow_close(&w_curs); - return error == GIT_SUCCESS ? 
GIT_SUCCESS : git__rethrow(error, "Failed to unpack packfile"); -} - - - - - /*********************************************************** * * PACKED BACKEND PUBLIC API @@ -1218,7 +889,7 @@ int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_od if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); - if ((error = packfile_unpack(&raw, (struct pack_backend *)backend, e.p, e.offset)) < GIT_SUCCESS) + if ((error = packfile_unpack(&raw, e.p, e.offset)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); *buffer_p = raw.data; @@ -1255,7 +926,7 @@ int pack_backend__read_prefix( if ((error = pack_entry_find_prefix(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); - if ((error = packfile_unpack(&raw, (struct pack_backend *)backend, e.p, e.offset)) < GIT_SUCCESS) + if ((error = packfile_unpack(&raw, e.p, e.offset)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); *buffer_p = raw.data; diff --git a/src/pack.c b/src/pack.c new file mode 100644 index 000000000..71319a794 --- /dev/null +++ b/src/pack.c @@ -0,0 +1,300 @@ +/* + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * In addition to the permissions in the GNU General Public License, + * the authors give you unlimited permission to link the compiled + * version of this file into combinations with other programs, + * and to distribute those combinations without any restriction + * coming from the use of this file. (The General Public License + * restrictions do apply in other respects; for example, they cover + * modification of the file, and distribution when not linked into + * a combined executable.) 
+ * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "mwindow.h" +#include "odb.h" +#include "pack.h" +#include "delta-apply.h" + +#include "git2/oid.h" +#include "git2/zlib.h" + +unsigned char *pack_window_open( + struct pack_file *p, + git_mwindow **w_cursor, + off_t offset, + unsigned int *left) +{ + if (p->mwf.fd == -1 && packfile_open(p) < GIT_SUCCESS) + return NULL; + + /* Since packfiles end in a hash of their content and it's + * pointless to ask for an offset into the middle of that + * hash, and the pack_window_contains function above wouldn't match + * don't allow an offset too close to the end of the file. 
+ */ + if (offset > (p->mwf.size - 20)) + return NULL; + + return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left); + } + +static unsigned long packfile_unpack_header1( + size_t *sizep, + git_otype *type, + const unsigned char *buf, + unsigned long len) +{ + unsigned shift; + unsigned long size, c; + unsigned long used = 0; + + c = buf[used++]; + *type = (c >> 4) & 7; + size = c & 15; + shift = 4; + while (c & 0x80) { + if (len <= used || bitsizeof(long) <= shift) + return 0; + + c = buf[used++]; + size += (c & 0x7f) << shift; + shift += 7; + } + + *sizep = (size_t)size; + return used; +} + +int git_packfile_unpack_header( + size_t *size_p, + git_otype *type_p, + git_mwindow_file *mwf, + git_mwindow **w_curs, + off_t *curpos) +{ + unsigned char *base; + unsigned int left; + unsigned long used; + + /* pack_window_open() assures us we have [base, base + 20) available + * as a range that we can look at. (It's actually the hash + * size that is assured.) With our object header encoding + * the maximum deflated object size is 2^137, which is just + * insane, so we know we won't exceed what we have been given. 
+ */ +// base = pack_window_open(p, w_curs, *curpos, &left); + base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left); + if (base == NULL) + return GIT_ENOMEM; + + used = packfile_unpack_header1(size_p, type_p, base, left); + + if (used == 0) + return git__throw(GIT_EOBJCORRUPTED, "Header length is zero"); + + *curpos += used; + return GIT_SUCCESS; +} + +int packfile_unpack_delta( + git_rawobj *obj, + struct pack_file *p, + git_mwindow **w_curs, + off_t curpos, + size_t delta_size, + git_otype delta_type, + off_t obj_offset) +{ + off_t base_offset; + git_rawobj base, delta; + int error; + + base_offset = get_delta_base(p, w_curs, &curpos, delta_type, obj_offset); + if (base_offset == 0) + return git__throw(GIT_EOBJCORRUPTED, "Delta offset is zero"); + + git_mwindow_close(w_curs); + error = packfile_unpack(&base, p, base_offset); + + /* + * TODO: git.git tries to load the base from other packfiles + * or loose objects. + * + * We'll need to do this in order to support thin packs. + */ + if (error < GIT_SUCCESS) + return git__rethrow(error, "Corrupted delta"); + + error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type); + if (error < GIT_SUCCESS) { + free(base.data); + return git__rethrow(error, "Corrupted delta"); + } + + obj->type = base.type; + error = git__delta_apply(obj, + base.data, base.len, + delta.data, delta.len); + + free(base.data); + free(delta.data); + + /* TODO: we might want to cache this shit. 
eventually */ + //add_delta_base_cache(p, base_offset, base, base_size, *type); + return error; /* error set by git__delta_apply */ +} + +int packfile_unpack( + git_rawobj *obj, + struct pack_file *p, + off_t obj_offset) +{ + git_mwindow *w_curs = NULL; + off_t curpos = obj_offset; + int error; + + size_t size = 0; + git_otype type; + + /* + * TODO: optionally check the CRC on the packfile + */ + + obj->data = NULL; + obj->len = 0; + obj->type = GIT_OBJ_BAD; + + error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos); + if (error < GIT_SUCCESS) + return git__rethrow(error, "Failed to unpack packfile"); + + switch (type) { + case GIT_OBJ_OFS_DELTA: + case GIT_OBJ_REF_DELTA: + error = packfile_unpack_delta( + obj, p, &w_curs, curpos, + size, type, obj_offset); + break; + + case GIT_OBJ_COMMIT: + case GIT_OBJ_TREE: + case GIT_OBJ_BLOB: + case GIT_OBJ_TAG: + error = packfile_unpack_compressed( + obj, p, &w_curs, curpos, + size, type); + break; + + default: + error = GIT_EOBJCORRUPTED; + break; + } + + git_mwindow_close(&w_curs); + return error == GIT_SUCCESS ? 
GIT_SUCCESS : git__rethrow(error, "Failed to unpack packfile"); +} + +int packfile_unpack_compressed( + git_rawobj *obj, + struct pack_file *p, + git_mwindow **w_curs, + off_t curpos, + size_t size, + git_otype type) +{ + int st; + z_stream stream; + unsigned char *buffer, *in; + + buffer = git__malloc(size + 1); + memset(buffer, 0x0, size + 1); + + memset(&stream, 0, sizeof(stream)); + stream.next_out = buffer; + stream.avail_out = size + 1; + + st = inflateInit(&stream); + if (st != Z_OK) { + free(buffer); + return git__throw(GIT_EZLIB, "Error in zlib"); + } + + do { + in = pack_window_open(p, w_curs, curpos, &stream.avail_in); + stream.next_in = in; + st = inflate(&stream, Z_FINISH); + + if (!stream.avail_out) + break; /* the payload is larger than it should be */ + + curpos += stream.next_in - in; + } while (st == Z_OK || st == Z_BUF_ERROR); + + inflateEnd(&stream); + + if ((st != Z_STREAM_END) || stream.total_out != size) { + free(buffer); + return git__throw(GIT_EZLIB, "Error in zlib"); + } + + obj->type = type; + obj->len = size; + obj->data = buffer; + return GIT_SUCCESS; +} + +off_t get_delta_base( + struct pack_file *p, + git_mwindow **w_curs, + off_t *curpos, + git_otype type, + off_t delta_obj_offset) +{ + unsigned char *base_info = pack_window_open(p, w_curs, *curpos, NULL); + off_t base_offset; + git_oid unused; + + /* pack_window_open() assured us we have [base_info, base_info + 20) + * as a range that we can look at without walking off the + * end of the mapped window. Its actually the hash size + * that is assured. An OFS_DELTA longer than the hash size + * is stupid, as then a REF_DELTA would be smaller to store. 
+ */ + if (type == GIT_OBJ_OFS_DELTA) { + unsigned used = 0; + unsigned char c = base_info[used++]; + base_offset = c & 127; + while (c & 128) { + base_offset += 1; + if (!base_offset || MSB(base_offset, 7)) + return 0; /* overflow */ + c = base_info[used++]; + base_offset = (base_offset << 7) + (c & 127); + } + base_offset = delta_obj_offset - base_offset; + if (base_offset <= 0 || base_offset >= delta_obj_offset) + return 0; /* out of bound */ + *curpos += used; + } else if (type == GIT_OBJ_REF_DELTA) { + /* The base entry _must_ be in the same pack */ + if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < GIT_SUCCESS) + return git__throw(GIT_EPACKCORRUPTED, "Base entry delta is not in the same pack"); + *curpos += 20; + } else + return 0; + + return base_offset; +} diff --git a/src/pack.h b/src/pack.h index a5525459e..732f88b4a 100644 --- a/src/pack.h +++ b/src/pack.h @@ -31,6 +31,7 @@ #include "common.h" #include "map.h" #include "mwindow.h" +#include "odb.h" #define PACK_SIGNATURE 0x5041434b /* "PACK" */ #define PACK_VERSION 2 @@ -67,9 +68,7 @@ struct pack_idx_header { }; struct pack_file { - int pack_fd; git_mwindow_file mwf; - off_t pack_size; git_map index_map; uint32_t num_objects; @@ -91,4 +90,29 @@ struct pack_entry { struct pack_file *p; }; +static unsigned char *pack_window_open(struct pack_file *p, + git_mwindow **w_cursor, off_t offset, unsigned int *left); + +int git_packfile_unpack_header( + size_t *size_p, + git_otype *type_p, + git_mwindow_file *mwf, + git_mwindow **w_curs, + off_t *curpos); + +int packfile_unpack_delta( + git_rawobj *obj, + struct pack_file *p, + git_mwindow **w_curs, + off_t curpos, + size_t delta_size, + git_otype delta_type, + off_t obj_offset); + +int packfile_unpack(git_rawobj *obj, struct pack_file *p, off_t obj_offset); + +off_t get_delta_base(struct pack_file *p, git_mwindow **w_curs, + off_t *curpos, git_otype type, + off_t delta_obj_offset); + #endif From 
a070f152bdeaeef61e7b0624231fe1d946b0043e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 29 Jul 2011 01:08:02 +0200 Subject: [PATCH 07/12] Move pack functions to their own file --- src/indexer.c | 8 +- src/odb_pack.c | 550 +++---------------------------------------------- src/pack.c | 499 +++++++++++++++++++++++++++++++++++++++++++- src/pack.h | 34 ++- 4 files changed, 534 insertions(+), 557 deletions(-) diff --git a/src/indexer.c b/src/indexer.c index 929eb3194..19409598a 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -33,7 +33,7 @@ #include "posix.h" typedef struct git_indexer { - struct pack_file *pack; + struct git_pack_file *pack; git_vector objects; git_vector deltas; struct stat st; @@ -42,7 +42,7 @@ typedef struct git_indexer { static int parse_header(git_indexer *idx) { - struct pack_header hdr; + struct git_pack_header hdr; int error; /* Verify we recognize this pack file format. */ @@ -96,11 +96,11 @@ int git_indexer_new(git_indexer **out, const char *packname) memset(idx, 0x0, sizeof(*idx)); namelen = strlen(packname); - idx->pack = git__malloc(sizeof(struct pack_file) + namelen + 1); + idx->pack = git__malloc(sizeof(struct git_pack_file) + namelen + 1); if (idx->pack == NULL) goto cleanup; - memset(idx->pack, 0x0, sizeof(struct pack_file)); + memset(idx->pack, 0x0, sizeof(struct git_pack_file)); memcpy(idx->pack->pack_name, packname, namelen); ret = p_stat(packname, &idx->st); diff --git a/src/odb_pack.c b/src/odb_pack.c index 9c92ea3c2..a661c1c41 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -40,7 +40,7 @@ struct pack_backend { git_odb_backend parent; git_vector packs; - struct pack_file *last_found; + struct git_pack_file *last_found; char *pack_folder; time_t pack_folder_mtime; }; @@ -146,45 +146,15 @@ struct pack_backend { * ***********************************************************/ -static void pack_window_free_all(struct pack_backend *backend, struct pack_file *p); +static void 
pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p); static int pack_window_contains(git_mwindow *win, off_t offset); static int packfile_sort__cb(const void *a_, const void *b_); -static void pack_index_free(struct pack_file *p); - -static int pack_index_check(const char *path, struct pack_file *p); -static int pack_index_open(struct pack_file *p); - -static struct pack_file *packfile_alloc(int extra); -static int packfile_open(struct pack_file *p); -static int packfile_check(struct pack_file **pack_out, const char *path); static int packfile_load__cb(void *_data, char *path); static int packfile_refresh_all(struct pack_backend *backend); -static off_t nth_packed_object_offset(const struct pack_file *p, uint32_t n); - -/* Can find the offset of an object given - * a prefix of an identifier. - * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid - * is ambiguous within the pack. - * This method assumes that len is between - * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ. - */ -static int pack_entry_find_offset( - off_t *offset_out, - git_oid *found_oid, - struct pack_file *p, - const git_oid *short_oid, - unsigned int len); - -static int pack_entry_find1( - struct pack_entry *e, - struct pack_file *p, - const git_oid *short_oid, - unsigned int len); - -static int pack_entry_find(struct pack_entry *e, +static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid); /* Can find the offset of an object given @@ -194,7 +164,7 @@ static int pack_entry_find(struct pack_entry *e, * This method assumes that len is between * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ. 
*/ -static int pack_entry_find_prefix(struct pack_entry *e, +static int pack_entry_find_prefix(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *short_oid, unsigned int len); @@ -207,7 +177,7 @@ static int pack_entry_find_prefix(struct pack_entry *e, * ***********************************************************/ -GIT_INLINE(void) pack_window_free_all(struct pack_backend *GIT_UNUSED(backend), struct pack_file *p) +GIT_INLINE(void) pack_window_free_all(struct pack_backend *GIT_UNUSED(backend), struct git_pack_file *p) { git_mwindow_free_all(&p->mwf); } @@ -223,173 +193,10 @@ GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset) return git_mwindow_contains(win, offset + 20); } - -/*********************************************************** - * - * PACK INDEX METHODS - * - ***********************************************************/ - -static void pack_index_free(struct pack_file *p) -{ - if (p->index_map.data) { - git_futils_mmap_free(&p->index_map); - p->index_map.data = NULL; - } -} - -static int pack_index_check(const char *path, struct pack_file *p) -{ - struct pack_idx_header *hdr; - uint32_t version, nr, i, *index; - - void *idx_map; - size_t idx_size; - - struct stat st; - - /* TODO: properly open the file without access time */ - git_file fd = p_open(path, O_RDONLY /*| O_NOATIME */); - - int error; - - if (fd < 0) - return git__throw(GIT_EOSERR, "Failed to check index. File missing or corrupted"); - - if (p_fstat(fd, &st) < GIT_SUCCESS) { - p_close(fd); - return git__throw(GIT_EOSERR, "Failed to check index. File appears to be corrupted"); - } - - if (!git__is_sizet(st.st_size)) - return GIT_ENOMEM; - - idx_size = (size_t)st.st_size; - - if (idx_size < 4 * 256 + 20 + 20) { - p_close(fd); - return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. 
Object is corrupted"); - } - - error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size); - p_close(fd); - - if (error < GIT_SUCCESS) - return git__rethrow(error, "Failed to check index"); - - hdr = idx_map = p->index_map.data; - - if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { - version = ntohl(hdr->idx_version); - - if (version < 2 || version > 2) { - git_futils_mmap_free(&p->index_map); - return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Unsupported index version"); - } - - } else - version = 1; - - nr = 0; - index = idx_map; - - if (version > 1) - index += 2; /* skip index header */ - - for (i = 0; i < 256; i++) { - uint32_t n = ntohl(index[i]); - if (n < nr) { - git_futils_mmap_free(&p->index_map); - return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Index is non-monotonic"); - } - nr = n; - } - - if (version == 1) { - /* - * Total size: - * - 256 index entries 4 bytes each - * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) - * - 20-byte SHA1 of the packfile - * - 20-byte SHA1 file checksum - */ - if (idx_size != 4*256 + nr * 24 + 20 + 20) { - git_futils_mmap_free(&p->index_map); - return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Object is corrupted"); - } - } else if (version == 2) { - /* - * Minimum size: - * - 8 bytes of header - * - 256 index entries 4 bytes each - * - 20-byte sha1 entry * nr - * - 4-byte crc entry * nr - * - 4-byte offset entry * nr - * - 20-byte SHA1 of the packfile - * - 20-byte SHA1 file checksum - * And after the 4-byte offset table might be a - * variable sized table containing 8-byte entries - * for offsets larger than 2^31. - */ - unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20; - unsigned long max_size = min_size; - - if (nr) - max_size += (nr - 1)*8; - - if (idx_size < min_size || idx_size > max_size) { - git_futils_mmap_free(&p->index_map); - return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. 
Wrong index size"); - } - - /* Make sure that off_t is big enough to access the whole pack... - * Is this an issue in libgit2? It shouldn't. */ - if (idx_size != min_size && (sizeof(off_t) <= 4)) { - git_futils_mmap_free(&p->index_map); - return git__throw(GIT_EOSERR, "Failed to check index. off_t not big enough to access the whole pack"); - } - } - - p->index_version = version; - p->num_objects = nr; - return GIT_SUCCESS; -} - -static int pack_index_open(struct pack_file *p) -{ - char *idx_name; - int error; - - if (p->index_map.data) - return GIT_SUCCESS; - - idx_name = git__strdup(p->pack_name); - strcpy(idx_name + strlen(idx_name) - STRLEN(".pack"), ".idx"); - - error = pack_index_check(idx_name, p); - free(idx_name); - - return error == GIT_SUCCESS ? GIT_SUCCESS : git__rethrow(error, "Failed to open index"); -} - - - - - - - - - -/*********************************************************** - * - * PACKFILE METHODS - * - ***********************************************************/ - static int packfile_sort__cb(const void *a_, const void *b_) { - const struct pack_file *a = a_; - const struct pack_file *b = b_; + const struct git_pack_file *a = a_; + const struct git_pack_file *b = b_; int st; /* @@ -415,157 +222,12 @@ static int packfile_sort__cb(const void *a_, const void *b_) return -1; } -static struct pack_file *packfile_alloc(int extra) -{ - struct pack_file *p = git__malloc(sizeof(*p) + extra); - memset(p, 0, sizeof(*p)); - p->mwf.fd = -1; - return p; -} -static void packfile_free(struct pack_backend *backend, struct pack_file *p) -{ - assert(p); - - /* clear_delta_base_cache(); */ - pack_window_free_all(backend, p); - - if (p->mwf.fd != -1) - p_close(p->mwf.fd); - - pack_index_free(p); - - free(p->bad_object_sha1); - free(p); -} - -static int packfile_open(struct pack_file *p) -{ - struct stat st; - struct pack_header hdr; - git_oid sha1; - unsigned char *idx_sha1; - - if (!p->index_map.data && pack_index_open(p) < GIT_SUCCESS) - return 
git__throw(GIT_ENOTFOUND, "Failed to open packfile. File not found"); - - /* TODO: open with noatime */ - p->mwf.fd = p_open(p->pack_name, O_RDONLY); - if (p->mwf.fd < 0 || p_fstat(p->mwf.fd, &st) < GIT_SUCCESS) - return git__throw(GIT_EOSERR, "Failed to open packfile. File appears to be corrupted"); - - if (git_mwindow_file_register(&p->mwf) < GIT_SUCCESS) { - p_close(p->mwf.fd); - return git__throw(GIT_ERROR, "Failed to register packfile windows"); - } - - /* If we created the struct before we had the pack we lack size. */ - if (!p->mwf.size) { - if (!S_ISREG(st.st_mode)) - goto cleanup; - p->mwf.size = (off_t)st.st_size; - } else if (p->mwf.size != st.st_size) - goto cleanup; - -#if 0 - /* We leave these file descriptors open with sliding mmap; - * there is no point keeping them open across exec(), though. - */ - fd_flag = fcntl(p->mwf.fd, F_GETFD, 0); - if (fd_flag < 0) - return error("cannot determine file descriptor flags"); - - fd_flag |= FD_CLOEXEC; - if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1) - return GIT_EOSERR; -#endif - - /* Verify we recognize this pack file format. */ - if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < GIT_SUCCESS) - goto cleanup; - - if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) - goto cleanup; - - if (!pack_version_ok(hdr.hdr_version)) - goto cleanup; - - /* Verify the pack matches its index. */ - if (p->num_objects != ntohl(hdr.hdr_entries)) - goto cleanup; - - if (p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1) - goto cleanup; - - if (p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < GIT_SUCCESS) - goto cleanup; - - idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40; - - if (git_oid_cmp(&sha1, (git_oid *)idx_sha1) != 0) - goto cleanup; - - return GIT_SUCCESS; - -cleanup: - p_close(p->mwf.fd); - p->mwf.fd = -1; - return git__throw(GIT_EPACKCORRUPTED, "Failed to packfile. 
Pack is corrupted"); -} - -static int packfile_check(struct pack_file **pack_out, const char *path) -{ - struct stat st; - struct pack_file *p; - size_t path_len; - - *pack_out = NULL; - path_len = strlen(path); - p = packfile_alloc(path_len + 2); - - /* - * Make sure a corresponding .pack file exists and that - * the index looks sane. - */ - path_len -= STRLEN(".idx"); - if (path_len < 1) { - free(p); - return git__throw(GIT_ENOTFOUND, "Failed to check packfile. Wrong path name"); - } - - memcpy(p->pack_name, path, path_len); - - strcpy(p->pack_name + path_len, ".keep"); - if (git_futils_exists(p->pack_name) == GIT_SUCCESS) - p->pack_keep = 1; - - strcpy(p->pack_name + path_len, ".pack"); - if (p_stat(p->pack_name, &st) < GIT_SUCCESS || !S_ISREG(st.st_mode)) { - free(p); - return git__throw(GIT_ENOTFOUND, "Failed to check packfile. File not found"); - } - - /* ok, it looks sane as far as we can check without - * actually mapping the pack file. - */ - p->mwf.size = (off_t)st.st_size; - p->pack_local = 1; - p->mtime = (git_time_t)st.st_mtime; - - /* see if we can parse the sha1 oid in the packfile name */ - if (path_len < 40 || - git_oid_fromstr(&p->sha1, path + path_len - GIT_OID_HEXSZ) < GIT_SUCCESS) - memset(&p->sha1, 0x0, GIT_OID_RAWSZ); - - *pack_out = p; - return GIT_SUCCESS; -} - static int packfile_load__cb(void *_data, char *path) { struct pack_backend *backend = (struct pack_backend *)_data; - struct pack_file *pack; + struct git_pack_file *pack; int error; size_t i; @@ -573,12 +235,12 @@ static int packfile_load__cb(void *_data, char *path) return GIT_SUCCESS; /* not an index */ for (i = 0; i < backend->packs.length; ++i) { - struct pack_file *p = git_vector_get(&backend->packs, i); + struct git_pack_file *p = git_vector_get(&backend->packs, i); if (memcmp(p->pack_name, path, strlen(path) - STRLEN(".idx")) == 0) return GIT_SUCCESS; } - error = packfile_check(&pack, path); + error = git_packfile_check(&pack, path); if (error < GIT_SUCCESS) return 
git__rethrow(error, "Failed to load packfile"); @@ -617,169 +279,7 @@ static int packfile_refresh_all(struct pack_backend *backend) return GIT_SUCCESS; } - - - - - - - -/*********************************************************** - * - * PACKFILE ENTRY SEARCH INTERNALS - * - ***********************************************************/ - -static off_t nth_packed_object_offset(const struct pack_file *p, uint32_t n) -{ - const unsigned char *index = p->index_map.data; - index += 4 * 256; - if (p->index_version == 1) { - return ntohl(*((const uint32_t *)(index + 24 * n))); - } else { - uint32_t off; - index += 8 + p->num_objects * (20 + 4); - off = ntohl(*((const uint32_t *)(index + 4 * n))); - if (!(off & 0x80000000)) - return off; - index += p->num_objects * 4 + (off & 0x7fffffff) * 8; - return (((uint64_t)ntohl(*((const uint32_t *)(index + 0)))) << 32) | - ntohl(*((const uint32_t *)(index + 4))); - } -} - -static int pack_entry_find_offset( - off_t *offset_out, - git_oid *found_oid, - struct pack_file *p, - const git_oid *short_oid, - unsigned int len) -{ - const uint32_t *level1_ofs = p->index_map.data; - const unsigned char *index = p->index_map.data; - unsigned hi, lo, stride; - int pos, found = 0; - const unsigned char *current = 0; - - *offset_out = 0; - - if (index == NULL) { - int error; - - if ((error = pack_index_open(p)) < GIT_SUCCESS) - return git__rethrow(error, "Failed to find offset for pack entry"); - - assert(p->index_map.data); - - index = p->index_map.data; - level1_ofs = p->index_map.data; - } - - if (p->index_version > 1) { - level1_ofs += 2; - index += 8; - } - - index += 4 * 256; - hi = ntohl(level1_ofs[(int)short_oid->id[0]]); - lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1])); - - if (p->index_version > 1) { - stride = 20; - } else { - stride = 24; - index += 4; - } - -#ifdef INDEX_DEBUG_LOOKUP - printf("%02x%02x%02x... 
lo %u hi %u nr %d\n", - short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects); -#endif - - /* Use git.git lookup code */ - pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id); - - if (pos >= 0) { - /* An object matching exactly the oid was found */ - found = 1; - current = index + pos * stride; - } else { - /* No object was found */ - /* pos refers to the object with the "closest" oid to short_oid */ - pos = - 1 - pos; - if (pos < (int)p->num_objects) { - current = index + pos * stride; - - if (!git_oid_ncmp(short_oid, (const git_oid *)current, len)) { - found = 1; - } - } - } - - if (found && pos + 1 < (int)p->num_objects) { - /* Check for ambiguousity */ - const unsigned char *next = current + stride; - - if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) { - found = 2; - } - } - - if (!found) { - return git__throw(GIT_ENOTFOUND, "Failed to find offset for pack entry. Entry not found"); - } else if (found > 1) { - return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find offset for pack entry. Ambiguous sha1 prefix within pack"); - } else { - *offset_out = nth_packed_object_offset(p, pos); - git_oid_fromraw(found_oid, current); - -#ifdef INDEX_DEBUG_LOOKUP - unsigned char hex_sha1[GIT_OID_HEXSZ + 1]; - git_oid_fmt(hex_sha1, found_oid); - hex_sha1[GIT_OID_HEXSZ] = '\0'; - printf("found lo=%d %s\n", lo, hex_sha1); -#endif - return GIT_SUCCESS; - } -} - -static int pack_entry_find1( - struct pack_entry *e, - struct pack_file *p, - const git_oid *short_oid, - unsigned int len) -{ - off_t offset; - git_oid found_oid; - int error; - - assert(p); - - if (len == GIT_OID_HEXSZ && p->num_bad_objects) { - unsigned i; - for (i = 0; i < p->num_bad_objects; i++) - if (git_oid_cmp(short_oid, &p->bad_object_sha1[i]) == 0) - return git__throw(GIT_ERROR, "Failed to find pack entry. 
Bad object found"); - } - - error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len); - if (error < GIT_SUCCESS) - return git__rethrow(error, "Failed to find pack entry. Couldn't find offset"); - - /* we found a unique entry in the index; - * make sure the packfile backing the index - * still exists on disk */ - if (p->mwf.fd == -1 && packfile_open(p) < GIT_SUCCESS) - return git__throw(GIT_EOSERR, "Failed to find pack entry. Packfile doesn't exist on disk"); - - e->offset = offset; - e->p = p; - - git_oid_cpy(&e->sha1, &found_oid); - return GIT_SUCCESS; -} - -static int pack_entry_find(struct pack_entry *e, struct pack_backend *backend, const git_oid *oid) +static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid) { int error; size_t i; @@ -788,17 +288,17 @@ static int pack_entry_find(struct pack_entry *e, struct pack_backend *backend, c return git__rethrow(error, "Failed to find pack entry"); if (backend->last_found && - pack_entry_find1(e, backend->last_found, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) + git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) return GIT_SUCCESS; for (i = 0; i < backend->packs.length; ++i) { - struct pack_file *p; + struct git_pack_file *p; p = git_vector_get(&backend->packs, i); if (p == backend->last_found) continue; - if (pack_entry_find1(e, p, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) { + if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) { backend->last_found = p; return GIT_SUCCESS; } @@ -808,7 +308,7 @@ static int pack_entry_find(struct pack_entry *e, struct pack_backend *backend, c } static int pack_entry_find_prefix( - struct pack_entry *e, + struct git_pack_entry *e, struct pack_backend *backend, const git_oid *short_oid, unsigned int len) @@ -821,7 +321,7 @@ static int pack_entry_find_prefix( return git__rethrow(error, "Failed to find pack entry"); if (backend->last_found) { - error = pack_entry_find1(e, backend->last_found, short_oid, 
len); + error = git_pack_entry_find(e, backend->last_found, short_oid, len); if (error == GIT_EAMBIGUOUSOIDPREFIX) { return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix"); } else if (error == GIT_SUCCESS) { @@ -830,13 +330,13 @@ static int pack_entry_find_prefix( } for (i = 0; i < backend->packs.length; ++i) { - struct pack_file *p; + struct git_pack_file *p; p = git_vector_get(&backend->packs, i); if (p == backend->last_found) continue; - error = pack_entry_find1(e, p, short_oid, len); + error = git_pack_entry_find(e, p, short_oid, len); if (error == GIT_EAMBIGUOUSOIDPREFIX) { return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix"); } else if (error == GIT_SUCCESS) { @@ -882,14 +382,14 @@ int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const g int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid) { - struct pack_entry e; + struct git_pack_entry e; git_rawobj raw; int error; if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); - if ((error = packfile_unpack(&raw, e.p, e.offset)) < GIT_SUCCESS) + if ((error = git_packfile_unpack(&raw, e.p, e.offset)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); *buffer_p = raw.data; @@ -919,14 +419,14 @@ int pack_backend__read_prefix( return error; } else { - struct pack_entry e; + struct git_pack_entry e; git_rawobj raw; int error; if ((error = pack_entry_find_prefix(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); - if ((error = packfile_unpack(&raw, e.p, e.offset)) < GIT_SUCCESS) + if ((error = git_packfile_unpack(&raw, e.p, e.offset)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); *buffer_p = raw.data; @@ -940,7 +440,7 @@ int pack_backend__read_prefix( int 
pack_backend__exists(git_odb_backend *backend, const git_oid *oid) { - struct pack_entry e; + struct git_pack_entry e; return pack_entry_find(&e, (struct pack_backend *)backend, oid) == GIT_SUCCESS; } @@ -954,8 +454,8 @@ void pack_backend__free(git_odb_backend *_backend) backend = (struct pack_backend *)_backend; for (i = 0; i < backend->packs.length; ++i) { - struct pack_file *p = git_vector_get(&backend->packs, i); - packfile_free(backend, p); + struct git_pack_file *p = git_vector_get(&backend->packs, i); + packfile_free(p); } git_vector_free(&backend->packs); diff --git a/src/pack.c b/src/pack.c index 71319a794..dca1903bd 100644 --- a/src/pack.c +++ b/src/pack.c @@ -27,12 +27,185 @@ #include "odb.h" #include "pack.h" #include "delta-apply.h" +#include "sha1_lookup.h" #include "git2/oid.h" #include "git2/zlib.h" -unsigned char *pack_window_open( - struct pack_file *p, +static int packfile_open(struct git_pack_file *p); +static off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n); +int packfile_unpack_compressed( + git_rawobj *obj, + struct git_pack_file *p, + git_mwindow **w_curs, + off_t curpos, + size_t size, + git_otype type); + +/* Can find the offset of an object given + * a prefix of an identifier. + * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid + * is ambiguous within the pack. + * This method assumes that len is between + * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ. 
+ */ +static int pack_entry_find_offset( + off_t *offset_out, + git_oid *found_oid, + struct git_pack_file *p, + const git_oid *short_oid, + unsigned int len); + +/*********************************************************** + * + * PACK INDEX METHODS + * + ***********************************************************/ + +static void pack_index_free(struct git_pack_file *p) +{ + if (p->index_map.data) { + git_futils_mmap_free(&p->index_map); + p->index_map.data = NULL; + } +} + +static int pack_index_check(const char *path, struct git_pack_file *p) +{ + struct git_pack_idx_header *hdr; + uint32_t version, nr, i, *index; + + void *idx_map; + size_t idx_size; + + struct stat st; + + /* TODO: properly open the file without access time */ + git_file fd = p_open(path, O_RDONLY /*| O_NOATIME */); + + int error; + + if (fd < 0) + return git__throw(GIT_EOSERR, "Failed to check index. File missing or corrupted"); + + if (p_fstat(fd, &st) < GIT_SUCCESS) { + p_close(fd); + return git__throw(GIT_EOSERR, "Failed to check index. File appears to be corrupted"); + } + + if (!git__is_sizet(st.st_size)) + return GIT_ENOMEM; + + idx_size = (size_t)st.st_size; + + if (idx_size < 4 * 256 + 20 + 20) { + p_close(fd); + return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Object is corrupted"); + } + + error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size); + p_close(fd); + + if (error < GIT_SUCCESS) + return git__rethrow(error, "Failed to check index"); + + hdr = idx_map = p->index_map.data; + + if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { + version = ntohl(hdr->idx_version); + + if (version < 2 || version > 2) { + git_futils_mmap_free(&p->index_map); + return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. 
Unsupported index version"); + } + + } else + version = 1; + + nr = 0; + index = idx_map; + + if (version > 1) + index += 2; /* skip index header */ + + for (i = 0; i < 256; i++) { + uint32_t n = ntohl(index[i]); + if (n < nr) { + git_futils_mmap_free(&p->index_map); + return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Index is non-monotonic"); + } + nr = n; + } + + if (version == 1) { + /* + * Total size: + * - 256 index entries 4 bytes each + * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) + * - 20-byte SHA1 of the packfile + * - 20-byte SHA1 file checksum + */ + if (idx_size != 4*256 + nr * 24 + 20 + 20) { + git_futils_mmap_free(&p->index_map); + return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Object is corrupted"); + } + } else if (version == 2) { + /* + * Minimum size: + * - 8 bytes of header + * - 256 index entries 4 bytes each + * - 20-byte sha1 entry * nr + * - 4-byte crc entry * nr + * - 4-byte offset entry * nr + * - 20-byte SHA1 of the packfile + * - 20-byte SHA1 file checksum + * And after the 4-byte offset table might be a + * variable sized table containing 8-byte entries + * for offsets larger than 2^31. + */ + unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20; + unsigned long max_size = min_size; + + if (nr) + max_size += (nr - 1)*8; + + if (idx_size < min_size || idx_size > max_size) { + git_futils_mmap_free(&p->index_map); + return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Wrong index size"); + } + + /* Make sure that off_t is big enough to access the whole pack... + * Is this an issue in libgit2? It shouldn't. */ + if (idx_size != min_size && (sizeof(off_t) <= 4)) { + git_futils_mmap_free(&p->index_map); + return git__throw(GIT_EOSERR, "Failed to check index. 
off_t not big enough to access the whole pack"); + } + } + + p->index_version = version; + p->num_objects = nr; + return GIT_SUCCESS; +} + +static int pack_index_open(struct git_pack_file *p) +{ + char *idx_name; + int error; + + if (p->index_map.data) + return GIT_SUCCESS; + + idx_name = git__strdup(p->pack_name); + strcpy(idx_name + strlen(idx_name) - STRLEN(".pack"), ".idx"); + + error = pack_index_check(idx_name, p); + free(idx_name); + + return error == GIT_SUCCESS ? GIT_SUCCESS : git__rethrow(error, "Failed to open index"); +} + +static unsigned char *pack_window_open( + struct git_pack_file *p, git_mwindow **w_cursor, off_t offset, unsigned int *left) @@ -109,9 +282,9 @@ int git_packfile_unpack_header( return GIT_SUCCESS; } -int packfile_unpack_delta( +static int packfile_unpack_delta( git_rawobj *obj, - struct pack_file *p, + struct git_pack_file *p, git_mwindow **w_curs, off_t curpos, size_t delta_size, @@ -127,7 +300,7 @@ int packfile_unpack_delta( return git__throw(GIT_EOBJCORRUPTED, "Delta offset is zero"); git_mwindow_close(w_curs); - error = packfile_unpack(&base, p, base_offset); + error = git_packfile_unpack(&base, p, base_offset); /* * TODO: git.git tries to load the base from other packfiles @@ -157,9 +330,9 @@ int packfile_unpack_delta( return error; /* error set by git__delta_apply */ } -int packfile_unpack( +int git_packfile_unpack( git_rawobj *obj, - struct pack_file *p, + struct git_pack_file *p, off_t obj_offset) { git_mwindow *w_curs = NULL; @@ -209,7 +382,7 @@ int packfile_unpack( int packfile_unpack_compressed( git_rawobj *obj, - struct pack_file *p, + struct git_pack_file *p, git_mwindow **w_curs, off_t curpos, size_t size, @@ -257,7 +430,7 @@ int packfile_unpack_compressed( } off_t get_delta_base( - struct pack_file *p, + struct git_pack_file *p, git_mwindow **w_curs, off_t *curpos, git_otype type, @@ -298,3 +471,311 @@ off_t get_delta_base( return base_offset; } + +/*********************************************************** + * + * 
PACKFILE METHODS + * + ***********************************************************/ + +static struct git_pack_file *packfile_alloc(int extra) +{ + struct git_pack_file *p = git__malloc(sizeof(*p) + extra); + memset(p, 0, sizeof(*p)); + p->mwf.fd = -1; + return p; +} + + +void packfile_free(struct git_pack_file *p) +{ + assert(p); + + /* clear_delta_base_cache(); */ + git_mwindow_free_all(&p->mwf); + + if (p->mwf.fd != -1) + p_close(p->mwf.fd); + + pack_index_free(p); + + free(p->bad_object_sha1); + free(p); +} + +static int packfile_open(struct git_pack_file *p) +{ + struct stat st; + struct git_pack_header hdr; + git_oid sha1; + unsigned char *idx_sha1; + + if (!p->index_map.data && pack_index_open(p) < GIT_SUCCESS) + return git__throw(GIT_ENOTFOUND, "Failed to open packfile. File not found"); + + /* TODO: open with noatime */ + p->mwf.fd = p_open(p->pack_name, O_RDONLY); + if (p->mwf.fd < 0 || p_fstat(p->mwf.fd, &st) < GIT_SUCCESS) + return git__throw(GIT_EOSERR, "Failed to open packfile. File appears to be corrupted"); + + if (git_mwindow_file_register(&p->mwf) < GIT_SUCCESS) { + p_close(p->mwf.fd); + return git__throw(GIT_ERROR, "Failed to register packfile windows"); + } + + /* If we created the struct before we had the pack we lack size. */ + if (!p->mwf.size) { + if (!S_ISREG(st.st_mode)) + goto cleanup; + p->mwf.size = (off_t)st.st_size; + } else if (p->mwf.size != st.st_size) + goto cleanup; + +#if 0 + /* We leave these file descriptors open with sliding mmap; + * there is no point keeping them open across exec(), though. + */ + fd_flag = fcntl(p->mwf.fd, F_GETFD, 0); + if (fd_flag < 0) + return error("cannot determine file descriptor flags"); + + fd_flag |= FD_CLOEXEC; + if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1) + return GIT_EOSERR; +#endif + + /* Verify we recognize this pack file format. 
*/ + if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < GIT_SUCCESS) + goto cleanup; + + if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) + goto cleanup; + + if (!pack_version_ok(hdr.hdr_version)) + goto cleanup; + + /* Verify the pack matches its index. */ + if (p->num_objects != ntohl(hdr.hdr_entries)) + goto cleanup; + + if (p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1) + goto cleanup; + + if (p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < GIT_SUCCESS) + goto cleanup; + + idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40; + + if (git_oid_cmp(&sha1, (git_oid *)idx_sha1) != 0) + goto cleanup; + + return GIT_SUCCESS; + +cleanup: + p_close(p->mwf.fd); + p->mwf.fd = -1; + return git__throw(GIT_EPACKCORRUPTED, "Failed to open packfile. Pack is corrupted"); +} + +int git_packfile_check(struct git_pack_file **pack_out, const char *path) +{ + struct stat st; + struct git_pack_file *p; + size_t path_len; + + *pack_out = NULL; + path_len = strlen(path); + p = packfile_alloc(path_len + 2); + + /* + * Make sure a corresponding .pack file exists and that + * the index looks sane. + */ + path_len -= STRLEN(".idx"); + if (path_len < 1) { + free(p); + return git__throw(GIT_ENOTFOUND, "Failed to check packfile. Wrong path name"); + } + + memcpy(p->pack_name, path, path_len); + + strcpy(p->pack_name + path_len, ".keep"); + if (git_futils_exists(p->pack_name) == GIT_SUCCESS) + p->pack_keep = 1; + + strcpy(p->pack_name + path_len, ".pack"); + if (p_stat(p->pack_name, &st) < GIT_SUCCESS || !S_ISREG(st.st_mode)) { + free(p); + return git__throw(GIT_ENOTFOUND, "Failed to check packfile. File not found"); + } + + /* ok, it looks sane as far as we can check without + * actually mapping the pack file. 
+ */ + p->mwf.size = (off_t)st.st_size; + p->pack_local = 1; + p->mtime = (git_time_t)st.st_mtime; + + /* see if we can parse the sha1 oid in the packfile name */ + if (path_len < 40 || + git_oid_fromstr(&p->sha1, path + path_len - GIT_OID_HEXSZ) < GIT_SUCCESS) + memset(&p->sha1, 0x0, GIT_OID_RAWSZ); + + *pack_out = p; + return GIT_SUCCESS; +} + +/*********************************************************** + * + * PACKFILE ENTRY SEARCH INTERNALS + * + ***********************************************************/ + +static off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n) +{ + const unsigned char *index = p->index_map.data; + index += 4 * 256; + if (p->index_version == 1) { + return ntohl(*((uint32_t *)(index + 24 * n))); + } else { + uint32_t off; + index += 8 + p->num_objects * (20 + 4); + off = ntohl(*((uint32_t *)(index + 4 * n))); + if (!(off & 0x80000000)) + return off; + index += p->num_objects * 4 + (off & 0x7fffffff) * 8; + return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) | + ntohl(*((uint32_t *)(index + 4))); + } +} + +static int pack_entry_find_offset( + off_t *offset_out, + git_oid *found_oid, + struct git_pack_file *p, + const git_oid *short_oid, + unsigned int len) +{ + const uint32_t *level1_ofs = p->index_map.data; + const unsigned char *index = p->index_map.data; + unsigned hi, lo, stride; + int pos, found = 0; + const unsigned char *current = 0; + + *offset_out = 0; + + if (index == NULL) { + int error; + + if ((error = pack_index_open(p)) < GIT_SUCCESS) + return git__rethrow(error, "Failed to find offset for pack entry"); + + assert(p->index_map.data); + + index = p->index_map.data; + level1_ofs = p->index_map.data; + } + + if (p->index_version > 1) { + level1_ofs += 2; + index += 8; + } + + index += 4 * 256; + hi = ntohl(level1_ofs[(int)short_oid->id[0]]); + lo = ((short_oid->id[0] == 0x0) ? 
0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1])); + + if (p->index_version > 1) { + stride = 20; + } else { + stride = 24; + index += 4; + } + +#ifdef INDEX_DEBUG_LOOKUP + printf("%02x%02x%02x... lo %u hi %u nr %d\n", + short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects); +#endif + + /* Use git.git lookup code */ + pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id); + + if (pos >= 0) { + /* An object matching exactly the oid was found */ + found = 1; + current = index + pos * stride; + } else { + /* No object was found */ + /* pos refers to the object with the "closest" oid to short_oid */ + pos = - 1 - pos; + if (pos < (int)p->num_objects) { + current = index + pos * stride; + + if (!git_oid_ncmp(short_oid, (const git_oid *)current, len)) { + found = 1; + } + } + } + + if (found && pos + 1 < (int)p->num_objects) { + /* Check for ambiguity */ + const unsigned char *next = current + stride; + + if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) { + found = 2; + } + } + + if (!found) { + return git__throw(GIT_ENOTFOUND, "Failed to find offset for pack entry. Entry not found"); + } else if (found > 1) { + return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find offset for pack entry. 
Ambiguous sha1 prefix within pack"); + } else { + *offset_out = nth_packed_object_offset(p, pos); + git_oid_fromraw(found_oid, current); + +#ifdef INDEX_DEBUG_LOOKUP + unsigned char hex_sha1[GIT_OID_HEXSZ + 1]; + git_oid_fmt(hex_sha1, found_oid); + hex_sha1[GIT_OID_HEXSZ] = '\0'; + printf("found lo=%d %s\n", lo, hex_sha1); +#endif + return GIT_SUCCESS; + } +} + +int git_pack_entry_find( + struct git_pack_entry *e, + struct git_pack_file *p, + const git_oid *short_oid, + unsigned int len) +{ + off_t offset; + git_oid found_oid; + int error; + + assert(p); + + if (len == GIT_OID_HEXSZ && p->num_bad_objects) { + unsigned i; + for (i = 0; i < p->num_bad_objects; i++) + if (git_oid_cmp(short_oid, &p->bad_object_sha1[i]) == 0) + return git__throw(GIT_ERROR, "Failed to find pack entry. Bad object found"); + } + + error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len); + if (error < GIT_SUCCESS) + return git__rethrow(error, "Failed to find pack entry. Couldn't find offset"); + + /* we found a unique entry in the index; + * make sure the packfile backing the index + * still exists on disk */ + if (p->mwf.fd == -1 && packfile_open(p) < GIT_SUCCESS) + return git__throw(GIT_EOSERR, "Failed to find pack entry. 
Packfile doesn't exist on disk"); + + e->offset = offset; + e->p = p; + + git_oid_cpy(&e->sha1, &found_oid); + return GIT_SUCCESS; +} diff --git a/src/pack.h b/src/pack.h index 732f88b4a..bc1215252 100644 --- a/src/pack.h +++ b/src/pack.h @@ -36,7 +36,7 @@ #define PACK_SIGNATURE 0x5041434b /* "PACK" */ #define PACK_VERSION 2 #define pack_version_ok(v) ((v) == htonl(2) || (v) == htonl(3)) -struct pack_header { +struct git_pack_header { uint32_t hdr_signature; uint32_t hdr_version; uint32_t hdr_entries; @@ -62,12 +62,12 @@ struct pack_header { #define PACK_IDX_SIGNATURE 0xff744f63 /* "\377tOc" */ -struct pack_idx_header { +struct git_pack_idx_header { uint32_t idx_signature; uint32_t idx_version; }; -struct pack_file { +struct git_pack_file { git_mwindow_file mwf; git_map index_map; @@ -84,15 +84,12 @@ struct pack_file { char pack_name[GIT_FLEX_ARRAY]; /* more */ }; -struct pack_entry { +struct git_pack_entry { off_t offset; git_oid sha1; - struct pack_file *p; + struct git_pack_file *p; }; -static unsigned char *pack_window_open(struct pack_file *p, - git_mwindow **w_cursor, off_t offset, unsigned int *left); - int git_packfile_unpack_header( size_t *size_p, git_otype *type_p, @@ -100,19 +97,18 @@ int git_packfile_unpack_header( git_mwindow **w_curs, off_t *curpos); -int packfile_unpack_delta( - git_rawobj *obj, - struct pack_file *p, - git_mwindow **w_curs, - off_t curpos, - size_t delta_size, - git_otype delta_type, - off_t obj_offset); +int git_packfile_unpack(git_rawobj *obj, struct git_pack_file *p, off_t obj_offset); -int packfile_unpack(git_rawobj *obj, struct pack_file *p, off_t obj_offset); - -off_t get_delta_base(struct pack_file *p, git_mwindow **w_curs, +off_t get_delta_base(struct git_pack_file *p, git_mwindow **w_curs, off_t *curpos, git_otype type, off_t delta_obj_offset); +void packfile_free(struct git_pack_file *p); +int git_packfile_check(struct git_pack_file **pack_out, const char *path); +int git_pack_entry_find( + struct git_pack_entry *e, + 
struct git_pack_file *p, + const git_oid *short_oid, + unsigned int len); + #endif From b5b474dd0de88c0decd1bf6f4a4b202c8081bf43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 28 Jul 2011 11:45:46 +0200 Subject: [PATCH 08/12] Modify the given offset in git_packfile_unpack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The callers immediately throw away the offset, so we don't need any logical changes in any of them. This will be useful for the indexer, as it does need to know where the compressed data ends. Signed-off-by: Carlos Martín Nieto --- src/indexer.c | 152 ++++++++++++++++++++----------------------------- src/odb_pack.c | 4 +- src/pack.c | 35 +++++++----- src/pack.h | 2 +- 4 files changed, 88 insertions(+), 105 deletions(-) diff --git a/src/indexer.c b/src/indexer.c index 19409598a..6de0fec00 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -32,55 +32,37 @@ #include "mwindow.h" #include "posix.h" +struct entry { + unsigned char sha[GIT_OID_RAWSZ]; + uint32_t crc; + uint32_t offset; + uint64_t offset_long; +}; + typedef struct git_indexer { struct git_pack_file *pack; - git_vector objects; - git_vector deltas; struct stat st; git_indexer_stats stats; + struct git_pack_header hdr; + struct entry *objects; } git_indexer; static int parse_header(git_indexer *idx) { - struct git_pack_header hdr; int error; /* Verify we recognize this pack file format. 
*/ - if ((error = p_read(idx->pack->mwf.fd, &hdr, sizeof(hdr))) < GIT_SUCCESS) - goto cleanup; + if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS) + return git__rethrow(error, "Failed to read in pack header"); - if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) { - error = git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature"); - goto cleanup; - } + if (idx->hdr.hdr_signature != htonl(PACK_SIGNATURE)) + return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature"); - if (!pack_version_ok(hdr.hdr_version)) { - error = git__throw(GIT_EOBJCORRUPTED, "Wrong pack version"); - goto cleanup; - } + if (!pack_version_ok(idx->hdr.hdr_version)) + return git__throw(GIT_EOBJCORRUPTED, "Wrong pack version"); - /* - * FIXME: At this point we have no idea how many of the are - * deltas, so assume all objects are both until we get a better - * idea - */ - error = git_vector_init(&idx->objects, hdr.hdr_entries, NULL /* FIXME: probably need something */); - if (error < GIT_SUCCESS) - goto cleanup; - - error = git_vector_init(&idx->deltas, hdr.hdr_entries, NULL /* FIXME: probably need something */); - if (error < GIT_SUCCESS) - goto cleanup; - - idx->stats.total = hdr.hdr_entries; return GIT_SUCCESS; - -cleanup: - git_vector_free(&idx->objects); - git_vector_free(&idx->deltas); - - return error; } int git_indexer_new(git_indexer **out, const char *packname) @@ -127,6 +109,14 @@ int git_indexer_new(git_indexer **out, const char *packname) goto cleanup; } + idx->objects = git__calloc(sizeof(struct entry), idx->hdr.hdr_entries); + if (idx->objects == NULL) { + error = GIT_ENOMEM; + goto cleanup; + } + + idx->stats.total = idx->hdr.hdr_entries; + *out = idx; return GIT_SUCCESS; @@ -138,41 +128,6 @@ cleanup: return error; } -/* - * Parse the variable-width length and return it. Assumes that the - * whole number exists inside the buffer. 
As this is the git format, - * the first byte only contains length information in the lower nibble - * because the higher one is used for type and continuation. The - * output parameter is necessary because we don't know how long the - * entry is actually going to be. - */ -static unsigned long entry_len(const char **bufout, const char *buf) -{ - unsigned long size, c; - const char *p = buf; - unsigned shift; - - c = *p; - size = c & 0xf; - shift = 4; - - /* As long as the MSB is set, we need to continue */ - while (c & 0x80) { - p++; - c = *p; - size += (c & 0x7f) << shift; - shift += 7; - } - - *bufout = p; - return size; -} - -static git_otype entry_type(const char *buf) -{ - return (*buf >> 4) & 7; -} - /* * Create the index. Every time something interesting happens * (something has been parse or resolved), the callback gets called @@ -181,12 +136,12 @@ static git_otype entry_type(const char *buf) int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *cb_data) { git_mwindow_file *mwf = &idx->pack->mwf; - git_mwindow *w = NULL; off_t off = 0; int error; - const char *ptr; unsigned int fanout[256] = {0}; + /* FIXME: Write the keep file */ + error = git_mwindow_file_register(mwf); if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to register mwindow file"); @@ -196,29 +151,45 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void cb(&idx->stats, cb_data); while (idx->stats.processed < idx->stats.total) { - size_t size; - git_otype type; + git_rawobj obj; + git_oid oid; + struct entry entry; + char hdr[512] = {0}; /* FIXME: How long should this be? */ + int i, hdr_len; - error = git_packfile_unpack_header(&size, &type, mwf, &w, &off); + memset(&entry, 0x0, sizeof(entry)); /* Necessary? 
*/ - switch (type) { - case GIT_OBJ_COMMIT: - case GIT_OBJ_TREE: - case GIT_OBJ_BLOB: - case GIT_OBJ_TAG: - break; - default: - error = git__throw(GIT_EOBJCORRUPTED, "Invalid object type"); + if (off > UINT31_MAX) { + entry.offset = ~0ULL; + entry.offset_long = off; + } else { + entry.offset = off; + } + + error = git_packfile_unpack(&obj, idx->pack, &off); + if (error < GIT_SUCCESS) { + error = git__rethrow(error, "Failed to unpack object"); goto cleanup; } - /* - * Do we need to uncompress everything if we're not running in - * strict mode? Or at least can't we free the data? - */ + error = git_odb__hash_obj(&oid, hdr, sizeof(hdr), &hdr_len, &obj); + if (error < GIT_SUCCESS) { + error = git__rethrow(error, "Failed to hash object"); + goto cleanup; + } - /* Get a window for the compressed data */ - //ptr = git_mwindow_open(mwf, &w, idx->pack->pack_fd, size, data - ptr, 0, NULL); + memcpy(&entry.sha, oid.id, GIT_OID_RAWSZ); + /* entry.crc = crc32(obj.data) */ + + /* Add the object to the list */ + //memcpy(&idx->objects[idx->stats.processed], &entry, sizeof(entry)); + idx->objects[idx->stats.processed] = entry; + + for (i = oid.id[0]; i < 256; ++i) { + fanout[i]++; + } + + free(obj.data); idx->stats.processed++; @@ -227,6 +198,10 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void } + /* + * All's gone well, so let's write the index file. 
+ */ + cleanup: git_mwindow_free_all(mwf); @@ -237,8 +212,7 @@ cleanup: void git_indexer_free(git_indexer *idx) { p_close(idx->pack->mwf.fd); - git_vector_free(&idx->objects); - git_vector_free(&idx->deltas); + free(idx->objects); free(idx->pack); free(idx); } diff --git a/src/odb_pack.c b/src/odb_pack.c index a661c1c41..0d6bb05cc 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -389,7 +389,7 @@ int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_od if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); - if ((error = git_packfile_unpack(&raw, e.p, e.offset)) < GIT_SUCCESS) + if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); *buffer_p = raw.data; @@ -426,7 +426,7 @@ int pack_backend__read_prefix( if ((error = pack_entry_find_prefix(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); - if ((error = git_packfile_unpack(&raw, e.p, e.offset)) < GIT_SUCCESS) + if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS) return git__rethrow(error, "Failed to read pack backend"); *buffer_p = raw.data; diff --git a/src/pack.c b/src/pack.c index dca1903bd..f7bad2f80 100644 --- a/src/pack.c +++ b/src/pack.c @@ -38,7 +38,7 @@ int packfile_unpack_compressed( git_rawobj *obj, struct git_pack_file *p, git_mwindow **w_curs, - off_t curpos, + off_t *curpos, size_t size, git_otype type); @@ -286,7 +286,7 @@ static int packfile_unpack_delta( git_rawobj *obj, struct git_pack_file *p, git_mwindow **w_curs, - off_t curpos, + off_t *curpos, size_t delta_size, git_otype delta_type, off_t obj_offset) @@ -295,12 +295,12 @@ static int packfile_unpack_delta( git_rawobj base, delta; int error; - base_offset = get_delta_base(p, w_curs, &curpos, delta_type, obj_offset); + base_offset = get_delta_base(p, w_curs, 
curpos, delta_type, obj_offset); if (base_offset == 0) return git__throw(GIT_EOBJCORRUPTED, "Delta offset is zero"); git_mwindow_close(w_curs); - error = git_packfile_unpack(&base, p, base_offset); + error = git_packfile_unpack(&base, p, &base_offset); /* * TODO: git.git tries to load the base from other packfiles @@ -333,10 +333,10 @@ static int packfile_unpack_delta( int git_packfile_unpack( git_rawobj *obj, struct git_pack_file *p, - off_t obj_offset) + off_t *obj_offset) { git_mwindow *w_curs = NULL; - off_t curpos = obj_offset; + off_t curpos = *obj_offset; int error; size_t size = 0; @@ -358,8 +358,8 @@ int git_packfile_unpack( case GIT_OBJ_OFS_DELTA: case GIT_OBJ_REF_DELTA: error = packfile_unpack_delta( - obj, p, &w_curs, curpos, - size, type, obj_offset); + obj, p, &w_curs, &curpos, + size, type, *obj_offset); break; case GIT_OBJ_COMMIT: @@ -367,7 +367,7 @@ int git_packfile_unpack( case GIT_OBJ_BLOB: case GIT_OBJ_TAG: error = packfile_unpack_compressed( - obj, p, &w_curs, curpos, + obj, p, &w_curs, &curpos, size, type); break; @@ -377,14 +377,19 @@ int git_packfile_unpack( } git_mwindow_close(&w_curs); - return error == GIT_SUCCESS ? 
GIT_SUCCESS : git__rethrow(error, "Failed to unpack packfile"); + + if (error < GIT_SUCCESS) + return git__rethrow(error, "Failed to unpack object"); + + *obj_offset = curpos; + return GIT_SUCCESS; } int packfile_unpack_compressed( git_rawobj *obj, struct git_pack_file *p, git_mwindow **w_curs, - off_t curpos, + off_t *curpos, size_t size, git_otype type) { @@ -406,14 +411,14 @@ int packfile_unpack_compressed( } do { - in = pack_window_open(p, w_curs, curpos, &stream.avail_in); + in = pack_window_open(p, w_curs, *curpos, &stream.avail_in); stream.next_in = in; st = inflate(&stream, Z_FINISH); if (!stream.avail_out) break; /* the payload is larger than it should be */ - curpos += stream.next_in - in; + *curpos += stream.next_in - in; } while (st == Z_OK || st == Z_BUF_ERROR); inflateEnd(&stream); @@ -429,6 +434,10 @@ int packfile_unpack_compressed( return GIT_SUCCESS; } +/* + * curpos is where the data starts, delta_obj_offset is where the + * header starts + */ off_t get_delta_base( struct git_pack_file *p, git_mwindow **w_curs, diff --git a/src/pack.h b/src/pack.h index bc1215252..a7112a6aa 100644 --- a/src/pack.h +++ b/src/pack.h @@ -97,7 +97,7 @@ int git_packfile_unpack_header( git_mwindow **w_curs, off_t *curpos); -int git_packfile_unpack(git_rawobj *obj, struct git_pack_file *p, off_t obj_offset); +int git_packfile_unpack(git_rawobj *obj, struct git_pack_file *p, off_t *obj_offset); off_t get_delta_base(struct git_pack_file *p, git_mwindow **w_curs, off_t *curpos, git_otype type, From bcf21c556c2bbc46a93e81a19c5f9112dfb8f2c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 28 Jul 2011 23:59:53 +0200 Subject: [PATCH 09/12] Add git_vector_foreach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit You can use it just as you'd use a for-loop Signed-off-by: Carlos Martín Nieto --- src/vector.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vector.h b/src/vector.h index 
76778ba4e..c43a7ce07 100644 --- a/src/vector.h +++ b/src/vector.h @@ -30,6 +30,9 @@ GIT_INLINE(void *) git_vector_get(git_vector *v, unsigned int position) return (position < v->length) ? v->contents[position] : NULL; } +#define git_vector_foreach(v, iter, elem) \ + for ((iter) = 0; (iter) < (v)->length && ((elem) = (v)->contents[(iter)], 1); (iter)++ ) + int git_vector_insert(git_vector *v, void *element); int git_vector_remove(git_vector *v, unsigned int idx); void git_vector_uniq(git_vector *v); From b7c44096ae12c47085978a4992d4f8cdf7946db4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 28 Jul 2011 23:35:39 +0200 Subject: [PATCH 10/12] Implement the indexer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only v2 index files are supported. Signed-off-by: Carlos Martín Nieto --- include/git2/indexer.h | 4 +- src/indexer.c | 251 ++++++++++++++++++++++++++++++++++------- 2 files changed, 212 insertions(+), 43 deletions(-) diff --git a/include/git2/indexer.h b/include/git2/indexer.h index f32b1ef6b..1f59ee314 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -2,6 +2,7 @@ #define _INCLUDE_git_indexer_h__ #include "git2/common.h" +#include "git2/oid.h" typedef struct git_indexer_stats { unsigned int total; @@ -12,7 +13,8 @@ typedef struct git_indexer_stats { typedef struct git_indexer git_indexer; GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname); -GIT_EXTERN(int) git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data); +GIT_EXTERN(int) git_indexer_run(git_indexer *idx, git_indexer_stats *stats); +GIT_EXTERN(const git_oid *) git_indexer_result(git_indexer *idx); GIT_EXTERN(void) git_indexer_free(git_indexer *idx); diff --git a/src/indexer.c b/src/indexer.c index 6de0fec00..4def1af9e 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -26,14 +26,20 @@ #include "git2/indexer.h" #include "git2/object.h" #include 
"git2/zlib.h" +#include "git2/oid.h" #include "common.h" #include "pack.h" #include "mwindow.h" #include "posix.h" +#include "pack.h" +#include "filebuf.h" +#include "sha1.h" + +#define UINT31_MAX (0x7FFFFFFF) struct entry { - unsigned char sha[GIT_OID_RAWSZ]; + git_oid oid; uint32_t crc; uint32_t offset; uint64_t offset_long; @@ -42,11 +48,19 @@ struct entry { typedef struct git_indexer { struct git_pack_file *pack; struct stat st; - git_indexer_stats stats; struct git_pack_header hdr; - struct entry *objects; + size_t nr_objects; + git_vector objects; + git_filebuf file; + unsigned int fanout[256]; + git_oid hash; } git_indexer; +const git_oid *git_indexer_hash(git_indexer *idx) +{ + return &idx->hash; +} + static int parse_header(git_indexer *idx) { int error; @@ -55,7 +69,7 @@ static int parse_header(git_indexer *idx) if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS) return git__rethrow(error, "Failed to read in pack header"); - if (idx->hdr.hdr_signature != htonl(PACK_SIGNATURE)) + if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE)) return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature"); if (!pack_version_ok(idx->hdr.hdr_version)) @@ -65,12 +79,23 @@ static int parse_header(git_indexer *idx) return GIT_SUCCESS; } +int objects_cmp(const void *a, const void *b) +{ + const struct entry *entrya = a; + const struct entry *entryb = b; + + return git_oid_cmp(&entrya->oid, &entryb->oid); +} + int git_indexer_new(git_indexer **out, const char *packname) { git_indexer *idx; unsigned int namelen; int ret, error; + if (git_path_root(packname) < 0) + return git__throw(GIT_EINVALIDPATH, "Path is not absolute"); + idx = git__malloc(sizeof(git_indexer)); if (idx == NULL) return GIT_ENOMEM; @@ -83,7 +108,7 @@ int git_indexer_new(git_indexer **out, const char *packname) goto cleanup; memset(idx->pack, 0x0, sizeof(struct git_pack_file)); - memcpy(idx->pack->pack_name, packname, namelen); + memcpy(idx->pack->pack_name, packname, namelen + 
1); ret = p_stat(packname, &idx->st); if (ret < 0) { @@ -102,6 +127,7 @@ int git_indexer_new(git_indexer **out, const char *packname) } idx->pack->mwf.fd = ret; + idx->pack->mwf.size = idx->st.st_size; error = parse_header(idx); if (error < GIT_SUCCESS) { @@ -109,61 +135,187 @@ int git_indexer_new(git_indexer **out, const char *packname) goto cleanup; } - idx->objects = git__calloc(sizeof(struct entry), idx->hdr.hdr_entries); - if (idx->objects == NULL) { - error = GIT_ENOMEM; + idx->nr_objects = ntohl(idx->hdr.hdr_entries); + + error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp); + if (error < GIT_SUCCESS) { goto cleanup; } - idx->stats.total = idx->hdr.hdr_entries; - *out = idx; return GIT_SUCCESS; cleanup: - free(idx->pack); - free(idx); + git_indexer_free(idx); return error; } -/* - * Create the index. Every time something interesting happens - * (something has been parse or resolved), the callback gets called - * with some stats so it can tell the user how hard we're working - */ -int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *cb_data) +static void index_path(char *path, git_indexer *idx) { - git_mwindow_file *mwf = &idx->pack->mwf; - off_t off = 0; + char *ptr; + const char prefix[] = "pack-", suffix[] = ".idx\0"; + + ptr = strrchr(path, '/') + 1; + + memcpy(ptr, prefix, STRLEN(prefix)); + ptr += STRLEN(prefix); + git_oid_fmt(ptr, &idx->hash); + ptr += GIT_OID_HEXSZ; + memcpy(ptr, suffix, STRLEN(suffix)); +} + +static int write_index(git_indexer *idx) +{ + git_mwindow *w = NULL; + int error, namelen; + unsigned int i, long_offsets, left; + struct git_pack_idx_header hdr; + char filename[GIT_PATH_MAX]; + struct entry *entry; + void *packfile_hash; + git_oid file_hash; + SHA_CTX ctx; + + git_vector_sort(&idx->objects); + + namelen = strlen(idx->pack->pack_name); + memcpy(filename, idx->pack->pack_name, namelen); + memcpy(filename + namelen - STRLEN("pack"), "idx\0", STRLEN("idx\0")); + + error = 
git_filebuf_open(&idx->file, filename, GIT_FILEBUF_HASH_CONTENTS); + + /* Write out the header */ + hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); + hdr.idx_version = htonl(2); + error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr)); + + /* Write out the fanout table */ + for (i = 0; i < 256; ++i) { + uint32_t n = htonl(idx->fanout[i]); + error = git_filebuf_write(&idx->file, &n, sizeof(n)); + if (error < GIT_SUCCESS) + goto cleanup; + } + + /* Write out the object names (SHA-1 hashes) */ + SHA1_Init(&ctx); + git_vector_foreach(&idx->objects, i, entry) { + error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid)); + SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ); + if (error < GIT_SUCCESS) + goto cleanup; + } + SHA1_Final(idx->hash.id, &ctx); + + /* Write out the CRC32 values */ + git_vector_foreach(&idx->objects, i, entry) { + error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t)); + if (error < GIT_SUCCESS) + goto cleanup; + } + + /* Write out the offsets */ + git_vector_foreach(&idx->objects, i, entry) { + uint32_t n; + + if (entry->offset == UINT32_MAX) + n = htonl(0x80000000 | long_offsets++); + else + n = htonl(entry->offset); + + error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t)); + if (error < GIT_SUCCESS) + goto cleanup; + } + + /* Write out the long offsets */ + git_vector_foreach(&idx->objects, i, entry) { + uint32_t split[2]; + + if (entry->offset != UINT32_MAX) + continue; + + split[0] = htonl(entry->offset_long >> 32); + split[1] = htonl(entry->offset_long & 0xffffffff); + + error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2); + if (error < GIT_SUCCESS) + goto cleanup; + } + + /* Write out the packfile trailer */ + + packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); + if (packfile_hash == NULL) { + error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash"); + goto cleanup; + } + + memcpy(&file_hash, packfile_hash, 
GIT_OID_RAWSZ); + + git_mwindow_close(&w); + + error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); + + /* Write out the index sha */ + error = git_filebuf_hash(&file_hash, &idx->file); + if (error < GIT_SUCCESS) + goto cleanup; + + error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); + if (error < GIT_SUCCESS) + goto cleanup; + + /* Figure out what the final name should be */ + index_path(filename, idx); + /* Commit file */ + error = git_filebuf_commit_at(&idx->file, filename); + +cleanup: + if (error < GIT_SUCCESS) + git_filebuf_cleanup(&idx->file); + + return error; +} + +int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) +{ + git_mwindow_file *mwf; + off_t off = sizeof(struct git_pack_header); int error; - unsigned int fanout[256] = {0}; + struct entry *entry; + unsigned int left, processed; - /* FIXME: Write the keep file */ + assert(idx && stats); + mwf = &idx->pack->mwf; error = git_mwindow_file_register(mwf); if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to register mwindow file"); - /* Notify before the first one */ - if (cb) - cb(&idx->stats, cb_data); + stats->total = idx->nr_objects; + stats->processed = processed = 0; - while (idx->stats.processed < idx->stats.total) { + while (processed < idx->nr_objects) { git_rawobj obj; git_oid oid; - struct entry entry; + git_mwindow *w = NULL; char hdr[512] = {0}; /* FIXME: How long should this be? */ int i, hdr_len; + off_t entry_start = off; + void *packed; + size_t entry_size; - memset(&entry, 0x0, sizeof(entry)); /* Necessary? 
*/ + entry = git__malloc(sizeof(struct entry)); + memset(entry, 0x0, sizeof(struct entry)); if (off > UINT31_MAX) { - entry.offset = ~0ULL; - entry.offset_long = off; + entry->offset = UINT32_MAX; + entry->offset_long = off; } else { - entry.offset = off; + entry->offset = off; } error = git_packfile_unpack(&obj, idx->pack, &off); @@ -178,30 +330,40 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void goto cleanup; } - memcpy(&entry.sha, oid.id, GIT_OID_RAWSZ); - /* entry.crc = crc32(obj.data) */ + git_oid_cpy(&entry->oid, &oid); + entry->crc = crc32(0L, Z_NULL, 0); + + entry_size = off - entry_start; + packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); + if (packed == NULL) { + error = git__rethrow(error, "Failed to open window to read packed data"); + goto cleanup; + } + entry->crc = htonl(crc32(entry->crc, packed, entry_size)); + git_mwindow_close(&w); /* Add the object to the list */ - //memcpy(&idx->objects[idx->stats.processed], &entry, sizeof(entry)); - idx->objects[idx->stats.processed] = entry; + error = git_vector_insert(&idx->objects, entry); + if (error < GIT_SUCCESS) { + error = git__rethrow(error, "Failed to add entry to list"); + goto cleanup; + } for (i = oid.id[0]; i < 256; ++i) { - fanout[i]++; + idx->fanout[i]++; } free(obj.data); - idx->stats.processed++; - - if (cb) - cb(&idx->stats, cb_data); - + stats->processed = ++processed; } /* * All's gone well, so let's write the index file. 
*/ + error = write_index(idx); + /* Delete keep file */ cleanup: git_mwindow_free_all(mwf); @@ -211,8 +373,13 @@ cleanup: void git_indexer_free(git_indexer *idx) { + unsigned int i; + struct entry *e; + p_close(idx->pack->mwf.fd); - free(idx->objects); + git_vector_foreach(&idx->objects, i, e) + free(e); + git_vector_free(&idx->objects); free(idx->pack); free(idx); } From 48b3ad4f15a55ea6406958159b8d63b89b5dffdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 1 Aug 2011 14:02:09 +0200 Subject: [PATCH 11/12] Move pack index writing to a public function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Carlos Martín Nieto --- include/git2/indexer.h | 1 + src/indexer.c | 8 +------- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/git2/indexer.h b/include/git2/indexer.h index 1f59ee314..9273efca7 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -15,6 +15,7 @@ typedef struct git_indexer git_indexer; GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname); GIT_EXTERN(int) git_indexer_run(git_indexer *idx, git_indexer_stats *stats); GIT_EXTERN(const git_oid *) git_indexer_result(git_indexer *idx); +GIT_EXTERN(int) git_indexer_write(git_indexer *idx); GIT_EXTERN(void) git_indexer_free(git_indexer *idx); diff --git a/src/indexer.c b/src/indexer.c index 4def1af9e..7a2b28ae3 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -166,7 +166,7 @@ static void index_path(char *path, git_indexer *idx) memcpy(ptr, suffix, STRLEN(suffix)); } -static int write_index(git_indexer *idx) +int git_indexer_write(git_indexer *idx) { git_mwindow *w = NULL; int error, namelen; @@ -358,12 +358,6 @@ int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) stats->processed = ++processed; } - /* - * All's gone well, so let's write the index file. 
- */ - error = write_index(idx); - - /* Delete keep file */ cleanup: git_mwindow_free_all(mwf); From 65cb1586c45b6ca2e74753b93e8677edcae903ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 1 Aug 2011 16:46:36 +0200 Subject: [PATCH 12/12] Document the indexer calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Carlos Martín Nieto --- include/git2/indexer.h | 47 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/include/git2/indexer.h b/include/git2/indexer.h index 9273efca7..6c31956b1 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -4,6 +4,10 @@ #include "git2/common.h" #include "git2/oid.h" +/** + * This is passed to git_indexer_run() and updated while it runs to allow + * the user to see the progress. + */ typedef struct git_indexer_stats { unsigned int total; unsigned int processed; @@ -12,10 +16,51 @@ typedef struct git_indexer_stats { typedef struct git_indexer git_indexer; +/** + * Create a new indexer instance + * + * @param out where to store the indexer instance + * @param packname the absolute filename of the packfile to index + */ GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname); + +/** + * Iterate over the objects in the packfile and extract the information + * + * Indexing a packfile can be very expensive so this function is + * expected to be run in a worker thread and the stats used to provide + * feedback to the user. + * + * @param idx the indexer instance + * @param stats storage for the running state + */ GIT_EXTERN(int) git_indexer_run(git_indexer *idx, git_indexer_stats *stats); -GIT_EXTERN(const git_oid *) git_indexer_result(git_indexer *idx); + +/** + * Write the index file to disk. + * + * The file will be stored as pack-$hash.idx in the same directory as + * the packfile. 
+ * + * @param idx the indexer instance + */ GIT_EXTERN(int) git_indexer_write(git_indexer *idx); + +/** + * Get the packfile's hash + * + * A packfile's name is derived from the sorted hashing of all object + * names. This is only correct after the index has been written to disk. + * + * @param idx the indexer instance + */ +GIT_EXTERN(const git_oid *) git_indexer_hash(git_indexer *idx); + +/** + * Free the indexer and its resources + * + * @param idx the indexer to free + */ GIT_EXTERN(void) git_indexer_free(git_indexer *idx);