From afeecf4f262b74270368ef8a70c582ea9d5a18e8 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Sat, 9 Jul 2011 02:10:46 +0200 Subject: [PATCH] odb: Direct writes are back DIRECT WRITES ARE BACK AND FASTER THAN EVER. The streaming writer to the ODB was an overkill for the smaller objects like Commit and Tags; most of the streaming logic was taking too long. This commit makes Commits, Tags and Trees to be built-up in memory, and then written to disk in 2 pushes (header + data), instead of streaming everything. This is *always* faster, even for big files (since the git_filebuf class still does streaming writes when the memory cache overflows). This is also a gazillion lines of code smaller, because we don't have to precompute the final size of the object before starting the stream (this was kind of defeating the point of streaming, anyway). Blobs are still written with full streaming instead of loading them in memory, since this is still the fastest way. A new `git_buf` class has been added. It's missing some features, but it'll get there. --- src/buffer.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ src/buffer.h | 24 ++++++++++++ src/commit.c | 55 +++++++++++---------------- src/filebuf.c | 4 +- src/filebuf.h | 2 +- src/odb_loose.c | 44 ++++++++++++++++++++++ src/oid.c | 10 +++++ src/repository.h | 2 + src/signature.c | 18 +++++++++ src/signature.h | 1 + src/tag.c | 52 +++++++------------------- src/tree.c | 37 +++++++----------- tests/t17-bufs.c | 59 +++++++++++++++++++++++++++++ tests/test_main.c | 2 + 14 files changed, 306 insertions(+), 99 deletions(-) create mode 100644 src/buffer.c create mode 100644 src/buffer.h create mode 100644 tests/t17-bufs.c diff --git a/src/buffer.c b/src/buffer.c new file mode 100644 index 000000000..6af4c9195 --- /dev/null +++ b/src/buffer.c @@ -0,0 +1,95 @@ +#include "buffer.h" +#include "posix.h" +#include + +#define ENSURE_SIZE(b, d) \ + if ((ssize_t)(d) >= buf->asize && git_buf_grow(b, (d)) < GIT_SUCCESS)\ + return; + +int git_buf_grow(git_buf *buf, size_t target_size) +{ + char *new_ptr; + + if (buf->asize < 0) + return GIT_ENOMEM; + + if (buf->asize == 0) + buf->asize = target_size; + + /* grow the buffer size by 1.5, until it's big enough + * to fit our target size */ + while (buf->asize < (int)target_size) + buf->asize = (buf->asize << 1) - (buf->asize >> 1); + + new_ptr = git__realloc(buf->ptr, buf->asize); + if (!new_ptr) { + buf->asize = -1; + return GIT_ENOMEM; + } + + buf->ptr = new_ptr; + return GIT_SUCCESS; +} + +int git_buf_oom(const git_buf *buf) +{ + return (buf->asize < 0); +} + +void git_buf_putc(git_buf *buf, char c) +{ + ENSURE_SIZE(buf, buf->size + 1); + buf->ptr[buf->size++] = c; +} + +void git_buf_put(git_buf *buf, const char *data, size_t len) +{ + ENSURE_SIZE(buf, buf->size + len); + memcpy(buf->ptr + buf->size, data, len); + buf->size += len; +} + +void git_buf_puts(git_buf *buf, const char *string) +{ + git_buf_put(buf, string, strlen(string)); +} + +void git_buf_printf(git_buf *buf, const char *format, ...) +{ + int len; + va_list arglist; + + ENSURE_SIZE(buf, buf->size + 1); + + while (1) { + va_start(arglist, format); + len = p_vsnprintf(buf->ptr + buf->size, buf->asize - buf->size, format, arglist); + va_end(arglist); + + if (len < 0) { + buf->asize = -1; + return; + } + + if (len + 1 <= buf->asize - buf->size) { + buf->size += len; + return; + } + + ENSURE_SIZE(buf, buf->size + len + 1); + } +} + +const char *git_buf_cstr(git_buf *buf) +{ + if (buf->size + 1 >= buf->asize && git_buf_grow(buf, buf->size + 1) < GIT_SUCCESS) + return NULL; + + buf->ptr[buf->size] = '\0'; + return buf->ptr; +} + +void git_buf_free(git_buf *buf) +{ + free(buf->ptr); +} diff --git a/src/buffer.h b/src/buffer.h new file mode 100644 index 000000000..1209340a1 --- /dev/null +++ b/src/buffer.h @@ -0,0 +1,24 @@ +#ifndef INCLUDE_buffer_h__ +#define INCLUDE_buffer_h__ + +#include "common.h" + +typedef struct { + char *ptr; + ssize_t asize, size; +} git_buf; + +#define GIT_BUF_INIT {NULL, 0, 0} + +int git_buf_grow(git_buf *buf, size_t target_size); +int git_buf_oom(const git_buf *buf); +void git_buf_putc(git_buf *buf, char c); +void git_buf_put(git_buf *buf, const char *data, size_t len); +void git_buf_puts(git_buf *buf, const char *string); +void git_buf_printf(git_buf *buf, const char *format, ...) GIT_FORMAT_PRINTF(2, 3); +const char *git_buf_cstr(git_buf *buf); +void git_buf_free(git_buf *buf); + +#define git_buf_PUTS(buf, str) git_buf_put(buf, str, sizeof(str) - 1) + +#endif diff --git a/src/commit.c b/src/commit.c index f7c26a682..05f75731a 100644 --- a/src/commit.c +++ b/src/commit.c @@ -116,53 +116,36 @@ int git_commit_create( int parent_count, const git_commit *parents[]) { - size_t final_size = 0; - int message_length, author_length, committer_length; - - char *author_str, *committer_str; - + git_buf commit = GIT_BUF_INIT; int error, i; - git_odb_stream *stream; - - message_length = strlen(message); - author_length = git_signature__write(&author_str, "author ", author); - committer_length = git_signature__write(&committer_str, "committer ", committer); - - if (author_length < 0 || committer_length < 0) - return git__throw(GIT_EINVALIDARGS, "Cannot create commit. Failed to parse signature"); - - final_size += GIT_OID_LINE_LENGTH("tree"); - final_size += GIT_OID_LINE_LENGTH("parent") * parent_count; - final_size += author_length; - final_size += committer_length; - final_size += 1 + message_length; - - if ((error = git_odb_open_wstream(&stream, repo->db, final_size, GIT_OBJ_COMMIT)) < GIT_SUCCESS) - return git__rethrow(error, "Failed to create commit"); if (git_object_owner((const git_object *)tree) != repo) return git__throw(GIT_EINVALIDARGS, "The given tree does not belong to this repository"); - git__write_oid(stream, "tree", git_object_id((const git_object *)tree)); + git_oid__writebuf(&commit, "tree ", git_object_id((const git_object *)tree)); for (i = 0; i < parent_count; ++i) { - if (git_object_owner((const git_object *)parents[i]) != repo) - return git__throw(GIT_EINVALIDARGS, "The given parent does not belong to this repository"); + if (git_object_owner((const git_object *)parents[i]) != repo) { + error = git__throw(GIT_EINVALIDARGS, "The given parent does not belong to this repository"); + goto cleanup; + } - git__write_oid(stream, "parent", git_object_id((const git_object *)parents[i])); + git_oid__writebuf(&commit, "parent ", git_object_id((const git_object *)parents[i])); } - stream->write(stream, author_str, author_length); - free(author_str); + git_signature__writebuf(&commit, "author ", author); + git_signature__writebuf(&commit, "committer ", committer); - stream->write(stream, committer_str, committer_length); - free(committer_str); + git_buf_putc(&commit, '\n'); + git_buf_puts(&commit, message); - stream->write(stream, "\n", 1); - stream->write(stream, message, message_length); + if (git_buf_oom(&commit)) { + error = git__throw(GIT_ENOMEM, "Not enough memory to build the commit data"); + goto cleanup; + } - error = stream->finalize_write(oid, stream); - stream->free(stream); + error = git_odb_write(oid, git_repository_database(repo), commit.ptr, commit.size, GIT_OBJ_COMMIT); + git_buf_free(&commit); if (error == GIT_SUCCESS && update_ref != NULL) { git_reference *head; @@ -192,6 +175,10 @@ int git_commit_create( return git__rethrow(error, "Failed to create commit"); return GIT_SUCCESS; + +cleanup: + git_buf_free(&commit); + return error; } int commit_parse_buffer(git_commit *commit, const void *data, size_t len) diff --git a/src/filebuf.c b/src/filebuf.c index 86a643d38..1fbbaa3d4 100644 --- a/src/filebuf.c +++ b/src/filebuf.c @@ -374,7 +374,7 @@ int git_filebuf_printf(git_filebuf *file, const char *format, ...) if (len < 0) return git__throw(GIT_EOSERR, "Failed to format string"); - if ((size_t)len <= space_left) { + if ((size_t)len + 1 <= space_left) { file->buf_pos += len; return GIT_SUCCESS; } @@ -384,7 +384,7 @@ int git_filebuf_printf(git_filebuf *file, const char *format, ...) space_left = file->buf_size - file->buf_pos; - } while ((size_t)len <= space_left); + } while ((size_t)len + 1 <= space_left); tmp_buffer = git__malloc(len + 1); if (!tmp_buffer) diff --git a/src/filebuf.h b/src/filebuf.h index 37cb36784..1567b115c 100644 --- a/src/filebuf.h +++ b/src/filebuf.h @@ -41,7 +41,7 @@ typedef struct git_filebuf git_filebuf; int git_filebuf_write(git_filebuf *lock, const void *buff, size_t len); int git_filebuf_reserve(git_filebuf *file, void **buff, size_t len); -int git_filebuf_printf(git_filebuf *file, const char *format, ...); +int git_filebuf_printf(git_filebuf *file, const char *format, ...) GIT_FORMAT_PRINTF(2, 3); int git_filebuf_open(git_filebuf *lock, const char *path, int flags); int git_filebuf_commit(git_filebuf *lock); diff --git a/src/odb_loose.c b/src/odb_loose.c index 2a2c464c7..a3a4e5b56 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -772,6 +772,49 @@ int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend return GIT_SUCCESS; } +int loose_backend__write(git_oid *oid, git_odb_backend *_backend, const void *data, size_t len, git_otype type) +{ + int error, header_len; + char final_path[GIT_PATH_MAX], header[64]; + git_filebuf fbuf; + loose_backend *backend; + + backend = (loose_backend *)_backend; + + /* prepare the header for the file */ + { + header_len = format_object_header(header, sizeof(header), len, type); + if (header_len < GIT_SUCCESS) + return GIT_EOBJCORRUPTED; + } + + git_path_join(final_path, backend->objects_dir, "tmp_object"); + + error = git_filebuf_open(&fbuf, final_path, + GIT_FILEBUF_HASH_CONTENTS | + GIT_FILEBUF_DEFLATE_CONTENTS | + GIT_FILEBUF_TEMPORARY); + + if (error < GIT_SUCCESS) + return error; + + git_filebuf_write(&fbuf, header, header_len); + git_filebuf_write(&fbuf, data, len); + git_filebuf_hash(oid, &fbuf); + + if ((error = object_file_name(final_path, sizeof(final_path), backend->objects_dir, oid)) < GIT_SUCCESS) + goto cleanup; + + if ((error = git_futils_mkpath2file(final_path)) < GIT_SUCCESS) + goto cleanup; + + return git_filebuf_commit_at(&fbuf, final_path); + +cleanup: + git_filebuf_cleanup(&fbuf); + return error; +} + void loose_backend__free(git_odb_backend *_backend) { loose_backend *backend; @@ -800,6 +843,7 @@ int git_odb_backend_loose(git_odb_backend **backend_out, const char *objects_dir backend->fsync_object_files = 0; backend->parent.read = &loose_backend__read; + backend->parent.write = &loose_backend__write; backend->parent.read_prefix = &loose_backend__read_prefix; backend->parent.read_header = &loose_backend__read_header; backend->parent.writestream = &loose_backend__stream; diff --git a/src/oid.c b/src/oid.c index 2348d1434..8274cb55c 100644 --- a/src/oid.c +++ b/src/oid.c @@ -175,6 +175,16 @@ int git__write_oid(git_odb_stream *stream, const char *header, const git_oid *oi return GIT_SUCCESS; } +void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid) +{ + char hex_oid[GIT_OID_HEXSZ]; + + git_oid_fmt(hex_oid, oid); + git_buf_puts(buf, header); + git_buf_put(buf, hex_oid, GIT_OID_HEXSZ); + git_buf_putc(buf, '\n'); +} + void git_oid_fromraw(git_oid *out, const unsigned char *raw) { memcpy(out->id, raw, sizeof(out->id)); diff --git a/src/repository.h b/src/repository.h index bcf9b2bc8..1cf426128 100644 --- a/src/repository.h +++ b/src/repository.h @@ -11,6 +11,7 @@ #include "index.h" #include "cache.h" #include "refs.h" +#include "buffer.h" #define DOT_GIT ".git" #define GIT_DIR DOT_GIT "/" @@ -44,5 +45,6 @@ void git_object__free(void *object); int git__parse_oid(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header); int git__write_oid(git_odb_stream *src, const char *header, const git_oid *oid); +void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid); #endif diff --git a/src/signature.c b/src/signature.c index 6d9569d15..be9ed1ccb 100644 --- a/src/signature.c +++ b/src/signature.c @@ -342,4 +342,22 @@ int git_signature__write(char **signature, const char *header, const git_signatu return sig_buffer_len; } +void git_signature__writebuf(git_buf *buf, const char *header, const git_signature *sig) +{ + int offset, hours, mins; + char sign; + + offset = sig->when.offset; + sign = (sig->when.offset < 0) ? '-' : '+'; + + if (offset < 0) + offset = -offset; + + hours = offset / 60; + mins = offset % 60; + + git_buf_printf(buf, "%s%s <%s> %u %c%02d%02d\n", + header ? header : "", sig->name, sig->email, + (unsigned)sig->when.time, sign, hours, mins); +} diff --git a/src/signature.h b/src/signature.h index feba6578d..41bc25871 100644 --- a/src/signature.h +++ b/src/signature.h @@ -8,5 +8,6 @@ int git_signature__parse(git_signature *sig, const char **buffer_out, const char *buffer_end, const char *header); int git_signature__write(char **signature, const char *header, const git_signature *sig); +void git_signature__writebuf(git_buf *buf, const char *header, const git_signature *sig); #endif diff --git a/src/tag.c b/src/tag.c index 39254a961..12929015e 100644 --- a/src/tag.c +++ b/src/tag.c @@ -189,16 +189,10 @@ int git_tag_create( const char *message, int allow_ref_overwrite) { - size_t final_size = 0; - git_odb_stream *stream; - - const char *type_str; - char *tagger_str; git_reference *new_ref = NULL; - char ref_name[GIT_REFNAME_MAX]; + git_buf tag = GIT_BUF_INIT; - int type_str_len, tag_name_len, tagger_str_len, message_len; int error, should_update_ref = 0; if (git_object_owner(target) != repo) @@ -226,40 +220,20 @@ int git_tag_create( } } - type_str = git_object_type2string(git_object_type(target)); - tagger_str_len = git_signature__write(&tagger_str, "tagger ", tagger); + git_oid__writebuf(&tag, "object ", git_object_id(target)); + git_buf_printf(&tag, "type %s\n", git_object_type2string(git_object_type(target))); + git_buf_printf(&tag, "tag %s\n", tag_name); + git_signature__writebuf(&tag, "tagger ", tagger); + git_buf_putc(&tag, '\n'); + git_buf_puts(&tag, message); - type_str_len = strlen(type_str); - tag_name_len = strlen(tag_name); - message_len = strlen(message); + if (git_buf_oom(&tag)) { + git_buf_free(&tag); + return git__throw(GIT_ENOMEM, "Not enough memory to build the tag data"); + } - final_size += GIT_OID_LINE_LENGTH("object"); - final_size += STRLEN("type ") + type_str_len + 1; - final_size += STRLEN("tag ") + tag_name_len + 1; - final_size += tagger_str_len; - final_size += 1 + message_len; - - if ((error = git_odb_open_wstream(&stream, repo->db, final_size, GIT_OBJ_TAG)) < GIT_SUCCESS) - return git__rethrow(error, "Failed to create tag"); - - git__write_oid(stream, "object", git_object_id(target)); - - stream->write(stream, "type ", STRLEN("type ")); - stream->write(stream, type_str, type_str_len); - - stream->write(stream, "\ntag ", STRLEN("\ntag ")); - stream->write(stream, tag_name, tag_name_len); - stream->write(stream, "\n", 1); - - stream->write(stream, tagger_str, tagger_str_len); - free(tagger_str); - - stream->write(stream, "\n", 1); - stream->write(stream, message, message_len); - - - error = stream->finalize_write(oid, stream); - stream->free(stream); + error = git_odb_write(oid, git_repository_database(repo), tag.ptr, tag.size, GIT_OBJ_TAG); + git_buf_free(&tag); if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to create tag"); diff --git a/src/tree.c b/src/tree.c index fff5068f8..ad9643f69 100644 --- a/src/tree.c +++ b/src/tree.c @@ -421,29 +421,16 @@ int git_treebuilder_remove(git_treebuilder *bld, const char *filename) int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *bld) { - unsigned int i, size = 0; - char filemode[MAX_FILEMODE_BYTES + 1 + 1]; - git_odb_stream *stream; + unsigned int i; int error; + git_buf tree = GIT_BUF_INIT; assert(bld); sort_entries(bld); - for (i = 0; i < bld->entries.length; ++i) { - git_tree_entry *entry = bld->entries.contents[i]; - - if (entry->removed) - continue; - - snprintf(filemode, sizeof(filemode), "%o ", entry->attr); - size += strlen(filemode); - size += entry->filename_len + 1; - size += GIT_OID_RAWSZ; - } - - if ((error = git_odb_open_wstream(&stream, git_repository_database(repo), size, GIT_OBJ_TREE)) < GIT_SUCCESS) - return git__rethrow(error, "Failed to write tree. Can't open write stream"); + /* Grow the buffer beforehand to an estimated size */ + git_buf_grow(&tree, bld->entries.length * 72); for (i = 0; i < bld->entries.length; ++i) { git_tree_entry *entry = bld->entries.contents[i]; @@ -451,14 +438,18 @@ int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *b if (entry->removed) continue; - snprintf(filemode, sizeof(filemode), "%o ", entry->attr); - stream->write(stream, filemode, strlen(filemode)); - stream->write(stream, entry->filename, entry->filename_len + 1); - stream->write(stream, (char *)entry->oid.id, GIT_OID_RAWSZ); + git_buf_printf(&tree, "%o ", entry->attr); + git_buf_put(&tree, entry->filename, entry->filename_len + 1); + git_buf_put(&tree, (char *)entry->oid.id, GIT_OID_RAWSZ); } - error = stream->finalize_write(oid, stream); - stream->free(stream); + if (git_buf_oom(&tree)) { + git_buf_free(&tree); + return git__throw(GIT_ENOMEM, "Not enough memory to build the tree data"); + } + + error = git_odb_write(oid, git_repository_database(repo), tree.ptr, tree.size, GIT_OBJ_TREE); + git_buf_free(&tree); return error == GIT_SUCCESS ? GIT_SUCCESS : git__rethrow(error, "Failed to write tree"); } diff --git a/tests/t17-bufs.c b/tests/t17-bufs.c new file mode 100644 index 000000000..b0269b790 --- /dev/null +++ b/tests/t17-bufs.c @@ -0,0 +1,59 @@ +/* + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * In addition to the permissions in the GNU General Public License, + * the authors give you unlimited permission to link the compiled + * version of this file into combinations with other programs, + * and to distribute those combinations without any restriction + * coming from the use of this file. (The General Public License + * restrictions do apply in other respects; for example, they cover + * modification of the file, and distribution when not linked into + * a combined executable.) + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#include "test_lib.h" +#include "test_helpers.h" + +#include +#include "buffer.h" + +const char *test_string = "Have you seen that? Have you seeeen that??"; + +BEGIN_TEST(buf0, "check that resizing works properly") + git_buf buf = GIT_BUF_INIT; + git_buf_puts(&buf, test_string); + + must_be_true(git_buf_oom(&buf) == 0); + must_be_true(strcmp(git_buf_cstr(&buf), test_string) == 0); + + git_buf_puts(&buf, test_string); + must_be_true(strlen(git_buf_cstr(&buf)) == strlen(test_string) * 2); +END_TEST + +BEGIN_TEST(buf1, "check that printf works properly") + git_buf buf = GIT_BUF_INIT; + + git_buf_printf(&buf, "%s %s %d ", "shoop", "da", 23); + must_be_true(git_buf_oom(&buf) == 0); + must_be_true(strcmp(git_buf_cstr(&buf), "shoop da 23 ") == 0); + + git_buf_printf(&buf, "%s %d", "woop", 42); + must_be_true(git_buf_oom(&buf) == 0); + must_be_true(strcmp(git_buf_cstr(&buf), "shoop da 23 woop 42") == 0); +END_TEST + +BEGIN_SUITE(buffers) + ADD_TEST(buf0) + ADD_TEST(buf1) +END_SUITE diff --git a/tests/test_main.c b/tests/test_main.c index aab6c068b..2d3e5f954 100644 --- a/tests/test_main.c +++ b/tests/test_main.c @@ -44,6 +44,7 @@ DECLARE_SUITE(repository); DECLARE_SUITE(threads); DECLARE_SUITE(config); DECLARE_SUITE(remotes); +DECLARE_SUITE(buffers); static libgit2_suite suite_methods[]= { SUITE_NAME(core), @@ -61,6 +62,7 @@ static libgit2_suite suite_methods[]= { SUITE_NAME(threads), SUITE_NAME(config), SUITE_NAME(remotes), + SUITE_NAME(buffers), }; #define GIT_SUITE_COUNT (ARRAY_SIZE(suite_methods))