From 376e6c9f96ffcd572ba974c9cc4d13b4f1e31474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 15 Aug 2013 13:48:35 +0200 Subject: [PATCH 1/6] odb: wrap the stream reading and writing functions This is in preparation for moving the hashing to the frontend, which requires us to handle the incoming data before passing it to the backend's stream. --- include/git2/odb.h | 49 ++++++++++++++++++++++++++++------- src/blob.c | 14 +++++----- src/odb.c | 26 ++++++++++++++++--- src/tag.c | 6 ++--- src/transports/local.c | 6 ++--- tests-clar/object/raw/write.c | 6 ++--- 6 files changed, 78 insertions(+), 29 deletions(-) diff --git a/include/git2/odb.h b/include/git2/odb.h index b3e9a57a6..3e93a932c 100644 --- a/include/git2/odb.h +++ b/include/git2/odb.h @@ -219,16 +219,9 @@ GIT_EXTERN(int) git_odb_write(git_oid *out, git_odb *odb, const void *data, size * The type and final length of the object must be specified * when opening the stream. * - * The returned stream will be of type `GIT_STREAM_WRONLY` and - * will have the following methods: - * - * - stream->write: write `n` bytes into the stream - * - stream->finalize_write: close the stream and store the object in - * the odb - * - stream->free: free the stream - * - * The streaming write won't be effective until `stream->finalize_write` - * is called and returns without an error + * The returned stream will be of type `GIT_STREAM_WRONLY`, and it + * won't be effective until `git_odb_stream_finalize_write` is called + * and returns without an error * * The stream must always be free'd or will leak memory. * @@ -242,6 +235,42 @@ GIT_EXTERN(int) git_odb_write(git_oid *out, git_odb *odb, const void *data, size */ GIT_EXTERN(int) git_odb_open_wstream(git_odb_stream **out, git_odb *db, size_t size, git_otype type); +/** + * Write to an odb stream + * + * @param stream the stream + * @param buffer the data to write + * @param len the buffer's length + * @return 0 if the write succeeded; error code otherwise + */ +GIT_EXTERN(int) git_odb_stream_write(git_odb_stream *stream, const char *buffer, size_t len); + +/** + * Finish writing to an odb stream + * + * The object will take its final name and will be available to the + * odb. + * + * @param out pointer to store the resulting object's id + * @param stream the stream + * @return 0 on success; an error code otherwise + */ +GIT_EXTERN(int) git_odb_stream_finalize_write(git_oid *out, git_odb_stream *stream); + +/** + * Read from an odb stream + * + * Most backends don't implement streaming reads + */ +GIT_EXTERN(int) git_odb_stream_read(git_odb_stream *stream, char *buffer, size_t len); + +/** + * Free an odb stream + * + * @param stream the stream to free + */ +GIT_EXTERN(void) git_odb_stream_free(git_odb_stream *stream); + /** * Open a stream to read an object from the ODB * diff --git a/src/blob.c b/src/blob.c index 5bb51f7cf..6a289f43b 100644 --- a/src/blob.c +++ b/src/blob.c @@ -60,10 +60,10 @@ int git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *b (error = git_odb_open_wstream(&stream, odb, len, GIT_OBJ_BLOB)) < 0) return error; - if ((error = stream->write(stream, buffer, len)) == 0) - error = stream->finalize_write(oid, stream); + if ((error = git_odb_stream_write(stream, buffer, len)) == 0) + error = git_odb_stream_finalize_write(oid, stream); - stream->free(stream); + git_odb_stream_free(stream); return error; } @@ -80,12 +80,12 @@ static int write_file_stream( return error; if ((fd = git_futils_open_ro(path)) < 0) { - stream->free(stream); + git_odb_stream_free(stream); return -1; } while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) { - error = stream->write(stream, buffer, read_len); + error = git_odb_stream_write(stream, buffer, read_len); written += read_len; } @@ -97,9 +97,9 @@ static int write_file_stream( } if (!error) - error = stream->finalize_write(oid, stream); + error = git_odb_stream_finalize_write(oid, stream); - stream->free(stream); + git_odb_stream_free(stream); return error; } diff --git a/src/odb.c b/src/odb.c index 6969cf772..158159662 100644 --- a/src/odb.c +++ b/src/odb.c @@ -864,9 +864,9 @@ int git_odb_write( if ((error = git_odb_open_wstream(&stream, db, len, type)) != 0) return error; - stream->write(stream, data, len); - error = stream->finalize_write(oid, stream); - stream->free(stream); + git_odb_stream_write(stream, data, len); + error = git_odb_stream_finalize_write(oid, stream); + git_odb_stream_free(stream); return error; } @@ -904,6 +904,26 @@ int git_odb_open_wstream( return error; } +int git_odb_stream_write(git_odb_stream *stream, const char *buffer, size_t len) +{ + return stream->write(stream, buffer, len); +} + +int git_odb_stream_finalize_write(git_oid *out, git_odb_stream *stream) +{ + return stream->finalize_write(out, stream); +} + +int git_odb_stream_read(git_odb_stream *stream, char *buffer, size_t len) +{ + return stream->read(stream, buffer, len); +} + +void git_odb_stream_free(git_odb_stream *stream) +{ + stream->free(stream); +} + int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oid) { size_t i, reads = 0; diff --git a/src/tag.c b/src/tag.c index 71f4c1eb1..31a3c8b80 100644 --- a/src/tag.c +++ b/src/tag.c @@ -366,10 +366,10 @@ int git_tag_create_frombuffer(git_oid *oid, git_repository *repo, const char *bu if (git_odb_open_wstream(&stream, odb, strlen(buffer), GIT_OBJ_TAG) < 0) return -1; - stream->write(stream, buffer, strlen(buffer)); + git_odb_stream_write(stream, buffer, strlen(buffer)); - error = stream->finalize_write(oid, stream); - stream->free(stream); + error = git_odb_stream_finalize_write(oid, stream); + git_odb_stream_free(stream); if (error < 0) { git_buf_free(&ref_name); diff --git a/src/transports/local.c b/src/transports/local.c index a9da8146c..8a75de727 100644 --- a/src/transports/local.c +++ b/src/transports/local.c @@ -287,9 +287,9 @@ static int local_push_copy_object( odb_obj_size, odb_obj_type)) < 0) goto on_error; - if (odb_stream->write(odb_stream, (char *)git_odb_object_data(odb_obj), + if (git_odb_stream_write(odb_stream, (char *)git_odb_object_data(odb_obj), odb_obj_size) < 0 || - odb_stream->finalize_write(&remote_odb_obj_oid, odb_stream) < 0) { + git_odb_stream_finalize_write(&remote_odb_obj_oid, odb_stream) < 0) { error = -1; } else if (git_oid__cmp(&obj->id, &remote_odb_obj_oid) != 0) { giterr_set(GITERR_ODB, "Error when writing object to remote odb " @@ -298,7 +298,7 @@ static int local_push_copy_object( error = -1; } - odb_stream->free(odb_stream); + git_odb_stream_free(odb_stream); on_error: git_odb_object_free(odb_obj); diff --git a/tests-clar/object/raw/write.c b/tests-clar/object/raw/write.c index 9709c0302..273f08f2c 100644 --- a/tests-clar/object/raw/write.c +++ b/tests-clar/object/raw/write.c @@ -31,9 +31,9 @@ static void streaming_write(git_oid *oid, git_odb *odb, git_rawobj *raw) int error; cl_git_pass(git_odb_open_wstream(&stream, odb, raw->len, raw->type)); - stream->write(stream, raw->data, raw->len); - error = stream->finalize_write(oid, stream); - stream->free(stream); + git_odb_stream_write(stream, raw->data, raw->len); + error = git_odb_stream_finalize_write(oid, stream); + git_odb_stream_free(stream); cl_git_pass(error); } From 8380b39a6761efa360398279083a65064d6770d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 15 Aug 2013 14:29:39 +0200 Subject: [PATCH 2/6] odb: perform the stream hashing in the frontend Hash the data as it's coming into the stream and tell the backend what its name is when finalizing the write. This makes it consistent with the way a plain git_odb_write() performs the write. --- include/git2/odb_backend.h | 3 +++ src/odb.c | 21 +++++++++++++++++++++ src/odb_loose.c | 4 +--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/git2/odb_backend.h b/include/git2/odb_backend.h index af1e3e5b9..d004238a4 100644 --- a/include/git2/odb_backend.h +++ b/include/git2/odb_backend.h @@ -65,10 +65,13 @@ typedef enum { GIT_STREAM_RW = (GIT_STREAM_RDONLY | GIT_STREAM_WRONLY), } git_odb_stream_t; +typedef struct git_hash_ctx git_hash_ctx; + /** A stream to read/write from a backend */ struct git_odb_stream { git_odb_backend *backend; unsigned int mode; + git_hash_ctx *hash_ctx; int (*read)(git_odb_stream *stream, char *buffer, size_t len); int (*write)(git_odb_stream *stream, const char *buffer, size_t len); diff --git a/src/odb.c b/src/odb.c index 158159662..b7f64dfc9 100644 --- a/src/odb.c +++ b/src/odb.c @@ -871,11 +871,21 @@ int git_odb_write( return error; } +static void hash_header(git_hash_ctx *ctx, size_t size, git_otype type) +{ + char header[64]; + int hdrlen; + + hdrlen = git_odb__format_object_header(header, sizeof(header), size, type); + git_hash_update(ctx, header, hdrlen); +} + int git_odb_open_wstream( git_odb_stream **stream, git_odb *db, size_t size, git_otype type) { size_t i, writes = 0; int error = GIT_ERROR; + git_hash_ctx *ctx; assert(stream && db); @@ -901,16 +911,26 @@ int git_odb_open_wstream( if (error < 0 && !writes) error = git_odb__error_unsupported_in_backend("write object"); + ctx = git__malloc(sizeof(git_hash_ctx)); + GITERR_CHECK_ALLOC(ctx); + + + git_hash_ctx_init(ctx); + hash_header(ctx, size, type); + (*stream)->hash_ctx = ctx; + return error; } int git_odb_stream_write(git_odb_stream *stream, const char *buffer, size_t len) { + git_hash_update(stream->hash_ctx, buffer, len); return stream->write(stream, buffer, len); } int git_odb_stream_finalize_write(git_oid *out, git_odb_stream *stream) { + git_hash_final(out, stream->hash_ctx); return stream->finalize_write(out, stream); } @@ -921,6 +941,7 @@ int git_odb_stream_read(git_odb_stream *stream, char *buffer, size_t len) void git_odb_stream_free(git_odb_stream *stream) { + git__free(stream->hash_ctx); stream->free(stream); } diff --git a/src/odb_loose.c b/src/odb_loose.c index 76ed8e232..a0c2c8c4c 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -776,8 +776,7 @@ static int loose_backend__stream_fwrite(git_oid *oid, git_odb_stream *_stream) git_buf final_path = GIT_BUF_INIT; int error = 0; - if (git_filebuf_hash(oid, &stream->fbuf) < 0 || - object_file_name(&final_path, backend, oid) < 0 || + if (object_file_name(&final_path, backend, oid) < 0 || object_mkdir(&final_path, backend) < 0) error = -1; /* @@ -848,7 +847,6 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_ if (git_buf_joinpath(&tmp_path, backend->objects_dir, "tmp_object") < 0 || git_filebuf_open(&stream->fbuf, tmp_path.ptr, - GIT_FILEBUF_HASH_CONTENTS | GIT_FILEBUF_TEMPORARY | (backend->object_zlib_level << GIT_FILEBUF_DEFLATE_SHIFT)) < 0 || stream->stream.write((git_odb_stream *)stream, hdr, hdrlen) < 0) From d4e6cf0cd05540b373bfcb8908e4bc8f8f72c73c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 15 Aug 2013 14:32:47 +0200 Subject: [PATCH 3/6] odb: remove a duplicate object header formatting function --- src/odb_loose.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/odb_loose.c b/src/odb_loose.c index a0c2c8c4c..35f53fb7d 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -809,17 +809,6 @@ static void loose_backend__stream_free(git_odb_stream *_stream) git__free(stream); } -static int format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type) -{ - const char *type_str = git_object_type2string(obj_type); - int len = snprintf(hdr, n, "%s %"PRIuZ, type_str, obj_len); - - assert(len > 0); /* otherwise snprintf() is broken */ - assert(((size_t)len) < n); /* otherwise the caller is broken! */ - - return len+1; -} - static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend, size_t length, git_otype type) { loose_backend *backend; @@ -833,7 +822,7 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_ backend = (loose_backend *)_backend; *stream_out = NULL; - hdrlen = format_object_header(hdr, sizeof(hdr), length, type); + hdrlen = git_odb__format_object_header(hdr, sizeof(hdr), length, type); stream = git__calloc(1, sizeof(loose_writestream)); GITERR_CHECK_ALLOC(stream); @@ -872,7 +861,7 @@ static int loose_backend__write(git_oid *oid, git_odb_backend *_backend, const v backend = (loose_backend *)_backend; /* prepare the header for the file */ - header_len = format_object_header(header, sizeof(header), len, type); + header_len = git_odb__format_object_header(header, sizeof(header), len, type); if (git_buf_joinpath(&final_path, backend->objects_dir, "tmp_object") < 0 || git_filebuf_open(&fbuf, final_path.ptr, From fe0c6d4e712fa1bb072b8a847783bad47f3e91eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 17 Aug 2013 01:41:08 +0200 Subject: [PATCH 4/6] odb: make it clearer that the id is calculated in the frontend The frontend is in charge of calculating the id of the objects. Thus the backends should treat it as a read-only value. The positioning in the function signature made it seem as though it was an output parameter. Make the id const and move it from the front to behind the subject (backend or stream). --- include/git2/odb_backend.h | 2 +- include/git2/sys/odb_backend.h | 6 +----- src/odb.c | 8 ++++---- src/odb_loose.c | 4 ++-- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/include/git2/odb_backend.h b/include/git2/odb_backend.h index d004238a4..5696ee07d 100644 --- a/include/git2/odb_backend.h +++ b/include/git2/odb_backend.h @@ -75,7 +75,7 @@ struct git_odb_stream { int (*read)(git_odb_stream *stream, char *buffer, size_t len); int (*write)(git_odb_stream *stream, const char *buffer, size_t len); - int (*finalize_write)(git_oid *oid_p, git_odb_stream *stream); + int (*finalize_write)(git_odb_stream *stream, const git_oid *oid); void (*free)(git_odb_stream *stream); }; diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h index 3cd2734c0..2d06613d2 100644 --- a/include/git2/sys/odb_backend.h +++ b/include/git2/sys/odb_backend.h @@ -48,12 +48,8 @@ struct git_odb_backend { int (* read_header)( size_t *, git_otype *, git_odb_backend *, const git_oid *); - /* The writer may assume that the object - * has already been hashed and is passed - * in the first parameter. - */ int (* write)( - git_oid *, git_odb_backend *, const void *, size_t, git_otype); + git_odb_backend *, const git_oid *, const void *, size_t, git_otype); int (* writestream)( git_odb_stream **, git_odb_backend *, size_t, git_otype); diff --git a/src/odb.c b/src/odb.c index b7f64dfc9..d2be60f97 100644 --- a/src/odb.c +++ b/src/odb.c @@ -291,10 +291,10 @@ typedef struct { git_otype type; } fake_wstream; -static int fake_wstream__fwrite(git_oid *oid, git_odb_stream *_stream) +static int fake_wstream__fwrite(git_odb_stream *_stream, const git_oid *oid) { fake_wstream *stream = (fake_wstream *)_stream; - return _stream->backend->write(oid, _stream->backend, stream->buffer, stream->size, stream->type); + return _stream->backend->write(_stream->backend, oid, stream->buffer, stream->size, stream->type); } static int fake_wstream__write(git_odb_stream *_stream, const char *data, size_t len) @@ -851,7 +851,7 @@ int git_odb_write( continue; if (b->write != NULL) - error = b->write(oid, b, data, len, type); + error = b->write(b, oid, data, len, type); } if (!error || error == GIT_PASSTHROUGH) @@ -931,7 +931,7 @@ int git_odb_stream_write(git_odb_stream *stream, const char *buffer, size_t len) int git_odb_stream_finalize_write(git_oid *out, git_odb_stream *stream) { git_hash_final(out, stream->hash_ctx); - return stream->finalize_write(out, stream); + return stream->finalize_write(stream, out); } int git_odb_stream_read(git_odb_stream *stream, char *buffer, size_t len) diff --git a/src/odb_loose.c b/src/odb_loose.c index 35f53fb7d..d1ca18edb 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -769,7 +769,7 @@ static int loose_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb return state.cb_error ? state.cb_error : error; } -static int loose_backend__stream_fwrite(git_oid *oid, git_odb_stream *_stream) +static int loose_backend__stream_fwrite(git_odb_stream *_stream, const git_oid *oid) { loose_writestream *stream = (loose_writestream *)_stream; loose_backend *backend = (loose_backend *)_stream->backend; @@ -850,7 +850,7 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_ return !stream ? -1 : 0; } -static int loose_backend__write(git_oid *oid, git_odb_backend *_backend, const void *data, size_t len, git_otype type) +static int loose_backend__write(git_odb_backend *_backend, const git_oid *oid, const void *data, size_t len, git_otype type) { int error = 0, header_len; git_buf final_path = GIT_BUF_INIT; From 7a3764bee92456e5ee2ed41be9abb0ffb4165eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 17 Aug 2013 01:55:52 +0200 Subject: [PATCH 5/6] odb: document git_odb_stream Clarify the role of each function and in particular mention that there is no need for the backend or stream to worry about the object's id, as it will be given when `finalize_write` is called. --- include/git2/odb_backend.h | 26 +++++++++++++++++++++++++- include/git2/sys/odb_backend.h | 4 ++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/git2/odb_backend.h b/include/git2/odb_backend.h index 5696ee07d..dca499583 100644 --- a/include/git2/odb_backend.h +++ b/include/git2/odb_backend.h @@ -67,15 +67,39 @@ typedef enum { typedef struct git_hash_ctx git_hash_ctx; -/** A stream to read/write from a backend */ +/** + * A stream to read/write from a backend. + * + * This represents a stream of data being written to or read from a + * backend. When writing, the frontend functions take care of + * calculating the object's id and all `finalize_write` needs to do is + * store the object with the id it is passed. + */ struct git_odb_stream { git_odb_backend *backend; unsigned int mode; git_hash_ctx *hash_ctx; + /** + * Write at most `len` bytes into `buffer` and advance the + * stream. + */ int (*read)(git_odb_stream *stream, char *buffer, size_t len); + + /** + * Write `len` bytes from `buffer` into the stream. + */ int (*write)(git_odb_stream *stream, const char *buffer, size_t len); + + /** + * Store the contents of the stream as an object with the id + * specified in `oid`. + */ int (*finalize_write)(git_odb_stream *stream, const git_oid *oid); + + /** + * Free the stream's memory. + */ void (*free)(git_odb_stream *stream); }; diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h index 2d06613d2..31ffe1c33 100644 --- a/include/git2/sys/odb_backend.h +++ b/include/git2/sys/odb_backend.h @@ -48,6 +48,10 @@ struct git_odb_backend { int (* read_header)( size_t *, git_otype *, git_odb_backend *, const git_oid *); + /** + * Write an object into the backend. The id of the object has + * already been calculated and is passed in. + */ int (* write)( git_odb_backend *, const git_oid *, const void *, size_t, git_otype); From 090a07d29548ce69034b4be6ba043014226893c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 17 Aug 2013 02:12:04 +0200 Subject: [PATCH 6/6] odb: avoid hashing twice in and edge case If none of the backends support direct writes and we must stream the whole file, we already know what the object's id should be; so use the stream's functions directly, bypassing the frontend's hashing and overwriting of our existing id. --- src/odb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/odb.c b/src/odb.c index d2be60f97..21b46bf56 100644 --- a/src/odb.c +++ b/src/odb.c @@ -864,8 +864,8 @@ int git_odb_write( if ((error = git_odb_open_wstream(&stream, db, len, type)) != 0) return error; - git_odb_stream_write(stream, data, len); - error = git_odb_stream_finalize_write(oid, stream); + stream->write(stream, data, len); + error = stream->finalize_write(stream, oid); git_odb_stream_free(stream); return error;