diff --git a/.travis.yml b/.travis.yml index fc513458b..68b29b1e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ compiler: env: global: - secure: "YnhS+8n6B+uoyaYfaJ3Lei7cSJqHDPiKJCKFIF2c87YDfmCvAJke8QtE7IzjYDs7UFkTCM4ox+ph2bERUrxZbSCyEkHdjIZpKuMJfYWja/jgMqTMxdyOH9y8JLFbZsSXDIXDwqBlC6vVyl1fP90M35wuWcNTs6tctfVWVofEFbs=" + - GITTEST_INVASIVE_FS_SIZE=1 matrix: - OPTIONS="-DTHREADSAFE=ON -DCMAKE_BUILD_TYPE=Release" - OPTIONS="-DTHREADSAFE=OFF -DBUILD_EXAMPLES=ON" diff --git a/appveyor.yml b/appveyor.yml index 8ac6728c3..d155485fd 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -3,7 +3,8 @@ branches: only: - master environment: - GITTEST_INVASIVE_FILESYSTEM: 1 + GITTEST_INVASIVE_FS_STRUCTURE: 1 + GITTEST_INVASIVE_FS_SIZE: 1 matrix: - GENERATOR: "Visual Studio 11" diff --git a/include/git2/filter.h b/include/git2/filter.h index 5b3f40394..dc59e6341 100644 --- a/include/git2/filter.h +++ b/include/git2/filter.h @@ -39,9 +39,9 @@ typedef enum { * Filter option flags. */ typedef enum { - GIT_FILTER_OPT_DEFAULT = 0u, - GIT_FILTER_OPT_ALLOW_UNSAFE = (1u << 0), -} git_filter_opt_t; + GIT_FILTER_DEFAULT = 0u, + GIT_FILTER_ALLOW_UNSAFE = (1u << 0), +} git_filter_flag_t; /** * A filter that can transform file data @@ -83,7 +83,7 @@ typedef struct git_filter_list git_filter_list; * @param blob The blob to which the filter will be applied (if known) * @param path Relative path of the file to be filtered * @param mode Filtering direction (WT->ODB or ODB->WT) - * @param options Combination of `git_filter_opt_t` flags + * @param flags Combination of `git_filter_flag_t` flags * @return 0 on success (which could still return NULL if no filters are * needed for the requested file), <0 on error */ @@ -93,7 +93,7 @@ GIT_EXTERN(int) git_filter_list_load( git_blob *blob, /* can be NULL */ const char *path, git_filter_mode_t mode, - uint32_t options); + uint32_t flags); /** * Apply filter list to a data buffer. @@ -137,6 +137,22 @@ GIT_EXTERN(int) git_filter_list_apply_to_blob( git_filter_list *filters, git_blob *blob); +GIT_EXTERN(int) git_filter_list_stream_data( + git_filter_list *filters, + git_buf *data, + git_writestream *target); + +GIT_EXTERN(int) git_filter_list_stream_file( + git_filter_list *filters, + git_repository *repo, + const char *path, + git_writestream *target); + +GIT_EXTERN(int) git_filter_list_stream_blob( + git_filter_list *filters, + git_blob *blob, + git_writestream *target); + /** * Free a git_filter_list * diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index 60248271a..5fd8d5566 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -123,9 +123,9 @@ GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src); GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src); /** - * Get the combination git_filter_opt_t options to be applied + * Get the combination git_filter_flag_t options to be applied */ -GIT_EXTERN(uint32_t) git_filter_source_options(const git_filter_source *src); +GIT_EXTERN(uint32_t) git_filter_source_flags(const git_filter_source *src); /* * struct git_filter @@ -208,6 +208,13 @@ typedef int (*git_filter_apply_fn)( const git_buf *from, const git_filter_source *src); +typedef int (*git_filter_stream_fn)( + git_writestream **out, + git_filter *self, + void **payload, + const git_filter_source *src, + git_writestream *next); + /** * Callback to clean up after filtering has been applied * @@ -247,6 +254,7 @@ struct git_filter { git_filter_shutdown_fn shutdown; git_filter_check_fn check; git_filter_apply_fn apply; + git_filter_stream_fn stream; git_filter_cleanup_fn cleanup; }; diff --git a/include/git2/types.h b/include/git2/types.h index 35e1573c7..c90ac4776 100644 --- a/include/git2/types.h +++ b/include/git2/types.h @@ -410,6 +410,15 @@ typedef enum { GIT_SUBMODULE_RECURSE_ONDEMAND = 2, } git_submodule_recurse_t; +/** A type to write in a streaming fashion, for example, for filters. */ +typedef struct git_writestream git_writestream; + +struct git_writestream { + int (*write)(git_writestream *stream, const char *buffer, size_t len); + int (*close)(git_writestream *stream); + void (*free)(git_writestream *stream); +}; + /** @} */ GIT_END_DECL diff --git a/src/attr.c b/src/attr.c index 44593da81..38420807a 100644 --- a/src/attr.c +++ b/src/attr.c @@ -282,9 +282,8 @@ static int system_attr_file( * a consumer. This allows them to treat this as a regular `git_buf`, * but their call to `git_buf_free` will not attempt to free it. */ - out->ptr = attr_session->sysdir.ptr; - out->size = attr_session->sysdir.size; - out->asize = 0; + git_buf_attach_notowned( + out, attr_session->sysdir.ptr, attr_session->sysdir.size); return 0; } diff --git a/src/blob.c b/src/blob.c index 30d5b705b..cf0329064 100644 --- a/src/blob.c +++ b/src/blob.c @@ -199,7 +199,7 @@ int git_blob__create_from_paths( /* Load the filters for writing this file to the ODB */ error = git_filter_list_load( &fl, repo, NULL, hint_path, - GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT); + GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT); if (error < 0) /* well, that didn't work */; @@ -329,15 +329,13 @@ cleanup: int git_blob_is_binary(const git_blob *blob) { - git_buf content; + git_buf content = GIT_BUF_INIT; assert(blob); - content.ptr = blob->odb_object->buffer; - content.size = - min(blob->odb_object->cached.size, GIT_FILTER_BYTES_TO_CHECK_NUL); - content.asize = 0; - + git_buf_attach_notowned(&content, blob->odb_object->buffer, + min(blob->odb_object->cached.size, + GIT_FILTER_BYTES_TO_CHECK_NUL)); return git_buf_text_is_binary(&content); } @@ -359,7 +357,7 @@ int git_blob_filtered_content( if (!(error = git_filter_list_load( &fl, git_blob_owner(blob), blob, path, - GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT))) { + GIT_FILTER_TO_WORKTREE, GIT_FILTER_DEFAULT))) { error = git_filter_list_apply_to_blob(out, fl, blob); diff --git a/src/buffer.c b/src/buffer.c index 3deb0329c..f633c5e02 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -500,6 +500,20 @@ void git_buf_attach(git_buf *buf, char *ptr, size_t asize) } } +void git_buf_attach_notowned(git_buf *buf, const char *ptr, size_t size) +{ + if (git_buf_is_allocated(buf)) + git_buf_free(buf); + + if (!size) { + git_buf_init(buf, 0); + } else { + buf->ptr = (char *)ptr; + buf->asize = 0; + buf->size = size; + } +} + int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...) { va_list ap; diff --git a/src/buffer.h b/src/buffer.h index 52342e309..093ed9b60 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -74,6 +74,12 @@ extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b); extern char *git_buf_detach(git_buf *buf); extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize); +/* Populates a `git_buf` where the contents are not "owned" by the + * buffer, and calls to `git_buf_free` will not free the given buf. + */ +extern void git_buf_attach_notowned( + git_buf *buf, const char *ptr, size_t size); + /** * Test if there have been any reallocation failures with this git_buf. * diff --git a/src/checkout.c b/src/checkout.c index 880af3dff..f71be26f9 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -17,6 +17,7 @@ #include "git2/diff.h" #include "git2/submodule.h" #include "git2/sys/index.h" +#include "git2/sys/filter.h" #include "refs.h" #include "repository.h" @@ -1371,22 +1372,109 @@ static int mkpath2file( return error; } -static int buffer_to_file( +struct checkout_stream { + git_writestream base; + const char *path; + int fd; + int open; +}; + +static int checkout_stream_write( + git_writestream *s, const char *buffer, size_t len) +{ + struct checkout_stream *stream = (struct checkout_stream *)s; + int ret; + + if ((ret = p_write(stream->fd, buffer, len)) < 0) + giterr_set(GITERR_OS, "Could not write to '%s'", stream->path); + + return ret; +} + +static int checkout_stream_close(git_writestream *s) +{ + struct checkout_stream *stream = (struct checkout_stream *)s; + assert(stream && stream->open); + + stream->open = 0; + return p_close(stream->fd); +} + +static void checkout_stream_free(git_writestream *s) +{ + GIT_UNUSED(s); +} + +static int blob_content_to_file( checkout_data *data, struct stat *st, - git_buf *buf, + git_blob *blob, const char *path, - mode_t file_mode) + const char *hint_path, + mode_t entry_filemode) { - int error; + int flags = data->opts.file_open_flags; + mode_t file_mode = data->opts.file_mode ? + data->opts.file_mode : entry_filemode; + git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT; + struct checkout_stream writer; + mode_t mode; + git_filter_list *fl = NULL; + int fd; + int error = 0; + + if (hint_path == NULL) + hint_path = path; if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0) return error; - if ((error = git_futils_writebuffer( - buf, path, data->opts.file_open_flags, file_mode)) < 0) + if (flags <= 0) + flags = O_CREAT | O_TRUNC | O_WRONLY; + if (!(mode = file_mode)) + mode = GIT_FILEMODE_BLOB; + + if ((fd = p_open(path, flags, mode)) < 0) { + giterr_set(GITERR_OS, "Could not open '%s' for writing", path); + return fd; + } + + filter_opts.attr_session = &data->attr_session; + filter_opts.temp_buf = &data->tmp; + + if (!data->opts.disable_filters && + (error = git_filter_list__load_ext( + &fl, data->repo, blob, hint_path, + GIT_FILTER_TO_WORKTREE, &filter_opts))) return error; + /* setup the writer */ + memset(&writer, 0, sizeof(struct checkout_stream)); + writer.base.write = checkout_stream_write; + writer.base.close = checkout_stream_close; + writer.base.free = checkout_stream_free; + writer.path = path; + writer.fd = fd; + writer.open = 1; + + error = git_filter_list_stream_blob(fl, blob, (git_writestream *)&writer); + + assert(writer.open == 0); + + git_filter_list_free(fl); + + if (error < 0) + return error; + + if (GIT_PERMS_IS_EXEC(mode)) { + data->perfdata.chmod_calls++; + + if ((error = p_chmod(path, mode)) < 0) { + giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path); + return error; + } + } + if (st) { data->perfdata.stat_calls++; @@ -1394,53 +1482,11 @@ static int buffer_to_file( giterr_set(GITERR_OS, "Error statting '%s'", path); return error; } - } - if (GIT_PERMS_IS_EXEC(file_mode)) { - data->perfdata.chmod_calls++; - - if ((error = p_chmod(path, file_mode)) < 0) - giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path); - } - - return error; -} - -static int blob_content_to_file( - checkout_data *data, - struct stat *st, - git_blob *blob, - const char *path, - const char * hint_path, - mode_t entry_filemode) -{ - mode_t file_mode = data->opts.file_mode ? - data->opts.file_mode : entry_filemode; - git_buf out = GIT_BUF_INIT; - git_filter_list *fl = NULL; - int error = 0; - - if (hint_path == NULL) - hint_path = path; - - if (!data->opts.disable_filters) - error = git_filter_list__load_with_attr_session( - &fl, data->repo, &data->attr_session, blob, hint_path, - GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT); - - if (!error) - error = git_filter_list_apply_to_blob(&out, fl, blob); - - git_filter_list_free(fl); - - if (!error) { - error = buffer_to_file(data, st, &out, path, file_mode); st->st_mode = entry_filemode; - - git_buf_free(&out); } - return error; + return 0; } static int blob_content_to_link( @@ -1958,6 +2004,7 @@ static int checkout_write_merge( git_merge_file_result result = {0}; git_filebuf output = GIT_FILEBUF_INIT; git_filter_list *fl = NULL; + git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT; int error = 0; if (data->opts.checkout_strategy & GIT_CHECKOUT_CONFLICT_STYLE_DIFF3) @@ -2007,9 +2054,12 @@ static int checkout_write_merge( in_data.ptr = (char *)result.ptr; in_data.size = result.len; - if ((error = git_filter_list__load_with_attr_session( - &fl, data->repo, &data->attr_session, NULL, git_buf_cstr(&path_workdir), - GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT)) < 0 || + filter_opts.attr_session = &data->attr_session; + filter_opts.temp_buf = &data->tmp; + + if ((error = git_filter_list__load_ext( + &fl, data->repo, NULL, git_buf_cstr(&path_workdir), + GIT_FILTER_TO_WORKTREE, &filter_opts)) < 0 || (error = git_filter_list_apply_to_data(&out_data, fl, &in_data)) < 0) goto done; } else { diff --git a/src/crlf.c b/src/crlf.c index c0a73990f..b5d1dbf32 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -302,7 +302,7 @@ static int crlf_check( return error; /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */ - if ((git_filter_source_options(src) & GIT_FILTER_OPT_ALLOW_UNSAFE) && + if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) && ca.safe_crlf == GIT_SAFE_CRLF_FAIL) ca.safe_crlf = GIT_SAFE_CRLF_WARN; } diff --git a/src/diff.c b/src/diff.c index 07eae03e7..815351b21 100644 --- a/src/diff.c +++ b/src/diff.c @@ -600,7 +600,7 @@ int git_diff__oid_for_entry( error = -1; } else if (!(error = git_filter_list_load( &fl, diff->repo, NULL, entry.path, - GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE))) + GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE))) { int fd = git_futils_open_ro(full_path.ptr); if (fd < 0) diff --git a/src/diff_driver.c b/src/diff_driver.c index 7313ab573..049e6ef2a 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -418,14 +418,13 @@ void git_diff_driver_update_options( int git_diff_driver_content_is_binary( git_diff_driver *driver, const char *content, size_t content_len) { - git_buf search; - - search.ptr = (char *)content; - search.size = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL); - search.asize = 0; + git_buf search = GIT_BUF_INIT; GIT_UNUSED(driver); + git_buf_attach_notowned(&search, content, + min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL)); + /* TODO: provide encoding / binary detection callbacks that can * be UTF-8 aware, etc. For now, instead of trying to be smart, * let's just use the simple NUL-byte detection that core git uses. diff --git a/src/diff_file.c b/src/diff_file.c index 96be0942b..f7061ae83 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -302,7 +302,7 @@ static int diff_file_content_load_workdir_file( if ((error = git_filter_list_load( &fl, fc->repo, NULL, fc->file->path, - GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)) < 0) + GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)) < 0) goto cleanup; /* if there are no filters, try to mmap the file */ diff --git a/src/filter.c b/src/filter.c index 7b54a76c0..4fbf84f6a 100644 --- a/src/filter.c +++ b/src/filter.c @@ -23,7 +23,7 @@ struct git_filter_source { git_oid oid; /* zero if unknown (which is likely) */ uint16_t filemode; /* zero if unknown */ git_filter_mode_t mode; - uint32_t options; + uint32_t flags; }; typedef struct { @@ -34,6 +34,7 @@ typedef struct { struct git_filter_list { git_array_t(git_filter_entry) filters; git_filter_source source; + git_buf *temp_buf; char path[GIT_FLEX_ARRAY]; }; @@ -371,9 +372,9 @@ git_filter_mode_t git_filter_source_mode(const git_filter_source *src) return src->mode; } -uint32_t git_filter_source_options(const git_filter_source *src) +uint32_t git_filter_source_flags(const git_filter_source *src) { - return src->options; + return src->flags; } static int filter_list_new( @@ -393,7 +394,7 @@ static int filter_list_new( fl->source.repo = src->repo; fl->source.path = fl->path; fl->source.mode = src->mode; - fl->source.options = src->options; + fl->source.flags = src->flags; *out = fl; return 0; @@ -448,24 +449,23 @@ int git_filter_list_new( git_filter_list **out, git_repository *repo, git_filter_mode_t mode, - uint32_t options) + uint32_t flags) { git_filter_source src = { 0 }; src.repo = repo; src.path = NULL; src.mode = mode; - src.options = options; + src.flags = flags; return filter_list_new(out, &src); } -int git_filter_list__load_with_attr_session( +int git_filter_list__load_ext( git_filter_list **filters, git_repository *repo, - git_attr_session *attr_session, git_blob *blob, /* can be NULL */ const char *path, git_filter_mode_t mode, - uint32_t options) + git_filter_options *filter_opts) { int error = 0; git_filter_list *fl = NULL; @@ -480,7 +480,8 @@ int git_filter_list__load_with_attr_session( src.repo = repo; src.path = path; src.mode = mode; - src.options = options; + src.flags = filter_opts->flags; + if (blob) git_oid_cpy(&src.oid, git_blob_id(blob)); @@ -493,7 +494,7 @@ int git_filter_list__load_with_attr_session( if (fdef->nattrs > 0) { error = filter_list_check_attributes( - &values, repo, attr_session, fdef, &src); + &values, repo, filter_opts->attr_session, fdef, &src); if (error == GIT_ENOTFOUND) { error = 0; @@ -516,8 +517,12 @@ int git_filter_list__load_with_attr_session( else if (error < 0) break; else { - if (!fl && (error = filter_list_new(&fl, &src)) < 0) - return error; + if (!fl) { + if ((error = filter_list_new(&fl, &src)) < 0) + return error; + + fl->temp_buf = filter_opts->temp_buf; + } fe = git_array_alloc(fl->filters); GITERR_CHECK_ALLOC(fe); @@ -542,10 +547,14 @@ int git_filter_list_load( git_blob *blob, /* can be NULL */ const char *path, git_filter_mode_t mode, - uint32_t options) + uint32_t flags) { - return git_filter_list__load_with_attr_session( - filters, repo, NULL, blob, path, mode, options); + git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT; + + filter_opts.flags = flags; + + return git_filter_list__load_ext( + filters, repo, blob, path, mode, &filter_opts); } void git_filter_list_free(git_filter_list *fl) @@ -600,84 +609,72 @@ size_t git_filter_list_length(const git_filter_list *fl) return fl ? git_array_size(fl->filters) : 0; } -static int filter_list_out_buffer_from_raw( - git_buf *out, const void *ptr, size_t size) -{ - if (git_buf_is_allocated(out)) - git_buf_free(out); +struct buf_stream { + git_writestream parent; + git_buf *target; + bool complete; +}; - if (!size) { - git_buf_init(out, 0); - } else { - out->ptr = (char *)ptr; - out->asize = 0; - out->size = size; - } +static int buf_stream_write( + git_writestream *s, const char *buffer, size_t len) +{ + struct buf_stream *buf_stream = (struct buf_stream *)s; + assert(buf_stream); + + assert(buf_stream->complete == 0); + + return git_buf_put(buf_stream->target, buffer, len); +} + +static int buf_stream_close(git_writestream *s) +{ + struct buf_stream *buf_stream = (struct buf_stream *)s; + assert(buf_stream); + + assert(buf_stream->complete == 0); + buf_stream->complete = 1; return 0; } -int git_filter_list_apply_to_data( - git_buf *tgt, git_filter_list *fl, git_buf *src) +static void buf_stream_free(git_writestream *s) { - int error = 0; - uint32_t i; - git_buf *dbuffer[2], local = GIT_BUF_INIT; - unsigned int si = 0; + GIT_UNUSED(s); +} + +static void buf_stream_init(struct buf_stream *writer, git_buf *target) +{ + memset(writer, 0, sizeof(struct buf_stream)); + + writer->parent.write = buf_stream_write; + writer->parent.close = buf_stream_close; + writer->parent.free = buf_stream_free; + writer->target = target; + + git_buf_clear(target); +} + +int git_filter_list_apply_to_data( + git_buf *tgt, git_filter_list *filters, git_buf *src) +{ + struct buf_stream writer; + int error; git_buf_sanitize(tgt); git_buf_sanitize(src); - if (!fl) - return filter_list_out_buffer_from_raw(tgt, src->ptr, src->size); - - dbuffer[0] = src; - dbuffer[1] = tgt; - - /* if `src` buffer is reallocable, then use it, otherwise copy it */ - if (!git_buf_is_allocated(src)) { - if (git_buf_set(&local, src->ptr, src->size) < 0) - return -1; - dbuffer[0] = &local; + if (!filters) { + git_buf_attach_notowned(tgt, src->ptr, src->size); + return 0; } - for (i = 0; i < git_array_size(fl->filters); ++i) { - unsigned int di = 1 - si; - uint32_t fidx = (fl->source.mode == GIT_FILTER_TO_WORKTREE) ? - i : git_array_size(fl->filters) - 1 - i; - git_filter_entry *fe = git_array_get(fl->filters, fidx); + buf_stream_init(&writer, tgt); - dbuffer[di]->size = 0; - - /* Apply the filter from dbuffer[src] to the other buffer; - * if the filtering is canceled by the user mid-filter, - * we skip to the next filter without changing the source - * of the double buffering (so that the text goes through - * cleanly). - */ - - error = fe->filter->apply( - fe->filter, &fe->payload, dbuffer[di], dbuffer[si], &fl->source); - - if (error == GIT_PASSTHROUGH) { - /* PASSTHROUGH means filter decided not to process the buffer */ - error = 0; - } else if (!error) { - git_buf_sanitize(dbuffer[di]); /* force NUL termination */ - si = di; /* swap buffers */ - } else { - tgt->size = 0; - goto cleanup; - } - } - - /* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */ - if (si != 1) - git_buf_swap(dbuffer[0], dbuffer[1]); - -cleanup: - git_buf_free(&local); /* don't leak if we allocated locally */ + if ((error = git_filter_list_stream_data(filters, src, + (git_writestream *)&writer)) < 0) + return error; + assert(writer.complete); return error; } @@ -687,28 +684,21 @@ int git_filter_list_apply_to_file( git_repository *repo, const char *path) { + struct buf_stream writer; int error; - const char *base = repo ? git_repository_workdir(repo) : NULL; - git_buf abspath = GIT_BUF_INIT, raw = GIT_BUF_INIT; - if (!(error = git_path_join_unrooted(&abspath, path, base, NULL)) && - !(error = git_futils_readbuffer(&raw, abspath.ptr))) - { - error = git_filter_list_apply_to_data(out, filters, &raw); + buf_stream_init(&writer, out); - git_buf_free(&raw); - } + if ((error = git_filter_list_stream_file( + filters, repo, path, (git_writestream *)&writer)) < 0) + return error; - git_buf_free(&abspath); + assert(writer.complete); return error; } -int git_filter_list_apply_to_blob( - git_buf *out, - git_filter_list *filters, - git_blob *blob) +static int buf_from_blob(git_buf *out, git_blob *blob) { - git_buf in = GIT_BUF_INIT; git_off_t rawsize = git_blob_rawsize(blob); if (!git__is_sizet(rawsize)) { @@ -716,12 +706,250 @@ int git_filter_list_apply_to_blob( return -1; } - in.ptr = (char *)git_blob_rawcontent(blob); - in.asize = 0; - in.size = (size_t)rawsize; + git_buf_attach_notowned(out, git_blob_rawcontent(blob), (size_t)rawsize); + return 0; +} + +int git_filter_list_apply_to_blob( + git_buf *out, + git_filter_list *filters, + git_blob *blob) +{ + struct buf_stream writer; + int error; + + buf_stream_init(&writer, out); + + if ((error = git_filter_list_stream_blob( + filters, blob, (git_writestream *)&writer)) < 0) + return error; + + assert(writer.complete); + return error; +} + +struct proxy_stream { + git_writestream parent; + git_filter *filter; + const git_filter_source *source; + void **payload; + git_buf input; + git_buf temp_buf; + git_buf *output; + git_writestream *target; +}; + +static int proxy_stream_write( + git_writestream *s, const char *buffer, size_t len) +{ + struct proxy_stream *proxy_stream = (struct proxy_stream *)s; + assert(proxy_stream); + + return git_buf_put(&proxy_stream->input, buffer, len); +} + +static int proxy_stream_close(git_writestream *s) +{ + struct proxy_stream *proxy_stream = (struct proxy_stream *)s; + git_buf *writebuf; + int error; + + assert(proxy_stream); + + error = proxy_stream->filter->apply( + proxy_stream->filter, + proxy_stream->payload, + proxy_stream->output, + &proxy_stream->input, + proxy_stream->source); + + if (error == GIT_PASSTHROUGH) { + writebuf = &proxy_stream->input; + } else if (error == 0) { + git_buf_sanitize(proxy_stream->output); + writebuf = proxy_stream->output; + } else { + return error; + } + + if ((error = proxy_stream->target->write( + proxy_stream->target, writebuf->ptr, writebuf->size)) == 0) + error = proxy_stream->target->close(proxy_stream->target); + + return error; +} + +static void proxy_stream_free(git_writestream *s) +{ + struct proxy_stream *proxy_stream = (struct proxy_stream *)s; + assert(proxy_stream); + + git_buf_free(&proxy_stream->input); + git_buf_free(&proxy_stream->temp_buf); + git__free(proxy_stream); +} + +static int proxy_stream_init( + git_writestream **out, + git_filter *filter, + git_buf *temp_buf, + void **payload, + const git_filter_source *source, + git_writestream *target) +{ + struct proxy_stream *proxy_stream = git__calloc(1, sizeof(struct proxy_stream)); + GITERR_CHECK_ALLOC(proxy_stream); + + proxy_stream->parent.write = proxy_stream_write; + proxy_stream->parent.close = proxy_stream_close; + proxy_stream->parent.free = proxy_stream_free; + proxy_stream->filter = filter; + proxy_stream->payload = payload; + proxy_stream->source = source; + proxy_stream->target = target; + proxy_stream->output = temp_buf ? temp_buf : &proxy_stream->temp_buf; + + *out = (git_writestream *)proxy_stream; + return 0; +} + +static int stream_list_init( + git_writestream **out, + git_vector *streams, + git_filter_list *filters, + git_writestream *target) +{ + git_writestream *last_stream = target; + size_t i; + int error = 0; + + *out = NULL; + + if (!filters) { + *out = target; + return 0; + } + + /* Create filters last to first to get the chaining direction */ + for (i = 0; i < git_array_size(filters->filters); ++i) { + size_t filter_idx = (filters->source.mode == GIT_FILTER_TO_WORKTREE) ? + git_array_size(filters->filters) - 1 - i : i; + git_filter_entry *fe = git_array_get(filters->filters, filter_idx); + git_writestream *filter_stream; + + assert(fe->filter->stream || fe->filter->apply); + + /* If necessary, create a stream that proxies the traditional + * application. + */ + if (fe->filter->stream) + error = fe->filter->stream(&filter_stream, fe->filter, + &fe->payload, &filters->source, last_stream); + else + /* Create a stream that proxies the one-shot apply */ + error = proxy_stream_init(&filter_stream, fe->filter, + filters->temp_buf, &fe->payload, &filters->source, + last_stream); + + if (error < 0) + return error; + + git_vector_insert(streams, filter_stream); + last_stream = filter_stream; + } + + *out = last_stream; + return 0; +} + +void stream_list_free(git_vector *streams) +{ + git_writestream *stream; + size_t i; + + git_vector_foreach(streams, i, stream) + stream->free(stream); + git_vector_free(streams); +} + +#define STREAM_BUFSIZE 10240 + +/* TODO: maybe not use filter_stream as a target but create one */ +int git_filter_list_stream_file( + git_filter_list *filters, + git_repository *repo, + const char *path, + git_writestream *target) +{ + char buf[STREAM_BUFSIZE]; + git_buf abspath = GIT_BUF_INIT; + const char *base = repo ? git_repository_workdir(repo) : NULL; + git_vector filter_streams = GIT_VECTOR_INIT; + git_writestream *stream_start; + ssize_t readlen; + int fd, error; + + if ((error = stream_list_init( + &stream_start, &filter_streams, filters, target)) < 0 || + (error = git_path_join_unrooted(&abspath, path, base, NULL)) < 0) + goto done; + + if ((fd = git_futils_open_ro(path)) < 0) { + error = fd; + goto done; + } + + while ((readlen = p_read(fd, buf, STREAM_BUFSIZE)) > 0) { + if ((error = stream_start->write(stream_start, buf, readlen)) < 0) + goto done; + } + + if (!readlen) + error = stream_start->close(stream_start); + else if (readlen < 0) + error = readlen; + + p_close(fd); + +done: + stream_list_free(&filter_streams); + git_buf_free(&abspath); + return error; +} + +int git_filter_list_stream_data( + git_filter_list *filters, + git_buf *data, + git_writestream *target) +{ + git_vector filter_streams = GIT_VECTOR_INIT; + git_writestream *stream_start; + int error = 0; + + git_buf_sanitize(data); + + if ((error = stream_list_init( + &stream_start, &filter_streams, filters, target)) == 0 && + (error = + stream_start->write(stream_start, data->ptr, data->size)) == 0) + error = stream_start->close(stream_start); + + stream_list_free(&filter_streams); + return error; +} + +int git_filter_list_stream_blob( + git_filter_list *filters, + git_blob *blob, + git_writestream *target) +{ + git_buf in = GIT_BUF_INIT; + + if (buf_from_blob(&in, blob) < 0) + return -1; if (filters) git_oid_cpy(&filters->source.oid, git_blob_id(blob)); - return git_filter_list_apply_to_data(out, filters, &in); + return git_filter_list_stream_data(filters, &in, target); } diff --git a/src/filter.h b/src/filter.h index 390ffebad..5062afba5 100644 --- a/src/filter.h +++ b/src/filter.h @@ -24,16 +24,23 @@ typedef enum { GIT_CRLF_AUTO, } git_crlf_t; +typedef struct { + git_attr_session *attr_session; + git_buf *temp_buf; + uint32_t flags; +} git_filter_options; + +#define GIT_FILTER_OPTIONS_INIT {0} + extern void git_filter_free(git_filter *filter); -extern int git_filter_list__load_with_attr_session( +extern int git_filter_list__load_ext( git_filter_list **filters, git_repository *repo, - git_attr_session *attr_session, git_blob *blob, /* can be NULL */ const char *path, git_filter_mode_t mode, - uint32_t options); + git_filter_options *filter_opts); /* * Available filters diff --git a/src/repository.c b/src/repository.c index c9275078f..23c99b0f0 100644 --- a/src/repository.c +++ b/src/repository.c @@ -1849,7 +1849,7 @@ int git_repository_hashfile( if (strlen(as_path) > 0) { error = git_filter_list_load( &fl, repo, NULL, as_path, - GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT); + GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT); if (error < 0) return error; } else { diff --git a/tests/filter/crlf.c b/tests/filter/crlf.c index a31dac965..406d3b6b0 100644 --- a/tests/filter/crlf.c +++ b/tests/filter/crlf.c @@ -123,7 +123,7 @@ void test_filter_crlf__with_safecrlf_and_unsafe_allowed(void) cl_repo_set_bool(g_repo, "core.safecrlf", true); cl_git_pass(git_filter_list_new( - &fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)); + &fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)); crlf = git_filter_lookup(GIT_FILTER_CRLF); cl_assert(crlf != NULL); diff --git a/tests/filter/stream.c b/tests/filter/stream.c new file mode 100644 index 000000000..603f19494 --- /dev/null +++ b/tests/filter/stream.c @@ -0,0 +1,221 @@ +#include "clar_libgit2.h" +#include "posix.h" +#include "blob.h" +#include "filter.h" +#include "buf_text.h" +#include "git2/sys/filter.h" +#include "git2/sys/repository.h" + +static git_repository *g_repo = NULL; + +static git_filter *create_compress_filter(void); +static git_filter *compress_filter; + +void test_filter_stream__initialize(void) +{ + compress_filter = create_compress_filter(); + + cl_git_pass(git_filter_register("compress", compress_filter, 50)); + g_repo = cl_git_sandbox_init("empty_standard_repo"); +} + +void test_filter_stream__cleanup(void) +{ + cl_git_sandbox_cleanup(); + g_repo = NULL; + + git_filter_unregister("compress"); +} + +#define CHUNKSIZE 10240 + +struct compress_stream { + git_writestream parent; + git_writestream *next; + git_filter_mode_t mode; + char current; + size_t current_chunk; +}; + +static int compress_stream_write__deflated(struct compress_stream *stream, const char *buffer, size_t len) +{ + size_t idx = 0; + + while (len > 0) { + size_t chunkremain, chunksize; + + if (stream->current_chunk == 0) + stream->current = buffer[idx]; + + chunkremain = CHUNKSIZE - stream->current_chunk; + chunksize = min(chunkremain, len); + + stream->current_chunk += chunksize; + len -= chunksize; + idx += chunksize; + + if (stream->current_chunk == CHUNKSIZE) { + cl_git_pass(stream->next->write(stream->next, &stream->current, 1)); + stream->current_chunk = 0; + } + } + + return 0; +} + +static int compress_stream_write__inflated(struct compress_stream *stream, const char *buffer, size_t len) +{ + char inflated[CHUNKSIZE]; + size_t i, j; + + for (i = 0; i < len; i++) { + for (j = 0; j < CHUNKSIZE; j++) + inflated[j] = buffer[i]; + + cl_git_pass(stream->next->write(stream->next, inflated, CHUNKSIZE)); + } + + return 0; +} + +static int compress_stream_write(git_writestream *s, const char *buffer, size_t len) +{ + struct compress_stream *stream = (struct compress_stream *)s; + + return (stream->mode == GIT_FILTER_TO_ODB) ? + compress_stream_write__deflated(stream, buffer, len) : + compress_stream_write__inflated(stream, buffer, len); +} + +static int compress_stream_close(git_writestream *s) +{ + struct compress_stream *stream = (struct compress_stream *)s; + cl_assert_equal_i(0, stream->current_chunk); + stream->next->close(stream->next); + return 0; +} + +static void compress_stream_free(git_writestream *stream) +{ + git__free(stream); +} + +static int compress_filter_stream_init( + git_writestream **out, + git_filter *self, + void **payload, + const git_filter_source *src, + git_writestream *next) +{ + struct compress_stream *stream = git__calloc(1, sizeof(struct compress_stream)); + cl_assert(stream); + + GIT_UNUSED(self); + GIT_UNUSED(payload); + + stream->parent.write = compress_stream_write; + stream->parent.close = compress_stream_close; + stream->parent.free = compress_stream_free; + stream->next = next; + stream->mode = git_filter_source_mode(src); + + *out = (git_writestream *)stream; + return 0; +} + +static void compress_filter_free(git_filter *f) +{ + git__free(f); +} + +git_filter *create_compress_filter(void) +{ + git_filter *filter = git__calloc(1, sizeof(git_filter)); + cl_assert(filter); + + filter->version = GIT_FILTER_VERSION; + filter->attributes = "+compress"; + filter->stream = compress_filter_stream_init; + filter->shutdown = compress_filter_free; + + return filter; +} + +static void writefile(const char *filename, size_t numchunks) +{ + git_buf path = GIT_BUF_INIT; + char buf[CHUNKSIZE]; + size_t i = 0, j = 0; + int fd; + + cl_git_pass(git_buf_joinpath(&path, "empty_standard_repo", filename)); + + fd = p_open(path.ptr, O_RDWR|O_CREAT, 0666); + cl_assert(fd >= 0); + + for (i = 0; i < numchunks; i++) { + for (j = 0; j < CHUNKSIZE; j++) { + buf[j] = i % 256; + } + + cl_git_pass(p_write(fd, buf, CHUNKSIZE)); + } + p_close(fd); + + git_buf_free(&path); +} + +static void test_stream(size_t numchunks) +{ + git_index *index; + const git_index_entry *entry; + git_blob *blob; + struct stat st; + git_checkout_options checkout_opts = GIT_CHECKOUT_OPTIONS_INIT; + + checkout_opts.checkout_strategy = GIT_CHECKOUT_FORCE; + + cl_git_mkfile( + "empty_standard_repo/.gitattributes", + "* compress\n"); + + /* write a file to disk */ + writefile("streamed_file", numchunks); + + /* place it in the index */ + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_add_bypath(index, "streamed_file")); + cl_git_pass(git_index_write(index)); + + /* ensure it was appropriately compressed */ + cl_assert(entry = git_index_get_bypath(index, "streamed_file", 0)); + + cl_git_pass(git_blob_lookup(&blob, g_repo, &entry->id)); + cl_assert_equal_i(numchunks, git_blob_rawsize(blob)); + + /* check the file back out */ + cl_must_pass(p_unlink("empty_standard_repo/streamed_file")); + cl_git_pass(git_checkout_index(g_repo, index, &checkout_opts)); + + /* ensure it was decompressed */ + cl_must_pass(p_stat("empty_standard_repo/streamed_file", &st)); + cl_assert_equal_sz((numchunks * CHUNKSIZE), st.st_size); + + git_index_free(index); + git_blob_free(blob); +} + +/* write a 50KB file through the "compression" stream */ +void test_filter_stream__smallfile(void) +{ + test_stream(5); +} + +/* optionally write a 500 MB file through the compression stream */ +void test_filter_stream__bigfile(void) +{ + if (!cl_getenv("GITTEST_INVASIVE_FS_SIZE")) + cl_skip(); + + test_stream(51200); +} diff --git a/tests/repo/init.c b/tests/repo/init.c index 91747c9f5..076156817 100644 --- a/tests/repo/init.c +++ b/tests/repo/init.c @@ -722,7 +722,7 @@ void test_repo_init__at_filesystem_root(void) git_buf root = GIT_BUF_INIT; int root_len; - if (!cl_getenv("GITTEST_INVASIVE_FILESYSTEM")) + if (!cl_getenv("GITTEST_INVASIVE_FS_STRUCTURE")) cl_skip(); root_len = git_path_root(sandbox);