From 85d5481206a932d747b2d5587b6d4c7f69993ba6 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 28 Aug 2013 16:44:04 -0700 Subject: [PATCH] Create public filter object and use it This creates include/git2/sys/filter.h with a basic definition of a git_filter and then converts the internal code to use it. There are related internal objects (git_filter_list) that we will want to publish at some point, but this is a first step. --- include/git2/filter.h | 16 ++- include/git2/sys/filter.h | 104 ++++++++++++++++++ src/blob.c | 48 ++++----- src/buffer.h | 22 ++++ src/checkout.c | 60 ++++------- src/crlf.c | 173 +++++++++++++++++------------ src/diff.c | 12 +-- src/diff_file.c | 46 ++++---- src/filter.c | 186 +++++++++++++++++++++++++------- src/filter.h | 61 +++++------ src/odb.c | 6 +- src/odb.h | 3 +- src/repository.c | 8 +- tests-clar/object/blob/filter.c | 12 +-- 14 files changed, 505 insertions(+), 252 deletions(-) create mode 100644 include/git2/sys/filter.h diff --git a/include/git2/filter.h b/include/git2/filter.h index 3bc4a9037..478f3a6ad 100644 --- a/include/git2/filter.h +++ b/include/git2/filter.h @@ -39,7 +39,10 @@ typedef enum { * A filter that can transform file data * * This represents a filter that can be used to transform or even replace - * file data. Libgit2 currently includes one built in filter: + * file data. Libgit2 includes one built in filter and it is possible to + * write your own (see git2/sys/filter.h for information on that). 
+ * + * The built in filter is: * * * "crlf" which uses the complex rules with the "text", "eol", and * "crlf" file attributes to decide how to convert between LF and CRLF @@ -47,6 +50,17 @@ typedef enum { */ typedef struct git_filter git_filter; +GIT_EXTERN(git_filter *) git_filter_lookup(const char *name); + +#define GIT_FILTER_CRLF "crlf" + +GIT_EXTERN(int) git_filter_apply_to_buffer( + git_buffer *out, + git_filter *filter, + const git_buffer *input, + const char *as_path, + git_filter_mode_t mode); + GIT_END_DECL /** @} */ diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h new file mode 100644 index 000000000..2264be080 --- /dev/null +++ b/include/git2/sys/filter.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_sys_git_config_backend_h__ +#define INCLUDE_sys_git_config_backend_h__ + +#include "git2/filter.h" + +/** + * @file git2/sys/filter.h + * @brief Git filter backend and plugin routines + * @defgroup git_backend Git custom backend APIs + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * A filter source represents a file/blob to be processed + */ +typedef struct git_filter_source git_filter_source; +struct git_filter_source { + git_repository *repo; + const char *path; + git_oid oid; /* zero if unknown (which is likely) */ + uint16_t filemode; /* zero if unknown */ +}; + +/** + * Callback to actually perform the data filtering + */ +typedef int (*git_filter_apply_fn)( + git_filter *self, + void **payload, /* may be read and/or set */ + git_filter_mode_t mode, + git_buffer *to, + const git_buffer *from, + const git_filter_source *src); + +/** + * Callback to decide if a given source needs this filter + */ +typedef int (*git_filter_check_fn)( + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ + 
git_filter_mode_t mode, + const git_filter_source *src); + +/** + * Callback to clean up after filtering has been applied + */ +typedef void (*git_filter_cleanup_fn)( + git_filter *self, + void *payload); + +/** + * Filter structure used to register a new filter. + * + * To associate extra data with a filter, simply allocate extra data + * and put the `git_filter` struct at the start of your data buffer, + * then cast the `self` pointer to your larger structure when your + * callback is invoked. + * + * `version` should be set to GIT_FILTER_VERSION + * + * `apply` is the callback that actually filters data. + * + * `check` is an optional callback that checks if filtering is needed for + * a given source. + * + * `cleanup` is an optional callback that is made after the filter has + * been applied. Both the `check` and `apply` callbacks are able to + * allocate a `payload` to keep per-source filter state, and this callback + * is given that value and can clean up as needed. + */ +struct git_filter { + unsigned int version; + git_filter_apply_fn apply; + git_filter_check_fn check; + git_filter_cleanup_fn cleanup; +}; + +#define GIT_FILTER_VERSION 1 + +/** + * Register a filter under a given name + * + * Two filters will be preregistered with libgit2: GIT_FILTER_CRLF and + * GIT_FILTER_IDENT. 
+ */ +GIT_EXTERN(int) git_filter_register( + const char *name, const git_filter *filter); + +/** + * Remove the filter with the given name + */ +GIT_EXTERN(int) git_filter_unregister(const char *name); + +/** @} */ +GIT_END_DECL +#endif diff --git a/src/blob.c b/src/blob.c index 6a866538c..3581ee9d1 100644 --- a/src/blob.c +++ b/src/blob.c @@ -108,7 +108,7 @@ static int write_file_filtered( git_off_t *size, git_odb *odb, const char *full_path, - git_vector *filters) + git_filter_list *fl) { int error; git_buf source = GIT_BUF_INIT; @@ -117,7 +117,7 @@ static int write_file_filtered( if ((error = git_futils_readbuffer(&source, full_path)) < 0) return error; - error = git_filters_apply(&dest, &source, filters); + error = git_filter_list_apply(&dest, &source, fl); /* Free the source as soon as possible. This can be big in memory, * and we don't want to ODB write to choke */ @@ -198,29 +198,25 @@ int git_blob__create_from_paths( if (S_ISLNK(mode)) { error = write_symlink(oid, odb, content_path, (size_t)size); } else { - git_vector write_filters = GIT_VECTOR_INIT; - int filter_count = 0; + git_filter_list *fl = NULL; - if (try_load_filters) { + if (try_load_filters) /* Load the filters for writing this file to the ODB */ - filter_count = git_filters_load( - &write_filters, repo, hint_path, GIT_FILTER_TO_ODB); - } + error = git_filter_list_load( + &fl, repo, hint_path, GIT_FILTER_TO_ODB); - if (filter_count < 0) { - /* Negative value means there was a critical error */ - error = filter_count; - } else if (filter_count == 0) { + if (error < 0) + /* well, that didn't work */; + else if (fl == NULL) /* No filters need to be applied to the document: we can stream * directly from disk */ error = write_file_stream(oid, odb, content_path, size); - } else { + else { /* We need to apply one or more filters */ - error = write_file_filtered( - oid, &size, odb, content_path, &write_filters); - } + error = write_file_filtered(oid, &size, odb, content_path, fl); - 
git_filters_free(&write_filters); + git_filter_list_free(fl); + } /* * TODO: eventually support streaming filtered files, for files @@ -345,9 +341,9 @@ int git_blob_filtered_content( const char *as_path, int check_for_binary_data) { - int error = 0, num_filters = 0; + int error = 0; git_buf filtered = GIT_BUF_INIT, unfiltered = GIT_BUF_INIT; - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; assert(blob && as_path && out); @@ -359,12 +355,12 @@ int git_blob_filtered_content( if (check_for_binary_data && git_buf_text_is_binary(&filtered)) return 0; - num_filters = git_filters_load( - &filters, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE); - if (num_filters < 0) - return num_filters; + error = git_filter_list_load( + &fl, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE); + if (error < 0) + return error; - if (num_filters > 0) { + if (fl != NULL) { if (out->ptr && out->available) { filtered.ptr = out->ptr; filtered.size = out->size; @@ -374,9 +370,9 @@ int git_blob_filtered_content( } if (!(error = git_blob__getbuf(&unfiltered, blob))) - error = git_filters_apply(&filtered, &unfiltered, &filters); + error = git_filter_list_apply(&filtered, &unfiltered, fl); - git_filters_free(&filters); + git_filter_list_free(fl); git_buf_free(&unfiltered); } diff --git a/src/buffer.h b/src/buffer.h index f3e1d506f..b1cb5d06a 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -9,6 +9,7 @@ #include "common.h" #include "git2/strarray.h" +#include "git2/buffer.h" #include <stdarg.h> typedef struct { @@ -174,4 +175,25 @@ int git_buf_splice( const char *data, size_t nb_to_insert); + +#define GIT_BUF_FROM_BUFFER(buffer) \ + { (buffer)->ptr, (buffer)->available, (buffer)->size } + +GIT_INLINE(void) git_buf_from_buffer(git_buf *buf, const git_buffer *buffer) +{ + buf->ptr = buffer->ptr; + buf->size = buffer->size; + buf->asize = buffer->available; +} + +#define GIT_BUFFER_FROM_BUF(buf) \ + { (buf)->ptr, (buf)->size, (buf)->asize } + +GIT_INLINE(void) 
git_buffer_from_buf(git_buffer *buffer, const git_buf *buf) +{ + buffer->ptr = buf->ptr; + buffer->size = buf->size; + buffer->available = buf->asize; +} + #endif diff --git a/src/checkout.c b/src/checkout.c index eb92e8fd6..5ce4a19c5 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -710,56 +710,40 @@ static int blob_content_to_file( mode_t entry_filemode, git_checkout_opts *opts) { - int error = -1, nb_filters = 0; - mode_t file_mode = opts->file_mode; - bool dont_free_filtered; + int error = 0; + mode_t file_mode = opts->file_mode ? opts->file_mode : entry_filemode; git_buf unfiltered = GIT_BUF_INIT, filtered = GIT_BUF_INIT; - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; /* Create a fake git_buf from the blob raw data... */ filtered.ptr = (void *)git_blob_rawcontent(blob); filtered.size = (size_t)git_blob_rawsize(blob); - /* ... and make sure it doesn't get unexpectedly freed */ - dont_free_filtered = true; - if (!opts->disable_filters && - !git_buf_text_is_binary(&filtered) && - (nb_filters = git_filters_load( - &filters, - git_object_owner((git_object *)blob), - path, - GIT_FILTER_TO_WORKTREE)) > 0) - { + if (!opts->disable_filters && !git_buf_text_is_binary(&filtered)) { + error = git_filter_list_load( + &fl, git_blob_owner(blob), path, GIT_FILTER_TO_WORKTREE); + } + + if (fl != NULL) { /* reset 'filtered' so it can be a filter target */ git_buf_init(&filtered, 0); - dont_free_filtered = false; + + if (!(error = git_blob__getbuf(&unfiltered, blob))) { + error = git_filter_list_apply(&filtered, &unfiltered, fl); + + git_buf_free(&unfiltered); + } + + git_filter_list_free(fl); } - if (nb_filters < 0) - return nb_filters; - - if (nb_filters > 0) { - if ((error = git_blob__getbuf(&unfiltered, blob)) < 0) - goto cleanup; - - if ((error = git_filters_apply(&filtered, &unfiltered, &filters)) < 0) - goto cleanup; - } - - /* Allow overriding of file mode */ - if (!file_mode) - file_mode = entry_filemode; - - error = buffer_to_file( - st, 
&filtered, path, opts->dir_mode, opts->file_open_flags, file_mode); - - if (!error) + if (!error && + !(error = buffer_to_file( + st, &filtered, path, opts->dir_mode, + opts->file_open_flags, file_mode))) st->st_mode = entry_filemode; -cleanup: - git_filters_free(&filters); - git_buf_free(&unfiltered); - if (!dont_free_filtered) + if (filtered.asize != 0) git_buf_free(&filtered); return error; diff --git a/src/crlf.c b/src/crlf.c index fbb3ba2dd..2177bff98 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -19,13 +19,11 @@ struct crlf_attrs { int crlf_action; int eol; + int auto_crlf; }; struct crlf_filter { git_filter f; - struct crlf_attrs attrs; - git_repository *repo; - char path[GIT_FLEX_ARRAY]; }; static int check_crlf(const char *value) @@ -76,7 +74,8 @@ static int crlf_input_action(struct crlf_attrs *ca) return ca->crlf_action; } -static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, const char *path) +static int crlf_load_attributes( + struct crlf_attrs *ca, git_repository *repo, const char *path) { #define NUM_CONV_ATTRS 3 @@ -108,9 +107,8 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con return -1; } -static int has_cr_in_index(git_filter *self) +static int has_cr_in_index(git_repository *repo, const char *path) { - struct crlf_filter *filter = (struct crlf_filter *)self; git_index *index; const git_index_entry *entry; git_blob *blob; @@ -118,19 +116,19 @@ static int has_cr_in_index(git_filter *self) git_off_t blobsize; bool found_cr; - if (git_repository_index__weakptr(&index, filter->repo) < 0) { + if (git_repository_index__weakptr(&index, repo) < 0) { giterr_clear(); return false; } - if (!(entry = git_index_get_bypath(index, filter->path, 0)) && - !(entry = git_index_get_bypath(index, filter->path, 1))) + if (!(entry = git_index_get_bypath(index, path, 0)) && + !(entry = git_index_get_bypath(index, path, 1))) return false; if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */ return true; - if 
(git_blob_lookup(&blob, filter->repo, &entry->oid) < 0) + if (git_blob_lookup(&blob, repo, &entry->oid) < 0) return false; blobcontent = git_blob_rawcontent(blob); @@ -147,26 +145,26 @@ static int has_cr_in_index(git_filter *self) } static int crlf_apply_to_odb( - git_filter *self, git_buf *dest, const git_buf *source) + struct crlf_attrs *ca, + git_buffer *to, + const git_buffer *from, + const git_filter_source *src) { - struct crlf_filter *filter = (struct crlf_filter *)self; - - assert(self && dest && source); + const git_buf from_buf = GIT_BUF_FROM_BUFFER(from); + git_buf to_buf = GIT_BUF_FROM_BUFFER(to); /* Empty file? Nothing to do */ - if (git_buf_len(source) == 0) + if (!git_buf_len(&from_buf)) return 0; /* Heuristics to see if we can skip the conversion. * Straight from Core Git. */ - if (filter->attrs.crlf_action == GIT_CRLF_AUTO || - filter->attrs.crlf_action == GIT_CRLF_GUESS) { - + if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) { git_buf_text_stats stats; /* Check heuristics for binary vs text... */ - if (git_buf_text_gather_stats(&stats, source, false)) + if (git_buf_text_gather_stats(&stats, &from_buf, false)) return -1; /* @@ -175,28 +173,34 @@ static int crlf_apply_to_odb( * stuff? */ if (stats.cr != stats.crlf) - return -1; + return GIT_ENOTFOUND; - if (filter->attrs.crlf_action == GIT_CRLF_GUESS) { + if (ca->crlf_action == GIT_CRLF_GUESS) { /* * If the file in the index has any CR in it, do not convert. * This is the new safer autocrlf handling. 
*/ - if (has_cr_in_index(self)) - return -1; + if (has_cr_in_index(src->repo, src->path)) + return GIT_ENOTFOUND; } if (!stats.cr) - return -1; + return GIT_ENOTFOUND; } /* Actually drop the carriage returns */ - return git_buf_text_crlf_to_lf(dest, source); + if (git_buf_text_crlf_to_lf(&to_buf, &from_buf) < 0) + return -1; + + /* Overwrite "to" buffer in case data was resized */ + git_buffer_from_buf(to, &to_buf); + + return 0; } -static const char *line_ending(struct crlf_filter *filter) +static const char *line_ending(struct crlf_attrs *ca) { - switch (filter->attrs.crlf_action) { + switch (ca->crlf_action) { case GIT_CRLF_BINARY: case GIT_CRLF_INPUT: return "\n"; @@ -213,7 +217,7 @@ static const char *line_ending(struct crlf_filter *filter) goto line_ending_error; } - switch (filter->attrs.eol) { + switch (ca->eol) { case GIT_EOL_UNSET: return GIT_EOL_NATIVE == GIT_EOL_CRLF ? "\r\n" @@ -235,44 +239,58 @@ line_ending_error: } static int crlf_apply_to_workdir( - git_filter *self, git_buf *tgt, const git_buf *src) + struct crlf_attrs *ca, git_buffer *to, const git_buffer *from) { - struct crlf_filter *filter = (struct crlf_filter *)self; + const git_buf from_buf = GIT_BUF_FROM_BUFFER(from); + git_buf to_buf = GIT_BUF_FROM_BUFFER(to); const char *workdir_ending = NULL; - assert(self && tgt && src); - /* Empty file? Nothing to do. 
*/ - if (git_buf_len(src) == 0) - return -1; + if (git_buf_len(&from_buf) == 0) + return 0; /* Determine proper line ending */ - workdir_ending = line_ending(filter); + workdir_ending = line_ending(ca); if (!workdir_ending) return -1; if (!strcmp("\n", workdir_ending)) { - if (git_buf_find(src, '\r') < 0) + if (ca->crlf_action == GIT_CRLF_GUESS && ca->auto_crlf) + return GIT_ENOTFOUND; + + if (git_buf_find(&from_buf, '\r') < 0) + return GIT_ENOTFOUND; + + if (git_buf_text_crlf_to_lf(&to_buf, &from_buf) < 0) + return -1; + } else { + /* only other supported option is lf->crlf conversion */ + assert(!strcmp("\r\n", workdir_ending)); + + if (git_buf_text_lf_to_crlf(&to_buf, &from_buf) < 0) return -1; - return git_buf_text_crlf_to_lf(tgt, src); } - /* only other supported option is lf->crlf conversion */ - assert(!strcmp("\r\n", workdir_ending)); - return git_buf_text_lf_to_crlf(tgt, src); + /* Overwrite "to" buffer in case data was resized */ + git_buffer_from_buf(to, &to_buf); + + return 0; } -static int find_and_add_filter( - git_vector *filters, git_repository *repo, const char *path, - int (*apply)(struct git_filter *self, git_buf *tgt, const git_buf *src)) +static int crlf_check( + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ + git_filter_mode_t mode, + const git_filter_source *src) { - struct crlf_attrs ca; - struct crlf_filter *filter; - size_t pathlen; int error; + struct crlf_attrs ca; + + GIT_UNUSED(self); + GIT_UNUSED(mode); /* Load gitattributes for the path */ - if ((error = crlf_load_attributes(&ca, repo, path)) < 0) + if ((error = crlf_load_attributes(&ca, src->repo, src->path)) < 0) return error; /* @@ -282,41 +300,54 @@ static int find_and_add_filter( ca.crlf_action = crlf_input_action(&ca); if (ca.crlf_action == GIT_CRLF_BINARY) - return 0; + return GIT_ENOTFOUND; if (ca.crlf_action == GIT_CRLF_GUESS) { - int auto_crlf; - - if ((error = git_repository__cvar(&auto_crlf, repo, GIT_CVAR_AUTO_CRLF)) < 0) + if ((error 
= git_repository__cvar( + &ca.auto_crlf, src->repo, GIT_CVAR_AUTO_CRLF)) < 0) return error; - if (auto_crlf == GIT_AUTO_CRLF_FALSE) - return 0; + if (ca.auto_crlf == GIT_AUTO_CRLF_FALSE) + return GIT_ENOTFOUND; } - /* If we're good, we create a new filter object and push it - * into the filters array */ - pathlen = strlen(path); - filter = git__malloc(sizeof(struct crlf_filter) + pathlen + 1); - GITERR_CHECK_ALLOC(filter); + *payload = git__malloc(sizeof(ca)); + GITERR_CHECK_ALLOC(*payload); + memcpy(*payload, &ca, sizeof(ca)); - filter->f.apply = apply; - filter->f.do_free = NULL; - memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs)); - filter->repo = repo; - memcpy(filter->path, path, pathlen + 1); - - return git_vector_insert(filters, filter); + return 0; } -int git_filter_add__crlf_to_odb( - git_vector *filters, git_repository *repo, const char *path) +static int crlf_apply( + git_filter *self, + void **payload, /* may be read and/or set */ + git_filter_mode_t mode, + git_buffer *to, + const git_buffer *from, + const git_filter_source *src) { - return find_and_add_filter(filters, repo, path, &crlf_apply_to_odb); + GIT_UNUSED(self); + + if (mode == GIT_FILTER_SMUDGE) + return crlf_apply_to_workdir(*payload, to, from); + else + return crlf_apply_to_odb(*payload, to, from, src); } -int git_filter_add__crlf_to_workdir( - git_vector *filters, git_repository *repo, const char *path) +static void crlf_cleanup( + git_filter *self, + void *payload) { - return find_and_add_filter(filters, repo, path, &crlf_apply_to_workdir); + GIT_UNUSED(self); + git__free(payload); +} + +git_filter *git_crlf_filter_new(void) +{ + struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter)); + f->f.version = GIT_FILTER_VERSION; + f->f.check = crlf_check; + f->f.apply = crlf_apply; + f->f.cleanup = crlf_cleanup; + return (git_filter *)f; } diff --git a/src/diff.c b/src/diff.c index 77dbbd8bc..b1cde36bc 100644 --- a/src/diff.c +++ b/src/diff.c @@ -568,21 +568,21 @@ int 
git_diff__oid_for_file( giterr_set(GITERR_OS, "File size overflow (for 32-bits) on '%s'", path); result = -1; } else { - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; - result = git_filters_load(&filters, repo, path, GIT_FILTER_TO_ODB); - if (result >= 0) { + result = git_filter_list_load(&fl, repo, path, GIT_FILTER_TO_ODB); + if (!result) { int fd = git_futils_open_ro(full_path.ptr); if (fd < 0) result = fd; else { result = git_odb__hashfd_filtered( - oid, fd, (size_t)size, GIT_OBJ_BLOB, &filters); + oid, fd, (size_t)size, GIT_OBJ_BLOB, fl); p_close(fd); } - } - git_filters_free(&filters); + git_filter_list_free(fl); + } } cleanup: diff --git a/src/diff_file.c b/src/diff_file.c index bcfef13cd..7602591cf 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -296,7 +296,7 @@ static int diff_file_content_load_workdir_file( git_diff_file_content *fc, git_buf *path) { int error = 0; - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; git_file fd = git_futils_open_ro(git_buf_cstr(path)); @@ -310,41 +310,35 @@ static int diff_file_content_load_workdir_file( if (diff_file_content_binary_by_size(fc)) goto cleanup; - if ((error = git_filters_load( - &filters, fc->repo, fc->file->path, GIT_FILTER_TO_ODB)) < 0) + if ((error = git_filter_list_load( + &fl, fc->repo, fc->file->path, GIT_FILTER_TO_ODB)) < 0) goto cleanup; - /* error >= is a filter count */ - if (error == 0) { + /* if there are no filters, try to mmap the file */ + if (fl == NULL) { if (!(error = git_futils_mmap_ro( - &fc->map, fd, 0, (size_t)fc->file->size))) + &fc->map, fd, 0, (size_t)fc->file->size))) { fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA; - else /* fall through to try readbuffer below */ - giterr_clear(); - } - - if (error != 0) { - error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size); - if (error < 0) goto cleanup; - - if (!filters.length) - git_buf_swap(&filtered, &raw); - else - error = 
git_filters_apply(&filtered, &raw, &filters); - - if (!error) { - fc->map.len = git_buf_len(&filtered); - fc->map.data = git_buf_detach(&filtered); - fc->flags |= GIT_DIFF_FLAG__FREE_DATA; } - git_buf_free(&raw); - git_buf_free(&filtered); + /* if mmap failed, fall through to try readbuffer below */ + giterr_clear(); } + if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size)) && + !(error = git_filter_list_apply(&filtered, &raw, fl))) + { + fc->map.len = git_buf_len(&filtered); + fc->map.data = git_buf_detach(&filtered); + fc->flags |= GIT_DIFF_FLAG__FREE_DATA; + } + + git_buf_free(&raw); + git_buf_free(&filtered); + cleanup: - git_filters_free(&filters); + git_filter_list_free(fl); p_close(fd); return error; diff --git a/src/filter.c b/src/filter.c index 9f749dcbd..7935e6518 100644 --- a/src/filter.c +++ b/src/filter.c @@ -13,62 +13,155 @@ #include "git2/config.h" #include "blob.h" -int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode) -{ - int error; - - if (mode == GIT_FILTER_TO_ODB) { - /* Load the CRLF cleanup filter when writing to the ODB */ - error = git_filter_add__crlf_to_odb(filters, repo, path); - if (error < 0) - return error; - } else { - error = git_filter_add__crlf_to_workdir(filters, repo, path); - if (error < 0) - return error; - } - - return (int)filters->length; -} - -void git_filters_free(git_vector *filters) -{ - size_t i; +typedef struct { git_filter *filter; + void *payload; +} git_filter_entry; - git_vector_foreach(filters, i, filter) { - if (filter->do_free != NULL) - filter->do_free(filter); - else - git__free(filter); +struct git_filter_list { + git_array_t(git_filter_entry) filters; + git_filter_mode_t mode; + git_filter_source source; + char path[GIT_FLEX_ARRAY]; +}; + +typedef struct { + const char *filter_name; + git_filter *filter; +} git_filter_def; + +static git_array_t(git_filter_def) filter_registry = GIT_ARRAY_INIT; + +static int filter_load_defaults(void) +{ + if 
(!git_array_size(filter_registry)) { + git_filter_def *fdef = git_array_alloc(filter_registry); + GITERR_CHECK_ALLOC(fdef); + + fdef->filter_name = GIT_FILTER_CRLF; + fdef->filter = git_crlf_filter_new(); + GITERR_CHECK_ALLOC(fdef->filter); } - git_vector_free(filters); + return 0; } -int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) +static int git_filter_list_new( + git_filter_list **out, git_filter_mode_t mode, const git_filter_source *src) { - size_t i; + git_filter_list *fl = NULL; + size_t pathlen = src->path ? strlen(src->path) : 0; + + fl = git__calloc(1, sizeof(git_filter_list) + pathlen + 1); + GITERR_CHECK_ALLOC(fl); + + fl->mode = mode; + if (src->path) + memcpy(fl->path, src->path, pathlen); + fl->source.repo = src->repo; + fl->source.path = fl->path; + + *out = fl; + return 0; +} + +int git_filter_list_load( + git_filter_list **filters, + git_repository *repo, + const char *path, + git_filter_mode_t mode) +{ + int error = 0; + git_filter_list *fl = NULL; + git_filter_source src = { 0 }; + git_filter_entry *fe; + uint32_t f; + + if (filter_load_defaults() < 0) + return -1; + + src.repo = repo; + src.path = path; + + for (f = 0; f < git_array_size(filter_registry); ++f) { + void *payload = NULL; + git_filter_def *fdef = git_array_get(filter_registry, f); + + if (!fdef || !fdef->filter) + continue; + + if (fdef->filter->check) + error = fdef->filter->check(fdef->filter, &payload, mode, &src); + + if (error == GIT_ENOTFOUND) + error = 0; + else if (error < 0) + break; + else { + if (!fl && (error = git_filter_list_new(&fl, mode, &src)) < 0) + return error; + + fe = git_array_alloc(fl->filters); + GITERR_CHECK_ALLOC(fe); + fe->filter = fdef->filter; + fe->payload = payload; + } + } + + if (error && fl != NULL) { + git_array_clear(fl->filters); + git__free(fl); + fl = NULL; + } + + *filters = fl; + return error; +} + +void git_filter_list_free(git_filter_list *fl) +{ + uint32_t i; + + if (!fl) + return; + + for (i = 0; i < 
git_array_size(fl->filters); ++i) { + git_filter_entry *fe = git_array_get(fl->filters, i); + if (fe->filter->cleanup) + fe->filter->cleanup(fe->filter, fe->payload); + } + + git_array_clear(fl->filters); + git__free(fl); +} + +int git_filter_list_apply( + git_buf *dest, + git_buf *source, + git_filter_list *fl) +{ + int error = 0; + uint32_t i; unsigned int src; git_buf *dbuffer[2]; + if (!fl) { + git_buf_swap(dest, source); + return 0; + } + dbuffer[0] = source; dbuffer[1] = dest; src = 0; - if (git_buf_len(source) == 0) { - git_buf_clear(dest); - return 0; - } - /* Pre-grow the destination buffer to more or less the size * we expect it to have */ if (git_buf_grow(dest, git_buf_len(source)) < 0) return -1; - for (i = 0; i < filters->length; ++i) { - git_filter *filter = git_vector_get(filters, i); + for (i = 0; i < git_array_size(fl->filters); ++i) { + git_filter_entry *fe = git_array_get(fl->filters, i); unsigned int dst = 1 - src; git_buf_clear(dbuffer[dst]); @@ -79,8 +172,25 @@ int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) * of the double buffering (so that the text goes through * cleanly). 
*/ - if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) - src = dst; + { + git_buffer srcb = GIT_BUFFER_FROM_BUF(dbuffer[src]); + git_buffer dstb = GIT_BUFFER_FROM_BUF(dbuffer[dst]); + + error = fe->filter->apply( + fe->filter, &fe->payload, fl->mode, &dstb, &srcb, &fl->source); + + if (error == GIT_ENOTFOUND) + error = 0; + else if (error < 0) { + git_buf_clear(dest); + return error; + } + else { + git_buf_from_buffer(dbuffer[src], &srcb); + git_buf_from_buffer(dbuffer[dst], &dstb); + src = dst; + } + } if (git_buf_oom(dbuffer[dst])) return -1; diff --git a/src/filter.h b/src/filter.h index 67845ad6a..a4ee2172d 100644 --- a/src/filter.h +++ b/src/filter.h @@ -9,14 +9,11 @@ #include "common.h" #include "buffer.h" +#include "array.h" #include "git2/odb.h" #include "git2/repository.h" #include "git2/filter.h" - -struct git_filter { - int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source); - void (*do_free)(struct git_filter *self); -}; +#include "git2/sys/filter.h" typedef enum { GIT_CRLF_GUESS = -1, @@ -27,34 +24,38 @@ typedef enum { GIT_CRLF_AUTO, } git_crlf_t; +typedef struct git_filter_list git_filter_list; + /* * FILTER API */ /* - * For any given path in the working directory, fill the `filters` - * array with the relevant filters that need to be applied. + * For any given path in the working directory, create a `git_filter_list` + * with the relevant filters that need to be applied. * - * Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the - * filters that will be used when checking out a file to the working - * directory, or `GIT_FILTER_TO_ODB` for the filters used when writing - * a file to the ODB. + * This will return 0 (success) but set the output git_filter_list to NULL + * if no filters are requested for the given file. 
* - * @param filters Vector where to store all the loaded filters + * @param filters Output newly created git_filter_list (or NULL) * @param repo Repository object that contains `path` * @param path Relative path of the file to be filtered * @param mode Filtering direction (WT->ODB or ODB->WT) - * @return the number of filters loaded for the file (0 if the file - * doesn't need filtering), or a negative error code + * @return 0 on success (which could still return NULL if no filters are + * needed for the requested file), <0 on error */ -extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode); +extern int git_filter_list_load( + git_filter_list **filters, + git_repository *repo, + const char *path, + git_filter_mode_t mode); /* - * Apply one or more filters to a file. + * Apply one or more filters to a data buffer. * - * The file must have been loaded as a `git_buf` object. Both the `source` - * and `dest` buffers are owned by the caller and must be freed once - * they are no longer needed. + * The source data must have been loaded as a `git_buf` object. Both the + * `source` and `dest` buffers are owned by the caller and must be freed + * once they are no longer needed. 
* * NOTE: Because of the double-buffering schema, the `source` buffer that * contains the original file may be tampered once the filtering is @@ -63,29 +64,25 @@ extern int git_filters_load(git_vector *filters, git_repository *repo, const cha * * @param dest Buffer to store the result of the filtering * @param source Buffer containing the document to filter - * @param filters Vector of filters as supplied by `git_filters_load` + * @param filters An already loaded git_filter_list * @return 0 on success, an error code otherwise */ -extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters); +extern int git_filter_list_apply( + git_buf *dest, + git_buf *source, + git_filter_list *filters); /* - * Free the `filters` array generated by `git_filters_load`. + * Free the git_filter_list * - * Note that this frees both the array and its contents. The array will - * be clean/reusable after this call. - * - * @param filters A filters array as supplied by `git_filters_load` + * @param filters A git_filter_list created by `git_filter_list_load` */ -extern void git_filters_free(git_vector *filters); +extern void git_filter_list_free(git_filter_list *filters); /* * Available filters */ -/* Strip CRLF, from Worktree to ODB */ -extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path); - -/* Add CRLF, from ODB to worktree */ -extern int git_filter_add__crlf_to_workdir(git_vector *filters, git_repository *repo, const char *path); +extern git_filter *git_crlf_filter_new(void); #endif diff --git a/src/odb.c b/src/odb.c index a0bfec403..d9310a9d7 100644 --- a/src/odb.c +++ b/src/odb.c @@ -179,13 +179,13 @@ done: } int git_odb__hashfd_filtered( - git_oid *out, git_file fd, size_t size, git_otype type, git_vector *filters) + git_oid *out, git_file fd, size_t size, git_otype type, git_filter_list *fl) { int error; git_buf raw = GIT_BUF_INIT; git_buf filtered = GIT_BUF_INIT; - if (!filters || !filters->length) + if (!fl) 
return git_odb__hashfd(out, fd, size, type); /* size of data is used in header, so we have to read the whole file @@ -193,7 +193,7 @@ int git_odb__hashfd_filtered( */ if (!(error = git_futils_readbuffer_fd(&raw, fd, size))) - error = git_filters_apply(&filtered, &raw, filters); + error = git_filter_list_apply(&filtered, &raw, fl); git_buf_free(&raw); diff --git a/src/odb.h b/src/odb.h index 0d9f9e2ea..61dd9a7fd 100644 --- a/src/odb.h +++ b/src/odb.h @@ -14,6 +14,7 @@ #include "vector.h" #include "cache.h" #include "posix.h" +#include "filter.h" #define GIT_OBJECTS_DIR "objects/" #define GIT_OBJECT_DIR_MODE 0777 @@ -66,7 +67,7 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type); * Acts just like git_odb__hashfd with the addition of filters... */ int git_odb__hashfd_filtered( - git_oid *out, git_file fd, size_t len, git_otype type, git_vector *filters); + git_oid *out, git_file fd, size_t len, git_otype type, git_filter_list *fl); /* * Hash a `path`, assuming it could be a POSIX symlink: if the path is a diff --git a/src/repository.c b/src/repository.c index eead41201..94700e4e3 100644 --- a/src/repository.c +++ b/src/repository.c @@ -1649,7 +1649,7 @@ int git_repository_hashfile( const char *as_path) { int error; - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; git_file fd = -1; git_off_t len; git_buf full_path = GIT_BUF_INIT; @@ -1671,7 +1671,7 @@ int git_repository_hashfile( /* passing empty string for "as_path" indicated --no-filters */ if (strlen(as_path) > 0) { - error = git_filters_load(&filters, repo, as_path, GIT_FILTER_TO_ODB); + error = git_filter_list_load(&fl, repo, as_path, GIT_FILTER_TO_ODB); if (error < 0) return error; } else { @@ -1698,12 +1698,12 @@ int git_repository_hashfile( goto cleanup; } - error = git_odb__hashfd_filtered(out, fd, (size_t)len, type, &filters); + error = git_odb__hashfd_filtered(out, fd, (size_t)len, type, fl); cleanup: if (fd >= 0) p_close(fd); - git_filters_free(&filters); + 
git_filter_list_free(fl); git_buf_free(&full_path); return error; diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 2b3954d9c..33ebedcde 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -101,7 +101,7 @@ void test_object_blob_filter__stats(void) void test_object_blob_filter__to_odb(void) { - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; git_config *cfg; int i; git_blob *blob; @@ -113,21 +113,21 @@ void test_object_blob_filter__to_odb(void) git_attr_cache_flush(g_repo); cl_git_append2file("empty_standard_repo/.gitattributes", "*.txt text\n"); - cl_assert(git_filters_load( - &filters, g_repo, "filename.txt", GIT_FILTER_TO_ODB) > 0); - cl_assert(filters.length == 1); + cl_git_pass( + git_filter_list_load(&fl, g_repo, "filename.txt", GIT_FILTER_TO_ODB)); + cl_assert(fl != NULL); for (i = 0; i < NUM_TEST_OBJECTS; i++) { cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); cl_git_pass(git_blob__getbuf(&orig, blob)); - cl_git_pass(git_filters_apply(&out, &orig, &filters)); + cl_git_pass(git_filter_list_apply(&out, &orig, fl)); cl_assert(git_buf_cmp(&out, &g_crlf_filtered[i]) == 0); git_blob_free(blob); } - git_filters_free(&filters); + git_filter_list_free(fl); git_buf_free(&orig); git_buf_free(&out); git_config_free(cfg);