From 0cf77103b218ad3622aff34f3296db1bdd5f0df9 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 26 Aug 2013 23:17:07 -0700 Subject: [PATCH 01/25] Start of filter API + git_blob_filtered_content This begins the process of exposing git_filter objects to the public API. This includes: * new public type and API for `git_buffer` through which an allocated buffer can be passed to the user * new API `git_blob_filtered_content` * make the git_filter type and GIT_FILTER_TO_... constants public --- include/git2.h | 3 ++ include/git2/blob.h | 32 ++++++++++++++ include/git2/buffer.h | 86 ++++++++++++++++++++++++++++++++++++++ include/git2/filter.h | 54 ++++++++++++++++++++++++ src/blob.c | 51 ++++++++++++++++++++++ src/buf_text.c | 19 +++++---- src/buf_text.h | 8 ++-- src/buffer.c | 54 ++++++++++++++++++++++++ src/crlf.c | 20 +++++---- src/filter.h | 19 ++++----- tests-clar/checkout/crlf.c | 11 +---- tests-clar/core/buffer.c | 47 ++++++++++++++++++--- tests-clar/filter/blob.c | 43 +++++++++++++++++++ tests-clar/filter/crlf.h | 26 ++++++++++++ 14 files changed, 428 insertions(+), 45 deletions(-) create mode 100644 include/git2/buffer.h create mode 100644 include/git2/filter.h create mode 100644 tests-clar/filter/blob.c create mode 100644 tests-clar/filter/crlf.h diff --git a/include/git2.h b/include/git2.h index e8638a830..73c11ad83 100644 --- a/include/git2.h +++ b/include/git2.h @@ -58,4 +58,7 @@ #include "git2/stash.h" #include "git2/pathspec.h" +#include "git2/buffer.h" +#include "git2/filter.h" + #endif diff --git a/include/git2/blob.h b/include/git2/blob.h index 8fca48966..dcb815b2f 100644 --- a/include/git2/blob.h +++ b/include/git2/blob.h @@ -11,6 +11,7 @@ #include "types.h" #include "oid.h" #include "object.h" +#include "buffer.h" /** * @file git2/blob.h @@ -95,6 +96,37 @@ GIT_EXTERN(const void *) git_blob_rawcontent(const git_blob *blob); */ GIT_EXTERN(git_off_t) git_blob_rawsize(const git_blob *blob); +/** + * Get a buffer with the filtered content of a blob. 
+ * + * This applies filters as if the blob was being checked out to the + * working directory under the specified filename. This may apply + * CRLF filtering or other types of changes depending on the file + * attributes set for the blob and the content detected in it. + * + * The output is written into a `git_buffer` which the caller must free + * when done (via `git_buffer_free`). + * + * If no filters need to be applied, then the `out` buffer will just be + * populated with a pointer to the raw content of the blob. In that case, + * be careful to *not* free the blob until done with the buffer. To keep + * the data detached from the blob, call `git_buffer_resize` on the buffer + * with a `want_size` of 0 and the buffer will be reallocated to be + * detached from the blob. + * + * @param out The git_buffer to be filled in + * @param blob Pointer to the blob + * @param as_path Path used for file attribute lookups, etc. + * @param check_for_binary_data Should this test if blob content contains + * NUL bytes / looks like binary data before applying filters? + * @return 0 on success or an error code + */ +GIT_EXTERN(int) git_blob_filtered_content( + git_buffer *out, + git_blob *blob, + const char *as_path, + int check_for_binary_data); + /** * Read a file from the working folder of a repository * and write it to the Object Database as a loose blob diff --git a/include/git2/buffer.h b/include/git2/buffer.h new file mode 100644 index 000000000..454a1faa5 --- /dev/null +++ b/include/git2/buffer.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#ifndef INCLUDE_git_buffer_h__ +#define INCLUDE_git_buffer_h__ + +#include "common.h" + +/** + * @file git2/buffer.h + * @brief Buffer export structure + * + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * A data buffer for exporting data from libgit2 + * + * There are a number of places where libgit2 wants to return an allocated + * data buffer to the caller and have the caller take ownership of that + * allocated memory. This can be awkward if the caller does not have easy + * access to the same allocation functions that libgit2 is using. In those + * cases, libgit2 will instead fill in a `git_buffer` and the caller can + * use `git_buffer_free()` to release it when they are done. + * + * * `ptr` refers to the start of the allocated memory. + * * `size` contains the size of the data in `ptr` that is actually used. + * * `available` refers to the known total amount of allocated memory in + * cases where it is larger than the `size` actually in use. + * + * In a few cases, for uniformity and simplicity, an API may populate a + * `git_buffer` with data that should *not* be freed (i.e. the lifetime of + * the data buffer is actually tied to another libgit2 object). These + * cases will be clearly documented in the APIs that use the `git_buffer`. + * In those cases, the `available` field will be set to zero even though + * the `ptr` and `size` will be valid. + */ +typedef struct git_buffer { + char *ptr; + size_t size; + size_t available; +} git_buffer; + +/** + * Use to initialize buffer structure when git_buffer is on stack + */ +#define GIT_BUFFER_INIT { NULL, 0, 0 } + +/** + * Free the memory referred to by the git_buffer. + * + * Note that this does not free the `git_buffer` itself, just the memory + * pointed to by `buffer->ptr`. If that memory was not allocated by + * libgit2 itself, be careful with using this function because it could + * cause problems. 
+ * + * @param buffer The buffer with allocated memory + */ +GIT_EXTERN(void) git_buffer_free(git_buffer *buffer); + +/** + * Resize the buffer allocation to make more space. + * + * This will update `buffer->available` with the new size (which will be + * at least `want_size` and may be larger). This may or may not change + * `buffer->ptr` depending on whether there is an existing allocation and + * whether that allocation can be increased in place. + * + * Currently, this will never shrink the buffer, only expand it. + * + * @param buffer The buffer to be resized; may or may not be allocated yet + * @param want_size The desired available size + * @return 0 on success, negative error code on allocation failure + */ +GIT_EXTERN(int) git_buffer_resize(git_buffer *buffer, size_t want_size); + +GIT_END_DECL + +/** @} */ + +#endif diff --git a/include/git2/filter.h b/include/git2/filter.h new file mode 100644 index 000000000..3bc4a9037 --- /dev/null +++ b/include/git2/filter.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_git_filter_h__ +#define INCLUDE_git_filter_h__ + +#include "common.h" +#include "types.h" +#include "oid.h" +#include "buffer.h" + +/** + * @file git2/filter.h + * @brief Git filter APIs + * + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * Filters are applied in one of two directions: smudging - which is + * exporting a file from the Git object database to the working directory, + * and cleaning - which is importing a file from the working directory to + * the Git object database. These values control which direction of + * change is being applied. 
+ */ +typedef enum { + GIT_FILTER_SMUDGE = 0, + GIT_FILTER_TO_WORKTREE = GIT_FILTER_SMUDGE, + GIT_FILTER_CLEAN = 1, + GIT_FILTER_TO_ODB = GIT_FILTER_CLEAN, +} git_filter_mode_t; + +/** + * A filter that can transform file data + * + * This represents a filter that can be used to transform or even replace + * file data. Libgit2 currently includes one built in filter: + * + * * "crlf" which uses the complex rules with the "text", "eol", and + * "crlf" file attributes to decide how to convert between LF and CRLF + * line endings + */ +typedef struct git_filter git_filter; + +GIT_END_DECL + +/** @} */ + +#endif diff --git a/src/blob.c b/src/blob.c index 6a289f43b..6a866538c 100644 --- a/src/blob.c +++ b/src/blob.c @@ -338,3 +338,54 @@ int git_blob_is_binary(git_blob *blob) return git_buf_text_is_binary(&content); } + +int git_blob_filtered_content( + git_buffer *out, + git_blob *blob, + const char *as_path, + int check_for_binary_data) +{ + int error = 0, num_filters = 0; + git_buf filtered = GIT_BUF_INIT, unfiltered = GIT_BUF_INIT; + git_vector filters = GIT_VECTOR_INIT; + + assert(blob && as_path && out); + + /* Create a fake git_buf from the blob raw data... 
*/ + filtered.ptr = (void *)git_blob_rawcontent(blob); + filtered.size = (size_t)git_blob_rawsize(blob); + filtered.asize = 0; + + if (check_for_binary_data && git_buf_text_is_binary(&filtered)) + return 0; + + num_filters = git_filters_load( + &filters, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE); + if (num_filters < 0) + return num_filters; + + if (num_filters > 0) { + if (out->ptr && out->available) { + filtered.ptr = out->ptr; + filtered.size = out->size; + filtered.asize = out->available; + } else { + git_buf_init(&filtered, filtered.size + 1); + } + + if (!(error = git_blob__getbuf(&unfiltered, blob))) + error = git_filters_apply(&filtered, &unfiltered, &filters); + + git_filters_free(&filters); + git_buf_free(&unfiltered); + } + + if (!error) { + out->ptr = filtered.ptr; + out->size = filtered.size; + out->available = filtered.asize; + } + + return error; +} + diff --git a/src/buf_text.c b/src/buf_text.c index ecf592b51..eda86adb3 100644 --- a/src/buf_text.c +++ b/src/buf_text.c @@ -70,10 +70,10 @@ int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) assert(tgt != src); if (!next) - return GIT_ENOTFOUND; + return git_buf_set(tgt, src->ptr, src->size); /* reduce reallocs while in the loop */ - if (git_buf_grow(tgt, src->size) < 0) + if (git_buf_grow(tgt, src->size + 1) < 0) return -1; out = tgt->ptr; tgt->size = 0; @@ -81,7 +81,7 @@ int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) /* Find the next \r and copy whole chunk up to there to tgt */ for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) { if (next > scan) { - size_t copylen = next - scan; + size_t copylen = (size_t)(next - scan); memcpy(out, scan, copylen); out += copylen; } @@ -92,9 +92,14 @@ int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) } /* Copy remaining input into dest */ - memcpy(out, scan, scan_end - scan + 1); /* +1 for NUL byte */ - out += (scan_end - scan); - tgt->size = out - tgt->ptr; + if (scan < scan_end) { + size_t 
remaining = (size_t)(scan_end - scan); + memcpy(out, scan, remaining); + out += remaining; + } + + tgt->size = (size_t)(out - tgt->ptr); + tgt->ptr[tgt->size] = '\0'; return 0; } @@ -109,7 +114,7 @@ int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src) assert(tgt != src); if (!next) - return GIT_ENOTFOUND; + return git_buf_set(tgt, src->ptr, src->size); /* attempt to reduce reallocs while in the loop */ if (git_buf_grow(tgt, src->size + (src->size >> 4) + 1) < 0) diff --git a/src/buf_text.h b/src/buf_text.h index 58e4e26a7..3ac9d1443 100644 --- a/src/buf_text.h +++ b/src/buf_text.h @@ -56,16 +56,16 @@ GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string) extern void git_buf_text_unescape(git_buf *buf); /** - * Replace all \r\n with \n (or do nothing if no \r\n are found) + * Replace all \r\n with \n. Does not modify \r without trailing \n. * - * @return 0 on success, GIT_ENOTFOUND if no \r\n, -1 on memory error + * @return 0 on success, -1 on memory error */ extern int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src); /** - * Replace all \n with \r\n (or do nothing if no \n are found) + * Replace all \n with \r\n. Does not modify existing \r\n. 
* - * @return 0 on success, GIT_ENOTFOUND if no \n, -1 on memory error + * @return 0 on success, -1 on memory error */ extern int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src); diff --git a/src/buffer.c b/src/buffer.c index b5b2fd678..a92133674 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -6,6 +6,7 @@ */ #include "buffer.h" #include "posix.h" +#include "git2/buffer.h" #include #include @@ -484,3 +485,56 @@ int git_buf_splice( buf->ptr[buf->size] = '\0'; return 0; } + +/* + * Public buffers API + */ + +void git_buffer_free(git_buffer *buffer) +{ + if (!buffer) + return; + + if (buffer->ptr != NULL && buffer->available > 0) + git__free(buffer->ptr); + + git__memzero(buffer, sizeof(*buffer)); +} + +int git_buffer_resize(git_buffer *buffer, size_t want_size) +{ + int non_allocated_buffer = 0; + char *new_ptr; + + assert(buffer); + + /* check if buffer->ptr points to memory owned elsewhere */ + non_allocated_buffer = (buffer->ptr != NULL && buffer->available == 0); + + if (non_allocated_buffer && !want_size) + want_size = buffer->size; + + if (buffer->available <= want_size) + return 0; + + if (non_allocated_buffer) { + new_ptr = NULL; + if (want_size < buffer->size) + want_size = buffer->size; + } else { + new_ptr = buffer->ptr; + } + + want_size = (want_size + 7) & ~7; /* round up to multiple of 8 */ + + new_ptr = git__realloc(new_ptr, want_size); + GITERR_CHECK_ALLOC(new_ptr); + + if (non_allocated_buffer) + memcpy(new_ptr, buffer->ptr, buffer->size); + + buffer->ptr = new_ptr; + buffer->available = want_size; + + return 0; +} diff --git a/src/crlf.c b/src/crlf.c index 65039f9cc..fbb3ba2dd 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -235,32 +235,36 @@ line_ending_error: } static int crlf_apply_to_workdir( - git_filter *self, git_buf *dest, const git_buf *source) + git_filter *self, git_buf *tgt, const git_buf *src) { struct crlf_filter *filter = (struct crlf_filter *)self; const char *workdir_ending = NULL; - assert(self && dest && source); + 
assert(self && tgt && src); /* Empty file? Nothing to do. */ - if (git_buf_len(source) == 0) + if (git_buf_len(src) == 0) return -1; /* Determine proper line ending */ workdir_ending = line_ending(filter); if (!workdir_ending) return -1; - if (!strcmp("\n", workdir_ending)) /* do nothing for \n ending */ - return -1; - /* for now, only lf->crlf conversion is supported here */ + if (!strcmp("\n", workdir_ending)) { + if (git_buf_find(src, '\r') < 0) + return -1; + return git_buf_text_crlf_to_lf(tgt, src); + } + + /* only other supported option is lf->crlf conversion */ assert(!strcmp("\r\n", workdir_ending)); - return git_buf_text_lf_to_crlf(dest, source); + return git_buf_text_lf_to_crlf(tgt, src); } static int find_and_add_filter( git_vector *filters, git_repository *repo, const char *path, - int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source)) + int (*apply)(struct git_filter *self, git_buf *tgt, const git_buf *src)) { struct crlf_attrs ca; struct crlf_filter *filter; diff --git a/src/filter.h b/src/filter.h index 42a44ebdb..67845ad6a 100644 --- a/src/filter.h +++ b/src/filter.h @@ -11,16 +11,12 @@ #include "buffer.h" #include "git2/odb.h" #include "git2/repository.h" +#include "git2/filter.h" -typedef struct git_filter { +struct git_filter { int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source); void (*do_free)(struct git_filter *self); -} git_filter; - -typedef enum { - GIT_FILTER_TO_WORKTREE, - GIT_FILTER_TO_ODB -} git_filter_mode; +}; typedef enum { GIT_CRLF_GUESS = -1, @@ -60,13 +56,14 @@ extern int git_filters_load(git_vector *filters, git_repository *repo, const cha * and `dest` buffers are owned by the caller and must be freed once * they are no longer needed. * - * NOTE: Because of the double-buffering schema, the `source` buffer that contains - * the original file may be tampered once the filtering is complete. 
Regardless, - * the `dest` buffer will always contain the final result of the filtering + * NOTE: Because of the double-buffering schema, the `source` buffer that + * contains the original file may be tampered once the filtering is + * complete. Regardless, the `dest` buffer will always contain the final + * result of the filtering * * @param dest Buffer to store the result of the filtering * @param source Buffer containing the document to filter - * @param filters A non-empty vector of filters as supplied by `git_filters_load` + * @param filters Vector of filters as supplied by `git_filters_load` * @return 0 on success, an error code otherwise */ extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters); diff --git a/tests-clar/checkout/crlf.c b/tests-clar/checkout/crlf.c index 285b1f272..5f5f1b776 100644 --- a/tests-clar/checkout/crlf.c +++ b/tests-clar/checkout/crlf.c @@ -1,19 +1,10 @@ #include "clar_libgit2.h" #include "checkout_helpers.h" +#include "../filter/crlf.h" #include "git2/checkout.h" #include "repository.h" -#define UTF8_BOM "\xEF\xBB\xBF" -#define ALL_CRLF_TEXT_RAW "crlf\r\ncrlf\r\ncrlf\r\ncrlf\r\n" -#define ALL_LF_TEXT_RAW "lf\nlf\nlf\nlf\nlf\n" -#define MORE_CRLF_TEXT_RAW "crlf\r\ncrlf\r\nlf\ncrlf\r\ncrlf\r\n" -#define MORE_LF_TEXT_RAW "lf\nlf\ncrlf\r\nlf\nlf\n" - -#define ALL_LF_TEXT_AS_CRLF "lf\r\nlf\r\nlf\r\nlf\r\nlf\r\n" -#define MORE_CRLF_TEXT_AS_CRLF "crlf\r\ncrlf\r\nlf\r\ncrlf\r\ncrlf\r\n" -#define MORE_LF_TEXT_AS_CRLF "lf\r\nlf\r\ncrlf\r\nlf\r\nlf\r\n" - static git_repository *g_repo; void test_checkout_crlf__initialize(void) diff --git a/tests-clar/core/buffer.c b/tests-clar/core/buffer.c index 8a0b6711f..11d173d49 100644 --- a/tests-clar/core/buffer.c +++ b/tests-clar/core/buffer.c @@ -919,6 +919,8 @@ void test_core_buffer__similarity_metric_whitespace(void) git_buf_free(&buf); } +#include "../filter/crlf.h" + #define check_buf(expected,buf) do { \ cl_assert_equal_s(expected, buf.ptr); \ 
cl_assert_equal_sz(strlen(expected), buf.size); } while (0) @@ -934,16 +936,16 @@ void test_core_buffer__lf_and_crlf_conversions(void) cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src)); check_buf("lf\r\nlf\r\nlf\r\nlf\r\n", tgt); - cl_assert_equal_i(GIT_ENOTFOUND, git_buf_text_crlf_to_lf(&tgt, &src)); - /* no conversion needed if all LFs already */ + cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src)); + check_buf(src.ptr, tgt); git_buf_sets(&src, "\nlf\nlf\nlf\nlf\nlf"); cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src)); check_buf("\r\nlf\r\nlf\r\nlf\r\nlf\r\nlf", tgt); - cl_assert_equal_i(GIT_ENOTFOUND, git_buf_text_crlf_to_lf(&tgt, &src)); - /* no conversion needed if all LFs already */ + cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src)); + check_buf(src.ptr, tgt); /* CRLF source */ @@ -993,10 +995,45 @@ void test_core_buffer__lf_and_crlf_conversions(void) check_buf("\rcrlf\nlf\nlf\ncr\rcrlf\nlf\ncr\r", tgt); git_buf_sets(&src, "\rcr\r"); - cl_assert_equal_i(GIT_ENOTFOUND, git_buf_text_lf_to_crlf(&tgt, &src)); + cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src)); + check_buf(src.ptr, tgt); cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src)); check_buf("\rcr\r", tgt); git_buf_free(&src); git_buf_free(&tgt); + + /* blob correspondence tests */ + + git_buf_sets(&src, ALL_CRLF_TEXT_RAW); + cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src)); + check_buf(ALL_CRLF_TEXT_AS_CRLF, tgt); + cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src)); + check_buf(ALL_CRLF_TEXT_AS_LF, tgt); + git_buf_free(&src); + git_buf_free(&tgt); + + git_buf_sets(&src, ALL_LF_TEXT_RAW); + cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src)); + check_buf(ALL_LF_TEXT_AS_CRLF, tgt); + cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src)); + check_buf(ALL_LF_TEXT_AS_LF, tgt); + git_buf_free(&src); + git_buf_free(&tgt); + + git_buf_sets(&src, MORE_CRLF_TEXT_RAW); + cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src)); + check_buf(MORE_CRLF_TEXT_AS_CRLF, tgt); + cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src)); + 
check_buf(MORE_CRLF_TEXT_AS_LF, tgt); + git_buf_free(&src); + git_buf_free(&tgt); + + git_buf_sets(&src, MORE_LF_TEXT_RAW); + cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src)); + check_buf(MORE_LF_TEXT_AS_CRLF, tgt); + cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src)); + check_buf(MORE_LF_TEXT_AS_LF, tgt); + git_buf_free(&src); + git_buf_free(&tgt); } diff --git a/tests-clar/filter/blob.c b/tests-clar/filter/blob.c new file mode 100644 index 000000000..27e001f99 --- /dev/null +++ b/tests-clar/filter/blob.c @@ -0,0 +1,43 @@ +#include "clar_libgit2.h" +#include "crlf.h" + +static git_repository *g_repo = NULL; + +void test_filter_blob__initialize(void) +{ + g_repo = cl_git_sandbox_init("crlf"); + cl_git_mkfile("crlf/.gitattributes", + "*.txt text\n*.bin binary\n*.crlf text eol=crlf\n*.lf text eol=lf\n"); +} + +void test_filter_blob__cleanup(void) +{ + cl_git_sandbox_cleanup(); +} + +void test_filter_blob__all_crlf(void) +{ + git_blob *blob; + git_buffer buf = GIT_BUFFER_INIT; + + cl_git_pass(git_revparse_single( + (git_object **)&blob, g_repo, "a9a2e891")); /* all-crlf */ + + cl_assert_equal_s(ALL_CRLF_TEXT_RAW, git_blob_rawcontent(blob)); + + cl_git_pass(git_blob_filtered_content(&buf, blob, "file.bin", 1)); + + cl_assert_equal_s(ALL_CRLF_TEXT_RAW, buf.ptr); + + cl_git_pass(git_blob_filtered_content(&buf, blob, "file.crlf", 1)); + + /* in this case, raw content has crlf in it already */ + cl_assert_equal_s(ALL_CRLF_TEXT_AS_CRLF, buf.ptr); + + cl_git_pass(git_blob_filtered_content(&buf, blob, "file.lf", 1)); + + cl_assert_equal_s(ALL_CRLF_TEXT_AS_LF, buf.ptr); + + git_buffer_free(&buf); + git_blob_free(blob); +} diff --git a/tests-clar/filter/crlf.h b/tests-clar/filter/crlf.h new file mode 100644 index 000000000..8fadee950 --- /dev/null +++ b/tests-clar/filter/crlf.h @@ -0,0 +1,26 @@ +#ifndef INCLUDE_filter_crlf_h__ +#define INCLUDE_filter_crlf_h__ + +/* + * file content for files in the resources/crlf repository + */ + +#define UTF8_BOM "\xEF\xBB\xBF" + +#define 
ALL_CRLF_TEXT_RAW "crlf\r\ncrlf\r\ncrlf\r\ncrlf\r\n" +#define ALL_LF_TEXT_RAW "lf\nlf\nlf\nlf\nlf\n" +#define MORE_CRLF_TEXT_RAW "crlf\r\ncrlf\r\nlf\ncrlf\r\ncrlf\r\n" +#define MORE_LF_TEXT_RAW "lf\nlf\ncrlf\r\nlf\nlf\n" + +#define ALL_CRLF_TEXT_AS_CRLF ALL_CRLF_TEXT_RAW +#define ALL_LF_TEXT_AS_CRLF "lf\r\nlf\r\nlf\r\nlf\r\nlf\r\n" +#define MORE_CRLF_TEXT_AS_CRLF "crlf\r\ncrlf\r\nlf\r\ncrlf\r\ncrlf\r\n" +#define MORE_LF_TEXT_AS_CRLF "lf\r\nlf\r\ncrlf\r\nlf\r\nlf\r\n" + +#define ALL_CRLF_TEXT_AS_LF "crlf\ncrlf\ncrlf\ncrlf\n" +#define ALL_LF_TEXT_AS_LF ALL_LF_TEXT_RAW +#define MORE_CRLF_TEXT_AS_LF "crlf\ncrlf\nlf\ncrlf\ncrlf\n" +#define MORE_LF_TEXT_AS_LF "lf\nlf\ncrlf\nlf\nlf\n" + + +#endif From 85d5481206a932d747b2d5587b6d4c7f69993ba6 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 28 Aug 2013 16:44:04 -0700 Subject: [PATCH 02/25] Create public filter object and use it This creates include/sys/filter.h with a basic definition of a git_filter and then converts the internal code to use it. There are related internal objects (git_filter_list) that we will want to publish at some point, but this is a first step. --- include/git2/filter.h | 16 ++- include/git2/sys/filter.h | 104 ++++++++++++++++++ src/blob.c | 48 ++++----- src/buffer.h | 22 ++++ src/checkout.c | 60 ++++------- src/crlf.c | 173 +++++++++++++++++------------ src/diff.c | 12 +-- src/diff_file.c | 46 ++++---- src/filter.c | 186 +++++++++++++++++++++++++------- src/filter.h | 61 +++++------ src/odb.c | 6 +- src/odb.h | 3 +- src/repository.c | 8 +- tests-clar/object/blob/filter.c | 12 +-- 14 files changed, 505 insertions(+), 252 deletions(-) create mode 100644 include/git2/sys/filter.h diff --git a/include/git2/filter.h b/include/git2/filter.h index 3bc4a9037..478f3a6ad 100644 --- a/include/git2/filter.h +++ b/include/git2/filter.h @@ -39,7 +39,10 @@ typedef enum { * A filter that can transform file data * * This represents a filter that can be used to transform or even replace - * file data. 
Libgit2 currently includes one built in filter: + * file data. Libgit2 includes one built in filter and it is possible to + * write your own (see git2/sys/filter.h for information on that). + * + * The built in filter is: * * * "crlf" which uses the complex rules with the "text", "eol", and * "crlf" file attributes to decide how to convert between LF and CRLF @@ -47,6 +50,17 @@ typedef enum { */ typedef struct git_filter git_filter; +GIT_EXTERN(git_filter *) git_filter_lookup(const char *name); + +#define GIT_FILTER_CRLF "crlf" + +GIT_EXTERN(int) git_filter_apply_to_buffer( + git_buffer *out, + git_filter *filter, + const git_buffer *input, + const char *as_path, + git_filter_mode_t mode); + GIT_END_DECL /** @} */ diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h new file mode 100644 index 000000000..2264be080 --- /dev/null +++ b/include/git2/sys/filter.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#ifndef INCLUDE_sys_git_filter_h__ +#define INCLUDE_sys_git_filter_h__ + +#include "git2/filter.h" + +/** + * @file git2/sys/filter.h + * @brief Git filter backend and plugin routines + * @defgroup git_backend Git custom backend APIs + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * A filter source represents a file/blob to be processed + */ +typedef struct git_filter_source git_filter_source; +struct git_filter_source { + git_repository *repo; + const char *path; + git_oid oid; /* zero if unknown (which is likely) */ + uint16_t filemode; /* zero if unknown */ +}; + +/** + * Callback to actually perform the data filtering + */ +typedef int (*git_filter_apply_fn)( + git_filter *self, + void **payload, /* may be read and/or set */ + git_filter_mode_t mode, + git_buffer *to, + const git_buffer *from, + const git_filter_source *src); + +/** + * Callback to decide if a given source needs this filter + */ +typedef int (*git_filter_check_fn)( + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ + git_filter_mode_t mode, + const git_filter_source *src); + +/** + * Callback to clean up after filtering has been applied + */ +typedef void (*git_filter_cleanup_fn)( + git_filter *self, + void *payload); + +/** + * Filter structure used to register a new filter. + * + * To associate extra data with a filter, simply allocate extra data + * and put the `git_filter` struct at the start of your data buffer, + * then cast the `self` pointer to your larger structure when your + * callback is invoked. + * + * `version` should be set to GIT_FILTER_VERSION + * + * `apply` is the callback that actually filters data. + * + * `check` is an optional callback that checks if filtering is needed for + * a given source. + * + * `cleanup` is an optional callback that is made after the filter has + * been applied. 
Both the `check` and `apply` callbacks are able to + * allocate a `payload` to keep per-source filter state, and this callback + * is given that value and can clean up as needed. + */ +struct git_filter { + unsigned int version; + git_filter_apply_fn apply; + git_filter_check_fn check; + git_filter_cleanup_fn cleanup; +}; + +#define GIT_FILTER_VERSION 1 + +/** + * Register a filter under a given name + * + * Two filters will be preregistered with libgit2: GIT_FILTER_CRLF and + * GIT_FILTER_IDENT. + */ +GIT_EXTERN(int) git_filter_register( + const char *name, const git_filter *filter); + +/** + * Remove the filter with the given name + */ +GIT_EXTERN(int) git_filter_unregister(const char *name); + +/** @} */ +GIT_END_DECL +#endif diff --git a/src/blob.c b/src/blob.c index 6a866538c..3581ee9d1 100644 --- a/src/blob.c +++ b/src/blob.c @@ -108,7 +108,7 @@ static int write_file_filtered( git_off_t *size, git_odb *odb, const char *full_path, - git_vector *filters) + git_filter_list *fl) { int error; git_buf source = GIT_BUF_INIT; @@ -117,7 +117,7 @@ static int write_file_filtered( if ((error = git_futils_readbuffer(&source, full_path)) < 0) return error; - error = git_filters_apply(&dest, &source, filters); + error = git_filter_list_apply(&dest, &source, fl); /* Free the source as soon as possible. 
This can be big in memory, * and we don't want to ODB write to choke */ @@ -198,29 +198,25 @@ int git_blob__create_from_paths( if (S_ISLNK(mode)) { error = write_symlink(oid, odb, content_path, (size_t)size); } else { - git_vector write_filters = GIT_VECTOR_INIT; - int filter_count = 0; + git_filter_list *fl = NULL; - if (try_load_filters) { + if (try_load_filters) /* Load the filters for writing this file to the ODB */ - filter_count = git_filters_load( - &write_filters, repo, hint_path, GIT_FILTER_TO_ODB); - } + error = git_filter_list_load( + &fl, repo, hint_path, GIT_FILTER_TO_ODB); - if (filter_count < 0) { - /* Negative value means there was a critical error */ - error = filter_count; - } else if (filter_count == 0) { + if (error < 0) + /* well, that didn't work */; + else if (fl == NULL) /* No filters need to be applied to the document: we can stream * directly from disk */ error = write_file_stream(oid, odb, content_path, size); - } else { + else { /* We need to apply one or more filters */ - error = write_file_filtered( - oid, &size, odb, content_path, &write_filters); - } + error = write_file_filtered(oid, &size, odb, content_path, fl); - git_filters_free(&write_filters); + git_filter_list_free(fl); + } /* * TODO: eventually support streaming filtered files, for files @@ -345,9 +341,9 @@ int git_blob_filtered_content( const char *as_path, int check_for_binary_data) { - int error = 0, num_filters = 0; + int error = 0; git_buf filtered = GIT_BUF_INIT, unfiltered = GIT_BUF_INIT; - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; assert(blob && as_path && out); @@ -359,12 +355,12 @@ int git_blob_filtered_content( if (check_for_binary_data && git_buf_text_is_binary(&filtered)) return 0; - num_filters = git_filters_load( - &filters, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE); - if (num_filters < 0) - return num_filters; + error = git_filter_list_load( + &fl, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE); + if (error < 0) 
+ return error; - if (num_filters > 0) { + if (fl != NULL) { if (out->ptr && out->available) { filtered.ptr = out->ptr; filtered.size = out->size; @@ -374,9 +370,9 @@ int git_blob_filtered_content( } if (!(error = git_blob__getbuf(&unfiltered, blob))) - error = git_filters_apply(&filtered, &unfiltered, &filters); + error = git_filter_list_apply(&filtered, &unfiltered, fl); - git_filters_free(&filters); + git_filter_list_free(fl); git_buf_free(&unfiltered); } diff --git a/src/buffer.h b/src/buffer.h index f3e1d506f..b1cb5d06a 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -9,6 +9,7 @@ #include "common.h" #include "git2/strarray.h" +#include "git2/buffer.h" #include typedef struct { @@ -174,4 +175,25 @@ int git_buf_splice( const char *data, size_t nb_to_insert); + +#define GIT_BUF_FROM_BUFFER(buffer) \ + { (buffer)->ptr, (buffer)->available, (buffer)->size } + +GIT_INLINE(void) git_buf_from_buffer(git_buf *buf, const git_buffer *buffer) +{ + buf->ptr = buffer->ptr; + buf->size = buffer->size; + buf->asize = buffer->available; +} + +#define GIT_BUFFER_FROM_BUF(buf) \ + { (buf)->ptr, (buf)->size, (buf)->asize } + +GIT_INLINE(void) git_buffer_from_buf(git_buffer *buffer, const git_buf *buf) +{ + buffer->ptr = buf->ptr; + buffer->size = buf->size; + buffer->available = buf->asize; +} + #endif diff --git a/src/checkout.c b/src/checkout.c index eb92e8fd6..5ce4a19c5 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -710,56 +710,40 @@ static int blob_content_to_file( mode_t entry_filemode, git_checkout_opts *opts) { - int error = -1, nb_filters = 0; - mode_t file_mode = opts->file_mode; - bool dont_free_filtered; + int error = 0; + mode_t file_mode = opts->file_mode ? opts->file_mode : entry_filemode; git_buf unfiltered = GIT_BUF_INIT, filtered = GIT_BUF_INIT; - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; /* Create a fake git_buf from the blob raw data... 
*/ filtered.ptr = (void *)git_blob_rawcontent(blob); filtered.size = (size_t)git_blob_rawsize(blob); - /* ... and make sure it doesn't get unexpectedly freed */ - dont_free_filtered = true; - if (!opts->disable_filters && - !git_buf_text_is_binary(&filtered) && - (nb_filters = git_filters_load( - &filters, - git_object_owner((git_object *)blob), - path, - GIT_FILTER_TO_WORKTREE)) > 0) - { + if (!opts->disable_filters && !git_buf_text_is_binary(&filtered)) { + error = git_filter_list_load( + &fl, git_blob_owner(blob), path, GIT_FILTER_TO_WORKTREE); + } + + if (fl != NULL) { /* reset 'filtered' so it can be a filter target */ git_buf_init(&filtered, 0); - dont_free_filtered = false; + + if (!(error = git_blob__getbuf(&unfiltered, blob))) { + error = git_filter_list_apply(&filtered, &unfiltered, fl); + + git_buf_free(&unfiltered); + } + + git_filter_list_free(fl); } - if (nb_filters < 0) - return nb_filters; - - if (nb_filters > 0) { - if ((error = git_blob__getbuf(&unfiltered, blob)) < 0) - goto cleanup; - - if ((error = git_filters_apply(&filtered, &unfiltered, &filters)) < 0) - goto cleanup; - } - - /* Allow overriding of file mode */ - if (!file_mode) - file_mode = entry_filemode; - - error = buffer_to_file( - st, &filtered, path, opts->dir_mode, opts->file_open_flags, file_mode); - - if (!error) + if (!error && + !(error = buffer_to_file( + st, &filtered, path, opts->dir_mode, + opts->file_open_flags, file_mode))) st->st_mode = entry_filemode; -cleanup: - git_filters_free(&filters); - git_buf_free(&unfiltered); - if (!dont_free_filtered) + if (filtered.asize != 0) git_buf_free(&filtered); return error; diff --git a/src/crlf.c b/src/crlf.c index fbb3ba2dd..2177bff98 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -19,13 +19,11 @@ struct crlf_attrs { int crlf_action; int eol; + int auto_crlf; }; struct crlf_filter { git_filter f; - struct crlf_attrs attrs; - git_repository *repo; - char path[GIT_FLEX_ARRAY]; }; static int check_crlf(const char *value) @@ -76,7 +74,8 @@ 
static int crlf_input_action(struct crlf_attrs *ca) return ca->crlf_action; } -static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, const char *path) +static int crlf_load_attributes( + struct crlf_attrs *ca, git_repository *repo, const char *path) { #define NUM_CONV_ATTRS 3 @@ -108,9 +107,8 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con return -1; } -static int has_cr_in_index(git_filter *self) +static int has_cr_in_index(git_repository *repo, const char *path) { - struct crlf_filter *filter = (struct crlf_filter *)self; git_index *index; const git_index_entry *entry; git_blob *blob; @@ -118,19 +116,19 @@ static int has_cr_in_index(git_filter *self) git_off_t blobsize; bool found_cr; - if (git_repository_index__weakptr(&index, filter->repo) < 0) { + if (git_repository_index__weakptr(&index, repo) < 0) { giterr_clear(); return false; } - if (!(entry = git_index_get_bypath(index, filter->path, 0)) && - !(entry = git_index_get_bypath(index, filter->path, 1))) + if (!(entry = git_index_get_bypath(index, path, 0)) && + !(entry = git_index_get_bypath(index, path, 1))) return false; if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */ return true; - if (git_blob_lookup(&blob, filter->repo, &entry->oid) < 0) + if (git_blob_lookup(&blob, repo, &entry->oid) < 0) return false; blobcontent = git_blob_rawcontent(blob); @@ -147,26 +145,26 @@ static int has_cr_in_index(git_filter *self) } static int crlf_apply_to_odb( - git_filter *self, git_buf *dest, const git_buf *source) + struct crlf_attrs *ca, + git_buffer *to, + const git_buffer *from, + const git_filter_source *src) { - struct crlf_filter *filter = (struct crlf_filter *)self; - - assert(self && dest && source); + const git_buf from_buf = GIT_BUF_FROM_BUFFER(from); + git_buf to_buf = GIT_BUF_FROM_BUFFER(to); /* Empty file? 
Nothing to do */ - if (git_buf_len(source) == 0) + if (!git_buf_len(&from_buf)) return 0; /* Heuristics to see if we can skip the conversion. * Straight from Core Git. */ - if (filter->attrs.crlf_action == GIT_CRLF_AUTO || - filter->attrs.crlf_action == GIT_CRLF_GUESS) { - + if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) { git_buf_text_stats stats; /* Check heuristics for binary vs text... */ - if (git_buf_text_gather_stats(&stats, source, false)) + if (git_buf_text_gather_stats(&stats, &from_buf, false)) return -1; /* @@ -175,28 +173,34 @@ static int crlf_apply_to_odb( * stuff? */ if (stats.cr != stats.crlf) - return -1; + return GIT_ENOTFOUND; - if (filter->attrs.crlf_action == GIT_CRLF_GUESS) { + if (ca->crlf_action == GIT_CRLF_GUESS) { /* * If the file in the index has any CR in it, do not convert. * This is the new safer autocrlf handling. */ - if (has_cr_in_index(self)) - return -1; + if (has_cr_in_index(src->repo, src->path)) + return GIT_ENOTFOUND; } if (!stats.cr) - return -1; + return GIT_ENOTFOUND; } /* Actually drop the carriage returns */ - return git_buf_text_crlf_to_lf(dest, source); + if (git_buf_text_crlf_to_lf(&to_buf, &from_buf) < 0) + return -1; + + /* Overwrite "to" buffer in case data was resized */ + git_buffer_from_buf(to, &to_buf); + + return 0; } -static const char *line_ending(struct crlf_filter *filter) +static const char *line_ending(struct crlf_attrs *ca) { - switch (filter->attrs.crlf_action) { + switch (ca->crlf_action) { case GIT_CRLF_BINARY: case GIT_CRLF_INPUT: return "\n"; @@ -213,7 +217,7 @@ static const char *line_ending(struct crlf_filter *filter) goto line_ending_error; } - switch (filter->attrs.eol) { + switch (ca->eol) { case GIT_EOL_UNSET: return GIT_EOL_NATIVE == GIT_EOL_CRLF ? 
"\r\n" @@ -235,44 +239,58 @@ line_ending_error: } static int crlf_apply_to_workdir( - git_filter *self, git_buf *tgt, const git_buf *src) + struct crlf_attrs *ca, git_buffer *to, const git_buffer *from) { - struct crlf_filter *filter = (struct crlf_filter *)self; + const git_buf from_buf = GIT_BUF_FROM_BUFFER(from); + git_buf to_buf = GIT_BUF_FROM_BUFFER(to); const char *workdir_ending = NULL; - assert(self && tgt && src); - /* Empty file? Nothing to do. */ - if (git_buf_len(src) == 0) - return -1; + if (git_buf_len(&from_buf) == 0) + return 0; /* Determine proper line ending */ - workdir_ending = line_ending(filter); + workdir_ending = line_ending(ca); if (!workdir_ending) return -1; if (!strcmp("\n", workdir_ending)) { - if (git_buf_find(src, '\r') < 0) + if (ca->crlf_action == GIT_CRLF_GUESS && ca->auto_crlf) + return GIT_ENOTFOUND; + + if (git_buf_find(&from_buf, '\r') < 0) + return GIT_ENOTFOUND; + + if (git_buf_text_crlf_to_lf(&to_buf, &from_buf) < 0) + return -1; + } else { + /* only other supported option is lf->crlf conversion */ + assert(!strcmp("\r\n", workdir_ending)); + + if (git_buf_text_lf_to_crlf(&to_buf, &from_buf) < 0) return -1; - return git_buf_text_crlf_to_lf(tgt, src); } - /* only other supported option is lf->crlf conversion */ - assert(!strcmp("\r\n", workdir_ending)); - return git_buf_text_lf_to_crlf(tgt, src); + /* Overwrite "to" buffer in case data was resized */ + git_buffer_from_buf(to, &to_buf); + + return 0; } -static int find_and_add_filter( - git_vector *filters, git_repository *repo, const char *path, - int (*apply)(struct git_filter *self, git_buf *tgt, const git_buf *src)) +static int crlf_check( + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ + git_filter_mode_t mode, + const git_filter_source *src) { - struct crlf_attrs ca; - struct crlf_filter *filter; - size_t pathlen; int error; + struct crlf_attrs ca; + + GIT_UNUSED(self); + GIT_UNUSED(mode); /* Load gitattributes for the path */ - if 
((error = crlf_load_attributes(&ca, repo, path)) < 0) + if ((error = crlf_load_attributes(&ca, src->repo, src->path)) < 0) return error; /* @@ -282,41 +300,54 @@ static int find_and_add_filter( ca.crlf_action = crlf_input_action(&ca); if (ca.crlf_action == GIT_CRLF_BINARY) - return 0; + return GIT_ENOTFOUND; if (ca.crlf_action == GIT_CRLF_GUESS) { - int auto_crlf; - - if ((error = git_repository__cvar(&auto_crlf, repo, GIT_CVAR_AUTO_CRLF)) < 0) + if ((error = git_repository__cvar( + &ca.auto_crlf, src->repo, GIT_CVAR_AUTO_CRLF)) < 0) return error; - if (auto_crlf == GIT_AUTO_CRLF_FALSE) - return 0; + if (ca.auto_crlf == GIT_AUTO_CRLF_FALSE) + return GIT_ENOTFOUND; } - /* If we're good, we create a new filter object and push it - * into the filters array */ - pathlen = strlen(path); - filter = git__malloc(sizeof(struct crlf_filter) + pathlen + 1); - GITERR_CHECK_ALLOC(filter); + *payload = git__malloc(sizeof(ca)); + GITERR_CHECK_ALLOC(*payload); + memcpy(*payload, &ca, sizeof(ca)); - filter->f.apply = apply; - filter->f.do_free = NULL; - memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs)); - filter->repo = repo; - memcpy(filter->path, path, pathlen + 1); - - return git_vector_insert(filters, filter); + return 0; } -int git_filter_add__crlf_to_odb( - git_vector *filters, git_repository *repo, const char *path) +static int crlf_apply( + git_filter *self, + void **payload, /* may be read and/or set */ + git_filter_mode_t mode, + git_buffer *to, + const git_buffer *from, + const git_filter_source *src) { - return find_and_add_filter(filters, repo, path, &crlf_apply_to_odb); + GIT_UNUSED(self); + + if (mode == GIT_FILTER_SMUDGE) + return crlf_apply_to_workdir(*payload, to, from); + else + return crlf_apply_to_odb(*payload, to, from, src); } -int git_filter_add__crlf_to_workdir( - git_vector *filters, git_repository *repo, const char *path) +static void crlf_cleanup( + git_filter *self, + void *payload) { - return find_and_add_filter(filters, repo, path, 
&crlf_apply_to_workdir); + GIT_UNUSED(self); + git__free(payload); +} + +git_filter *git_crlf_filter_new(void) +{ + struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter)); + f->f.version = GIT_FILTER_VERSION; + f->f.check = crlf_check; + f->f.apply = crlf_apply; + f->f.cleanup = crlf_cleanup; + return (git_filter *)f; } diff --git a/src/diff.c b/src/diff.c index 77dbbd8bc..b1cde36bc 100644 --- a/src/diff.c +++ b/src/diff.c @@ -568,21 +568,21 @@ int git_diff__oid_for_file( giterr_set(GITERR_OS, "File size overflow (for 32-bits) on '%s'", path); result = -1; } else { - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; - result = git_filters_load(&filters, repo, path, GIT_FILTER_TO_ODB); - if (result >= 0) { + result = git_filter_list_load(&fl, repo, path, GIT_FILTER_TO_ODB); + if (!result) { int fd = git_futils_open_ro(full_path.ptr); if (fd < 0) result = fd; else { result = git_odb__hashfd_filtered( - oid, fd, (size_t)size, GIT_OBJ_BLOB, &filters); + oid, fd, (size_t)size, GIT_OBJ_BLOB, fl); p_close(fd); } - } - git_filters_free(&filters); + git_filter_list_free(fl); + } } cleanup: diff --git a/src/diff_file.c b/src/diff_file.c index bcfef13cd..7602591cf 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -296,7 +296,7 @@ static int diff_file_content_load_workdir_file( git_diff_file_content *fc, git_buf *path) { int error = 0; - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; git_file fd = git_futils_open_ro(git_buf_cstr(path)); @@ -310,41 +310,35 @@ static int diff_file_content_load_workdir_file( if (diff_file_content_binary_by_size(fc)) goto cleanup; - if ((error = git_filters_load( - &filters, fc->repo, fc->file->path, GIT_FILTER_TO_ODB)) < 0) + if ((error = git_filter_list_load( + &fl, fc->repo, fc->file->path, GIT_FILTER_TO_ODB)) < 0) goto cleanup; - /* error >= is a filter count */ - if (error == 0) { + /* if there are no filters, try to mmap the file */ + 
if (fl == NULL) { if (!(error = git_futils_mmap_ro( - &fc->map, fd, 0, (size_t)fc->file->size))) + &fc->map, fd, 0, (size_t)fc->file->size))) { fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA; - else /* fall through to try readbuffer below */ - giterr_clear(); - } - - if (error != 0) { - error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size); - if (error < 0) goto cleanup; - - if (!filters.length) - git_buf_swap(&filtered, &raw); - else - error = git_filters_apply(&filtered, &raw, &filters); - - if (!error) { - fc->map.len = git_buf_len(&filtered); - fc->map.data = git_buf_detach(&filtered); - fc->flags |= GIT_DIFF_FLAG__FREE_DATA; } - git_buf_free(&raw); - git_buf_free(&filtered); + /* if mmap failed, fall through to try readbuffer below */ + giterr_clear(); } + if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size)) && + !(error = git_filter_list_apply(&filtered, &raw, fl))) + { + fc->map.len = git_buf_len(&filtered); + fc->map.data = git_buf_detach(&filtered); + fc->flags |= GIT_DIFF_FLAG__FREE_DATA; + } + + git_buf_free(&raw); + git_buf_free(&filtered); + cleanup: - git_filters_free(&filters); + git_filter_list_free(fl); p_close(fd); return error; diff --git a/src/filter.c b/src/filter.c index 9f749dcbd..7935e6518 100644 --- a/src/filter.c +++ b/src/filter.c @@ -13,62 +13,155 @@ #include "git2/config.h" #include "blob.h" -int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode) -{ - int error; - - if (mode == GIT_FILTER_TO_ODB) { - /* Load the CRLF cleanup filter when writing to the ODB */ - error = git_filter_add__crlf_to_odb(filters, repo, path); - if (error < 0) - return error; - } else { - error = git_filter_add__crlf_to_workdir(filters, repo, path); - if (error < 0) - return error; - } - - return (int)filters->length; -} - -void git_filters_free(git_vector *filters) -{ - size_t i; +typedef struct { git_filter *filter; + void *payload; +} git_filter_entry; - git_vector_foreach(filters, i, filter) { - 
if (filter->do_free != NULL) - filter->do_free(filter); - else - git__free(filter); +struct git_filter_list { + git_array_t(git_filter_entry) filters; + git_filter_mode_t mode; + git_filter_source source; + char path[GIT_FLEX_ARRAY]; +}; + +typedef struct { + const char *filter_name; + git_filter *filter; +} git_filter_def; + +static git_array_t(git_filter_def) filter_registry = GIT_ARRAY_INIT; + +static int filter_load_defaults(void) +{ + if (!git_array_size(filter_registry)) { + git_filter_def *fdef = git_array_alloc(filter_registry); + GITERR_CHECK_ALLOC(fdef); + + fdef->filter_name = GIT_FILTER_CRLF; + fdef->filter = git_crlf_filter_new(); + GITERR_CHECK_ALLOC(fdef->filter); } - git_vector_free(filters); + return 0; } -int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) +static int git_filter_list_new( + git_filter_list **out, git_filter_mode_t mode, const git_filter_source *src) { - size_t i; + git_filter_list *fl = NULL; + size_t pathlen = src->path ? strlen(src->path) : 0; + + fl = git__calloc(1, sizeof(git_filter_list) + pathlen + 1); + GITERR_CHECK_ALLOC(fl); + + fl->mode = mode; + if (src->path) + memcpy(fl->path, src->path, pathlen); + fl->source.repo = src->repo; + fl->source.path = fl->path; + + *out = fl; + return 0; +} + +int git_filter_list_load( + git_filter_list **filters, + git_repository *repo, + const char *path, + git_filter_mode_t mode) +{ + int error = 0; + git_filter_list *fl = NULL; + git_filter_source src = { 0 }; + git_filter_entry *fe; + uint32_t f; + + if (filter_load_defaults() < 0) + return -1; + + src.repo = repo; + src.path = path; + + for (f = 0; f < git_array_size(filter_registry); ++f) { + void *payload = NULL; + git_filter_def *fdef = git_array_get(filter_registry, f); + + if (!fdef || !fdef->filter) + continue; + + if (fdef->filter->check) + error = fdef->filter->check(fdef->filter, &payload, mode, &src); + + if (error == GIT_ENOTFOUND) + error = 0; + else if (error < 0) + break; + else { + if (!fl && 
(error = git_filter_list_new(&fl, mode, &src)) < 0) + return error; + + fe = git_array_alloc(fl->filters); + GITERR_CHECK_ALLOC(fe); + fe->filter = fdef->filter; + fe->payload = payload; + } + } + + if (error && fl != NULL) { + git_array_clear(fl->filters); + git__free(fl); + fl = NULL; + } + + *filters = fl; + return error; +} + +void git_filter_list_free(git_filter_list *fl) +{ + uint32_t i; + + if (!fl) + return; + + for (i = 0; i < git_array_size(fl->filters); ++i) { + git_filter_entry *fe = git_array_get(fl->filters, i); + if (fe->filter->cleanup) + fe->filter->cleanup(fe->filter, fe->payload); + } + + git_array_clear(fl->filters); + git__free(fl); +} + +int git_filter_list_apply( + git_buf *dest, + git_buf *source, + git_filter_list *fl) +{ + int error = 0; + uint32_t i; unsigned int src; git_buf *dbuffer[2]; + if (!fl) { + git_buf_swap(dest, source); + return 0; + } + dbuffer[0] = source; dbuffer[1] = dest; src = 0; - if (git_buf_len(source) == 0) { - git_buf_clear(dest); - return 0; - } - /* Pre-grow the destination buffer to more or less the size * we expect it to have */ if (git_buf_grow(dest, git_buf_len(source)) < 0) return -1; - for (i = 0; i < filters->length; ++i) { - git_filter *filter = git_vector_get(filters, i); + for (i = 0; i < git_array_size(fl->filters); ++i) { + git_filter_entry *fe = git_array_get(fl->filters, i); unsigned int dst = 1 - src; git_buf_clear(dbuffer[dst]); @@ -79,8 +172,25 @@ int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) * of the double buffering (so that the text goes through * cleanly). 
*/ - if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) - src = dst; + { + git_buffer srcb = GIT_BUFFER_FROM_BUF(dbuffer[src]); + git_buffer dstb = GIT_BUFFER_FROM_BUF(dbuffer[dst]); + + error = fe->filter->apply( + fe->filter, &fe->payload, fl->mode, &dstb, &srcb, &fl->source); + + if (error == GIT_ENOTFOUND) + error = 0; + else if (error < 0) { + git_buf_clear(dest); + return error; + } + else { + git_buf_from_buffer(dbuffer[src], &srcb); + git_buf_from_buffer(dbuffer[dst], &dstb); + src = dst; + } + } if (git_buf_oom(dbuffer[dst])) return -1; diff --git a/src/filter.h b/src/filter.h index 67845ad6a..a4ee2172d 100644 --- a/src/filter.h +++ b/src/filter.h @@ -9,14 +9,11 @@ #include "common.h" #include "buffer.h" +#include "array.h" #include "git2/odb.h" #include "git2/repository.h" #include "git2/filter.h" - -struct git_filter { - int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source); - void (*do_free)(struct git_filter *self); -}; +#include "git2/sys/filter.h" typedef enum { GIT_CRLF_GUESS = -1, @@ -27,34 +24,38 @@ typedef enum { GIT_CRLF_AUTO, } git_crlf_t; +typedef struct git_filter_list git_filter_list; + /* * FILTER API */ /* - * For any given path in the working directory, fill the `filters` - * array with the relevant filters that need to be applied. + * For any given path in the working directory, create a `git_filter_list` + * with the relevant filters that need to be applied. * - * Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the - * filters that will be used when checking out a file to the working - * directory, or `GIT_FILTER_TO_ODB` for the filters used when writing - * a file to the ODB. + * This will return 0 (success) but set the output git_filter_list to NULL + * if no filters are requested for the given file. 
* - * @param filters Vector where to store all the loaded filters + * @param filters Output newly created git_filter_list (or NULL) * @param repo Repository object that contains `path` * @param path Relative path of the file to be filtered * @param mode Filtering direction (WT->ODB or ODB->WT) - * @return the number of filters loaded for the file (0 if the file - * doesn't need filtering), or a negative error code + * @return 0 on success (which could still return NULL if no filters are + * needed for the requested file), <0 on error */ -extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode); +extern int git_filter_list_load( + git_filter_list **filters, + git_repository *repo, + const char *path, + git_filter_mode_t mode); /* - * Apply one or more filters to a file. + * Apply one or more filters to a data buffer. * - * The file must have been loaded as a `git_buf` object. Both the `source` - * and `dest` buffers are owned by the caller and must be freed once - * they are no longer needed. + * The source data must have been loaded as a `git_buf` object. Both the + * `source` and `dest` buffers are owned by the caller and must be freed + * once they are no longer needed. 
* * NOTE: Because of the double-buffering schema, the `source` buffer that * contains the original file may be tampered once the filtering is @@ -63,29 +64,25 @@ extern int git_filters_load(git_vector *filters, git_repository *repo, const cha * * @param dest Buffer to store the result of the filtering * @param source Buffer containing the document to filter - * @param filters Vector of filters as supplied by `git_filters_load` + * @param filters An already loaded git_filter_list * @return 0 on success, an error code otherwise */ -extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters); +extern int git_filter_list_apply( + git_buf *dest, + git_buf *source, + git_filter_list *filters); /* - * Free the `filters` array generated by `git_filters_load`. + * Free the git_filter_list * - * Note that this frees both the array and its contents. The array will - * be clean/reusable after this call. - * - * @param filters A filters array as supplied by `git_filters_load` + * @param filters A git_filter_list created by `git_filter_list_load` */ -extern void git_filters_free(git_vector *filters); +extern void git_filter_list_free(git_filter_list *filters); /* * Available filters */ -/* Strip CRLF, from Worktree to ODB */ -extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path); - -/* Add CRLF, from ODB to worktree */ -extern int git_filter_add__crlf_to_workdir(git_vector *filters, git_repository *repo, const char *path); +extern git_filter *git_crlf_filter_new(void); #endif diff --git a/src/odb.c b/src/odb.c index a0bfec403..d9310a9d7 100644 --- a/src/odb.c +++ b/src/odb.c @@ -179,13 +179,13 @@ done: } int git_odb__hashfd_filtered( - git_oid *out, git_file fd, size_t size, git_otype type, git_vector *filters) + git_oid *out, git_file fd, size_t size, git_otype type, git_filter_list *fl) { int error; git_buf raw = GIT_BUF_INIT; git_buf filtered = GIT_BUF_INIT; - if (!filters || !filters->length) + if (!fl) 
return git_odb__hashfd(out, fd, size, type); /* size of data is used in header, so we have to read the whole file @@ -193,7 +193,7 @@ int git_odb__hashfd_filtered( */ if (!(error = git_futils_readbuffer_fd(&raw, fd, size))) - error = git_filters_apply(&filtered, &raw, filters); + error = git_filter_list_apply(&filtered, &raw, fl); git_buf_free(&raw); diff --git a/src/odb.h b/src/odb.h index 0d9f9e2ea..61dd9a7fd 100644 --- a/src/odb.h +++ b/src/odb.h @@ -14,6 +14,7 @@ #include "vector.h" #include "cache.h" #include "posix.h" +#include "filter.h" #define GIT_OBJECTS_DIR "objects/" #define GIT_OBJECT_DIR_MODE 0777 @@ -66,7 +67,7 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type); * Acts just like git_odb__hashfd with the addition of filters... */ int git_odb__hashfd_filtered( - git_oid *out, git_file fd, size_t len, git_otype type, git_vector *filters); + git_oid *out, git_file fd, size_t len, git_otype type, git_filter_list *fl); /* * Hash a `path`, assuming it could be a POSIX symlink: if the path is a diff --git a/src/repository.c b/src/repository.c index eead41201..94700e4e3 100644 --- a/src/repository.c +++ b/src/repository.c @@ -1649,7 +1649,7 @@ int git_repository_hashfile( const char *as_path) { int error; - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; git_file fd = -1; git_off_t len; git_buf full_path = GIT_BUF_INIT; @@ -1671,7 +1671,7 @@ int git_repository_hashfile( /* passing empty string for "as_path" indicated --no-filters */ if (strlen(as_path) > 0) { - error = git_filters_load(&filters, repo, as_path, GIT_FILTER_TO_ODB); + error = git_filter_list_load(&fl, repo, as_path, GIT_FILTER_TO_ODB); if (error < 0) return error; } else { @@ -1698,12 +1698,12 @@ int git_repository_hashfile( goto cleanup; } - error = git_odb__hashfd_filtered(out, fd, (size_t)len, type, &filters); + error = git_odb__hashfd_filtered(out, fd, (size_t)len, type, fl); cleanup: if (fd >= 0) p_close(fd); - git_filters_free(&filters); + 
git_filter_list_free(fl); git_buf_free(&full_path); return error; diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 2b3954d9c..33ebedcde 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -101,7 +101,7 @@ void test_object_blob_filter__stats(void) void test_object_blob_filter__to_odb(void) { - git_vector filters = GIT_VECTOR_INIT; + git_filter_list *fl = NULL; git_config *cfg; int i; git_blob *blob; @@ -113,21 +113,21 @@ void test_object_blob_filter__to_odb(void) git_attr_cache_flush(g_repo); cl_git_append2file("empty_standard_repo/.gitattributes", "*.txt text\n"); - cl_assert(git_filters_load( - &filters, g_repo, "filename.txt", GIT_FILTER_TO_ODB) > 0); - cl_assert(filters.length == 1); + cl_git_pass( + git_filter_list_load(&fl, g_repo, "filename.txt", GIT_FILTER_TO_ODB)); + cl_assert(fl != NULL); for (i = 0; i < NUM_TEST_OBJECTS; i++) { cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); cl_git_pass(git_blob__getbuf(&orig, blob)); - cl_git_pass(git_filters_apply(&out, &orig, &filters)); + cl_git_pass(git_filter_list_apply(&out, &orig, fl)); cl_assert(git_buf_cmp(&out, &g_crlf_filtered[i]) == 0); git_blob_free(blob); } - git_filters_free(&filters); + git_filter_list_free(fl); git_buf_free(&orig); git_buf_free(&out); git_config_free(cfg); From 570ba25cb0f757f993e06df629faced32fdf2f8f Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 30 Aug 2013 16:02:07 -0700 Subject: [PATCH 03/25] Make git_filter_source opaque --- include/git2/sys/filter.h | 29 +++++++++++++++++++++++------ src/crlf.c | 15 ++++++++++----- src/filter.c | 27 +++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 11 deletions(-) diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index 2264be080..b1193a538 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -22,12 +22,29 @@ GIT_BEGIN_DECL * A filter source represents a file/blob to be processed */ typedef struct 
git_filter_source git_filter_source; -struct git_filter_source { - git_repository *repo; - const char *path; - git_oid oid; /* zero if unknown (which is likely) */ - uint16_t filemode; /* zero if unknown */ -}; + +/** + * Get the repository that the source data is coming from. + */ +GIT_EXTERN(git_repository *) git_filter_source_repo(const git_filter_source *src); + +/** + * Get the path that the source data is coming from. + */ +GIT_EXTERN(const char *) git_filter_source_path(const git_filter_source *src); + +/** + * Get the file mode of the source file + * If the mode is unknown, this will return 0 + */ +GIT_EXTERN(uint16_t) git_filter_source_filemode(const git_filter_source *src); + +/** + * Get the OID of the source + * If the OID is unknown (often the case with GIT_FILTER_CLEAN) then + * this will return NULL. + */ +GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src); /** * Callback to actually perform the data filtering diff --git a/src/crlf.c b/src/crlf.c index 2177bff98..cfc2d1eb1 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -107,8 +107,10 @@ static int crlf_load_attributes( return -1; } -static int has_cr_in_index(git_repository *repo, const char *path) +static int has_cr_in_index(const git_filter_source *src) { + git_repository *repo = git_filter_source_repo(src); + const char *path = git_filter_source_path(src); git_index *index; const git_index_entry *entry; git_blob *blob; @@ -180,7 +182,7 @@ static int crlf_apply_to_odb( * If the file in the index has any CR in it, do not convert. * This is the new safer autocrlf handling. 
*/ - if (has_cr_in_index(src->repo, src->path)) + if (has_cr_in_index(src)) return GIT_ENOTFOUND; } @@ -290,7 +292,9 @@ static int crlf_check( GIT_UNUSED(mode); /* Load gitattributes for the path */ - if ((error = crlf_load_attributes(&ca, src->repo, src->path)) < 0) + error = crlf_load_attributes( + &ca, git_filter_source_repo(src), git_filter_source_path(src)); + if (error < 0) return error; /* @@ -303,8 +307,9 @@ static int crlf_check( return GIT_ENOTFOUND; if (ca.crlf_action == GIT_CRLF_GUESS) { - if ((error = git_repository__cvar( - &ca.auto_crlf, src->repo, GIT_CVAR_AUTO_CRLF)) < 0) + error = git_repository__cvar( + &ca.auto_crlf, git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF); + if (error < 0) return error; if (ca.auto_crlf == GIT_AUTO_CRLF_FALSE) diff --git a/src/filter.c b/src/filter.c index 7935e6518..3d4c6d6ce 100644 --- a/src/filter.c +++ b/src/filter.c @@ -13,6 +13,13 @@ #include "git2/config.h" #include "blob.h" +struct git_filter_source { + git_repository *repo; + const char *path; + git_oid oid; /* zero if unknown (which is likely) */ + uint16_t filemode; /* zero if unknown */ +}; + typedef struct { git_filter *filter; void *payload; @@ -32,6 +39,26 @@ typedef struct { static git_array_t(git_filter_def) filter_registry = GIT_ARRAY_INIT; +git_repository *git_filter_source_repo(const git_filter_source *src) +{ + return src->repo; +} + +const char *git_filter_source_path(const git_filter_source *src) +{ + return src->path; +} + +uint16_t git_filter_source_filemode(const git_filter_source *src) +{ + return src->filemode; +} + +const git_oid *git_filter_source_id(const git_filter_source *src) +{ + return git_oid_iszero(&src->oid) ? 
NULL : &src->oid; +} + static int filter_load_defaults(void) { if (!git_array_size(filter_registry)) { From 974774c7b00c08585b05ff87174872be005a1f29 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 9 Sep 2013 16:57:34 -0700 Subject: [PATCH 04/25] Add attributes to filters and fix registry The filter registry as implemented was too primitive to actually work once multiple filters were coming into play. This expands the implementation of the registry to handle multiple prioritized filters correctly. Additionally, this adds an "attributes" field to a filter that makes it really really easy to implement filters that are based on one or more attribute values. The lookup and even simple value checking can all happen automatically without custom filter code. Lastly, with the registry improvements, this fills out the filter lifecycle callbacks, with initialize and shutdown callbacks that will be called before the filter is first used and after it is last invoked. This allows for system-wide initialization and cleanup by the filter. 
--- include/git2/errors.h | 1 + include/git2/sys/filter.h | 103 ++++++++++++--- src/attr.c | 8 +- src/crlf.c | 55 +++----- src/filter.c | 263 +++++++++++++++++++++++++++++++++++--- tests-clar/attr/repo.c | 16 +++ 6 files changed, 365 insertions(+), 81 deletions(-) diff --git a/include/git2/errors.h b/include/git2/errors.h index dc4486ade..a454ac956 100644 --- a/include/git2/errors.h +++ b/include/git2/errors.h @@ -68,6 +68,7 @@ typedef enum { GITERR_FETCHHEAD, GITERR_MERGE, GITERR_SSH, + GITERR_FILTER, } git_error_t; /** diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index b1193a538..b0a753019 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -46,6 +46,37 @@ GIT_EXTERN(uint16_t) git_filter_source_filemode(const git_filter_source *src); */ GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src); +/* + * struct git_filter + * + * The filter lifecycle: + * - initialize - first use of filter + * - shutdown - filter removed/unregistered from system + * - check - considering for file + * - apply - applied to file + * - cleanup - done with file + */ + +/** + * Initialize callback on filter + */ +typedef int (*git_filter_init_fn)(git_filter *self); + +/** + * Shutdown callback on filter + */ +typedef void (*git_filter_shutdown_fn)(git_filter *self); + +/** + * Callback to decide if a given source needs this filter + */ +typedef int (*git_filter_check_fn)( + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ + git_filter_mode_t mode, + const git_filter_source *src, + const char **attr_values); + /** * Callback to actually perform the data filtering */ @@ -57,15 +88,6 @@ typedef int (*git_filter_apply_fn)( const git_buffer *from, const git_filter_source *src); -/** - * Callback to decide if a given source needs this filter - */ -typedef int (*git_filter_check_fn)( - git_filter *self, - void **payload, /* points to NULL ptr on entry, may be set */ - git_filter_mode_t mode, - 
const git_filter_source *src); - /** * Callback to clean up after filtering has been applied */ @@ -83,10 +105,32 @@ typedef void (*git_filter_cleanup_fn)( * * `version` should be set to GIT_FILTER_VERSION * - * `apply` is the callback that actually filters data. + * `attributes` is a list of attributes to check on a file to see if the + * filter applies. The format is a whitespace-delimited list of names + * (like "eol crlf text"). Each name may have an optional value that will + * be tested even without a `check` callback. If the value does not + * match, the filter will be skipped. The values are specified as in a + * .gitattributes file (e.g. "myattr=foobar" or "myattr" or "-myattr"). + * If a check function is supplied, then the values of the attributes will + * be passed to that function. + * + * `initialize` is an optional callback invoked before a filter is first + * used. It will be called once at most. + * + * `shutdown` is an optional callback invoked when the filter is + * unregistered or when libgit2 is shutting down. It will be called once + * at most and should free any memory as needed. * * `check` is an optional callback that checks if filtering is needed for - * a given source. + * a given source. It should return 0 if the filter should be applied + * (i.e. success), GIT_ENOTFOUND if the filter should not be applied, or + * another error code to fail out of the filter processing pipeline and + * return to the caller. + * + * `apply` is the callback that actually filters data. If it successfully + * writes the output, it should return 0. Like `check`, it can return + * GIT_ENOTFOUND to indicate that the filter doesn't actually want to run. + * Other error codes will stop filter processing and return to the caller. * * `cleanup` is an optional callback that is made after the filter has * been applied. 
Both the `check` and `apply` callbacks are able to @@ -94,25 +138,46 @@ typedef void (*git_filter_cleanup_fn)( * is given that value and can clean up as needed. */ struct git_filter { - unsigned int version; - git_filter_apply_fn apply; - git_filter_check_fn check; - git_filter_cleanup_fn cleanup; + unsigned int version; + const char *attributes; + git_filter_init_fn initialize; + git_filter_shutdown_fn shutdown; + git_filter_check_fn check; + git_filter_apply_fn apply; + git_filter_cleanup_fn cleanup; }; #define GIT_FILTER_VERSION 1 /** - * Register a filter under a given name + * Register a filter under a given name with a given priority. * - * Two filters will be preregistered with libgit2: GIT_FILTER_CRLF and - * GIT_FILTER_IDENT. + * If non-NULL, the filter's initialize callback will be invoked before + * the first use of the filter, so you can defer expensive operations (in + * case libgit2 is being used in a way that doesn't need the filter). + * + * A filter's attribute checks and `check` and `apply` callbacks will be + * issued in order of `priority` on smudge (to workdir), and in reverse + * order of `priority` on clean (to odb). + * + * One filter will be preregistered with libgit2: + * - GIT_FILTER_CRLF with priority of 0. + * + * Currently the filter registry is not thread safe, so any registering or + * deregistering of filters must be done outside of any possible usage of + * the filters (i.e. during application setup or shutdown). */ GIT_EXTERN(int) git_filter_register( - const char *name, const git_filter *filter); + const char *name, git_filter *filter, int priority); /** * Remove the filter with the given name + * + * It is not allowed to remove the builtin libgit2 filters. + * + * Currently the filter registry is not thread safe, so any registering or + * deregistering of filters must be done outside of any possible usage of + * the filters (i.e. during application setup or shutdown). 
*/ GIT_EXTERN(int) git_filter_unregister(const char *name); diff --git a/src/attr.c b/src/attr.c index 6cdff29f9..7946db4d6 100644 --- a/src/attr.c +++ b/src/attr.c @@ -26,7 +26,6 @@ git_attr_t git_attr_value(const char *attr) return GIT_ATTR_VALUE_T; } - static int collect_attr_files( git_repository *repo, uint32_t flags, @@ -103,8 +102,6 @@ int git_attr_get_many( attr_get_many_info *info = NULL; size_t num_found = 0; - memset((void *)values, 0, sizeof(const char *) * num_attr); - if (git_attr_path__init(&path, pathname, git_repository_workdir(repo)) < 0) return -1; @@ -141,6 +138,11 @@ int git_attr_get_many( } } + for (k = 0; k < num_attr; k++) { + if (!info[k].found) + values[k] = NULL; + } + cleanup: git_vector_free(&files); git_attr_path__free(&path); diff --git a/src/crlf.c b/src/crlf.c index cfc2d1eb1..1242450d8 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -74,39 +74,6 @@ static int crlf_input_action(struct crlf_attrs *ca) return ca->crlf_action; } -static int crlf_load_attributes( - struct crlf_attrs *ca, git_repository *repo, const char *path) -{ -#define NUM_CONV_ATTRS 3 - - static const char *attr_names[NUM_CONV_ATTRS] = { - "crlf", "eol", "text", - }; - - const char *attr_vals[NUM_CONV_ATTRS]; - int error; - - error = git_attr_get_many(attr_vals, - repo, 0, path, NUM_CONV_ATTRS, attr_names); - - if (error == GIT_ENOTFOUND) { - ca->crlf_action = GIT_CRLF_GUESS; - ca->eol = GIT_EOL_UNSET; - return 0; - } - - if (error == 0) { - ca->crlf_action = check_crlf(attr_vals[2]); /* text */ - if (ca->crlf_action == GIT_CRLF_GUESS) - ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */ - - ca->eol = check_eol(attr_vals[1]); /* eol */ - return 0; - } - - return -1; -} - static int has_cr_in_index(const git_filter_source *src) { git_repository *repo = git_filter_source_repo(src); @@ -283,7 +250,8 @@ static int crlf_check( git_filter *self, void **payload, /* points to NULL ptr on entry, may be set */ git_filter_mode_t mode, - const git_filter_source *src) + const 
git_filter_source *src, + const char **attr_values) { int error; struct crlf_attrs ca; @@ -291,11 +259,16 @@ static int crlf_check( GIT_UNUSED(self); GIT_UNUSED(mode); - /* Load gitattributes for the path */ - error = crlf_load_attributes( - &ca, git_filter_source_repo(src), git_filter_source_path(src)); - if (error < 0) - return error; + if (!attr_values) { + ca.crlf_action = GIT_CRLF_GUESS; + ca.eol = GIT_EOL_UNSET; + } else { + ca.crlf_action = check_crlf(attr_values[2]); /* text */ + if (ca.crlf_action == GIT_CRLF_GUESS) + ca.crlf_action = check_crlf(attr_values[0]); /* crlf */ + ca.eol = check_eol(attr_values[1]); /* eol */ + } + ca.auto_crlf = GIT_AUTO_CRLF_DEFAULT; /* * Use the core Git logic to see if we should perform CRLF for this file @@ -350,7 +323,11 @@ static void crlf_cleanup( git_filter *git_crlf_filter_new(void) { struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter)); + f->f.version = GIT_FILTER_VERSION; + f->f.attributes = "crlf eol text"; + f->f.initialize = NULL; + f->f.shutdown = NULL; f->f.check = crlf_check; f->f.apply = crlf_apply; f->f.cleanup = crlf_cleanup; diff --git a/src/filter.c b/src/filter.c index 3d4c6d6ce..7fbc20a41 100644 --- a/src/filter.c +++ b/src/filter.c @@ -12,6 +12,7 @@ #include "repository.h" #include "git2/config.h" #include "blob.h" +#include "attr_file.h" struct git_filter_source { git_repository *repo; @@ -35,9 +36,187 @@ struct git_filter_list { typedef struct { const char *filter_name; git_filter *filter; + int priority; + size_t nattrs, nmatches; + char *attrdata; + const char *attrs[GIT_FLEX_ARRAY]; } git_filter_def; -static git_array_t(git_filter_def) filter_registry = GIT_ARRAY_INIT; +static int filter_def_priority_cmp(const void *a, const void *b) +{ + int pa = ((const git_filter_def *)a)->priority; + int pb = ((const git_filter_def *)b)->priority; + return (pa < pb) ? -1 : (pa > pb) ? 
1 : 0; +} + +static git_vector git__filter_registry = { + 0, filter_def_priority_cmp, NULL, 0, 0 +}; + +static int filter_def_scan_attrs( + git_buf *attrs, size_t *nattr, size_t *nmatch, const char *attr_str) +{ + const char *start, *scan = attr_str; + int has_eq; + + *nattr = *nmatch = 0; + + if (!scan) + return 0; + + while (*scan) { + while (git__isspace(*scan)) scan++; + + for (start = scan, has_eq = 0; *scan && !git__isspace(*scan); ++scan) { + if (*scan == '=') + has_eq = 1; + } + + if (scan > start) { + (*nattr)++; + if (has_eq || *start == '-' || *start == '+' || *start == '!') /* prefix is the token's first char; scan is already past the token */ + (*nmatch)++; + + if (has_eq) + git_buf_putc(attrs, '='); + git_buf_put(attrs, start, scan - start); + git_buf_putc(attrs, '\0'); + } + } + + return 0; +} + +static void filter_def_set_attrs(git_filter_def *fdef) +{ + char *scan = fdef->attrdata; + size_t i; + + for (i = 0; i < fdef->nattrs; ++i) { + const char *name, *value; + + switch (*scan) { + case '=': + name = scan + 1; + for (scan++; *scan != '='; scan++) /* find '=' */; + *scan++ = '\0'; + value = scan; + break; + case '-': + name = scan + 1; value = git_attr__false; break; + case '+': + name = scan + 1; value = git_attr__true; break; + case '!': + name = scan + 1; value = git_attr__unset; break; + default: + name = scan; value = NULL; break; + } + + fdef->attrs[i] = name; + fdef->attrs[i + fdef->nattrs] = value; + + scan += strlen(scan) + 1; + } +} + +int git_filter_register( + const char *name, git_filter *filter, int priority) +{ + git_filter_def *fdef; + size_t nattr = 0, nmatch = 0; + git_buf attrs = GIT_BUF_INIT; + + if (git_filter_lookup(name) != NULL) { + giterr_set( + GITERR_FILTER, "Attempt to reregister existing filter '%s'", name); + return -1; + } + + if (filter_def_scan_attrs(&attrs, &nattr, &nmatch, filter->attributes) < 0) + return -1; + + fdef = git__calloc( + sizeof(git_filter_def) + 2 * nattr * sizeof(char *), 1); + GITERR_CHECK_ALLOC(fdef); + + fdef->filter_name = name; + fdef->filter = filter; + 
fdef->priority = priority; + fdef->nattrs = nattr; + fdef->nmatches = nmatch; + fdef->attrdata = git_buf_detach(&attrs); + + filter_def_set_attrs(fdef); + + if (git_vector_insert(&git__filter_registry, fdef) < 0) { + git__free(fdef->attrdata); + git__free(fdef); + return -1; + } + + git_vector_sort(&git__filter_registry); + return 0; +} + +static int filter_def_name_key_check(const void *key, const void *fdef) +{ + const char *name = + fdef ? ((const git_filter_def *)fdef)->filter_name : NULL; + return name ? git__strcmp(key, name) : -1; /* compare only when the entry has a name; otherwise report no match */ +} + +static git_filter_def *filter_find_by_name(size_t *pos, const char *name) +{ + git_filter_def *fdef = NULL; + + if (!git_vector_search2( + pos, &git__filter_registry, filter_def_name_key_check, name)) + fdef = git_vector_get(&git__filter_registry, *pos); + + return fdef; +} + +int git_filter_unregister(const char *name) +{ + size_t pos; + git_filter_def *fdef; + + /* cannot unregister default filters */ + if (!strcmp(GIT_FILTER_CRLF, name)) { + giterr_set(GITERR_FILTER, "Cannot unregister filter '%s'", name); + return -1; + } + + if ((fdef = filter_find_by_name(&pos, name)) == NULL) { + giterr_set(GITERR_FILTER, "Cannot find filter '%s' to unregister", name); + return GIT_ENOTFOUND; + } + + (void)git_vector_remove(&git__filter_registry, pos); + + if (fdef->filter->shutdown) + fdef->filter->shutdown(fdef->filter); + + git__free(fdef->attrdata); + git__free(fdef); + + return 0; +} + +git_filter *git_filter_lookup(const char *name) +{ + size_t pos; + git_filter_def *fdef = filter_find_by_name(&pos, name); + return fdef ? fdef->filter : NULL; +} + +static int filter_load_defaults(void) +{ + if (!git_vector_length(&git__filter_registry)) + return git_filter_register(GIT_FILTER_CRLF, git_crlf_filter_new(), 0); + + return 0; +} git_repository *git_filter_source_repo(const git_filter_source *src) { @@ -59,20 +238,6 @@ const git_oid *git_filter_source_id(const git_filter_source *src) return git_oid_iszero(&src->oid) ? 
NULL : &src->oid; } -static int filter_load_defaults(void) -{ - if (!git_array_size(filter_registry)) { - git_filter_def *fdef = git_array_alloc(filter_registry); - GITERR_CHECK_ALLOC(fdef); - - fdef->filter_name = GIT_FILTER_CRLF; - fdef->filter = git_crlf_filter_new(); - GITERR_CHECK_ALLOC(fdef->filter); - } - - return 0; -} - static int git_filter_list_new( git_filter_list **out, git_filter_mode_t mode, const git_filter_source *src) { @@ -92,6 +257,47 @@ static int git_filter_list_new( return 0; } +static int filter_list_check_attributes( + const char ***out, git_filter_def *fdef, const git_filter_source *src) +{ + int error; + size_t i; + const char **strs = git__calloc(fdef->nattrs, sizeof(const char *)); + GITERR_CHECK_ALLOC(strs); + + error = git_attr_get_many( + strs, src->repo, 0, src->path, fdef->nattrs, fdef->attrs); + + /* if no values were found but no matches are needed, it's okay! */ + if (error == GIT_ENOTFOUND && !fdef->nmatches) { + giterr_clear(); + git__free(strs); + return 0; + } + + for (i = 0; !error && i < fdef->nattrs; ++i) { + const char *want = fdef->attrs[fdef->nattrs + i]; + git_attr_t want_type, found_type; + + if (!want) + continue; + + want_type = git_attr_value(want); + found_type = git_attr_value(strs[i]); + + if (want_type != found_type || + (want_type == GIT_ATTR_VALUE_T && strcmp(want, strs[i]))) + error = GIT_ENOTFOUND; + } + + if (error) + git__free(strs); + else + *out = strs; + + return error; +} + int git_filter_list_load( git_filter_list **filters, git_repository *repo, @@ -102,7 +308,8 @@ int git_filter_list_load( git_filter_list *fl = NULL; git_filter_source src = { 0 }; git_filter_entry *fe; - uint32_t f; + size_t idx; + git_filter_def *fdef; if (filter_load_defaults() < 0) return -1; @@ -110,15 +317,27 @@ int git_filter_list_load( src.repo = repo; src.path = path; - for (f = 0; f < git_array_size(filter_registry); ++f) { + git_vector_foreach(&git__filter_registry, idx, fdef) { + const char **values = NULL; void 
*payload = NULL; - git_filter_def *fdef = git_array_get(filter_registry, f); if (!fdef || !fdef->filter) continue; + if (fdef->nattrs > 0) { + error = filter_list_check_attributes(&values, fdef, &src); + if (error == GIT_ENOTFOUND) { + error = 0; + continue; + } else if (error < 0) + break; + } + if (fdef->filter->check) - error = fdef->filter->check(fdef->filter, &payload, mode, &src); + error = fdef->filter->check( + fdef->filter, &payload, mode, &src, values); + + git__free(values); if (error == GIT_ENOTFOUND) error = 0; @@ -171,6 +390,7 @@ int git_filter_list_apply( uint32_t i; unsigned int src; git_buf *dbuffer[2]; + git_filter_entry *fe; if (!fl) { git_buf_swap(dest, source); @@ -188,11 +408,14 @@ int git_filter_list_apply( return -1; for (i = 0; i < git_array_size(fl->filters); ++i) { - git_filter_entry *fe = git_array_get(fl->filters, i); unsigned int dst = 1 - src; git_buf_clear(dbuffer[dst]); + fe = git_array_get( + fl->filters, (fl->mode == GIT_FILTER_TO_ODB) ? + i : git_array_size(fl->filters) - 1 - i); + /* Apply the filter from dbuffer[src] to the other buffer; * if the filtering is canceled by the user mid-filter, * we skip to the next filter without changing the source diff --git a/tests-clar/attr/repo.c b/tests-clar/attr/repo.c index ca3e71e7f..ef2ad5ce9 100644 --- a/tests-clar/attr/repo.c +++ b/tests-clar/attr/repo.c @@ -100,6 +100,22 @@ void test_attr_repo__get_many(void) cl_assert_equal_s("yes", values[3]); } +void test_attr_repo__get_many_in_place(void) +{ + const char *vals[4] = { "repoattr", "rootattr", "missingattr", "subattr" }; + + /* it should be legal to look up values into the same array that has + * the attribute names, overwriting each name as the value is found. 
+ */ + + cl_git_pass(git_attr_get_many(vals, g_repo, 0, "sub/subdir_test1", 4, vals)); + + cl_assert(GIT_ATTR_TRUE(vals[0])); + cl_assert(GIT_ATTR_TRUE(vals[1])); + cl_assert(GIT_ATTR_UNSPECIFIED(vals[2])); + cl_assert_equal_s("yes", vals[3]); +} + static int count_attrs( const char *name, const char *value, From 2a7d224f99a053d93079644947d04e7cc085930f Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 10 Sep 2013 16:33:32 -0700 Subject: [PATCH 05/25] Extend public filter api with filter lists This moves the git_filter_list into the public API so that users can create, apply, and dispose of filter lists. This allows more granular application of filters to user data outside of libgit2 internals. This also converts all the internal usage of filters to the public APIs along with a few small tweaks to make it easier to use the public git_buffer stuff alongside the internal git_buf. --- include/git2/buffer.h | 27 ++++-- include/git2/filter.h | 94 +++++++++++++++++++- include/git2/sys/filter.h | 20 ++--- src/blob.c | 57 +++--------- src/buffer.c | 23 ++++- src/buffer.h | 5 ++ src/checkout.c | 38 +++----- src/crlf.c | 7 +- src/diff_file.c | 23 ++--- src/filter.c | 149 +++++++++++++++++++++----------- src/filter.h | 60 ------------- src/odb.c | 16 ++-- tests-clar/object/blob/filter.c | 14 +-- 13 files changed, 298 insertions(+), 235 deletions(-) diff --git a/include/git2/buffer.h b/include/git2/buffer.h index 454a1faa5..cb80e48f7 100644 --- a/include/git2/buffer.h +++ b/include/git2/buffer.h @@ -21,17 +21,17 @@ GIT_BEGIN_DECL /** * A data buffer for exporting data from libgit2 * - * There are a number of places where libgit2 wants to return an allocated - * data buffer to the caller and have the caller take ownership of that - * allocated memory. This can be awkward if the caller does not have easy - * access to the same allocation functions that libgit2 is using. 
In those - * cases, libgit2 will instead fill in a `git_buffer` and the caller can - * use `git_buffer_free()` to release it when they are done. + * Sometimes libgit2 wants to return an allocated data buffer to the + * caller and have the caller take responsibility for freeing that memory. + * This can be awkward if the caller does not have easy access to the same + * allocation functions that libgit2 is using. In those cases, libgit2 + * will instead fill in a `git_buffer` and the caller can use + * `git_buffer_free()` to release it when they are done. * * * `ptr` refers to the start of the allocated memory. * * `size` contains the size of the data in `ptr` that is actually used. - * * `available` refers to the known total amount of allocated memory in - * cases where it is larger than the `size` actually in use. + * * `available` refers to the known total amount of allocated memory. It + * may be larger than the `size` actually in use. * * In a few cases, for uniformity and simplicity, an API may populate a * `git_buffer` with data that should *not* be freed (i.e. the lifetime of @@ -79,6 +79,17 @@ GIT_EXTERN(void) git_buffer_free(git_buffer *buffer); */ GIT_EXTERN(int) git_buffer_resize(git_buffer *buffer, size_t want_size); +/** + * Set buffer to a copy of some raw data. + * + * @param buffer The buffer to set + * @param data The data to copy into the buffer + * @param datalen The length of the data to copy into the buffer + * @return 0 on success, negative error code on allocation failure + */ +GIT_EXTERN(int) git_buffer_copy( + git_buffer *buffer, const void *data, size_t datalen); + GIT_END_DECL /** @} */ diff --git a/include/git2/filter.h b/include/git2/filter.h index 478f3a6ad..cb23ae4f4 100644 --- a/include/git2/filter.h +++ b/include/git2/filter.h @@ -29,10 +29,10 @@ GIT_BEGIN_DECL * change is being applied. 
*/ typedef enum { - GIT_FILTER_SMUDGE = 0, - GIT_FILTER_TO_WORKTREE = GIT_FILTER_SMUDGE, - GIT_FILTER_CLEAN = 1, - GIT_FILTER_TO_ODB = GIT_FILTER_CLEAN, + GIT_FILTER_TO_WORKTREE = 0, + GIT_FILTER_SMUDGE = GIT_FILTER_TO_WORKTREE, + GIT_FILTER_TO_ODB = 1, + GIT_FILTER_CLEAN = GIT_FILTER_TO_ODB, } git_filter_mode_t; /** @@ -50,10 +50,28 @@ typedef enum { */ typedef struct git_filter git_filter; +/** + * List of filters to be applied + * + * This represents a list of filters to be applied to a file / blob. You + * can build the list with one call, apply it with another, and dispose it + * with a third. In typical usage, there are not many occasions where a + * git_filter_list is needed directly since the library will generally + * handle conversions for you, but it can be convenient to be able to + * build and apply the list sometimes. + */ +typedef struct git_filter_list git_filter_list; + +/** + * Look up a filter by name + */ GIT_EXTERN(git_filter *) git_filter_lookup(const char *name); #define GIT_FILTER_CRLF "crlf" +/** + * Apply a single filter to a buffer of data + */ GIT_EXTERN(int) git_filter_apply_to_buffer( git_buffer *out, git_filter *filter, @@ -61,6 +79,74 @@ GIT_EXTERN(int) git_filter_apply_to_buffer( const char *as_path, git_filter_mode_t mode); +/** + * Load the filter list for a given path. + * + * This will return 0 (success) but set the output git_filter_list to NULL + * if no filters are requested for the given file. 
+ * + * @param filters Output newly created git_filter_list (or NULL) + * @param repo Repository object that contains `path` + * @param path Relative path of the file to be filtered + * @param mode Filtering direction (WT->ODB or ODB->WT) + * @return 0 on success (which could still return NULL if no filters are + * needed for the requested file), <0 on error + */ +GIT_EXTERN(int) git_filter_list_load( + git_filter_list **filters, + git_repository *repo, + const char *path, + git_filter_mode_t mode); + +/** + * Apply filter list to a data buffer. + * + * See `git2/buffer.h` for background on `git_buffer` objects. + * + * If the `in` buffer refers to data managed by libgit2 + * (i.e. `in->available` is not zero), then it will be overwritten when + * applying the filters. If not, then it will be left untouched. + * + * If there are no filters to apply (or `filters` is NULL), then the `out` + * buffer will reference the `in` buffer data (with `available` set to + * zero) instead of allocating data. This keeps allocations to a minimum, + * but it means you have to be careful about freeing the `in` data. 
+ * + * @param out Buffer to store the result of the filtering + * @param filters A loaded git_filter_list (or NULL) + * @param in Buffer containing the data to filter + * @return 0 on success, an error code otherwise + */ +GIT_EXTERN(int) git_filter_list_apply_to_data( + git_buffer *out, + git_filter_list *filters, + git_buffer *in); + +/** + * Apply filter list to the contents of a file on disk + */ +GIT_EXTERN(int) git_filter_list_apply_to_file( + git_buffer *out, + git_filter_list *filters, + git_repository *repo, + const char *path); + +/** + * Apply filter list to the contents of a blob + */ +GIT_EXTERN(int) git_filter_list_apply_to_blob( + git_buffer *out, + git_filter_list *filters, + git_blob *blob); + +/** + * Free a git_filter_list + * + * @param filters A git_filter_list created by `git_filter_list_load` + */ +GIT_EXTERN(void) git_filter_list_free(git_filter_list *filters); + + GIT_END_DECL /** @} */ diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index b0a753019..ca1fbfcce 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -46,6 +46,11 @@ GIT_EXTERN(uint16_t) git_filter_source_filemode(const git_filter_source *src); */ GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src); +/** + * Get the git_filter_mode_t to be applied + */ +GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src); + /* * struct git_filter * @@ -73,7 +78,6 @@ typedef void (*git_filter_shutdown_fn)(git_filter *self); typedef int (*git_filter_check_fn)( git_filter *self, void **payload, /* points to NULL ptr on entry, may be set */ - git_filter_mode_t mode, const git_filter_source *src, const char **attr_values); @@ -83,7 +87,6 @@ typedef int (*git_filter_check_fn)( typedef int (*git_filter_apply_fn)( git_filter *self, void **payload, /* may be read and/or set */ - git_filter_mode_t mode, git_buffer *to, const git_buffer *from, const git_filter_source *src); @@ -105,14 +108,11 @@ typedef void 
(*git_filter_cleanup_fn)( * * `version` should be set to GIT_FILTER_VERSION * - * `attributes` is a list of attributes to check on a file to see if the - * filter applies. The format is a whitespace-delimited list of names - * (like "eol crlf text"). Each name may have an optional value that will - * be tested even without a `check` callback. If the value does not - * match, the filter will be skipped. The values are specified as in a - * .gitattributes file (e.g. "myattr=foobar" or "myattr" or "-myattr"). - * If a check function is supplied, then the values of the attributes will - * be passed to that function. + * `attributes` is a whitespace-separated list of attribute names to check + * for this filter (e.g. "eol crlf text"). If the attribute name is bare, + * it will be simply loaded and passed to the `check` callback. If it has + * a value (i.e. "name=value"), the attribute must match that value for + * the filter to be applied. * * `initialize` is an optional callback invoked before a filter is first * used. It will be called once at most. diff --git a/src/blob.c b/src/blob.c index 3581ee9d1..e6bba033a 100644 --- a/src/blob.c +++ b/src/blob.c @@ -111,26 +111,18 @@ static int write_file_filtered( git_filter_list *fl) { int error; - git_buf source = GIT_BUF_INIT; - git_buf dest = GIT_BUF_INIT; + git_buffer tgt = GIT_BUFFER_INIT; - if ((error = git_futils_readbuffer(&source, full_path)) < 0) - return error; - - error = git_filter_list_apply(&dest, &source, fl); - - /* Free the source as soon as possible. 
This can be big in memory, - * and we don't want to ODB write to choke */ - git_buf_free(&source); + error = git_filter_list_apply_to_file(&tgt, fl, NULL, full_path); /* Write the file to disk if it was properly filtered */ if (!error) { - *size = dest.size; + *size = tgt.size; - error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB); + error = git_odb_write(oid, odb, tgt.ptr, tgt.size, GIT_OBJ_BLOB); } - git_buf_free(&dest); + git_buffer_free(&tgt); return error; } @@ -329,8 +321,9 @@ int git_blob_is_binary(git_blob *blob) assert(blob); - content.ptr = blob->odb_object->buffer; - content.size = min(blob->odb_object->cached.size, 4000); + content.ptr = blob->odb_object->buffer; + content.size = min(blob->odb_object->cached.size, 4000); + content.asize = 0; return git_buf_text_is_binary(&content); } @@ -342,46 +335,20 @@ int git_blob_filtered_content( int check_for_binary_data) { int error = 0; - git_buf filtered = GIT_BUF_INIT, unfiltered = GIT_BUF_INIT; git_filter_list *fl = NULL; assert(blob && as_path && out); - /* Create a fake git_buf from the blob raw data... 
*/ - filtered.ptr = (void *)git_blob_rawcontent(blob); - filtered.size = (size_t)git_blob_rawsize(blob); - filtered.asize = 0; - - if (check_for_binary_data && git_buf_text_is_binary(&filtered)) + if (check_for_binary_data && git_blob_is_binary(blob)) return 0; - error = git_filter_list_load( - &fl, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE); - if (error < 0) - return error; + if (!(error = git_filter_list_load( + &fl, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE))) { - if (fl != NULL) { - if (out->ptr && out->available) { - filtered.ptr = out->ptr; - filtered.size = out->size; - filtered.asize = out->available; - } else { - git_buf_init(&filtered, filtered.size + 1); - } - - if (!(error = git_blob__getbuf(&unfiltered, blob))) - error = git_filter_list_apply(&filtered, &unfiltered, fl); + error = git_filter_list_apply_to_blob(out, fl, blob); git_filter_list_free(fl); - git_buf_free(&unfiltered); - } - - if (!error) { - out->ptr = filtered.ptr; - out->size = filtered.size; - out->available = filtered.asize; } return error; } - diff --git a/src/buffer.c b/src/buffer.c index a92133674..aaebac776 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -501,7 +501,8 @@ void git_buffer_free(git_buffer *buffer) git__memzero(buffer, sizeof(*buffer)); } -int git_buffer_resize(git_buffer *buffer, size_t want_size) +static int git_buffer__resize( + git_buffer *buffer, size_t want_size, int preserve_data) { int non_allocated_buffer = 0; char *new_ptr; @@ -514,7 +515,7 @@ int git_buffer_resize(git_buffer *buffer, size_t want_size) if (non_allocated_buffer && !want_size) want_size = buffer->size; - if (buffer->available <= want_size) + if (buffer->available >= want_size) return 0; if (non_allocated_buffer) { @@ -530,7 +531,7 @@ int git_buffer_resize(git_buffer *buffer, size_t want_size) new_ptr = git__realloc(new_ptr, want_size); GITERR_CHECK_ALLOC(new_ptr); - if (non_allocated_buffer) + if (non_allocated_buffer && preserve_data) memcpy(new_ptr, buffer->ptr, 
buffer->size); buffer->ptr = new_ptr; @@ -538,3 +539,19 @@ int git_buffer_resize(git_buffer *buffer, size_t want_size) return 0; } + +int git_buffer_resize(git_buffer *buffer, size_t want_size) +{ + return git_buffer__resize(buffer, want_size, true); +} + +int git_buffer_copy( + git_buffer *buffer, const void *data, size_t datalen) +{ + if (git_buffer__resize(buffer, datalen, false) < 0) + return -1; + memcpy(buffer->ptr, data, datalen); + buffer->size = datalen; + return 0; +} + diff --git a/src/buffer.h b/src/buffer.h index b1cb5d06a..e07f29131 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -176,6 +176,11 @@ int git_buf_splice( size_t nb_to_insert); +GIT_INLINE(bool) git_buffer_is_allocated(const git_buffer *buffer) +{ + return (buffer->ptr != NULL && buffer->available > 0); +} + #define GIT_BUF_FROM_BUFFER(buffer) \ { (buffer)->ptr, (buffer)->available, (buffer)->size } diff --git a/src/checkout.c b/src/checkout.c index 5ce4a19c5..1def58b0a 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -678,19 +678,20 @@ fail: static int buffer_to_file( struct stat *st, - git_buf *buffer, + git_buffer *buffer, const char *path, mode_t dir_mode, int file_open_flags, mode_t file_mode) { int error; + git_buf buf = GIT_BUF_FROM_BUFFER(buffer); if ((error = git_futils_mkpath2file(path, dir_mode)) < 0) return error; if ((error = git_futils_writebuffer( - buffer, path, file_open_flags, file_mode)) < 0) + &buf, path, file_open_flags, file_mode)) < 0) return error; if (st != NULL && (error = p_stat(path, st)) < 0) @@ -712,39 +713,26 @@ static int blob_content_to_file( { int error = 0; mode_t file_mode = opts->file_mode ? opts->file_mode : entry_filemode; - git_buf unfiltered = GIT_BUF_INIT, filtered = GIT_BUF_INIT; + git_buffer out = GIT_BUFFER_INIT; git_filter_list *fl = NULL; - /* Create a fake git_buf from the blob raw data... 
*/ - filtered.ptr = (void *)git_blob_rawcontent(blob); - filtered.size = (size_t)git_blob_rawsize(blob); - - if (!opts->disable_filters && !git_buf_text_is_binary(&filtered)) { + if (!opts->disable_filters && !git_blob_is_binary(blob)) error = git_filter_list_load( &fl, git_blob_owner(blob), path, GIT_FILTER_TO_WORKTREE); - } - if (fl != NULL) { - /* reset 'filtered' so it can be a filter target */ - git_buf_init(&filtered, 0); + if (!error) + error = git_filter_list_apply_to_blob(&out, fl, blob); - if (!(error = git_blob__getbuf(&unfiltered, blob))) { - error = git_filter_list_apply(&filtered, &unfiltered, fl); + git_filter_list_free(fl); - git_buf_free(&unfiltered); - } + if (!error) { + error = buffer_to_file( + st, &out, path, opts->dir_mode, opts->file_open_flags, file_mode); - git_filter_list_free(fl); - } - - if (!error && - !(error = buffer_to_file( - st, &filtered, path, opts->dir_mode, - opts->file_open_flags, file_mode))) st->st_mode = entry_filemode; - if (filtered.asize != 0) - git_buf_free(&filtered); + git_buffer_free(&out); + } return error; } diff --git a/src/crlf.c b/src/crlf.c index 1242450d8..cc256fc70 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -8,6 +8,7 @@ #include "git2/attr.h" #include "git2/blob.h" #include "git2/index.h" +#include "git2/sys/filter.h" #include "common.h" #include "fileops.h" @@ -249,7 +250,6 @@ static int crlf_apply_to_workdir( static int crlf_check( git_filter *self, void **payload, /* points to NULL ptr on entry, may be set */ - git_filter_mode_t mode, const git_filter_source *src, const char **attr_values) { @@ -257,7 +257,6 @@ static int crlf_check( struct crlf_attrs ca; GIT_UNUSED(self); - GIT_UNUSED(mode); if (!attr_values) { ca.crlf_action = GIT_CRLF_GUESS; @@ -299,14 +298,13 @@ static int crlf_check( static int crlf_apply( git_filter *self, void **payload, /* may be read and/or set */ - git_filter_mode_t mode, git_buffer *to, const git_buffer *from, const git_filter_source *src) { GIT_UNUSED(self); - if (mode == 
GIT_FILTER_SMUDGE) + if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) return crlf_apply_to_workdir(*payload, to, from); else return crlf_apply_to_odb(*payload, to, from, src); @@ -331,5 +329,6 @@ git_filter *git_crlf_filter_new(void) f->f.check = crlf_check; f->f.apply = crlf_apply; f->f.cleanup = crlf_cleanup; + return (git_filter *)f; } diff --git a/src/diff_file.c b/src/diff_file.c index 7602591cf..e0e244b65 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -297,8 +297,8 @@ static int diff_file_content_load_workdir_file( { int error = 0; git_filter_list *fl = NULL; - git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; git_file fd = git_futils_open_ro(git_buf_cstr(path)); + git_buf raw = GIT_BUF_INIT; if (fd < 0) return fd; @@ -326,16 +326,19 @@ static int diff_file_content_load_workdir_file( giterr_clear(); } - if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size)) && - !(error = git_filter_list_apply(&filtered, &raw, fl))) - { - fc->map.len = git_buf_len(&filtered); - fc->map.data = git_buf_detach(&filtered); - fc->flags |= GIT_DIFF_FLAG__FREE_DATA; - } + if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size))) { + git_buffer in = GIT_BUFFER_FROM_BUF(&raw), out = GIT_BUFFER_INIT; - git_buf_free(&raw); - git_buf_free(&filtered); + error = git_filter_list_apply_to_data(&out, fl, &in); + + git_buffer_free(&in); + + if (!error) { + fc->map.len = out.size; + fc->map.data = out.ptr; + fc->flags |= GIT_DIFF_FLAG__FREE_DATA; + } + } cleanup: git_filter_list_free(fl); diff --git a/src/filter.c b/src/filter.c index 7fbc20a41..de0d490aa 100644 --- a/src/filter.c +++ b/src/filter.c @@ -10,15 +10,18 @@ #include "hash.h" #include "filter.h" #include "repository.h" +#include "git2/sys/filter.h" #include "git2/config.h" #include "blob.h" #include "attr_file.h" +#include "array.h" struct git_filter_source { git_repository *repo; const char *path; git_oid oid; /* zero if unknown (which is likely) */ uint16_t filemode; /* zero if 
unknown */ + git_filter_mode_t mode; }; typedef struct { @@ -28,7 +31,6 @@ typedef struct { struct git_filter_list { git_array_t(git_filter_entry) filters; - git_filter_mode_t mode; git_filter_source source; char path[GIT_FLEX_ARRAY]; }; @@ -238,8 +240,13 @@ const git_oid *git_filter_source_id(const git_filter_source *src) return git_oid_iszero(&src->oid) ? NULL : &src->oid; } +git_filter_mode_t git_filter_source_mode(const git_filter_source *src) +{ + return src->mode; +} + static int git_filter_list_new( - git_filter_list **out, git_filter_mode_t mode, const git_filter_source *src) + git_filter_list **out, const git_filter_source *src) { git_filter_list *fl = NULL; size_t pathlen = src->path ? strlen(src->path) : 0; @@ -247,11 +254,11 @@ static int git_filter_list_new( fl = git__calloc(1, sizeof(git_filter_list) + pathlen + 1); GITERR_CHECK_ALLOC(fl); - fl->mode = mode; if (src->path) memcpy(fl->path, src->path, pathlen); fl->source.repo = src->repo; fl->source.path = fl->path; + fl->source.mode = src->mode; *out = fl; return 0; @@ -316,6 +323,7 @@ int git_filter_list_load( src.repo = repo; src.path = path; + src.mode = mode; git_vector_foreach(&git__filter_registry, idx, fdef) { const char **values = NULL; @@ -335,7 +343,7 @@ int git_filter_list_load( if (fdef->filter->check) error = fdef->filter->check( - fdef->filter, &payload, mode, &src, values); + fdef->filter, &payload, &src, values); git__free(values); @@ -344,7 +352,7 @@ int git_filter_list_load( else if (error < 0) break; else { - if (!fl && (error = git_filter_list_new(&fl, mode, &src)) < 0) + if (!fl && (error = git_filter_list_new(&fl, &src)) < 0) return error; fe = git_array_alloc(fl->filters); @@ -381,40 +389,46 @@ void git_filter_list_free(git_filter_list *fl) git__free(fl); } -int git_filter_list_apply( - git_buf *dest, - git_buf *source, - git_filter_list *fl) +static int filter_list_out_buffer_from_raw( + git_buffer *out, const void *ptr, size_t size) +{ + if (git_buffer_is_allocated(out)) + 
git_buffer_free(out); + + out->ptr = (char *)ptr; + out->size = size; + out->available = 0; + return 0; +} + +int git_filter_list_apply_to_data( + git_buffer *tgt, git_filter_list *fl, git_buffer *src) { int error = 0; uint32_t i; - unsigned int src; - git_buf *dbuffer[2]; - git_filter_entry *fe; + git_buffer *dbuffer[2], local = GIT_BUFFER_INIT; + unsigned int si = 0; - if (!fl) { - git_buf_swap(dest, source); - return 0; + if (!fl) + return filter_list_out_buffer_from_raw(tgt, src->ptr, src->size); + + dbuffer[0] = src; + dbuffer[1] = tgt; + + /* if `src` buffer is reallocable, then use it, otherwise copy it */ + if (!git_buffer_is_allocated(src)) { + if (git_buffer_copy(&local, src->ptr, src->size) < 0) + return -1; + dbuffer[0] = &local; } - dbuffer[0] = source; - dbuffer[1] = dest; - - src = 0; - - /* Pre-grow the destination buffer to more or less the size - * we expect it to have */ - if (git_buf_grow(dest, git_buf_len(source)) < 0) - return -1; - for (i = 0; i < git_array_size(fl->filters); ++i) { - unsigned int dst = 1 - src; + unsigned int di = 1 - si; + uint32_t fidx = (fl->source.mode == GIT_FILTER_TO_ODB) ? + i : git_array_size(fl->filters) - 1 - i; + git_filter_entry *fe = git_array_get(fl->filters, fidx); - git_buf_clear(dbuffer[dst]); - - fe = git_array_get( - fl->filters, (fl->mode == GIT_FILTER_TO_ODB) ? - i : git_array_size(fl->filters) - 1 - i); + dbuffer[di]->size = 0; /* Apply the filter from dbuffer[src] to the other buffer; * if the filtering is canceled by the user mid-filter, @@ -422,33 +436,64 @@ int git_filter_list_apply( * of the double buffering (so that the text goes through * cleanly). 
*/ - { - git_buffer srcb = GIT_BUFFER_FROM_BUF(dbuffer[src]); - git_buffer dstb = GIT_BUFFER_FROM_BUF(dbuffer[dst]); - error = fe->filter->apply( - fe->filter, &fe->payload, fl->mode, &dstb, &srcb, &fl->source); + error = fe->filter->apply( + fe->filter, &fe->payload, dbuffer[di], dbuffer[si], &fl->source); - if (error == GIT_ENOTFOUND) - error = 0; - else if (error < 0) { - git_buf_clear(dest); - return error; - } - else { - git_buf_from_buffer(dbuffer[src], &srcb); - git_buf_from_buffer(dbuffer[dst], &dstb); - src = dst; - } + if (error == GIT_ENOTFOUND) + error = 0; + else if (!error) + si = di; /* swap buffers */ + else { + tgt->size = 0; + return error; } - - if (git_buf_oom(dbuffer[dst])) - return -1; } /* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */ - if (src != 1) - git_buf_swap(dest, source); + if (si != 1) { + git_buffer sw = *dbuffer[1]; + *dbuffer[1] = *dbuffer[0]; + *dbuffer[0] = sw; + } + + git_buffer_free(&local); /* don't leak if we allocated locally */ return 0; } + +int git_filter_list_apply_to_file( + git_buffer *out, + git_filter_list *filters, + git_repository *repo, + const char *path) +{ + int error; + const char *base = repo ? 
git_repository_workdir(repo) : NULL; + git_buf abspath = GIT_BUF_INIT, raw = GIT_BUF_INIT; + + if (!(error = git_path_join_unrooted(&abspath, path, base, NULL)) && + !(error = git_futils_readbuffer(&raw, abspath.ptr))) + { + git_buffer in = GIT_BUFFER_FROM_BUF(&raw); + + error = git_filter_list_apply_to_data(out, filters, &in); + + git_buffer_free(&in); + } + + git_buf_free(&abspath); + return error; +} + +int git_filter_list_apply_to_blob( + git_buffer *out, + git_filter_list *filters, + git_blob *blob) +{ + git_buffer in = { + (char *)git_blob_rawcontent(blob), git_blob_rawsize(blob), 0 + }; + + return git_filter_list_apply_to_data(out, filters, &in); +} diff --git a/src/filter.h b/src/filter.h index a4ee2172d..1bde1e306 100644 --- a/src/filter.h +++ b/src/filter.h @@ -8,12 +8,7 @@ #define INCLUDE_filter_h__ #include "common.h" -#include "buffer.h" -#include "array.h" -#include "git2/odb.h" -#include "git2/repository.h" #include "git2/filter.h" -#include "git2/sys/filter.h" typedef enum { GIT_CRLF_GUESS = -1, @@ -24,61 +19,6 @@ typedef enum { GIT_CRLF_AUTO, } git_crlf_t; -typedef struct git_filter_list git_filter_list; - -/* - * FILTER API - */ - -/* - * For any given path in the working directory, create a `git_filter_list` - * with the relevant filters that need to be applied. - * - * This will return 0 (success) but set the output git_filter_list to NULL - * if no filters are requested for the given file. - * - * @param filters Output newly created git_filter_list (or NULL) - * @param repo Repository object that contains `path` - * @param path Relative path of the file to be filtered - * @param mode Filtering direction (WT->ODB or ODB->WT) - * @return 0 on success (which could still return NULL if no filters are - * needed for the requested file), <0 on error - */ -extern int git_filter_list_load( - git_filter_list **filters, - git_repository *repo, - const char *path, - git_filter_mode_t mode); - -/* - * Apply one or more filters to a data buffer. 
- * - * The source data must have been loaded as a `git_buf` object. Both the - * `source` and `dest` buffers are owned by the caller and must be freed - * once they are no longer needed. - * - * NOTE: Because of the double-buffering schema, the `source` buffer that - * contains the original file may be tampered once the filtering is - * complete. Regardless, the `dest` buffer will always contain the final - * result of the filtering - * - * @param dest Buffer to store the result of the filtering - * @param source Buffer containing the document to filter - * @param filters An already loaded git_filter_list - * @return 0 on success, an error code otherwise - */ -extern int git_filter_list_apply( - git_buf *dest, - git_buf *source, - git_filter_list *filters); - -/* - * Free the git_filter_list - * - * @param filters A git_filter_list created by `git_filter_list_load` - */ -extern void git_filter_list_free(git_filter_list *filters); - /* * Available filters */ diff --git a/src/odb.c b/src/odb.c index d9310a9d7..b71b038bf 100644 --- a/src/odb.c +++ b/src/odb.c @@ -183,7 +183,6 @@ int git_odb__hashfd_filtered( { int error; git_buf raw = GIT_BUF_INIT; - git_buf filtered = GIT_BUF_INIT; if (!fl) return git_odb__hashfd(out, fd, size, type); @@ -192,15 +191,18 @@ int git_odb__hashfd_filtered( * into memory to apply filters before beginning to calculate the hash */ - if (!(error = git_futils_readbuffer_fd(&raw, fd, size))) - error = git_filter_list_apply(&filtered, &raw, fl); + if (!(error = git_futils_readbuffer_fd(&raw, fd, size))) { + git_buffer pre = GIT_BUFFER_FROM_BUF(&raw), post = GIT_BUFFER_INIT; - git_buf_free(&raw); + error = git_filter_list_apply_to_data(&post, fl, &pre); - if (!error) - error = git_odb_hash(out, filtered.ptr, filtered.size, type); + git_buffer_free(&pre); - git_buf_free(&filtered); + if (!error) + error = git_odb_hash(out, post.ptr, post.size, type); + + git_buffer_free(&post); + } return error; } diff --git a/tests-clar/object/blob/filter.c 
b/tests-clar/object/blob/filter.c index 33ebedcde..a23f897f9 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -1,7 +1,6 @@ #include "clar_libgit2.h" #include "posix.h" #include "blob.h" -#include "filter.h" #include "buf_text.h" static git_repository *g_repo = NULL; @@ -105,7 +104,7 @@ void test_object_blob_filter__to_odb(void) git_config *cfg; int i; git_blob *blob; - git_buf orig = GIT_BUF_INIT, out = GIT_BUF_INIT; + git_buffer out = GIT_BUFFER_INIT; cl_git_pass(git_repository_config(&cfg, g_repo)); cl_assert(cfg); @@ -119,17 +118,18 @@ void test_object_blob_filter__to_odb(void) for (i = 0; i < NUM_TEST_OBJECTS; i++) { cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); - cl_git_pass(git_blob__getbuf(&orig, blob)); - cl_git_pass(git_filter_list_apply(&out, &orig, fl)); - cl_assert(git_buf_cmp(&out, &g_crlf_filtered[i]) == 0); + cl_git_pass(git_filter_list_apply_to_blob(&out, fl, blob)); + + cl_assert(!memcmp( + out.ptr, g_crlf_filtered[i].ptr, + min(out.size, g_crlf_filtered[i].size))); git_blob_free(blob); } git_filter_list_free(fl); - git_buf_free(&orig); - git_buf_free(&out); + git_buffer_free(&out); git_config_free(cfg); } From 29e92d385e0bb43d45ddea5f8173d78fcceac3a6 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 10 Sep 2013 16:53:09 -0700 Subject: [PATCH 06/25] Hook up filter initialize callback I knew I forgot something --- include/git2/sys/filter.h | 2 ++ src/filter.c | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index ca1fbfcce..dbb086b0e 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -139,7 +139,9 @@ typedef void (*git_filter_cleanup_fn)( */ struct git_filter { unsigned int version; + const char *attributes; + git_filter_init_fn initialize; git_filter_shutdown_fn shutdown; git_filter_check_fn check; diff --git a/src/filter.c b/src/filter.c index de0d490aa..050014671 
100644 --- a/src/filter.c +++ b/src/filter.c @@ -39,6 +39,7 @@ typedef struct { const char *filter_name; git_filter *filter; int priority; + int initialized; size_t nattrs, nmatches; char *attrdata; const char *attrs[GIT_FLEX_ARRAY]; @@ -178,6 +179,23 @@ static git_filter_def *filter_find_by_name(size_t *pos, const char *name) return fdef; } +static int filter_initialize(git_filter_def *fdef) +{ + int error = 0; + + if (!fdef->initialized && + fdef->filter && + fdef->filter->initialize && + (error = fdef->filter->initialize(fdef->filter)) < 0) + { + git_filter_unregister(fdef->filter_name); + return error; + } + + fdef->initialized = true; + return 0; +} + int git_filter_unregister(const char *name) { size_t pos; @@ -196,8 +214,10 @@ int git_filter_unregister(const char *name) (void)git_vector_remove(&git__filter_registry, pos); - if (fdef->filter->shutdown) + if (fdef->initialized && fdef->filter && fdef->filter->shutdown) { fdef->filter->shutdown(fdef->filter); + fdef->initialized = false; + } git__free(fdef->attrdata); git__free(fdef); @@ -209,7 +229,14 @@ git_filter *git_filter_lookup(const char *name) { size_t pos; git_filter_def *fdef = filter_find_by_name(&pos, name); - return fdef ? fdef->filter : NULL; + + if (!fdef) + return NULL; + + if (!fdef->initialized && filter_initialize(fdef) < 0) + return NULL; + + return fdef->filter; } static int filter_load_defaults(void) @@ -341,6 +368,9 @@ int git_filter_list_load( break; } + if (!fdef->initialized && (error = filter_initialize(fdef)) < 0) + break; + if (fdef->filter->check) error = fdef->filter->check( fdef->filter, &payload, &src, values); From e7d0ced2192c5efeea6d9f5667d366891010b86a Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 11 Sep 2013 12:38:06 -0700 Subject: [PATCH 07/25] Fix longstanding valgrind warning There was a possible circumstance that could result in reading past the end of a buffer. This check fixes that. 
--- src/buf_text.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/buf_text.c b/src/buf_text.c index eda86adb3..631feb3f8 100644 --- a/src/buf_text.c +++ b/src/buf_text.c @@ -87,7 +87,7 @@ int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) } /* Do not drop \r unless it is followed by \n */ - if (next[1] != '\n') + if (next + 1 == scan_end || next[1] != '\n') *out++ = '\r'; } From a3aa5f4d5dcbe038f1d1c5ff40eed29d27953fbe Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 11 Sep 2013 12:45:20 -0700 Subject: [PATCH 08/25] Add simple global shutdown hooks Increasingly there are a number of components that want to do some cleanup at global shutdown time (at least if there are not going to be memory leaks). This creates a very simple system of shutdown hooks that will be invoked by git_threads_shutdown. Right now, the maximum number of hooks is hardcoded, but since adding a hook is not a public API, it should be fine and I thought it was better to start off with really simple code. 
--- src/fileops.c | 17 ++++++++++------- src/fileops.h | 5 ----- src/global.c | 38 ++++++++++++++++++++++++++++---------- src/global.h | 4 ++++ src/hash.h | 2 -- src/hash/hash_generic.h | 1 - src/hash/hash_openssl.h | 1 - src/hash/hash_win32.c | 20 +++++++++++--------- src/win32/pthread.c | 18 +++++++++--------- src/win32/pthread.h | 1 - 10 files changed, 62 insertions(+), 45 deletions(-) diff --git a/src/fileops.c b/src/fileops.c index 126d45f26..3b271e6f6 100644 --- a/src/fileops.c +++ b/src/fileops.c @@ -6,6 +6,7 @@ */ #include "common.h" #include "fileops.h" +#include "global.h" #include #if GIT_WIN32 #include "win32/findfile.h" @@ -635,6 +636,13 @@ static git_futils_dirs_guess_cb git_futils__dir_guess[GIT_FUTILS_DIR__MAX] = { git_futils_guess_xdg_dirs, }; +static void git_futils_dirs_global_shutdown(void) +{ + int i; + for (i = 0; i < GIT_FUTILS_DIR__MAX; ++i) + git_buf_free(&git_futils__dirs[i]); +} + int git_futils_dirs_global_init(void) { git_futils_dir_t i; @@ -644,6 +652,8 @@ int git_futils_dirs_global_init(void) for (i = 0; !error && i < GIT_FUTILS_DIR__MAX; i++) error = git_futils_dirs_get(&path, i); + git__on_shutdown(git_futils_dirs_global_shutdown); + return error; } @@ -726,13 +736,6 @@ int git_futils_dirs_set(git_futils_dir_t which, const char *search_path) return git_buf_oom(&git_futils__dirs[which]) ? 
-1 : 0; } -void git_futils_dirs_free(void) -{ - int i; - for (i = 0; i < GIT_FUTILS_DIR__MAX; ++i) - git_buf_free(&git_futils__dirs[i]); -} - static int git_futils_find_in_dirlist( git_buf *path, const char *name, git_futils_dir_t which, const char *label) { diff --git a/src/fileops.h b/src/fileops.h index 02f79b9e7..16bc58e93 100644 --- a/src/fileops.h +++ b/src/fileops.h @@ -353,11 +353,6 @@ extern int git_futils_dirs_get_str( */ extern int git_futils_dirs_set(git_futils_dir_t which, const char *paths); -/** - * Release / reset all search paths - */ -extern void git_futils_dirs_free(void); - /** * Create a "fake" symlink (text file containing the target path). * diff --git a/src/global.c b/src/global.c index b504e5e0a..4f024f17e 100644 --- a/src/global.c +++ b/src/global.c @@ -14,6 +14,28 @@ git_mutex git__mwindow_mutex; +#define MAX_SHUTDOWN_CB 8 + +git_global_shutdown_fn git__shutdown_callbacks[MAX_SHUTDOWN_CB]; +git_atomic git__n_shutdown_callbacks; + +void git__on_shutdown(git_global_shutdown_fn callback) +{ + int count = git_atomic_inc(&git__n_shutdown_callbacks); + assert(count <= MAX_SHUTDOWN_CB); + git__shutdown_callbacks[count - 1] = callback; +} + +static void git__shutdown(void) +{ + int pos; + + while ((pos = git_atomic_dec(&git__n_shutdown_callbacks)) >= 0) { + if (git__shutdown_callbacks[pos]) + git__shutdown_callbacks[pos](); + } +} + /** * Handle the global state with TLS * @@ -79,9 +101,7 @@ int git_threads_init(void) void git_threads_shutdown(void) { /* Shut down any subsystems that have global state */ - win32_pthread_shutdown(); - git_futils_dirs_free(); - git_hash_global_shutdown(); + git__shutdown(); TlsFree(_tls_index); _tls_init = 0; @@ -140,6 +160,9 @@ int git_threads_init(void) void git_threads_shutdown(void) { + /* Shut down any subsystems that have global state */ + git__shutdown(); + if (_tls_init) { void *ptr = pthread_getspecific(_tls_key); pthread_setspecific(_tls_key, NULL); @@ -149,10 +172,6 @@ void git_threads_shutdown(void) 
pthread_key_delete(_tls_key); _tls_init = 0; git_mutex_free(&git__mwindow_mutex); - - /* Shut down any subsystems that have global state */ - git_hash_global_shutdown(); - git_futils_dirs_free(); } git_global_st *git__global_state(void) @@ -179,15 +198,14 @@ static git_global_st __state; int git_threads_init(void) { - /* noop */ + /* noop */ return 0; } void git_threads_shutdown(void) { /* Shut down any subsystems that have global state */ - git_hash_global_shutdown(); - git_futils_dirs_free(); + git__shutdown(); } git_global_st *git__global_state(void) diff --git a/src/global.h b/src/global.h index badbc0883..778250376 100644 --- a/src/global.h +++ b/src/global.h @@ -21,4 +21,8 @@ extern git_mutex git__mwindow_mutex; #define GIT_GLOBAL (git__global_state()) +typedef void (*git_global_shutdown_fn)(void); + +extern void git__on_shutdown(git_global_shutdown_fn callback); + #endif diff --git a/src/hash.h b/src/hash.h index 5b848981f..c47f33549 100644 --- a/src/hash.h +++ b/src/hash.h @@ -13,8 +13,6 @@ typedef struct git_hash_prov git_hash_prov; typedef struct git_hash_ctx git_hash_ctx; int git_hash_global_init(void); -void git_hash_global_shutdown(void); - int git_hash_ctx_init(git_hash_ctx *ctx); void git_hash_ctx_cleanup(git_hash_ctx *ctx); diff --git a/src/hash/hash_generic.h b/src/hash/hash_generic.h index 6b60c98c4..daeb1cda8 100644 --- a/src/hash/hash_generic.h +++ b/src/hash/hash_generic.h @@ -17,7 +17,6 @@ struct git_hash_ctx { }; #define git_hash_global_init() 0 -#define git_hash_global_shutdown() /* noop */ #define git_hash_ctx_init(ctx) git_hash_init(ctx) #define git_hash_ctx_cleanup(ctx) diff --git a/src/hash/hash_openssl.h b/src/hash/hash_openssl.h index f83279a5a..9a55d472d 100644 --- a/src/hash/hash_openssl.h +++ b/src/hash/hash_openssl.h @@ -17,7 +17,6 @@ struct git_hash_ctx { }; #define git_hash_global_init() 0 -#define git_hash_global_shutdown() /* noop */ #define git_hash_ctx_init(ctx) git_hash_init(ctx) #define git_hash_ctx_cleanup(ctx) diff --git 
a/src/hash/hash_win32.c b/src/hash/hash_win32.c index 095ceb359..bb2231364 100644 --- a/src/hash/hash_win32.c +++ b/src/hash/hash_win32.c @@ -89,7 +89,15 @@ GIT_INLINE(void) hash_cryptoapi_prov_shutdown(void) hash_prov.type = INVALID; } -int git_hash_global_init() +static void git_hash_global_shutdown(void) +{ + if (hash_prov.type == CNG) + hash_cng_prov_shutdown(); + else if(hash_prov.type == CRYPTOAPI) + hash_cryptoapi_prov_shutdown(); +} + +int git_hash_global_init(void) { int error = 0; @@ -99,15 +107,9 @@ int git_hash_global_init() if ((error = hash_cng_prov_init()) < 0) error = hash_cryptoapi_prov_init(); - return error; -} + git__on_shutdown(git_hash_global_shutdown); -void git_hash_global_shutdown() -{ - if (hash_prov.type == CNG) - hash_cng_prov_shutdown(); - else if(hash_prov.type == CRYPTOAPI) - hash_cryptoapi_prov_shutdown(); + return error; } /* CryptoAPI: available in Windows XP and newer */ diff --git a/src/win32/pthread.c b/src/win32/pthread.c index d50ace695..8c7ef2856 100644 --- a/src/win32/pthread.c +++ b/src/win32/pthread.c @@ -217,6 +217,14 @@ int pthread_rwlock_destroy(pthread_rwlock_t *lock) } +static void win32_pthread_shutdown(void) +{ + if (win32_kernel32_dll) { + FreeLibrary(win32_kernel32_dll); + win32_kernel32_dll = NULL; + } +} + int win32_pthread_initialize(void) { if (win32_kernel32_dll) @@ -239,15 +247,7 @@ int win32_pthread_initialize(void) win32_srwlock_release_exclusive = (win32_srwlock_fn) GetProcAddress(win32_kernel32_dll, "ReleaseSRWLockExclusive"); - return 0; -} - -int win32_pthread_shutdown(void) -{ - if (win32_kernel32_dll) { - FreeLibrary(win32_kernel32_dll); - win32_kernel32_dll = NULL; - } + git__on_shutdown(win32_pthread_shutdown); return 0; } diff --git a/src/win32/pthread.h b/src/win32/pthread.h index 2ba2ca552..af5b121f0 100644 --- a/src/win32/pthread.h +++ b/src/win32/pthread.h @@ -69,6 +69,5 @@ int pthread_rwlock_wrunlock(pthread_rwlock_t *); int pthread_rwlock_destroy(pthread_rwlock_t *); extern int 
win32_pthread_initialize(void); -extern int win32_pthread_shutdown(void); #endif From 0646634e2fea3e0adf724e0b7b15118574b589fc Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 11 Sep 2013 12:45:37 -0700 Subject: [PATCH 09/25] Update filter registry code This updates the git filter registry to be a little cleaner and plugs some memory leaks. --- src/crlf.c | 13 ++-- src/filter.c | 167 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 122 insertions(+), 58 deletions(-) diff --git a/src/crlf.c b/src/crlf.c index cc256fc70..e974208a6 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -318,6 +318,11 @@ static void crlf_cleanup( git__free(payload); } +static void crlf_shutdown(git_filter *self) +{ + git__free(self); +} + git_filter *git_crlf_filter_new(void) { struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter)); @@ -325,10 +330,10 @@ git_filter *git_crlf_filter_new(void) f->f.version = GIT_FILTER_VERSION; f->f.attributes = "crlf eol text"; f->f.initialize = NULL; - f->f.shutdown = NULL; - f->f.check = crlf_check; - f->f.apply = crlf_apply; - f->f.cleanup = crlf_cleanup; + f->f.shutdown = crlf_shutdown; + f->f.check = crlf_check; + f->f.apply = crlf_apply; + f->f.cleanup = crlf_cleanup; return (git_filter *)f; } diff --git a/src/filter.c b/src/filter.c index 050014671..73c2ceacb 100644 --- a/src/filter.c +++ b/src/filter.c @@ -10,6 +10,7 @@ #include "hash.h" #include "filter.h" #include "repository.h" +#include "global.h" #include "git2/sys/filter.h" #include "git2/config.h" #include "blob.h" @@ -52,10 +53,64 @@ static int filter_def_priority_cmp(const void *a, const void *b) return (pa < pb) ? -1 : (pa > pb) ? 
1 : 0; } -static git_vector git__filter_registry = { - 0, filter_def_priority_cmp, NULL, 0, 0 +struct filter_registry { + git_vector filters; }; +static struct filter_registry *git__filter_registry = NULL; + +static void filter_registry_shutdown(void) +{ + struct filter_registry *reg = NULL; + size_t pos; + git_filter_def *fdef; + + if ((reg = git__swap(git__filter_registry, NULL)) == NULL) + return; + + git_vector_foreach(&reg->filters, pos, fdef) { + if (fdef->initialized && fdef->filter && fdef->filter->shutdown) { + fdef->filter->shutdown(fdef->filter); + fdef->initialized = false; + } + + git__free(fdef->attrdata); + git__free(fdef); + } + + git_vector_free(&reg->filters); + git__free(reg); +} + +static int filter_registry_initialize(void) +{ + int error = 0; + struct filter_registry *reg; + + if (git__filter_registry) + return 0; + + reg = git__calloc(1, sizeof(struct filter_registry)); + GITERR_CHECK_ALLOC(reg); + + if ((error = git_vector_init( + &reg->filters, 2, filter_def_priority_cmp)) < 0) + goto cleanup; + + reg = git__compare_and_swap(&git__filter_registry, NULL, reg); + if (reg != NULL) + goto cleanup; + + git__on_shutdown(filter_registry_shutdown); + + return git_filter_register(GIT_FILTER_CRLF, git_crlf_filter_new(), 0); + +cleanup: + git_vector_free(&reg->filters); + git__free(reg); + return error; +} + static int filter_def_scan_attrs( git_buf *attrs, size_t *nattr, size_t *nmatch, const char *attr_str) { @@ -122,6 +177,29 @@ static void filter_def_set_attrs(git_filter_def *fdef) } } +static int filter_def_name_key_check(const void *key, const void *fdef) +{ + const char *name = + fdef ? ((const git_filter_def *)fdef)->filter_name : NULL; + return name ? 
-1 : git__strcmp(key, name); +} + +static int filter_registry_find(size_t *pos, const char *name) +{ + return git_vector_search2( + pos, &git__filter_registry->filters, filter_def_name_key_check, name); +} + +static git_filter_def *filter_registry_lookup(size_t *pos, const char *name) +{ + git_filter_def *fdef = NULL; + + if (!filter_registry_find(pos, name)) + fdef = git_vector_get(&git__filter_registry->filters, *pos); + + return fdef; +} + int git_filter_register( const char *name, git_filter *filter, int priority) { @@ -129,7 +207,10 @@ int git_filter_register( size_t nattr = 0, nmatch = 0; git_buf attrs = GIT_BUF_INIT; - if (git_filter_lookup(name) != NULL) { + if (filter_registry_initialize() < 0) + return -1; + + if (!filter_registry_find(NULL, name)) { giterr_set( GITERR_FILTER, "Attempt to reregister existing filter '%s'", name); return -1; @@ -151,48 +232,13 @@ int git_filter_register( filter_def_set_attrs(fdef); - if (git_vector_insert(&git__filter_registry, fdef) < 0) { + if (git_vector_insert(&git__filter_registry->filters, fdef) < 0) { git__free(fdef->attrdata); git__free(fdef); return -1; } - git_vector_sort(&git__filter_registry); - return 0; -} - -static int filter_def_name_key_check(const void *key, const void *fdef) -{ - const char *name = - fdef ? ((const git_filter_def *)fdef)->filter_name : NULL; - return name ? 
-1 : git__strcmp(key, name); -} - -static git_filter_def *filter_find_by_name(size_t *pos, const char *name) -{ - git_filter_def *fdef = NULL; - - if (!git_vector_search2( - pos, &git__filter_registry, filter_def_name_key_check, name)) - fdef = git_vector_get(&git__filter_registry, *pos); - - return fdef; -} - -static int filter_initialize(git_filter_def *fdef) -{ - int error = 0; - - if (!fdef->initialized && - fdef->filter && - fdef->filter->initialize && - (error = fdef->filter->initialize(fdef->filter)) < 0) - { - git_filter_unregister(fdef->filter_name); - return error; - } - - fdef->initialized = true; + git_vector_sort(&git__filter_registry->filters); return 0; } @@ -207,12 +253,12 @@ int git_filter_unregister(const char *name) return -1; } - if ((fdef = filter_find_by_name(&pos, name)) == NULL) { + if ((fdef = filter_registry_lookup(&pos, name)) == NULL) { giterr_set(GITERR_FILTER, "Cannot find filter '%s' to unregister", name); return GIT_ENOTFOUND; } - (void)git_vector_remove(&git__filter_registry, pos); + (void)git_vector_remove(&git__filter_registry->filters, pos); if (fdef->initialized && fdef->filter && fdef->filter->shutdown) { fdef->filter->shutdown(fdef->filter); @@ -225,12 +271,33 @@ int git_filter_unregister(const char *name) return 0; } +static int filter_initialize(git_filter_def *fdef) +{ + int error = 0; + + if (!fdef->initialized && + fdef->filter && + fdef->filter->initialize && + (error = fdef->filter->initialize(fdef->filter)) < 0) + { + /* auto-unregister if initialize fails */ + git_filter_unregister(fdef->filter_name); + return error; + } + + fdef->initialized = true; + return 0; +} + git_filter *git_filter_lookup(const char *name) { size_t pos; - git_filter_def *fdef = filter_find_by_name(&pos, name); + git_filter_def *fdef; - if (!fdef) + if (filter_registry_initialize() < 0) + return NULL; + + if ((fdef = filter_registry_lookup(&pos, name)) == NULL) return NULL; if (!fdef->initialized && filter_initialize(fdef) < 0) @@ -239,14 
+306,6 @@ git_filter *git_filter_lookup(const char *name) return fdef->filter; } -static int filter_load_defaults(void) -{ - if (!git_vector_length(&git__filter_registry)) - return git_filter_register(GIT_FILTER_CRLF, git_crlf_filter_new(), 0); - - return 0; -} - git_repository *git_filter_source_repo(const git_filter_source *src) { return src->repo; @@ -345,14 +404,14 @@ int git_filter_list_load( size_t idx; git_filter_def *fdef; - if (filter_load_defaults() < 0) + if (filter_registry_initialize() < 0) return -1; src.repo = repo; src.path = path; src.mode = mode; - git_vector_foreach(&git__filter_registry, idx, fdef) { + git_vector_foreach(&git__filter_registry->filters, idx, fdef) { const char **values = NULL; void *payload = NULL; From 40cb40fab93281c808255d980bbe81a18a4d9e9a Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 11 Sep 2013 14:23:39 -0700 Subject: [PATCH 10/25] Add functions to manipulate filter lists Extend the git2/sys/filter API with functions to look up a filter and add it manually to a filter list. This requires some trickery because the regular attribute lookups and checks are bypassed when this happens, but in the right hands, it will allow a user to have granular control over applying filters. 
--- include/git2/filter.h | 17 -------- include/git2/sys/filter.h | 37 +++++++++++++++++ src/buffer.c | 3 +- src/crlf.c | 14 +++++-- src/filter.c | 52 ++++++++++++++++++++++-- tests-clar/filter/crlf.c | 83 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 181 insertions(+), 25 deletions(-) create mode 100644 tests-clar/filter/crlf.c diff --git a/include/git2/filter.h b/include/git2/filter.h index cb23ae4f4..8ef88d81b 100644 --- a/include/git2/filter.h +++ b/include/git2/filter.h @@ -62,23 +62,6 @@ typedef struct git_filter git_filter; */ typedef struct git_filter_list git_filter_list; -/** - * Look up a filter by name - */ -GIT_EXTERN(git_filter *) git_filter_lookup(const char *name); - -#define GIT_FILTER_CRLF "crlf" - -/** - * Apply a single filter to a buffer of data - */ -GIT_EXTERN(int) git_filter_apply_to_buffer( - git_buffer *out, - git_filter *filter, - const git_buffer *input, - const char *as_path, - git_filter_mode_t mode); - /** * Load the filter list for a given path. * diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index dbb086b0e..ca5738a53 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -18,6 +18,43 @@ */ GIT_BEGIN_DECL +/** + * Look up a filter by name + * + * @param name The name of the filter + * @return Pointer to the filter object or NULL if not found + */ +GIT_EXTERN(git_filter *) git_filter_lookup(const char *name); + +#define GIT_FILTER_CRLF "crlf" + +/** + * Create a new empty filter list + * + * Normally you won't use this because `git_filter_list_load` will create + * the filter list for you, but you can use this in combination with the + * `git_filter_lookup` and `git_filter_list_push` functions to assemble + * your own chains of filters. + */ +GIT_EXTERN(int) git_filter_list_new( + git_filter_list **out, git_repository *repo, git_filter_mode_t mode); + +/** + * Add a filter to a filter list with the given payload. 
+ * + * Normally you won't have to do this because the filter list is created + * by calling the "check" function on registered filters when the filter + * attributes are set, but this does allow more direct manipulation of + * filter lists when desired. + * + * Note that normally the "check" function can set up a payload for the + * filter. Using this function, you can either pass in a payload if you + * know the expected payload format, or you can pass NULL. Some filters + * may fail with a NULL payload. Good luck! + */ +GIT_EXTERN(int) git_filter_list_push( + git_filter_list *fl, git_filter *filter, void *payload); + /** * A filter source represents a file/blob to be processed */ diff --git a/src/buffer.c b/src/buffer.c index aaebac776..07725b9cc 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -548,9 +548,10 @@ int git_buffer_resize(git_buffer *buffer, size_t want_size) int git_buffer_copy( git_buffer *buffer, const void *data, size_t datalen) { - if (git_buffer__resize(buffer, datalen, false) < 0) + if (git_buffer__resize(buffer, datalen + 1, false) < 0) return -1; memcpy(buffer->ptr, data, datalen); + buffer->ptr[datalen] = '\0'; buffer->size = datalen; return 0; } diff --git a/src/crlf.c b/src/crlf.c index e974208a6..99c154f70 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -86,6 +86,9 @@ static int has_cr_in_index(const git_filter_source *src) git_off_t blobsize; bool found_cr; + if (!path) + return false; + if (git_repository_index__weakptr(&index, repo) < 0) { giterr_clear(); return false; @@ -189,9 +192,7 @@ static const char *line_ending(struct crlf_attrs *ca) switch (ca->eol) { case GIT_EOL_UNSET: - return GIT_EOL_NATIVE == GIT_EOL_CRLF - ? "\r\n" - : "\n"; + return GIT_EOL_NATIVE == GIT_EOL_CRLF ? 
"\r\n" : "\n"; case GIT_EOL_CRLF: return "\r\n"; @@ -302,7 +303,12 @@ static int crlf_apply( const git_buffer *from, const git_filter_source *src) { - GIT_UNUSED(self); + /* initialize payload in case `check` was bypassed */ + if (!*payload) { + int error = crlf_check(self, payload, src, NULL); + if (error < 0 && error != GIT_ENOTFOUND) + return error; + } if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) return crlf_apply_to_workdir(*payload, to, from); diff --git a/src/filter.c b/src/filter.c index 73c2ceacb..79ccac0cf 100644 --- a/src/filter.c +++ b/src/filter.c @@ -181,7 +181,13 @@ static int filter_def_name_key_check(const void *key, const void *fdef) { const char *name = fdef ? ((const git_filter_def *)fdef)->filter_name : NULL; - return name ? -1 : git__strcmp(key, name); + return name ? git__strcmp(key, name) : -1; +} + +static int filter_def_filter_key_check(const void *key, const void *fdef) +{ + const void *filter = fdef ? ((const git_filter_def *)fdef)->filter : NULL; + return (key == filter) ? 
0 : -1; } static int filter_registry_find(size_t *pos, const char *name) @@ -331,7 +337,7 @@ git_filter_mode_t git_filter_source_mode(const git_filter_source *src) return src->mode; } -static int git_filter_list_new( +static int filter_list_new( git_filter_list **out, const git_filter_source *src) { git_filter_list *fl = NULL; @@ -391,6 +397,16 @@ static int filter_list_check_attributes( return error; } +int git_filter_list_new( + git_filter_list **out, git_repository *repo, git_filter_mode_t mode) +{ + git_filter_source src = { 0 }; + src.repo = repo; + src.path = NULL; + src.mode = mode; + return filter_list_new(out, &src); +} + int git_filter_list_load( git_filter_list **filters, git_repository *repo, @@ -441,7 +457,7 @@ int git_filter_list_load( else if (error < 0) break; else { - if (!fl && (error = git_filter_list_new(&fl, &src)) < 0) + if (!fl && (error = filter_list_new(&fl, &src)) < 0) return error; fe = git_array_alloc(fl->filters); @@ -478,6 +494,36 @@ void git_filter_list_free(git_filter_list *fl) git__free(fl); } +int git_filter_list_push( + git_filter_list *fl, git_filter *filter, void *payload) +{ + int error = 0; + size_t pos; + git_filter_def *fdef; + git_filter_entry *fe; + + assert(fl && filter); + + if (git_vector_search2( + &pos, &git__filter_registry->filters, + filter_def_filter_key_check, filter) < 0) { + giterr_set(GITERR_FILTER, "Cannot use an unregistered filter"); + return -1; + } + + fdef = git_vector_get(&git__filter_registry->filters, pos); + + if (!fdef->initialized && (error = filter_initialize(fdef)) < 0) + return error; + + fe = git_array_alloc(fl->filters); + GITERR_CHECK_ALLOC(fe); + fe->filter = filter; + fe->payload = payload; + + return 0; +} + static int filter_list_out_buffer_from_raw( git_buffer *out, const void *ptr, size_t size) { diff --git a/tests-clar/filter/crlf.c b/tests-clar/filter/crlf.c new file mode 100644 index 000000000..098a85d4c --- /dev/null +++ b/tests-clar/filter/crlf.c @@ -0,0 +1,83 @@ +#include 
"clar_libgit2.h" +#include "git2/sys/filter.h" + +static git_repository *g_repo = NULL; + +void test_filter_crlf__initialize(void) +{ + g_repo = cl_git_sandbox_init("crlf"); + + cl_git_mkfile("crlf/.gitattributes", + "*.txt text\n*.bin binary\n*.crlf text eol=crlf\n*.lf text eol=lf\n"); +} + +void test_filter_crlf__cleanup(void) +{ + cl_git_sandbox_cleanup(); +} + +void test_filter_crlf__to_worktree(void) +{ + git_filter_list *fl; + git_filter *crlf; + git_buffer in = GIT_BUFFER_INIT, out = GIT_BUFFER_INIT; + + { + git_config *cfg; + cl_git_pass(git_repository_config(&cfg, g_repo)); + cl_git_pass(git_config_set_string(cfg, "core.autocrlf", "true")); + git_config_free(cfg); + } + + cl_git_pass(git_filter_list_new(&fl, g_repo, GIT_FILTER_TO_WORKTREE)); + + crlf = git_filter_lookup(GIT_FILTER_CRLF); + cl_assert(crlf != NULL); + + cl_git_pass(git_filter_list_push(fl, crlf, NULL)); + + in.ptr = "Some text\nRight here\n"; + in.size = strlen(in.ptr); + + cl_git_pass(git_filter_list_apply_to_data(&out, fl, &in)); + +#ifdef GIT_WIN32 + cl_assert_equal_s("Some text\r\nRight here\r\n", out.ptr); +#else + cl_assert_equal_s("Some text\nRight here\n", out.ptr); +#endif + + git_filter_list_free(fl); + git_buffer_free(&out); +} + +void test_filter_crlf__to_odb(void) +{ + git_filter_list *fl; + git_filter *crlf; + git_buffer in = GIT_BUFFER_INIT, out = GIT_BUFFER_INIT; + + { + git_config *cfg; + cl_git_pass(git_repository_config(&cfg, g_repo)); + cl_git_pass(git_config_set_string(cfg, "core.autocrlf", "true")); + git_config_free(cfg); + } + + cl_git_pass(git_filter_list_new(&fl, g_repo, GIT_FILTER_TO_ODB)); + + crlf = git_filter_lookup(GIT_FILTER_CRLF); + cl_assert(crlf != NULL); + + cl_git_pass(git_filter_list_push(fl, crlf, NULL)); + + in.ptr = "Some text\r\nRight here\r\n"; + in.size = strlen(in.ptr); + + cl_git_pass(git_filter_list_apply_to_data(&out, fl, &in)); + + cl_assert_equal_s("Some text\nRight here\n", out.ptr); + + git_filter_list_free(fl); + git_buffer_free(&out); +} 
From 4b11f25a4fbb6952284e037a70e2d61fde841ab6 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 11 Sep 2013 16:38:33 -0700 Subject: [PATCH 11/25] Add ident filter This adds the ident filter (that knows how to replace $Id$) and tweaks the filter APIs and code so that git_filter_source objects actually have the updated OID of the object being filtered when it is a known value. --- include/git2/filter.h | 6 +- include/git2/sys/filter.h | 11 ++- src/blob.c | 8 +- src/checkout.c | 2 +- src/crlf.c | 7 +- src/diff.c | 2 +- src/diff_file.c | 2 +- src/filter.c | 31 +++++++- src/filter.h | 3 + src/ident.c | 128 +++++++++++++++++++++++++++++++ src/repository.c | 3 +- tests-clar/filter/blob.c | 40 +++++++++- tests-clar/filter/ident.c | 131 ++++++++++++++++++++++++++++++++ tests-clar/object/blob/filter.c | 4 +- 14 files changed, 355 insertions(+), 23 deletions(-) create mode 100644 src/ident.c create mode 100644 tests-clar/filter/ident.c diff --git a/include/git2/filter.h b/include/git2/filter.h index 8ef88d81b..649ed97cf 100644 --- a/include/git2/filter.h +++ b/include/git2/filter.h @@ -42,11 +42,13 @@ typedef enum { * file data. Libgit2 includes one built in filter and it is possible to * write your own (see git2/sys/filter.h for information on that). * - * The built in filter is: + * The two builtin filters are: * * * "crlf" which uses the complex rules with the "text", "eol", and * "crlf" file attributes to decide how to convert between LF and CRLF * line endings + * * "ident" which replaces "$Id$" in a blob with "$Id: <blob OID>$" upon + * checkout and replaces "$Id: <anything>$" with "$Id$" on checkin. 
*/ typedef struct git_filter git_filter; @@ -70,6 +72,7 @@ typedef struct git_filter_list git_filter_list; * * @param filters Output newly created git_filter_list (or NULL) * @param repo Repository object that contains `path` + * @param blob The blob to which the filter will be applied (if known) * @param path Relative path of the file to be filtered * @param mode Filtering direction (WT->ODB or ODB->WT) * @return 0 on success (which could still return NULL if no filters are @@ -78,6 +81,7 @@ typedef struct git_filter_list git_filter_list; GIT_EXTERN(int) git_filter_list_load( git_filter_list **filters, git_repository *repo, + git_blob *blob, /* can be NULL */ const char *path, git_filter_mode_t mode); diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index ca5738a53..c35fe55f6 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -26,7 +26,11 @@ GIT_BEGIN_DECL */ GIT_EXTERN(git_filter *) git_filter_lookup(const char *name); -#define GIT_FILTER_CRLF "crlf" +#define GIT_FILTER_CRLF "crlf" +#define GIT_FILTER_IDENT "ident" + +#define GIT_FILTER_CRLF_PRIORITY 0 +#define GIT_FILTER_IDENT_PRIORITY 100 /** * Create a new empty filter list @@ -199,8 +203,9 @@ struct git_filter { * issued in order of `priority` on smudge (to workdir), and in reverse * order of `priority` on clean (to odb). * - * One filter will be preregistered with libgit2: - * - GIT_FILTER_CRLF with priority of 0. 
+ * Two filters are preregistered with libgit2: + * - GIT_FILTER_CRLF with priority 0 + * - GIT_FILTER_IDENT with priority 100 * * Currently the filter registry is not thread safe, so any registering or * deregistering of filters must be done outside of any possible usage of diff --git a/src/blob.c b/src/blob.c index e6bba033a..97fd6f70d 100644 --- a/src/blob.c +++ b/src/blob.c @@ -195,7 +195,7 @@ int git_blob__create_from_paths( if (try_load_filters) /* Load the filters for writing this file to the ODB */ error = git_filter_list_load( - &fl, repo, hint_path, GIT_FILTER_TO_ODB); + &fl, repo, NULL, hint_path, GIT_FILTER_TO_ODB); if (error < 0) /* well, that didn't work */; @@ -331,19 +331,19 @@ int git_blob_is_binary(git_blob *blob) int git_blob_filtered_content( git_buffer *out, git_blob *blob, - const char *as_path, + const char *path, int check_for_binary_data) { int error = 0; git_filter_list *fl = NULL; - assert(blob && as_path && out); + assert(blob && path && out); if (check_for_binary_data && git_blob_is_binary(blob)) return 0; if (!(error = git_filter_list_load( - &fl, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE))) { + &fl, git_blob_owner(blob), blob, path, GIT_FILTER_TO_WORKTREE))) { error = git_filter_list_apply_to_blob(out, fl, blob); diff --git a/src/checkout.c b/src/checkout.c index 1def58b0a..7e79c9b5e 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -718,7 +718,7 @@ static int blob_content_to_file( if (!opts->disable_filters && !git_blob_is_binary(blob)) error = git_filter_list_load( - &fl, git_blob_owner(blob), path, GIT_FILTER_TO_WORKTREE); + &fl, git_blob_owner(blob), blob, path, GIT_FILTER_TO_WORKTREE); if (!error) error = git_filter_list_apply_to_blob(&out, fl, blob); diff --git a/src/crlf.c b/src/crlf.c index 99c154f70..f61a870da 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -324,11 +324,6 @@ static void crlf_cleanup( git__free(payload); } -static void crlf_shutdown(git_filter *self) -{ - git__free(self); -} - git_filter 
*git_crlf_filter_new(void) { struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter)); @@ -336,7 +331,7 @@ git_filter *git_crlf_filter_new(void) f->f.version = GIT_FILTER_VERSION; f->f.attributes = "crlf eol text"; f->f.initialize = NULL; - f->f.shutdown = crlf_shutdown; + f->f.shutdown = git_filter_free; f->f.check = crlf_check; f->f.apply = crlf_apply; f->f.cleanup = crlf_cleanup; diff --git a/src/diff.c b/src/diff.c index b1cde36bc..4d9ace183 100644 --- a/src/diff.c +++ b/src/diff.c @@ -570,7 +570,7 @@ int git_diff__oid_for_file( } else { git_filter_list *fl = NULL; - result = git_filter_list_load(&fl, repo, path, GIT_FILTER_TO_ODB); + result = git_filter_list_load(&fl, repo, NULL, path, GIT_FILTER_TO_ODB); if (!result) { int fd = git_futils_open_ro(full_path.ptr); if (fd < 0) diff --git a/src/diff_file.c b/src/diff_file.c index e0e244b65..d02787c75 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -311,7 +311,7 @@ static int diff_file_content_load_workdir_file( goto cleanup; if ((error = git_filter_list_load( - &fl, fc->repo, fc->file->path, GIT_FILTER_TO_ODB)) < 0) + &fl, fc->repo, NULL, fc->file->path, GIT_FILTER_TO_ODB)) < 0) goto cleanup; /* if there are no filters, try to mmap the file */ diff --git a/src/filter.c b/src/filter.c index 79ccac0cf..f20611471 100644 --- a/src/filter.c +++ b/src/filter.c @@ -103,7 +103,23 @@ static int filter_registry_initialize(void) git__on_shutdown(filter_registry_shutdown); - return git_filter_register(GIT_FILTER_CRLF, git_crlf_filter_new(), 0); + /* try to register both default filters */ + { + git_filter *crlf = git_crlf_filter_new(); + git_filter *ident = git_ident_filter_new(); + + if (crlf && git_filter_register( + GIT_FILTER_CRLF, crlf, GIT_FILTER_CRLF_PRIORITY) < 0) + crlf = NULL; + if (ident && git_filter_register( + GIT_FILTER_IDENT, ident, GIT_FILTER_IDENT_PRIORITY) < 0) + ident = NULL; + + if (!crlf || !ident) + return -1; + } + + return 0; cleanup: git_vector_free(&reg->filters); @@ -132,7 +148,7 @@ 
static int filter_def_scan_attrs( if (scan > start) { (*nattr)++; - if (has_eq || *scan == '-' || *scan == '+' || *scan == '!') + if (has_eq || *start == '-' || *start == '+' || *start == '!') (*nmatch)++; if (has_eq) @@ -312,6 +328,11 @@ git_filter *git_filter_lookup(const char *name) return fdef->filter; } +void git_filter_free(git_filter *filter) +{ + git__free(filter); +} + git_repository *git_filter_source_repo(const git_filter_source *src) { return src->repo; @@ -410,6 +431,7 @@ int git_filter_list_new( int git_filter_list_load( git_filter_list **filters, git_repository *repo, + git_blob *blob, /* can be NULL */ const char *path, git_filter_mode_t mode) { @@ -426,6 +448,8 @@ int git_filter_list_load( src.repo = repo; src.path = path; src.mode = mode; + if (blob) + git_oid_cpy(&src.oid, git_blob_id(blob)); git_vector_foreach(&git__filter_registry->filters, idx, fdef) { const char **values = NULL; @@ -630,5 +654,8 @@ int git_filter_list_apply_to_blob( (char *)git_blob_rawcontent(blob), git_blob_rawsize(blob), 0 }; + if (filters) + git_oid_cpy(&filters->source.oid, git_blob_id(blob)); + return git_filter_list_apply_to_data(out, filters, &in); } diff --git a/src/filter.h b/src/filter.h index 1bde1e306..d0ace0f9a 100644 --- a/src/filter.h +++ b/src/filter.h @@ -19,10 +19,13 @@ typedef enum { GIT_CRLF_AUTO, } git_crlf_t; +extern void git_filter_free(git_filter *filter); + /* * Available filters */ extern git_filter *git_crlf_filter_new(void); +extern git_filter *git_ident_filter_new(void); #endif diff --git a/src/ident.c b/src/ident.c new file mode 100644 index 000000000..aedb973f9 --- /dev/null +++ b/src/ident.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ + +#include "git2/sys/filter.h" +#include "filter.h" +#include "buffer.h" + +int ident_find_id( + const char **id_start, const char **id_end, const char *start, size_t len) +{ + const char *found; + + while (len > 0 && (found = memchr(start, '$', len)) != NULL) { + size_t remaining = len - (size_t)(found - start); + if (remaining < 3) + return GIT_ENOTFOUND; + if (found[1] == 'I' && found[2] == 'd') + break; + start = found + 1; + len = remaining - 1; + } + + if (len < 3) + return GIT_ENOTFOUND; + *id_start = found; + + if ((found = memchr(found + 3, '$', len - 3)) == NULL) + return GIT_ENOTFOUND; + + *id_end = found + 1; + return 0; +} + +static int ident_insert_id( + git_buffer *to, const git_buffer *from, const git_filter_source *src) +{ + char oid[GIT_OID_HEXSZ+1]; + const char *id_start, *id_end, *from_end = from->ptr + from->size; + size_t need_size; + git_buf to_buf = GIT_BUF_FROM_BUFFER(to); + + /* replace $Id$ with blob id */ + + if (!git_filter_source_id(src)) + return GIT_ENOTFOUND; + + git_oid_tostr(oid, sizeof(oid), git_filter_source_id(src)); + + if (ident_find_id(&id_start, &id_end, from->ptr, from->size) < 0) + return GIT_ENOTFOUND; + + need_size = (size_t)(id_start - from->ptr) + + 5 /* "$Id: " */ + GIT_OID_HEXSZ + 1 /* "$" */ + + (size_t)(from_end - id_end); + + if (git_buf_grow(&to_buf, need_size) < 0) + return -1; + + git_buf_set(&to_buf, from->ptr, (size_t)(id_start - from->ptr)); + git_buf_put(&to_buf, "$Id: ", 5); + git_buf_put(&to_buf, oid, GIT_OID_HEXSZ); + git_buf_putc(&to_buf, '$'); + git_buf_put(&to_buf, id_end, (size_t)(from_end - id_end)); + + if (git_buf_oom(&to_buf)) + return -1; + + git_buffer_from_buf(to, &to_buf); + return 0; +} + +static int ident_remove_id( + git_buffer *to, const git_buffer *from) +{ + const char *id_start, *id_end, *from_end = from->ptr + from->size; + size_t need_size; + git_buf to_buf = GIT_BUF_FROM_BUFFER(to); + + if (ident_find_id(&id_start, &id_end, from->ptr, from->size) < 0) + return GIT_ENOTFOUND; 
+ + need_size = (size_t)(id_start - from->ptr) + + 4 /* "$Id$" */ + (size_t)(from_end - id_end); + + if (git_buf_grow(&to_buf, need_size) < 0) + return -1; + + git_buf_set(&to_buf, from->ptr, (size_t)(id_start - from->ptr)); + git_buf_put(&to_buf, "$Id$", 4); + git_buf_put(&to_buf, id_end, (size_t)(from_end - id_end)); + + if (git_buf_oom(&to_buf)) + return -1; + + git_buffer_from_buf(to, &to_buf); + return 0; +} + +static int ident_apply( + git_filter *self, + void **payload, + git_buffer *to, + const git_buffer *from, + const git_filter_source *src) +{ + GIT_UNUSED(self); GIT_UNUSED(payload); + + if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) + return ident_insert_id(to, from, src); + else + return ident_remove_id(to, from); +} + +git_filter *git_ident_filter_new(void) +{ + git_filter *f = git__calloc(1, sizeof(git_filter)); + + f->version = GIT_FILTER_VERSION; + f->attributes = "+ident"; /* apply to files with ident attribute set */ + f->shutdown = git_filter_free; + f->apply = ident_apply; + + return f; +} diff --git a/src/repository.c b/src/repository.c index 94700e4e3..76e8228b7 100644 --- a/src/repository.c +++ b/src/repository.c @@ -1671,7 +1671,8 @@ int git_repository_hashfile( /* passing empty string for "as_path" indicated --no-filters */ if (strlen(as_path) > 0) { - error = git_filter_list_load(&fl, repo, as_path, GIT_FILTER_TO_ODB); + error = git_filter_list_load( + &fl, repo, NULL, as_path, GIT_FILTER_TO_ODB); if (error < 0) return error; } else { diff --git a/tests-clar/filter/blob.c b/tests-clar/filter/blob.c index 27e001f99..c265ed67a 100644 --- a/tests-clar/filter/blob.c +++ b/tests-clar/filter/blob.c @@ -7,7 +7,12 @@ void test_filter_blob__initialize(void) { g_repo = cl_git_sandbox_init("crlf"); cl_git_mkfile("crlf/.gitattributes", - "*.txt text\n*.bin binary\n*.crlf text eol=crlf\n*.lf text eol=lf\n"); + "*.txt text\n*.bin binary\n" + "*.crlf text eol=crlf\n" + "*.lf text eol=lf\n" + "*.ident text ident\n" + "*.identcrlf ident text 
eol=crlf\n" + "*.identlf ident text eol.lf\n"); } void test_filter_blob__cleanup(void) @@ -41,3 +46,36 @@ void test_filter_blob__all_crlf(void) git_buffer_free(&buf); git_blob_free(blob); } + +void test_filter_blob__ident(void) +{ + git_oid id; + git_blob *blob; + git_buffer buf = GIT_BUFFER_INIT; + + cl_git_mkfile("crlf/test.ident", "Some text\n$Id$\nGoes there\n"); + cl_git_pass(git_blob_create_fromworkdir(&id, g_repo, "test.ident")); + cl_git_pass(git_blob_lookup(&blob, g_repo, &id)); + cl_assert_equal_s( + "Some text\n$Id$\nGoes there\n", git_blob_rawcontent(blob)); + git_blob_free(blob); + + cl_git_mkfile("crlf/test.ident", "Some text\n$Id: Any old just you want$\nGoes there\n"); + cl_git_pass(git_blob_create_fromworkdir(&id, g_repo, "test.ident")); + cl_git_pass(git_blob_lookup(&blob, g_repo, &id)); + cl_assert_equal_s( + "Some text\n$Id$\nGoes there\n", git_blob_rawcontent(blob)); + + cl_git_pass(git_blob_filtered_content(&buf, blob, "filter.bin", 1)); + cl_assert_equal_s( + "Some text\n$Id$\nGoes there\n", buf.ptr); + + cl_git_pass(git_blob_filtered_content(&buf, blob, "filter.identcrlf", 1)); + cl_assert_equal_s( + "Some text\r\n$Id: 3164f585d548ac68027d22b104f2d8100b2b6845$\r\nGoes there\r\n", buf.ptr); + + cl_git_pass(git_blob_filtered_content(&buf, blob, "filter.identlf", 1)); + cl_assert_equal_s( + "Some text\n$Id: 3164f585d548ac68027d22b104f2d8100b2b6845$\nGoes there\n", buf.ptr); + +} diff --git a/tests-clar/filter/ident.c b/tests-clar/filter/ident.c new file mode 100644 index 000000000..55774fbdd --- /dev/null +++ b/tests-clar/filter/ident.c @@ -0,0 +1,131 @@ +#include "clar_libgit2.h" +#include "git2/sys/filter.h" + +static git_repository *g_repo = NULL; + +void test_filter_ident__initialize(void) +{ + g_repo = cl_git_sandbox_init("crlf"); +} + +void test_filter_ident__cleanup(void) +{ + cl_git_sandbox_cleanup(); +} + +static void add_blob_and_filter( + const char *data, + git_filter_list *fl, + const char *expected) +{ + git_oid id; + git_blob 
*blob; + git_buffer out = GIT_BUFFER_INIT; + + cl_git_mkfile("crlf/identtest", data); + cl_git_pass(git_blob_create_fromworkdir(&id, g_repo, "identtest")); + cl_git_pass(git_blob_lookup(&blob, g_repo, &id)); + + cl_git_pass(git_filter_list_apply_to_blob(&out, fl, blob)); + + cl_assert_equal_s(expected, out.ptr); + + git_blob_free(blob); + git_buffer_free(&out); +} + +void test_filter_ident__to_worktree(void) +{ + git_filter_list *fl; + git_filter *ident; + + cl_git_pass(git_filter_list_new(&fl, g_repo, GIT_FILTER_TO_WORKTREE)); + + ident = git_filter_lookup(GIT_FILTER_IDENT); + cl_assert(ident != NULL); + + cl_git_pass(git_filter_list_push(fl, ident, NULL)); + + add_blob_and_filter( + "Hello\n$Id$\nFun stuff\n", fl, + "Hello\n$Id: b69e2387aafcaf73c4de5b9ab59abe27fdadee30$\nFun stuff\n"); + add_blob_and_filter( + "Hello\n$Id: Junky$\nFun stuff\n", fl, + "Hello\n$Id: 45cd107a7102911cb2a7df08404674327fa050b9$\nFun stuff\n"); + add_blob_and_filter( + "$Id$\nAt the start\n", fl, + "$Id: b13415c767abc196fb95bd17070e8c1113e32160$\nAt the start\n"); + add_blob_and_filter( + "At the end\n$Id$", fl, + "At the end\n$Id: 1344925c6bc65b34c5a7b50f86bf688e48e9a272$"); + add_blob_and_filter( + "$Id$", fl, + "$Id: b3f5ebfb5843bc43ceecff6d4f26bb37c615beb1$"); + add_blob_and_filter( + "$Id: Some sort of junk goes here$", fl, + "$Id: ab2dd3853c7c9a4bff55aca2bea077a73c32ac06$"); + + add_blob_and_filter("$Id: ", fl, "$Id: "); + add_blob_and_filter("$Id", fl, "$Id"); + add_blob_and_filter("$I", fl, "$I"); + add_blob_and_filter("Id$", fl, "Id$"); + + git_filter_list_free(fl); +} + +void test_filter_ident__to_odb(void) +{ + git_filter_list *fl; + git_filter *ident; + + cl_git_pass(git_filter_list_new(&fl, g_repo, GIT_FILTER_TO_ODB)); + + ident = git_filter_lookup(GIT_FILTER_IDENT); + cl_assert(ident != NULL); + + cl_git_pass(git_filter_list_push(fl, ident, NULL)); + + add_blob_and_filter( + "Hello\n$Id$\nFun stuff\n", + fl, "Hello\n$Id$\nFun stuff\n"); + add_blob_and_filter( + "Hello\n$Id: 
b69e2387aafcaf73c4de5b9ab59abe27fdadee30$\nFun stuff\n", + fl, "Hello\n$Id$\nFun stuff\n"); + add_blob_and_filter( + "Hello\n$Id: Any junk you may have left here$\nFun stuff\n", + fl, "Hello\n$Id$\nFun stuff\n"); + add_blob_and_filter( + "Hello\n$Id:$\nFun stuff\n", + fl, "Hello\n$Id$\nFun stuff\n"); + add_blob_and_filter( + "Hello\n$Id:x$\nFun stuff\n", + fl, "Hello\n$Id$\nFun stuff\n"); + + add_blob_and_filter( + "$Id$\nAt the start\n", fl, "$Id$\nAt the start\n"); + add_blob_and_filter( + "$Id: lots of random text that should be removed from here$\nAt the start\n", fl, "$Id$\nAt the start\n"); + add_blob_and_filter( + "$Id: lots of random text that should not be removed without a terminator\nAt the start\n", fl, "$Id: lots of random text that should not be removed without a terminator\nAt the start\n"); + + add_blob_and_filter( + "At the end\n$Id$", fl, "At the end\n$Id$"); + add_blob_and_filter( + "At the end\n$Id:$", fl, "At the end\n$Id$"); + add_blob_and_filter( + "At the end\n$Id:asdfasdf$", fl, "At the end\n$Id$"); + add_blob_and_filter( + "At the end\n$Id", fl, "At the end\n$Id"); + add_blob_and_filter( + "At the end\n$IddI", fl, "At the end\n$IddI"); + + add_blob_and_filter("$Id$", fl, "$Id$"); + add_blob_and_filter("$Id: any$", fl, "$Id$"); + add_blob_and_filter("$Id: any long stuff goes here you see$", fl, "$Id$"); + add_blob_and_filter("$Id: ", fl, "$Id: "); + add_blob_and_filter("$Id", fl, "$Id"); + add_blob_and_filter("$I", fl, "$I"); + add_blob_and_filter("Id$", fl, "Id$"); + + git_filter_list_free(fl); +} diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index a23f897f9..1e82b69cd 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -112,8 +112,8 @@ void test_object_blob_filter__to_odb(void) git_attr_cache_flush(g_repo); cl_git_append2file("empty_standard_repo/.gitattributes", "*.txt text\n"); - cl_git_pass( - git_filter_list_load(&fl, g_repo, "filename.txt", GIT_FILTER_TO_ODB)); + 
cl_git_pass(git_filter_list_load( + &fl, g_repo, NULL, "filename.txt", GIT_FILTER_TO_ODB)); cl_assert(fl != NULL); for (i = 0; i < NUM_TEST_OBJECTS; i++) { From a9f51e430fef49b3299ec33c11a4e6623e3f58cc Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 11 Sep 2013 22:00:36 -0700 Subject: [PATCH 12/25] Merge git_buf and git_buffer This makes the git_buf struct that was used internally into an externally available structure and eliminates the git_buffer. As part of that, some of the special cases that arose with the externally used git_buffer were blended into the git_buf, such as being careful about git_buf objects that may have a NULL ptr and allowing for bufs with a valid ptr and size but zero asize as a way of referring to externally owned data. --- include/git2/blob.h | 10 ++-- include/git2/buffer.h | 92 +++++++++++++++++--------------- include/git2/filter.h | 23 ++++---- include/git2/sys/filter.h | 18 +++---- src/blob.c | 6 +-- src/buffer.c | 93 +++++++-------------------------- src/buffer.h | 65 ++++++----------------- src/checkout.c | 9 ++-- src/config_file.c | 2 + src/crlf.c | 42 +++++---------- src/diff_file.c | 6 +-- src/filter.c | 48 ++++++++--------- src/ident.c | 48 +++++++---------- src/odb.c | 8 +-- src/path.c | 2 +- src/util.c | 3 ++ tests-clar/filter/blob.c | 9 ++-- tests-clar/filter/crlf.c | 8 +-- tests-clar/filter/ident.c | 4 +- tests-clar/object/blob/filter.c | 4 +- 20 files changed, 204 insertions(+), 296 deletions(-) diff --git a/include/git2/blob.h b/include/git2/blob.h index dcb815b2f..dcab4fbe0 100644 --- a/include/git2/blob.h +++ b/include/git2/blob.h @@ -104,17 +104,17 @@ GIT_EXTERN(git_off_t) git_blob_rawsize(const git_blob *blob); * CRLF filtering or other types of changes depending on the file * attributes set for the blob and the content detected in it. * - * The output is written into a `git_buffer` which the caller must free - * when done (via `git_buffer_free`). 
+ * The output is written into a `git_buf` which the caller must free + * when done (via `git_buf_free`). * * If no filters need to be applied, then the `out` buffer will just be * populated with a pointer to the raw content of the blob. In that case, * be careful to *not* free the blob until done with the buffer. To keep - * the data detached from the blob, call `git_buffer_resize` on the buffer + * the data detached from the blob, call `git_buf_grow` on the buffer * with a `want_size` of 0 and the buffer will be reallocated to be * detached from the blob. * - * @param out The git_buffer to be filled in + * @param out The git_buf to be filled in * @param blob Pointer to the blob * @param as_path Path used for file attribute lookups, etc. * @param check_for_binary_data Should this test if blob content contains @@ -122,7 +122,7 @@ GIT_EXTERN(git_off_t) git_blob_rawsize(const git_blob *blob); * @return 0 on success or an error code */ GIT_EXTERN(int) git_blob_filtered_content( - git_buffer *out, + git_buf *out, git_blob *blob, const char *as_path, int check_for_binary_data); diff --git a/include/git2/buffer.h b/include/git2/buffer.h index cb80e48f7..ae8681f13 100644 --- a/include/git2/buffer.h +++ b/include/git2/buffer.h @@ -4,8 +4,8 @@ * This file is part of libgit2, distributed under the GNU GPL v2 with * a Linking Exception. For full terms see the included COPYING file. */ -#ifndef INCLUDE_git_buffer_h__ -#define INCLUDE_git_buffer_h__ +#ifndef INCLUDE_git_buf_h__ +#define INCLUDE_git_buf_h__ #include "common.h" @@ -25,59 +25,69 @@ GIT_BEGIN_DECL * caller and have the caller take responsibility for freeing that memory. * This can be awkward if the caller does not have easy access to the same * allocation functions that libgit2 is using. In those cases, libgit2 - * will instead fill in a `git_buffer` and the caller can use - * `git_buffer_free()` to release it when they are done. 
+ * will fill in a `git_buf` and the caller can use `git_buf_free()` to + * release it when they are done. * - * * `ptr` refers to the start of the allocated memory. - * * `size` contains the size of the data in `ptr` that is actually used. - * * `available` refers to the known total amount of allocated memory. It - * may be larger than the `size` actually in use. + * A `git_buf` may also be used for the caller to pass in a reference to + * a block of memory they hold. In this case, libgit2 will not resize or + * free the memory, but will read from it as needed. * - * In a few cases, for uniformity and simplicity, an API may populate a - * `git_buffer` with data that should *not* be freed (i.e. the lifetime of - * the data buffer is actually tied to another libgit2 object). These - * cases will be clearly documented in the APIs that use the `git_buffer`. - * In those cases, the `available` field will be set to zero even though - * the `ptr` and `size` will be valid. + * A `git_buf` is a public structure with three fields: + * + * - `ptr` points to the start of the allocated memory. If it is NULL, + * then the `git_buf` is considered empty and libgit2 will feel free + * to overwrite it with new data. + * + * - `size` holds the size (in bytes) of the data that is actually used. + * + * - `asize` holds the known total amount of allocated memory if the `ptr` + * was allocated by libgit2. It may be larger than `size`. If `ptr` + * was not allocated by libgit2 and should not be resized and/or freed, + * then `asize` will be set to zero. + * + * Some APIs may occasionally do something slightly unusual with a buffer, + * such as setting `ptr` to a value that was passed in by the user. In + * those cases, the behavior will be clearly documented by the API. 
*/ -typedef struct git_buffer { +typedef struct { char *ptr; - size_t size; - size_t available; -} git_buffer; + size_t asize, size; +} git_buf; /** - * Use to initialize buffer structure when git_buffer is on stack - */ -#define GIT_BUFFER_INIT { NULL, 0, 0 } - -/** - * Free the memory referred to by the git_buffer. + * Free the memory referred to by the git_buf. * - * Note that this does not free the `git_buffer` itself, just the memory - * pointed to by `buffer->ptr`. If that memory was not allocated by - * libgit2 itself, be careful with using this function because it could - * cause problems. + * Note that this does not free the `git_buf` itself, just the memory + * pointed to by `buffer->ptr`. This will not free the memory if it looks + * like it was not allocated internally, but it will clear the buffer back + * to the empty state. * - * @param buffer The buffer with allocated memory + * @param buffer The buffer to deallocate */ -GIT_EXTERN(void) git_buffer_free(git_buffer *buffer); +GIT_EXTERN(void) git_buf_free(git_buf *buffer); /** * Resize the buffer allocation to make more space. * - * This will update `buffer->available` with the new size (which will be - * at least `want_size` and may be larger). This may or may not change - * `buffer->ptr` depending on whether there is an existing allocation and - * whether that allocation can be increased in place. + * This will attempt to grow the buffer to accommodate the target size. * - * Currently, this will never shrink the buffer, only expand it. + * If the buffer refers to memory that was not allocated by libgit2 (i.e. + * the `asize` field is zero), then `ptr` will be replaced with a newly + * allocated block of data. Be careful so that memory allocated by the + * caller is not lost. As a special variant, if you pass `target_size` as + * 0 and the memory is not allocated by libgit2, this will allocate a new + * buffer of size `size` and copy the external data into it. 
+ * + * Currently, this will never shrink a buffer, only expand it. + * + * If the allocation fails, this will return an error and the buffer will be + * marked as invalid for future operations, invalidating the contents. * * @param buffer The buffer to be resized; may or may not be allocated yet - * @param want_size The desired available size - * @return 0 on success, negative error code on allocation failure + * @param target_size The desired available size + * @return 0 on success, -1 on allocation failure */ -GIT_EXTERN(int) git_buffer_resize(git_buffer *buffer, size_t want_size); +GIT_EXTERN(int) git_buf_grow(git_buf *buffer, size_t target_size); /** * Set buffer to a copy of some raw data. @@ -85,10 +95,10 @@ GIT_EXTERN(int) git_buffer_resize(git_buffer *buffer, size_t want_size); * @param buffer The buffer to set * @param data The data to copy into the buffer * @param datalen The length of the data to copy into the buffer - * @return 0 on success, negative error code on allocation failure + * @return 0 on success, -1 on allocation failure */ -GIT_EXTERN(int) git_buffer_copy( - git_buffer *buffer, const void *data, size_t datalen); +GIT_EXTERN(int) git_buf_set( + git_buf *buffer, const void *data, size_t datalen); GIT_END_DECL diff --git a/include/git2/filter.h b/include/git2/filter.h index 649ed97cf..f96b6766b 100644 --- a/include/git2/filter.h +++ b/include/git2/filter.h @@ -88,16 +88,17 @@ GIT_EXTERN(int) git_filter_list_load( /** * Apply filter list to a data buffer. * - * See `git2/buffer.h` for background on `git_buffer` objects. + * See `git2/buffer.h` for background on `git_buf` objects. * - * If the `in` buffer refers to data managed by libgit2 - * (i.e. `in->available` is not zero), then it will be overwritten when - * applying the filters. If not, then it will be left untouched. + * If the `in` buffer holds data allocated by libgit2 (i.e. `in->asize` is + * not zero), then it will be overwritten when applying the filters. 
If + * not, then it will be left untouched. * * If there are no filters to apply (or `filters` is NULL), then the `out` - * buffer will reference the `in` buffer data (with `available` set to - * zero) instead of allocating data. This keeps allocations to a minimum, - * but it means you have to be careful about freeing the `in` data. + * buffer will reference the `in` buffer data (with `asize` set to zero) + * instead of allocating data. This keeps allocations to a minimum, but + * it means you have to be careful about freeing the `in` data since `out` + * may be pointing to it! * * @param out Buffer to store the result of the filtering * @param filters A loaded git_filter_list (or NULL) @@ -105,15 +106,15 @@ GIT_EXTERN(int) git_filter_list_load( * @return 0 on success, an error code otherwise */ GIT_EXTERN(int) git_filter_list_apply_to_data( - git_buffer *out, + git_buf *out, git_filter_list *filters, - git_buffer *in); + git_buf *in); /** * Apply filter list to the contents of a file on disk */ GIT_EXTERN(int) git_filter_list_apply_to_file( - git_buffer *out, + git_buf *out, git_filter_list *filters, git_repository *repo, const char *path); @@ -122,7 +123,7 @@ GIT_EXTERN(int) git_filter_list_apply_to_file( * Apply filter list to the contents of a blob */ GIT_EXTERN(int) git_filter_list_apply_to_blob( - git_buffer *out, + git_buf *out, git_filter_list *filters, git_blob *blob); diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index c35fe55f6..36e97fe91 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -4,8 +4,8 @@ * This file is part of libgit2, distributed under the GNU GPL v2 with * a Linking Exception. For full terms see the included COPYING file. 
*/ -#ifndef INCLUDE_sys_git_config_backend_h__ -#define INCLUDE_sys_git_config_backend_h__ +#ifndef INCLUDE_sys_git_filter_h__ +#define INCLUDE_sys_git_filter_h__ #include "git2/filter.h" @@ -117,19 +117,19 @@ typedef void (*git_filter_shutdown_fn)(git_filter *self); * Callback to decide if a given source needs this filter */ typedef int (*git_filter_check_fn)( - git_filter *self, - void **payload, /* points to NULL ptr on entry, may be set */ + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ const git_filter_source *src, - const char **attr_values); + const char **attr_values); /** * Callback to actually perform the data filtering */ typedef int (*git_filter_apply_fn)( - git_filter *self, - void **payload, /* may be read and/or set */ - git_buffer *to, - const git_buffer *from, + git_filter *self, + void **payload, /* may be read and/or set */ + git_buf *to, + const git_buf *from, const git_filter_source *src); /** diff --git a/src/blob.c b/src/blob.c index 97fd6f70d..e18db4dfc 100644 --- a/src/blob.c +++ b/src/blob.c @@ -111,7 +111,7 @@ static int write_file_filtered( git_filter_list *fl) { int error; - git_buffer tgt = GIT_BUFFER_INIT; + git_buf tgt = GIT_BUF_INIT; error = git_filter_list_apply_to_file(&tgt, fl, NULL, full_path); @@ -122,7 +122,7 @@ static int write_file_filtered( error = git_odb_write(oid, odb, tgt.ptr, tgt.size, GIT_OBJ_BLOB); } - git_buffer_free(&tgt); + git_buf_free(&tgt); return error; } @@ -329,7 +329,7 @@ int git_blob_is_binary(git_blob *blob) } int git_blob_filtered_content( - git_buffer *out, + git_buf *out, git_blob *blob, const char *path, int check_for_binary_data) diff --git a/src/buffer.c b/src/buffer.c index 07725b9cc..f8d47d928 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -32,7 +32,8 @@ void git_buf_init(git_buf *buf, size_t initial_size) git_buf_grow(buf, initial_size); } -int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom) +int git_buf_try_grow( + git_buf *buf, size_t 
target_size, bool mark_oom, bool preserve_external) { char *new_ptr; size_t new_size; @@ -40,6 +41,9 @@ int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom) if (buf->ptr == git_buf__oom) return -1; + if (!target_size) + target_size = buf->size; + if (target_size <= buf->asize) return 0; @@ -67,6 +71,9 @@ int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom) return -1; } + if (preserve_external && !buf->asize && buf->ptr != NULL && buf->size > 0) + memcpy(new_ptr, buf->ptr, min(buf->size, new_size)); + buf->asize = new_size; buf->ptr = new_ptr; @@ -78,11 +85,16 @@ int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom) return 0; } +int git_buf_grow(git_buf *buffer, size_t target_size) +{ + return git_buf_try_grow(buffer, target_size, true, true); +} + void git_buf_free(git_buf *buf) { if (!buf) return; - if (buf->ptr != git_buf__initbuf && buf->ptr != git_buf__oom) + if (buf->asize > 0 && buf->ptr != NULL && buf->ptr != git_buf__oom) git__free(buf->ptr); git_buf_init(buf, 0); @@ -91,11 +103,15 @@ void git_buf_free(git_buf *buf) void git_buf_clear(git_buf *buf) { buf->size = 0; + + if (!buf->ptr) + buf->ptr = git_buf__initbuf; + if (buf->asize > 0) buf->ptr[0] = '\0'; } -int git_buf_set(git_buf *buf, const char *data, size_t len) +int git_buf_set(git_buf *buf, const void *data, size_t len) { if (len == 0 || data == NULL) { git_buf_clear(buf); @@ -485,74 +501,3 @@ int git_buf_splice( buf->ptr[buf->size] = '\0'; return 0; } - -/* - * Public buffers API - */ - -void git_buffer_free(git_buffer *buffer) -{ - if (!buffer) - return; - - if (buffer->ptr != NULL && buffer->available > 0) - git__free(buffer->ptr); - - git__memzero(buffer, sizeof(*buffer)); -} - -static int git_buffer__resize( - git_buffer *buffer, size_t want_size, int preserve_data) -{ - int non_allocated_buffer = 0; - char *new_ptr; - - assert(buffer); - - /* check if buffer->ptr points to memory owned elsewhere */ - non_allocated_buffer = (buffer->ptr != NULL 
&& buffer->available == 0); - - if (non_allocated_buffer && !want_size) - want_size = buffer->size; - - if (buffer->available >= want_size) - return 0; - - if (non_allocated_buffer) { - new_ptr = NULL; - if (want_size < buffer->size) - want_size = buffer->size; - } else { - new_ptr = buffer->ptr; - } - - want_size = (want_size + 7) & ~7; /* round up to multiple of 8 */ - - new_ptr = git__realloc(new_ptr, want_size); - GITERR_CHECK_ALLOC(new_ptr); - - if (non_allocated_buffer && preserve_data) - memcpy(new_ptr, buffer->ptr, buffer->size); - - buffer->ptr = new_ptr; - buffer->available = want_size; - - return 0; -} - -int git_buffer_resize(git_buffer *buffer, size_t want_size) -{ - return git_buffer__resize(buffer, want_size, true); -} - -int git_buffer_copy( - git_buffer *buffer, const void *data, size_t datalen) -{ - if (git_buffer__resize(buffer, datalen + 1, false) < 0) - return -1; - memcpy(buffer->ptr, data, datalen); - buffer->ptr[datalen] = '\0'; - buffer->size = datalen; - return 0; -} - diff --git a/src/buffer.h b/src/buffer.h index e07f29131..4ca9d4d94 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -12,16 +12,23 @@ #include "git2/buffer.h" #include -typedef struct { - char *ptr; - size_t asize, size; -} git_buf; +/* typedef struct { + * char *ptr; + * size_t asize, size; + * } git_buf; + */ extern char git_buf__initbuf[]; extern char git_buf__oom[]; +/* Use to initialize buffer structure when git_buf is on stack */ #define GIT_BUF_INIT { git_buf__initbuf, 0, 0 } +GIT_INLINE(bool) git_buf_is_allocated(const git_buf *buf) +{ + return (buf->ptr != NULL && buf->asize > 0); +} + /** * Initialize a git_buf structure. * @@ -33,27 +40,16 @@ extern void git_buf_init(git_buf *buf, size_t initial_size); /** * Attempt to grow the buffer to hold at least `target_size` bytes. * - * If the allocation fails, this will return an error. If mark_oom is true, + * If the allocation fails, this will return an error. 
If `mark_oom` is true, * this will mark the buffer as invalid for future operations; if false, * existing buffer content will be preserved, but calling code must handle - * that buffer was not expanded. + * that buffer was not expanded. If `preserve_external` is true, then any + * existing data pointed to by `ptr` even if `asize` is zero will be copied + * into the newly allocated buffer. */ -extern int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom); +extern int git_buf_try_grow( + git_buf *buf, size_t target_size, bool mark_oom, bool preserve_external); -/** - * Grow the buffer to hold at least `target_size` bytes. - * - * If the allocation fails, this will return an error and the buffer will be - * marked as invalid for future operations, invaliding contents. - * - * @return 0 on success or -1 on failure - */ -GIT_INLINE(int) git_buf_grow(git_buf *buf, size_t target_size) -{ - return git_buf_try_grow(buf, target_size, true); -} - -extern void git_buf_free(git_buf *buf); extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b); extern char *git_buf_detach(git_buf *buf); extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize); @@ -82,7 +78,6 @@ GIT_INLINE(bool) git_buf_oom(const git_buf *buf) * return code of these functions and call them in a series then just call * git_buf_oom at the end. 
*/ -int git_buf_set(git_buf *buf, const char *data, size_t len); int git_buf_sets(git_buf *buf, const char *string); int git_buf_putc(git_buf *buf, char c); int git_buf_put(git_buf *buf, const char *data, size_t len); @@ -175,30 +170,4 @@ int git_buf_splice( const char *data, size_t nb_to_insert); - -GIT_INLINE(bool) git_buffer_is_allocated(const git_buffer *buffer) -{ - return (buffer->ptr != NULL && buffer->available > 0); -} - -#define GIT_BUF_FROM_BUFFER(buffer) \ - { (buffer)->ptr, (buffer)->available, (buffer)->size } - -GIT_INLINE(void) git_buf_from_buffer(git_buf *buf, const git_buffer *buffer) -{ - buf->ptr = buffer->ptr; - buf->size = buffer->size; - buf->asize = buffer->available; -} - -#define GIT_BUFFER_FROM_BUF(buf) \ - { (buf)->ptr, (buf)->size, (buf)->asize } - -GIT_INLINE(void) git_buffer_from_buf(git_buffer *buffer, const git_buf *buf) -{ - buffer->ptr = buf->ptr; - buffer->size = buf->size; - buffer->available = buf->asize; -} - #endif diff --git a/src/checkout.c b/src/checkout.c index 7e79c9b5e..140544c77 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -678,20 +678,19 @@ fail: static int buffer_to_file( struct stat *st, - git_buffer *buffer, + git_buf *buf, const char *path, mode_t dir_mode, int file_open_flags, mode_t file_mode) { int error; - git_buf buf = GIT_BUF_FROM_BUFFER(buffer); if ((error = git_futils_mkpath2file(path, dir_mode)) < 0) return error; if ((error = git_futils_writebuffer( - &buf, path, file_open_flags, file_mode)) < 0) + buf, path, file_open_flags, file_mode)) < 0) return error; if (st != NULL && (error = p_stat(path, st)) < 0) @@ -713,7 +712,7 @@ static int blob_content_to_file( { int error = 0; mode_t file_mode = opts->file_mode ? 
opts->file_mode : entry_filemode; - git_buffer out = GIT_BUFFER_INIT; + git_buf out = GIT_BUF_INIT; git_filter_list *fl = NULL; if (!opts->disable_filters && !git_blob_is_binary(blob)) @@ -731,7 +730,7 @@ static int blob_content_to_file( st->st_mode = entry_filemode; - git_buffer_free(&out); + git_buf_free(&out); } return error; diff --git a/src/config_file.c b/src/config_file.c index bd4fa7471..d0910a26c 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -293,6 +293,8 @@ static int config_iterator_new( diskfile_backend *b = (diskfile_backend *)backend; git_config_file_iter *it = git__calloc(1, sizeof(git_config_file_iter)); + GIT_UNUSED(b); + GITERR_CHECK_ALLOC(it); it->parent.backend = backend; diff --git a/src/crlf.c b/src/crlf.c index f61a870da..bde85ca06 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -119,15 +119,12 @@ static int has_cr_in_index(const git_filter_source *src) static int crlf_apply_to_odb( struct crlf_attrs *ca, - git_buffer *to, - const git_buffer *from, + git_buf *to, + const git_buf *from, const git_filter_source *src) { - const git_buf from_buf = GIT_BUF_FROM_BUFFER(from); - git_buf to_buf = GIT_BUF_FROM_BUFFER(to); - /* Empty file? Nothing to do */ - if (!git_buf_len(&from_buf)) + if (!git_buf_len(from)) return 0; /* Heuristics to see if we can skip the conversion. @@ -137,7 +134,7 @@ static int crlf_apply_to_odb( git_buf_text_stats stats; /* Check heuristics for binary vs text... 
*/ - if (git_buf_text_gather_stats(&stats, &from_buf, false)) + if (git_buf_text_gather_stats(&stats, from, false)) return -1; /* @@ -162,13 +159,7 @@ static int crlf_apply_to_odb( } /* Actually drop the carriage returns */ - if (git_buf_text_crlf_to_lf(&to_buf, &from_buf) < 0) - return -1; - - /* Overwrite "to" buffer in case data was resized */ - git_buffer_from_buf(to, &to_buf); - - return 0; + return git_buf_text_crlf_to_lf(to, from); } static const char *line_ending(struct crlf_attrs *ca) @@ -210,14 +201,12 @@ line_ending_error: } static int crlf_apply_to_workdir( - struct crlf_attrs *ca, git_buffer *to, const git_buffer *from) + struct crlf_attrs *ca, git_buf *to, const git_buf *from) { - const git_buf from_buf = GIT_BUF_FROM_BUFFER(from); - git_buf to_buf = GIT_BUF_FROM_BUFFER(to); const char *workdir_ending = NULL; /* Empty file? Nothing to do. */ - if (git_buf_len(&from_buf) == 0) + if (git_buf_len(from) == 0) return 0; /* Determine proper line ending */ @@ -229,22 +218,19 @@ static int crlf_apply_to_workdir( if (ca->crlf_action == GIT_CRLF_GUESS && ca->auto_crlf) return GIT_ENOTFOUND; - if (git_buf_find(&from_buf, '\r') < 0) + if (git_buf_find(from, '\r') < 0) return GIT_ENOTFOUND; - if (git_buf_text_crlf_to_lf(&to_buf, &from_buf) < 0) + if (git_buf_text_crlf_to_lf(to, from) < 0) return -1; } else { /* only other supported option is lf->crlf conversion */ assert(!strcmp("\r\n", workdir_ending)); - if (git_buf_text_lf_to_crlf(&to_buf, &from_buf) < 0) + if (git_buf_text_lf_to_crlf(to, from) < 0) return -1; } - /* Overwrite "to" buffer in case data was resized */ - git_buffer_from_buf(to, &to_buf); - return 0; } @@ -297,10 +283,10 @@ static int crlf_check( } static int crlf_apply( - git_filter *self, - void **payload, /* may be read and/or set */ - git_buffer *to, - const git_buffer *from, + git_filter *self, + void **payload, /* may be read and/or set */ + git_buf *to, + const git_buf *from, const git_filter_source *src) { /* initialize payload in case 
`check` was bypassed */ diff --git a/src/diff_file.c b/src/diff_file.c index d02787c75..5939ee8b8 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -327,11 +327,11 @@ static int diff_file_content_load_workdir_file( } if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size))) { - git_buffer in = GIT_BUFFER_FROM_BUF(&raw), out = GIT_BUFFER_INIT; + git_buf out = GIT_BUF_INIT; - error = git_filter_list_apply_to_data(&out, fl, &in); + error = git_filter_list_apply_to_data(&out, fl, &raw); - git_buffer_free(&in); + git_buf_free(&raw); if (!error) { fc->map.len = out.size; diff --git a/src/filter.c b/src/filter.c index f20611471..08467a631 100644 --- a/src/filter.c +++ b/src/filter.c @@ -549,23 +549,28 @@ int git_filter_list_push( } static int filter_list_out_buffer_from_raw( - git_buffer *out, const void *ptr, size_t size) + git_buf *out, const void *ptr, size_t size) { - if (git_buffer_is_allocated(out)) - git_buffer_free(out); + if (git_buf_is_allocated(out)) + git_buf_free(out); + + if (!size) { + git_buf_init(out, 0); + } else { + out->ptr = (char *)ptr; + out->asize = 0; + out->size = size; + } - out->ptr = (char *)ptr; - out->size = size; - out->available = 0; return 0; } int git_filter_list_apply_to_data( - git_buffer *tgt, git_filter_list *fl, git_buffer *src) + git_buf *tgt, git_filter_list *fl, git_buf *src) { int error = 0; uint32_t i; - git_buffer *dbuffer[2], local = GIT_BUFFER_INIT; + git_buf *dbuffer[2], local = GIT_BUF_INIT; unsigned int si = 0; if (!fl) @@ -575,8 +580,8 @@ int git_filter_list_apply_to_data( dbuffer[1] = tgt; /* if `src` buffer is reallocable, then use it, otherwise copy it */ - if (!git_buffer_is_allocated(src)) { - if (git_buffer_copy(&local, src->ptr, src->size) < 0) + if (!git_buf_is_allocated(src)) { + if (git_buf_set(&local, src->ptr, src->size) < 0) return -1; dbuffer[0] = &local; } @@ -610,19 +615,16 @@ int git_filter_list_apply_to_data( } /* Ensure that the output ends up in dbuffer[1] (i.e. 
the dest) */ - if (si != 1) { - git_buffer sw = *dbuffer[1]; - *dbuffer[1] = *dbuffer[0]; - *dbuffer[0] = sw; - } + if (si != 1) + git_buf_swap(dbuffer[0], dbuffer[1]); - git_buffer_free(&local); /* don't leak if we allocated locally */ + git_buf_free(&local); /* don't leak if we allocated locally */ return 0; } int git_filter_list_apply_to_file( - git_buffer *out, + git_buf *out, git_filter_list *filters, git_repository *repo, const char *path) @@ -634,11 +636,9 @@ int git_filter_list_apply_to_file( if (!(error = git_path_join_unrooted(&abspath, path, base, NULL)) && !(error = git_futils_readbuffer(&raw, abspath.ptr))) { - git_buffer in = GIT_BUFFER_FROM_BUF(&raw); + error = git_filter_list_apply_to_data(out, filters, &raw); - error = git_filter_list_apply_to_data(out, filters, &in); - - git_buffer_free(&in); + git_buf_free(&raw); } git_buf_free(&abspath); @@ -646,12 +646,12 @@ int git_filter_list_apply_to_file( } int git_filter_list_apply_to_blob( - git_buffer *out, + git_buf *out, git_filter_list *filters, git_blob *blob) { - git_buffer in = { - (char *)git_blob_rawcontent(blob), git_blob_rawsize(blob), 0 + git_buf in = { + (char *)git_blob_rawcontent(blob), 0, git_blob_rawsize(blob) }; if (filters) diff --git a/src/ident.c b/src/ident.c index aedb973f9..3ea949859 100644 --- a/src/ident.c +++ b/src/ident.c @@ -9,7 +9,7 @@ #include "filter.h" #include "buffer.h" -int ident_find_id( +static int ident_find_id( const char **id_start, const char **id_end, const char *start, size_t len) { const char *found; @@ -36,12 +36,11 @@ int ident_find_id( } static int ident_insert_id( - git_buffer *to, const git_buffer *from, const git_filter_source *src) + git_buf *to, const git_buf *from, const git_filter_source *src) { char oid[GIT_OID_HEXSZ+1]; const char *id_start, *id_end, *from_end = from->ptr + from->size; size_t need_size; - git_buf to_buf = GIT_BUF_FROM_BUFFER(to); /* replace $Id$ with blob id */ @@ -57,28 +56,23 @@ static int ident_insert_id( 5 /* "$Id: " */ + 
GIT_OID_HEXSZ + 1 /* "$" */ + (size_t)(from_end - id_end); - if (git_buf_grow(&to_buf, need_size) < 0) + if (git_buf_grow(to, need_size) < 0) return -1; - git_buf_set(&to_buf, from->ptr, (size_t)(id_start - from->ptr)); - git_buf_put(&to_buf, "$Id: ", 5); - git_buf_put(&to_buf, oid, GIT_OID_HEXSZ); - git_buf_putc(&to_buf, '$'); - git_buf_put(&to_buf, id_end, (size_t)(from_end - id_end)); + git_buf_set(to, from->ptr, (size_t)(id_start - from->ptr)); + git_buf_put(to, "$Id: ", 5); + git_buf_put(to, oid, GIT_OID_HEXSZ); + git_buf_putc(to, '$'); + git_buf_put(to, id_end, (size_t)(from_end - id_end)); - if (git_buf_oom(&to_buf)) - return -1; - - git_buffer_from_buf(to, &to_buf); - return 0; + return git_buf_oom(to) ? -1 : 0; } static int ident_remove_id( - git_buffer *to, const git_buffer *from) + git_buf *to, const git_buf *from) { const char *id_start, *id_end, *from_end = from->ptr + from->size; size_t need_size; - git_buf to_buf = GIT_BUF_FROM_BUFFER(to); if (ident_find_id(&id_start, &id_end, from->ptr, from->size) < 0) return GIT_ENOTFOUND; @@ -86,25 +80,21 @@ static int ident_remove_id( need_size = (size_t)(id_start - from->ptr) + 4 /* "$Id$" */ + (size_t)(from_end - id_end); - if (git_buf_grow(&to_buf, need_size) < 0) + if (git_buf_grow(to, need_size) < 0) return -1; - git_buf_set(&to_buf, from->ptr, (size_t)(id_start - from->ptr)); - git_buf_put(&to_buf, "$Id$", 4); - git_buf_put(&to_buf, id_end, (size_t)(from_end - id_end)); + git_buf_set(to, from->ptr, (size_t)(id_start - from->ptr)); + git_buf_put(to, "$Id$", 4); + git_buf_put(to, id_end, (size_t)(from_end - id_end)); - if (git_buf_oom(&to_buf)) - return -1; - - git_buffer_from_buf(to, &to_buf); - return 0; + return git_buf_oom(to) ? 
-1 : 0; } static int ident_apply( - git_filter *self, - void **payload, - git_buffer *to, - const git_buffer *from, + git_filter *self, + void **payload, + git_buf *to, + const git_buf *from, const git_filter_source *src) { GIT_UNUSED(self); GIT_UNUSED(payload); diff --git a/src/odb.c b/src/odb.c index b71b038bf..eef9748ca 100644 --- a/src/odb.c +++ b/src/odb.c @@ -192,16 +192,16 @@ int git_odb__hashfd_filtered( */ if (!(error = git_futils_readbuffer_fd(&raw, fd, size))) { - git_buffer pre = GIT_BUFFER_FROM_BUF(&raw), post = GIT_BUFFER_INIT; + git_buf post = GIT_BUF_INIT; - error = git_filter_list_apply_to_data(&post, fl, &pre); + error = git_filter_list_apply_to_data(&post, fl, &raw); - git_buffer_free(&pre); + git_buf_free(&raw); if (!error) error = git_odb_hash(out, post.ptr, post.size, type); - git_buffer_free(&post); + git_buf_free(&post); } return error; diff --git a/src/path.c b/src/path.c index 56b0b49ca..42b3d6f3e 100644 --- a/src/path.c +++ b/src/path.c @@ -565,7 +565,7 @@ static bool _check_dir_contents( size_t sub_size = strlen(sub); /* leave base valid even if we could not make space for subdir */ - if (git_buf_try_grow(dir, dir_size + sub_size + 2, false) < 0) + if (git_buf_try_grow(dir, dir_size + sub_size + 2, false, false) < 0) return false; /* save excursion */ diff --git a/src/util.c b/src/util.c index d0c326ae5..151782346 100644 --- a/src/util.c +++ b/src/util.c @@ -679,6 +679,9 @@ size_t git__unescape(char *str) { char *scan, *pos = str; + if (!str) + return 0; + for (scan = str; *scan; pos++, scan++) { if (*scan == '\\' && *(scan + 1) != '\0') scan++; /* skip '\' but include next char */ diff --git a/tests-clar/filter/blob.c b/tests-clar/filter/blob.c index c265ed67a..916721e12 100644 --- a/tests-clar/filter/blob.c +++ b/tests-clar/filter/blob.c @@ -23,7 +23,7 @@ void test_filter_blob__cleanup(void) void test_filter_blob__all_crlf(void) { git_blob *blob; - git_buffer buf = GIT_BUFFER_INIT; + git_buf buf = { 0 }; 
cl_git_pass(git_revparse_single( (git_object **)&blob, g_repo, "a9a2e891")); /* all-crlf */ @@ -43,7 +43,7 @@ void test_filter_blob__all_crlf(void) cl_assert_equal_s(ALL_CRLF_TEXT_AS_LF, buf.ptr); - git_buffer_free(&buf); + git_buf_free(&buf); git_blob_free(blob); } @@ -51,7 +51,7 @@ void test_filter_blob__ident(void) { git_oid id; git_blob *blob; - git_buffer buf = GIT_BUFFER_INIT; + git_buf buf = { 0 }; cl_git_mkfile("crlf/test.ident", "Some text\n$Id$\nGoes there\n"); cl_git_pass(git_blob_create_fromworkdir(&id, g_repo, "test.ident")); @@ -78,4 +78,7 @@ void test_filter_blob__ident(void) cl_assert_equal_s( "Some text\n$Id: 3164f585d548ac68027d22b104f2d8100b2b6845$\nGoes there\n", buf.ptr); + git_buf_free(&buf); + git_blob_free(blob); + } diff --git a/tests-clar/filter/crlf.c b/tests-clar/filter/crlf.c index 098a85d4c..ccd7ef450 100644 --- a/tests-clar/filter/crlf.c +++ b/tests-clar/filter/crlf.c @@ -20,7 +20,7 @@ void test_filter_crlf__to_worktree(void) { git_filter_list *fl; git_filter *crlf; - git_buffer in = GIT_BUFFER_INIT, out = GIT_BUFFER_INIT; + git_buf in = { 0 }, out = { 0 }; { git_config *cfg; @@ -48,14 +48,14 @@ void test_filter_crlf__to_worktree(void) #endif git_filter_list_free(fl); - git_buffer_free(&out); + git_buf_free(&out); } void test_filter_crlf__to_odb(void) { git_filter_list *fl; git_filter *crlf; - git_buffer in = GIT_BUFFER_INIT, out = GIT_BUFFER_INIT; + git_buf in = { 0 }, out = { 0 }; { git_config *cfg; @@ -79,5 +79,5 @@ void test_filter_crlf__to_odb(void) cl_assert_equal_s("Some text\nRight here\n", out.ptr); git_filter_list_free(fl); - git_buffer_free(&out); + git_buf_free(&out); } diff --git a/tests-clar/filter/ident.c b/tests-clar/filter/ident.c index 55774fbdd..2c8e6abea 100644 --- a/tests-clar/filter/ident.c +++ b/tests-clar/filter/ident.c @@ -20,7 +20,7 @@ static void add_blob_and_filter( { git_oid id; git_blob *blob; - git_buffer out = GIT_BUFFER_INIT; + git_buf out = { 0 }; cl_git_mkfile("crlf/identtest", data); 
cl_git_pass(git_blob_create_fromworkdir(&id, g_repo, "identtest")); @@ -31,7 +31,7 @@ static void add_blob_and_filter( cl_assert_equal_s(expected, out.ptr); git_blob_free(blob); - git_buffer_free(&out); + git_buf_free(&out); } void test_filter_ident__to_worktree(void) diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 1e82b69cd..6dc7800db 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -104,7 +104,7 @@ void test_object_blob_filter__to_odb(void) git_config *cfg; int i; git_blob *blob; - git_buffer out = GIT_BUFFER_INIT; + git_buf out = GIT_BUF_INIT; cl_git_pass(git_repository_config(&cfg, g_repo)); cl_assert(cfg); @@ -129,7 +129,7 @@ void test_object_blob_filter__to_odb(void) } git_filter_list_free(fl); - git_buffer_free(&out); + git_buf_free(&out); git_config_free(cfg); } From 0e32635fcf9a874fe66f871e88c0bbc0511544f1 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Thu, 12 Sep 2013 14:47:15 -0700 Subject: [PATCH 13/25] Move binary check to CRLF filter itself Checkout should not reject binary files from filters, as a filter may actually wish to operate on binary files. The CRLF filter should reject binary files itself if it wishes to. Moreover, the CRLF filter requires this logic so that users can emulate the checkout data in their odb -> workdir filtering. 
Conflicts: src/checkout.c src/crlf.c --- src/checkout.c | 2 +- src/crlf.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/checkout.c b/src/checkout.c index 140544c77..0e9d11bff 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -715,7 +715,7 @@ static int blob_content_to_file( git_buf out = GIT_BUF_INIT; git_filter_list *fl = NULL; - if (!opts->disable_filters && !git_blob_is_binary(blob)) + if (!opts->disable_filters) error = git_filter_list_load( &fl, git_blob_owner(blob), blob, path, GIT_FILTER_TO_WORKTREE); diff --git a/src/crlf.c b/src/crlf.c index bde85ca06..6b1fe46a3 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -209,6 +209,10 @@ static int crlf_apply_to_workdir( if (git_buf_len(from) == 0) return 0; + /* Don't filter binary files */ + if (git_buf_text_is_binary(from)) + return GIT_ENOTFOUND; + /* Determine proper line ending */ workdir_ending = line_ending(ca); if (!workdir_ending) From b47349b8dc742943e063cb3d755b9db66203e3d9 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Thu, 12 Sep 2013 14:48:24 -0700 Subject: [PATCH 14/25] Port tests from PR 1683 This ports over some of the tests from https://github.com/libgit2/libgit2/pull/1683 by @yorah and @ethomson --- include/git2/buffer.h | 5 + include/git2/sys/filter.h | 13 ++ src/filter.c | 5 + tests-clar/filter/crlf.c | 20 +-- tests-clar/filter/crlf.h | 1 - tests-clar/filter/custom.c | 247 ++++++++++++++++++++++++++++++++ tests-clar/object/blob/filter.c | 90 ++++++------ 7 files changed, 325 insertions(+), 56 deletions(-) create mode 100644 tests-clar/filter/custom.c diff --git a/include/git2/buffer.h b/include/git2/buffer.h index ae8681f13..36a61e6c9 100644 --- a/include/git2/buffer.h +++ b/include/git2/buffer.h @@ -54,6 +54,11 @@ typedef struct { size_t asize, size; } git_buf; +/** + * Static initializer for git_buf from static buffer + */ +#define GIT_BUF_INIT_CONST(STR,LEN) { (char *)(STR), 0, (size_t)(LEN) } + /** * Free the memory referred to by the git_buf. 
* diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index 36e97fe91..9a6720a3e 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -59,6 +59,19 @@ GIT_EXTERN(int) git_filter_list_new( GIT_EXTERN(int) git_filter_list_push( git_filter_list *fl, git_filter *filter, void *payload); +/** + * Look up how many filters are in the list + * + * We will attempt to apply all of these filters to any data passed in, + * but note that the filter apply action still has the option of skipping + * data that is passed in (for example, the CRLF filter will skip data + * that appears to be binary). + * + * @param fl A filter list + * @return The number of filters in the list + */ +GIT_EXTERN(size_t) git_filter_list_length(const git_filter_list *fl); + /** * A filter source represents a file/blob to be processed */ diff --git a/src/filter.c b/src/filter.c index 08467a631..0375b8b0e 100644 --- a/src/filter.c +++ b/src/filter.c @@ -548,6 +548,11 @@ int git_filter_list_push( return 0; } +size_t git_filter_list_length(const git_filter_list *fl) +{ + return fl ? 
git_array_size(fl->filters) : 0; +} + static int filter_list_out_buffer_from_raw( git_buf *out, const void *ptr, size_t size) { diff --git a/tests-clar/filter/crlf.c b/tests-clar/filter/crlf.c index ccd7ef450..ece2e6e5e 100644 --- a/tests-clar/filter/crlf.c +++ b/tests-clar/filter/crlf.c @@ -5,10 +5,16 @@ static git_repository *g_repo = NULL; void test_filter_crlf__initialize(void) { + git_config *cfg; + g_repo = cl_git_sandbox_init("crlf"); cl_git_mkfile("crlf/.gitattributes", "*.txt text\n*.bin binary\n*.crlf text eol=crlf\n*.lf text eol=lf\n"); + + cl_git_pass(git_repository_config(&cfg, g_repo)); + cl_git_pass(git_config_set_string(cfg, "core.autocrlf", "true")); + git_config_free(cfg); } void test_filter_crlf__cleanup(void) @@ -22,13 +28,6 @@ void test_filter_crlf__to_worktree(void) git_filter *crlf; git_buf in = { 0 }, out = { 0 }; - { - git_config *cfg; - cl_git_pass(git_repository_config(&cfg, g_repo)); - cl_git_pass(git_config_set_string(cfg, "core.autocrlf", "true")); - git_config_free(cfg); - } - cl_git_pass(git_filter_list_new(&fl, g_repo, GIT_FILTER_TO_WORKTREE)); crlf = git_filter_lookup(GIT_FILTER_CRLF); @@ -57,13 +56,6 @@ void test_filter_crlf__to_odb(void) git_filter *crlf; git_buf in = { 0 }, out = { 0 }; - { - git_config *cfg; - cl_git_pass(git_repository_config(&cfg, g_repo)); - cl_git_pass(git_config_set_string(cfg, "core.autocrlf", "true")); - git_config_free(cfg); - } - cl_git_pass(git_filter_list_new(&fl, g_repo, GIT_FILTER_TO_ODB)); crlf = git_filter_lookup(GIT_FILTER_CRLF); diff --git a/tests-clar/filter/crlf.h b/tests-clar/filter/crlf.h index 8fadee950..9cb98ad4c 100644 --- a/tests-clar/filter/crlf.h +++ b/tests-clar/filter/crlf.h @@ -22,5 +22,4 @@ #define MORE_CRLF_TEXT_AS_LF "crlf\ncrlf\nlf\ncrlf\ncrlf\n" #define MORE_LF_TEXT_AS_LF "lf\nlf\ncrlf\nlf\nlf\n" - #endif diff --git a/tests-clar/filter/custom.c b/tests-clar/filter/custom.c new file mode 100644 index 000000000..4a2ff9fc4 --- /dev/null +++ b/tests-clar/filter/custom.c @@ -0,0 
+1,247 @@ +#include "clar_libgit2.h" +#include "posix.h" +#include "blob.h" +#include "filter.h" +#include "buf_text.h" +#include "git2/sys/filter.h" +#include "git2/sys/repository.h" + +#define BITFLIP_FILTER_PRIORITY 20 +#define REVERSE_FILTER_PRIORITY 25 + +#define VERY_SECURE_ENCRYPTION(b) ((b) ^ 0xff) + +#ifdef GIT_WIN32 +# define NEWLINE "\r\n" +#else +# define NEWLINE "\n" +#endif + +static char workdir_data[] = + "some simple" NEWLINE + "data" NEWLINE + "that will be" NEWLINE + "trivially" NEWLINE + "scrambled." NEWLINE; + +/* Represents the data above scrambled (bits flipped) after \r\n -> \n + * conversion, then bytewise reversed + */ +static unsigned char bitflipped_and_reversed_data[] = + { 0xf5, 0xd1, 0x9b, 0x9a, 0x93, 0x9d, 0x92, 0x9e, 0x8d, 0x9c, 0x8c, + 0xf5, 0x86, 0x93, 0x93, 0x9e, 0x96, 0x89, 0x96, 0x8d, 0x8b, 0xf5, + 0x9a, 0x9d, 0xdf, 0x93, 0x93, 0x96, 0x88, 0xdf, 0x8b, 0x9e, 0x97, + 0x8b, 0xf5, 0x9e, 0x8b, 0x9e, 0x9b, 0xf5, 0x9a, 0x93, 0x8f, 0x92, + 0x96, 0x8c, 0xdf, 0x9a, 0x92, 0x90, 0x8c }; + +#define BITFLIPPED_AND_REVERSED_DATA_LEN 51 + +static git_repository *g_repo = NULL; + +static void register_custom_filters(void); + +void test_filter_custom__initialize(void) +{ + register_custom_filters(); + + g_repo = cl_git_sandbox_init("empty_standard_repo"); + + cl_git_mkfile( + "empty_standard_repo/.gitattributes", + "hero* bitflip reverse\n" + "herofile text\n" + "heroflip -reverse\n"); +} + +void test_filter_custom__cleanup(void) +{ + cl_git_sandbox_cleanup(); + g_repo = NULL; +} + +static int bitflip_filter_apply( + git_filter *self, + void **payload, + git_buf *to, + const git_buf *from, + const git_filter_source *source) +{ + const unsigned char *src = (const unsigned char *)from->ptr; + unsigned char *dst; + size_t i; + + GIT_UNUSED(self); GIT_UNUSED(payload); + + /* verify that attribute path match worked as expected */ + cl_assert_equal_i( + 0, git__strncmp("hero", git_filter_source_path(source), 4)); + + if (!from->size) + return 0; + + 
cl_git_pass(git_buf_grow(to, from->size)); + + dst = (unsigned char *)to->ptr; + + for (i = 0; i < from->size; i++) + dst[i] = VERY_SECURE_ENCRYPTION(src[i]); + + to->size = from->size; + + return 0; +} + +static void bitflip_filter_free(git_filter *f) +{ + git__free(f); +} + +static git_filter *create_bitflip_filter(void) +{ + git_filter *filter = git__calloc(1, sizeof(git_filter)); + cl_assert(filter); + + filter->version = GIT_FILTER_VERSION; + filter->attributes = "+bitflip"; + filter->shutdown = bitflip_filter_free; + filter->apply = bitflip_filter_apply; + + return filter; +} + + +static int reverse_filter_apply( + git_filter *self, + void **payload, + git_buf *to, + const git_buf *from, + const git_filter_source *source) +{ + const unsigned char *src = (const unsigned char *)from->ptr; + const unsigned char *end = src + from->size; + unsigned char *dst; + + GIT_UNUSED(self); GIT_UNUSED(payload); GIT_UNUSED(source); + + /* verify that attribute path match worked as expected */ + cl_assert_equal_i( + 0, git__strncmp("hero", git_filter_source_path(source), 4)); + + if (!from->size) + return 0; + + cl_git_pass(git_buf_grow(to, from->size)); + + dst = (unsigned char *)to->ptr + from->size - 1; + + while (src < end) + *dst-- = *src++; + + to->size = from->size; + + return 0; +} + +static void reverse_filter_free(git_filter *f) +{ + git__free(f); +} + +static git_filter *create_reverse_filter(void) +{ + git_filter *filter = git__calloc(1, sizeof(git_filter)); + cl_assert(filter); + + filter->version = GIT_FILTER_VERSION; + filter->attributes = "+reverse"; + filter->shutdown = reverse_filter_free; + filter->apply = reverse_filter_apply; + + return filter; +} + +static void register_custom_filters(void) +{ + static int filters_registered = 0; + + if (!filters_registered) { + cl_git_pass(git_filter_register( + "bitflip", create_bitflip_filter(), BITFLIP_FILTER_PRIORITY)); + + cl_git_pass(git_filter_register( + "reverse", create_reverse_filter(), 
REVERSE_FILTER_PRIORITY)); + + filters_registered = 1; + } +} + + +void test_filter_custom__to_odb(void) +{ + git_filter_list *fl; + git_buf out = { 0 }; + git_buf in = GIT_BUF_INIT_CONST(workdir_data, strlen(workdir_data)); + + cl_git_pass(git_filter_list_load( + &fl, g_repo, NULL, "herofile", GIT_FILTER_TO_ODB)); + + cl_git_pass(git_filter_list_apply_to_data(&out, fl, &in)); + + cl_assert_equal_i(BITFLIPPED_AND_REVERSED_DATA_LEN, out.size); + + cl_assert_equal_i( + 0, memcmp(bitflipped_and_reversed_data, out.ptr, out.size)); + + git_filter_list_free(fl); + git_buf_free(&out); +} + +void test_filter_custom__to_workdir(void) +{ + git_filter_list *fl; + git_buf out = { 0 }; + git_buf in = GIT_BUF_INIT_CONST( + bitflipped_and_reversed_data, BITFLIPPED_AND_REVERSED_DATA_LEN); + + cl_git_pass(git_filter_list_load( + &fl, g_repo, NULL, "herofile", GIT_FILTER_TO_WORKTREE)); + + cl_git_pass(git_filter_list_apply_to_data(&out, fl, &in)); + + cl_assert_equal_i(strlen(workdir_data), out.size); + + cl_assert_equal_i( + 0, memcmp(workdir_data, out.ptr, out.size)); + + git_filter_list_free(fl); + git_buf_free(&out); +} + +void test_filter_custom__can_register_a_custom_filter_in_the_repository(void) +{ + git_filter_list *fl; + + cl_git_pass(git_filter_list_load( + &fl, g_repo, NULL, "herofile", GIT_FILTER_TO_WORKTREE)); + /* expect: bitflip, reverse, crlf */ + cl_assert_equal_sz(3, git_filter_list_length(fl)); + git_filter_list_free(fl); + + cl_git_pass(git_filter_list_load( + &fl, g_repo, NULL, "herocorp", GIT_FILTER_TO_WORKTREE)); + /* expect: bitflip, reverse */ + cl_assert_equal_sz(2, git_filter_list_length(fl)); + git_filter_list_free(fl); + + cl_git_pass(git_filter_list_load( + &fl, g_repo, NULL, "heroflip", GIT_FILTER_TO_WORKTREE)); + /* expect: bitflip (because of -reverse) */ + cl_assert_equal_sz(1, git_filter_list_length(fl)); + git_filter_list_free(fl); + + cl_git_pass(git_filter_list_load( + &fl, g_repo, NULL, "doesntapplytome", GIT_FILTER_TO_WORKTREE)); + /* expect: 
none */ + cl_assert_equal_sz(0, git_filter_list_length(fl)); + git_filter_list_free(fl); +} diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 6dc7800db..0b2d6bf9e 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -4,9 +4,10 @@ #include "buf_text.h" static git_repository *g_repo = NULL; -#define NUM_TEST_OBJECTS 9 -static git_oid g_oids[NUM_TEST_OBJECTS]; -static const char *g_raw[NUM_TEST_OBJECTS] = { + +#define CRLF_NUM_TEST_OBJECTS 9 + +static const char *g_crlf_raw[CRLF_NUM_TEST_OBJECTS] = { "", "foo\nbar\n", "foo\rbar\r", @@ -17,19 +18,14 @@ static const char *g_raw[NUM_TEST_OBJECTS] = { "\xEF\xBB\xBF\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\r\n\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\r\n", "\xFE\xFF\x00T\x00h\x00i\x00s\x00!" }; -static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, -1, 12 }; -static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = { - { 0, 0, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 2, 0, 6, 0 }, - { 0, 0, 2, 0, 0, 6, 0 }, - { 0, 0, 2, 2, 2, 6, 0 }, - { 0, 0, 4, 4, 1, 31, 0 }, - { 0, 1, 1, 2, 1, 9, 5 }, - { GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 }, - { GIT_BOM_UTF8, 0, 2, 2, 2, 27, 0 }, - { GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 }, + +static git_off_t g_crlf_raw_len[CRLF_NUM_TEST_OBJECTS] = { + -1, -1, -1, -1, -1, 17, -1, -1, 12 }; -static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { + +static git_oid g_crlf_oids[CRLF_NUM_TEST_OBJECTS]; + +static git_buf g_crlf_filtered[CRLF_NUM_TEST_OBJECTS] = { { "", 0, 0 }, { "foo\nbar\n", 0, 8 }, { "foo\rbar\r", 0, 8 }, @@ -41,30 +37,36 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { { "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 } }; +static git_buf_text_stats g_crlf_filtered_stats[CRLF_NUM_TEST_OBJECTS] = { + { 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 2, 0, 6, 0 }, + { 0, 0, 2, 0, 0, 6, 0 }, + { 0, 0, 2, 2, 2, 6, 0 }, + { 0, 0, 4, 4, 1, 31, 0 }, + { 0, 1, 1, 2, 1, 9, 5 }, + { GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 }, + { 
GIT_BOM_UTF8, 0, 2, 2, 2, 27, 0 }, + { GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 }, +}; + void test_object_blob_filter__initialize(void) { int i; - cl_fixture_sandbox("empty_standard_repo"); - cl_git_pass(p_rename( - "empty_standard_repo/.gitted", "empty_standard_repo/.git")); - cl_git_pass(git_repository_open(&g_repo, "empty_standard_repo")); + g_repo = cl_git_sandbox_init("empty_standard_repo"); - for (i = 0; i < NUM_TEST_OBJECTS; i++) { - size_t len = (g_len[i] < 0) ? strlen(g_raw[i]) : (size_t)g_len[i]; - g_len[i] = (git_off_t)len; + for (i = 0; i < CRLF_NUM_TEST_OBJECTS; i++) { + if (g_crlf_raw_len[i] < 0) + g_crlf_raw_len[i] = strlen(g_crlf_raw[i]); - cl_git_pass( - git_blob_create_frombuffer(&g_oids[i], g_repo, g_raw[i], len) - ); + cl_git_pass(git_blob_create_frombuffer( + &g_crlf_oids[i], g_repo, g_crlf_raw[i], (size_t)g_crlf_raw_len[i])); } } void test_object_blob_filter__cleanup(void) { - git_repository_free(g_repo); - g_repo = NULL; - cl_fixture_cleanup("empty_standard_repo"); + cl_git_sandbox_cleanup(); } void test_object_blob_filter__unfiltered(void) @@ -72,10 +74,15 @@ void test_object_blob_filter__unfiltered(void) int i; git_blob *blob; - for (i = 0; i < NUM_TEST_OBJECTS; i++) { - cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); - cl_assert(g_len[i] == git_blob_rawsize(blob)); - cl_assert(memcmp(git_blob_rawcontent(blob), g_raw[i], (size_t)g_len[i]) == 0); + for (i = 0; i < CRLF_NUM_TEST_OBJECTS; i++) { + size_t raw_len = (size_t)g_crlf_raw_len[i]; + + cl_git_pass(git_blob_lookup(&blob, g_repo, &g_crlf_oids[i])); + + cl_assert_equal_sz(raw_len, (size_t)git_blob_rawsize(blob)); + cl_assert_equal_i( + 0, memcmp(g_crlf_raw[i], git_blob_rawcontent(blob), raw_len)); + git_blob_free(blob); } } @@ -87,11 +94,12 @@ void test_object_blob_filter__stats(void) git_buf buf = GIT_BUF_INIT; git_buf_text_stats stats; - for (i = 0; i < NUM_TEST_OBJECTS; i++) { - cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); + for (i = 0; i < CRLF_NUM_TEST_OBJECTS; i++) { 
+ cl_git_pass(git_blob_lookup(&blob, g_repo, &g_crlf_oids[i])); cl_git_pass(git_blob__getbuf(&buf, blob)); git_buf_text_gather_stats(&stats, &buf, false); - cl_assert(memcmp(&g_stats[i], &stats, sizeof(stats)) == 0); + cl_assert_equal_i( + 0, memcmp(&g_crlf_filtered_stats[i], &stats, sizeof(stats))); git_blob_free(blob); } @@ -116,14 +124,15 @@ void test_object_blob_filter__to_odb(void) &fl, g_repo, NULL, "filename.txt", GIT_FILTER_TO_ODB)); cl_assert(fl != NULL); - for (i = 0; i < NUM_TEST_OBJECTS; i++) { - cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); + for (i = 0; i < CRLF_NUM_TEST_OBJECTS; i++) { + cl_git_pass(git_blob_lookup(&blob, g_repo, &g_crlf_oids[i])); cl_git_pass(git_filter_list_apply_to_blob(&out, fl, blob)); - cl_assert(!memcmp( - out.ptr, g_crlf_filtered[i].ptr, - min(out.size, g_crlf_filtered[i].size))); + cl_assert_equal_sz(g_crlf_filtered[i].size, out.size); + + cl_assert_equal_i( + 0, memcmp(out.ptr, g_crlf_filtered[i].ptr, out.size)); git_blob_free(blob); } @@ -132,4 +141,3 @@ void test_object_blob_filter__to_odb(void) git_buf_free(&out); git_config_free(cfg); } - From d5b1866cc363461c3ecc14412dcd26d2e4fa8b68 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 13 Sep 2013 09:26:26 -0700 Subject: [PATCH 15/25] Rearrange clar submodule cleanup code --- tests-clar/diff/submodules.c | 1 - tests-clar/status/submodules.c | 1 - tests-clar/submodule/status.c | 1 - tests-clar/submodule/submodule_helpers.c | 21 ++++++++++++--------- tests-clar/submodule/submodule_helpers.h | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests-clar/diff/submodules.c b/tests-clar/diff/submodules.c index 9dcf8194e..167dedfc6 100644 --- a/tests-clar/diff/submodules.c +++ b/tests-clar/diff/submodules.c @@ -11,7 +11,6 @@ void test_diff_submodules__initialize(void) void test_diff_submodules__cleanup(void) { - cleanup_fixture_submodules(); } static void check_diff_patches_at_line( diff --git a/tests-clar/status/submodules.c 
b/tests-clar/status/submodules.c index 7bfef503f..ef2888f7d 100644 --- a/tests-clar/status/submodules.c +++ b/tests-clar/status/submodules.c @@ -13,7 +13,6 @@ void test_status_submodules__initialize(void) void test_status_submodules__cleanup(void) { - cleanup_fixture_submodules(); } void test_status_submodules__api(void) diff --git a/tests-clar/submodule/status.c b/tests-clar/submodule/status.c index 7b29ac288..f1227a575 100644 --- a/tests-clar/submodule/status.c +++ b/tests-clar/submodule/status.c @@ -14,7 +14,6 @@ void test_submodule_status__initialize(void) void test_submodule_status__cleanup(void) { - cleanup_fixture_submodules(); } void test_submodule_status__unchanged(void) diff --git a/tests-clar/submodule/submodule_helpers.c b/tests-clar/submodule/submodule_helpers.c index a7807522b..3e79c77fd 100644 --- a/tests-clar/submodule/submodule_helpers.c +++ b/tests-clar/submodule/submodule_helpers.c @@ -83,6 +83,14 @@ void rewrite_gitmodules(const char *workdir) git_buf_free(&path); } +static void cleanup_fixture_submodules(void *payload) +{ + cl_git_sandbox_cleanup(); /* either "submodules" or "submod2" */ + + if (payload) + cl_fixture_cleanup(payload); +} + git_repository *setup_fixture_submodules(void) { git_repository *repo = cl_git_sandbox_init("submodules"); @@ -92,6 +100,8 @@ git_repository *setup_fixture_submodules(void) rewrite_gitmodules(git_repository_workdir(repo)); p_rename("submodules/testrepo/.gitted", "submodules/testrepo/.git"); + cl_set_cleanup(cleanup_fixture_submodules, "testrepo.git"); + return repo; } @@ -106,14 +116,7 @@ git_repository *setup_fixture_submod2(void) p_rename("submod2/not-submodule/.gitted", "submod2/not-submodule/.git"); p_rename("submod2/not/.gitted", "submod2/not/.git"); + cl_set_cleanup(cleanup_fixture_submodules, "submod2_target"); + return repo; } - -void cleanup_fixture_submodules(void) -{ - cl_git_sandbox_cleanup(); - - /* just try to clean up both possible extras */ - cl_fixture_cleanup("testrepo.git"); - 
cl_fixture_cleanup("submod2_target"); -} diff --git a/tests-clar/submodule/submodule_helpers.h b/tests-clar/submodule/submodule_helpers.h index 1de15ca17..610c40720 100644 --- a/tests-clar/submodule/submodule_helpers.h +++ b/tests-clar/submodule/submodule_helpers.h @@ -1,5 +1,5 @@ extern void rewrite_gitmodules(const char *workdir); +/* these will automatically set a cleanup callback */ extern git_repository *setup_fixture_submodules(void); extern git_repository *setup_fixture_submod2(void); -extern void cleanup_fixture_submodules(void); From ad7417d7a13e910d548e3e07225ce8914cdb218e Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 13 Sep 2013 09:44:30 -0700 Subject: [PATCH 16/25] Make filter tests somewhat more robust The global and system config could interfere with the filter tests by imposing CRLF filtering where it was not anticipated. This better isolates the tests from the system settings. --- tests-clar/filter/blob.c | 2 +- tests-clar/filter/custom.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests-clar/filter/blob.c b/tests-clar/filter/blob.c index 916721e12..9600a9779 100644 --- a/tests-clar/filter/blob.c +++ b/tests-clar/filter/blob.c @@ -12,7 +12,7 @@ void test_filter_blob__initialize(void) "*.lf text eol=lf\n" "*.ident text ident\n" "*.identcrlf ident text eol=crlf\n" - "*.identlf ident text eol.lf\n"); + "*.identlf ident text eol=lf\n"); } void test_filter_blob__cleanup(void) diff --git a/tests-clar/filter/custom.c b/tests-clar/filter/custom.c index 4a2ff9fc4..a2752efa4 100644 --- a/tests-clar/filter/custom.c +++ b/tests-clar/filter/custom.c @@ -50,7 +50,8 @@ void test_filter_custom__initialize(void) "empty_standard_repo/.gitattributes", "hero* bitflip reverse\n" "herofile text\n" - "heroflip -reverse\n"); + "heroflip -reverse binary\n" + "*.bin binary\n"); } void test_filter_custom__cleanup(void) @@ -229,6 +230,15 @@ void test_filter_custom__can_register_a_custom_filter_in_the_repository(void) 
cl_git_pass(git_filter_list_load( &fl, g_repo, NULL, "herocorp", GIT_FILTER_TO_WORKTREE)); + /* expect: bitflip, reverse - possibly crlf depending on global config */ + { + size_t flen = git_filter_list_length(fl); + cl_assert(flen == 2 || flen == 3); + } + git_filter_list_free(fl); + + cl_git_pass(git_filter_list_load( + &fl, g_repo, NULL, "hero.bin", GIT_FILTER_TO_WORKTREE)); /* expect: bitflip, reverse */ cl_assert_equal_sz(2, git_filter_list_length(fl)); git_filter_list_free(fl); @@ -240,7 +250,7 @@ void test_filter_custom__can_register_a_custom_filter_in_the_repository(void) git_filter_list_free(fl); cl_git_pass(git_filter_list_load( - &fl, g_repo, NULL, "doesntapplytome", GIT_FILTER_TO_WORKTREE)); + &fl, g_repo, NULL, "doesntapplytome.bin", GIT_FILTER_TO_WORKTREE)); /* expect: none */ cl_assert_equal_sz(0, git_filter_list_length(fl)); git_filter_list_free(fl); From e399c7eee8dac02b7a79c6133ad4c761f05b7ba9 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 13 Sep 2013 09:50:05 -0700 Subject: [PATCH 17/25] Fix win32 warnings I wish MSVC understood that "const char **" is not a const ptr, but is a non-const pointer to an array of const ptrs. Does that seem like too much to ask? --- src/filter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/filter.c b/src/filter.c index 0375b8b0e..62aefb420 100644 --- a/src/filter.c +++ b/src/filter.c @@ -391,7 +391,7 @@ static int filter_list_check_attributes( /* if no values were found but no matches are needed, it's okay!
*/ if (error == GIT_ENOTFOUND && !fdef->nmatches) { giterr_clear(); - git__free(strs); + git__free((void *)strs); return 0; } @@ -411,7 +411,7 @@ static int filter_list_check_attributes( } if (error) - git__free(strs); + git__free((void *)strs); else *out = strs; @@ -474,7 +474,7 @@ int git_filter_list_load( error = fdef->filter->check( fdef->filter, &payload, &src, values); - git__free(values); + git__free((void *)values); if (error == GIT_ENOTFOUND) error = 0; From 8427757f78b1f0b018b1ccfe424a4c39e89ea024 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 13 Sep 2013 12:32:45 -0700 Subject: [PATCH 18/25] Fixing up some win32 issues with autocrlf --- tests-clar/diff/rename.c | 2 ++ tests-clar/filter/crlf.c | 6 +----- tests-clar/status/renames.c | 4 +++- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c index b5a9935fd..9864c5896 100644 --- a/tests-clar/diff/rename.c +++ b/tests-clar/diff/rename.c @@ -7,6 +7,8 @@ static git_repository *g_repo = NULL; void test_diff_rename__initialize(void) { g_repo = cl_git_sandbox_init("renames"); + + cl_repo_set_bool(g_repo, "core.autocrlf", false); } void test_diff_rename__cleanup(void) diff --git a/tests-clar/filter/crlf.c b/tests-clar/filter/crlf.c index ece2e6e5e..c9fb9cd7f 100644 --- a/tests-clar/filter/crlf.c +++ b/tests-clar/filter/crlf.c @@ -5,16 +5,12 @@ static git_repository *g_repo = NULL; void test_filter_crlf__initialize(void) { - git_config *cfg; - g_repo = cl_git_sandbox_init("crlf"); cl_git_mkfile("crlf/.gitattributes", "*.txt text\n*.bin binary\n*.crlf text eol=crlf\n*.lf text eol=lf\n"); - cl_git_pass(git_repository_config(&cfg, g_repo)); - cl_git_pass(git_config_set_string(cfg, "core.autocrlf", "true")); - git_config_free(cfg); + cl_repo_set_bool(g_repo, "core.autocrlf", true); } void test_filter_crlf__cleanup(void) diff --git a/tests-clar/status/renames.c b/tests-clar/status/renames.c index d72e563bf..de84a574d 100644 --- 
a/tests-clar/status/renames.c +++ b/tests-clar/status/renames.c @@ -11,6 +11,8 @@ static git_repository *g_repo = NULL; void test_status_renames__initialize(void) { g_repo = cl_git_sandbox_init("renames"); + + cl_repo_set_bool(g_repo, "core.autocrlf", false); } void test_status_renames__cleanup(void) @@ -67,7 +69,7 @@ static void test_status( actual = git_status_byindex(status_list, i); expected = &expected_list[i]; - cl_assert_equal_i((int)expected->status, (int)actual->status); + cl_assert_equal_i_fmt(expected->status, actual->status, "%04x"); oldname = actual->head_to_index ? actual->head_to_index->old_file.path : actual->index_to_workdir ? actual->index_to_workdir->old_file.path : NULL; From fa9cc14880cb50ea626c4bb0fcf1b68acdd73186 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 13 Sep 2013 13:41:33 -0700 Subject: [PATCH 19/25] Fix cleanup issues with new tests --- tests-clar/revwalk/simplify.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests-clar/revwalk/simplify.c b/tests-clar/revwalk/simplify.c index c94952105..81c19d366 100644 --- a/tests-clar/revwalk/simplify.c +++ b/tests-clar/revwalk/simplify.c @@ -1,5 +1,10 @@ #include "clar_libgit2.h" +void test_revwalk_simplify__cleanup(void) +{ + cl_git_sandbox_cleanup(); +} + /* * a4a7dce [0] Merge branch 'master' into br2 |\ @@ -47,5 +52,4 @@ void test_revwalk_simplify__first_parent(void) cl_assert_equal_i(error, GIT_ITEROVER); git_revwalk_free(walk); - git_repository_free(repo); } From 13f36ffb9e1c4fb70b44a477d716873fecfc0407 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 13 Sep 2013 16:30:21 -0700 Subject: [PATCH 20/25] Add clar helpers for testing file equality These are a couple of new clar helpers for testing that a file has expected contents that I extracted from the checkout code. Actually wrote this as part of an abandoned earlier attempt at a new filters API, but it will be useful now for some of the tests I'm going to write. 
--- src/fileops.c | 12 +----- tests-clar/checkout/checkout_helpers.c | 58 -------------------------- tests-clar/checkout/checkout_helpers.h | 13 +----- tests-clar/clar_libgit2.c | 55 ++++++++++++++++++++++++ tests-clar/clar_libgit2.h | 14 +++++++ 5 files changed, 72 insertions(+), 80 deletions(-) diff --git a/src/fileops.c b/src/fileops.c index 3b271e6f6..bd845e982 100644 --- a/src/fileops.c +++ b/src/fileops.c @@ -56,18 +56,8 @@ int git_futils_creat_withpath(const char *path, const mode_t dirmode, const mode int git_futils_creat_locked(const char *path, const mode_t mode) { - int fd; - -#ifdef GIT_WIN32 - git_win32_path buf; - - git_win32_path_from_c(buf, path); - fd = _wopen(buf, O_WRONLY | O_CREAT | O_TRUNC | + int fd = p_open(path, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL | O_BINARY | O_CLOEXEC, mode); -#else - fd = open(path, O_WRONLY | O_CREAT | O_TRUNC | - O_EXCL | O_BINARY | O_CLOEXEC, mode); -#endif if (fd < 0) { giterr_set(GITERR_OS, "Failed to create locked file '%s'", path); diff --git a/tests-clar/checkout/checkout_helpers.c b/tests-clar/checkout/checkout_helpers.c index f55f7b611..06b4e0682 100644 --- a/tests-clar/checkout/checkout_helpers.c +++ b/tests-clar/checkout/checkout_helpers.c @@ -3,22 +3,6 @@ #include "refs.h" #include "fileops.h" -/* this is essentially the code from git__unescape modified slightly */ -void strip_cr_from_buf(git_buf *buf) -{ - char *scan, *pos = buf->ptr, *end = pos + buf->size; - - for (scan = pos; scan < end; pos++, scan++) { - if (*scan == '\r') - scan++; /* skip '\r' */ - if (pos != scan) - *pos = *scan; - } - - *pos = '\0'; - buf->size = (pos - buf->ptr); -} - void assert_on_branch(git_repository *repo, const char *branch) { git_reference *head; @@ -50,48 +34,6 @@ void reset_index_to_treeish(git_object *treeish) git_index_free(index); } -static void check_file_contents_internal( - const char *path, - const char *expected_content, - bool strip_cr, - const char *file, - int line, - const char *msg) -{ - int fd; - char 
data[1024] = {0}; - git_buf buf = GIT_BUF_INIT; - size_t expected_len = expected_content ? strlen(expected_content) : 0; - - fd = p_open(path, O_RDONLY); - cl_assert(fd >= 0); - - buf.ptr = data; - buf.size = p_read(fd, buf.ptr, sizeof(data)); - - cl_git_pass(p_close(fd)); - - if (strip_cr) - strip_cr_from_buf(&buf); - - clar__assert_equal(file, line, "strlen(expected_content) != strlen(actual_content)", 1, PRIuZ, expected_len, (size_t)buf.size); - clar__assert_equal(file, line, msg, 1, "%s", expected_content, buf.ptr); -} - -void check_file_contents_at_line( - const char *path, const char *expected, - const char *file, int line, const char *msg) -{ - check_file_contents_internal(path, expected, false, file, line, msg); -} - -void check_file_contents_nocr_at_line( - const char *path, const char *expected, - const char *file, int line, const char *msg) -{ - check_file_contents_internal(path, expected, true, file, line, msg); -} - int checkout_count_callback( git_checkout_notify_t why, const char *path, diff --git a/tests-clar/checkout/checkout_helpers.h b/tests-clar/checkout/checkout_helpers.h index 0e8da31d1..705ee903d 100644 --- a/tests-clar/checkout/checkout_helpers.h +++ b/tests-clar/checkout/checkout_helpers.h @@ -2,23 +2,14 @@ #include "git2/object.h" #include "git2/repository.h" -extern void strip_cr_from_buf(git_buf *buf); extern void assert_on_branch(git_repository *repo, const char *branch); extern void reset_index_to_treeish(git_object *treeish); -extern void check_file_contents_at_line( - const char *path, const char *expected, - const char *file, int line, const char *msg); - -extern void check_file_contents_nocr_at_line( - const char *path, const char *expected, - const char *file, int line, const char *msg); - #define check_file_contents(PATH,EXP) \ - check_file_contents_at_line(PATH,EXP,__FILE__,__LINE__,"String mismatch: " #EXP " != " #PATH) + cl_assert_equal_file(EXP,0,PATH) #define check_file_contents_nocr(PATH,EXP) \ - 
check_file_contents_nocr_at_line(PATH,EXP,__FILE__,__LINE__,"String mismatch: " #EXP " != " #PATH) + cl_assert_equal_file_ignore_cr(EXP,0,PATH) typedef struct { int n_conflicts; diff --git a/tests-clar/clar_libgit2.c b/tests-clar/clar_libgit2.c index 340943ca8..522f73634 100644 --- a/tests-clar/clar_libgit2.c +++ b/tests-clar/clar_libgit2.c @@ -354,3 +354,58 @@ int cl_repo_get_bool(git_repository *repo, const char *cfg) git_config_free(config); return val; } + +/* this is essentially the code from git__unescape modified slightly */ +static size_t strip_cr_from_buf(char *start, size_t len) +{ + char *scan, *trail, *end = start + len; + + for (scan = trail = start; scan < end; trail++, scan++) { + while (*scan == '\r') + scan++; /* skip '\r' */ + + if (trail != scan) + *trail = *scan; + } + + *trail = '\0'; + + return (trail - start); +} + +void clar__assert_equal_file( + const char *expected_data, + size_t expected_bytes, + int ignore_cr, + const char *path, + const char *file, + size_t line) +{ + char buf[4000]; + ssize_t bytes, total_bytes = 0; + int fd = p_open(path, O_RDONLY | O_BINARY); + cl_assert(fd >= 0); + + if (expected_data && !expected_bytes) + expected_bytes = strlen(expected_data); + + while ((bytes = p_read(fd, buf, sizeof(buf))) != 0) { + clar__assert( + bytes > 0, file, line, "error reading from file", path, 1); + + if (ignore_cr) + bytes = strip_cr_from_buf(buf, bytes); + + clar__assert(memcmp(expected_data, buf, bytes) == 0, + file, line, "file content mismatch", path, 1); + + expected_data += bytes; + total_bytes += bytes; + } + + p_close(fd); + + clar__assert(!bytes, file, line, "error reading from file", path, 1); + clar__assert_equal(file, line, "mismatched file length", 1, "%"PRIuZ, + (size_t)expected_bytes, (size_t)total_bytes); +} diff --git a/tests-clar/clar_libgit2.h b/tests-clar/clar_libgit2.h index 8dcfdee48..76299e4e3 100644 --- a/tests-clar/clar_libgit2.h +++ b/tests-clar/clar_libgit2.h @@ -46,6 +46,20 @@ GIT_INLINE(void) 
clar__assert_in_range( #define cl_assert_in_range(L,V,H) \ clar__assert_in_range((L),(V),(H),__FILE__,__LINE__,"Range check: " #V " in [" #L "," #H "]", 1) +#define cl_assert_equal_file(DATA,SIZE,PATH) \ + clar__assert_equal_file(DATA,SIZE,0,PATH,__FILE__,__LINE__) + +#define cl_assert_equal_file_ignore_cr(DATA,SIZE,PATH) \ + clar__assert_equal_file(DATA,SIZE,1,PATH,__FILE__,__LINE__) + +void clar__assert_equal_file( + const char *expected_data, + size_t expected_size, + int ignore_cr, + const char *path, + const char *file, + size_t line); + /* * Some utility macros for building long strings */ From 155fa2342d838bdb2aa873c95a42e091351bb69a Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Thu, 5 Sep 2013 15:06:42 -0700 Subject: [PATCH 21/25] Add clar helper to create new commit from index There were a lot of places in the test code base that were creating a commit from the index on the current branch. This just adds a helper to handle that case pretty easily. There was only one test where this change ended up tweaking the test data, so pretty easy and mostly just a cleanup. 
--- tests-clar/clar_libgit2.c | 59 ++++++++++++++++++++++++++++++++ tests-clar/clar_libgit2.h | 8 +++++ tests-clar/diff/submodules.c | 25 ++------------ tests-clar/index/addall.c | 35 ++----------------- tests-clar/repo/init.c | 5 +++ tests-clar/stash/drop.c | 16 +++++---- tests-clar/stash/save.c | 5 ++- tests-clar/stash/stash_helpers.c | 33 +----------------- tests-clar/stash/stash_helpers.h | 5 --- tests-clar/status/worktree.c | 25 +------------- tests-clar/stress/diff.c | 24 ++----------- 11 files changed, 93 insertions(+), 147 deletions(-) diff --git a/tests-clar/clar_libgit2.c b/tests-clar/clar_libgit2.c index 522f73634..4cf682449 100644 --- a/tests-clar/clar_libgit2.c +++ b/tests-clar/clar_libgit2.c @@ -337,6 +337,65 @@ int cl_git_remove_placeholders(const char *directory_path, const char *filename) return error; } +#define CL_COMMIT_NAME "Libgit2 Tester" +#define CL_COMMIT_EMAIL "libgit2-test@github.com" +#define CL_COMMIT_MSG "Test commit of tree " + +void cl_repo_commit_from_index( + git_oid *out, + git_repository *repo, + git_signature *sig, + git_time_t time, + const char *msg) +{ + git_index *index; + git_oid commit_id, tree_id; + git_object *parent = NULL; + git_reference *ref = NULL; + git_tree *tree = NULL; + char buf[128]; + int free_sig = (sig == NULL); + + /* it is fine if looking up HEAD fails - we make this the first commit */ + git_revparse_ext(&parent, &ref, repo, "HEAD"); + + /* write the index content as a tree */ + cl_git_pass(git_repository_index(&index, repo)); + cl_git_pass(git_index_write_tree(&tree_id, index)); + cl_git_pass(git_index_write(index)); + git_index_free(index); + + cl_git_pass(git_tree_lookup(&tree, repo, &tree_id)); + + if (sig) + cl_assert(sig->name && sig->email); + else if (!time) + cl_git_pass(git_signature_now(&sig, CL_COMMIT_NAME, CL_COMMIT_EMAIL)); + else + cl_git_pass(git_signature_new( + &sig, CL_COMMIT_NAME, CL_COMMIT_EMAIL, time, 0)); + + if (!msg) { + strcpy(buf, CL_COMMIT_MSG); + git_oid_tostr(buf + 
strlen(CL_COMMIT_MSG), + sizeof(buf) - strlen(CL_COMMIT_MSG), &tree_id); + msg = buf; + } + + cl_git_pass(git_commit_create_v( + &commit_id, repo, ref ? git_reference_name(ref) : "HEAD", + sig, sig, NULL, msg, tree, parent ? 1 : 0, parent)); + + if (out) + git_oid_cpy(out, &commit_id); + + git_object_free(parent); + git_reference_free(ref); + if (free_sig) + git_signature_free(sig); + git_tree_free(tree); +} + void cl_repo_set_bool(git_repository *repo, const char *cfg, int value) { git_config *config; diff --git a/tests-clar/clar_libgit2.h b/tests-clar/clar_libgit2.h index 76299e4e3..f2d9c4d0b 100644 --- a/tests-clar/clar_libgit2.h +++ b/tests-clar/clar_libgit2.h @@ -98,6 +98,14 @@ const char* cl_git_path_url(const char *path); /* Test repository cleaner */ int cl_git_remove_placeholders(const char *directory_path, const char *filename); +/* commit creation helpers */ +void cl_repo_commit_from_index( + git_oid *out, + git_repository *repo, + git_signature *sig, + git_time_t time, + const char *msg); + /* config setting helpers */ void cl_repo_set_bool(git_repository *repo, const char *cfg, int value); int cl_repo_get_bool(git_repository *repo, const char *cfg); diff --git a/tests-clar/diff/submodules.c b/tests-clar/diff/submodules.c index 167dedfc6..036ff09aa 100644 --- a/tests-clar/diff/submodules.c +++ b/tests-clar/diff/submodules.c @@ -228,11 +228,11 @@ void test_diff_submodules__invalid_cache(void) "" }; static const char *expected_moved[] = { - "diff --git a/sm_changed_head b/sm_changed_head\nindex 3d9386c..0910a13 160000\n--- a/sm_changed_head\n+++ b/sm_changed_head\n@@ -1 +1 @@\n-Subproject commit 3d9386c507f6b093471a3e324085657a3c2b4247\n+Subproject commit 0910a13dfa2210496f6c590d75bc360dd11b2a1b\n", + "diff --git a/sm_changed_head b/sm_changed_head\nindex 3d9386c..7002348 160000\n--- a/sm_changed_head\n+++ b/sm_changed_head\n@@ -1 +1 @@\n-Subproject commit 3d9386c507f6b093471a3e324085657a3c2b4247\n+Subproject commit 
700234833f6ccc20d744b238612646be071acaae\n", "" }; static const char *expected_moved_dirty[] = { - "diff --git a/sm_changed_head b/sm_changed_head\nindex 3d9386c..0910a13 160000\n--- a/sm_changed_head\n+++ b/sm_changed_head\n@@ -1 +1 @@\n-Subproject commit 3d9386c507f6b093471a3e324085657a3c2b4247\n+Subproject commit 0910a13dfa2210496f6c590d75bc360dd11b2a1b-dirty\n", + "diff --git a/sm_changed_head b/sm_changed_head\nindex 3d9386c..7002348 160000\n--- a/sm_changed_head\n+++ b/sm_changed_head\n@@ -1 +1 @@\n-Subproject commit 3d9386c507f6b093471a3e324085657a3c2b4247\n+Subproject commit 700234833f6ccc20d744b238612646be071acaae-dirty\n", "" }; @@ -309,26 +309,7 @@ void test_diff_submodules__invalid_cache(void) git_diff_list_free(diff); /* commit changed index of submodule */ - { - git_object *parent; - git_oid tree_id, commit_id; - git_tree *tree; - git_signature *sig; - git_reference *ref; - - cl_git_pass(git_revparse_ext(&parent, &ref, smrepo, "HEAD")); - cl_git_pass(git_index_write_tree(&tree_id, smindex)); - cl_git_pass(git_index_write(smindex)); - cl_git_pass(git_tree_lookup(&tree, smrepo, &tree_id)); - cl_git_pass(git_signature_new(&sig, "Sm Test", "sm@tester.test", 1372350000, 480)); - cl_git_pass(git_commit_create_v( - &commit_id, smrepo, git_reference_name(ref), sig, sig, - NULL, "Move it", tree, 1, parent)); - git_object_free(parent); - git_tree_free(tree); - git_reference_free(ref); - git_signature_free(sig); - } + cl_repo_commit_from_index(NULL, smrepo, NULL, 1372350000, "Move it"); git_submodule_set_ignore(sm, GIT_SUBMODULE_IGNORE_DIRTY); diff --git a/tests-clar/index/addall.c b/tests-clar/index/addall.c index 00388ee00..f46a1e16c 100644 --- a/tests-clar/index/addall.c +++ b/tests-clar/index/addall.c @@ -120,37 +120,6 @@ static void check_stat_data(git_index *index, const char *path, bool match) } } -static void commit_index_to_head( - git_repository *repo, - const char *commit_message) -{ - git_index *index; - git_oid tree_id, commit_id; - git_tree *tree; 
- git_signature *sig; - git_commit *parent = NULL; - - git_revparse_single((git_object **)&parent, repo, "HEAD"); - /* it is okay if looking up the HEAD fails */ - - cl_git_pass(git_repository_index(&index, repo)); - cl_git_pass(git_index_write_tree(&tree_id, index)); - cl_git_pass(git_index_write(index)); /* not needed, but might as well */ - git_index_free(index); - - cl_git_pass(git_tree_lookup(&tree, repo, &tree_id)); - - cl_git_pass(git_signature_now(&sig, "Testy McTester", "tt@tester.test")); - - cl_git_pass(git_commit_create_v( - &commit_id, repo, "HEAD", sig, sig, - NULL, commit_message, tree, parent ? 1 : 0, parent)); - - git_commit_free(parent); - git_tree_free(tree); - git_signature_free(sig); -} - void test_index_addall__repo_lifecycle(void) { int error; @@ -197,7 +166,7 @@ void test_index_addall__repo_lifecycle(void) check_stat_data(index, "addall/file.zzz", true); check_status(g_repo, 2, 0, 0, 3, 0, 0, 1); - commit_index_to_head(g_repo, "first commit"); + cl_repo_commit_from_index(NULL, g_repo, NULL, 0, "first commit"); check_status(g_repo, 0, 0, 0, 3, 0, 0, 1); /* attempt to add an ignored file - does nothing */ @@ -244,7 +213,7 @@ void test_index_addall__repo_lifecycle(void) cl_git_pass(git_index_add_bypath(index, "file.zzz")); check_status(g_repo, 1, 0, 1, 3, 0, 0, 0); - commit_index_to_head(g_repo, "second commit"); + cl_repo_commit_from_index(NULL, g_repo, NULL, 0, "second commit"); check_status(g_repo, 0, 0, 0, 3, 0, 0, 0); cl_must_pass(p_unlink("addall/file.zzz")); diff --git a/tests-clar/repo/init.c b/tests-clar/repo/init.c index e3fc112b3..caa211e75 100644 --- a/tests-clar/repo/init.c +++ b/tests-clar/repo/init.c @@ -565,6 +565,11 @@ void test_repo_init__init_with_initial_commit(void) cl_git_pass(git_index_add_bypath(index, "file.txt")); cl_git_pass(git_index_write(index)); + /* Intentionally not using cl_repo_commit_from_index here so this code + * can be used as an example of how an initial commit is typically + * made to a repository... 
+ */ + /* Make sure we're ready to use git_signature_default :-) */ { git_config *cfg, *local; diff --git a/tests-clar/stash/drop.c b/tests-clar/stash/drop.c index 60b3c72e0..59413f01e 100644 --- a/tests-clar/stash/drop.c +++ b/tests-clar/stash/drop.c @@ -36,25 +36,27 @@ static void push_three_states(void) cl_git_mkfile("stash/zero.txt", "content\n"); cl_git_pass(git_repository_index(&index, repo)); cl_git_pass(git_index_add_bypath(index, "zero.txt")); - commit_staged_files(&oid, index, signature); + cl_repo_commit_from_index(NULL, repo, signature, 0, "Initial commit"); cl_assert(git_path_exists("stash/zero.txt")); + git_index_free(index); cl_git_mkfile("stash/one.txt", "content\n"); - cl_git_pass(git_stash_save(&oid, repo, signature, "First", GIT_STASH_INCLUDE_UNTRACKED)); + cl_git_pass(git_stash_save( + &oid, repo, signature, "First", GIT_STASH_INCLUDE_UNTRACKED)); cl_assert(!git_path_exists("stash/one.txt")); cl_assert(git_path_exists("stash/zero.txt")); cl_git_mkfile("stash/two.txt", "content\n"); - cl_git_pass(git_stash_save(&oid, repo, signature, "Second", GIT_STASH_INCLUDE_UNTRACKED)); + cl_git_pass(git_stash_save( + &oid, repo, signature, "Second", GIT_STASH_INCLUDE_UNTRACKED)); cl_assert(!git_path_exists("stash/two.txt")); cl_assert(git_path_exists("stash/zero.txt")); cl_git_mkfile("stash/three.txt", "content\n"); - cl_git_pass(git_stash_save(&oid, repo, signature, "Third", GIT_STASH_INCLUDE_UNTRACKED)); + cl_git_pass(git_stash_save( + &oid, repo, signature, "Third", GIT_STASH_INCLUDE_UNTRACKED)); cl_assert(!git_path_exists("stash/three.txt")); cl_assert(git_path_exists("stash/zero.txt")); - - git_index_free(index); } void test_stash_drop__cannot_drop_a_non_existing_stashed_state(void) @@ -160,7 +162,7 @@ void test_stash_drop__dropping_the_top_stash_updates_the_stash_reference(void) retrieve_top_stash_id(&oid); cl_git_pass(git_revparse_single(&next_top_stash, repo, "stash@{1}")); - cl_assert_equal_i(false, git_oid_cmp(&oid, git_object_id(next_top_stash)) 
== 0); + cl_assert(git_oid_cmp(&oid, git_object_id(next_top_stash)) != 0); cl_git_pass(git_stash_drop(repo, 0)); diff --git a/tests-clar/stash/save.c b/tests-clar/stash/save.c index bb35a3d71..035b62279 100644 --- a/tests-clar/stash/save.c +++ b/tests-clar/stash/save.c @@ -241,7 +241,7 @@ void test_stash_save__stashing_updates_the_reflog(void) void test_stash_save__cannot_stash_when_there_are_no_local_change(void) { git_index *index; - git_oid commit_oid, stash_tip_oid; + git_oid stash_tip_oid; cl_git_pass(git_repository_index(&index, repo)); @@ -251,8 +251,7 @@ void test_stash_save__cannot_stash_when_there_are_no_local_change(void) */ cl_git_pass(git_index_add_bypath(index, "what")); cl_git_pass(git_index_add_bypath(index, "who")); - cl_git_pass(git_index_write(index)); - commit_staged_files(&commit_oid, index, signature); + cl_repo_commit_from_index(NULL, repo, signature, 0, "Initial commit"); git_index_free(index); cl_assert_equal_i(GIT_ENOTFOUND, diff --git a/tests-clar/stash/stash_helpers.c b/tests-clar/stash/stash_helpers.c index f462a1351..06b63f177 100644 --- a/tests-clar/stash/stash_helpers.c +++ b/tests-clar/stash/stash_helpers.c @@ -2,38 +2,8 @@ #include "fileops.h" #include "stash_helpers.h" -void commit_staged_files( - git_oid *commit_oid, - git_index *index, - git_signature *signature) -{ - git_tree *tree; - git_oid tree_oid; - git_repository *repo; - - repo = git_index_owner(index); - - cl_git_pass(git_index_write_tree(&tree_oid, index)); - - cl_git_pass(git_tree_lookup(&tree, repo, &tree_oid)); - - cl_git_pass(git_commit_create_v( - commit_oid, - repo, - "HEAD", - signature, - signature, - NULL, - "Initial commit", - tree, - 0)); - - git_tree_free(tree); -} - void setup_stash(git_repository *repo, git_signature *signature) { - git_oid commit_oid; git_index *index; cl_git_pass(git_repository_index(&index, repo)); @@ -50,9 +20,8 @@ void setup_stash(git_repository *repo, git_signature *signature) cl_git_pass(git_index_add_bypath(index, "how")); 
cl_git_pass(git_index_add_bypath(index, "who")); cl_git_pass(git_index_add_bypath(index, ".gitignore")); - cl_git_pass(git_index_write(index)); - commit_staged_files(&commit_oid, index, signature); + cl_repo_commit_from_index(NULL, repo, signature, 0, "Initial commit"); cl_git_rewritefile("stash/what", "goodbye\n"); /* dd7e1c6f0fefe118f0b63d9f10908c460aa317a6 */ cl_git_rewritefile("stash/how", "not so small and\n"); /* e6d64adb2c7f3eb8feb493b556cc8070dca379a3 */ diff --git a/tests-clar/stash/stash_helpers.h b/tests-clar/stash/stash_helpers.h index bb7fec4f5..7c3e13de3 100644 --- a/tests-clar/stash/stash_helpers.h +++ b/tests-clar/stash/stash_helpers.h @@ -1,8 +1,3 @@ void setup_stash( git_repository *repo, git_signature *signature); - -void commit_staged_files( - git_oid *commit_oid, - git_index *index, - git_signature *signature); \ No newline at end of file diff --git a/tests-clar/status/worktree.c b/tests-clar/status/worktree.c index be7398cb6..135a95871 100644 --- a/tests-clar/status/worktree.c +++ b/tests-clar/status/worktree.c @@ -632,35 +632,12 @@ void test_status_worktree__conflicted_item(void) static void stage_and_commit(git_repository *repo, const char *path) { - git_oid tree_oid, commit_oid; - git_tree *tree; - git_signature *signature; git_index *index; cl_git_pass(git_repository_index(&index, repo)); cl_git_pass(git_index_add_bypath(index, path)); - cl_git_pass(git_index_write(index)); - - cl_git_pass(git_index_write_tree(&tree_oid, index)); + cl_repo_commit_from_index(NULL, repo, NULL, 1323847743, "Initial commit\n"); git_index_free(index); - - cl_git_pass(git_tree_lookup(&tree, repo, &tree_oid)); - - cl_git_pass(git_signature_new(&signature, "nulltoken", "emeric.fermas@gmail.com", 1323847743, 60)); - - cl_git_pass(git_commit_create_v( - &commit_oid, - repo, - "HEAD", - signature, - signature, - NULL, - "Initial commit\n\0", - tree, - 0)); - - git_tree_free(tree); - git_signature_free(signature); } static void assert_ignore_case( diff --git 
a/tests-clar/stress/diff.c b/tests-clar/stress/diff.c index 0524aa108..1d319738e 100644 --- a/tests-clar/stress/diff.c +++ b/tests-clar/stress/diff.c @@ -54,27 +54,9 @@ static void test_with_many(int expected_new) git_diff_list_free(diff); - { - git_object *parent; - git_signature *sig; - git_oid tree_id, commit_id; - git_reference *ref; - - cl_git_pass(git_index_write_tree(&tree_id, index)); - cl_git_pass(git_tree_lookup(&new_tree, g_repo, &tree_id)); - - cl_git_pass(git_revparse_ext(&parent, &ref, g_repo, "HEAD")); - cl_git_pass(git_signature_new( - &sig, "Sm Test", "sm@tester.test", 1372350000, 480)); - - cl_git_pass(git_commit_create_v( - &commit_id, g_repo, git_reference_name(ref), sig, sig, - NULL, "yoyoyo", new_tree, 1, parent)); - - git_object_free(parent); - git_reference_free(ref); - git_signature_free(sig); - } + cl_repo_commit_from_index(NULL, g_repo, NULL, 1372350000, "yoyoyo"); + cl_git_pass(git_revparse_single( + (git_object **)&new_tree, g_repo, "HEAD^{tree}")); cl_git_pass(git_diff_tree_to_tree( &diff, g_repo, tree, new_tree, &diffopts)); From 37f9e4093999498a25641018da36245d6a7cb008 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 13 Sep 2013 21:43:00 -0700 Subject: [PATCH 22/25] Some tests with ident and crlf filters Fixed the filter order to match core Git, too. This test demonstrates an interesting behavior of core Git (which is totally reasonable and which libgit2 matches, although mostly by coincidence). If you use the ident filter and commit a file with a garbage ident in it, like '$Id: this is just garbage$' and then immediately do a 'git checkout-index' with the new file, Git will not consider the file out of date and will not overwrite the file with an updated $Id$. Libgit2 has the same behavior. If you remove the file and then do a checkout-index, it will be replaced with a filtered version that has injected the OID correctly. 
--- examples/showindex.c | 4 +++ src/filter.c | 2 +- tests-clar/checkout/crlf.c | 65 ++++++++++++++++++++++++++++++++++++++ tests-clar/clar_libgit2.c | 11 +++++-- 4 files changed, 79 insertions(+), 3 deletions(-) diff --git a/examples/showindex.c b/examples/showindex.c index e92a9c8de..93718c89b 100644 --- a/examples/showindex.c +++ b/examples/showindex.c @@ -12,6 +12,8 @@ int main (int argc, char** argv) char out[41]; out[40] = '\0'; + git_threads_init(); + if (argc > 1) dir = argv[1]; if (!dir || argc > 2) { @@ -62,6 +64,8 @@ int main (int argc, char** argv) git_index_free(index); git_repository_free(repo); + git_threads_shutdown(); + return 0; } diff --git a/src/filter.c b/src/filter.c index 62aefb420..378209800 100644 --- a/src/filter.c +++ b/src/filter.c @@ -593,7 +593,7 @@ int git_filter_list_apply_to_data( for (i = 0; i < git_array_size(fl->filters); ++i) { unsigned int di = 1 - si; - uint32_t fidx = (fl->source.mode == GIT_FILTER_TO_ODB) ? + uint32_t fidx = (fl->source.mode == GIT_FILTER_TO_WORKTREE) ? 
i : git_array_size(fl->filters) - 1 - i; git_filter_entry *fe = git_array_get(fl->filters, fidx); diff --git a/tests-clar/checkout/crlf.c b/tests-clar/checkout/crlf.c index 5f5f1b776..4953609cc 100644 --- a/tests-clar/checkout/crlf.c +++ b/tests-clar/checkout/crlf.c @@ -4,6 +4,7 @@ #include "git2/checkout.h" #include "repository.h" +#include "posix.h" static git_repository *g_repo; @@ -136,3 +137,67 @@ void test_checkout_crlf__autocrlf_true_index_size_is_filtered_size(void) git_index_free(index); } + +void test_checkout_crlf__with_ident(void) +{ + git_index *index; + git_blob *blob; + git_checkout_opts opts = GIT_CHECKOUT_OPTS_INIT; + opts.checkout_strategy = GIT_CHECKOUT_SAFE_CREATE; + + cl_git_mkfile("crlf/.gitattributes", + "*.txt text\n*.bin binary\n" + "*.crlf text eol=crlf\n" + "*.lf text eol=lf\n" + "*.ident text ident\n" + "*.identcrlf ident text eol=crlf\n" + "*.identlf ident text eol=lf\n"); + + cl_repo_set_bool(g_repo, "core.autocrlf", true); + + /* add files with $Id$ */ + + cl_git_mkfile("crlf/lf.ident", ALL_LF_TEXT_RAW "\n$Id: initial content$\n"); + cl_git_mkfile("crlf/crlf.ident", ALL_CRLF_TEXT_RAW "\r\n$Id$\r\n\r\n"); + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_add_bypath(index, "lf.ident")); + cl_git_pass(git_index_add_bypath(index, "crlf.ident")); + cl_repo_commit_from_index(NULL, g_repo, NULL, 0, "Some ident files\n"); + + git_checkout_head(g_repo, &opts); + + /* check that blob has $Id$ */ + + cl_git_pass(git_blob_lookup(&blob, g_repo, + & git_index_get_bypath(index, "lf.ident", 0)->oid)); + cl_assert_equal_s( + ALL_LF_TEXT_RAW "\n$Id$\n", git_blob_rawcontent(blob)); + + git_blob_free(blob); + + /* check that filesystem is initially untouched - matching core Git */ + + cl_assert_equal_file( + ALL_LF_TEXT_RAW "\n$Id: initial content$\n", 0, "crlf/lf.ident"); + + /* check that forced checkout rewrites correctly */ + + p_unlink("crlf/lf.ident"); + p_unlink("crlf/crlflf.ident"); + + git_checkout_head(g_repo, 
&opts); + + if (GIT_EOL_NATIVE == GIT_EOL_LF) + cl_assert_equal_file( + ALL_LF_TEXT_RAW + "\n$Id: fcf6d4d9c212dc66563b1171b1cd99953c756467$\n", + 0, "crlf/lf.ident"); + else + cl_assert_equal_file( + ALL_LF_TEXT_AS_CRLF + "\r\n$Id: fcf6d4d9c212dc66563b1171b1cd99953c756467$\r\n", + 0, "crlf/lf.ident"); + + git_index_free(index); +} diff --git a/tests-clar/clar_libgit2.c b/tests-clar/clar_libgit2.c index 4cf682449..d7e28831f 100644 --- a/tests-clar/clar_libgit2.c +++ b/tests-clar/clar_libgit2.c @@ -455,8 +455,15 @@ void clar__assert_equal_file( if (ignore_cr) bytes = strip_cr_from_buf(buf, bytes); - clar__assert(memcmp(expected_data, buf, bytes) == 0, - file, line, "file content mismatch", path, 1); + if (memcmp(expected_data, buf, bytes) != 0) { + int pos; + for (pos = 0; pos < bytes && expected_data[pos] == buf[pos]; ++pos) + /* find differing byte offset */; + p_snprintf( + buf, sizeof(buf), "file content mismatch at byte %d", + (int)(total_bytes + pos)); + clar__fail(file, line, buf, path, 1); + } expected_data += bytes; total_bytes += bytes; From eab3746b3026950ed62842c1e5641556d7131a5b Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Sun, 15 Sep 2013 22:23:39 -0700 Subject: [PATCH 23/25] More filtering tests including order This adds more tests of filters, including the ident filter when mixed with custom filters. I was able to combine with the reverse filter and demonstrate that the order of filter application with the default priority constants matches the order of core Git. Also, this fixes two issues in the ident filter: preventing ident expansion on binary files and avoiding a NULL dereference when dollar sign characters are found without Id. 
--- include/git2/sys/filter.h | 14 +++++++++ src/ident.c | 7 ++++- tests-clar/checkout/crlf.c | 36 ++++++++++++++++++--- tests-clar/filter/custom.c | 64 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 114 insertions(+), 7 deletions(-) diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index 9a6720a3e..aa89c7b56 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -29,9 +29,23 @@ GIT_EXTERN(git_filter *) git_filter_lookup(const char *name); #define GIT_FILTER_CRLF "crlf" #define GIT_FILTER_IDENT "ident" +/** + * This is the priority that the internal CRLF filter will be registered with + */ #define GIT_FILTER_CRLF_PRIORITY 0 + +/** + * This is the priority that the internal ident filter will be registered with + */ #define GIT_FILTER_IDENT_PRIORITY 100 +/** + * This is the priority to use with a custom filter to imitate a core Git + * filter driver, so that it will be run last on checkout and first on + * checkin. You do not have to use this, but it helps compatibility. 
+ */ +#define GIT_FILTER_DRIVER_PRIORITY 200 + /** * Create a new empty filter list * diff --git a/src/ident.c b/src/ident.c index 3ea949859..23c407f16 100644 --- a/src/ident.c +++ b/src/ident.c @@ -8,6 +8,7 @@ #include "git2/sys/filter.h" #include "filter.h" #include "buffer.h" +#include "buf_text.h" static int ident_find_id( const char **id_start, const char **id_end, const char *start, size_t len) @@ -24,7 +25,7 @@ static int ident_find_id( len = remaining - 1; } - if (len < 3) + if (!found || len < 3) return GIT_ENOTFOUND; *id_start = found; @@ -99,6 +100,10 @@ static int ident_apply( { GIT_UNUSED(self); GIT_UNUSED(payload); + /* Don't filter binary files */ + if (git_buf_text_is_binary(from)) + return GIT_ENOTFOUND; + if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) return ident_insert_id(to, from, src); else diff --git a/tests-clar/checkout/crlf.c b/tests-clar/checkout/crlf.c index 4953609cc..9a4cbd313 100644 --- a/tests-clar/checkout/crlf.c +++ b/tests-clar/checkout/crlf.c @@ -159,21 +159,30 @@ void test_checkout_crlf__with_ident(void) cl_git_mkfile("crlf/lf.ident", ALL_LF_TEXT_RAW "\n$Id: initial content$\n"); cl_git_mkfile("crlf/crlf.ident", ALL_CRLF_TEXT_RAW "\r\n$Id$\r\n\r\n"); + cl_git_mkfile("crlf/more1.identlf", "$Id$\n" MORE_LF_TEXT_RAW); + cl_git_mkfile("crlf/more2.identcrlf", "\r\n$Id: $\r\n" MORE_CRLF_TEXT_RAW); cl_git_pass(git_repository_index(&index, g_repo)); cl_git_pass(git_index_add_bypath(index, "lf.ident")); cl_git_pass(git_index_add_bypath(index, "crlf.ident")); + cl_git_pass(git_index_add_bypath(index, "more1.identlf")); + cl_git_pass(git_index_add_bypath(index, "more2.identcrlf")); cl_repo_commit_from_index(NULL, g_repo, NULL, 0, "Some ident files\n"); git_checkout_head(g_repo, &opts); - /* check that blob has $Id$ */ + /* check that blobs have $Id$ */ cl_git_pass(git_blob_lookup(&blob, g_repo, & git_index_get_bypath(index, "lf.ident", 0)->oid)); cl_assert_equal_s( ALL_LF_TEXT_RAW "\n$Id$\n", git_blob_rawcontent(blob)); + 
git_blob_free(blob); + cl_git_pass(git_blob_lookup(&blob, g_repo, + & git_index_get_bypath(index, "more2.identcrlf", 0)->oid)); + cl_assert_equal_s( + "\n$Id$\n" MORE_CRLF_TEXT_AS_LF, git_blob_rawcontent(blob)); git_blob_free(blob); /* check that filesystem is initially untouched - matching core Git */ @@ -184,20 +193,39 @@ void test_checkout_crlf__with_ident(void) /* check that forced checkout rewrites correctly */ p_unlink("crlf/lf.ident"); - p_unlink("crlf/crlflf.ident"); + p_unlink("crlf/crlf.ident"); + p_unlink("crlf/more1.identlf"); + p_unlink("crlf/more2.identcrlf"); git_checkout_head(g_repo, &opts); - if (GIT_EOL_NATIVE == GIT_EOL_LF) + if (GIT_EOL_NATIVE == GIT_EOL_LF) { cl_assert_equal_file( ALL_LF_TEXT_RAW "\n$Id: fcf6d4d9c212dc66563b1171b1cd99953c756467$\n", 0, "crlf/lf.ident"); - else + cl_assert_equal_file( + ALL_CRLF_TEXT_AS_LF + "\n$Id: f2c66ad9b2b5a734d9bf00d5000cc10a62b8a857$\n\n", + 0, "crlf/crlf.ident"); + } else { cl_assert_equal_file( ALL_LF_TEXT_AS_CRLF "\r\n$Id: fcf6d4d9c212dc66563b1171b1cd99953c756467$\r\n", 0, "crlf/lf.ident"); + cl_assert_equal_file( + ALL_CRLF_TEXT_RAW + "\r\n$Id: f2c66ad9b2b5a734d9bf00d5000cc10a62b8a857$\r\n\r\n", + 0, "crlf/crlf.ident"); + } + + cl_assert_equal_file( + "$Id: f7830382dac1f1583422be5530fdfbd26289431b$\n" + MORE_LF_TEXT_AS_LF, 0, "crlf/more1.identlf"); + + cl_assert_equal_file( + "\r\n$Id: 74677a68413012ce8d7e7cfc3f12603df3a3eac4$\r\n" + MORE_CRLF_TEXT_AS_CRLF, 0, "crlf/more2.identcrlf"); git_index_free(index); } diff --git a/tests-clar/filter/custom.c b/tests-clar/filter/custom.c index a2752efa4..d6ad4b7a3 100644 --- a/tests-clar/filter/custom.c +++ b/tests-clar/filter/custom.c @@ -6,8 +6,9 @@ #include "git2/sys/filter.h" #include "git2/sys/repository.h" -#define BITFLIP_FILTER_PRIORITY 20 -#define REVERSE_FILTER_PRIORITY 25 +/* picked these to be >= GIT_FILTER_DRIVER_PRIORITY */ +#define BITFLIP_FILTER_PRIORITY 200 +#define REVERSE_FILTER_PRIORITY 250 #define VERY_SECURE_ENCRYPTION(b) ((b) ^ 0xff) @@ 
-255,3 +256,62 @@ void test_filter_custom__can_register_a_custom_filter_in_the_repository(void) cl_assert_equal_sz(0, git_filter_list_length(fl)); git_filter_list_free(fl); } + +void test_filter_custom__order_dependency(void) +{ + git_index *index; + git_blob *blob; + git_buf buf = { 0 }; + + /* so if ident and reverse are used together, an interesting thing + * happens - a reversed "$Id$" string is no longer going to trigger + * ident correctly. When checking out, the filters should be applied + * in order CRLF, then ident, then reverse, so ident expansion should + * work correctly. On check in, the content should be reversed, then + * ident, then CRLF filtered. Let's make sure that works... + */ + + cl_git_mkfile( + "empty_standard_repo/.gitattributes", + "hero.*.rev-ident text ident reverse eol=lf\n"); + + cl_git_mkfile( + "empty_standard_repo/hero.1.rev-ident", + "This is a test\n$Id$\nHave fun!\n"); + + cl_git_mkfile( + "empty_standard_repo/hero.2.rev-ident", + "Another test\n$dI$\nCrazy!\n"); + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_add_bypath(index, "hero.1.rev-ident")); + cl_git_pass(git_index_add_bypath(index, "hero.2.rev-ident")); + cl_repo_commit_from_index(NULL, g_repo, NULL, 0, "Filter chains\n"); + git_index_free(index); + + cl_git_pass(git_blob_lookup(&blob, g_repo, + & git_index_get_bypath(index, "hero.1.rev-ident", 0)->oid)); + cl_assert_equal_s( + "\n!nuf evaH\n$dI$\ntset a si sihT", git_blob_rawcontent(blob)); + cl_git_pass(git_blob_filtered_content(&buf, blob, "hero.1.rev-ident", 0)); + /* no expansion because id was reversed at checkin and now at ident + * time, reverse is not applied yet */ + cl_assert_equal_s( + "This is a test\n$Id$\nHave fun!\n", buf.ptr); + git_blob_free(blob); + + cl_git_pass(git_blob_lookup(&blob, g_repo, + & git_index_get_bypath(index, "hero.2.rev-ident", 0)->oid)); + cl_assert_equal_s( + "\n!yzarC\n$Id$\ntset rehtonA", git_blob_rawcontent(blob)); + 
cl_git_pass(git_blob_filtered_content(&buf, blob, "hero.2.rev-ident", 0)); + /* expansion because reverse was applied at checkin and at ident time, + * reverse is not applied yet */ + cl_assert_equal_s( + "Another test\n$59001fe193103b1016b27027c0c827d036fd0ac8 :dI$\nCrazy!\n", buf.ptr); + cl_assert_equal_i(0, git_oid_strcmp( + git_blob_id(blob), "8ca0df630d728c0c72072b6101b301391ef10095")); + git_blob_free(blob); + + git_buf_free(&buf); +} From eefc32d54944ead5a5e3041c1b1f6c8c946cc014 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 16 Sep 2013 12:54:40 -0700 Subject: [PATCH 24/25] Bug fixes and cleanups This contains a few bug fixes and some header and API cleanups. The main API change is that filters should now use GIT_PASSTHROUGH to indicate that they wish to skip processing a file instead of GIT_ENOTFOUND. The bug fixes include a possible out-of-range buffer access in the ident filter, a filter ordering problem I introduced into the custom filter tests on Windows, and a filter buf NUL termination issue that was coming up on Linux. 
--- include/git2/sys/filter.h | 101 +++++++++++++++++++++++++------------ src/array.h | 2 +- src/crlf.c | 18 +++---- src/filter.c | 14 ++--- src/ident.c | 26 +++++----- src/win32/pthread.c | 1 + tests-clar/filter/custom.c | 34 ++++++++++--- 7 files changed, 128 insertions(+), 68 deletions(-) diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index aa89c7b56..94ad3aed4 100644 --- a/include/git2/sys/filter.h +++ b/include/git2/sys/filter.h @@ -125,23 +125,54 @@ GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *sr * The filter lifecycle: * - initialize - first use of filter * - shutdown - filter removed/unregistered from system - * - check - considering for file - * - apply - applied to file + * - check - considering filter for file + * - apply - apply filter to file contents * - cleanup - done with file */ /** * Initialize callback on filter + * + * Specified as `filter.initialize`, this is an optional callback invoked + * before a filter is first used. It will be called once at most. + * + * If non-NULL, the filter's `initialize` callback will be invoked right + * before the first use of the filter, so you can defer expensive + * initialization operations (in case libgit2 is being used in a way that + * doesn't need the filter). */ typedef int (*git_filter_init_fn)(git_filter *self); /** * Shutdown callback on filter + * + * Specified as `filter.shutdown`, this is an optional callback invoked + * when the filter is unregistered or when libgit2 is shutting down. It + * will be called once at most and should release resources as needed. + * + * Typically this function will free the `git_filter` object itself. */ typedef void (*git_filter_shutdown_fn)(git_filter *self); /** * Callback to decide if a given source needs this filter + * + * Specified as `filter.check`, this is an optional callback that checks + * if filtering is needed for a given source. + * + * It should return 0 if the filter should be applied (i.e. 
success), + * GIT_PASSTHROUGH if the filter should not be applied, or an error code + * to fail out of the filter processing pipeline and return to the caller. + * + * The `attr_values` will be set to the values of any attributes given in + * the filter definition. See `git_filter` below for more detail. + * + * The `payload` will be a pointer to a reference payload for the filter. + * This will start as NULL, but `check` can assign to this pointer for + * later use by the `apply` callback. Note that the value should be heap + * allocated (not stack), so that it doesn't go away before the `apply` + * callback can use it. If a filter allocates and assigns a value to the + * `payload`, it will need a `cleanup` callback to free the payload. */ typedef int (*git_filter_check_fn)( git_filter *self, @@ -151,6 +182,15 @@ typedef int (*git_filter_check_fn)( /** * Callback to actually perform the data filtering + * + * Specified as `filter.apply`, this is the callback that actually filters + * data. If it successfully writes the output, it should return 0. Like + * `check`, it can return GIT_PASSTHROUGH to indicate that the filter + * doesn't want to run. Other error codes will stop filter processing and + * return to the caller. + * + * The `payload` value will refer to any payload that was set by the + * `check` callback. It may be read from or written to as needed. */ typedef int (*git_filter_apply_fn)( git_filter *self, @@ -161,18 +201,22 @@ typedef int (*git_filter_apply_fn)( /** * Callback to clean up after filtering has been applied + * + * Specified as `filter.cleanup`, this is an optional callback invoked + * after the filter has been applied. If the `check` or `apply` callbacks + * allocated a `payload` to keep per-source filter state, use this + * callback to free that payload and release resources as required. */ typedef void (*git_filter_cleanup_fn)( git_filter *self, void *payload); /** - * Filter structure used to register a new filter. 
+ * Filter structure used to register custom filters. * - * To associate extra data with a filter, simply allocate extra data - * and put the `git_filter` struct at the start of your data buffer, - * then cast the `self` pointer to your larger structure when your - * callback is invoked. + * To associate extra data with a filter, allocate extra data and put the + * `git_filter` struct at the start of your data buffer, then cast the + * `self` pointer to your larger structure when your callback is invoked. * * `version` should be set to GIT_FILTER_VERSION * @@ -182,28 +226,8 @@ typedef void (*git_filter_cleanup_fn)( * a value (i.e. "name=value"), the attribute must match that value for * the filter to be applied. * - * `initialize` is an optional callback invoked before a filter is first - * used. It will be called once at most. - * - * `shutdown` is an optional callback invoked when the filter is - * unregistered or when libgit2 is shutting down. It will be called once - * at most and should free any memory as needed. - * - * `check` is an optional callback that checks if filtering is needed for - * a given source. It should return 0 if the filter should be applied - * (i.e. success), GIT_ENOTFOUND if the filter should not be applied, or - * an other error code to fail out of the filter processing pipeline and - * return to the caller. - * - * `apply` is the callback that actually filters data. If it successfully - * writes the output, it should return 0. Like `check`, it can return - * GIT_ENOTFOUND to indicate that the filter doesn't actually want to run. - * Other error codes will stop filter processing and return to the caller. - * - * `cleanup` is an optional callback that is made after the filter has - * been applied. Both the `check` and `apply` callbacks are able to - * allocate a `payload` to keep per-source filter state, and this callback - * is given that value and can clean up as needed. 
+ * The `initialize`, `shutdown`, `check`, `apply`, and `cleanup` callbacks + * are all documented above with the respective function pointer typedefs. */ struct git_filter { unsigned int version; @@ -222,9 +246,8 @@ struct git_filter { /** * Register a filter under a given name with a given priority. * - * If non-NULL, the filter's initialize callback will be invoked before - * the first use of the filter, so you can defer expensive operations (in - * case libgit2 is being used in a way that doesn't need the filter). + * As mentioned elsewhere, the initialize callback will not be invoked + * immediately. It is deferred until the filter is used in some way. * * A filter's attribute checks and `check` and `apply` callbacks will be * issued in order of `priority` on smudge (to workdir), and in reverse @@ -237,6 +260,14 @@ struct git_filter { * Currently the filter registry is not thread safe, so any registering or * deregistering of filters must be done outside of any possible usage of * the filters (i.e. during application setup or shutdown). + * + * @param name A name by which the filter can be referenced. Attempting + * to register with an in-use name will return GIT_EEXISTS. + * @param filter The filter definition. This pointer will be stored as is + * by libgit2 so it must be a durable allocation (either static + * or on the heap). + * @param priority The priority for filter application + * @return 0 on successful registry, error code <0 on failure */ GIT_EXTERN(int) git_filter_register( const char *name, git_filter *filter, int priority); @@ -244,11 +275,15 @@ GIT_EXTERN(int) git_filter_register( /** * Remove the filter with the given name * - * It is not allowed to remove the builtin libgit2 filters. + * Attempting to remove the builtin libgit2 filters is not permitted and + * will return an error. 
* * Currently the filter registry is not thread safe, so any registering or * deregistering of filters must be done outside of any possible usage of * the filters (i.e. during application setup or shutdown). + * + * @param name The name under which the filter was registered + * @return 0 on success, error code <0 on failure */ GIT_EXTERN(int) git_filter_unregister(const char *name); diff --git a/src/array.h b/src/array.h index b82079bd8..d7272d78c 100644 --- a/src/array.h +++ b/src/array.h @@ -59,7 +59,7 @@ GIT_INLINE(void *) git_array_grow(void *_a, size_t item_size) #define git_array_alloc(a) \ ((a).size >= (a).asize) ? \ git_array_grow(&(a), sizeof(*(a).ptr)) : \ - (a).ptr ? &(a).ptr[(a).size++] : NULL + ((a).ptr ? &(a).ptr[(a).size++] : NULL) #define git_array_last(a) ((a).size ? &(a).ptr[(a).size - 1] : NULL) diff --git a/src/crlf.c b/src/crlf.c index 6b1fe46a3..b4eda267b 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -143,7 +143,7 @@ static int crlf_apply_to_odb( * stuff? */ if (stats.cr != stats.crlf) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; if (ca->crlf_action == GIT_CRLF_GUESS) { /* @@ -151,11 +151,11 @@ static int crlf_apply_to_odb( * This is the new safer autocrlf handling. 
*/ if (has_cr_in_index(src)) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; } if (!stats.cr) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; } /* Actually drop the carriage returns */ @@ -211,7 +211,7 @@ static int crlf_apply_to_workdir( /* Don't filter binary files */ if (git_buf_text_is_binary(from)) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; /* Determine proper line ending */ workdir_ending = line_ending(ca); @@ -220,10 +220,10 @@ static int crlf_apply_to_workdir( if (!strcmp("\n", workdir_ending)) { if (ca->crlf_action == GIT_CRLF_GUESS && ca->auto_crlf) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; if (git_buf_find(from, '\r') < 0) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; if (git_buf_text_crlf_to_lf(to, from) < 0) return -1; @@ -267,7 +267,7 @@ static int crlf_check( ca.crlf_action = crlf_input_action(&ca); if (ca.crlf_action == GIT_CRLF_BINARY) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; if (ca.crlf_action == GIT_CRLF_GUESS) { error = git_repository__cvar( @@ -276,7 +276,7 @@ static int crlf_check( return error; if (ca.auto_crlf == GIT_AUTO_CRLF_FALSE) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; } *payload = git__malloc(sizeof(ca)); @@ -296,7 +296,7 @@ static int crlf_apply( /* initialize payload in case `check` was bypassed */ if (!*payload) { int error = crlf_check(self, payload, src, NULL); - if (error < 0 && error != GIT_ENOTFOUND) + if (error < 0 && error != GIT_PASSTHROUGH) return error; } diff --git a/src/filter.c b/src/filter.c index 378209800..503f18555 100644 --- a/src/filter.c +++ b/src/filter.c @@ -235,7 +235,7 @@ int git_filter_register( if (!filter_registry_find(NULL, name)) { giterr_set( GITERR_FILTER, "Attempt to reregister existing filter '%s'", name); - return -1; + return GIT_EEXISTS; } if (filter_def_scan_attrs(&attrs, &nattr, &nmatch, filter->attributes) < 0) @@ -270,7 +270,7 @@ int git_filter_unregister(const char *name) git_filter_def *fdef; /* cannot unregister default filters */ - if 
(!strcmp(GIT_FILTER_CRLF, name)) { + if (!strcmp(GIT_FILTER_CRLF, name) || !strcmp(GIT_FILTER_IDENT, name)) { giterr_set(GITERR_FILTER, "Cannot unregister filter '%s'", name); return -1; } @@ -476,7 +476,7 @@ int git_filter_list_load( git__free((void *)values); - if (error == GIT_ENOTFOUND) + if (error == GIT_PASSTHROUGH) error = 0; else if (error < 0) break; @@ -609,11 +609,13 @@ int git_filter_list_apply_to_data( error = fe->filter->apply( fe->filter, &fe->payload, dbuffer[di], dbuffer[si], &fl->source); - if (error == GIT_ENOTFOUND) + if (error == GIT_PASSTHROUGH) { + /* PASSTHROUGH means filter decided not to process the buffer */ error = 0; - else if (!error) + } else if (!error) { + git_buf_shorten(dbuffer[di], 0); /* force NUL termination */ si = di; /* swap buffers */ - else { + } else { tgt->size = 0; return error; } diff --git a/src/ident.c b/src/ident.c index 23c407f16..51630879d 100644 --- a/src/ident.c +++ b/src/ident.c @@ -13,23 +13,25 @@ static int ident_find_id( const char **id_start, const char **id_end, const char *start, size_t len) { - const char *found; + const char *end = start + len, *found = NULL; - while (len > 0 && (found = memchr(start, '$', len)) != NULL) { - size_t remaining = len - (size_t)(found - start); + while (len > 3 && (found = memchr(start, '$', len)) != NULL) { + size_t remaining = (size_t)(end - found) - 1; if (remaining < 3) return GIT_ENOTFOUND; - if (found[1] == 'I' && found[2] == 'd') - break; + start = found + 1; - len = remaining - 1; + len = remaining; + + if (start[0] == 'I' && start[1] == 'd') + break; } - if (!found || len < 3) + if (len < 3 || !found) return GIT_ENOTFOUND; *id_start = found; - if ((found = memchr(found + 3, '$', len - 3)) == NULL) + if ((found = memchr(start + 2, '$', len - 2)) == NULL) return GIT_ENOTFOUND; *id_end = found + 1; @@ -46,12 +48,12 @@ static int ident_insert_id( /* replace $Id$ with blob id */ if (!git_filter_source_id(src)) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; 
git_oid_tostr(oid, sizeof(oid), git_filter_source_id(src)); if (ident_find_id(&id_start, &id_end, from->ptr, from->size) < 0) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; need_size = (size_t)(id_start - from->ptr) + 5 /* "$Id: " */ + GIT_OID_HEXSZ + 1 /* "$" */ + @@ -76,7 +78,7 @@ static int ident_remove_id( size_t need_size; if (ident_find_id(&id_start, &id_end, from->ptr, from->size) < 0) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; need_size = (size_t)(id_start - from->ptr) + 4 /* "$Id$" */ + (size_t)(from_end - id_end); @@ -102,7 +104,7 @@ static int ident_apply( /* Don't filter binary files */ if (git_buf_text_is_binary(from)) - return GIT_ENOTFOUND; + return GIT_PASSTHROUGH; if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) return ident_insert_id(to, from, src); diff --git a/src/win32/pthread.c b/src/win32/pthread.c index 8c7ef2856..db8927471 100644 --- a/src/win32/pthread.c +++ b/src/win32/pthread.c @@ -6,6 +6,7 @@ */ #include "pthread.h" +#include "../global.h" int pthread_create( pthread_t *GIT_RESTRICT thread, diff --git a/tests-clar/filter/custom.c b/tests-clar/filter/custom.c index d6ad4b7a3..a81885c28 100644 --- a/tests-clar/filter/custom.c +++ b/tests-clar/filter/custom.c @@ -6,9 +6,11 @@ #include "git2/sys/filter.h" #include "git2/sys/repository.h" -/* picked these to be >= GIT_FILTER_DRIVER_PRIORITY */ -#define BITFLIP_FILTER_PRIORITY 200 -#define REVERSE_FILTER_PRIORITY 250 +/* going TO_WORKDIR, filters are executed low to high + * going TO_ODB, filters are executed high to low + */ +#define BITFLIP_FILTER_PRIORITY -1 +#define REVERSE_FILTER_PRIORITY -2 #define VERY_SECURE_ENCRYPTION(b) ((b) ^ 0xff) @@ -149,13 +151,13 @@ static void reverse_filter_free(git_filter *f) git__free(f); } -static git_filter *create_reverse_filter(void) +static git_filter *create_reverse_filter(const char *attrs) { git_filter *filter = git__calloc(1, sizeof(git_filter)); cl_assert(filter); filter->version = GIT_FILTER_VERSION; - filter->attributes = 
"+reverse"; + filter->attributes = attrs; filter->shutdown = reverse_filter_free; filter->apply = reverse_filter_apply; @@ -171,7 +173,14 @@ static void register_custom_filters(void) "bitflip", create_bitflip_filter(), BITFLIP_FILTER_PRIORITY)); cl_git_pass(git_filter_register( - "reverse", create_reverse_filter(), REVERSE_FILTER_PRIORITY)); + "reverse", create_reverse_filter("+reverse"), + REVERSE_FILTER_PRIORITY)); + + /* re-register reverse filter with standard filter=xyz priority */ + cl_git_pass(git_filter_register( + "pre-reverse", + create_reverse_filter("+prereverse"), + GIT_FILTER_DRIVER_PRIORITY)); filters_registered = 1; } @@ -273,7 +282,7 @@ void test_filter_custom__order_dependency(void) cl_git_mkfile( "empty_standard_repo/.gitattributes", - "hero.*.rev-ident text ident reverse eol=lf\n"); + "hero.*.rev-ident text ident prereverse eol=lf\n"); cl_git_mkfile( "empty_standard_repo/hero.1.rev-ident", @@ -315,3 +324,14 @@ void test_filter_custom__order_dependency(void) git_buf_free(&buf); } + +void test_filter_custom__filter_registry_failure_cases(void) +{ + git_filter fake = { GIT_FILTER_VERSION, 0 }; + + cl_assert_equal_i(GIT_EEXISTS, git_filter_register("bitflip", &fake, 0)); + + cl_git_fail(git_filter_unregister(GIT_FILTER_CRLF)); + cl_git_fail(git_filter_unregister(GIT_FILTER_IDENT)); + cl_assert_equal_i(GIT_ENOTFOUND, git_filter_unregister("not-a-filter")); +} From f60ed4e6495b8bf68d0604335672e6f300330b3b Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 16 Sep 2013 15:08:35 -0700 Subject: [PATCH 25/25] Update clar and recreate cl_assert_equal_sz This updates clar to the version without cl_assert_equal_sz and then adds a new version of that macro to clar_libgit2.h. The new version works around a strange issue that seemed to be arising on release builds with VS 10 64-bit builds.
--- tests-clar/clar.h | 1 - tests-clar/clar_libgit2.h | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests-clar/clar.h b/tests-clar/clar.h index c40bc7ac9..e1f244eba 100644 --- a/tests-clar/clar.h +++ b/tests-clar/clar.h @@ -68,7 +68,6 @@ void cl_fixture_cleanup(const char *fixture_name); #define cl_assert_equal_p(p1,p2) clar__assert_equal(__FILE__,__LINE__,"Pointer mismatch: " #p1 " != " #p2, 1, "%p", (p1), (p2)) -#define cl_assert_equal_sz(sz1,sz2) clar__assert_equal(__FILE__,__LINE__,#sz1 " != " #sz2, 1, "%"PRIuZ, (size_t)(sz1), (size_t)(sz2)) void clar__fail( const char *file, diff --git a/tests-clar/clar_libgit2.h b/tests-clar/clar_libgit2.h index f2d9c4d0b..c37306bc4 100644 --- a/tests-clar/clar_libgit2.h +++ b/tests-clar/clar_libgit2.h @@ -43,6 +43,11 @@ GIT_INLINE(void) clar__assert_in_range( } } +#define cl_assert_equal_sz(sz1,sz2) do { \ + size_t __sz1 = (sz1), __sz2 = (sz2); \ + clar__assert_equal(__FILE__,__LINE__,#sz1 " != " #sz2, 1, "%"PRIuZ, __sz1, __sz2); \ +} while (0) + #define cl_assert_in_range(L,V,H) \ clar__assert_in_range((L),(V),(H),__FILE__,__LINE__,"Range check: " #V " in [" #L "," #H "]", 1)