diff --git a/src/blob.c b/src/blob.c index 245326157..e1f4a7f6a 100644 --- a/src/blob.c +++ b/src/blob.c @@ -115,19 +115,18 @@ static int write_file_filtered( if (error < GIT_SUCCESS) return error; - error = git_filter__apply(&dest, &source, filters); + error = git_filters_apply(&dest, &source, filters); - if (error < GIT_SUCCESS) { - git_buf_free(&source); - git_buf_free(&dest); - return error; + /* Free the source as soon as possible. This can be big in memory, + * and we don't want to ODB write to choke */ + git_buf_free(&source); + + if (error == GIT_SUCCESS) { + /* Write the file to disk if it was properly filtered */ + error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB); } - error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB); - - git_buf_free(&source); git_buf_free(&dest); - return GIT_SUCCESS; } @@ -186,18 +185,25 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat error = write_symlink(oid, odb, full_path.ptr, (size_t)size); } else { git_vector write_filters = GIT_VECTOR_INIT; + int filter_count; - if ((error = git_filter__load_for_file( - &write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS) + /* Load the filters for writing this file to the ODB */ + filter_count = git_filters_load(&write_filters, repo, path, GIT_FILTER_TO_ODB); + + if (filter_count < 0) { + /* Negative value means there was a critical error */ + error = filter_count; goto cleanup; - - if (write_filters.length == 0) { + } else if (filter_count == 0) { + /* No filters need to be applied to the document: we can stream + * directly from disk */ error = write_file_stream(oid, odb, full_path.ptr, size); } else { + /* We need to apply one or more filters */ error = write_file_filtered(oid, odb, full_path.ptr, &write_filters); } - git_filter__free(&write_filters); + git_filters_free(&write_filters); /* * TODO: eventually support streaming filtered files, for files which are bigger diff --git a/src/crlf.c b/src/crlf.c index d8dd1c382..feaa687ee 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -102,18 +102,74 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con return error; } +static int drop_crlf(git_buf *dest, const git_buf *source) +{ + size_t psize = source->size - 1; + size_t i = 0; + + /* Initial scan: see if we can reach the end of the document + * without finding a single carriage return */ + while (i < psize && source->ptr[i] != '\r') + i++; + + /* Clean file? Tell the library to skip this filter */ + if (i == psize) + return -1; + + /* Main scan loop. Keep moving forward until we find a carriage + * return, and then copy the whole chunk to the destination + * buffer. + * + * Note that we only scan until `size - 1`, because we cannot drop a + * carriage return if it's the last character in the file (what a weird + * file, anyway) + */ + while (i < psize) { + size_t org = i; + + while (i < psize && source->ptr[i] != '\r') + i++; + + if (i > org) + git_buf_put(dest, source->ptr + org, i - org); + + /* We found a carriage return. Is the next character a newline? + * If it is, we just keep moving. The newline will be copied + * to the dest in the next chunk. + * + * If it's not a newline, we need to insert the carriage return + * into the dest buffer, because we don't drop lone CRs. + */ + if (source->ptr[i + 1] != '\n') { + git_buf_putc(dest, '\r'); + } + + i++; + } + + /* Copy the last character in the file */ + git_buf_putc(dest, source->ptr[psize]); + return 0; +} + static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source) { - size_t i = 0; struct crlf_filter *filter = (struct crlf_filter *)self; assert(self && dest && source); + /* Empty file? Nothing to do */ + if (source->size == 0) + return 0; + + /* Heuristics to see if we can skip the conversion. + * Straight from Core Git. + */ if (filter->attrs.crlf_action == GIT_CRLF_AUTO || filter->attrs.crlf_action == GIT_CRLF_GUESS) { git_text_stats stats; - git_text__stat(&stats, source); + git_text_gather_stats(&stats, source); /* * We're currently not going to even try to convert stuff @@ -126,7 +182,7 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou /* * And add some heuristics for binary vs text, of course... */ - if (git_text__is_binary(&stats)) + if (git_text_is_binary(&stats)) return -1; #if 0 @@ -144,50 +200,42 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou return -1; } - /* TODO: do not copy anything if there isn't a single CR */ - while (i < source->size) { - size_t org = i; - - while (i < source->size && source->ptr[i] != '\r') - i++; - - if (i > org) - git_buf_put(dest, source->ptr + org, i - org); - - i++; - - if (i >= source->size || source->ptr[i] != '\n') { - git_buf_putc(dest, '\r'); - } - } - - return 0; + /* Actually drop the carriage returns */ + return drop_crlf(dest, source); } -int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path) +int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path) { - struct crlf_filter filter; + struct crlf_attrs ca; + struct crlf_filter *filter; int error; - filter.f.apply = &crlf_apply_to_odb; - filter.f.do_free = NULL; - - if ((error = crlf_load_attributes(&filter.attrs, repo, path)) < 0) + /* Load gitattributes for the path */ + if ((error = crlf_load_attributes(&ca, repo, path)) < 0) return error; - filter.attrs.crlf_action = crlf_input_action(&filter.attrs); + /* + * Use the core Git logic to see if we should perform CRLF for this file + * based on its attributes & the value of `core.auto_crlf` + */ + ca.crlf_action = crlf_input_action(&ca); - if (filter.attrs.crlf_action == GIT_CRLF_BINARY) + if (ca.crlf_action == GIT_CRLF_BINARY) return 0; - if (filter.attrs.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE) + if (ca.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE) return 0; - *filter_out = git__malloc(sizeof(struct crlf_filter)); - if (*filter_out == NULL) + /* If we're good, we create a new filter object and push it + * into the filters array */ + filter = git__malloc(sizeof(struct crlf_filter)); + if (filter == NULL) return GIT_ENOMEM; - memcpy(*filter_out, &filter, sizeof(struct crlf_attrs)); - return 0; + filter->f.apply = &crlf_apply_to_odb; + filter->f.do_free = NULL; + memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs)); + + return git_vector_insert(filters, filter); } diff --git a/src/filter.c b/src/filter.c index f517512dd..92b3566af 100644 --- a/src/filter.c +++ b/src/filter.c @@ -13,7 +13,7 @@ #include "git2/config.h" /* Fresh from Core Git. I wonder what we could use this for... */ -void git_text__stat(git_text_stats *stats, const git_buf *text) +void git_text_gather_stats(git_text_stats *stats, const git_buf *text) { size_t i; @@ -65,7 +65,7 @@ void git_text__stat(git_text_stats *stats, const git_buf *text) /* * Fresh from Core Git */ -int git_text__is_binary(git_text_stats *stats) +int git_text_is_binary(git_text_stats *stats) { if (stats->nul) return 1; @@ -84,90 +84,7 @@ int git_text__is_binary(git_text_stats *stats) return 0; } -int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode) -{ - int error; - git_filter *crlf_filter = NULL; - - error = git_filter__load_settings(repo); - if (error < GIT_SUCCESS) - return error; - - if (mode == GIT_FILTER_TO_ODB) { - error = git_filter__crlf_to_odb(&crlf_filter, repo, path); - if (error < GIT_SUCCESS) - return error; - - if (crlf_filter != NULL) - git_vector_insert(filters, crlf_filter); - - } else { - return git__throw(GIT_ENOTIMPLEMENTED, - "Worktree filters are not implemented yet"); - } - - return 0; -} - -void git_filter__free(git_vector *filters) -{ - size_t i; - git_filter *filter; - - git_vector_foreach(filters, i, filter) { - if (filter->do_free != NULL) - filter->do_free(filter); - else - free(filter); - } - - git_vector_free(filters); -} - -int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters) -{ - unsigned int src, dst, i; - git_buf *dbuffer[2]; - - dbuffer[0] = source; - dbuffer[1] = dest; - - src = 0; - - /* Pre-grow the destination buffer to more or less the size - * we expect it to have */ - if (git_buf_grow(dest, source->size) < 0) - return GIT_ENOMEM; - - for (i = 0; i < filters->length; ++i) { - git_filter *filter = git_vector_get(filters, i); - dst = (src + 1) % 2; - - git_buf_clear(dbuffer[dst]); - - /* Apply the filter, from dbuffer[src] to dbuffer[dst]; - * if the filtering is canceled by the user mid-filter, - * we skip to the next filter without changing the source - * of the double buffering (so that the text goes through - * cleanly). - */ - if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) { - src = (src + 1) % 2; - } - - if (git_buf_oom(dbuffer[dst])) - return GIT_ENOMEM; - } - - /* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */ - if (dst != 1) { - git_buf_swap(dest, source); - } - - return GIT_SUCCESS; -} - -int git_filter__load_settings(git_repository *repo) +static int load_repository_settings(git_repository *repo) { static git_cvar_map map_eol[] = { {GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET}, @@ -210,3 +127,90 @@ int git_filter__load_settings(git_repository *repo) repo->filter_options.loaded = 1; return 0; } + +int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode) +{ + int error; + + /* Make sure that the relevant settings from `gitconfig` have been + * cached on the repository struct to speed things up */ + error = load_repository_settings(repo); + if (error < GIT_SUCCESS) + return error; + + if (mode == GIT_FILTER_TO_ODB) { + /* Load the CRLF cleanup filter when writing to the ODB */ + error = git_filter_add__crlf_to_odb(filters, repo, path); + if (error < GIT_SUCCESS) + return error; + } else { + return git__throw(GIT_ENOTIMPLEMENTED, + "Worktree filters are not implemented yet"); + } + + return (int)filters->length; +} + +void git_filters_free(git_vector *filters) +{ + size_t i; + git_filter *filter; + + git_vector_foreach(filters, i, filter) { + if (filter->do_free != NULL) + filter->do_free(filter); + else + free(filter); + } + + git_vector_free(filters); +} + +int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) +{ + unsigned int src, dst, i; + git_buf *dbuffer[2]; + + dbuffer[0] = source; + dbuffer[1] = dest; + + src = 0; + + if (source->size == 0) { + git_buf_clear(dest); + return GIT_SUCCESS; + } + + /* Pre-grow the destination buffer to more or less the size + * we expect it to have */ + if (git_buf_grow(dest, source->size) < 0) + return GIT_ENOMEM; + + for (i = 0; i < filters->length; ++i) { + git_filter *filter = git_vector_get(filters, i); + dst = (src + 1) % 2; + + git_buf_clear(dbuffer[dst]); + + /* Apply the filter, from dbuffer[src] to dbuffer[dst]; + * if the filtering is canceled by the user mid-filter, + * we skip to the next filter without changing the source + * of the double buffering (so that the text goes through + * cleanly). + */ + if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) { + src = (src + 1) % 2; + } + + if (git_buf_oom(dbuffer[dst])) + return GIT_ENOMEM; + } + + /* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */ + if (dst != 1) { + git_buf_swap(dest, source); + } + + return GIT_SUCCESS; +} + diff --git a/src/filter.h b/src/filter.h index 0cf92bd1d..601be1836 100644 --- a/src/filter.h +++ b/src/filter.h @@ -60,19 +60,81 @@ typedef struct { unsigned int printable, nonprintable; } git_text_stats; -extern int git_filter__load_settings(git_repository *repo); -extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode); -extern void git_filter__free(git_vector *filters); -extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters); +/* + * FILTER API + */ -/* Gather stats for a piece of text */ -extern void git_text__stat(git_text_stats *stats, const git_buf *text); +/* + * For any given path in the working directory, fill the `filters` + * array with the relevant filters that need to be applied. + * + * Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the + * filters that will be used when checking out a file to the working + * directory, or `GIT_FILTER_TO_ODB` for the filters used when writing + * a file to the ODB. + * + * @param filters Vector where to store all the loaded filters + * @param repo Repository object that contains `path` + * @param path Relative path of the file to be filtered + * @param mode Filtering direction (WT->ODB or ODB->WT) + * @return the number of filters loaded for the file (0 if the file + * doesn't need filtering), or a negative error code + */ +extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode); -/* Heuristics on a set of text stats to check whether it's binary - * text or not */ -extern int git_text__is_binary(git_text_stats *stats); +/* + * Apply one or more filters to a file. + * + * The file must have been loaded as a `git_buf` object. Both the `source` + * and `dest` buffers are owned by the caller and must be freed once + * they are no longer needed. + * + * NOTE: Because of the double-buffering schema, the `source` buffer that contains + * the original file may be tampered once the filtering is complete. Regardless, + * the `dest` buffer will always contain the final result of the filtering + * + * @param dest Buffer to store the result of the filtering + * @param source Buffer containing the document to filter + * @param filters A non-empty vector of filters as supplied by `git_filters_load` + * @return GIT_SUCCESS on success, an error code otherwise + */ +extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters); -/* Available filters */ -extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path); +/* + * Free the `filters` array generated by `git_filters_load`. + * + * Note that this frees both the array and its contents. The array will + * be clean/reusable after this call. + * + * @param filters A filters array as supplied by `git_filters_load` + */ +extern void git_filters_free(git_vector *filters); + +/* + * Available filters + */ + +/* Strip CRLF, from Worktree to ODB */ +extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path); + + +/* + * PLAINTEXT API + */ + +/* + * Gather stats for a piece of text + * + * Fill the `stats` structure with information on the number of + * unreadable characters, carriage returns, etc, so it can be + * used in heuristics. + */ +extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text); + +/* + * Process `git_text_stats` data generated by `git_text_stat` to see + * if it qualifies as a binary file + */ +extern int git_text_is_binary(git_text_stats *stats); #endif