filter: Beautiful refactoring

Comments soothe my soul.
This commit is contained in:
Vicent Martí 2012-03-01 21:19:51 +01:00
parent 788430c8e3
commit 47a899ffed
4 changed files with 265 additions and 145 deletions

View File

@ -115,19 +115,18 @@ static int write_file_filtered(
if (error < GIT_SUCCESS)
return error;
error = git_filter__apply(&dest, &source, filters);
error = git_filters_apply(&dest, &source, filters);
if (error < GIT_SUCCESS) {
git_buf_free(&source);
git_buf_free(&dest);
return error;
/* Free the source as soon as possible. This can be big in memory,
* and we don't want the ODB write to choke */
git_buf_free(&source);
if (error == GIT_SUCCESS) {
/* Write the file to disk if it was properly filtered */
error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
}
error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
git_buf_free(&source);
git_buf_free(&dest);
return GIT_SUCCESS;
}
@ -186,18 +185,25 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
error = write_symlink(oid, odb, full_path.ptr, (size_t)size);
} else {
git_vector write_filters = GIT_VECTOR_INIT;
int filter_count;
if ((error = git_filter__load_for_file(
&write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS)
/* Load the filters for writing this file to the ODB */
filter_count = git_filters_load(&write_filters, repo, path, GIT_FILTER_TO_ODB);
if (filter_count < 0) {
/* Negative value means there was a critical error */
error = filter_count;
goto cleanup;
if (write_filters.length == 0) {
} else if (filter_count == 0) {
/* No filters need to be applied to the document: we can stream
* directly from disk */
error = write_file_stream(oid, odb, full_path.ptr, size);
} else {
/* We need to apply one or more filters */
error = write_file_filtered(oid, odb, full_path.ptr, &write_filters);
}
git_filter__free(&write_filters);
git_filters_free(&write_filters);
/*
* TODO: eventually support streaming filtered files, for files which are bigger

View File

@ -102,18 +102,74 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con
return error;
}
/*
 * Copy `source` into `dest`, dropping every carriage return that is
 * immediately followed by a newline. Lone carriage returns are kept.
 *
 * Returns 0 when `dest` has been filled with the converted text, or -1
 * when nothing was written because the input is empty or no carriage
 * return was found before the last byte; the caller is expected to skip
 * this filter in that case.
 *
 * Results of the `git_buf_put*` calls are not checked here; the caller
 * in this file checks the destination with `git_buf_oom` after the
 * filter has run.
 */
static int drop_crlf(git_buf *dest, const git_buf *source)
{
	size_t psize;
	size_t i = 0;

	/* Guard the `size - 1` below against wrapping around */
	if (source->size == 0)
		return -1;

	/* Index of the last byte; it is never scanned, only copied at the end */
	psize = source->size - 1;

	/* Initial scan: see if we can reach the end of the document
	 * without finding a single carriage return */
	while (i < psize && source->ptr[i] != '\r')
		i++;

	/* Clean file? Tell the library to skip this filter */
	if (i == psize)
		return -1;

	/* Everything before the first carriage return is kept verbatim.
	 * The main loop resumes at `i`, so this prefix has to be copied
	 * here or it would be lost. */
	if (i > 0)
		git_buf_put(dest, source->ptr, i);

	/* Main scan loop. Keep moving forward until we find a carriage
	 * return, and then copy the whole chunk to the destination
	 * buffer.
	 *
	 * Note that we only scan until `size - 1`, because we cannot drop a
	 * carriage return if it's the last character in the file.
	 */
	while (i < psize) {
		size_t org = i;

		while (i < psize && source->ptr[i] != '\r')
			i++;

		if (i > org)
			git_buf_put(dest, source->ptr + org, i - org);

		/* The scan ran out of input without hitting a carriage return:
		 * `ptr[i]` is the last byte, not a CR, so there is nothing to
		 * decide and `ptr[i + 1]` must not be read. */
		if (i == psize)
			break;

		/* We found a carriage return. Is the next character a newline?
		 * If it is, we just keep moving. The newline will be copied
		 * to the dest in the next chunk (or as the last character).
		 *
		 * If it's not a newline, we need to insert the carriage return
		 * into the dest buffer, because we don't drop lone CRs.
		 */
		if (source->ptr[i + 1] != '\n')
			git_buf_putc(dest, '\r');

		i++;
	}

	/* Copy the last character in the file */
	git_buf_putc(dest, source->ptr[psize]);
	return 0;
}
static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
{
size_t i = 0;
struct crlf_filter *filter = (struct crlf_filter *)self;
assert(self && dest && source);
/* Empty file? Nothing to do */
if (source->size == 0)
return 0;
/* Heuristics to see if we can skip the conversion.
* Straight from Core Git.
*/
if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
filter->attrs.crlf_action == GIT_CRLF_GUESS) {
git_text_stats stats;
git_text__stat(&stats, source);
git_text_gather_stats(&stats, source);
/*
* We're currently not going to even try to convert stuff
@ -126,7 +182,7 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
/*
* And add some heuristics for binary vs text, of course...
*/
if (git_text__is_binary(&stats))
if (git_text_is_binary(&stats))
return -1;
#if 0
@ -144,50 +200,42 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
return -1;
}
/* TODO: do not copy anything if there isn't a single CR */
while (i < source->size) {
size_t org = i;
while (i < source->size && source->ptr[i] != '\r')
i++;
if (i > org)
git_buf_put(dest, source->ptr + org, i - org);
i++;
if (i >= source->size || source->ptr[i] != '\n') {
git_buf_putc(dest, '\r');
}
}
return 0;
/* Actually drop the carriage returns */
return drop_crlf(dest, source);
}
int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path)
int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path)
{
struct crlf_filter filter;
struct crlf_attrs ca;
struct crlf_filter *filter;
int error;
filter.f.apply = &crlf_apply_to_odb;
filter.f.do_free = NULL;
if ((error = crlf_load_attributes(&filter.attrs, repo, path)) < 0)
/* Load gitattributes for the path */
if ((error = crlf_load_attributes(&ca, repo, path)) < 0)
return error;
filter.attrs.crlf_action = crlf_input_action(&filter.attrs);
/*
* Use the core Git logic to see if we should perform CRLF for this file
* based on its attributes & the value of `core.auto_crlf`
*/
ca.crlf_action = crlf_input_action(&ca);
if (filter.attrs.crlf_action == GIT_CRLF_BINARY)
if (ca.crlf_action == GIT_CRLF_BINARY)
return 0;
if (filter.attrs.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
if (ca.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
return 0;
*filter_out = git__malloc(sizeof(struct crlf_filter));
if (*filter_out == NULL)
/* If we're good, we create a new filter object and push it
* into the filters array */
filter = git__malloc(sizeof(struct crlf_filter));
if (filter == NULL)
return GIT_ENOMEM;
memcpy(*filter_out, &filter, sizeof(struct crlf_attrs));
return 0;
filter->f.apply = &crlf_apply_to_odb;
filter->f.do_free = NULL;
memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs));
return git_vector_insert(filters, filter);
}

View File

@ -13,7 +13,7 @@
#include "git2/config.h"
/* Fresh from Core Git. I wonder what we could use this for... */
void git_text__stat(git_text_stats *stats, const git_buf *text)
void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
{
size_t i;
@ -65,7 +65,7 @@ void git_text__stat(git_text_stats *stats, const git_buf *text)
/*
* Fresh from Core Git
*/
int git_text__is_binary(git_text_stats *stats)
int git_text_is_binary(git_text_stats *stats)
{
if (stats->nul)
return 1;
@ -84,90 +84,7 @@ int git_text__is_binary(git_text_stats *stats)
return 0;
}
/*
 * Load the filters that apply to `path` into the `filters` vector.
 *
 * Only `GIT_FILTER_TO_ODB` is handled; any other mode returns the
 * `GIT_ENOTIMPLEMENTED` error produced by `git__throw`.
 *
 * Returns 0 on success (whether or not a filter was added), or a
 * negative error code.
 */
int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode)
{
	int error;
	git_filter *crlf_filter = NULL;

	error = git_filter__load_settings(repo);
	if (error < GIT_SUCCESS)
		return error;

	if (mode != GIT_FILTER_TO_ODB)
		return git__throw(GIT_ENOTIMPLEMENTED,
			"Worktree filters are not implemented yet");

	error = git_filter__crlf_to_odb(&crlf_filter, repo, path);
	if (error < GIT_SUCCESS)
		return error;

	/* The CRLF loader left the pointer untouched: no filter to add */
	if (crlf_filter == NULL)
		return 0;

	/* If the vector cannot take the filter, nobody else owns it yet:
	 * release it here instead of leaking it and reporting success. */
	error = git_vector_insert(filters, crlf_filter);
	if (error < GIT_SUCCESS) {
		if (crlf_filter->do_free != NULL)
			crlf_filter->do_free(crlf_filter);
		else
			free(crlf_filter);
		return error;
	}

	return 0;
}
/*
 * Release every filter stored in `filters`, then the vector itself.
 *
 * A filter that provides its own `do_free` callback is released through
 * it; all others are handed to plain `free`.
 */
void git_filter__free(git_vector *filters)
{
	size_t pos;
	git_filter *current;

	git_vector_foreach(filters, pos, current) {
		if (current->do_free == NULL) {
			free(current);
			continue;
		}

		current->do_free(current);
	}

	git_vector_free(filters);
}
/*
 * Run every filter in `filters` over `source`, leaving the final text
 * in `dest`.
 *
 * `source` and `dest` are used as a double buffer: each filter reads
 * from one and writes into the other. A filter whose `apply` returns
 * non-zero is treated as skipped, and the buffer it read from stays the
 * current input. Because of the final swap, `source` may no longer hold
 * the original text when this returns.
 *
 * Returns GIT_SUCCESS, or GIT_ENOMEM if a buffer could not be grown.
 */
int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
{
	unsigned int src, dst, i;
	git_buf *dbuffer[2];

	dbuffer[0] = source;
	dbuffer[1] = dest;

	/* Index of the buffer holding the most recent valid text */
	src = 0;

	/* Pre-grow the destination buffer to more or less the size
	 * we expect it to have */
	if (git_buf_grow(dest, source->size) < 0)
		return GIT_ENOMEM;

	for (i = 0; i < filters->length; ++i) {
		git_filter *filter = git_vector_get(filters, i);
		dst = (src + 1) % 2;

		git_buf_clear(dbuffer[dst]);

		/* Apply the filter, from dbuffer[src] to dbuffer[dst];
		 * if the filtering is canceled by the user mid-filter,
		 * we skip to the next filter without changing the source
		 * of the double buffering (so that the text goes through
		 * cleanly).
		 */
		if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0)
			src = dst;

		if (git_buf_oom(dbuffer[dst]))
			return GIT_ENOMEM;
	}

	/* Ensure that the output ends up in dbuffer[1] (i.e. the dest).
	 * The valid text lives in dbuffer[src], not in the buffer that was
	 * last written to: a skipped final filter leaves dbuffer[dst]
	 * cleared or half-written. Testing `src` also avoids reading `dst`
	 * when the filter vector is empty. */
	if (src != 1)
		git_buf_swap(dest, source);

	return GIT_SUCCESS;
}
int git_filter__load_settings(git_repository *repo)
static int load_repository_settings(git_repository *repo)
{
static git_cvar_map map_eol[] = {
{GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET},
@ -210,3 +127,90 @@ int git_filter__load_settings(git_repository *repo)
repo->filter_options.loaded = 1;
return 0;
}
/*
 * Populate `filters` with the filters that apply to `path` for the
 * given filtering direction.
 *
 * Returns the length of the `filters` vector after loading, or a
 * negative error code. Any mode other than `GIT_FILTER_TO_ODB` yields
 * the `GIT_ENOTIMPLEMENTED` error produced by `git__throw`.
 */
int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode)
{
	int rc;

	/* The settings read from `gitconfig` are cached on the repository
	 * struct; make sure that cache is populated before going on */
	if ((rc = load_repository_settings(repo)) < GIT_SUCCESS)
		return rc;

	/* Only the worktree -> ODB direction is supported for now */
	if (mode != GIT_FILTER_TO_ODB)
		return git__throw(GIT_ENOTIMPLEMENTED,
			"Worktree filters are not implemented yet");

	/* Writing to the ODB: add the CRLF cleanup filter if it applies */
	rc = git_filter_add__crlf_to_odb(filters, repo, path);
	if (rc < GIT_SUCCESS)
		return rc;

	return (int)filters->length;
}
/*
 * Release every filter stored in `filters`, then the vector itself.
 *
 * A filter that provides its own `do_free` callback is released through
 * it; all others are handed to plain `free`.
 */
void git_filters_free(git_vector *filters)
{
	size_t pos;
	git_filter *current;

	git_vector_foreach(filters, pos, current) {
		if (current->do_free == NULL) {
			free(current);
			continue;
		}

		current->do_free(current);
	}

	git_vector_free(filters);
}
/*
 * Run every filter in `filters` over `source`, leaving the final text
 * in `dest`.
 *
 * `source` and `dest` are used as a double buffer: each filter reads
 * from one and writes into the other. A filter whose `apply` returns
 * non-zero is treated as skipped, and the buffer it read from stays the
 * current input. Because of the final swap, `source` may no longer hold
 * the original text when this returns.
 *
 * An empty `source` short-circuits: `dest` is cleared and no filter runs.
 *
 * Returns GIT_SUCCESS, or GIT_ENOMEM if a buffer could not be grown.
 */
int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
{
	unsigned int src, dst, i;
	git_buf *dbuffer[2];

	dbuffer[0] = source;
	dbuffer[1] = dest;

	/* Index of the buffer holding the most recent valid text */
	src = 0;

	if (source->size == 0) {
		git_buf_clear(dest);
		return GIT_SUCCESS;
	}

	/* Pre-grow the destination buffer to more or less the size
	 * we expect it to have */
	if (git_buf_grow(dest, source->size) < 0)
		return GIT_ENOMEM;

	for (i = 0; i < filters->length; ++i) {
		git_filter *filter = git_vector_get(filters, i);
		dst = (src + 1) % 2;

		git_buf_clear(dbuffer[dst]);

		/* Apply the filter, from dbuffer[src] to dbuffer[dst];
		 * if the filtering is canceled by the user mid-filter,
		 * we skip to the next filter without changing the source
		 * of the double buffering (so that the text goes through
		 * cleanly).
		 */
		if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0)
			src = dst;

		if (git_buf_oom(dbuffer[dst]))
			return GIT_ENOMEM;
	}

	/* Ensure that the output ends up in dbuffer[1] (i.e. the dest).
	 * The valid text lives in dbuffer[src], not in the buffer that was
	 * last written to: a skipped final filter leaves dbuffer[dst]
	 * cleared or half-written. Testing `src` also avoids reading `dst`
	 * when the filter vector is empty. */
	if (src != 1)
		git_buf_swap(dest, source);

	return GIT_SUCCESS;
}

View File

@ -60,19 +60,81 @@ typedef struct {
unsigned int printable, nonprintable;
} git_text_stats;
extern int git_filter__load_settings(git_repository *repo);
extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode);
extern void git_filter__free(git_vector *filters);
extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters);
/*
* FILTER API
*/
/* Gather stats for a piece of text */
extern void git_text__stat(git_text_stats *stats, const git_buf *text);
/*
* For any given path in the working directory, fill the `filters`
* array with the relevant filters that need to be applied.
*
* Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the
* filters that will be used when checking out a file to the working
* directory, or `GIT_FILTER_TO_ODB` for the filters used when writing
* a file to the ODB.
*
* @param filters Vector where to store all the loaded filters
* @param repo Repository object that contains `path`
* @param path Relative path of the file to be filtered
* @param mode Filtering direction (WT->ODB or ODB->WT)
* @return the number of filters loaded for the file (0 if the file
* doesn't need filtering), or a negative error code
*/
extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode);
/* Heuristics on a set of text stats to check whether it's binary
* text or not */
extern int git_text__is_binary(git_text_stats *stats);
/*
* Apply one or more filters to a file.
*
* The file must have been loaded as a `git_buf` object. Both the `source`
* and `dest` buffers are owned by the caller and must be freed once
* they are no longer needed.
*
* NOTE: Because of the double-buffering scheme, the `source` buffer that contains
* the original file may be tampered with once the filtering is complete. Regardless,
* the `dest` buffer will always contain the final result of the filtering.
*
* @param dest Buffer to store the result of the filtering
* @param source Buffer containing the document to filter
* @param filters A non-empty vector of filters as supplied by `git_filters_load`
* @return GIT_SUCCESS on success, an error code otherwise
*/
extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters);
/* Available filters */
extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path);
/*
* Free the `filters` array generated by `git_filters_load`.
*
* Note that this frees both the array and its contents. The array will
* be clean/reusable after this call.
*
* @param filters A filters array as supplied by `git_filters_load`
*/
extern void git_filters_free(git_vector *filters);
/*
* Available filters
*/
/* Strip CRLF, from Worktree to ODB */
extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path);
/*
* PLAINTEXT API
*/
/*
* Gather stats for a piece of text
*
* Fill the `stats` structure with information on the number of
* unreadable characters, carriage returns, etc, so it can be
* used in heuristics.
*/
extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text);
/*
* Process `git_text_stats` data generated by `git_text_gather_stats` to see
* if it qualifies as a binary file
*/
extern int git_text_is_binary(git_text_stats *stats);
#endif