mirror of
https://git.proxmox.com/git/libgit2
synced 2025-08-10 21:45:40 +00:00
filter: Beautiful refactoring
Comments soothe my soul.
This commit is contained in:
parent
788430c8e3
commit
47a899ffed
34
src/blob.c
34
src/blob.c
@ -115,19 +115,18 @@ static int write_file_filtered(
|
||||
if (error < GIT_SUCCESS)
|
||||
return error;
|
||||
|
||||
error = git_filter__apply(&dest, &source, filters);
|
||||
error = git_filters_apply(&dest, &source, filters);
|
||||
|
||||
if (error < GIT_SUCCESS) {
|
||||
git_buf_free(&source);
|
||||
git_buf_free(&dest);
|
||||
return error;
|
||||
/* Free the source as soon as possible. This can be big in memory,
|
||||
* and we don't want the ODB write to choke */
|
||||
git_buf_free(&source);
|
||||
|
||||
if (error == GIT_SUCCESS) {
|
||||
/* Write the file to disk if it was properly filtered */
|
||||
error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
|
||||
}
|
||||
|
||||
error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
|
||||
|
||||
git_buf_free(&source);
|
||||
git_buf_free(&dest);
|
||||
|
||||
return GIT_SUCCESS;
|
||||
}
|
||||
|
||||
@ -186,18 +185,25 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
|
||||
error = write_symlink(oid, odb, full_path.ptr, (size_t)size);
|
||||
} else {
|
||||
git_vector write_filters = GIT_VECTOR_INIT;
|
||||
int filter_count;
|
||||
|
||||
if ((error = git_filter__load_for_file(
|
||||
&write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS)
|
||||
/* Load the filters for writing this file to the ODB */
|
||||
filter_count = git_filters_load(&write_filters, repo, path, GIT_FILTER_TO_ODB);
|
||||
|
||||
if (filter_count < 0) {
|
||||
/* Negative value means there was a critical error */
|
||||
error = filter_count;
|
||||
goto cleanup;
|
||||
|
||||
if (write_filters.length == 0) {
|
||||
} else if (filter_count == 0) {
|
||||
/* No filters need to be applied to the document: we can stream
|
||||
* directly from disk */
|
||||
error = write_file_stream(oid, odb, full_path.ptr, size);
|
||||
} else {
|
||||
/* We need to apply one or more filters */
|
||||
error = write_file_filtered(oid, odb, full_path.ptr, &write_filters);
|
||||
}
|
||||
|
||||
git_filter__free(&write_filters);
|
||||
git_filters_free(&write_filters);
|
||||
|
||||
/*
|
||||
* TODO: eventually support streaming filtered files, for files which are bigger
|
||||
|
116
src/crlf.c
116
src/crlf.c
@ -102,18 +102,74 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Copy `source` into `dest`, dropping every carriage return that is
 * immediately followed by a newline. Lone carriage returns are kept.
 *
 * The last byte of the document is never inspected as a CR candidate,
 * because there is nothing after it that could make it part of a CRLF
 * pair; it is always copied through verbatim.
 *
 * Errors from the buffer append calls are not checked here; the caller
 * is expected to inspect the state of `dest` afterwards.
 *
 * @param dest   Buffer that receives the converted text (appended to)
 * @param source Buffer holding the original text
 * @return 0 if `dest` was filled with the converted text, -1 if there
 *         is no carriage return before the last byte (or the source is
 *         empty) and the filter should therefore be skipped
 */
static int drop_crlf(git_buf *dest, const git_buf *source)
{
	size_t last, pos, chunk_start;

	/* `size - 1` would wrap around on an empty buffer */
	if (source->size == 0)
		return -1;

	last = source->size - 1;

	/* Initial scan: see if we can reach the end of the document
	 * without finding a single carriage return */
	pos = 0;
	while (pos < last && source->ptr[pos] != '\r')
		pos++;

	/* Clean file? Tell the library to skip this filter */
	if (pos == last)
		return -1;

	/* Main scan loop. `chunk_start` marks the first byte that has not
	 * been copied yet; it starts at 0 so that the clean prefix found
	 * by the initial scan is flushed together with the first chunk. */
	chunk_start = 0;

	for (; pos < last; pos++) {
		if (source->ptr[pos] != '\r')
			continue;

		/* Flush everything pending up to (not including) this CR */
		if (pos > chunk_start)
			git_buf_put(dest, source->ptr + chunk_start, pos - chunk_start);

		/* `pos < last`, so `pos + 1` is always inside the buffer.
		 * A CR that is not followed by a newline is preserved. */
		if (source->ptr[pos + 1] != '\n')
			git_buf_putc(dest, '\r');

		chunk_start = pos + 1;
	}

	/* Copy the remaining tail; it always includes the last byte */
	git_buf_put(dest, source->ptr + chunk_start, source->size - chunk_start);

	return 0;
}
|
||||
|
||||
static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
|
||||
{
|
||||
size_t i = 0;
|
||||
struct crlf_filter *filter = (struct crlf_filter *)self;
|
||||
|
||||
assert(self && dest && source);
|
||||
|
||||
/* Empty file? Nothing to do */
|
||||
if (source->size == 0)
|
||||
return 0;
|
||||
|
||||
/* Heuristics to see if we can skip the conversion.
|
||||
* Straight from Core Git.
|
||||
*/
|
||||
if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
|
||||
filter->attrs.crlf_action == GIT_CRLF_GUESS) {
|
||||
|
||||
git_text_stats stats;
|
||||
git_text__stat(&stats, source);
|
||||
git_text_gather_stats(&stats, source);
|
||||
|
||||
/*
|
||||
* We're currently not going to even try to convert stuff
|
||||
@ -126,7 +182,7 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
|
||||
/*
|
||||
* And add some heuristics for binary vs text, of course...
|
||||
*/
|
||||
if (git_text__is_binary(&stats))
|
||||
if (git_text_is_binary(&stats))
|
||||
return -1;
|
||||
|
||||
#if 0
|
||||
@ -144,50 +200,42 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* TODO: do not copy anything if there isn't a single CR */
|
||||
while (i < source->size) {
|
||||
size_t org = i;
|
||||
|
||||
while (i < source->size && source->ptr[i] != '\r')
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
git_buf_put(dest, source->ptr + org, i - org);
|
||||
|
||||
i++;
|
||||
|
||||
if (i >= source->size || source->ptr[i] != '\n') {
|
||||
git_buf_putc(dest, '\r');
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
/* Actually drop the carriage returns */
|
||||
return drop_crlf(dest, source);
|
||||
}
|
||||
|
||||
int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path)
|
||||
int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path)
|
||||
{
|
||||
struct crlf_filter filter;
|
||||
struct crlf_attrs ca;
|
||||
struct crlf_filter *filter;
|
||||
int error;
|
||||
|
||||
filter.f.apply = &crlf_apply_to_odb;
|
||||
filter.f.do_free = NULL;
|
||||
|
||||
if ((error = crlf_load_attributes(&filter.attrs, repo, path)) < 0)
|
||||
/* Load gitattributes for the path */
|
||||
if ((error = crlf_load_attributes(&ca, repo, path)) < 0)
|
||||
return error;
|
||||
|
||||
filter.attrs.crlf_action = crlf_input_action(&filter.attrs);
|
||||
/*
|
||||
* Use the core Git logic to see if we should perform CRLF for this file
|
||||
* based on its attributes & the value of `core.auto_crlf`
|
||||
*/
|
||||
ca.crlf_action = crlf_input_action(&ca);
|
||||
|
||||
if (filter.attrs.crlf_action == GIT_CRLF_BINARY)
|
||||
if (ca.crlf_action == GIT_CRLF_BINARY)
|
||||
return 0;
|
||||
|
||||
if (filter.attrs.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
|
||||
if (ca.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
|
||||
return 0;
|
||||
|
||||
*filter_out = git__malloc(sizeof(struct crlf_filter));
|
||||
if (*filter_out == NULL)
|
||||
/* If we're good, we create a new filter object and push it
|
||||
* into the filters array */
|
||||
filter = git__malloc(sizeof(struct crlf_filter));
|
||||
if (filter == NULL)
|
||||
return GIT_ENOMEM;
|
||||
|
||||
memcpy(*filter_out, &filter, sizeof(struct crlf_attrs));
|
||||
return 0;
|
||||
filter->f.apply = &crlf_apply_to_odb;
|
||||
filter->f.do_free = NULL;
|
||||
memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs));
|
||||
|
||||
return git_vector_insert(filters, filter);
|
||||
}
|
||||
|
||||
|
176
src/filter.c
176
src/filter.c
@ -13,7 +13,7 @@
|
||||
#include "git2/config.h"
|
||||
|
||||
/* Fresh from Core Git. I wonder what we could use this for... */
|
||||
void git_text__stat(git_text_stats *stats, const git_buf *text)
|
||||
void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
@ -65,7 +65,7 @@ void git_text__stat(git_text_stats *stats, const git_buf *text)
|
||||
/*
|
||||
* Fresh from Core Git
|
||||
*/
|
||||
int git_text__is_binary(git_text_stats *stats)
|
||||
int git_text_is_binary(git_text_stats *stats)
|
||||
{
|
||||
if (stats->nul)
|
||||
return 1;
|
||||
@ -84,90 +84,7 @@ int git_text__is_binary(git_text_stats *stats)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode)
|
||||
{
|
||||
int error;
|
||||
git_filter *crlf_filter = NULL;
|
||||
|
||||
error = git_filter__load_settings(repo);
|
||||
if (error < GIT_SUCCESS)
|
||||
return error;
|
||||
|
||||
if (mode == GIT_FILTER_TO_ODB) {
|
||||
error = git_filter__crlf_to_odb(&crlf_filter, repo, path);
|
||||
if (error < GIT_SUCCESS)
|
||||
return error;
|
||||
|
||||
if (crlf_filter != NULL)
|
||||
git_vector_insert(filters, crlf_filter);
|
||||
|
||||
} else {
|
||||
return git__throw(GIT_ENOTIMPLEMENTED,
|
||||
"Worktree filters are not implemented yet");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void git_filter__free(git_vector *filters)
|
||||
{
|
||||
size_t i;
|
||||
git_filter *filter;
|
||||
|
||||
git_vector_foreach(filters, i, filter) {
|
||||
if (filter->do_free != NULL)
|
||||
filter->do_free(filter);
|
||||
else
|
||||
free(filter);
|
||||
}
|
||||
|
||||
git_vector_free(filters);
|
||||
}
|
||||
|
||||
int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
|
||||
{
|
||||
unsigned int src, dst, i;
|
||||
git_buf *dbuffer[2];
|
||||
|
||||
dbuffer[0] = source;
|
||||
dbuffer[1] = dest;
|
||||
|
||||
src = 0;
|
||||
|
||||
/* Pre-grow the destination buffer to more or less the size
|
||||
* we expect it to have */
|
||||
if (git_buf_grow(dest, source->size) < 0)
|
||||
return GIT_ENOMEM;
|
||||
|
||||
for (i = 0; i < filters->length; ++i) {
|
||||
git_filter *filter = git_vector_get(filters, i);
|
||||
dst = (src + 1) % 2;
|
||||
|
||||
git_buf_clear(dbuffer[dst]);
|
||||
|
||||
/* Apply the filter, from dbuffer[src] to dbuffer[dst];
|
||||
* if the filtering is canceled by the user mid-filter,
|
||||
* we skip to the next filter without changing the source
|
||||
* of the double buffering (so that the text goes through
|
||||
* cleanly).
|
||||
*/
|
||||
if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) {
|
||||
src = (src + 1) % 2;
|
||||
}
|
||||
|
||||
if (git_buf_oom(dbuffer[dst]))
|
||||
return GIT_ENOMEM;
|
||||
}
|
||||
|
||||
/* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */
|
||||
if (dst != 1) {
|
||||
git_buf_swap(dest, source);
|
||||
}
|
||||
|
||||
return GIT_SUCCESS;
|
||||
}
|
||||
|
||||
int git_filter__load_settings(git_repository *repo)
|
||||
static int load_repository_settings(git_repository *repo)
|
||||
{
|
||||
static git_cvar_map map_eol[] = {
|
||||
{GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET},
|
||||
@ -210,3 +127,90 @@ int git_filter__load_settings(git_repository *repo)
|
||||
repo->filter_options.loaded = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Populate `filters` with the filters that apply to `path` for the
 * requested filtering direction.
 *
 * Only `GIT_FILTER_TO_ODB` is handled; any other mode is reported as
 * not implemented.
 *
 * @return the resulting length of `filters`, or a negative error code
 */
int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode)
{
	/* The repository settings must be loaded before any filter is
	 * asked whether it applies to this path */
	int status = load_repository_settings(repo);

	if (status < GIT_SUCCESS)
		return status;

	if (mode != GIT_FILTER_TO_ODB)
		return git__throw(GIT_ENOTIMPLEMENTED,
			"Worktree filters are not implemented yet");

	/* Writing to the ODB: let the CRLF cleanup filter add itself */
	status = git_filter_add__crlf_to_odb(filters, repo, path);
	if (status < GIT_SUCCESS)
		return status;

	return (int)filters->length;
}
|
||||
|
||||
/*
 * Release every filter stored in `filters`, then the vector itself.
 *
 * A filter that provides its own `do_free` callback is released through
 * it; every other filter is released with plain `free`.
 */
void git_filters_free(git_vector *filters)
{
	unsigned int idx;

	for (idx = 0; idx < filters->length; ++idx) {
		git_filter *entry = git_vector_get(filters, idx);

		if (entry->do_free == NULL)
			free(entry);
		else
			entry->do_free(entry);
	}

	git_vector_free(filters);
}
|
||||
|
||||
/*
 * Run every filter in `filters` over the document in `source` and leave
 * the final text in `dest`.
 *
 * The two buffers are used as a double buffer: each filter reads from
 * dbuffer[src] and writes into the other one. When a filter returns 0
 * its output becomes the input of the next filter; when it returns a
 * non-zero value its output is discarded and the next filter sees the
 * same input again.
 *
 * Because the buffers may be swapped, the contents of `source` are
 * unspecified once this function returns.
 *
 * @param dest    Buffer that receives the filtered document
 * @param source  Buffer holding the document to filter
 * @param filters Vector of `git_filter` pointers to apply, in order
 * @return GIT_SUCCESS, or GIT_ENOMEM if a buffer could not be grown
 */
int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
{
	unsigned int src, dst, i;
	git_buf *dbuffer[2];

	dbuffer[0] = source;
	dbuffer[1] = dest;

	/* The current, valid text always lives in dbuffer[src] */
	src = 0;

	if (source->size == 0) {
		git_buf_clear(dest);
		return GIT_SUCCESS;
	}

	/* Pre-grow the destination buffer to more or less the size
	 * we expect it to have */
	if (git_buf_grow(dest, source->size) < 0)
		return GIT_ENOMEM;

	for (i = 0; i < filters->length; ++i) {
		git_filter *filter = git_vector_get(filters, i);

		dst = (src + 1) % 2;
		git_buf_clear(dbuffer[dst]);

		/* Only promote the output to "current text" when the filter
		 * actually completed; otherwise keep reading from the same
		 * buffer so the text goes through untouched. */
		if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0)
			src = dst;

		if (git_buf_oom(dbuffer[dst]))
			return GIT_ENOMEM;
	}

	/* Ensure that the output ends up in dbuffer[1] (i.e. the dest).
	 * This has to look at `src`, not `dst`: if the last filter declined
	 * to run, `dst` points at a discarded buffer while the real text is
	 * still in dbuffer[src]. It also covers an empty filter vector. */
	if (src != 1)
		git_buf_swap(dest, source);

	return GIT_SUCCESS;
}
|
||||
|
||||
|
84
src/filter.h
84
src/filter.h
@ -60,19 +60,81 @@ typedef struct {
|
||||
unsigned int printable, nonprintable;
|
||||
} git_text_stats;
|
||||
|
||||
extern int git_filter__load_settings(git_repository *repo);
|
||||
extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode);
|
||||
extern void git_filter__free(git_vector *filters);
|
||||
extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters);
|
||||
/*
|
||||
* FILTER API
|
||||
*/
|
||||
|
||||
/* Gather stats for a piece of text */
|
||||
extern void git_text__stat(git_text_stats *stats, const git_buf *text);
|
||||
/*
|
||||
* For any given path in the working directory, fill the `filters`
|
||||
* array with the relevant filters that need to be applied.
|
||||
*
|
||||
* Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the
|
||||
* filters that will be used when checking out a file to the working
|
||||
* directory, or `GIT_FILTER_TO_ODB` for the filters used when writing
|
||||
* a file to the ODB.
|
||||
*
|
||||
* @param filters Vector where to store all the loaded filters
|
||||
* @param repo Repository object that contains `path`
|
||||
* @param path Relative path of the file to be filtered
|
||||
* @param mode Filtering direction (WT->ODB or ODB->WT)
|
||||
* @return the number of filters loaded for the file (0 if the file
|
||||
* doesn't need filtering), or a negative error code
|
||||
*/
|
||||
extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode);
|
||||
|
||||
/* Heuristics on a set of text stats to check whether it's binary
|
||||
* text or not */
|
||||
extern int git_text__is_binary(git_text_stats *stats);
|
||||
/*
|
||||
* Apply one or more filters to a file.
|
||||
*
|
||||
* The file must have been loaded as a `git_buf` object. Both the `source`
|
||||
* and `dest` buffers are owned by the caller and must be freed once
|
||||
* they are no longer needed.
|
||||
*
|
||||
* NOTE: Because of the double-buffering scheme, the `source` buffer that contains
|
||||
* the original file may be tampered with once the filtering is complete. Regardless,
|
||||
* the `dest` buffer will always contain the final result of the filtering
|
||||
*
|
||||
* @param dest Buffer to store the result of the filtering
|
||||
* @param source Buffer containing the document to filter
|
||||
* @param filters A non-empty vector of filters as supplied by `git_filters_load`
|
||||
* @return GIT_SUCCESS on success, an error code otherwise
|
||||
*/
|
||||
extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters);
|
||||
|
||||
/* Available filters */
|
||||
extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path);
|
||||
/*
|
||||
* Free the `filters` array generated by `git_filters_load`.
|
||||
*
|
||||
* Note that this frees both the array and its contents. The array will
|
||||
* be clean/reusable after this call.
|
||||
*
|
||||
* @param filters A filters array as supplied by `git_filters_load`
|
||||
*/
|
||||
extern void git_filters_free(git_vector *filters);
|
||||
|
||||
/*
|
||||
* Available filters
|
||||
*/
|
||||
|
||||
/* Strip CRLF, from Worktree to ODB */
|
||||
extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path);
|
||||
|
||||
|
||||
/*
|
||||
* PLAINTEXT API
|
||||
*/
|
||||
|
||||
/*
|
||||
* Gather stats for a piece of text
|
||||
*
|
||||
* Fill the `stats` structure with information on the number of
|
||||
* unreadable characters, carriage returns, etc, so it can be
|
||||
* used in heuristics.
|
||||
*/
|
||||
extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text);
|
||||
|
||||
/*
|
||||
* Process `git_text_stats` data generated by `git_text_gather_stats` to see
|
||||
* if it qualifies as a binary file
|
||||
*/
|
||||
extern int git_text_is_binary(git_text_stats *stats);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user