mirror of
https://git.proxmox.com/git/libgit2
synced 2025-08-04 15:07:41 +00:00
filter: Add write-to CRLF filter
This commit is contained in:
parent
450b40cab3
commit
27950fa3f4
24
src/blob.c
24
src/blob.c
@ -104,29 +104,29 @@ cleanup:
|
||||
static int write_file_filtered(
|
||||
git_oid *oid,
|
||||
git_odb *odb,
|
||||
const char *path,
|
||||
const char *full_path,
|
||||
git_vector *filters)
|
||||
{
|
||||
int error;
|
||||
git_buf file_in = GIT_BUF_INIT;
|
||||
git_buf filter_result = GIT_BUF_INIT;
|
||||
git_buf source = GIT_BUF_INIT;
|
||||
git_buf dest = GIT_BUF_INIT;
|
||||
|
||||
error = git_futils_readbuffer(&file_in, path);
|
||||
error = git_futils_readbuffer(&source, full_path);
|
||||
if (error < GIT_SUCCESS)
|
||||
return error;
|
||||
|
||||
error = git_filter__apply(&filter_result, &file_in, filters, path);
|
||||
error = git_filter__apply(&dest, &source, filters);
|
||||
|
||||
if (error < GIT_SUCCESS) {
|
||||
git_buf_free(&file_in);
|
||||
git_buf_free(&filter_result);
|
||||
git_buf_free(&source);
|
||||
git_buf_free(&dest);
|
||||
return error;
|
||||
}
|
||||
|
||||
error = git_odb_write(oid, odb, filter_result.ptr, filter_result.size, GIT_OBJ_BLOB);
|
||||
error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
|
||||
|
||||
git_buf_free(&file_in);
|
||||
git_buf_free(&filter_result);
|
||||
git_buf_free(&source);
|
||||
git_buf_free(&dest);
|
||||
|
||||
return GIT_SUCCESS;
|
||||
}
|
||||
@ -188,7 +188,7 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
|
||||
git_vector write_filters = GIT_VECTOR_INIT;
|
||||
|
||||
if ((error = git_filter__load_for_file(
|
||||
&write_filters, repo, full_path.ptr, GIT_FILTER_TO_ODB)) < GIT_SUCCESS)
|
||||
&write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS)
|
||||
goto cleanup;
|
||||
|
||||
if (write_filters.length == 0) {
|
||||
@ -197,6 +197,8 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
|
||||
error = write_file_filtered(oid, odb, full_path.ptr, &write_filters);
|
||||
}
|
||||
|
||||
git_filter__free(&write_filters);
|
||||
|
||||
/*
|
||||
* TODO: eventually support streaming filtered files, for files which are bigger
|
||||
* than a given threshold. This is not a priority because applying a filter in
|
||||
|
193
src/crlf.c
Normal file
193
src/crlf.c
Normal file
@ -0,0 +1,193 @@
|
||||
/*
|
||||
* Copyright (C) 2009-2012 the libgit2 contributors
|
||||
*
|
||||
* This file is part of libgit2, distributed under the GNU GPL v2 with
|
||||
* a Linking Exception. For full terms see the included COPYING file.
|
||||
*/
|
||||
|
||||
#include "common.h"
|
||||
#include "fileops.h"
|
||||
#include "hash.h"
|
||||
#include "filter.h"
|
||||
#include "repository.h"
|
||||
|
||||
#include "git2/attr.h"
|
||||
|
||||
/* Line-ending attributes resolved for a single path. */
struct crlf_attrs {
	int crlf_action;	/* GIT_CRLF_* value, as produced by check_crlf() */
	int eol;		/* GIT_EOL_* value, as produced by check_eol() */
};
|
||||
|
||||
struct crlf_filter {
|
||||
git_filter f;
|
||||
struct crlf_attrs attrs;
|
||||
};
|
||||
|
||||
static int check_crlf(const char *value)
|
||||
{
|
||||
if (value == git_attr__true)
|
||||
return GIT_CRLF_TEXT;
|
||||
|
||||
if (value == git_attr__false)
|
||||
return GIT_CRLF_BINARY;
|
||||
|
||||
if (value == NULL)
|
||||
return GIT_CRLF_GUESS;
|
||||
|
||||
if (strcmp(value, "input") == 0)
|
||||
return GIT_CRLF_INPUT;
|
||||
|
||||
if (strcmp(value, "auto") == 0)
|
||||
return GIT_CRLF_AUTO;
|
||||
|
||||
return GIT_CRLF_GUESS;
|
||||
}
|
||||
|
||||
static int check_eol(const char *value)
|
||||
{
|
||||
if (value == NULL)
|
||||
return GIT_EOL_UNSET;
|
||||
|
||||
if (strcmp(value, "lf") == 0)
|
||||
return GIT_EOL_LF;
|
||||
|
||||
if (strcmp(value, "crlf") == 0)
|
||||
return GIT_EOL_CRLF;
|
||||
|
||||
return GIT_EOL_UNSET;
|
||||
}
|
||||
|
||||
static int crlf_input_action(struct crlf_attrs *ca)
|
||||
{
|
||||
if (ca->crlf_action == GIT_CRLF_BINARY)
|
||||
return GIT_CRLF_BINARY;
|
||||
|
||||
if (ca->eol == GIT_EOL_LF)
|
||||
return GIT_CRLF_INPUT;
|
||||
|
||||
if (ca->eol == GIT_EOL_CRLF)
|
||||
return GIT_CRLF_CRLF;
|
||||
|
||||
return ca->crlf_action;
|
||||
}
|
||||
|
||||
/*
 * Look up the "crlf", "eol" and "text" attributes of `path` and store the
 * decoded result in `ca`.
 *
 * Returns 0 when the attributes were read, or when the lookup reported
 * GIT_ENOTFOUND (in which case `ca` gets GIT_CRLF_GUESS / GIT_EOL_UNSET).
 * Any other lookup error is passed through and `ca` is left unmodified.
 */
static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, const char *path)
{
#define NUM_CONV_ATTRS 3

	/* Value indices used below: [0] crlf, [1] eol, [2] text */
	static const char *attr_names[NUM_CONV_ATTRS] = {
		"crlf", "eol", "text",
	};

	const char *attr_vals[NUM_CONV_ATTRS];
	int error = git_attr_get_many(repo, path, NUM_CONV_ATTRS, attr_names, attr_vals);

	if (error == GIT_ENOTFOUND) {
		ca->crlf_action = GIT_CRLF_GUESS;
		ca->eol = GIT_EOL_UNSET;
		return 0;
	}

	if (error != GIT_SUCCESS)
		return error;

	/* "text" takes precedence; "crlf" is only consulted when it is undecided */
	ca->crlf_action = check_crlf(attr_vals[2]);
	if (ca->crlf_action == GIT_CRLF_GUESS)
		ca->crlf_action = check_crlf(attr_vals[0]);

	ca->eol = check_eol(attr_vals[1]);
	return 0;
}
|
||||
|
||||
/*
 * Filter callback: convert CRLF line endings in `source` to LF, appending
 * the converted text to `dest`.
 *
 * `self` must point at a `struct crlf_filter`. For the GIT_CRLF_AUTO and
 * GIT_CRLF_GUESS actions the content is inspected first and conversion is
 * declined when the text contains bare CRs, looks binary, or has no CR
 * at all.
 *
 * Returns 0 when `dest` holds the converted text, -1 when the filter
 * declines to convert (nothing useful has been written to `dest`).
 */
static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
{
	size_t i = 0;
	struct crlf_filter *filter = (struct crlf_filter *)self;

	assert(self && dest && source);

	if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
		filter->attrs.crlf_action == GIT_CRLF_GUESS) {

		git_text_stats stats;
		git_text__stat(&stats, source);

		/*
		 * We're currently not going to even try to convert stuff
		 * that has bare CR characters. Does anybody do that crazy
		 * stuff?
		 */
		if (stats.cr != stats.crlf)
			return -1;

		/*
		 * And add some heuristics for binary vs text, of course...
		 */
		if (git_text__is_binary(&stats))
			return -1;

		/*
		 * TODO: for GIT_CRLF_GUESS, skip the conversion when the
		 * version of the file already in the index contains a CR
		 * (the "safer autocrlf" behaviour); not implemented yet.
		 */

		/* Nothing to convert if there isn't a single CR */
		if (!stats.cr)
			return -1;
	}

	/* TODO: do not copy anything if there isn't a single CR */
	while (i < source->size) {
		size_t org = i;

		/* Find the next CR (or the end of the input) */
		while (i < source->size && source->ptr[i] != '\r')
			i++;

		/* Copy the CR-free run verbatim */
		if (i > org)
			git_buf_put(dest, source->ptr + org, i - org);

		/*
		 * The scan ran off the end without meeting a CR: we are done.
		 * Without this check the code below would treat the end of
		 * the input as a lone CR and append a bogus '\r' to `dest`.
		 */
		if (i >= source->size)
			break;

		/* source->ptr[i] is a CR: step over it... */
		i++;

		/* ...and keep it only when it is not part of a CRLF pair */
		if (i >= source->size || source->ptr[i] != '\n')
			git_buf_putc(dest, '\r');
	}

	return 0;
}
|
||||
|
||||
/*
 * Create the worktree-to-ODB CRLF filter for `path`, if one is needed.
 *
 * On success returns 0 and sets `*filter_out` either to a newly allocated
 * filter or to NULL when no conversion applies to this path (content marked
 * binary, or action undecided while the repository's `auto_crlf` option is
 * GIT_AUTO_CRLF_FALSE). On failure returns an error code (< 0) from the
 * attribute lookup, or GIT_ENOMEM; `*filter_out` is NULL in that case.
 *
 * The filter is allocated with git__malloc() and has no `do_free` callback.
 */
int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path)
{
	struct crlf_attrs ca;
	struct crlf_filter *filter;
	int error;

	/*
	 * Always give the out parameter a defined value: callers test it
	 * against NULL to learn whether a filter was produced.
	 */
	*filter_out = NULL;

	if ((error = crlf_load_attributes(&ca, repo, path)) < 0)
		return error;

	ca.crlf_action = crlf_input_action(&ca);

	if (ca.crlf_action == GIT_CRLF_BINARY)
		return 0;

	if (ca.crlf_action == GIT_CRLF_GUESS &&
		repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
		return 0;

	filter = git__malloc(sizeof(*filter));
	if (filter == NULL)
		return GIT_ENOMEM;

	/*
	 * Initialise every member of the heap object. (Copying only
	 * sizeof(struct crlf_attrs) bytes of a stack template would leave
	 * `do_free` and `attrs` partially uninitialised.)
	 */
	filter->f.apply = &crlf_apply_to_odb;
	filter->f.do_free = NULL;
	filter->attrs = ca;

	*filter_out = (git_filter *)filter;
	return 0;
}
|
||||
|
131
src/filter.c
131
src/filter.c
@ -10,10 +10,8 @@
|
||||
#include "hash.h"
|
||||
#include "filter.h"
|
||||
|
||||
#include "git2/attr.h"
|
||||
|
||||
/* Fresh from Core Git. I wonder what we could use this for... */
|
||||
void git_text__stat(git_text_stats *stats, git_buf *text)
|
||||
void git_text__stat(git_text_stats *stats, const git_buf *text)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
@ -84,13 +82,45 @@ int git_text__is_binary(git_text_stats *stats)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode)
|
||||
int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode)
|
||||
{
|
||||
/* We don't load any filters yet. HAHA */
|
||||
int error;
|
||||
git_filter *crlf_filter;
|
||||
|
||||
return 0; /* TODO: not quite ready yet */
|
||||
|
||||
if (mode == GIT_FILTER_TO_ODB) {
|
||||
error = git_filter__crlf_to_odb(&crlf_filter, repo, path);
|
||||
if (error < GIT_SUCCESS)
|
||||
return error;
|
||||
|
||||
if (crlf_filter != NULL)
|
||||
git_vector_insert(filters, crlf_filter);
|
||||
|
||||
} else {
|
||||
return git__throw(GIT_ENOTIMPLEMENTED,
|
||||
"Worktree filters are not implemented yet");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename)
|
||||
/*
 * Release every filter stored in `filters`, then the vector itself.
 *
 * A filter that provides a `do_free` callback is released through it;
 * all others are handed to free().
 */
void git_filter__free(git_vector *filters)
{
	size_t pos;

	for (pos = 0; pos < filters->length; ++pos) {
		git_filter *current = git_vector_get(filters, pos);

		if (current->do_free == NULL)
			free(current);
		else
			current->do_free(current);
	}

	git_vector_free(filters);
}
|
||||
|
||||
int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
|
||||
{
|
||||
unsigned int src, dst, i;
|
||||
git_buf *dbuffer[2];
|
||||
@ -106,7 +136,7 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const
|
||||
return GIT_ENOMEM;
|
||||
|
||||
for (i = 0; i < filters->length; ++i) {
|
||||
git_filter_cb filter = git_vector_get(filters, i);
|
||||
git_filter *filter = git_vector_get(filters, i);
|
||||
dst = (src + 1) % 2;
|
||||
|
||||
git_buf_clear(dbuffer[dst]);
|
||||
@ -117,7 +147,7 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const
|
||||
* of the double buffering (so that the text goes through
|
||||
* cleanly).
|
||||
*/
|
||||
if (filter(dbuffer[dst], dbuffer[src], filename) == 0) {
|
||||
if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) {
|
||||
src = (src + 1) % 2;
|
||||
}
|
||||
|
||||
@ -133,88 +163,3 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const
|
||||
return GIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int check_crlf(const char *value)
|
||||
{
|
||||
if (value == git_attr__true)
|
||||
return GIT_CRLF_TEXT;
|
||||
|
||||
if (value == git_attr__false)
|
||||
return GIT_CRLF_BINARY;
|
||||
|
||||
if (value == NULL)
|
||||
return GIT_CRLF_GUESS;
|
||||
|
||||
if (strcmp(value, "input") == 0)
|
||||
return GIT_CRLF_INPUT;
|
||||
|
||||
if (strcmp(value, "auto") == 0)
|
||||
return GIT_CRLF_AUTO;
|
||||
|
||||
return GIT_CRLF_GUESS;
|
||||
}
|
||||
|
||||
static int check_eol(const char *value)
|
||||
{
|
||||
if (value == NULL)
|
||||
return GIT_EOL_UNSET;
|
||||
|
||||
if (strcmp(value, "lf") == 0)
|
||||
return GIT_EOL_LF;
|
||||
|
||||
if (strcmp(value, "crlf") == 0)
|
||||
return GIT_EOL_CRLF;
|
||||
|
||||
return GIT_EOL_UNSET;
|
||||
}
|
||||
|
||||
static int check_ident(const char *value)
|
||||
{
|
||||
return (value == git_attr__true);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int input_crlf_action(enum crlf_action text_attr, enum eol eol_attr)
|
||||
{
|
||||
if (text_attr == CRLF_BINARY)
|
||||
return CRLF_BINARY;
|
||||
if (eol_attr == EOL_LF)
|
||||
return CRLF_INPUT;
|
||||
if (eol_attr == EOL_CRLF)
|
||||
return CRLF_CRLF;
|
||||
return text_attr;
|
||||
}
|
||||
#endif
|
||||
|
||||
int git_filter__load_attrs(git_conv_attrs *ca, git_repository *repo, const char *path)
|
||||
{
|
||||
#define NUM_CONV_ATTRS 5
|
||||
|
||||
static const char *attr_names[NUM_CONV_ATTRS] = {
|
||||
"crlf", "ident", "filter", "eol", "text",
|
||||
};
|
||||
|
||||
const char *attr_vals[NUM_CONV_ATTRS];
|
||||
int error;
|
||||
|
||||
error = git_attr_get_many(repo, path, NUM_CONV_ATTRS, attr_names, attr_vals);
|
||||
|
||||
if (error == GIT_ENOTFOUND) {
|
||||
ca->crlf_action = GIT_CRLF_GUESS;
|
||||
ca->eol_attr = GIT_EOL_UNSET;
|
||||
ca->ident = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (error == GIT_SUCCESS) {
|
||||
ca->crlf_action = check_crlf(attr_vals[4]); /* text */
|
||||
if (ca->crlf_action == GIT_CRLF_GUESS)
|
||||
ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */
|
||||
|
||||
ca->ident = check_ident(attr_vals[1]); /* ident */
|
||||
ca->eol_attr = check_eol(attr_vals[3]); /* eol */
|
||||
return 0;
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
21
src/filter.h
21
src/filter.h
@ -12,7 +12,10 @@
|
||||
#include "git2/odb.h"
|
||||
#include "git2/repository.h"
|
||||
|
||||
typedef int (*git_filter_cb)(git_buf *dest, const git_buf *source, const char *filename);
|
||||
typedef struct git_filter {
|
||||
int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source);
|
||||
void (*do_free)(struct git_filter *self);
|
||||
} git_filter;
|
||||
|
||||
typedef enum {
|
||||
GIT_FILTER_TO_WORKTREE,
|
||||
@ -47,13 +50,6 @@ typedef enum {
|
||||
#endif
|
||||
} git_eol_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
int crlf_action;
|
||||
int eol_attr;
|
||||
int ident;
|
||||
} git_conv_attrs;
|
||||
|
||||
typedef struct {
|
||||
/* NUL, CR, LF and CRLF counts */
|
||||
unsigned int nul, cr, lf, crlf;
|
||||
@ -63,14 +59,17 @@ typedef struct {
|
||||
} git_text_stats;
|
||||
|
||||
extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode);
|
||||
extern int git_filter__load_attrs(git_conv_attrs *ca, git_repository *repo, const char *path);
|
||||
extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename);
|
||||
extern void git_filter__free(git_vector *filters);
|
||||
extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters);
|
||||
|
||||
/* Gather stats for a piece of text */
|
||||
extern void git_text__stat(git_text_stats *stats, git_buf *text);
|
||||
extern void git_text__stat(git_text_stats *stats, const git_buf *text);
|
||||
|
||||
/* Heuristics on a set of text stats to check whether it's binary
|
||||
* text or not */
|
||||
extern int git_text__is_binary(git_text_stats *stats);
|
||||
|
||||
/* Available filters */
|
||||
extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user