libgit2/src/filter.c
Russell Belfer 2bc8fa0227 Implement git_pool paged memory allocator
This adds a `git_pool` object that can do simple paged memory
allocation with free for the entire pool at once.  Using this,
you can replace many small allocations with large blocks that
can then cheaply be doled out in small pieces.  This is best
used when you plan to free the small blocks all at once - for
example, if they represent the parsed state from a file or data
stream that are either all kept or all discarded.

There are two real patterns of usage for `git_pools`: either
for "string" allocation, where the item size is a single byte
and you end up just packing the allocations in together, or for
"fixed size" allocation where you are allocating a large object
(e.g. a `git_oid`) and you generally just allocation single
objects that can be tightly packed.  Of course, you can use it
for other things, but those two cases are the easiest.
2012-04-25 10:42:37 -07:00

166 lines
3.6 KiB
C

/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "common.h"
#include "fileops.h"
#include "hash.h"
#include "filter.h"
#include "repository.h"
#include "git2/config.h"
/* Tweaked from Core Git. I wonder what we could use this for... */
void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
{
size_t i;
memset(stats, 0, sizeof(*stats));
for (i = 0; i < text->size; i++) {
unsigned char c = text->ptr[i];
if (c == '\r') {
stats->cr++;
if (i + 1 < text->size && text->ptr[i + 1] == '\n')
stats->crlf++;
}
else if (c == '\n')
stats->lf++;
else if (c == 0x85)
/* Unicode CR+LF */
stats->crlf++;
else if (c == 127)
/* DEL */
stats->nonprintable++;
else if (c <= 0x1F || (c >= 0x80 && c <= 0x9F)) {
switch (c) {
/* BS, HT, ESC and FF */
case '\b': case '\t': case '\033': case '\014':
stats->printable++;
break;
case 0:
stats->nul++;
/* fall through */
default:
stats->nonprintable++;
}
}
else
stats->printable++;
}
/* If file ends with EOF then don't count this EOF as non-printable. */
if (text->size >= 1 && text->ptr[text->size - 1] == '\032')
stats->nonprintable--;
}
/*
* Fresh from Core Git
*/
int git_text_is_binary(git_text_stats *stats)
{
if (stats->nul)
return 1;
if ((stats->printable >> 7) < stats->nonprintable)
return 1;
/*
* Other heuristics? Average line length might be relevant,
* as might LF vs CR vs CRLF counts..
*
* NOTE! It might be normal to have a low ratio of CRLF to LF
* (somebody starts with a LF-only file and edits it with an editor
* that adds CRLF only to lines that are added..). But do we
* want to support CR-only? Probably not.
*/
return 0;
}
int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode)
{
int error;
if (mode == GIT_FILTER_TO_ODB) {
/* Load the CRLF cleanup filter when writing to the ODB */
error = git_filter_add__crlf_to_odb(filters, repo, path);
if (error < GIT_SUCCESS)
return error;
} else {
return git__throw(GIT_ENOTIMPLEMENTED,
"Worktree filters are not implemented yet");
}
return (int)filters->length;
}
void git_filters_free(git_vector *filters)
{
size_t i;
git_filter *filter;
git_vector_foreach(filters, i, filter) {
if (filter->do_free != NULL)
filter->do_free(filter);
else
git__free(filter);
}
git_vector_free(filters);
}
int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
{
unsigned int i, src;
git_buf *dbuffer[2];
dbuffer[0] = source;
dbuffer[1] = dest;
src = 0;
if (source->size == 0) {
git_buf_clear(dest);
return GIT_SUCCESS;
}
/* Pre-grow the destination buffer to more or less the size
* we expect it to have */
if (git_buf_grow(dest, source->size) < 0)
return GIT_ENOMEM;
for (i = 0; i < filters->length; ++i) {
git_filter *filter = git_vector_get(filters, i);
unsigned int dst = 1 - src;
git_buf_clear(dbuffer[dst]);
/* Apply the filter from dbuffer[src] to the other buffer;
* if the filtering is canceled by the user mid-filter,
* we skip to the next filter without changing the source
* of the double buffering (so that the text goes through
* cleanly).
*/
if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0)
src = dst;
if (git_buf_oom(dbuffer[dst]))
return GIT_ENOMEM;
}
/* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */
if (src != 1)
git_buf_swap(dest, source);
return GIT_SUCCESS;
}