Add filter tests and fix some bugs

This adds some initial unit tests for file filtering and fixes
some simple bugs in filter application.
This commit is contained in:
Russell Belfer 2012-03-02 15:09:40 -08:00
parent d377fe80b1
commit ce49c7a8a9
9 changed files with 189 additions and 56 deletions

View File

@ -25,6 +25,12 @@ size_t git_blob_rawsize(git_blob *blob)
return blob->odb_object->raw.len;
}
/*
 * Copy the blob's raw object data into `buffer`.
 *
 * Returns whatever git_buf_set returns for the copy.
 */
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
{
	git_odb_object *obj = blob->odb_object;

	return git_buf_set(buffer, obj->raw.data, obj->raw.len);
}
void git_blob__free(git_blob *blob)
{
git_odb_object_free(blob->odb_object);

View File

@ -19,5 +19,6 @@ struct git_blob {
void git_blob__free(git_blob *blob);
int git_blob__parse(git_blob *blob, git_odb_object *obj);
/* Copy the blob's raw contents into `buffer`; returns the result of git_buf_set */
int git_blob__getbuf(git_buf *buffer, git_blob *blob);
#endif

View File

@ -389,3 +389,10 @@ void git_buf_rtrim(git_buf *buf)
buf->ptr[buf->size] = '\0';
}
/*
 * Compare two buffers byte-wise, like memcmp extended to unequal lengths.
 *
 * Returns a negative value if `a` sorts before `b`, a positive value if it
 * sorts after, and 0 when both have the same size and the same contents.
 * When one buffer is a prefix of the other, the shorter one sorts first.
 */
int git_buf_cmp(const git_buf *a, const git_buf *b)
{
	size_t common = (a->size < b->size) ? a->size : b->size;
	int result = 0;

	/* Skip memcmp when there is nothing to compare: an empty buffer may
	 * have no storage behind `ptr`, and memcmp requires valid pointers
	 * even when the length is zero. */
	if (common > 0)
		result = memcmp(a->ptr, b->ptr, common);

	if (result != 0)
		return result;

	/* Common prefix is identical, so the sizes decide the order */
	if (a->size < b->size)
		return -1;
	if (a->size > b->size)
		return 1;
	return 0;
}

View File

@ -118,4 +118,6 @@ GIT_INLINE(int) git_buf_rfind_next(git_buf *buf, char ch)
/* Remove whitespace from the end of the buffer */
void git_buf_rtrim(git_buf *buf);
/* Compare two buffers byte-wise: <0, 0 or >0 as with memcmp; on a shared
 * prefix the shorter buffer sorts first */
int git_buf_cmp(const git_buf *a, const git_buf *b);
#endif

View File

@ -104,52 +104,32 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con
static int drop_crlf(git_buf *dest, const git_buf *source)
{
size_t psize = source->size - 1;
size_t i = 0;
const char *scan = source->ptr, *next;
const char *scan_end = source->ptr + source->size;
/* Initial scan: see if we can reach the end of the document
* without finding a single carriage return */
while (i < psize && source->ptr[i] != '\r')
i++;
/* Clean file? Tell the library to skip this filter */
if (i == psize)
return -1;
/* Main scan loop. Keep moving forward until we find a carriage
* return, and then copy the whole chunk to the destination
* buffer.
*
* Note that we only scan until `size - 1`, because we cannot drop a
* carriage return if it's the last character in the file (what a weird
* file, anyway)
/* Main scan loop. Find the next carriage return and copy the
* whole chunk up to that point to the destination buffer.
*/
while (i < psize) {
size_t org = i;
while ((next = memchr(scan, '\r', scan_end - scan)) != NULL) {
/* copy input up to \r */
if (next > scan)
git_buf_put(dest, scan, next - scan);
while (i < psize && source->ptr[i] != '\r')
i++;
if (i > org)
git_buf_put(dest, source->ptr + org, i - org);
/* We found a carriage return. Is the next character a newline?
* If it is, we just keep moving. The newline will be copied
* to the dest in the next chunk.
*
* If it's not a newline, we need to insert the carriage return
* into the dest buffer, because we don't drop lone CRs.
*/
if (source->ptr[i + 1] != '\n') {
/* Do not drop \r unless it is followed by \n */
if (*(next + 1) != '\n')
git_buf_putc(dest, '\r');
}
i++;
scan = next + 1;
}
/* Copy the last character in the file */
git_buf_putc(dest, source->ptr[psize]);
return 0;
/* If there was no \r, then tell the library to skip this filter */
if (scan == source->ptr)
return -1;
/* Copy remaining input into dest */
git_buf_put(dest, scan, scan_end - scan);
return git_buf_lasterror(dest);
}
static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)

View File

@ -12,7 +12,7 @@
#include "repository.h"
#include "git2/config.h"
/* Fresh from Core Git. I wonder what we could use this for... */
/* Tweaked from Core Git. I wonder what we could use this for... */
void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
{
size_t i;
@ -27,20 +27,20 @@ void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
if (i + 1 < text->size && text->ptr[i + 1] == '\n')
stats->crlf++;
continue;
}
if (c == '\n') {
else if (c == '\n')
stats->lf++;
continue;
}
if (c == 127)
else if (c == 0x85)
/* Unicode CR+LF */
stats->crlf++;
else if (c == 127)
/* DEL */
stats->nonprintable++;
else if (c < 32) {
else if (c <= 0x1F || (c >= 0x80 && c <= 0x9F)) {
switch (c) {
/* BS, HT, ESC and FF */
case '\b': case '\t': case '\033': case '\014':
@ -53,6 +53,7 @@ void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
stats->nonprintable++;
}
}
else
stats->printable++;
}
@ -118,7 +119,7 @@ void git_filters_free(git_vector *filters)
int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
{
unsigned int src, dst, i;
unsigned int i, src;
git_buf *dbuffer[2];
dbuffer[0] = source;
@ -138,28 +139,26 @@ int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
for (i = 0; i < filters->length; ++i) {
git_filter *filter = git_vector_get(filters, i);
dst = (src + 1) % 2;
unsigned int dst = 1 - src;
git_buf_clear(dbuffer[dst]);
/* Apply the filter, from dbuffer[src] to dbuffer[dst];
/* Apply the filter from dbuffer[src] to the other buffer;
* if the filtering is canceled by the user mid-filter,
* we skip to the next filter without changing the source
* of the double buffering (so that the text goes through
* cleanly).
*/
if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) {
src = (src + 1) % 2;
}
if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0)
src = dst;
if (git_buf_oom(dbuffer[dst]))
return GIT_ENOMEM;
}
/* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */
if (dst != 1) {
if (src != 1)
git_buf_swap(dest, source);
}
return GIT_SUCCESS;
}

View File

@ -27,3 +27,15 @@ void cl_git_mkfile(const char *filename, const char *content)
cl_must_pass(p_close(fd));
}
/*
 * Append `new_content` to `filename`, creating the file if it does not
 * exist. Passing NULL for `new_content` appends a single newline.
 * Any failing step aborts the current test through the clar assertions.
 */
void cl_git_append2file(const char *filename, const char *new_content)
{
	int fd = p_open(filename, O_WRONLY | O_APPEND | O_CREAT);

	/* A failed open yields a negative descriptor, while 0 is a perfectly
	 * valid one, so test the sign rather than comparing against zero.
	 * NOTE(review): assumes p_open follows the open(2) return convention. */
	cl_assert(fd >= 0);

	if (!new_content)
		new_content = "\n";

	cl_must_pass(p_write(fd, new_content, strlen(new_content)));
	cl_must_pass(p_close(fd));

	/* No creation mode is passed to p_open above, so set the permissions
	 * explicitly once the file exists */
	cl_must_pass(p_chmod(filename, 0644));
}

View File

@ -53,5 +53,6 @@ GIT_INLINE(void) cl_assert_strequal_internal(
/* Write the contents of a buffer to disk */
void cl_git_mkfile(const char *filename, const char *content);
/* Append content to a file on disk, creating it if needed; a NULL
 * new_content appends a single newline */
void cl_git_append2file(const char *filename, const char *new_content);
#endif

View File

@ -0,0 +1,125 @@
#include "clar_libgit2.h"
#include "posix.h"
#include "blob.h"
#include "filter.h"
/* Repository opened by the initialize hook and released by the cleanup hook */
static git_repository *g_repo = NULL;
/* Number of sample blobs; each table below has one entry per sample */
#define NUM_TEST_OBJECTS 6
/* Ids of the blobs created from g_raw, filled in by the initialize hook */
static git_oid g_oids[NUM_TEST_OBJECTS];
/* Raw sample contents, in order: empty, LF only, CR only, CRLF only,
 * mixed line endings, and one entry with embedded NUL and high-bit bytes */
static const char *g_raw[NUM_TEST_OBJECTS] = {
"",
"foo\nbar\n",
"foo\rbar\r",
"foo\r\nbar\r\n",
"foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r",
"123\n\000\001\002\003\004abc\255\254\253\r\n"
};
/* Byte length of each g_raw entry. A negative value means "use strlen" and
 * is replaced with the real length by the initialize hook. The last entry
 * contains embedded NULs, so its length is spelled out. */
static int g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17 };
/* Expected git_text_gather_stats result per sample; the stats test compares
 * these against the computed stats with memcmp.
 * NOTE(review): the initializer order must match the field order of
 * git_text_stats, which is declared elsewhere -- confirm when editing. */
static git_text_stats g_stats[NUM_TEST_OBJECTS] = {
{ 0, 0, 0, 0, 0, 0 },
{ 0, 0, 2, 0, 6, 0 },
{ 0, 2, 0, 0, 6, 0 },
{ 0, 2, 2, 2, 6, 0 },
{ 0, 4, 4, 1, 31, 0 },
{ 1, 1, 2, 1, 9, 5 }
};
/* Expected output for each sample after the GIT_FILTER_TO_ODB filters are
 * applied: CR LF pairs become LF, lone CRs are kept. The last initializer of
 * each entry equals the byte length of its literal.
 * NOTE(review): initializers presumably map to git_buf's pointer, allocated
 * size and used size, in that order -- confirm against the git_buf struct. */
static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
{ "", 0, 0 },
{ "foo\nbar\n", 0, 8 },
{ "foo\rbar\r", 0, 8 },
{ "foo\nbar\n", 0, 8 },
{ "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 },
{ "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }
};
/*
 * Per-test setup: sandbox the "empty_standard_repo" fixture, open it as
 * g_repo, and store every g_raw sample as a blob whose id lands in g_oids.
 */
void test_object_blob_filter__initialize(void)
{
	int i = 0;

	cl_fixture_sandbox("empty_standard_repo");
	cl_git_pass(p_rename(
		"empty_standard_repo/.gitted", "empty_standard_repo/.git"));
	cl_git_pass(git_repository_open(&g_repo, "empty_standard_repo"));

	while (i < NUM_TEST_OBJECTS) {
		size_t len;

		/* A negative g_len entry stands for "NUL-terminated string" */
		if (g_len[i] < 0)
			len = strlen(g_raw[i]);
		else
			len = (size_t)g_len[i];

		/* Keep the resolved length so the tests can compare sizes */
		g_len[i] = (int)len;

		cl_git_pass(
			git_blob_create_frombuffer(&g_oids[i], g_repo, g_raw[i], len)
		);

		i++;
	}
}
/*
 * Per-test teardown: release the repository handle opened by the
 * initialize hook, then remove the sandboxed fixture.
 */
void test_object_blob_filter__cleanup(void)
{
git_repository_free(g_repo);
/* Reset the global so no stale handle survives into the next test */
g_repo = NULL;
cl_fixture_cleanup("empty_standard_repo");
}
/*
 * Each stored blob must come back from the object database with exactly
 * the size and bytes of the sample it was created from.
 */
void test_object_blob_filter__unfiltered(void)
{
	git_blob *blob;
	int i = 0;

	while (i < NUM_TEST_OBJECTS) {
		cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));

		/* Size first, then content over that same length */
		cl_assert((size_t)g_len[i] == git_blob_rawsize(blob));
		cl_assert(memcmp(git_blob_rawcontent(blob), g_raw[i], g_len[i]) == 0);

		git_blob_free(blob);
		i++;
	}
}
/*
 * Gather text statistics for every sample blob and require them to match
 * the corresponding g_stats entry byte for byte.
 */
void test_object_blob_filter__stats(void)
{
	git_text_stats stats;
	git_buf buf = GIT_BUF_INIT;
	git_blob *blob;
	int i = 0;

	while (i < NUM_TEST_OBJECTS) {
		cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));
		cl_git_pass(git_blob__getbuf(&buf, blob));

		git_text_gather_stats(&stats, &buf);
		cl_assert(memcmp(&g_stats[i], &stats, sizeof(stats)) == 0);

		git_blob_free(blob);
		i++;
	}

	/* The same buffer is reused for every sample and freed once here */
	git_buf_free(&buf);
}
/*
 * Mark "*.txt" as text through .gitattributes, load the to-ODB filters for
 * a matching file name, and check that filtering each sample blob yields
 * the corresponding g_crlf_filtered entry.
 */
void test_object_blob_filter__to_odb(void)
{
	git_buf orig = GIT_BUF_INIT, out = GIT_BUF_INIT;
	git_vector filters = GIT_VECTOR_INIT;
	git_config *cfg;
	git_blob *blob;
	int i = 0;

	cl_git_pass(git_repository_config(&cfg, g_repo));
	cl_assert(cfg);

	/* Flush cached attributes before writing the new attribute rule */
	git_attr_cache_flush(g_repo);
	cl_git_append2file("empty_standard_repo/.gitattributes", "*.txt text\n");

	cl_assert(git_filters_load(
		&filters, g_repo, "filename.txt", GIT_FILTER_TO_ODB) > 0);
	cl_assert(filters.length == 1);

	while (i < NUM_TEST_OBJECTS) {
		cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));
		cl_git_pass(git_blob__getbuf(&orig, blob));

		cl_git_pass(git_filters_apply(&out, &orig, &filters));
		cl_assert(git_buf_cmp(&out, &g_crlf_filtered[i]) == 0);

		git_blob_free(blob);
		i++;
	}

	git_filters_free(&filters);
	git_buf_free(&orig);
	git_buf_free(&out);
	git_config_free(cfg);
}