diff --git a/src/blob.c b/src/blob.c
index 4065ffa12..57a31041e 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -11,6 +11,7 @@
 
 #include "common.h"
 #include "blob.h"
+#include "filter.h"
 
 const void *git_blob_rawcontent(git_blob *blob)
 {
@@ -65,15 +66,134 @@ int git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *b
 	return GIT_SUCCESS;
 }
 
+/*
+ * Stream the file at `path` into a new blob in `odb`, reading it in
+ * fixed-size chunks.
+ *
+ * `file_size` is the number of bytes the caller expects the file to hold;
+ * it sizes the ODB write stream and bounds the read loop. `oid` is handed
+ * to the stream's `finalize_write` once every byte has been written.
+ *
+ * Returns the result of finalizing the stream, or an error code if the
+ * stream or the file cannot be opened, a read or stream write fails, or the
+ * file ends before `file_size` bytes have been read.
+ */
+static int write_file_stream(git_oid *oid, git_odb *odb, const char *path, git_off_t file_size)
+{
+	int fd, error;
+	char buffer[4096];
+	git_odb_stream *stream = NULL;
+
+	if ((error = git_odb_open_wstream(&stream, odb, file_size, GIT_OBJ_BLOB)) < GIT_SUCCESS)
+		return error;
+
+	if ((fd = p_open(path, O_RDONLY)) < 0) {
+		error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", path);
+		goto cleanup;
+	}
+
+	while (file_size > 0) {
+		ssize_t read_len = p_read(fd, buffer, sizeof(buffer));
+
+		/*
+		 * A zero-length read means the file ended before `file_size`
+		 * bytes were read; treating it as an error keeps this loop from
+		 * spinning forever on a file that is shorter than expected.
+		 */
+		if (read_len <= 0) {
+			error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read full file");
+			p_close(fd);
+			goto cleanup;
+		}
+
+		error = stream->write(stream, buffer, read_len);
+		if (error < GIT_SUCCESS) {
+			p_close(fd);
+			goto cleanup;
+		}
+
+		file_size -= read_len;
+	}
+
+	p_close(fd);
+	error = stream->finalize_write(oid, stream);
+
+cleanup:
+	stream->free(stream);
+	return error;
+}
+
+/*
+ * Read the whole file at `path` into memory, run it through `filters` and
+ * write the filtered result to `odb` as a blob.
+ *
+ * Returns the first error reported by the read, filter or write step, or
+ * the result of the ODB write when all of them succeed.
+ */
+static int write_file_filtered(
+	git_oid *oid,
+	git_odb *odb,
+	const char *path,
+	git_vector *filters)
+{
+	int error;
+	git_buf file_in = GIT_BUF_INIT;
+	git_buf filter_result = GIT_BUF_INIT;
+
+	error = git_futils_readbuffer(&file_in, path);
+	if (error < GIT_SUCCESS)
+		return error;
+
+	error = git_filter__apply(&filter_result, &file_in, filters, path);
+
+	/* Only write the blob when filtering succeeded */
+	if (error >= GIT_SUCCESS)
+		error = git_odb_write(oid, odb, filter_result.ptr, filter_result.size, GIT_OBJ_BLOB);
+
+	git_buf_free(&file_in);
+	git_buf_free(&filter_result);
+
+	/* Report the outcome of the write rather than unconditional success */
+	return error;
+}
+
+/*
+ * Write the target of the symbolic link at `path` to `odb` as a blob.
+ *
+ * `link_size` is the expected length of the link target in bytes; if
+ * `p_readlink` returns any other length nothing is written and an error
+ * is returned.
+ */
+static int write_symlink(git_oid *oid, git_odb *odb, const char *path, size_t link_size)
+{
+	char *link_data;
+	ssize_t read_len;
+	int error;
+
+	link_data = git__malloc(link_size);
+	if (!link_data)
+		return GIT_ENOMEM;
+
+	read_len = p_readlink(path, link_data, link_size);
+
+	if (read_len != (ssize_t)link_size) {
+		free(link_data);
+		return git__throw(GIT_EOSERR, "Failed to create blob. Can't read symlink");
+	}
+
+	error = git_odb_write(oid, odb, (void *)link_data, link_size, GIT_OBJ_BLOB);
+	free(link_data);
+	return error;
+}
+
 int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *path)
 {
 	int error = GIT_SUCCESS;
 	git_buf full_path = GIT_BUF_INIT;
 	git_off_t size;
-	git_odb_stream *stream = NULL;
 	struct stat st;
 	const char *workdir;
-	git_odb *odb;
+	git_odb *odb = NULL;
 
 	workdir = git_repository_workdir(repo);
 	if (workdir == NULL)
@@ -95,63 +215,37 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
 	if (error < GIT_SUCCESS)
 		goto cleanup;
 
-	if ((error = git_odb_open_wstream(&stream, odb, (size_t)size, GIT_OBJ_BLOB)) < GIT_SUCCESS)
-		goto cleanup;
-
 	if (S_ISLNK(st.st_mode)) {
-		char *link_data;
-		ssize_t read_len;
-
-		link_data = git__malloc((size_t)size);
-		if (!link_data) {
-			error = GIT_ENOMEM;
-			goto cleanup;
-		}
-
-		read_len = p_readlink(full_path.ptr, link_data, (size_t)size);
-
-		if (read_len != (ssize_t)size) {
-			error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read symlink");
-			free(link_data);
-			goto cleanup;
-		}
-
-		stream->write(stream, link_data, (size_t)size);
-		free(link_data);
-
+		error = write_symlink(oid, odb, full_path.ptr, (size_t)size);
 	} else {
-		int fd;
-		char buffer[2048];
+		git_vector write_filters = GIT_VECTOR_INIT;
 
-		if ((fd = p_open(full_path.ptr, O_RDONLY)) < 0) {
-			error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", full_path.ptr);
-			goto cleanup;
+		error = git_filter__load_for_file(
+			&write_filters, repo, full_path.ptr, GIT_FILTER_TO_ODB);
+
+		if (error >= GIT_SUCCESS) {
+			if (write_filters.length == 0)
+				error = write_file_stream(oid, odb, full_path.ptr, size);
+			else
+				error = write_file_filtered(oid, odb, full_path.ptr, &write_filters);
 		}
 
-		while (size > 0) {
-			ssize_t read_len = p_read(fd, buffer, sizeof(buffer));
-
-			if (read_len < 0) {
-				error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read full file");
-				p_close(fd);
-				goto cleanup;
-			}
-
-			stream->write(stream, buffer, read_len);
-			size -= read_len;
-		}
-
-		p_close(fd);
+		/* Release the filter list on every path, including a failed load */
+		git_vector_free(&write_filters);
+
+		/*
+		 * TODO: eventually support streaming filtered files, for files which are bigger
+		 * than a given threshold. This is not a priority because applying a filter in
+		 * streaming mode changes the final size of the blob, and without knowing its
+		 * final size, the blob cannot be written in stream mode to the ODB.
+		 *
+		 * The plan is to do streaming writes to a tempfile on disk and then stream
+		 * that file to the ODB, using `write_file_stream`.
+		 */
 	}
 
-	error = stream->finalize_write(oid, stream);
-
 cleanup:
-	if (stream)
-		stream->free(stream);
-
 	git_buf_free(&full_path);
-
 	return error;
 }
 
diff --git a/src/buffer.c b/src/buffer.c
index b9f62cc30..e86246f94 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -235,7 +235,7 @@ char *git_buf_detach(git_buf *buf)
 {
 	char *data = buf->ptr;
 
-	if (buf->asize <= 0)
+	if (buf->asize == 0 || buf->ptr == &git_buf__oom)
 		return NULL;
 
 	git_buf_init(buf, 0);
diff --git a/src/filter.c b/src/filter.c
new file mode 100644
index 000000000..b97ac6697
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2009-2012 the libgit2 contributors
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "common.h"
+#include "fileops.h"
+#include "hash.h"
+#include "filter.h"
+
+/*
+ * Fresh from Core Git. I wonder what we could use this for...
+ *
+ * Fill `stats` with counts of the NUL, CR, LF and CRLF bytes and of the
+ * printable / non-printable bytes found in `text`. A CR followed by LF is
+ * counted as a CR and a CRLF, and the LF itself is then counted as an LF.
+ */
+void git_text__stat(git_text_stats *stats, git_buf *text)
+{
+	size_t i;
+
+	memset(stats, 0, sizeof(*stats));
+
+	for (i = 0; i < text->size; i++) {
+		unsigned char c = text->ptr[i];
+
+		if (c == '\r') {
+			stats->cr++;
+
+			if (i + 1 < text->size && text->ptr[i + 1] == '\n')
+				stats->crlf++;
+
+			continue;
+		}
+
+		if (c == '\n') {
+			stats->lf++;
+			continue;
+		}
+
+		if (c == 127)
+			/* DEL */
+			stats->nonprintable++;
+
+		else if (c < 32) {
+			switch (c) {
+				/* BS, HT, ESC and FF */
+			case '\b': case '\t': case '\033': case '\014':
+				stats->printable++;
+				break;
+			case 0:
+				stats->nul++;
+				/* fall through */
+			default:
+				stats->nonprintable++;
+			}
+		}
+		else
+			stats->printable++;
+	}
+
+	/* If file ends with EOF then don't count this EOF as non-printable. */
+	if (text->size >= 1 && text->ptr[text->size - 1] == '\032')
+		stats->nonprintable--;
+}
+
+/*
+ * Fresh from Core Git
+ *
+ * Returns 1 when `stats` records any NUL byte, or when the non-printable
+ * count is greater than the printable count shifted right by 7 (i.e.
+ * divided by 128, rounded down); returns 0 otherwise.
+ */
+int git_text__is_binary(git_text_stats *stats)
+{
+	if (stats->nul)
+		return 1;
+
+	if ((stats->printable >> 7) < stats->nonprintable)
+		return 1;
+	/*
+	 * Other heuristics? Average line length might be relevant,
+	 * as might LF vs CR vs CRLF counts..
+	 *
+	 * NOTE! It might be normal to have a low ratio of CRLF to LF
+	 * (somebody starts with a LF-only file and edits it with an editor
+	 * that adds CRLF only to lines that are added..). But do we
+	 * want to support CR-only? Probably not.
+	 */
+	return 0;
+}
+
+/*
+ * Placeholder: no filters are loaded yet, so `filters` is left untouched
+ * and 0 is returned regardless of `repo`, `full_path` and `mode`.
+ */
+int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode)
+{
+	/* We don't load any filters yet. */
+	return 0;
+}
+
+/*
+ * Run the contents of `source` through every callback in `filters`, in
+ * order, leaving the final text in `dest`.
+ *
+ * `source` and `dest` serve as a pair of alternating buffers, so `source`
+ * may be overwritten or swapped with `dest`. A filter that returns non-zero
+ * is skipped: its input is handed unchanged to the next filter. With no
+ * filters, or when every filter is skipped, `dest` receives the original
+ * contents of `source`.
+ *
+ * Returns GIT_SUCCESS, or GIT_ENOMEM if a buffer is out of memory.
+ */
+int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename)
+{
+	unsigned int src, dst, i;
+	git_buf *dbuffer[2];
+
+	dbuffer[0] = source;
+	dbuffer[1] = dest;
+
+	src = 0;
+
+	/* Pre-grow the destination buffer to more or less the size
+	 * we expect it to have */
+	if (git_buf_grow(dest, source->size) < 0)
+		return GIT_ENOMEM;
+
+	for (i = 0; i < filters->length; ++i) {
+		git_filter_cb filter = git_vector_get(filters, i);
+		dst = (src + 1) % 2;
+
+		git_buf_clear(dbuffer[dst]);
+
+		/* Apply the filter, from dbuffer[src] to dbuffer[dst];
+		 * if the filtering is canceled by the user mid-filter,
+		 * we skip to the next filter without changing the source
+		 * of the double buffering (so that the text goes through
+		 * cleanly).
+		 */
+		if (filter(dbuffer[dst], dbuffer[src], filename) == 0) {
+			src = (src + 1) % 2;
+		}
+
+		if (git_buf_oom(dbuffer[dst]))
+			return GIT_ENOMEM;
+	}
+
+	/*
+	 * The current text lives in dbuffer[src]; ensure it ends up in
+	 * dbuffer[1] (i.e. the dest). Testing `src` instead of `dst` also
+	 * covers an empty filter list, where `dst` is never assigned, and a
+	 * skipped last filter, whose discarded output must not be returned.
+	 */
+	if (src != 1)
+		git_buf_swap(dest, source);
+
+	return GIT_SUCCESS;
+}
diff --git a/src/filter.h b/src/filter.h
new file mode 100644
index 000000000..9a8f84972
--- /dev/null
+++ b/src/filter.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2009-2012 the libgit2 contributors
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_filter_h__
+#define INCLUDE_filter_h__
+
+#include "common.h"
+#include "buffer.h"
+#include "git2/odb.h"
+#include "git2/repository.h"
+
+/*
+ * A filter reads `source` and writes its result to `dest`. Returning 0
+ * means the result should be used; any other value makes
+ * `git_filter__apply` skip the filter.
+ */
+typedef int (*git_filter_cb)(git_buf *dest, const git_buf *source, const char *filename);
+
+/* Direction in which a file is being filtered */
+typedef enum {
+	GIT_FILTER_TO_WORKTREE,
+	GIT_FILTER_TO_ODB
+} git_filter_mode;
+
+typedef struct {
+	/* NUL, CR, LF and CRLF counts */
+	unsigned int nul, cr, lf, crlf;
+
+	/* These are just approximations! */
+	unsigned int printable, nonprintable;
+} git_text_stats;
+
+/* Meant to collect into `filters` the filters to run on `full_path` for
+ * `mode`; the current implementation loads none */
+extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode);
+
+/* Run `source` through `filters`, leaving the result in `dest` */
+extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename);
+
+/* Gather stats for a piece of text */
+extern void git_text__stat(git_text_stats *stats, git_buf *text);
+
+/* Heuristics on a set of text stats to check whether it's binary
+ * text or not */
+extern int git_text__is_binary(git_text_stats *stats);
+
+#endif