diff --git a/include/git2.h b/include/git2.h index 5a55bb284..1711ff8be 100644 --- a/include/git2.h +++ b/include/git2.h @@ -30,6 +30,7 @@ #include "git2/commit.h" #include "git2/tag.h" #include "git2/tree.h" +#include "git2/diff.h" #include "git2/index.h" #include "git2/config.h" diff --git a/include/git2/diff.h b/include/git2/diff.h index 1d3a8d408..0f4b0783b 100644 --- a/include/git2/diff.h +++ b/include/git2/diff.h @@ -21,80 +21,125 @@ */ GIT_BEGIN_DECL -typedef int (*git_diff_file_fn)( - void *cb_data, - const git_oid *old, - const char *old_path, - int old_mode, - const git_oid *new, /* hashed object if from work tree */ - const char *new_path, - int new_mode); - -typedef int (*git_diff_hunk_fn)( - void *cb_data, - int old_start, - int old_lines, - int new_start, - int new_lines); - -#define GIT_DIFF_LINE_CONTEXT 0 -#define GIT_DIFF_LINE_ADDITION 1 -#define GIT_DIFF_LINE_DELETION 2 - -typedef int (*git_diff_line_fn)( - void *cb_data, - int origin, /* GIT_DIFF_LINE value from above */ - const char *content, - size_t content_len); - typedef struct { int context_lines; int interhunk_lines; int ignore_whitespace; + int force_text; + git_strarray pathspec; +} git_diff_options; - git_diff_file_fn file_cb; - git_diff_hunk_fn hunk_cb; - git_diff_line_fn line_cb; - void *cb_data; -} git_diff_opts; +typedef struct { + git_status_t status; /* value from tree.h */ + unsigned int old_attr; + unsigned int new_attr; + git_oid old_oid; + git_oid new_oid; + git_blob *old_blob; + git_blob *new_blob; + const char *path; + const char *new_path; /* NULL unless status is RENAMED or COPIED */ + int similarity; /* value from 0 to 100 */ + int binary; /* diff as binary? */ +} git_diff_delta; +typedef int (*git_diff_file_fn)( + void *cb_data, + git_diff_delta *delta, + float progress); + +typedef struct { + int old_start; + int old_lines; + int new_start; + int new_lines; +} git_diff_range; + +typedef int (*git_diff_hunk_fn)( + void *cb_data, + git_diff_delta *delta, + git_diff_range *range, + const char *header, + size_t header_len); + +#define GIT_DIFF_LINE_CONTEXT ' ' +#define GIT_DIFF_LINE_ADDITION '+' +#define GIT_DIFF_LINE_DELETION '-' +#define GIT_DIFF_LINE_ADD_EOFNL '\n' +#define GIT_DIFF_LINE_DEL_EOFNL '\0' + +typedef int (*git_diff_line_fn)( + void *cb_data, + git_diff_delta *delta, + char line_origin, /* GIT_DIFF_LINE value from above */ + const char *content, + size_t content_len); + +typedef struct git_diff_list git_diff_list; + +/* + * Generate diff lists + */ + +GIT_EXTERN(int) git_diff_tree_to_tree( + git_repository *repo, + const git_diff_options *opts, + git_tree *old, + git_tree *new, + git_diff_list **diff); + +GIT_EXTERN(int) git_diff_index_to_tree( + git_repository *repo, + const git_diff_options *opts, + git_tree *old, + git_diff_list **diff); + +GIT_EXTERN(int) git_diff_workdir_to_tree( + git_repository *repo, + const git_diff_options *opts, + git_tree *old, + git_diff_list **diff); + +GIT_EXTERN(int) git_diff_workdir_to_index( + git_repository *repo, + const git_diff_options *opts, + git_diff_list **diff); + +GIT_EXTERN(void) git_diff_list_free(git_diff_list *diff); + +/* + * Process diff lists + */ + +GIT_EXTERN(int) git_diff_foreach( + git_diff_list *diff, + void *cb_data, + git_diff_file_fn file_cb, + git_diff_hunk_fn hunk_cb, + git_diff_line_fn line_cb); + +#ifndef _STDIO_H_ +#include +#endif + +GIT_EXTERN(int) git_diff_print_compact( + FILE *fp, git_diff_list *diff); + +GIT_EXTERN(int) git_diff_print_patch( + FILE *fp, git_diff_list *diff); + +/* + * Misc + */ GIT_EXTERN(int) git_diff_blobs( git_repository *repo, git_blob *old, git_blob *new, - git_diff_opts *options); - -GIT_EXTERN(int) git_diff_trees( - git_repository *repo, - git_tree *old, - git_tree *new, - git_diff_opts *options); - -GIT_EXTERN(int) git_diff_index( - git_repository *repo, - git_tree *old, - git_diff_opts *options); - -/* pass NULL for the git_tree to diff workdir against index */ -GIT_EXTERN(int) git_diff_workdir( - git_repository *repo, - git_tree *old, - git_diff_opts *options); - -GIT_EXTERN(int) git_diff_workdir_file( - git_repository *repo, - git_blob *old, - const char *path, - git_diff_opts *options); - -/* pass git_objects to diff against or NULL for index. - * can handle: blob->blob, tree->index, tree->tree - * it will be an error if object types don't match - */ -/* pass git_object to diff WT against or NULL for index - * can handle: index->wt, tree->wt, blob->wt with path - */ + git_diff_options *options, + void *cb_data, + git_diff_hunk_fn hunk_cb, + git_diff_line_fn line_cb); GIT_END_DECL diff --git a/include/git2/tree.h b/include/git2/tree.h index 95e0fdf94..c338da092 100644 --- a/include/git2/tree.h +++ b/include/git2/tree.h @@ -316,9 +316,14 @@ GIT_EXTERN(int) git_tree_walk(git_tree *tree, git_treewalk_cb callback, int mode /** @} */ typedef enum { + GIT_STATUS_UNMODIFIED = 0, GIT_STATUS_ADDED = 1, GIT_STATUS_DELETED = 2, GIT_STATUS_MODIFIED = 3, + GIT_STATUS_RENAMED = 4, + GIT_STATUS_COPIED = 5, + GIT_STATUS_IGNORED = 6, + GIT_STATUS_UNTRACKED = 7 } git_status_t; typedef struct { diff --git a/src/diff.c b/src/diff.c index 7128b7c76..6cafeb206 100644 --- a/src/diff.c +++ b/src/diff.c @@ -7,10 +7,126 @@ #include "common.h" #include "git2/diff.h" +#include "diff.h" #include "xdiff/xdiff.h" #include "blob.h" #include +static git_diff_delta *new_file_delta( + git_diff_list *diff, + const git_tree_diff_data *tdiff) +{ + git_buf path = GIT_BUF_INIT; + git_diff_delta *delta = git__calloc(1, sizeof(git_diff_delta)); + + if (!delta) { + git__rethrow(GIT_ENOMEM, "Could not allocate diff record"); + return NULL; + } + + /* copy shared fields */ + delta->status = tdiff->status; + delta->old_attr = tdiff->old_attr; + delta->new_attr = tdiff->new_attr; + delta->old_oid = tdiff->old_oid; + delta->new_oid = tdiff->new_oid; + + if (git_buf_joinpath(&path, diff->pfx.ptr, tdiff->path) < GIT_SUCCESS || + (delta->path = git_buf_detach(&path)) == NULL) + { + git__free(delta); + git__rethrow(GIT_ENOMEM, "Could not allocate diff record path"); + return NULL; + } + + return delta; +} + +static int tree_diff_cb(const git_tree_diff_data *ptr, void *data) +{ + int error; + git_diff_list *diff = data; + + assert(S_ISDIR(ptr->old_attr) == S_ISDIR(ptr->new_attr)); + + if (S_ISDIR(ptr->old_attr)) { + git_tree *old = NULL, *new = NULL; + ssize_t pfx_len = diff->pfx.size; + + if (!(error = git_tree_lookup(&old, diff->repo, &ptr->old_oid)) && + !(error = git_tree_lookup(&new, diff->repo, &ptr->new_oid)) && + !(error = git_buf_joinpath(&diff->pfx, diff->pfx.ptr, ptr->path))) + { + error = git_tree_diff(old, new, tree_diff_cb, diff); + git_buf_truncate(&diff->pfx, pfx_len); + } + + git_tree_free(old); + git_tree_free(new); + } else { + git_diff_delta *delta = new_file_delta(diff, ptr); + if (delta == NULL) + error = GIT_ENOMEM; + else if ((error = git_vector_insert(&diff->files, delta)) < GIT_SUCCESS) + git__free(delta); + } + + return error; +} + +static git_diff_list *git_diff_list_alloc( + git_repository *repo, const git_diff_options *opts) +{ + git_diff_list *diff = git__calloc(1, sizeof(git_diff_list)); + if (diff != NULL) { + if (opts != NULL) { + memcpy(&diff->opts, opts, sizeof(git_diff_options)); + /* do something safer with the pathspec strarray */ + } + diff->repo = repo; + git_buf_init(&diff->pfx, 0); + } + return diff; +} + +void git_diff_list_free(git_diff_list *diff) +{ + if (!diff) + return; + git_buf_free(&diff->pfx); + git__free(diff); +} + +int git_diff_tree_to_tree( + git_repository *repo, + const git_diff_options *opts, + git_tree *old, + git_tree *new, + git_diff_list **diff_ptr) +{ + int error; + git_diff_list *diff = git_diff_list_alloc(repo, opts); + if (!diff) + return GIT_ENOMEM; + + if ((error = git_tree_diff(old, new, tree_diff_cb, diff)) == GIT_SUCCESS) { + git_buf_free(&diff->pfx); /* don't need this anymore */ + *diff_ptr = diff; + } else + git_diff_list_free(diff); + + return error; +} + +typedef struct { + git_diff_list *diff; + void *cb_data; + git_diff_hunk_fn hunk_cb; + git_diff_line_fn line_cb; + unsigned int index; + git_diff_delta *delta; +} diff_info; + static int read_next_int(const char **str, int *value) { const char *scan = *str; @@ -28,56 +144,373 @@ static int read_next_int(const char **str, int *value) static int diff_output_cb(void *priv, mmbuffer_t *bufs, int len) { int err = GIT_SUCCESS; - git_diff_opts *opts = priv; + diff_info *di = priv; + + if (len == 1 && di->hunk_cb) { + git_diff_range range = { -1, 0, -1, 0 }; - if (len == 1) { - int ostart = -1, olen = 0, nstart = -1, nlen = 0; /* expect something of the form "@@ -%d[,%d] +%d[,%d] @@" */ - if (opts->hunk_cb && bufs[0].ptr[0] == '@') { + if (bufs[0].ptr[0] == '@') { const char *scan = bufs[0].ptr; - if (!(err = read_next_int(&scan, &ostart)) && *scan == ',') - err = read_next_int(&scan, &olen); - if (!err && !(err = read_next_int(&scan, &nstart)) && *scan == ',') - err = read_next_int(&scan, &nlen); - if (!err && ostart >= 0 && nstart >= 0) - err = opts->hunk_cb( - opts->cb_data, ostart, olen, nstart, nlen); + if (!(err = read_next_int(&scan, &range.old_start)) && *scan == ',') + err = read_next_int(&scan, &range.old_lines); + if (!err && + !(err = read_next_int(&scan, &range.new_start)) && *scan == ',') + err = read_next_int(&scan, &range.new_lines); + if (!err && range.old_start >= 0 && range.new_start >= 0) + err = di->hunk_cb( + di->cb_data, di->delta, &range, bufs[0].ptr, bufs[0].size); } } - else if (len == 2 || len == 3) { + else if ((len == 2 || len == 3) && di->line_cb) { int origin; + /* expect " "/"-"/"+", then data, then maybe newline */ origin = (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_ADDITION : (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_DELETION : GIT_DIFF_LINE_CONTEXT; - if (opts->line_cb) - err = opts->line_cb( - opts->cb_data, origin, bufs[1].ptr, bufs[1].size); + err = di->line_cb( + di->cb_data, di->delta, origin, bufs[1].ptr, bufs[1].size); + + /* deal with adding and removing newline at EOF */ + if (err == GIT_SUCCESS && len == 3) { + if (origin == GIT_DIFF_LINE_ADDITION) + origin = GIT_DIFF_LINE_ADD_EOFNL; + else + origin = GIT_DIFF_LINE_DEL_EOFNL; + + err = di->line_cb( + di->cb_data, di->delta, origin, bufs[2].ptr, bufs[2].size); + } } return err; } +static int set_file_is_binary( + git_repository *repo, + git_diff_delta *file, + mmfile_t *old, + mmfile_t *new) +{ + int error; + const char *value; + + /* check diff attribute +, -, or 0 */ + error = git_attr_get(repo, file->path, "diff", &value); + if (error != GIT_SUCCESS) + return error; + + if (value == GIT_ATTR_TRUE) { + file->binary = 0; + return GIT_SUCCESS; + } + if (value == GIT_ATTR_FALSE) { + file->binary = 1; + return GIT_SUCCESS; + } + + /* TODO: if value != NULL, implement diff drivers */ + /* TODO: check if NUL byte appears in first bit */ + GIT_UNUSED_ARG(old); + GIT_UNUSED_ARG(new); + file->binary = 0; + return GIT_SUCCESS; +} + +int git_diff_foreach( + git_diff_list *diff, + void *data, + git_diff_file_fn file_cb, + git_diff_hunk_fn hunk_cb, + git_diff_line_fn line_cb) +{ + int error = GIT_SUCCESS; + diff_info di; + git_diff_delta *delta; + + di.diff = diff; + di.cb_data = data; + di.hunk_cb = hunk_cb; + di.line_cb = line_cb; + + git_vector_foreach(&diff->files, di.index, delta) { + mmfile_t old, new; + xpparam_t params; + xdemitconf_t cfg; + xdemitcb_t callback; + + /* map files */ + if (hunk_cb || line_cb) { + /* TODO: Partial blob reading to defer loading whole blob. + * I.e. I want a blob with just the first 4kb loaded, then + * later on I will read the rest of the blob if needed. + */ + + if (delta->status == GIT_STATUS_DELETED || + delta->status == GIT_STATUS_MODIFIED) + { + error = git_blob_lookup( + &delta->old_blob, diff->repo, &delta->old_oid); + old.ptr = (char *)git_blob_rawcontent(delta->old_blob); + old.size = git_blob_rawsize(delta->old_blob); + } else { + delta->old_blob = NULL; + old.ptr = ""; + old.size = 0; + } + + if (delta->status == GIT_STATUS_ADDED || + delta->status == GIT_STATUS_MODIFIED) + { + error = git_blob_lookup( + &delta->new_blob, diff->repo, &delta->new_oid); + new.ptr = (char *)git_blob_rawcontent(delta->new_blob); + new.size = git_blob_rawsize(delta->new_blob); + } else { + delta->new_blob = NULL; + new.ptr = ""; + new.size = 0; + } + } + + if (diff->opts.force_text) + delta->binary = 0; + else if ((error = set_file_is_binary( + diff->repo, delta, &old, &new)) < GIT_SUCCESS) + break; + + if (file_cb != NULL) { + error = file_cb(data, delta, (float)di.index / diff->files.length); + if (error != GIT_SUCCESS) + break; + } + + /* don't do hunk and line diffs if file is binary */ + if (delta->binary) + continue; + + /* nothing to do if we did not get a blob */ + if (!delta->old_blob && !delta->new_blob) + continue; + + assert(hunk_cb || line_cb); + + di.delta = delta; + + memset(¶ms, 0, sizeof(params)); + + memset(&cfg, 0, sizeof(cfg)); + cfg.ctxlen = diff->opts.context_lines || 3; + cfg.interhunkctxlen = diff->opts.interhunk_lines || 3; + if (diff->opts.ignore_whitespace) + cfg.flags |= XDF_WHITESPACE_FLAGS; + + memset(&callback, 0, sizeof(callback)); + callback.outf = diff_output_cb; + callback.priv = &di; + + xdl_diff(&old, &new, ¶ms, &cfg, &callback); + + git_blob_free(delta->old_blob); + delta->old_blob = NULL; + + git_blob_free(delta->new_blob); + delta->new_blob = NULL; + } + + return error; +} + + +static char pick_suffix(int mode) +{ + if (S_ISDIR(mode)) + return '/'; + else if (mode & S_IXUSR) + return '*'; + else + return ' '; +} + +static int print_compact(void *data, git_diff_delta *delta, float progress) +{ + FILE *fp = data; + char code, old_suffix, new_suffix; + + GIT_UNUSED_ARG(progress); + + switch (delta->status) { + case GIT_STATUS_ADDED: code = 'A'; break; + case GIT_STATUS_DELETED: code = 'D'; break; + case GIT_STATUS_MODIFIED: code = 'M'; break; + case GIT_STATUS_RENAMED: code = 'R'; break; + case GIT_STATUS_COPIED: code = 'C'; break; + case GIT_STATUS_IGNORED: code = 'I'; break; + case GIT_STATUS_UNTRACKED: code = '?'; break; + default: code = 0; + } + + if (!code) + return GIT_SUCCESS; + + old_suffix = pick_suffix(delta->old_attr); + new_suffix = pick_suffix(delta->new_attr); + + if (delta->new_path != NULL) + fprintf(fp, "%c\t%s%c -> %s%c\n", code, + delta->path, old_suffix, delta->new_path, new_suffix); + else if (delta->old_attr != delta->new_attr) + fprintf(fp, "%c\t%s%c (%o -> %o)\n", code, + delta->path, new_suffix, delta->old_attr, delta->new_attr); + else + fprintf(fp, "%c\t%s%c\n", code, delta->path, old_suffix); + + return GIT_SUCCESS; +} + +int git_diff_print_compact(FILE *fp, git_diff_list *diff) +{ + return git_diff_foreach(diff, fp, print_compact, NULL, NULL); +} + +static int print_oid_range(FILE *fp, git_diff_delta *delta) +{ + char start_oid[9], end_oid[9]; + /* TODO: Determine a good actual OID range to print */ + /* TODO: Print a real extra line here to match git diff */ + git_oid_to_string(start_oid, sizeof(start_oid), &delta->old_oid); + git_oid_to_string(end_oid, sizeof(end_oid), &delta->new_oid); + if (delta->old_attr == delta->new_attr) + fprintf(fp, "index %s..%s %o\n", + start_oid, end_oid, delta->old_attr); + else + fprintf(fp, "index %s..%s %o %o\n", + start_oid, end_oid, delta->old_attr, delta->new_attr); + return GIT_SUCCESS; +} + +static int print_patch_file(void *data, git_diff_delta *delta, float progress) +{ + FILE *fp = data; + const char *newpath = delta->new_path ? delta->new_path : delta->path; + + GIT_UNUSED_ARG(progress); + + if (delta->old_blob && delta->new_blob) { + fprintf(fp, "diff --git a/%s b/%s\n", delta->path, newpath); + print_oid_range(fp, delta); + fprintf(fp, "--- a/%s\n", delta->path); + fprintf(fp, "+++ b/%s\n", newpath); + } else if (delta->old_blob) { + fprintf(fp, "diff --git a/%s /dev/null\n", delta->path); + print_oid_range(fp, delta); + fprintf(fp, "--- a/%s\n", delta->path); + fputs("+++ /dev/null\n", fp); + } else if (delta->new_blob) { + fprintf(fp, "diff --git /dev/null b/%s\n", newpath); + print_oid_range(fp, delta); + fputs("--- /dev/null\n", fp); + fprintf(fp, "+++ b/%s\n", newpath); + } + + return GIT_SUCCESS; +} + +static int print_patch_hunk( + void *data, + git_diff_delta *d, + git_diff_range *r, + const char *header, + size_t header_len) +{ + FILE *fp = data; + GIT_UNUSED_ARG(d); + GIT_UNUSED_ARG(r); + fprintf(fp, "%.*s", (int)header_len, header); + return GIT_SUCCESS; +} + +static int print_patch_line( + void *data, + git_diff_delta *delta, + char line_origin, /* GIT_DIFF_LINE value from above */ + const char *content, + size_t content_len) +{ + FILE *fp = data; + GIT_UNUSED_ARG(delta); + if (line_origin == GIT_DIFF_LINE_ADDITION) + fprintf(fp, "+%.*s", (int)content_len, content); + else if (line_origin == GIT_DIFF_LINE_DELETION) + fprintf(fp, "-%.*s", (int)content_len, content); + else if (content_len > 0) + fprintf(fp, "%.*s", (int)content_len, content); + return GIT_SUCCESS; +} + +int git_diff_print_patch(FILE *fp, git_diff_list *diff) +{ + return git_diff_foreach( + diff, fp, print_patch_file, print_patch_hunk, print_patch_line); +} + int git_diff_blobs( git_repository *repo, git_blob *old_blob, git_blob *new_blob, - git_diff_opts *options) + git_diff_options *options, + void *cb_data, + git_diff_hunk_fn hunk_cb, + git_diff_line_fn line_cb) { + diff_info di; + git_diff_delta delta; mmfile_t old, new; xpparam_t params; xdemitconf_t cfg; xdemitcb_t callback; - assert(repo && old_blob && new_blob && options); + assert(repo && options); - old.ptr = (char *)git_blob_rawcontent(old_blob); - old.size = git_blob_rawsize(old_blob); + if (old_blob) { + old.ptr = (char *)git_blob_rawcontent(old_blob); + old.size = git_blob_rawsize(old_blob); + } else { + old.ptr = ""; + old.size = 0; + } - new.ptr = (char *)git_blob_rawcontent(new_blob); - new.size = git_blob_rawsize(new_blob); + if (new_blob) { + new.ptr = (char *)git_blob_rawcontent(new_blob); + new.size = git_blob_rawsize(new_blob); + } else { + new.ptr = ""; + new.size = 0; + } + + /* populate a "fake" delta record */ + delta.status = old.ptr ? + (new.ptr ? GIT_STATUS_MODIFIED : GIT_STATUS_DELETED) : + (new.ptr ? GIT_STATUS_ADDED : GIT_STATUS_UNTRACKED); + delta.old_attr = 0100644; /* can't know the truth from a blob alone */ + delta.new_attr = 0100644; + git_oid_cpy(&delta.old_oid, git_object_id((const git_object *)old_blob)); + git_oid_cpy(&delta.new_oid, git_object_id((const git_object *)new_blob)); + delta.old_blob = old_blob; + delta.new_blob = new_blob; + delta.path = NULL; + delta.new_path = NULL; + delta.similarity = 0; + delta.binary = 0; + + di.diff = NULL; + di.delta = δ + di.cb_data = cb_data; + di.hunk_cb = hunk_cb; + di.line_cb = line_cb; memset(¶ms, 0, sizeof(params)); @@ -89,16 +522,9 @@ int git_diff_blobs( memset(&callback, 0, sizeof(callback)); callback.outf = diff_output_cb; - callback.priv = options; - - if (options->file_cb) - options->file_cb( - options->cb_data, - git_object_id((const git_object *)old_blob), NULL, 010644, - git_object_id((const git_object *)new_blob), NULL, 010644); + callback.priv = &di; xdl_diff(&old, &new, ¶ms, &cfg, &callback); return GIT_SUCCESS; } - diff --git a/src/diff.h b/src/diff.h new file mode 100644 index 000000000..1bb5c36f0 --- /dev/null +++ b/src/diff.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_diff_h__ +#define INCLUDE_diff_h__ + +#include +#include "vector.h" +#include "buffer.h" + +struct git_diff_list { + git_repository *repo; + git_diff_options opts; + git_buf pfx; + git_vector files; /* vector of git_diff_file_delta */ +}; + +#endif +