From aad6967be1025b1819c3ba25163d7f69fc814130 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 29 May 2013 21:52:21 -0700 Subject: [PATCH 01/12] Basic function context header This implements a basic callback to extract function context for a diff. It always uses the same search heuristic right now with no regular expressions or language-specific variants. Those will come next, I think. --- src/diff_output.c | 83 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 16 deletions(-) diff --git a/src/diff_output.c b/src/diff_output.c index 8dd110cbf..bcd39f093 100644 --- a/src/diff_output.c +++ b/src/diff_output.c @@ -196,26 +196,77 @@ static int diff_delta_is_binary_by_size( return 0; } -static void setup_xdiff_options( - const git_diff_options *opts, xdemitconf_t *cfg, xpparam_t *param) +static long diff_context_find( + const char *line, + long line_len, + char *out, + long out_size, + void *payload) { - memset(cfg, 0, sizeof(xdemitconf_t)); - memset(param, 0, sizeof(xpparam_t)); + diff_context *ctxt = payload; + const char *scan; + bool found_paren = false; - cfg->ctxlen = - (!opts) ? 3 : opts->context_lines; - cfg->interhunkctxlen = - (!opts) ? 
0 : opts->interhunk_lines; + if (line_len > 0 && line[line_len - 1] == '\n') + line_len--; + if (line_len > 0 && line[line_len - 1] == '\r') + line_len--; + if (!line_len) + return -1; - if (!opts) + if (!isalpha(*line)) + return -1; + + for (scan = &line[line_len - 1]; scan > line && *scan != '('; --scan) + /* search backward for ( */; + if (scan != line) { + found_paren = true; + line_len = scan - line; + + for (--scan; scan > line && !isalpha(*scan); --scan) + --line_len; + } + + if (!line_len) + return -1; + + if (out_size > line_len) { + memcpy(out, line, line_len); + + if (found_paren) + out[line_len++] = '('; + out[line_len] = '\0'; + } else { + memcpy(out, line, out_size); + line_len = out_size; + } + + return line_len; +} + +static void setup_xdiff_options(diff_context *ctxt) +{ + memset(&ctxt->xdiff_config, 0, sizeof(ctxt->xdiff_config)); + memset(&ctxt->xdiff_params, 0, sizeof(ctxt->xdiff_params)); + + ctxt->xdiff_config.ctxlen = + (!ctxt->opts) ? 3 : ctxt->opts->context_lines; + ctxt->xdiff_config.interhunkctxlen = + (!ctxt->opts) ? 
0 : ctxt->opts->interhunk_lines; + + ctxt->xdiff_config.flags = XDL_EMIT_FUNCNAMES; + ctxt->xdiff_config.find_func = diff_context_find; + ctxt->xdiff_config.find_func_priv = ctxt; + + if (!ctxt->opts) return; - if (opts->flags & GIT_DIFF_IGNORE_WHITESPACE) - param->flags |= XDF_WHITESPACE_FLAGS; - if (opts->flags & GIT_DIFF_IGNORE_WHITESPACE_CHANGE) - param->flags |= XDF_IGNORE_WHITESPACE_CHANGE; - if (opts->flags & GIT_DIFF_IGNORE_WHITESPACE_EOL) - param->flags |= XDF_IGNORE_WHITESPACE_AT_EOL; + if (ctxt->opts->flags & GIT_DIFF_IGNORE_WHITESPACE) + ctxt->xdiff_params.flags |= XDF_WHITESPACE_FLAGS; + if (ctxt->opts->flags & GIT_DIFF_IGNORE_WHITESPACE_CHANGE) + ctxt->xdiff_params.flags |= XDF_IGNORE_WHITESPACE_CHANGE; + if (ctxt->opts->flags & GIT_DIFF_IGNORE_WHITESPACE_EOL) + ctxt->xdiff_params.flags |= XDF_IGNORE_WHITESPACE_AT_EOL; } @@ -499,7 +550,7 @@ static int diff_context_init( ctxt->payload = payload; ctxt->error = 0; - setup_xdiff_options(ctxt->opts, &ctxt->xdiff_config, &ctxt->xdiff_params); + setup_xdiff_options(ctxt); return 0; } From 7000f3fa7bad25ec07355d6afb640ea272201dff Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 4 Jun 2013 10:32:59 -0700 Subject: [PATCH 02/12] Move some diff helpers into separate file --- src/diff_output.c | 424 +-------------------------------------------- src/diff_print.c | 431 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 433 insertions(+), 422 deletions(-) create mode 100644 src/diff_print.c diff --git a/src/diff_output.c b/src/diff_output.c index bcd39f093..9a5be2a10 100644 --- a/src/diff_output.c +++ b/src/diff_output.c @@ -207,6 +207,8 @@ static long diff_context_find( const char *scan; bool found_paren = false; + GIT_UNUSED(ctxt); + if (line_len > 0 && line[line_len - 1] == '\n') line_len--; if (line_len > 0 && line[line_len - 1] == '\r') @@ -1064,359 +1066,6 @@ int git_diff_foreach( return error; } - -typedef struct { - git_diff_list *diff; - git_diff_data_cb print_cb; - void *payload; - 
git_buf *buf; - int oid_strlen; -} diff_print_info; - -static int diff_print_info_init( - diff_print_info *pi, - git_buf *out, git_diff_list *diff, git_diff_data_cb cb, void *payload) -{ - assert(diff && diff->repo); - - pi->diff = diff; - pi->print_cb = cb; - pi->payload = payload; - pi->buf = out; - - if (git_repository__cvar(&pi->oid_strlen, diff->repo, GIT_CVAR_ABBREV) < 0) - return -1; - - pi->oid_strlen += 1; /* for NUL byte */ - - if (pi->oid_strlen < 2) - pi->oid_strlen = 2; - else if (pi->oid_strlen > GIT_OID_HEXSZ + 1) - pi->oid_strlen = GIT_OID_HEXSZ + 1; - - return 0; -} - -static char pick_suffix(int mode) -{ - if (S_ISDIR(mode)) - return '/'; - else if (mode & 0100) //-V536 - /* in git, modes are very regular, so we must have 0100755 mode */ - return '*'; - else - return ' '; -} - -char git_diff_status_char(git_delta_t status) -{ - char code; - - switch (status) { - case GIT_DELTA_ADDED: code = 'A'; break; - case GIT_DELTA_DELETED: code = 'D'; break; - case GIT_DELTA_MODIFIED: code = 'M'; break; - case GIT_DELTA_RENAMED: code = 'R'; break; - case GIT_DELTA_COPIED: code = 'C'; break; - case GIT_DELTA_IGNORED: code = 'I'; break; - case GIT_DELTA_UNTRACKED: code = '?'; break; - default: code = ' '; break; - } - - return code; -} - -static int callback_error(void) -{ - giterr_clear(); - return GIT_EUSER; -} - -static int print_compact( - const git_diff_delta *delta, float progress, void *data) -{ - diff_print_info *pi = data; - char old_suffix, new_suffix, code = git_diff_status_char(delta->status); - - GIT_UNUSED(progress); - - if (code == ' ') - return 0; - - old_suffix = pick_suffix(delta->old_file.mode); - new_suffix = pick_suffix(delta->new_file.mode); - - git_buf_clear(pi->buf); - - if (delta->old_file.path != delta->new_file.path && - pi->diff->strcomp(delta->old_file.path,delta->new_file.path) != 0) - git_buf_printf(pi->buf, "%c\t%s%c -> %s%c\n", code, - delta->old_file.path, old_suffix, delta->new_file.path, new_suffix); - else if 
(delta->old_file.mode != delta->new_file.mode && - delta->old_file.mode != 0 && delta->new_file.mode != 0) - git_buf_printf(pi->buf, "%c\t%s%c (%o -> %o)\n", code, - delta->old_file.path, new_suffix, delta->old_file.mode, delta->new_file.mode); - else if (old_suffix != ' ') - git_buf_printf(pi->buf, "%c\t%s%c\n", code, delta->old_file.path, old_suffix); - else - git_buf_printf(pi->buf, "%c\t%s\n", code, delta->old_file.path); - - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -int git_diff_print_compact( - git_diff_list *diff, - git_diff_data_cb print_cb, - void *payload) -{ - int error; - git_buf buf = GIT_BUF_INIT; - diff_print_info pi; - - if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) - error = git_diff_foreach(diff, print_compact, NULL, NULL, &pi); - - git_buf_free(&buf); - - return error; -} - -static int print_raw( - const git_diff_delta *delta, float progress, void *data) -{ - diff_print_info *pi = data; - char code = git_diff_status_char(delta->status); - char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; - - GIT_UNUSED(progress); - - if (code == ' ') - return 0; - - git_buf_clear(pi->buf); - - git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid); - git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid); - - git_buf_printf( - pi->buf, ":%06o %06o %s... %s... %c", - delta->old_file.mode, delta->new_file.mode, start_oid, end_oid, code); - - if (delta->similarity > 0) - git_buf_printf(pi->buf, "%03u", delta->similarity); - - if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED) - git_buf_printf( - pi->buf, "\t%s %s\n", delta->old_file.path, delta->new_file.path); - else - git_buf_printf( - pi->buf, "\t%s\n", delta->old_file.path ? 
- delta->old_file.path : delta->new_file.path); - - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -int git_diff_print_raw( - git_diff_list *diff, - git_diff_data_cb print_cb, - void *payload) -{ - int error; - git_buf buf = GIT_BUF_INIT; - diff_print_info pi; - - if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) - error = git_diff_foreach(diff, print_raw, NULL, NULL, &pi); - - git_buf_free(&buf); - - return error; -} - -static int print_oid_range(diff_print_info *pi, const git_diff_delta *delta) -{ - char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; - - git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid); - git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid); - - /* TODO: Match git diff more closely */ - if (delta->old_file.mode == delta->new_file.mode) { - git_buf_printf(pi->buf, "index %s..%s %o\n", - start_oid, end_oid, delta->old_file.mode); - } else { - if (delta->old_file.mode == 0) { - git_buf_printf(pi->buf, "new file mode %o\n", delta->new_file.mode); - } else if (delta->new_file.mode == 0) { - git_buf_printf(pi->buf, "deleted file mode %o\n", delta->old_file.mode); - } else { - git_buf_printf(pi->buf, "old mode %o\n", delta->old_file.mode); - git_buf_printf(pi->buf, "new mode %o\n", delta->new_file.mode); - } - git_buf_printf(pi->buf, "index %s..%s\n", start_oid, end_oid); - } - - if (git_buf_oom(pi->buf)) - return -1; - - return 0; -} - -static int print_patch_file( - const git_diff_delta *delta, float progress, void *data) -{ - diff_print_info *pi = data; - const char *oldpfx = pi->diff->opts.old_prefix; - const char *oldpath = delta->old_file.path; - const char *newpfx = pi->diff->opts.new_prefix; - const char *newpath = delta->new_file.path; - - GIT_UNUSED(progress); - - if (S_ISDIR(delta->new_file.mode) || - delta->status == 
GIT_DELTA_UNMODIFIED || - delta->status == GIT_DELTA_IGNORED || - (delta->status == GIT_DELTA_UNTRACKED && - (pi->diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0)) - return 0; - - if (!oldpfx) - oldpfx = DIFF_OLD_PREFIX_DEFAULT; - - if (!newpfx) - newpfx = DIFF_NEW_PREFIX_DEFAULT; - - git_buf_clear(pi->buf); - git_buf_printf(pi->buf, "diff --git %s%s %s%s\n", oldpfx, delta->old_file.path, newpfx, delta->new_file.path); - - if (print_oid_range(pi, delta) < 0) - return -1; - - if (git_oid_iszero(&delta->old_file.oid)) { - oldpfx = ""; - oldpath = "/dev/null"; - } - if (git_oid_iszero(&delta->new_file.oid)) { - newpfx = ""; - newpath = "/dev/null"; - } - - if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) { - git_buf_printf(pi->buf, "--- %s%s\n", oldpfx, oldpath); - git_buf_printf(pi->buf, "+++ %s%s\n", newpfx, newpath); - } - - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) - return 0; - - git_buf_clear(pi->buf); - git_buf_printf( - pi->buf, "Binary files %s%s and %s%s differ\n", - oldpfx, oldpath, newpfx, newpath); - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_BINARY, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -static int print_patch_hunk( - const git_diff_delta *d, - const git_diff_range *r, - const char *header, - size_t header_len, - void *data) -{ - diff_print_info *pi = data; - - if (S_ISDIR(d->new_file.mode)) - return 0; - - git_buf_clear(pi->buf); - if (git_buf_printf(pi->buf, "%.*s", (int)header_len, header) < 0) - return -1; - - if (pi->print_cb(d, r, GIT_DIFF_LINE_HUNK_HDR, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -static int print_patch_line( - const git_diff_delta *delta, - const 
git_diff_range *range, - char line_origin, /* GIT_DIFF_LINE value from above */ - const char *content, - size_t content_len, - void *data) -{ - diff_print_info *pi = data; - - if (S_ISDIR(delta->new_file.mode)) - return 0; - - git_buf_clear(pi->buf); - - if (line_origin == GIT_DIFF_LINE_ADDITION || - line_origin == GIT_DIFF_LINE_DELETION || - line_origin == GIT_DIFF_LINE_CONTEXT) - git_buf_printf(pi->buf, "%c%.*s", line_origin, (int)content_len, content); - else if (content_len > 0) - git_buf_printf(pi->buf, "%.*s", (int)content_len, content); - - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, range, line_origin, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -int git_diff_print_patch( - git_diff_list *diff, - git_diff_data_cb print_cb, - void *payload) -{ - int error; - git_buf buf = GIT_BUF_INIT; - diff_print_info pi; - - if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) - error = git_diff_foreach( - diff, print_patch_file, print_patch_hunk, print_patch_line, &pi); - - git_buf_free(&buf); - - return error; -} - static void set_data_from_blob( const git_blob *blob, git_map *map, git_diff_file *file) { @@ -1826,75 +1475,6 @@ notfound: return diff_error_outofrange(thing); } -static int print_to_buffer_cb( - const git_diff_delta *delta, - const git_diff_range *range, - char line_origin, - const char *content, - size_t content_len, - void *payload) -{ - git_buf *output = payload; - GIT_UNUSED(delta); GIT_UNUSED(range); GIT_UNUSED(line_origin); - return git_buf_put(output, content, content_len); -} - -int git_diff_patch_print( - git_diff_patch *patch, - git_diff_data_cb print_cb, - void *payload) -{ - int error; - git_buf temp = GIT_BUF_INIT; - diff_print_info pi; - size_t h, l; - - assert(patch && print_cb); - - if (!(error = diff_print_info_init( - &pi, &temp, patch->diff, print_cb, payload))) - error = print_patch_file(patch->delta, 0, &pi); - - for (h = 0; h < 
patch->hunks_size && !error; ++h) { - diff_patch_hunk *hunk = &patch->hunks[h]; - - error = print_patch_hunk( - patch->delta, &hunk->range, hunk->header, hunk->header_len, &pi); - - for (l = 0; l < hunk->line_count && !error; ++l) { - diff_patch_line *line = &patch->lines[hunk->line_start + l]; - - error = print_patch_line( - patch->delta, &hunk->range, - line->origin, line->ptr, line->len, &pi); - } - } - - git_buf_free(&temp); - - return error; -} - -int git_diff_patch_to_str( - char **string, - git_diff_patch *patch) -{ - int error; - git_buf output = GIT_BUF_INIT; - - error = git_diff_patch_print(patch, print_to_buffer_cb, &output); - - /* GIT_EUSER means git_buf_put in print_to_buffer_cb returned -1, - * meaning a memory allocation failure, so just map to -1... - */ - if (error == GIT_EUSER) - error = -1; - - *string = git_buf_detach(&output); - - return error; -} - int git_diff__paired_foreach( git_diff_list *idx2head, git_diff_list *wd2idx, diff --git a/src/diff_print.c b/src/diff_print.c new file mode 100644 index 000000000..b6fbec829 --- /dev/null +++ b/src/diff_print.c @@ -0,0 +1,431 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#include "common.h" +#include "diff.h" +#include "diff_output.h" + +typedef struct { + git_diff_list *diff; + git_diff_data_cb print_cb; + void *payload; + git_buf *buf; + int oid_strlen; +} diff_print_info; + +static int diff_print_info_init( + diff_print_info *pi, + git_buf *out, git_diff_list *diff, git_diff_data_cb cb, void *payload) +{ + assert(diff && diff->repo); + + pi->diff = diff; + pi->print_cb = cb; + pi->payload = payload; + pi->buf = out; + + if (git_repository__cvar(&pi->oid_strlen, diff->repo, GIT_CVAR_ABBREV) < 0) + return -1; + + pi->oid_strlen += 1; /* for NUL byte */ + + if (pi->oid_strlen < 2) + pi->oid_strlen = 2; + else if (pi->oid_strlen > GIT_OID_HEXSZ + 1) + pi->oid_strlen = GIT_OID_HEXSZ + 1; + + return 0; +} + +static char pick_suffix(int mode) +{ + if (S_ISDIR(mode)) + return '/'; + else if (mode & 0100) //-V536 + /* in git, modes are very regular, so we must have 0100755 mode */ + return '*'; + else + return ' '; +} + +char git_diff_status_char(git_delta_t status) +{ + char code; + + switch (status) { + case GIT_DELTA_ADDED: code = 'A'; break; + case GIT_DELTA_DELETED: code = 'D'; break; + case GIT_DELTA_MODIFIED: code = 'M'; break; + case GIT_DELTA_RENAMED: code = 'R'; break; + case GIT_DELTA_COPIED: code = 'C'; break; + case GIT_DELTA_IGNORED: code = 'I'; break; + case GIT_DELTA_UNTRACKED: code = '?'; break; + default: code = ' '; break; + } + + return code; +} + +static int callback_error(void) +{ + giterr_clear(); + return GIT_EUSER; +} + +static int print_compact( + const git_diff_delta *delta, float progress, void *data) +{ + diff_print_info *pi = data; + char old_suffix, new_suffix, code = git_diff_status_char(delta->status); + + GIT_UNUSED(progress); + + if (code == ' ') + return 0; + + old_suffix = pick_suffix(delta->old_file.mode); + new_suffix = pick_suffix(delta->new_file.mode); + + git_buf_clear(pi->buf); + + if (delta->old_file.path != delta->new_file.path && + 
pi->diff->strcomp(delta->old_file.path,delta->new_file.path) != 0) + git_buf_printf(pi->buf, "%c\t%s%c -> %s%c\n", code, + delta->old_file.path, old_suffix, delta->new_file.path, new_suffix); + else if (delta->old_file.mode != delta->new_file.mode && + delta->old_file.mode != 0 && delta->new_file.mode != 0) + git_buf_printf(pi->buf, "%c\t%s%c (%o -> %o)\n", code, + delta->old_file.path, new_suffix, delta->old_file.mode, delta->new_file.mode); + else if (old_suffix != ' ') + git_buf_printf(pi->buf, "%c\t%s%c\n", code, delta->old_file.path, old_suffix); + else + git_buf_printf(pi->buf, "%c\t%s\n", code, delta->old_file.path); + + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +int git_diff_print_compact( + git_diff_list *diff, + git_diff_data_cb print_cb, + void *payload) +{ + int error; + git_buf buf = GIT_BUF_INIT; + diff_print_info pi; + + if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) + error = git_diff_foreach(diff, print_compact, NULL, NULL, &pi); + + git_buf_free(&buf); + + return error; +} + +static int print_raw( + const git_diff_delta *delta, float progress, void *data) +{ + diff_print_info *pi = data; + char code = git_diff_status_char(delta->status); + char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; + + GIT_UNUSED(progress); + + if (code == ' ') + return 0; + + git_buf_clear(pi->buf); + + git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid); + git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid); + + git_buf_printf( + pi->buf, ":%06o %06o %s... %s... 
%c", + delta->old_file.mode, delta->new_file.mode, start_oid, end_oid, code); + + if (delta->similarity > 0) + git_buf_printf(pi->buf, "%03u", delta->similarity); + + if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED) + git_buf_printf( + pi->buf, "\t%s %s\n", delta->old_file.path, delta->new_file.path); + else + git_buf_printf( + pi->buf, "\t%s\n", delta->old_file.path ? + delta->old_file.path : delta->new_file.path); + + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +int git_diff_print_raw( + git_diff_list *diff, + git_diff_data_cb print_cb, + void *payload) +{ + int error; + git_buf buf = GIT_BUF_INIT; + diff_print_info pi; + + if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) + error = git_diff_foreach(diff, print_raw, NULL, NULL, &pi); + + git_buf_free(&buf); + + return error; +} + +static int print_oid_range(diff_print_info *pi, const git_diff_delta *delta) +{ + char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; + + git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid); + git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid); + + /* TODO: Match git diff more closely */ + if (delta->old_file.mode == delta->new_file.mode) { + git_buf_printf(pi->buf, "index %s..%s %o\n", + start_oid, end_oid, delta->old_file.mode); + } else { + if (delta->old_file.mode == 0) { + git_buf_printf(pi->buf, "new file mode %o\n", delta->new_file.mode); + } else if (delta->new_file.mode == 0) { + git_buf_printf(pi->buf, "deleted file mode %o\n", delta->old_file.mode); + } else { + git_buf_printf(pi->buf, "old mode %o\n", delta->old_file.mode); + git_buf_printf(pi->buf, "new mode %o\n", delta->new_file.mode); + } + git_buf_printf(pi->buf, "index %s..%s\n", start_oid, end_oid); + } + + if (git_buf_oom(pi->buf)) + return -1; + + return 0; +} + +static int 
print_patch_file( + const git_diff_delta *delta, float progress, void *data) +{ + diff_print_info *pi = data; + const char *oldpfx = pi->diff->opts.old_prefix; + const char *oldpath = delta->old_file.path; + const char *newpfx = pi->diff->opts.new_prefix; + const char *newpath = delta->new_file.path; + + GIT_UNUSED(progress); + + if (S_ISDIR(delta->new_file.mode) || + delta->status == GIT_DELTA_UNMODIFIED || + delta->status == GIT_DELTA_IGNORED || + (delta->status == GIT_DELTA_UNTRACKED && + (pi->diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0)) + return 0; + + if (!oldpfx) + oldpfx = DIFF_OLD_PREFIX_DEFAULT; + + if (!newpfx) + newpfx = DIFF_NEW_PREFIX_DEFAULT; + + git_buf_clear(pi->buf); + git_buf_printf(pi->buf, "diff --git %s%s %s%s\n", oldpfx, delta->old_file.path, newpfx, delta->new_file.path); + + if (print_oid_range(pi, delta) < 0) + return -1; + + if (git_oid_iszero(&delta->old_file.oid)) { + oldpfx = ""; + oldpath = "/dev/null"; + } + if (git_oid_iszero(&delta->new_file.oid)) { + newpfx = ""; + newpath = "/dev/null"; + } + + if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) { + git_buf_printf(pi->buf, "--- %s%s\n", oldpfx, oldpath); + git_buf_printf(pi->buf, "+++ %s%s\n", newpfx, newpath); + } + + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) + return 0; + + git_buf_clear(pi->buf); + git_buf_printf( + pi->buf, "Binary files %s%s and %s%s differ\n", + oldpfx, oldpath, newpfx, newpath); + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_BINARY, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +static int print_patch_hunk( + const git_diff_delta *d, + const git_diff_range *r, + const char *header, + size_t header_len, + void *data) +{ + diff_print_info *pi = data; + + 
if (S_ISDIR(d->new_file.mode)) + return 0; + + git_buf_clear(pi->buf); + if (git_buf_printf(pi->buf, "%.*s", (int)header_len, header) < 0) + return -1; + + if (pi->print_cb(d, r, GIT_DIFF_LINE_HUNK_HDR, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +static int print_patch_line( + const git_diff_delta *delta, + const git_diff_range *range, + char line_origin, /* GIT_DIFF_LINE value from above */ + const char *content, + size_t content_len, + void *data) +{ + diff_print_info *pi = data; + + if (S_ISDIR(delta->new_file.mode)) + return 0; + + git_buf_clear(pi->buf); + + if (line_origin == GIT_DIFF_LINE_ADDITION || + line_origin == GIT_DIFF_LINE_DELETION || + line_origin == GIT_DIFF_LINE_CONTEXT) + git_buf_printf(pi->buf, "%c%.*s", line_origin, (int)content_len, content); + else if (content_len > 0) + git_buf_printf(pi->buf, "%.*s", (int)content_len, content); + + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, range, line_origin, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +int git_diff_print_patch( + git_diff_list *diff, + git_diff_data_cb print_cb, + void *payload) +{ + int error; + git_buf buf = GIT_BUF_INIT; + diff_print_info pi; + + if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) + error = git_diff_foreach( + diff, print_patch_file, print_patch_hunk, print_patch_line, &pi); + + git_buf_free(&buf); + + return error; +} + + +static int print_to_buffer_cb( + const git_diff_delta *delta, + const git_diff_range *range, + char line_origin, + const char *content, + size_t content_len, + void *payload) +{ + git_buf *output = payload; + GIT_UNUSED(delta); GIT_UNUSED(range); GIT_UNUSED(line_origin); + return git_buf_put(output, content, content_len); +} + +int git_diff_patch_print( + git_diff_patch *patch, + git_diff_data_cb print_cb, + void *payload) +{ + int error; + git_buf temp = GIT_BUF_INIT; + 
diff_print_info pi; + size_t h, l; + + assert(patch && print_cb); + + if (!(error = diff_print_info_init( + &pi, &temp, patch->diff, print_cb, payload))) + error = print_patch_file(patch->delta, 0, &pi); + + for (h = 0; h < patch->hunks_size && !error; ++h) { + diff_patch_hunk *hunk = &patch->hunks[h]; + + error = print_patch_hunk( + patch->delta, &hunk->range, hunk->header, hunk->header_len, &pi); + + for (l = 0; l < hunk->line_count && !error; ++l) { + diff_patch_line *line = &patch->lines[hunk->line_start + l]; + + error = print_patch_line( + patch->delta, &hunk->range, + line->origin, line->ptr, line->len, &pi); + } + } + + git_buf_free(&temp); + + return error; +} + +int git_diff_patch_to_str( + char **string, + git_diff_patch *patch) +{ + int error; + git_buf output = GIT_BUF_INIT; + + error = git_diff_patch_print(patch, print_to_buffer_cb, &output); + + /* GIT_EUSER means git_buf_put in print_to_buffer_cb returned -1, + * meaning a memory allocation failure, so just map to -1... + */ + if (error == GIT_EUSER) + error = -1; + + *string = git_buf_detach(&output); + + return error; +} From 114f5a6c41ea03393e00ae41126a6ddb0ef39a15 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 10 Jun 2013 10:10:39 -0700 Subject: [PATCH 03/12] Reorganize diff and add basic diff driver This is a significant reorganization of the diff code to break it into a set of more clearly distinct files and to document the new organization. Hopefully this will make the diff code easier to understand and to extend. This adds a new `git_diff_driver` object that looks up diff driver information from the attributes and the config so that things like function context in diff headers can be provided. The full driver spec is not implemented in the commit - this is focused on the reorganization of the code and putting the driver hooks in place. 
This also removes a few #includes from src/repository.h that were overbroad, but as a result required extra #includes in a variety of places since including src/repository.h no longer results in pulling in the whole world. --- docs/diff-internals.md | 89 ++ src/blob.c | 1 + src/checkout.c | 1 + src/clone.c | 1 + src/crlf.c | 6 +- src/diff.c | 72 ++ src/diff.h | 11 + src/diff_driver.c | 160 ++++ src/diff_driver.h | 34 + src/diff_file.c | 442 +++++++++ src/diff_file.h | 54 ++ src/diff_output.c | 1526 ------------------------------ src/diff_output.h | 93 -- src/diff_patch.c | 723 ++++++++++++++ src/diff_patch.h | 75 ++ src/diff_print.c | 10 +- src/diff_tform.c | 6 +- src/diff_xdiff.c | 161 ++++ src/diff_xdiff.h | 28 + src/fetch.c | 2 + src/iterator.c | 1 + src/merge.c | 2 + src/refdb_fs.c | 1 + src/refs.c | 1 + src/remote.h | 2 +- src/repository.c | 4 + src/repository.h | 5 +- src/signature.c | 1 + src/stash.c | 1 + src/status.c | 2 +- src/submodule.c | 2 + src/thread-utils.h | 2 - src/tree.c | 3 + src/util.h | 2 + tests-clar/checkout/index.c | 1 + tests-clar/clar.c | 36 +- tests-clar/clone/nonetwork.c | 3 +- tests-clar/diff/patch.c | 2 +- tests-clar/diff/rename.c | 2 +- tests-clar/diff/submodules.c | 1 + tests-clar/fetchhead/nonetwork.c | 2 +- tests-clar/merge/merge_helpers.c | 2 +- tests-clar/odb/alternates.c | 2 +- tests-clar/online/clone.c | 3 +- tests-clar/online/fetchhead.c | 2 +- tests-clar/refs/delete.c | 5 +- tests-clar/refs/pack.c | 10 +- tests-clar/refs/reflog/reflog.c | 2 +- tests-clar/refs/rename.c | 13 +- tests-clar/repo/discover.c | 2 +- tests-clar/status/ignore.c | 2 +- tests-clar/status/worktree.c | 2 +- 52 files changed, 1951 insertions(+), 1665 deletions(-) create mode 100644 docs/diff-internals.md create mode 100644 src/diff_driver.c create mode 100644 src/diff_driver.h create mode 100644 src/diff_file.c create mode 100644 src/diff_file.h delete mode 100644 src/diff_output.c delete mode 100644 src/diff_output.h create mode 100644 src/diff_patch.c 
create mode 100644 src/diff_patch.h create mode 100644 src/diff_xdiff.c create mode 100644 src/diff_xdiff.h diff --git a/docs/diff-internals.md b/docs/diff-internals.md new file mode 100644 index 000000000..1983b7939 --- /dev/null +++ b/docs/diff-internals.md @@ -0,0 +1,89 @@ +Diff is broken into four phases: + +1. Building a list of things that have changed. These changes are called + deltas (git_diff_delta objects) and are grouped into a git_diff_list. +2. Applying file similarity measurement for rename and copy detection (and + to potentially split files that have changed radically). This step is + optional. +3. Computing the textual diff for each delta. Not all deltas have a + meaningful textual diff. For those that do, the textual diff can + either be generated on the fly and passed to output callbacks or can be + turned into a git_diff_patch object. +4. Formatting the diff and/or patch into standard text formats (such as + patches, raw lists, etc). + +In the source code, step 1 is implemented in `src/diff.c`, step 2 in +`src/diff_tform.c`, step 3 in `src/diff_patch.c`, and step 4 in +`src/diff_print.c`. Additionally, when it comes to accessing file +content, everything goes through diff drivers that are implemented in +`src/diff_driver.c`. + +External Objects +---------------- + +* `git_diff_options` represents user choices about how a diff should be + performed and is passed to most diff generating functions. +* `git_diff_file` represents an item on one side of a possible delta +* `git_diff_delta` represents a pair of items that have changed in some + way - it contains two `git_diff_file` plus a status and other stuff. +* `git_diff_list` is a list of deltas along with information about how + those particular deltas were found. +* `git_diff_patch` represents the actual diff between a pair of items. In + some cases, a delta may not have a corresponding patch, if the objects + are binary, for example. The content of a patch will be a set of hunks + and lines. 
+* A `hunk` is a range of lines described by a `git_diff_range` (i.e. "lines + 10-20 in the old file became lines 12-23 in the new"). It will have a + header that compactly represents that information, and it will have a + number of lines of context surrounding added and deleted lines. +* A `line` is simply a line of data along with a `git_diff_line_t` value + that tells how the data should be interpreted (e.g. context or added). + +Internal Objects +---------------- + +* `git_diff_file_content` is an internal structure that represents the + data on one side of an item to be diffed; it is an augmented + `git_diff_file` with more flags and the actual file data. +** it is created from a repository plus a) a git_diff_file, b) a git_blob, + or c) raw data and size +** there are three main operations on git_diff_file_content: +*** _initialization_ sets up the data structure and does what it can up to, + but not including loading and looking at the actual data +*** _loading_ loads the data, preprocesses it (i.e. 
applies filters) and + potentially analyzes it (to decide if binary) +*** _free_ releases loaded data and frees any allocated memory + +* The internal structure of a `git_diff_patch` stores the actual diff + between a pair of `git_diff_file_content` items +** it may be "unset" if the items are not diffable +** "empty" if the items are the same +** otherwise it will consist of a set of hunks each of which covers some + number of lines of context, additions and deletions +** a patch is created from two git_diff_file_content items +** a patch is fully instantiated in three phases: +*** initial creation and initialization +*** loading of data and preliminary data examination +*** diffing of data and optional storage of diffs +** (TBD) if a patch is asked to store the diffs and the size of the diff + is significantly smaller than the raw data of the two sides, then the + patch may be flattened using a pool of string data + +* `git_diff_output` is an internal structure that represents an output + target for a `git_diff_patch` +** It consists of file, hunk, and line callbacks, plus a payload +** There is a standard flattened output that can be used for plain text output +** Typically we use a `git_xdiff_output` which drives the callbacks via the + xdiff code taken from core Git. + +* `git_diff_driver` is an internal structure that encapsulates the logic + for a given type of file +** a driver is looked up based on the name and mode of a file. +** the driver can then be used to: +*** determine if a file is binary (by attributes, by git_diff_options + settings, or by examining the content) +*** give you a function pointer that is used to evaluate function context + for hunk headers +** At some point, the logic for getting a filtered version of file content + or calculating the OID of a file may be moved into the driver. 
+ diff --git a/src/blob.c b/src/blob.c index a68c4cc3e..2e4d5f479 100644 --- a/src/blob.c +++ b/src/blob.c @@ -11,6 +11,7 @@ #include "git2/odb_backend.h" #include "common.h" +#include "filebuf.h" #include "blob.h" #include "filter.h" #include "buf_text.h" diff --git a/src/checkout.c b/src/checkout.c index 7a2e68300..ede0be8e8 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -20,6 +20,7 @@ #include "refs.h" #include "repository.h" +#include "index.h" #include "filter.h" #include "blob.h" #include "diff.h" diff --git a/src/clone.c b/src/clone.c index af3298fd0..5b6c6f77d 100644 --- a/src/clone.c +++ b/src/clone.c @@ -21,6 +21,7 @@ #include "fileops.h" #include "refs.h" #include "path.h" +#include "repository.h" static int create_branch( git_reference **branch, diff --git a/src/crlf.c b/src/crlf.c index 81268da83..65039f9cc 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -5,14 +5,16 @@ * a Linking Exception. For full terms see the included COPYING file. */ +#include "git2/attr.h" +#include "git2/blob.h" +#include "git2/index.h" + #include "common.h" #include "fileops.h" #include "hash.h" #include "filter.h" #include "buf_text.h" #include "repository.h" -#include "git2/attr.h" -#include "git2/blob.h" struct crlf_attrs { int crlf_action; diff --git a/src/diff.c b/src/diff.c index 05ef4f16b..97ccb3cbd 100644 --- a/src/diff.c +++ b/src/diff.c @@ -11,6 +11,8 @@ #include "attr_file.h" #include "filter.h" #include "pathspec.h" +#include "index.h" +#include "odb.h" #define DIFF_FLAG_IS_SET(DIFF,FLAG) (((DIFF)->opts.flags & (FLAG)) != 0) #define DIFF_FLAG_ISNT_SET(DIFF,FLAG) (((DIFF)->opts.flags & (FLAG)) == 0) @@ -1170,3 +1172,73 @@ int git_diff_tree_to_workdir( return error; } + +size_t git_diff_num_deltas(git_diff_list *diff) +{ + assert(diff); + return (size_t)diff->deltas.length; +} + +size_t git_diff_num_deltas_of_type(git_diff_list *diff, git_delta_t type) +{ + size_t i, count = 0; + git_diff_delta *delta; + + assert(diff); + + git_vector_foreach(&diff->deltas, i, 
delta) { + count += (delta->status == type); + } + + return count; +} + +int git_diff__paired_foreach( + git_diff_list *idx2head, + git_diff_list *wd2idx, + int (*cb)(git_diff_delta *i2h, git_diff_delta *w2i, void *payload), + void *payload) +{ + int cmp; + git_diff_delta *i2h, *w2i; + size_t i, j, i_max, j_max; + int (*strcomp)(const char *, const char *); + + i_max = idx2head ? idx2head->deltas.length : 0; + j_max = wd2idx ? wd2idx->deltas.length : 0; + + /* Get appropriate strcmp function */ + strcomp = idx2head ? idx2head->strcomp : wd2idx ? wd2idx->strcomp : NULL; + + /* Assert both iterators use matching ignore-case. If this function ever + * supports merging diffs that are not sorted by the same function, then + * it will need to spool and sort on one of the results before merging + */ + if (idx2head && wd2idx) { + assert(idx2head->strcomp == wd2idx->strcomp); + } + + for (i = 0, j = 0; i < i_max || j < j_max; ) { + i2h = idx2head ? GIT_VECTOR_GET(&idx2head->deltas,i) : NULL; + w2i = wd2idx ? GIT_VECTOR_GET(&wd2idx->deltas,j) : NULL; + + cmp = !w2i ? -1 : !i2h ? 
1 : + strcomp(i2h->old_file.path, w2i->old_file.path); + + if (cmp < 0) { + if (cb(i2h, NULL, payload)) + return GIT_EUSER; + i++; + } else if (cmp > 0) { + if (cb(NULL, w2i, payload)) + return GIT_EUSER; + j++; + } else { + if (cb(i2h, w2i, payload)) + return GIT_EUSER; + i++; j++; + } + } + + return 0; +} diff --git a/src/diff.h b/src/diff.h index ac8ab2aed..ad12e7731 100644 --- a/src/diff.h +++ b/src/diff.h @@ -29,11 +29,16 @@ enum { GIT_DIFFCAPS_TRUST_NANOSECS = (1 << 5), /* use stat time nanoseconds */ }; +#define DIFF_FLAGS_KNOWN_BINARY (GIT_DIFF_FLAG_BINARY|GIT_DIFF_FLAG_NOT_BINARY) +#define DIFF_FLAGS_NOT_BINARY (GIT_DIFF_FLAG_NOT_BINARY|GIT_DIFF_FLAG__NO_DATA) + enum { GIT_DIFF_FLAG__FREE_PATH = (1 << 7), /* `path` is allocated memory */ GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */ GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */ GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */ + GIT_DIFF_FLAG__FREE_BLOB = (1 << 11), /* release the blob when done */ + GIT_DIFF_FLAG__LOADED = (1 << 12), /* file data has been loaded */ GIT_DIFF_FLAG__TO_DELETE = (1 << 16), /* delete entry during rename det. */ GIT_DIFF_FLAG__TO_SPLIT = (1 << 17), /* split entry during rename det. */ @@ -83,6 +88,12 @@ extern int git_diff__from_iterators( git_iterator *new_iter, const git_diff_options *opts); +extern int git_diff__paired_foreach( + git_diff_list *idx2head, + git_diff_list *wd2idx, + int (*cb)(git_diff_delta *i2h, git_diff_delta *w2i, void *payload), + void *payload); + int git_diff_find_similar__hashsig_for_file( void **out, const git_diff_file *f, const char *path, void *p); diff --git a/src/diff_driver.c b/src/diff_driver.c new file mode 100644 index 000000000..5438afc67 --- /dev/null +++ b/src/diff_driver.c @@ -0,0 +1,160 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. 
For full terms see the included COPYING file. + */ +#include "common.h" + +#include "git2/attr.h" + +#include "diff.h" +#include "diff_patch.h" +#include "diff_driver.h" +#include "strmap.h" +#include "pool.h" +#include "map.h" +#include "buf_text.h" + +typedef enum { + DIFF_DRIVER_AUTO = 0, + DIFF_DRIVER_FALSE = 1, + DIFF_DRIVER_TRUE = 2, + DIFF_DRIVER_NAMED = 3, +} git_diff_driver_t; + +enum { + DIFF_CONTEXT_FIND_NORMAL = 0, + DIFF_CONTEXT_FIND_ICASE = (1 << 0), + DIFF_CONTEXT_FIND_EXT = (1 << 1), +}; + +/* data for finding function context for a given file type */ +struct git_diff_driver { + git_diff_driver_t type; + git_strarray fn_patterns; + int binary; +}; + +struct git_diff_driver_registry { + git_strmap *drivers; + git_pool strings; +}; + +static git_diff_driver global_drivers[3] = { + { DIFF_DRIVER_AUTO, { NULL, 0 }, -1 }, + { DIFF_DRIVER_FALSE, { NULL, 0 }, 1 }, + { DIFF_DRIVER_TRUE, { NULL, 0 }, 0 }, +}; + +git_diff_driver_registry *git_diff_driver_registry_new() +{ + return git__calloc(1, sizeof(git_diff_driver_registry)); +} + +void git_diff_driver_registry_free(git_diff_driver_registry *reg) +{ + git__free(reg); +} + +int git_diff_driver_lookup( + git_diff_driver **out, git_repository *repo, const char *path) +{ + const char *value; + + assert(out); + + if (!repo || !path || !strlen(path)) + goto use_auto; + + if (git_attr_get(&value, repo, 0, path, "diff") < 0) + return -1; + + if (GIT_ATTR_FALSE(value)) { + *out = &global_drivers[DIFF_DRIVER_FALSE]; + return 0; + } + + else if (GIT_ATTR_TRUE(value)) { + *out = &global_drivers[DIFF_DRIVER_TRUE]; + return 0; + } + + /* otherwise look for driver information in config and build driver */ + +use_auto: + *out = &global_drivers[DIFF_DRIVER_AUTO]; + return 0; +} + +void git_diff_driver_free(git_diff_driver *driver) +{ + GIT_UNUSED(driver); + /* do nothing for now */ +} + +int git_diff_driver_is_binary(git_diff_driver *driver) +{ + return driver ? 
driver->binary : -1; +} + +int git_diff_driver_content_is_binary( + git_diff_driver *driver, const char *content, size_t content_len) +{ + const git_buf search = { (char *)content, 0, min(content_len, 4000) }; + + GIT_UNUSED(driver); + + /* TODO: provide encoding / binary detection callbacks that can + * be UTF-8 aware, etc. For now, instead of trying to be smart, + * let's just use the simple NUL-byte detection that core git uses. + */ + + /* previously was: if (git_buf_text_is_binary(&search)) */ + if (git_buf_text_contains_nul(&search)) + return 1; + + return 0; +} + +static long diff_context_find( + const char *line, + long line_len, + char *out, + long out_size, + void *payload) +{ + git_diff_driver *driver = payload; + const char *scan; + + GIT_UNUSED(driver); + + if (line_len > 0 && line[line_len - 1] == '\n') + line_len--; + if (line_len > 0 && line[line_len - 1] == '\r') + line_len--; + if (!line_len) + return -1; + + if (!git__isalpha(*line) && *line != '_' && *line != '$') + return -1; + + for (scan = &line[line_len-1]; scan > line && git__isspace(*scan); --scan) + /* search backward for non-space */; + line_len = scan - line; + + if (line_len >= out_size) + line_len = out_size - 1; + + memcpy(out, line, line_len); + out[line_len] = '\0'; + + return line_len; +} + +git_diff_find_context_fn git_diff_driver_find_content_fn(git_diff_driver *driver) +{ + GIT_UNUSED(driver); + return diff_context_find; +} + diff --git a/src/diff_driver.h b/src/diff_driver.h new file mode 100644 index 000000000..b9881a7ed --- /dev/null +++ b/src/diff_driver.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#ifndef INCLUDE_diff_driver_h__ +#define INCLUDE_diff_driver_h__ + +#include "common.h" + +typedef struct git_diff_driver_registry git_diff_driver_registry; + +git_diff_driver_registry *git_diff_driver_registry_new(); +void git_diff_driver_registry_free(git_diff_driver_registry *); + +typedef struct git_diff_driver git_diff_driver; + +int git_diff_driver_lookup(git_diff_driver **, git_repository *, const char *); +void git_diff_driver_free(git_diff_driver *); + +/* returns -1 meaning "unknown", 0 meaning not binary, 1 meaning binary */ +int git_diff_driver_is_binary(git_diff_driver *); + +/* returns -1 meaning "unknown", 0 meaning not binary, 1 meaning binary */ +int git_diff_driver_content_is_binary( + git_diff_driver *, const char *content, size_t content_len); + +typedef long (*git_diff_find_context_fn)( + const char *, long, char *, long, void *); + +git_diff_find_context_fn git_diff_driver_find_content_fn(git_diff_driver *); + +#endif diff --git a/src/diff_file.c b/src/diff_file.c new file mode 100644 index 000000000..e4f8ca1e8 --- /dev/null +++ b/src/diff_file.c @@ -0,0 +1,442 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#include "common.h" +#include "git2/blob.h" +#include "git2/submodule.h" +#include "diff.h" +#include "diff_file.h" +#include "odb.h" +#include "fileops.h" +#include "filter.h" + +#define DIFF_MAX_FILESIZE 0x20000000 + +static bool diff_file_content_binary_by_size(git_diff_file_content *fc) +{ + /* if we have diff opts, check max_size vs file size */ + if ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) == 0 && + fc->opts && fc->opts->max_size >= 0) + { + git_off_t threshold = DIFF_MAX_FILESIZE; + if (fc->opts->max_size > 0) + threshold = fc->opts->max_size; + if (fc->file.size > threshold) + fc->file.flags |= GIT_DIFF_FLAG_BINARY; + } + + return ((fc->file.flags & GIT_DIFF_FLAG_BINARY) != 0); +} + +static void diff_file_content_binary_by_content(git_diff_file_content *fc) +{ + if ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) != 0) + return; + + switch (git_diff_driver_content_is_binary( + fc->driver, fc->map.data, fc->map.len)) { + case 0: fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; break; + case 1: fc->file.flags |= GIT_DIFF_FLAG_BINARY; break; + default: break; + } +} + +static int diff_file_content_init_common(git_diff_file_content *fc) +{ + uint32_t flags = fc->opts ? 
fc->opts->flags : GIT_DIFF_NORMAL; + + if (!fc->driver) { + if (git_diff_driver_lookup(&fc->driver, fc->repo, "") < 0) + return -1; + fc->src = GIT_ITERATOR_TYPE_TREE; + } + + /* make sure file is conceivable mmap-able */ + if ((git_off_t)((size_t)fc->file.size) != fc->file.size) + fc->file.flags |= GIT_DIFF_FLAG_BINARY; + + /* check if user is forcing is to text diff the file */ + else if (flags & GIT_DIFF_FORCE_TEXT) + fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; + + /* otherwise see if diff driver forces a behavior */ + else switch (git_diff_driver_is_binary(fc->driver)) { + case 0: fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; break; + case 1: fc->file.flags |= GIT_DIFF_FLAG_BINARY; break; + default: break; + } + + diff_file_content_binary_by_size(fc); + + if ((fc->file.flags & GIT_DIFF_FLAG__NO_DATA) != 0) { + fc->file.flags |= GIT_DIFF_FLAG__LOADED; + fc->map.len = 0; + fc->map.data = ""; + } + + if ((fc->file.flags & GIT_DIFF_FLAG__LOADED) != 0) + diff_file_content_binary_by_content(fc); + + return 0; +} + +int diff_file_content_init_from_diff( + git_diff_file_content *fc, + git_diff_list *diff, + size_t delta_index, + bool use_old) +{ + git_diff_delta *delta = git_vector_get(&diff->deltas, delta_index); + git_diff_file *file = use_old ? &delta->old_file : &delta->new_file; + bool has_data = true; + + memset(fc, 0, sizeof(*fc)); + fc->repo = diff->repo; + fc->opts = &diff->opts; + fc->src = use_old ? 
diff->old_src : diff->new_src; + memcpy(&fc->file, file, sizeof(fc->file)); + + if (git_diff_driver_lookup(&fc->driver, fc->repo, file->path) < 0) + return -1; + + switch (delta->status) { + case GIT_DELTA_ADDED: + has_data = !use_old; break; + case GIT_DELTA_DELETED: + has_data = use_old; break; + case GIT_DELTA_UNTRACKED: + has_data = !use_old && + (diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) != 0; + break; + case GIT_DELTA_MODIFIED: + case GIT_DELTA_COPIED: + case GIT_DELTA_RENAMED: + break; + default: + has_data = false; + break; + } + + if (!has_data) + fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; + + return diff_file_content_init_common(fc); +} + +int diff_file_content_init_from_blob( + git_diff_file_content *fc, + git_repository *repo, + const git_diff_options *opts, + const git_blob *blob) +{ + memset(fc, 0, sizeof(*fc)); + fc->repo = repo; + fc->opts = opts; + fc->blob = blob; + + if (!blob) { + fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; + } else { + fc->file.flags |= GIT_DIFF_FLAG__LOADED | GIT_DIFF_FLAG_VALID_OID; + fc->file.size = git_blob_rawsize(blob); + fc->file.mode = 0644; + git_oid_cpy(&fc->file.oid, git_blob_id(blob)); + + fc->map.len = (size_t)fc->file.size; + fc->map.data = (char *)git_blob_rawcontent(blob); + } + + return diff_file_content_init_common(fc); +} + +int diff_file_content_init_from_raw( + git_diff_file_content *fc, + git_repository *repo, + const git_diff_options *opts, + const char *buf, + size_t buflen) +{ + memset(fc, 0, sizeof(*fc)); + fc->repo = repo; + fc->opts = opts; + + if (!buf) { + fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; + } else { + fc->file.flags |= GIT_DIFF_FLAG__LOADED | GIT_DIFF_FLAG_VALID_OID; + fc->file.size = buflen; + fc->file.mode = 0644; + git_odb_hash(&fc->file.oid, buf, buflen, GIT_OBJ_BLOB); + + fc->map.len = buflen; + fc->map.data = (char *)buf; + } + + return diff_file_content_init_common(fc); +} + +static int diff_file_content_commit_to_str( + git_diff_file_content *fc, bool check_status) +{ + 
char oid[GIT_OID_HEXSZ+1]; + git_buf content = GIT_BUF_INIT; + const char *status = ""; + + if (check_status) { + int error = 0; + git_submodule *sm = NULL; + unsigned int sm_status = 0; + const git_oid *sm_head; + + if ((error = git_submodule_lookup(&sm, fc->repo, fc->file.path)) < 0 || + (error = git_submodule_status(&sm_status, sm)) < 0) { + /* GIT_EEXISTS means a "submodule" that has not been git added */ + if (error == GIT_EEXISTS) + error = 0; + return error; + } + + /* update OID if we didn't have it previously */ + if ((fc->file.flags & GIT_DIFF_FLAG_VALID_OID) == 0 && + ((sm_head = git_submodule_wd_id(sm)) != NULL || + (sm_head = git_submodule_head_id(sm)) != NULL)) + { + git_oid_cpy(&fc->file.oid, sm_head); + fc->file.flags |= GIT_DIFF_FLAG_VALID_OID; + } + + if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status)) + status = "-dirty"; + } + + git_oid_tostr(oid, sizeof(oid), &fc->file.oid); + if (git_buf_printf(&content, "Subproject commit %s%s\n", oid, status) < 0) + return -1; + + fc->map.len = git_buf_len(&content); + fc->map.data = git_buf_detach(&content); + fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA; + + return 0; +} + +static int diff_file_content_load_blob(git_diff_file_content *fc) +{ + int error = 0; + git_odb_object *odb_obj = NULL; + + if (git_oid_iszero(&fc->file.oid)) + return 0; + + if (fc->file.mode == GIT_FILEMODE_COMMIT) + return diff_file_content_commit_to_str(fc, false); + + /* if we don't know size, try to peek at object header first */ + if (!fc->file.size) { + git_odb *odb; + size_t len; + git_otype type; + + if (!(error = git_repository_odb__weakptr(&odb, fc->repo))) { + error = git_odb__read_header_or_object( + &odb_obj, &len, &type, odb, &fc->file.oid); + git_odb_free(odb); + } + if (error) + return error; + + fc->file.size = len; + } + + if (diff_file_content_binary_by_size(fc)) + return 0; + + if (odb_obj != NULL) { + error = git_object__from_odb_object( + (git_object **)&fc->blob, fc->repo, odb_obj, GIT_OBJ_BLOB); + 
git_odb_object_free(odb_obj); + } else { + error = git_blob_lookup( + (git_blob **)&fc->blob, fc->repo, &fc->file.oid); + } + + if (!error) { + fc->file.flags |= GIT_DIFF_FLAG__FREE_BLOB; + fc->map.data = (void *)git_blob_rawcontent(fc->blob); + fc->map.len = (size_t)git_blob_rawsize(fc->blob); + } + + return error; +} + +static int diff_file_content_load_workdir_symlink( + git_diff_file_content *fc, git_buf *path) +{ + ssize_t alloc_len, read_len; + + /* link path on disk could be UTF-16, so prepare a buffer that is + * big enough to handle some UTF-8 data expansion + */ + alloc_len = (ssize_t)(fc->file.size * 2) + 1; + + fc->map.data = git__calloc(alloc_len, sizeof(char)); + GITERR_CHECK_ALLOC(fc->map.data); + + fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA; + + read_len = p_readlink(git_buf_cstr(path), fc->map.data, alloc_len); + if (read_len < 0) { + giterr_set(GITERR_OS, "Failed to read symlink '%s'", fc->file.path); + return -1; + } + + fc->map.len = read_len; + return 0; +} + +static int diff_file_content_load_workdir_file( + git_diff_file_content *fc, git_buf *path) +{ + int error = 0; + git_vector filters = GIT_VECTOR_INIT; + git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; + git_file fd = git_futils_open_ro(git_buf_cstr(path)); + + if (fd < 0) + return fd; + + if (!fc->file.size && + !(fc->file.size = git_futils_filesize(fd))) + goto cleanup; + + if (diff_file_content_binary_by_size(fc)) + goto cleanup; + + if ((error = git_filters_load( + &filters, fc->repo, fc->file.path, GIT_FILTER_TO_ODB)) < 0) + goto cleanup; + /* error >= is a filter count */ + + if (error == 0) { + if (!(error = git_futils_mmap_ro( + &fc->map, fd, 0, (size_t)fc->file.size))) + fc->file.flags |= GIT_DIFF_FLAG__UNMAP_DATA; + else /* fall through to try readbuffer below */ + giterr_clear(); + } + + if (error != 0) { + error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file.size); + if (error < 0) + goto cleanup; + + if (!filters.length) + git_buf_swap(&filtered, &raw); + else + 
error = git_filters_apply(&filtered, &raw, &filters); + + if (!error) { + fc->map.len = git_buf_len(&filtered); + fc->map.data = git_buf_detach(&filtered); + fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA; + } + + git_buf_free(&raw); + git_buf_free(&filtered); + } + +cleanup: + git_filters_free(&filters); + p_close(fd); + + return error; +} + +static int diff_file_content_load_workdir(git_diff_file_content *fc) +{ + int error = 0; + git_buf path = GIT_BUF_INIT; + + if (fc->file.mode == GIT_FILEMODE_COMMIT) + return diff_file_content_commit_to_str(fc, true); + + if (fc->file.mode == GIT_FILEMODE_TREE) + return 0; + + if (git_buf_joinpath( + &path, git_repository_workdir(fc->repo), fc->file.path) < 0) + return -1; + + if (S_ISLNK(fc->file.mode)) + error = diff_file_content_load_workdir_symlink(fc, &path); + else + error = diff_file_content_load_workdir_file(fc, &path); + + /* once data is loaded, update OID if we didn't have it previously */ + if (!error && (fc->file.flags & GIT_DIFF_FLAG_VALID_OID) == 0) { + error = git_odb_hash( + &fc->file.oid, fc->map.data, fc->map.len, GIT_OBJ_BLOB); + fc->file.flags |= GIT_DIFF_FLAG_VALID_OID; + } + + git_buf_free(&path); + return error; +} + +int diff_file_content_load(git_diff_file_content *fc) +{ + int error = 0; + + if ((fc->file.flags & GIT_DIFF_FLAG__LOADED) != 0) + return 0; + + if (fc->file.flags & GIT_DIFF_FLAG_BINARY) + return 0; + + if (fc->src == GIT_ITERATOR_TYPE_WORKDIR) + error = diff_file_content_load_workdir(fc); + else + error = diff_file_content_load_blob(fc); + if (error) + return error; + + fc->file.flags |= GIT_DIFF_FLAG__LOADED; + + diff_file_content_binary_by_content(fc); + + return 0; +} + +void diff_file_content_unload(git_diff_file_content *fc) +{ + if (fc->file.flags & GIT_DIFF_FLAG__FREE_DATA) { + git__free(fc->map.data); + fc->map.data = ""; + fc->map.len = 0; + fc->file.flags &= ~GIT_DIFF_FLAG__FREE_DATA; + } + else if (fc->file.flags & GIT_DIFF_FLAG__UNMAP_DATA) { + git_futils_mmap_free(&fc->map); + 
fc->map.data = ""; + fc->map.len = 0; + fc->file.flags &= ~GIT_DIFF_FLAG__UNMAP_DATA; + } + + if (fc->file.flags & GIT_DIFF_FLAG__FREE_BLOB) { + git_blob_free((git_blob *)fc->blob); + fc->blob = NULL; + fc->file.flags &= ~GIT_DIFF_FLAG__FREE_BLOB; + } + + fc->file.flags &= ~GIT_DIFF_FLAG__LOADED; +} + +void diff_file_content_clear(git_diff_file_content *fc) +{ + diff_file_content_unload(fc); + + /* for now, nothing else to do */ +} diff --git a/src/diff_file.h b/src/diff_file.h new file mode 100644 index 000000000..51c6878a9 --- /dev/null +++ b/src/diff_file.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_diff_file_h__ +#define INCLUDE_diff_file_h__ + +#include "common.h" +#include "diff.h" +#include "diff_driver.h" +#include "map.h" + +/* expanded information for one side of a delta */ +typedef struct { + git_repository *repo; + const git_diff_options *opts; + git_diff_file file; + git_diff_driver *driver; + git_iterator_type_t src; + const git_blob *blob; + git_map map; +} git_diff_file_content; + +extern int diff_file_content_init_from_diff( + git_diff_file_content *fc, + git_diff_list *diff, + size_t delta_index, + bool use_old); + +extern int diff_file_content_init_from_blob( + git_diff_file_content *fc, + git_repository *repo, + const git_diff_options *opts, + const git_blob *blob); + +extern int diff_file_content_init_from_raw( + git_diff_file_content *fc, + git_repository *repo, + const git_diff_options *opts, + const char *buf, + size_t buflen); + +/* this loads the blob/file-on-disk as needed */ +extern int diff_file_content_load(git_diff_file_content *fc); + +/* this releases the blob/file-in-memory */ +extern void diff_file_content_unload(git_diff_file_content *fc); + +/* this unloads and also releases any other resources */ +extern void 
diff_file_content_clear(git_diff_file_content *fc); + +#endif diff --git a/src/diff_output.c b/src/diff_output.c deleted file mode 100644 index 9a5be2a10..000000000 --- a/src/diff_output.c +++ /dev/null @@ -1,1526 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ -#include "common.h" -#include "git2/attr.h" -#include "git2/oid.h" -#include "git2/submodule.h" -#include "diff_output.h" -#include -#include "fileops.h" -#include "filter.h" -#include "buf_text.h" - -static int read_next_int(const char **str, int *value) -{ - const char *scan = *str; - int v = 0, digits = 0; - /* find next digit */ - for (scan = *str; *scan && !isdigit(*scan); scan++); - /* parse next number */ - for (; isdigit(*scan); scan++, digits++) - v = (v * 10) + (*scan - '0'); - *str = scan; - *value = v; - return (digits > 0) ? 0 : -1; -} - -static int parse_hunk_header(git_diff_range *range, const char *header) -{ - /* expect something of the form "@@ -%d[,%d] +%d[,%d] @@" */ - if (*header != '@') - return -1; - if (read_next_int(&header, &range->old_start) < 0) - return -1; - if (*header == ',') { - if (read_next_int(&header, &range->old_lines) < 0) - return -1; - } else - range->old_lines = 1; - if (read_next_int(&header, &range->new_start) < 0) - return -1; - if (*header == ',') { - if (read_next_int(&header, &range->new_lines) < 0) - return -1; - } else - range->new_lines = 1; - if (range->old_start < 0 || range->new_start < 0) - return -1; - - return 0; -} - -#define KNOWN_BINARY_FLAGS (GIT_DIFF_FLAG_BINARY|GIT_DIFF_FLAG_NOT_BINARY) -#define NOT_BINARY_FLAGS (GIT_DIFF_FLAG_NOT_BINARY|GIT_DIFF_FLAG__NO_DATA) - -static int update_file_is_binary_by_attr( - git_repository *repo, git_diff_file *file) -{ - const char *value; - - /* because of blob diffs, cannot assume path is set */ - if (!file->path || 
!strlen(file->path)) - return 0; - - if (git_attr_get(&value, repo, 0, file->path, "diff") < 0) - return -1; - - if (GIT_ATTR_FALSE(value)) - file->flags |= GIT_DIFF_FLAG_BINARY; - else if (GIT_ATTR_TRUE(value)) - file->flags |= GIT_DIFF_FLAG_NOT_BINARY; - /* otherwise leave file->flags alone */ - - return 0; -} - -static void update_delta_is_binary(git_diff_delta *delta) -{ - if ((delta->old_file.flags & GIT_DIFF_FLAG_BINARY) != 0 || - (delta->new_file.flags & GIT_DIFF_FLAG_BINARY) != 0) - delta->flags |= GIT_DIFF_FLAG_BINARY; - - else if ((delta->old_file.flags & NOT_BINARY_FLAGS) != 0 && - (delta->new_file.flags & NOT_BINARY_FLAGS) != 0) - delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; - - /* otherwise leave delta->flags binary value untouched */ -} - -/* returns if we forced binary setting (and no further checks needed) */ -static bool diff_delta_is_binary_forced( - diff_context *ctxt, - git_diff_delta *delta) -{ - /* return true if binary-ness has already been settled */ - if ((delta->flags & KNOWN_BINARY_FLAGS) != 0) - return true; - - /* make sure files are conceivably mmap-able */ - if ((git_off_t)((size_t)delta->old_file.size) != delta->old_file.size || - (git_off_t)((size_t)delta->new_file.size) != delta->new_file.size) { - - delta->old_file.flags |= GIT_DIFF_FLAG_BINARY; - delta->new_file.flags |= GIT_DIFF_FLAG_BINARY; - delta->flags |= GIT_DIFF_FLAG_BINARY; - return true; - } - - /* check if user is forcing us to text diff these files */ - if (ctxt->opts && (ctxt->opts->flags & GIT_DIFF_FORCE_TEXT) != 0) { - delta->old_file.flags |= GIT_DIFF_FLAG_NOT_BINARY; - delta->new_file.flags |= GIT_DIFF_FLAG_NOT_BINARY; - delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; - return true; - } - - return false; -} - -static int diff_delta_is_binary_by_attr( - diff_context *ctxt, git_diff_patch *patch) -{ - int error = 0, mirror_new; - git_diff_delta *delta = patch->delta; - - if (diff_delta_is_binary_forced(ctxt, delta)) - return 0; - - /* check diff attribute +, -, or 0 */ - if 
(update_file_is_binary_by_attr(ctxt->repo, &delta->old_file) < 0) - return -1; - - mirror_new = (delta->new_file.path == delta->old_file.path || - ctxt->diff->strcomp(delta->new_file.path, delta->old_file.path) == 0); - if (mirror_new) - delta->new_file.flags |= (delta->old_file.flags & KNOWN_BINARY_FLAGS); - else - error = update_file_is_binary_by_attr(ctxt->repo, &delta->new_file); - - update_delta_is_binary(delta); - - return error; -} - -static int diff_delta_is_binary_by_content( - diff_context *ctxt, - git_diff_delta *delta, - git_diff_file *file, - const git_map *map) -{ - const git_buf search = { map->data, 0, min(map->len, 4000) }; - - if (diff_delta_is_binary_forced(ctxt, delta)) - return 0; - - /* TODO: provide encoding / binary detection callbacks that can - * be UTF-8 aware, etc. For now, instead of trying to be smart, - * let's just use the simple NUL-byte detection that core git uses. - */ - - /* previously was: if (git_buf_text_is_binary(&search)) */ - if (git_buf_text_contains_nul(&search)) - file->flags |= GIT_DIFF_FLAG_BINARY; - else - file->flags |= GIT_DIFF_FLAG_NOT_BINARY; - - update_delta_is_binary(delta); - - return 0; -} - -static int diff_delta_is_binary_by_size( - diff_context *ctxt, git_diff_delta *delta, git_diff_file *file) -{ - git_off_t threshold = MAX_DIFF_FILESIZE; - - if ((file->flags & KNOWN_BINARY_FLAGS) != 0) - return 0; - - if (ctxt && ctxt->opts) { - if (ctxt->opts->max_size < 0) - return 0; - - if (ctxt->opts->max_size > 0) - threshold = ctxt->opts->max_size; - } - - if (file->size > threshold) - file->flags |= GIT_DIFF_FLAG_BINARY; - - update_delta_is_binary(delta); - - return 0; -} - -static long diff_context_find( - const char *line, - long line_len, - char *out, - long out_size, - void *payload) -{ - diff_context *ctxt = payload; - const char *scan; - bool found_paren = false; - - GIT_UNUSED(ctxt); - - if (line_len > 0 && line[line_len - 1] == '\n') - line_len--; - if (line_len > 0 && line[line_len - 1] == '\r') - 
line_len--; - if (!line_len) - return -1; - - if (!isalpha(*line)) - return -1; - - for (scan = &line[line_len - 1]; scan > line && *scan != '('; --scan) - /* search backward for ( */; - if (scan != line) { - found_paren = true; - line_len = scan - line; - - for (--scan; scan > line && !isalpha(*scan); --scan) - --line_len; - } - - if (!line_len) - return -1; - - if (out_size > line_len) { - memcpy(out, line, line_len); - - if (found_paren) - out[line_len++] = '('; - out[line_len] = '\0'; - } else { - memcpy(out, line, out_size); - line_len = out_size; - } - - return line_len; -} - -static void setup_xdiff_options(diff_context *ctxt) -{ - memset(&ctxt->xdiff_config, 0, sizeof(ctxt->xdiff_config)); - memset(&ctxt->xdiff_params, 0, sizeof(ctxt->xdiff_params)); - - ctxt->xdiff_config.ctxlen = - (!ctxt->opts) ? 3 : ctxt->opts->context_lines; - ctxt->xdiff_config.interhunkctxlen = - (!ctxt->opts) ? 0 : ctxt->opts->interhunk_lines; - - ctxt->xdiff_config.flags = XDL_EMIT_FUNCNAMES; - ctxt->xdiff_config.find_func = diff_context_find; - ctxt->xdiff_config.find_func_priv = ctxt; - - if (!ctxt->opts) - return; - - if (ctxt->opts->flags & GIT_DIFF_IGNORE_WHITESPACE) - ctxt->xdiff_params.flags |= XDF_WHITESPACE_FLAGS; - if (ctxt->opts->flags & GIT_DIFF_IGNORE_WHITESPACE_CHANGE) - ctxt->xdiff_params.flags |= XDF_IGNORE_WHITESPACE_CHANGE; - if (ctxt->opts->flags & GIT_DIFF_IGNORE_WHITESPACE_EOL) - ctxt->xdiff_params.flags |= XDF_IGNORE_WHITESPACE_AT_EOL; -} - - -static int get_blob_content( - diff_context *ctxt, - git_diff_delta *delta, - git_diff_file *file, - git_map *map, - git_blob **blob) -{ - int error; - git_odb_object *odb_obj = NULL; - - if (git_oid_iszero(&file->oid)) - return 0; - - if (file->mode == GIT_FILEMODE_COMMIT) { - char oidstr[GIT_OID_HEXSZ+1]; - git_buf content = GIT_BUF_INIT; - - git_oid_tostr(oidstr, sizeof(oidstr), &file->oid); - git_buf_printf(&content, "Subproject commit %s\n", oidstr); - - map->data = git_buf_detach(&content); - map->len = 
strlen(map->data); - - file->flags |= GIT_DIFF_FLAG__FREE_DATA; - return 0; - } - - if (!file->size) { - git_odb *odb; - size_t len; - git_otype type; - - /* peek at object header to avoid loading if too large */ - if ((error = git_repository_odb__weakptr(&odb, ctxt->repo)) < 0 || - (error = git_odb__read_header_or_object( - &odb_obj, &len, &type, odb, &file->oid)) < 0) - return error; - - assert(type == GIT_OBJ_BLOB); - - file->size = len; - } - - /* if blob is too large to diff, mark as binary */ - if ((error = diff_delta_is_binary_by_size(ctxt, delta, file)) < 0) - return error; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - return 0; - - if (odb_obj != NULL) { - error = git_object__from_odb_object( - (git_object **)blob, ctxt->repo, odb_obj, GIT_OBJ_BLOB); - git_odb_object_free(odb_obj); - } else - error = git_blob_lookup(blob, ctxt->repo, &file->oid); - - if (error) - return error; - - map->data = (void *)git_blob_rawcontent(*blob); - map->len = (size_t)git_blob_rawsize(*blob); - - return diff_delta_is_binary_by_content(ctxt, delta, file, map); -} - -static int get_workdir_sm_content( - diff_context *ctxt, - git_diff_file *file, - git_map *map) -{ - int error = 0; - git_buf content = GIT_BUF_INIT; - git_submodule* sm = NULL; - unsigned int sm_status = 0; - const char* sm_status_text = ""; - char oidstr[GIT_OID_HEXSZ+1]; - - if ((error = git_submodule_lookup(&sm, ctxt->repo, file->path)) < 0 || - (error = git_submodule_status(&sm_status, sm)) < 0) { - - /* GIT_EEXISTS means a "submodule" that has not been git added */ - if (error == GIT_EEXISTS) - error = 0; - return error; - } - - /* update OID if we didn't have it previously */ - if ((file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) { - const git_oid* sm_head; - - if ((sm_head = git_submodule_wd_id(sm)) != NULL || - (sm_head = git_submodule_head_id(sm)) != NULL) { - - git_oid_cpy(&file->oid, sm_head); - file->flags |= GIT_DIFF_FLAG_VALID_OID; - } - } - - git_oid_tostr(oidstr, sizeof(oidstr), &file->oid); - - 
if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status)) - sm_status_text = "-dirty"; - - git_buf_printf( - &content, "Subproject commit %s%s\n", oidstr, sm_status_text); - - map->data = git_buf_detach(&content); - map->len = strlen(map->data); - - file->flags |= GIT_DIFF_FLAG__FREE_DATA; - - return 0; -} - -static int get_filtered( - git_map *map, git_file fd, git_diff_file *file, git_vector *filters) -{ - int error; - git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; - - if ((error = git_futils_readbuffer_fd(&raw, fd, (size_t)file->size)) < 0) - return error; - - if (!filters->length) - git_buf_swap(&filtered, &raw); - else - error = git_filters_apply(&filtered, &raw, filters); - - if (!error) { - map->len = git_buf_len(&filtered); - map->data = git_buf_detach(&filtered); - - file->flags |= GIT_DIFF_FLAG__FREE_DATA; - } - - git_buf_free(&raw); - git_buf_free(&filtered); - - return error; -} - -static int get_workdir_content( - diff_context *ctxt, - git_diff_delta *delta, - git_diff_file *file, - git_map *map) -{ - int error = 0; - git_buf path = GIT_BUF_INIT; - const char *wd = git_repository_workdir(ctxt->repo); - - if (S_ISGITLINK(file->mode)) - return get_workdir_sm_content(ctxt, file, map); - - if (S_ISDIR(file->mode)) - return 0; - - if (git_buf_joinpath(&path, wd, file->path) < 0) - return -1; - - if (S_ISLNK(file->mode)) { - ssize_t alloc_len, read_len; - - file->flags |= GIT_DIFF_FLAG__FREE_DATA; - file->flags |= GIT_DIFF_FLAG_BINARY; - - /* link path on disk could be UTF-16, so prepare a buffer that is - * big enough to handle some UTF-8 data expansion - */ - alloc_len = (ssize_t)(file->size * 2) + 1; - - map->data = git__malloc(alloc_len); - GITERR_CHECK_ALLOC(map->data); - - read_len = p_readlink(path.ptr, map->data, alloc_len); - if (read_len < 0) { - giterr_set(GITERR_OS, "Failed to read symlink '%s'", file->path); - error = -1; - goto cleanup; - } - - map->len = read_len; - } - else { - git_file fd = git_futils_open_ro(path.ptr); - git_vector filters = 
GIT_VECTOR_INIT; - - if (fd < 0) { - error = fd; - goto cleanup; - } - - if (!file->size && !(file->size = git_futils_filesize(fd))) - goto close_and_cleanup; - - if ((error = diff_delta_is_binary_by_size(ctxt, delta, file)) < 0 || - (delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto close_and_cleanup; - - error = git_filters_load( - &filters, ctxt->repo, file->path, GIT_FILTER_TO_ODB); - if (error < 0) - goto close_and_cleanup; - - if (error == 0) { /* note: git_filters_load returns filter count */ - error = git_futils_mmap_ro(map, fd, 0, (size_t)file->size); - if (!error) - file->flags |= GIT_DIFF_FLAG__UNMAP_DATA; - } - if (error != 0) - error = get_filtered(map, fd, file, &filters); - -close_and_cleanup: - git_filters_free(&filters); - p_close(fd); - } - - /* once data is loaded, update OID if we didn't have it previously */ - if (!error && (file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) { - error = git_odb_hash( - &file->oid, map->data, map->len, GIT_OBJ_BLOB); - if (!error) - file->flags |= GIT_DIFF_FLAG_VALID_OID; - } - - if (!error) - error = diff_delta_is_binary_by_content(ctxt, delta, file, map); - -cleanup: - git_buf_free(&path); - return error; -} - -static void release_content(git_diff_file *file, git_map *map, git_blob *blob) -{ - if (blob != NULL) - git_blob_free(blob); - - if (file->flags & GIT_DIFF_FLAG__FREE_DATA) { - git__free(map->data); - map->data = ""; - map->len = 0; - file->flags &= ~GIT_DIFF_FLAG__FREE_DATA; - } - else if (file->flags & GIT_DIFF_FLAG__UNMAP_DATA) { - git_futils_mmap_free(map); - map->data = ""; - map->len = 0; - file->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA; - } -} - - -static int diff_context_init( - diff_context *ctxt, - git_diff_list *diff, - git_repository *repo, - const git_diff_options *opts, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - memset(ctxt, 0, sizeof(diff_context)); - - if (!repo && diff) - repo = diff->repo; - - if (!opts && diff) - opts = &diff->opts; 
- - ctxt->repo = repo; - ctxt->diff = diff; - ctxt->opts = opts; - ctxt->file_cb = file_cb; - ctxt->hunk_cb = hunk_cb; - ctxt->data_cb = data_cb; - ctxt->payload = payload; - ctxt->error = 0; - - setup_xdiff_options(ctxt); - - return 0; -} - -static int diff_delta_file_callback( - diff_context *ctxt, git_diff_delta *delta, size_t idx) -{ - float progress; - - if (!ctxt->file_cb) - return 0; - - progress = ctxt->diff ? ((float)idx / ctxt->diff->deltas.length) : 1.0f; - - if (ctxt->file_cb(delta, progress, ctxt->payload) != 0) - ctxt->error = GIT_EUSER; - - return ctxt->error; -} - -static void diff_patch_init( - diff_context *ctxt, git_diff_patch *patch) -{ - memset(patch, 0, sizeof(git_diff_patch)); - - patch->diff = ctxt->diff; - patch->ctxt = ctxt; - - if (patch->diff) { - patch->old_src = patch->diff->old_src; - patch->new_src = patch->diff->new_src; - } else { - patch->old_src = patch->new_src = GIT_ITERATOR_TYPE_TREE; - } -} - -static git_diff_patch *diff_patch_alloc( - diff_context *ctxt, git_diff_delta *delta) -{ - git_diff_patch *patch = git__malloc(sizeof(git_diff_patch)); - if (!patch) - return NULL; - - diff_patch_init(ctxt, patch); - - git_diff_list_addref(patch->diff); - - GIT_REFCOUNT_INC(patch); - - patch->delta = delta; - patch->flags = GIT_DIFF_PATCH_ALLOCATED; - - return patch; -} - -static int diff_patch_load( - diff_context *ctxt, git_diff_patch *patch) -{ - int error = 0; - git_diff_delta *delta = patch->delta; - bool check_if_unmodified = false; - - if ((patch->flags & GIT_DIFF_PATCH_LOADED) != 0) - return 0; - - error = diff_delta_is_binary_by_attr(ctxt, patch); - - patch->old_data.data = ""; - patch->old_data.len = 0; - patch->old_blob = NULL; - - patch->new_data.data = ""; - patch->new_data.len = 0; - patch->new_blob = NULL; - - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - - if (!ctxt->hunk_cb && - !ctxt->data_cb && - (ctxt->opts->flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0) - goto cleanup; - - switch (delta->status) { - 
case GIT_DELTA_ADDED: - delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; - break; - case GIT_DELTA_DELETED: - delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; - break; - case GIT_DELTA_MODIFIED: - case GIT_DELTA_COPIED: - case GIT_DELTA_RENAMED: - break; - case GIT_DELTA_UNTRACKED: - delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; - if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0) - delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; - break; - default: - delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; - delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; - break; - } - -#define CHECK_UNMODIFIED (GIT_DIFF_FLAG__NO_DATA | GIT_DIFF_FLAG_VALID_OID) - - check_if_unmodified = - (delta->old_file.flags & CHECK_UNMODIFIED) == 0 && - (delta->new_file.flags & CHECK_UNMODIFIED) == 0; - - /* Always try to load workdir content first, since it may need to be - * filtered (and hence use 2x memory) and we want to minimize the max - * memory footprint during diff. - */ - - if ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && - patch->old_src == GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = get_workdir_content( - ctxt, delta, &delta->old_file, &patch->old_data)) < 0) - goto cleanup; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - } - - if ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && - patch->new_src == GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = get_workdir_content( - ctxt, delta, &delta->new_file, &patch->new_data)) < 0) - goto cleanup; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - } - - if ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && - patch->old_src != GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = get_blob_content( - ctxt, delta, &delta->old_file, - &patch->old_data, &patch->old_blob)) < 0) - goto cleanup; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - } - - if ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && - patch->new_src != GIT_ITERATOR_TYPE_WORKDIR) { - if 
((error = get_blob_content( - ctxt, delta, &delta->new_file, - &patch->new_data, &patch->new_blob)) < 0) - goto cleanup; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - } - - /* if we did not previously have the definitive oid, we may have - * incorrect status and need to switch this to UNMODIFIED. - */ - if (check_if_unmodified && - delta->old_file.mode == delta->new_file.mode && - !git_oid__cmp(&delta->old_file.oid, &delta->new_file.oid)) { - - delta->status = GIT_DELTA_UNMODIFIED; - - if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) - goto cleanup; - } - -cleanup: - if ((delta->flags & KNOWN_BINARY_FLAGS) == 0) - update_delta_is_binary(delta); - - if (!error) { - patch->flags |= GIT_DIFF_PATCH_LOADED; - - /* patch is diffable only for non-binary, modified files where at - * least one side has data and there is actual change in the data - */ - if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0 && - delta->status != GIT_DELTA_UNMODIFIED && - (patch->old_data.len || patch->new_data.len) && - (patch->old_data.len != patch->new_data.len || - !git_oid_equal(&delta->old_file.oid, &delta->new_file.oid))) - patch->flags |= GIT_DIFF_PATCH_DIFFABLE; - } - - return error; -} - -static int diff_patch_cb(void *priv, mmbuffer_t *bufs, int len) -{ - git_diff_patch *patch = priv; - diff_context *ctxt = patch->ctxt; - - if (len == 1) { - ctxt->error = parse_hunk_header(&ctxt->range, bufs[0].ptr); - if (ctxt->error < 0) - return ctxt->error; - - if (ctxt->hunk_cb != NULL && - ctxt->hunk_cb(patch->delta, &ctxt->range, - bufs[0].ptr, bufs[0].size, ctxt->payload)) - ctxt->error = GIT_EUSER; - } - - if (len == 2 || len == 3) { - /* expect " "/"-"/"+", then data */ - char origin = - (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_ADDITION : - (*bufs[0].ptr == '-') ? 
GIT_DIFF_LINE_DELETION : - GIT_DIFF_LINE_CONTEXT; - - if (ctxt->data_cb != NULL && - ctxt->data_cb(patch->delta, &ctxt->range, - origin, bufs[1].ptr, bufs[1].size, ctxt->payload)) - ctxt->error = GIT_EUSER; - } - - if (len == 3 && !ctxt->error) { - /* If we have a '+' and a third buf, then we have added a line - * without a newline and the old code had one, so DEL_EOFNL. - * If we have a '-' and a third buf, then we have removed a line - * with out a newline but added a blank line, so ADD_EOFNL. - */ - char origin = - (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_DEL_EOFNL : - (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_ADD_EOFNL : - GIT_DIFF_LINE_CONTEXT_EOFNL; - - if (ctxt->data_cb != NULL && - ctxt->data_cb(patch->delta, &ctxt->range, - origin, bufs[2].ptr, bufs[2].size, ctxt->payload)) - ctxt->error = GIT_EUSER; - } - - return ctxt->error; -} - -static int diff_patch_generate( - diff_context *ctxt, git_diff_patch *patch) -{ - int error = 0; - xdemitcb_t xdiff_callback; - mmfile_t old_xdiff_data, new_xdiff_data; - - if ((patch->flags & GIT_DIFF_PATCH_DIFFED) != 0) - return 0; - - if ((patch->flags & GIT_DIFF_PATCH_LOADED) == 0) - if ((error = diff_patch_load(ctxt, patch)) < 0) - return error; - - if ((patch->flags & GIT_DIFF_PATCH_DIFFABLE) == 0) - return 0; - - if (!ctxt->file_cb && !ctxt->hunk_cb) - return 0; - - patch->ctxt = ctxt; - - memset(&xdiff_callback, 0, sizeof(xdiff_callback)); - xdiff_callback.outf = diff_patch_cb; - xdiff_callback.priv = patch; - - old_xdiff_data.ptr = patch->old_data.data; - old_xdiff_data.size = patch->old_data.len; - new_xdiff_data.ptr = patch->new_data.data; - new_xdiff_data.size = patch->new_data.len; - - xdl_diff(&old_xdiff_data, &new_xdiff_data, - &ctxt->xdiff_params, &ctxt->xdiff_config, &xdiff_callback); - - error = ctxt->error; - - if (!error) - patch->flags |= GIT_DIFF_PATCH_DIFFED; - - return error; -} - -static void diff_patch_unload(git_diff_patch *patch) -{ - if ((patch->flags & GIT_DIFF_PATCH_DIFFED) != 0) { - patch->flags = 
(patch->flags & ~GIT_DIFF_PATCH_DIFFED); - - patch->hunks_size = 0; - patch->lines_size = 0; - } - - if ((patch->flags & GIT_DIFF_PATCH_LOADED) != 0) { - patch->flags = (patch->flags & ~GIT_DIFF_PATCH_LOADED); - - release_content( - &patch->delta->old_file, &patch->old_data, patch->old_blob); - release_content( - &patch->delta->new_file, &patch->new_data, patch->new_blob); - } -} - -static void diff_patch_free(git_diff_patch *patch) -{ - diff_patch_unload(patch); - - git__free(patch->lines); - patch->lines = NULL; - patch->lines_asize = 0; - - git__free(patch->hunks); - patch->hunks = NULL; - patch->hunks_asize = 0; - - if (!(patch->flags & GIT_DIFF_PATCH_ALLOCATED)) - return; - - patch->flags = 0; - - git_diff_list_free(patch->diff); /* decrements refcount */ - - git__free(patch); -} - -#define MAX_HUNK_STEP 128 -#define MIN_HUNK_STEP 8 -#define MAX_LINE_STEP 256 -#define MIN_LINE_STEP 8 - -static int diff_patch_hunk_cb( - const git_diff_delta *delta, - const git_diff_range *range, - const char *header, - size_t header_len, - void *payload) -{ - git_diff_patch *patch = payload; - diff_patch_hunk *hunk; - - GIT_UNUSED(delta); - - if (patch->hunks_size >= patch->hunks_asize) { - size_t new_size; - diff_patch_hunk *new_hunks; - - if (patch->hunks_asize > MAX_HUNK_STEP) - new_size = patch->hunks_asize + MAX_HUNK_STEP; - else - new_size = patch->hunks_asize * 2; - if (new_size < MIN_HUNK_STEP) - new_size = MIN_HUNK_STEP; - - new_hunks = git__realloc( - patch->hunks, new_size * sizeof(diff_patch_hunk)); - if (!new_hunks) - return -1; - - patch->hunks = new_hunks; - patch->hunks_asize = new_size; - } - - hunk = &patch->hunks[patch->hunks_size++]; - - memcpy(&hunk->range, range, sizeof(hunk->range)); - - assert(header_len + 1 < sizeof(hunk->header)); - memcpy(&hunk->header, header, header_len); - hunk->header[header_len] = '\0'; - hunk->header_len = header_len; - - hunk->line_start = patch->lines_size; - hunk->line_count = 0; - - patch->oldno = range->old_start; - 
patch->newno = range->new_start; - - return 0; -} - -static int diff_patch_line_cb( - const git_diff_delta *delta, - const git_diff_range *range, - char line_origin, - const char *content, - size_t content_len, - void *payload) -{ - git_diff_patch *patch = payload; - diff_patch_hunk *hunk; - diff_patch_line *line; - - GIT_UNUSED(delta); - GIT_UNUSED(range); - - assert(patch->hunks_size > 0); - assert(patch->hunks != NULL); - - hunk = &patch->hunks[patch->hunks_size - 1]; - - if (patch->lines_size >= patch->lines_asize) { - size_t new_size; - diff_patch_line *new_lines; - - if (patch->lines_asize > MAX_LINE_STEP) - new_size = patch->lines_asize + MAX_LINE_STEP; - else - new_size = patch->lines_asize * 2; - if (new_size < MIN_LINE_STEP) - new_size = MIN_LINE_STEP; - - new_lines = git__realloc( - patch->lines, new_size * sizeof(diff_patch_line)); - if (!new_lines) - return -1; - - patch->lines = new_lines; - patch->lines_asize = new_size; - } - - line = &patch->lines[patch->lines_size++]; - - line->ptr = content; - line->len = content_len; - line->origin = line_origin; - - /* do some bookkeeping so we can provide old/new line numbers */ - - for (line->lines = 0; content_len > 0; --content_len) { - if (*content++ == '\n') - ++line->lines; - } - - switch (line_origin) { - case GIT_DIFF_LINE_ADDITION: - case GIT_DIFF_LINE_DEL_EOFNL: - line->oldno = -1; - line->newno = patch->newno; - patch->newno += line->lines; - break; - case GIT_DIFF_LINE_DELETION: - case GIT_DIFF_LINE_ADD_EOFNL: - line->oldno = patch->oldno; - line->newno = -1; - patch->oldno += line->lines; - break; - default: - line->oldno = patch->oldno; - line->newno = patch->newno; - patch->oldno += line->lines; - patch->newno += line->lines; - break; - } - - hunk->line_count++; - - return 0; -} - -static int diff_required(git_diff_list *diff, const char *action) -{ - if (!diff) { - giterr_set(GITERR_INVALID, "Must provide valid diff to %s", action); - return -1; - } - - return 0; -} - -int git_diff_foreach( - 
git_diff_list *diff, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - int error = 0; - diff_context ctxt; - size_t idx; - git_diff_patch patch; - - if (diff_required(diff, "git_diff_foreach") < 0) - return -1; - - if (diff_context_init( - &ctxt, diff, NULL, NULL, file_cb, hunk_cb, data_cb, payload) < 0) - return -1; - - diff_patch_init(&ctxt, &patch); - - git_vector_foreach(&diff->deltas, idx, patch.delta) { - - /* check flags against patch status */ - if (git_diff_delta__should_skip(ctxt.opts, patch.delta)) - continue; - - if (!(error = diff_patch_load(&ctxt, &patch))) { - - /* invoke file callback */ - error = diff_delta_file_callback(&ctxt, patch.delta, idx); - - /* generate diffs and invoke hunk and line callbacks */ - if (!error) - error = diff_patch_generate(&ctxt, &patch); - - diff_patch_unload(&patch); - } - - if (error < 0) - break; - } - - if (error == GIT_EUSER) - giterr_clear(); /* don't let error message leak */ - - return error; -} - -static void set_data_from_blob( - const git_blob *blob, git_map *map, git_diff_file *file) -{ - if (blob) { - file->size = git_blob_rawsize(blob); - git_oid_cpy(&file->oid, git_object_id((const git_object *)blob)); - file->mode = 0644; - - map->len = (size_t)file->size; - map->data = (char *)git_blob_rawcontent(blob); - } else { - file->size = 0; - file->flags |= GIT_DIFF_FLAG__NO_DATA; - - map->len = 0; - map->data = ""; - } -} - -static void set_data_from_buffer( - const char *buffer, size_t buffer_len, git_map *map, git_diff_file *file) -{ - file->size = (git_off_t)buffer_len; - file->mode = 0644; - map->len = buffer_len; - - if (!buffer) { - file->flags |= GIT_DIFF_FLAG__NO_DATA; - map->data = NULL; - } else { - map->data = (char *)buffer; - git_odb_hash(&file->oid, buffer, buffer_len, GIT_OBJ_BLOB); - } -} - -typedef struct { - diff_context ctxt; - git_diff_delta delta; - git_diff_patch patch; -} diff_single_data; - -static int diff_single_init( - 
diff_single_data *data, - git_repository *repo, - const git_diff_options *opts, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); - - memset(data, 0, sizeof(*data)); - - if (diff_context_init( - &data->ctxt, NULL, repo, opts, - file_cb, hunk_cb, data_cb, payload) < 0) - return -1; - - diff_patch_init(&data->ctxt, &data->patch); - - return 0; -} - -static int diff_single_apply(diff_single_data *data) -{ - int error; - git_diff_delta *delta = &data->delta; - bool has_old = ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); - bool has_new = ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); - - /* finish setting up fake git_diff_delta record and loaded data */ - - data->patch.delta = delta; - delta->flags = delta->flags & ~KNOWN_BINARY_FLAGS; - - delta->status = has_new ? - (has_old ? GIT_DELTA_MODIFIED : GIT_DELTA_ADDED) : - (has_old ? GIT_DELTA_DELETED : GIT_DELTA_UNTRACKED); - - if (git_oid__cmp(&delta->new_file.oid, &delta->old_file.oid) == 0) - delta->status = GIT_DELTA_UNMODIFIED; - - if ((error = diff_delta_is_binary_by_content( - &data->ctxt, delta, &delta->old_file, &data->patch.old_data)) < 0 || - (error = diff_delta_is_binary_by_content( - &data->ctxt, delta, &delta->new_file, &data->patch.new_data)) < 0) - goto cleanup; - - data->patch.flags |= GIT_DIFF_PATCH_LOADED; - - if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0 && - delta->status != GIT_DELTA_UNMODIFIED) - data->patch.flags |= GIT_DIFF_PATCH_DIFFABLE; - - /* do diffs */ - - if (!(error = diff_delta_file_callback(&data->ctxt, delta, 1))) - error = diff_patch_generate(&data->ctxt, &data->patch); - -cleanup: - if (error == GIT_EUSER) - giterr_clear(); - - diff_patch_unload(&data->patch); - - return error; -} - -int git_diff_blobs( - const git_blob *old_blob, - const git_blob *new_blob, - const git_diff_options *options, - git_diff_file_cb file_cb, - 
git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - int error; - diff_single_data d; - git_repository *repo = - new_blob ? git_object_owner((const git_object *)new_blob) : - old_blob ? git_object_owner((const git_object *)old_blob) : NULL; - - if (!repo) /* Hmm, given two NULL blobs, silently do no callbacks? */ - return 0; - - if ((error = diff_single_init( - &d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0) - return error; - - if (options && (options->flags & GIT_DIFF_REVERSE) != 0) { - const git_blob *swap = old_blob; - old_blob = new_blob; - new_blob = swap; - } - - set_data_from_blob(old_blob, &d.patch.old_data, &d.delta.old_file); - set_data_from_blob(new_blob, &d.patch.new_data, &d.delta.new_file); - - return diff_single_apply(&d); -} - -int git_diff_blob_to_buffer( - const git_blob *old_blob, - const char *buf, - size_t buflen, - const git_diff_options *options, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - int error; - diff_single_data d; - git_repository *repo = - old_blob ? git_object_owner((const git_object *)old_blob) : NULL; - - if (!repo && !buf) /* Hmm, given NULLs, silently do no callbacks? 
*/ - return 0; - - if ((error = diff_single_init( - &d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0) - return error; - - if (options && (options->flags & GIT_DIFF_REVERSE) != 0) { - set_data_from_buffer(buf, buflen, &d.patch.old_data, &d.delta.old_file); - set_data_from_blob(old_blob, &d.patch.new_data, &d.delta.new_file); - } else { - set_data_from_blob(old_blob, &d.patch.old_data, &d.delta.old_file); - set_data_from_buffer(buf, buflen, &d.patch.new_data, &d.delta.new_file); - } - - return diff_single_apply(&d); -} - -size_t git_diff_num_deltas(git_diff_list *diff) -{ - assert(diff); - return (size_t)diff->deltas.length; -} - -size_t git_diff_num_deltas_of_type(git_diff_list *diff, git_delta_t type) -{ - size_t i, count = 0; - git_diff_delta *delta; - - assert(diff); - - git_vector_foreach(&diff->deltas, i, delta) { - count += (delta->status == type); - } - - return count; -} - -int git_diff_get_patch( - git_diff_patch **patch_ptr, - const git_diff_delta **delta_ptr, - git_diff_list *diff, - size_t idx) -{ - int error; - diff_context ctxt; - git_diff_delta *delta; - git_diff_patch *patch; - - if (patch_ptr) - *patch_ptr = NULL; - if (delta_ptr) - *delta_ptr = NULL; - - if (diff_required(diff, "git_diff_get_patch") < 0) - return -1; - - if (diff_context_init( - &ctxt, diff, NULL, NULL, - NULL, diff_patch_hunk_cb, diff_patch_line_cb, NULL) < 0) - return -1; - - delta = git_vector_get(&diff->deltas, idx); - if (!delta) { - giterr_set(GITERR_INVALID, "Index out of range for delta in diff"); - return GIT_ENOTFOUND; - } - - if (delta_ptr) - *delta_ptr = delta; - - if (!patch_ptr && - ((delta->flags & KNOWN_BINARY_FLAGS) != 0 || - (diff->opts.flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0)) - return 0; - - if (git_diff_delta__should_skip(ctxt.opts, delta)) - return 0; - - /* Don't load the patch if the user doesn't want it */ - if (!patch_ptr) - return 0; - - patch = diff_patch_alloc(&ctxt, delta); - if (!patch) - return -1; - - if (!(error = diff_patch_load(&ctxt, 
patch))) { - ctxt.payload = patch; - - error = diff_patch_generate(&ctxt, patch); - - if (error == GIT_EUSER) - error = ctxt.error; - } - - if (error) - git_diff_patch_free(patch); - else if (patch_ptr) - *patch_ptr = patch; - - return error; -} - -void git_diff_patch_free(git_diff_patch *patch) -{ - if (patch) - GIT_REFCOUNT_DEC(patch, diff_patch_free); -} - -const git_diff_delta *git_diff_patch_delta(git_diff_patch *patch) -{ - assert(patch); - return patch->delta; -} - -size_t git_diff_patch_num_hunks(git_diff_patch *patch) -{ - assert(patch); - return patch->hunks_size; -} - -int git_diff_patch_line_stats( - size_t *total_ctxt, - size_t *total_adds, - size_t *total_dels, - const git_diff_patch *patch) -{ - size_t totals[3], idx; - - memset(totals, 0, sizeof(totals)); - - for (idx = 0; idx < patch->lines_size; ++idx) { - switch (patch->lines[idx].origin) { - case GIT_DIFF_LINE_CONTEXT: totals[0]++; break; - case GIT_DIFF_LINE_ADDITION: totals[1]++; break; - case GIT_DIFF_LINE_DELETION: totals[2]++; break; - default: - /* diff --stat and --numstat don't count EOFNL marks because - * they will always be paired with a ADDITION or DELETION line. 
- */ - break; - } - } - - if (total_ctxt) - *total_ctxt = totals[0]; - if (total_adds) - *total_adds = totals[1]; - if (total_dels) - *total_dels = totals[2]; - - return 0; -} - -static int diff_error_outofrange(const char *thing) -{ - giterr_set(GITERR_INVALID, "Diff patch %s index out of range", thing); - return GIT_ENOTFOUND; -} - -int git_diff_patch_get_hunk( - const git_diff_range **range, - const char **header, - size_t *header_len, - size_t *lines_in_hunk, - git_diff_patch *patch, - size_t hunk_idx) -{ - diff_patch_hunk *hunk; - - assert(patch); - - if (hunk_idx >= patch->hunks_size) { - if (range) *range = NULL; - if (header) *header = NULL; - if (header_len) *header_len = 0; - if (lines_in_hunk) *lines_in_hunk = 0; - - return diff_error_outofrange("hunk"); - } - - hunk = &patch->hunks[hunk_idx]; - - if (range) *range = &hunk->range; - if (header) *header = hunk->header; - if (header_len) *header_len = hunk->header_len; - if (lines_in_hunk) *lines_in_hunk = hunk->line_count; - - return 0; -} - -int git_diff_patch_num_lines_in_hunk( - git_diff_patch *patch, - size_t hunk_idx) -{ - assert(patch); - - if (hunk_idx >= patch->hunks_size) - return diff_error_outofrange("hunk"); - else - return (int)patch->hunks[hunk_idx].line_count; -} - -int git_diff_patch_get_line_in_hunk( - char *line_origin, - const char **content, - size_t *content_len, - int *old_lineno, - int *new_lineno, - git_diff_patch *patch, - size_t hunk_idx, - size_t line_of_hunk) -{ - diff_patch_hunk *hunk; - diff_patch_line *line; - const char *thing; - - assert(patch); - - if (hunk_idx >= patch->hunks_size) { - thing = "hunk"; - goto notfound; - } - hunk = &patch->hunks[hunk_idx]; - - if (line_of_hunk >= hunk->line_count) { - thing = "link"; - goto notfound; - } - - line = &patch->lines[hunk->line_start + line_of_hunk]; - - if (line_origin) *line_origin = line->origin; - if (content) *content = line->ptr; - if (content_len) *content_len = line->len; - if (old_lineno) *old_lineno = 
(int)line->oldno; - if (new_lineno) *new_lineno = (int)line->newno; - - return 0; - -notfound: - if (line_origin) *line_origin = GIT_DIFF_LINE_CONTEXT; - if (content) *content = NULL; - if (content_len) *content_len = 0; - if (old_lineno) *old_lineno = -1; - if (new_lineno) *new_lineno = -1; - - return diff_error_outofrange(thing); -} - -int git_diff__paired_foreach( - git_diff_list *idx2head, - git_diff_list *wd2idx, - int (*cb)(git_diff_delta *i2h, git_diff_delta *w2i, void *payload), - void *payload) -{ - int cmp; - git_diff_delta *i2h, *w2i; - size_t i, j, i_max, j_max; - int (*strcomp)(const char *, const char *); - - i_max = idx2head ? idx2head->deltas.length : 0; - j_max = wd2idx ? wd2idx->deltas.length : 0; - - /* Get appropriate strcmp function */ - strcomp = idx2head ? idx2head->strcomp : wd2idx ? wd2idx->strcomp : NULL; - - /* Assert both iterators use matching ignore-case. If this function ever - * supports merging diffs that are not sorted by the same function, then - * it will need to spool and sort on one of the results before merging - */ - if (idx2head && wd2idx) { - assert(idx2head->strcomp == wd2idx->strcomp); - } - - for (i = 0, j = 0; i < i_max || j < j_max; ) { - i2h = idx2head ? GIT_VECTOR_GET(&idx2head->deltas,i) : NULL; - w2i = wd2idx ? GIT_VECTOR_GET(&wd2idx->deltas,j) : NULL; - - cmp = !w2i ? -1 : !i2h ? 1 : - strcomp(i2h->old_file.path, w2i->old_file.path); - - if (cmp < 0) { - if (cb(i2h, NULL, payload)) - return GIT_EUSER; - i++; - } else if (cmp > 0) { - if (cb(NULL, w2i, payload)) - return GIT_EUSER; - j++; - } else { - if (cb(i2h, w2i, payload)) - return GIT_EUSER; - i++; j++; - } - } - - return 0; -} diff --git a/src/diff_output.h b/src/diff_output.h deleted file mode 100644 index 083355676..000000000 --- a/src/diff_output.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. 
For full terms see the included COPYING file. - */ -#ifndef INCLUDE_diff_output_h__ -#define INCLUDE_diff_output_h__ - -#include "git2/blob.h" -#include "diff.h" -#include "map.h" -#include "xdiff/xdiff.h" - -#define MAX_DIFF_FILESIZE 0x20000000 - -enum { - GIT_DIFF_PATCH_ALLOCATED = (1 << 0), - GIT_DIFF_PATCH_PREPPED = (1 << 1), - GIT_DIFF_PATCH_LOADED = (1 << 2), - GIT_DIFF_PATCH_DIFFABLE = (1 << 3), - GIT_DIFF_PATCH_DIFFED = (1 << 4), -}; - -/* context for performing diffs */ -typedef struct { - git_repository *repo; - git_diff_list *diff; - const git_diff_options *opts; - git_diff_file_cb file_cb; - git_diff_hunk_cb hunk_cb; - git_diff_data_cb data_cb; - void *payload; - int error; - git_diff_range range; - xdemitconf_t xdiff_config; - xpparam_t xdiff_params; -} diff_context; - -/* cached information about a single span in a diff */ -typedef struct diff_patch_line diff_patch_line; -struct diff_patch_line { - const char *ptr; - size_t len; - size_t lines, oldno, newno; - char origin; -}; - -/* cached information about a hunk in a diff */ -typedef struct diff_patch_hunk diff_patch_hunk; -struct diff_patch_hunk { - git_diff_range range; - char header[128]; - size_t header_len; - size_t line_start; - size_t line_count; -}; - -struct git_diff_patch { - git_refcount rc; - git_diff_list *diff; /* for refcount purposes, maybe NULL for blob diffs */ - git_diff_delta *delta; - diff_context *ctxt; /* only valid while generating patch */ - git_iterator_type_t old_src; - git_iterator_type_t new_src; - git_blob *old_blob; - git_blob *new_blob; - git_map old_data; - git_map new_data; - uint32_t flags; - diff_patch_hunk *hunks; - size_t hunks_asize, hunks_size; - diff_patch_line *lines; - size_t lines_asize, lines_size; - size_t oldno, newno; -}; - -/* context for performing diff on a single delta */ -typedef struct { - git_diff_patch *patch; - uint32_t prepped : 1; - uint32_t loaded : 1; - uint32_t diffable : 1; - uint32_t diffed : 1; -} diff_delta_context; - -extern int 
git_diff__paired_foreach( - git_diff_list *idx2head, - git_diff_list *wd2idx, - int (*cb)(git_diff_delta *i2h, git_diff_delta *w2i, void *payload), - void *payload); - -#endif diff --git a/src/diff_patch.c b/src/diff_patch.c new file mode 100644 index 000000000..d7eb69db6 --- /dev/null +++ b/src/diff_patch.c @@ -0,0 +1,723 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#include "common.h" +#include "diff.h" +#include "diff_file.h" +#include "diff_driver.h" +#include "diff_patch.h" +#include "diff_xdiff.h" + +static void diff_output_init(git_diff_output*, const git_diff_options*, + git_diff_file_cb, git_diff_hunk_cb, git_diff_data_cb, void*); + +static void diff_output_to_patch(git_diff_output *, git_diff_patch *); + +static void diff_patch_update_binary(git_diff_patch *patch) +{ + if ((patch->delta->flags & DIFF_FLAGS_KNOWN_BINARY) != 0) + return; + + if ((patch->ofile.file.flags & GIT_DIFF_FLAG_BINARY) != 0 || + (patch->nfile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + patch->delta->flags |= GIT_DIFF_FLAG_BINARY; + + else if ((patch->ofile.file.flags & DIFF_FLAGS_NOT_BINARY) != 0 && + (patch->nfile.file.flags & DIFF_FLAGS_NOT_BINARY) != 0) + patch->delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; +} + +static void diff_patch_init_common(git_diff_patch *patch) +{ + diff_patch_update_binary(patch); + + if ((patch->delta->flags & GIT_DIFF_FLAG_BINARY) != 0) + patch->flags |= GIT_DIFF_PATCH_LOADED; /* set LOADED but not DIFFABLE */ + + patch->flags |= GIT_DIFF_PATCH_INITIALIZED; + + if (patch->diff) + git_diff_list_addref(patch->diff); +} + +static int diff_patch_init_from_diff( + git_diff_patch *patch, git_diff_list *diff, size_t delta_index) +{ + int error = 0; + + memset(patch, 0, sizeof(*patch)); + patch->diff = diff; + patch->delta = git_vector_get(&diff->deltas, delta_index); + 
patch->delta_index = delta_index; + + if ((error = diff_file_content_init_from_diff( + &patch->ofile, diff, delta_index, true)) < 0 || + (error = diff_file_content_init_from_diff( + &patch->nfile, diff, delta_index, false)) < 0) + return error; + + diff_patch_init_common(patch); + + return 0; +} + +static int diff_patch_alloc_from_diff( + git_diff_patch **out, + git_diff_list *diff, + size_t delta_index) +{ + int error; + git_diff_patch *patch = git__calloc(1, sizeof(git_diff_patch)); + GITERR_CHECK_ALLOC(patch); + + if (!(error = diff_patch_init_from_diff(patch, diff, delta_index))) { + patch->flags |= GIT_DIFF_PATCH_ALLOCATED; + GIT_REFCOUNT_INC(patch); + } else { + git__free(patch); + patch = NULL; + } + + *out = patch; + return error; +} + +static int diff_patch_load(git_diff_patch *patch, git_diff_output *output) +{ + int error = 0; + bool incomplete_data; + + if ((patch->flags & GIT_DIFF_PATCH_LOADED) != 0) + return 0; + + /* if no hunk and data callbacks and user doesn't care if data looks + * binary, then there is no need to actually load the data + */ + if (patch->ofile.opts && + (patch->ofile.opts->flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0 && + output && !output->hunk_cb && !output->data_cb) + return 0; + +#define DIFF_FLAGS_KNOWN_DATA (GIT_DIFF_FLAG__NO_DATA|GIT_DIFF_FLAG_VALID_OID) + + incomplete_data = + ((patch->ofile.file.flags & DIFF_FLAGS_KNOWN_DATA) != 0 && + (patch->nfile.file.flags & DIFF_FLAGS_KNOWN_DATA) != 0); + + /* always try to load workdir content first because filtering may + * need 2x data size and this minimizes peak memory footprint + */ + if (patch->ofile.src == GIT_ITERATOR_TYPE_WORKDIR) { + if ((error = diff_file_content_load(&patch->ofile)) < 0 || + (patch->ofile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + goto cleanup; + } + if (patch->nfile.src == GIT_ITERATOR_TYPE_WORKDIR) { + if ((error = diff_file_content_load(&patch->nfile)) < 0 || + (patch->nfile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + goto cleanup; + } + + /* once workdir 
has been tried, load other data as needed */ + if (patch->ofile.src != GIT_ITERATOR_TYPE_WORKDIR) { + if ((error = diff_file_content_load(&patch->ofile)) < 0 || + (patch->ofile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + goto cleanup; + } + if (patch->nfile.src != GIT_ITERATOR_TYPE_WORKDIR) { + if ((error = diff_file_content_load(&patch->nfile)) < 0 || + (patch->nfile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + goto cleanup; + } + + /* if we were previously missing an oid, reassess UNMODIFIED state */ + if (incomplete_data && + patch->ofile.file.mode == patch->nfile.file.mode && + git_oid_equal(&patch->ofile.file.oid, &patch->nfile.file.oid)) + patch->delta->status = GIT_DELTA_UNMODIFIED; + +cleanup: + diff_patch_update_binary(patch); + + if (!error) { + /* patch is diffable only for non-binary, modified files where + * at least one side has data and the data actually changed + */ + if ((patch->delta->flags & GIT_DIFF_FLAG_BINARY) == 0 && + patch->delta->status != GIT_DELTA_UNMODIFIED && + (patch->ofile.map.len || patch->nfile.map.len) && + (patch->ofile.map.len != patch->nfile.map.len || + !git_oid_equal(&patch->ofile.file.oid, &patch->nfile.file.oid))) + patch->flags |= GIT_DIFF_PATCH_DIFFABLE; + + patch->flags |= GIT_DIFF_PATCH_LOADED; + } + + return error; +} + +static int diff_patch_file_callback( + git_diff_patch *patch, git_diff_output *output) +{ + float progress; + + if (!output->file_cb) + return 0; + + progress = patch->diff ? 
+ ((float)patch->delta_index / patch->diff->deltas.length) : 1.0f; + + if (output->file_cb(patch->delta, progress, output->payload) != 0) + output->error = GIT_EUSER; + + return output->error; +} + +static int diff_patch_generate(git_diff_patch *patch, git_diff_output *output) +{ + int error = 0; + + if ((patch->flags & GIT_DIFF_PATCH_DIFFED) != 0) + return 0; + + if ((patch->flags & GIT_DIFF_PATCH_LOADED) == 0 && + (error = diff_patch_load(patch, output)) < 0) + return error; + + if ((patch->flags & GIT_DIFF_PATCH_DIFFABLE) == 0) + return 0; + + if (output->diff_cb != NULL && + !(error = output->diff_cb(output, patch))) + patch->flags |= GIT_DIFF_PATCH_DIFFED; + + return error; +} + +static void diff_patch_free(git_diff_patch *patch) +{ + diff_file_content_clear(&patch->ofile); + diff_file_content_clear(&patch->nfile); + + git_array_clear(patch->lines); + git_array_clear(patch->hunks); + + git_diff_list_free(patch->diff); /* decrements refcount */ + patch->diff = NULL; + + git_pool_clear(&patch->flattened); + + if (patch->flags & GIT_DIFF_PATCH_ALLOCATED) + git__free(patch); +} + +static int diff_required(git_diff_list *diff, const char *action) +{ + if (diff) + return 0; + giterr_set(GITERR_INVALID, "Must provide valid diff to %s", action); + return -1; +} + +int git_diff_foreach( + git_diff_list *diff, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb data_cb, + void *payload) +{ + int error = 0; + git_xdiff_output xo; + size_t idx; + git_diff_patch patch; + + if (diff_required(diff, "git_diff_foreach") < 0) + return -1; + + diff_output_init((git_diff_output *)&xo, + &diff->opts, file_cb, hunk_cb, data_cb, payload); + git_xdiff_init(&xo, &diff->opts); + + git_vector_foreach(&diff->deltas, idx, patch.delta) { + /* check flags against patch status */ + if (git_diff_delta__should_skip(&diff->opts, patch.delta)) + continue; + + if (!(error = diff_patch_init_from_diff(&patch, diff, idx))) { + + error = diff_patch_file_callback(&patch, 
(git_diff_output *)&xo); + + if (!error) + error = diff_patch_generate(&patch, (git_diff_output *)&xo); + + git_diff_patch_free(&patch); + } + + if (error < 0) + break; + } + + if (error == GIT_EUSER) + giterr_clear(); /* don't leave error message set invalidly */ + return error; +} + +typedef struct { + git_xdiff_output xo; + git_diff_patch patch; + git_diff_delta delta; +} diff_single_info; + +static int diff_single_generate(diff_single_info *info) +{ + int error = 0; + git_diff_patch *patch = &info->patch; + bool has_old = ((patch->ofile.file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); + bool has_new = ((patch->nfile.file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); + + info->delta.status = has_new ? + (has_old ? GIT_DELTA_MODIFIED : GIT_DELTA_ADDED) : + (has_old ? GIT_DELTA_DELETED : GIT_DELTA_UNTRACKED); + + if (git_oid_equal(&patch->nfile.file.oid, &patch->ofile.file.oid)) + info->delta.status = GIT_DELTA_UNMODIFIED; + + patch->delta = &info->delta; + + diff_patch_init_common(patch); + + error = diff_patch_file_callback(patch, (git_diff_output *)&info->xo); + + if (!error) + error = diff_patch_generate(patch, (git_diff_output *)&info->xo); + + if (error == GIT_EUSER) + giterr_clear(); /* don't leave error message set invalidly */ + + return error; +} + +int git_diff_blobs( + const git_blob *old_blob, + const git_blob *new_blob, + const git_diff_options *opts, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb data_cb, + void *payload) +{ + int error = 0; + diff_single_info info; + git_repository *repo = + new_blob ? git_object_owner((const git_object *)new_blob) : + old_blob ? git_object_owner((const git_object *)old_blob) : NULL; + + GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); + + if (!repo) /* Hmm, given two NULL blobs, silently do no callbacks? 
*/ + return 0; + + if (opts && (opts->flags & GIT_DIFF_REVERSE) != 0) { + const git_blob *swap = old_blob; + old_blob = new_blob; + new_blob = swap; + } + + memset(&info, 0, sizeof(info)); + + diff_output_init((git_diff_output *)&info.xo, + opts, file_cb, hunk_cb, data_cb, payload); + git_xdiff_init(&info.xo, opts); + + if (!(error = diff_file_content_init_from_blob( + &info.patch.ofile, repo, opts, old_blob)) && + !(error = diff_file_content_init_from_blob( + &info.patch.nfile, repo, opts, new_blob))) + error = diff_single_generate(&info); + + git_diff_patch_free(&info.patch); + + return error; +} + +int git_diff_blob_to_buffer( + const git_blob *old_blob, + const char *buf, + size_t buflen, + const git_diff_options *opts, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb data_cb, + void *payload) +{ + int error = 0; + diff_single_info info; + git_repository *repo = + old_blob ? git_object_owner((const git_object *)old_blob) : NULL; + + GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); + + if (!repo && !buf) /* Hmm, given NULLs, silently do no callbacks? 
*/ + return 0; + + memset(&info, 0, sizeof(info)); + + diff_output_init((git_diff_output *)&info.xo, + opts, file_cb, hunk_cb, data_cb, payload); + git_xdiff_init(&info.xo, opts); + + if (opts && (opts->flags & GIT_DIFF_REVERSE) != 0) { + if (!(error = diff_file_content_init_from_raw( + &info.patch.ofile, repo, opts, buf, buflen))) + error = diff_file_content_init_from_blob( + &info.patch.nfile, repo, opts, old_blob); + } else { + if (!(error = diff_file_content_init_from_blob( + &info.patch.ofile, repo, opts, old_blob))) + error = diff_file_content_init_from_raw( + &info.patch.nfile, repo, opts, buf, buflen); + } + /* generate only if both sides initialized, so an init failure is reported instead of being overwritten */ + if (!error) error = diff_single_generate(&info); + + git_diff_patch_free(&info.patch); + + return error; +} + +int git_diff_get_patch( + git_diff_patch **patch_ptr, + const git_diff_delta **delta_ptr, + git_diff_list *diff, + size_t idx) +{ + int error = 0; + git_xdiff_output xo; + git_diff_delta *delta = NULL; + git_diff_patch *patch = NULL; + + if (patch_ptr) *patch_ptr = NULL; + if (delta_ptr) *delta_ptr = NULL; + + if (diff_required(diff, "git_diff_get_patch") < 0) + return -1; + + delta = git_vector_get(&diff->deltas, idx); + if (!delta) { + giterr_set(GITERR_INVALID, "Index out of range for delta in diff"); + return GIT_ENOTFOUND; + } + + if (delta_ptr) + *delta_ptr = delta; + + if (git_diff_delta__should_skip(&diff->opts, delta)) + return 0; + + /* don't load the patch data unless we need it for binary check */ + if (!patch_ptr && + ((delta->flags & DIFF_FLAGS_KNOWN_BINARY) != 0 || + (diff->opts.flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0)) + return 0; + + if ((error = diff_patch_alloc_from_diff(&patch, diff, idx)) < 0) + return error; + + diff_output_to_patch((git_diff_output *)&xo, patch); + git_xdiff_init(&xo, &diff->opts); + + error = diff_patch_file_callback(patch, (git_diff_output *)&xo); + + if (!error) + error = diff_patch_generate(patch, (git_diff_output *)&xo); + + if (!error) { + /* if cumulative diff size is < 0.5 total size, flatten the patch */ 
+ /* unload the file content */ + } + + if (error || !patch_ptr) + git_diff_patch_free(patch); + else + *patch_ptr = patch; + + if (error == GIT_EUSER) + giterr_clear(); /* don't leave error message set invalidly */ + return error; +} + +void git_diff_patch_free(git_diff_patch *patch) +{ + if (patch) + GIT_REFCOUNT_DEC(patch, diff_patch_free); +} + +const git_diff_delta *git_diff_patch_delta(git_diff_patch *patch) +{ + assert(patch); + return patch->delta; +} + +size_t git_diff_patch_num_hunks(git_diff_patch *patch) +{ + assert(patch); + return git_array_size(patch->hunks); +} + +int git_diff_patch_line_stats( + size_t *total_ctxt, + size_t *total_adds, + size_t *total_dels, + const git_diff_patch *patch) +{ + size_t totals[3], idx; + + memset(totals, 0, sizeof(totals)); + + for (idx = 0; idx < git_array_size(patch->lines); ++idx) { + diff_patch_line *line = git_array_get(patch->lines, idx); + if (!line) + continue; + + switch (line->origin) { + case GIT_DIFF_LINE_CONTEXT: totals[0]++; break; + case GIT_DIFF_LINE_ADDITION: totals[1]++; break; + case GIT_DIFF_LINE_DELETION: totals[2]++; break; + default: + /* diff --stat and --numstat don't count EOFNL marks because + * they will always be paired with a ADDITION or DELETION line. 
+ */ + break; + } + } + + if (total_ctxt) + *total_ctxt = totals[0]; + if (total_adds) + *total_adds = totals[1]; + if (total_dels) + *total_dels = totals[2]; + + return 0; +} + +static int diff_error_outofrange(const char *thing) +{ + giterr_set(GITERR_INVALID, "Diff patch %s index out of range", thing); + return GIT_ENOTFOUND; +} + +int git_diff_patch_get_hunk( + const git_diff_range **range, + const char **header, + size_t *header_len, + size_t *lines_in_hunk, + git_diff_patch *patch, + size_t hunk_idx) +{ + diff_patch_hunk *hunk; + assert(patch); + + hunk = git_array_get(patch->hunks, hunk_idx); + + if (!hunk) { + if (range) *range = NULL; + if (header) *header = NULL; + if (header_len) *header_len = 0; + if (lines_in_hunk) *lines_in_hunk = 0; + return diff_error_outofrange("hunk"); + } + + if (range) *range = &hunk->range; + if (header) *header = hunk->header; + if (header_len) *header_len = hunk->header_len; + if (lines_in_hunk) *lines_in_hunk = hunk->line_count; + return 0; +} + +int git_diff_patch_num_lines_in_hunk(git_diff_patch *patch, size_t hunk_idx) +{ + diff_patch_hunk *hunk; + assert(patch); + + if (!(hunk = git_array_get(patch->hunks, hunk_idx))) + return diff_error_outofrange("hunk"); + return (int)hunk->line_count; +} + +int git_diff_patch_get_line_in_hunk( + char *line_origin, + const char **content, + size_t *content_len, + int *old_lineno, + int *new_lineno, + git_diff_patch *patch, + size_t hunk_idx, + size_t line_of_hunk) +{ + diff_patch_hunk *hunk; + diff_patch_line *line; + const char *thing; + + assert(patch); + + if (!(hunk = git_array_get(patch->hunks, hunk_idx))) { + thing = "hunk"; + goto notfound; + } + + if (line_of_hunk >= hunk->line_count || + !(line = git_array_get( + patch->lines, hunk->line_start + line_of_hunk))) { + thing = "line"; + goto notfound; + } + + if (line_origin) *line_origin = line->origin; + if (content) *content = line->ptr; + if (content_len) *content_len = line->len; + if (old_lineno) *old_lineno = 
(int)line->oldno; + if (new_lineno) *new_lineno = (int)line->newno; + + return 0; + +notfound: + if (line_origin) *line_origin = GIT_DIFF_LINE_CONTEXT; + if (content) *content = NULL; + if (content_len) *content_len = 0; + if (old_lineno) *old_lineno = -1; + if (new_lineno) *new_lineno = -1; + + return diff_error_outofrange(thing); +} + + +static int diff_patch_file_cb( + const git_diff_delta *delta, + float progress, + void *payload) +{ + GIT_UNUSED(delta); + GIT_UNUSED(progress); + GIT_UNUSED(payload); + return 0; +} + +static int diff_patch_hunk_cb( + const git_diff_delta *delta, + const git_diff_range *range, + const char *header, + size_t header_len, + void *payload) +{ + git_diff_patch *patch = payload; + diff_patch_hunk *hunk; + + GIT_UNUSED(delta); + + git_array_alloc(patch->hunks, hunk); + GITERR_CHECK_ALLOC(hunk); + + memcpy(&hunk->range, range, sizeof(hunk->range)); + /* clamp instead of assert(): asserts compile out under NDEBUG, leaving the copy into the fixed-size header unbounded */ + if (header_len >= sizeof(hunk->header)) header_len = sizeof(hunk->header) - 1; + memcpy(&hunk->header, header, header_len); + hunk->header[header_len] = '\0'; + hunk->header_len = header_len; + + hunk->line_start = git_array_size(patch->lines); + hunk->line_count = 0; + + patch->oldno = range->old_start; + patch->newno = range->new_start; + + return 0; +} + +static int diff_patch_line_cb( + const git_diff_delta *delta, + const git_diff_range *range, + char line_origin, + const char *content, + size_t content_len, + void *payload) +{ + git_diff_patch *patch = payload; + diff_patch_hunk *hunk; + diff_patch_line *line; + + GIT_UNUSED(delta); + GIT_UNUSED(range); + + hunk = git_array_last(patch->hunks); + GITERR_CHECK_ALLOC(hunk); + + git_array_alloc(patch->lines, line); + GITERR_CHECK_ALLOC(line); + + line->ptr = content; + line->len = content_len; + line->origin = line_origin; + + patch->content_size += content_len; + + /* do some bookkeeping so we can provide old/new line numbers */ + + for (line->lines = 0; content_len > 0; --content_len) { + if (*content++ == '\n') + ++line->lines; + } + + switch (line_origin) { + case 
GIT_DIFF_LINE_ADDITION: + case GIT_DIFF_LINE_DEL_EOFNL: + line->oldno = -1; + line->newno = patch->newno; + patch->newno += line->lines; + break; + case GIT_DIFF_LINE_DELETION: + case GIT_DIFF_LINE_ADD_EOFNL: + line->oldno = patch->oldno; + line->newno = -1; + patch->oldno += line->lines; + break; + default: + line->oldno = patch->oldno; + line->newno = patch->newno; + patch->oldno += line->lines; + patch->newno += line->lines; + break; + } + + hunk->line_count++; + + return 0; +} + +static void diff_output_init( + git_diff_output *out, + const git_diff_options *opts, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb data_cb, + void *payload) +{ + GIT_UNUSED(opts); + + memset(out, 0, sizeof(*out)); + + out->file_cb = file_cb; + out->hunk_cb = hunk_cb; + out->data_cb = data_cb; + out->payload = payload; +} + +static void diff_output_to_patch(git_diff_output *out, git_diff_patch *patch) +{ + diff_output_init( + out, patch->ofile.opts, + diff_patch_file_cb, diff_patch_hunk_cb, diff_patch_line_cb, patch); +} diff --git a/src/diff_patch.h b/src/diff_patch.h new file mode 100644 index 000000000..7de6e1e5b --- /dev/null +++ b/src/diff_patch.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#ifndef INCLUDE_diff_patch_h__ +#define INCLUDE_diff_patch_h__ + +#include "common.h" +#include "diff.h" +#include "diff_file.h" +#include "array.h" + +/* cached information about a single span in a diff */ +typedef struct diff_patch_line diff_patch_line; +struct diff_patch_line { + const char *ptr; + size_t len; + size_t lines, oldno, newno; + char origin; +}; + +/* cached information about a hunk in a diff */ +typedef struct diff_patch_hunk diff_patch_hunk; +struct diff_patch_hunk { + git_diff_range range; + char header[128]; + size_t header_len; + size_t line_start; + size_t line_count; +}; + +struct git_diff_patch { + git_refcount rc; + git_diff_list *diff; /* for refcount purposes, maybe NULL for blob diffs */ + git_diff_delta *delta; + size_t delta_index; + git_diff_file_content ofile; + git_diff_file_content nfile; + uint32_t flags; + git_array_t(diff_patch_hunk) hunks; + git_array_t(diff_patch_line) lines; + size_t oldno, newno; + size_t content_size; + git_pool flattened; +}; + +enum { + GIT_DIFF_PATCH_ALLOCATED = (1 << 0), + GIT_DIFF_PATCH_INITIALIZED = (1 << 1), + GIT_DIFF_PATCH_LOADED = (1 << 2), + GIT_DIFF_PATCH_DIFFABLE = (1 << 3), + GIT_DIFF_PATCH_DIFFED = (1 << 4), + GIT_DIFF_PATCH_FLATTENED = (1 << 5), +}; + +typedef struct git_diff_output git_diff_output; +struct git_diff_output { + /* these callbacks are issued with the diff data */ + git_diff_file_cb file_cb; + git_diff_hunk_cb hunk_cb; + git_diff_data_cb data_cb; + void *payload; + + /* this records the actual error in cases where it may be obscured */ + int error; + + /* this callback is used to do the diff and drive the other callbacks. + * see diff_xdiff.h for how to use this in practice for now. 
+ */ + int (*diff_cb)(git_diff_output *output, git_diff_patch *patch); +}; + +#endif diff --git a/src/diff_print.c b/src/diff_print.c index b6fbec829..860876531 100644 --- a/src/diff_print.c +++ b/src/diff_print.c @@ -6,7 +6,8 @@ */ #include "common.h" #include "diff.h" -#include "diff_output.h" +#include "diff_patch.h" +#include "buffer.h" typedef struct { git_diff_list *diff; @@ -390,14 +391,15 @@ int git_diff_patch_print( &pi, &temp, patch->diff, print_cb, payload))) error = print_patch_file(patch->delta, 0, &pi); - for (h = 0; h < patch->hunks_size && !error; ++h) { - diff_patch_hunk *hunk = &patch->hunks[h]; + for (h = 0; h < git_array_size(patch->hunks) && !error; ++h) { + diff_patch_hunk *hunk = git_array_get(patch->hunks, h); error = print_patch_hunk( patch->delta, &hunk->range, hunk->header, hunk->header_len, &pi); for (l = 0; l < hunk->line_count && !error; ++l) { - diff_patch_line *line = &patch->lines[hunk->line_start + l]; + diff_patch_line *line = + git_array_get(patch->lines, hunk->line_start + l); error = print_patch_line( patch->delta, &hunk->range, diff --git a/src/diff_tform.c b/src/diff_tform.c index bc3acae1d..597c240ae 100644 --- a/src/diff_tform.c +++ b/src/diff_tform.c @@ -5,10 +5,14 @@ * a Linking Exception. For full terms see the included COPYING file. */ #include "common.h" -#include "diff.h" + #include "git2/config.h" #include "git2/blob.h" + +#include "diff.h" #include "hashsig.h" +#include "path.h" +#include "fileops.h" static git_diff_delta *diff_delta__dup( const git_diff_delta *d, git_pool *pool) diff --git a/src/diff_xdiff.c b/src/diff_xdiff.c new file mode 100644 index 000000000..1d1c2d54c --- /dev/null +++ b/src/diff_xdiff.c @@ -0,0 +1,161 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#include "common.h" +#include "diff.h" +#include "diff_driver.h" +#include "diff_patch.h" +#include "diff_xdiff.h" + +static int git_xdiff_scan_int(const char **str, int *value) +{ + const char *scan = *str; + int v = 0, digits = 0; + /* find next digit */ + for (scan = *str; *scan && !git__isdigit(*scan); scan++); + /* parse next number */ + for (; git__isdigit(*scan); scan++, digits++) + v = (v * 10) + (*scan - '0'); + *str = scan; + *value = v; + return (digits > 0) ? 0 : -1; +} + +static int git_xdiff_parse_hunk(git_diff_range *range, const char *header) +{ + /* expect something of the form "@@ -%d[,%d] +%d[,%d] @@" */ + if (*header != '@') + return -1; + if (git_xdiff_scan_int(&header, &range->old_start) < 0) + return -1; + if (*header == ',') { + if (git_xdiff_scan_int(&header, &range->old_lines) < 0) + return -1; + } else + range->old_lines = 1; + if (git_xdiff_scan_int(&header, &range->new_start) < 0) + return -1; + if (*header == ',') { + if (git_xdiff_scan_int(&header, &range->new_lines) < 0) + return -1; + } else + range->new_lines = 1; + if (range->old_start < 0 || range->new_start < 0) + return -1; + + return 0; +} + +typedef struct { + git_xdiff_output *xo; + git_diff_patch *patch; + git_diff_range range; +} git_xdiff_info; + +static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len) +{ + git_xdiff_info *info = priv; + git_diff_patch *patch = info->patch; + git_diff_output *output = &info->xo->output; + + if (len == 1) { + output->error = git_xdiff_parse_hunk(&info->range, bufs[0].ptr); + if (output->error < 0) + return output->error; + + if (output->hunk_cb != NULL && + output->hunk_cb(patch->delta, &info->range, + bufs[0].ptr, bufs[0].size, output->payload)) + output->error = GIT_EUSER; + } + + if (len == 2 || len == 3) { + /* expect " "/"-"/"+", then data */ + char origin = + (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_ADDITION : + (*bufs[0].ptr == '-') ? 
GIT_DIFF_LINE_DELETION : + GIT_DIFF_LINE_CONTEXT; + + if (output->data_cb != NULL && + output->data_cb(patch->delta, &info->range, + origin, bufs[1].ptr, bufs[1].size, output->payload)) + output->error = GIT_EUSER; + } + + if (len == 3 && !output->error) { + /* If we have a '+' and a third buf, then we have added a line + * without a newline and the old code had one, so DEL_EOFNL. + * If we have a '-' and a third buf, then we have removed a line + * with out a newline but added a blank line, so ADD_EOFNL. + */ + char origin = + (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_DEL_EOFNL : + (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_ADD_EOFNL : + GIT_DIFF_LINE_CONTEXT_EOFNL; + + if (output->data_cb != NULL && + output->data_cb(patch->delta, &info->range, + origin, bufs[2].ptr, bufs[2].size, output->payload)) + output->error = GIT_EUSER; + } + + return output->error; +} + +static int git_xdiff(git_diff_output *output, git_diff_patch *patch) +{ + git_xdiff_output *xo = (git_xdiff_output *)output; + git_xdiff_info info; + mmfile_t old_xdiff_data, new_xdiff_data; + + memset(&info, 0, sizeof(info)); + info.patch = patch; + info.xo = xo; + + xo->callback.priv = &info; + + xo->config.find_func_priv = patch->ofile.driver; + xo->config.find_func = patch->ofile.driver ? + git_diff_driver_find_content_fn(patch->ofile.driver) : NULL; + + if (xo->config.find_func != NULL) + xo->config.flags |= XDL_EMIT_FUNCNAMES; + else + xo->config.flags &= ~XDL_EMIT_FUNCNAMES; + + + old_xdiff_data.ptr = patch->ofile.map.data; + old_xdiff_data.size = patch->ofile.map.len; + new_xdiff_data.ptr = patch->nfile.map.data; + new_xdiff_data.size = patch->nfile.map.len; + + xdl_diff(&old_xdiff_data, &new_xdiff_data, + &xo->params, &xo->config, &xo->callback); + + return xo->output.error; +} + +void git_xdiff_init(git_xdiff_output *xo, const git_diff_options *opts) +{ + uint32_t flags = opts ? 
opts->flags : GIT_DIFF_NORMAL; + + xo->output.diff_cb = git_xdiff; + + memset(&xo->config, 0, sizeof(xo->config)); + xo->config.ctxlen = opts ? opts->context_lines : 3; + xo->config.interhunkctxlen = opts ? opts->interhunk_lines : 0; + + memset(&xo->params, 0, sizeof(xo->params)); + if (flags & GIT_DIFF_IGNORE_WHITESPACE) + xo->params.flags |= XDF_WHITESPACE_FLAGS; + if (flags & GIT_DIFF_IGNORE_WHITESPACE_CHANGE) + xo->params.flags |= XDF_IGNORE_WHITESPACE_CHANGE; + if (flags & GIT_DIFF_IGNORE_WHITESPACE_EOL) + xo->params.flags |= XDF_IGNORE_WHITESPACE_AT_EOL; + + memset(&xo->callback, 0, sizeof(xo->callback)); + xo->callback.outf = git_xdiff_cb; +} diff --git a/src/diff_xdiff.h b/src/diff_xdiff.h new file mode 100644 index 000000000..c547b00cf --- /dev/null +++ b/src/diff_xdiff.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_diff_xdiff_h__ +#define INCLUDE_diff_xdiff_h__ + +#include "diff.h" +#include "diff_patch.h" +#include "xdiff/xdiff.h" + +/* A git_xdiff_output is a git_diff_output with extra fields necessary + * to use libxdiff. Calling git_xdiff_init() will set the diff_cb field + * of the output to use xdiff to generate the diffs. 
+ */ +typedef struct { + git_diff_output output; + + xdemitconf_t config; + xpparam_t params; + xdemitcb_t callback; +} git_xdiff_output; + +void git_xdiff_init(git_xdiff_output *xo, const git_diff_options *opts); + +#endif diff --git a/src/fetch.c b/src/fetch.c index b5ec69777..03fad5fec 100644 --- a/src/fetch.c +++ b/src/fetch.c @@ -16,6 +16,8 @@ #include "pack.h" #include "fetch.h" #include "netops.h" +#include "repository.h" +#include "refs.h" struct filter_payload { git_remote *remote; diff --git a/src/iterator.c b/src/iterator.c index 4360b99ad..76b0e41d0 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -7,6 +7,7 @@ #include "iterator.h" #include "tree.h" +#include "index.h" #include "ignore.h" #include "buffer.h" #include "git2/submodule.h" diff --git a/src/merge.c b/src/merge.c index 047d96013..82d2e6f37 100644 --- a/src/merge.c +++ b/src/merge.c @@ -24,6 +24,8 @@ #include "blob.h" #include "hashsig.h" #include "oid.h" +#include "index.h" +#include "filebuf.h" #include "git2/types.h" #include "git2/repository.h" diff --git a/src/refdb_fs.c b/src/refdb_fs.c index 4083ba9e5..b9e283ac5 100644 --- a/src/refdb_fs.c +++ b/src/refdb_fs.c @@ -9,6 +9,7 @@ #include "hash.h" #include "repository.h" #include "fileops.h" +#include "filebuf.h" #include "pack.h" #include "reflog.h" #include "refdb.h" diff --git a/src/refs.c b/src/refs.c index 2b545954d..c0e460cc3 100644 --- a/src/refs.c +++ b/src/refs.c @@ -9,6 +9,7 @@ #include "hash.h" #include "repository.h" #include "fileops.h" +#include "filebuf.h" #include "pack.h" #include "reflog.h" #include "refdb.h" diff --git a/src/remote.h b/src/remote.h index c9c26b77d..dce4803ed 100644 --- a/src/remote.h +++ b/src/remote.h @@ -11,7 +11,7 @@ #include "git2/transport.h" #include "refspec.h" -#include "repository.h" +#include "vector.h" #define GIT_REMOTE_ORIGIN "origin" diff --git a/src/repository.c b/src/repository.c index 2e7a334c9..4514fee23 100644 --- a/src/repository.c +++ b/src/repository.c @@ -17,12 +17,15 @@ #include 
"tag.h" #include "blob.h" #include "fileops.h" +#include "filebuf.h" +#include "index.h" #include "config.h" #include "refs.h" #include "filter.h" #include "odb.h" #include "remote.h" #include "merge.h" +#include "diff_driver.h" #define GIT_FILE_CONTENT_PREFIX "gitdir:" @@ -108,6 +111,7 @@ void git_repository_free(git_repository *repo) git_cache_free(&repo->objects); git_submodule_config_free(repo); + git_diff_driver_registry_free(repo->diff_drivers); git__free(repo->path_repository); git__free(repo->workdir); diff --git a/src/repository.h b/src/repository.h index bd5f63dac..12dc50d51 100644 --- a/src/repository.h +++ b/src/repository.h @@ -14,15 +14,13 @@ #include "git2/object.h" #include "git2/config.h" -#include "index.h" #include "cache.h" #include "refs.h" #include "buffer.h" -#include "odb.h" #include "object.h" #include "attrcache.h" #include "strmap.h" -#include "refdb.h" +#include "diff_driver.h" #define DOT_GIT ".git" #define GIT_DIR DOT_GIT "/" @@ -108,6 +106,7 @@ struct git_repository { git_cache objects; git_attr_cache attrcache; git_strmap *submodules; + git_diff_driver_registry *diff_drivers; char *path_repository; char *workdir; diff --git a/src/signature.c b/src/signature.c index cd6167fb4..4b8f03a21 100644 --- a/src/signature.c +++ b/src/signature.c @@ -9,6 +9,7 @@ #include "signature.h" #include "repository.h" #include "git2/common.h" +#include "posix.h" void git_signature_free(git_signature *sig) { diff --git a/src/stash.c b/src/stash.c index 19b29be77..1222634d5 100644 --- a/src/stash.c +++ b/src/stash.c @@ -14,6 +14,7 @@ #include "git2/stash.h" #include "git2/status.h" #include "git2/checkout.h" +#include "git2/index.h" #include "signature.h" static int create_error(int error, const char *msg) diff --git a/src/status.c b/src/status.c index 89f3eedb5..712e0d515 100644 --- a/src/status.c +++ b/src/status.c @@ -14,10 +14,10 @@ #include "git2/status.h" #include "repository.h" #include "ignore.h" +#include "index.h" #include "git2/diff.h" #include 
"diff.h" -#include "diff_output.h" static unsigned int index_delta2status(git_delta_t index_status) { diff --git a/src/submodule.c b/src/submodule.c index 16114d8ac..af488b7f3 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -22,6 +22,8 @@ #include "submodule.h" #include "tree.h" #include "iterator.h" +#include "path.h" +#include "index.h" #define GIT_MODULES_FILE ".gitmodules" diff --git a/src/thread-utils.h b/src/thread-utils.h index 49b5f3b5e..f56f61b50 100644 --- a/src/thread-utils.h +++ b/src/thread-utils.h @@ -7,8 +7,6 @@ #ifndef INCLUDE_thread_utils_h__ #define INCLUDE_thread_utils_h__ -#include "common.h" - /* Common operations even if threading has been disabled */ typedef struct { #if defined(GIT_WIN32) diff --git a/src/tree.c b/src/tree.c index 10d131438..65d01b4d5 100644 --- a/src/tree.c +++ b/src/tree.c @@ -10,6 +10,9 @@ #include "tree.h" #include "git2/repository.h" #include "git2/object.h" +#include "path.h" +#include "tree-cache.h" +#include "index.h" #define DEFAULT_TREE_SIZE 16 #define MAX_FILEMODE_BYTES 6 diff --git a/src/util.h b/src/util.h index 5ae87ac10..43ba79240 100644 --- a/src/util.h +++ b/src/util.h @@ -194,6 +194,8 @@ extern int git__strcasecmp(const char *a, const char *b); extern int git__strncmp(const char *a, const char *b, size_t sz); extern int git__strncasecmp(const char *a, const char *b, size_t sz); +#include "thread-utils.h" + typedef struct { git_atomic refcount; void *owner; diff --git a/tests-clar/checkout/index.c b/tests-clar/checkout/index.c index 78ff5ac62..a3a0f8fda 100644 --- a/tests-clar/checkout/index.c +++ b/tests-clar/checkout/index.c @@ -2,6 +2,7 @@ #include "checkout_helpers.h" #include "git2/checkout.h" +#include "fileops.h" #include "repository.h" static git_repository *g_repo; diff --git a/tests-clar/clar.c b/tests-clar/clar.c index fed87c30d..0eae81bf5 100644 --- a/tests-clar/clar.c +++ b/tests-clar/clar.c @@ -183,10 +183,10 @@ clar_run_test( } static void -clar_run_suite(const struct clar_suite *suite) 
+clar_run_suite(const struct clar_suite *suite, const char *name) { const struct clar_func *test = suite->tests; - size_t i; + size_t i, namelen; if (!suite->enabled) return; @@ -200,7 +200,23 @@ clar_run_suite(const struct clar_suite *suite) _clar.active_suite = suite->name; _clar.suite_errors = 0; + if (name) { + size_t suitelen = strlen(suite->name); + namelen = strlen(name); + if (namelen <= suitelen) { + name = NULL; + } else { + name += suitelen; + while (*name == ':') + ++name; + namelen = strlen(name); + } + } + for (i = 0; i < suite->test_count; ++i) { + if (name && strncmp(test[i].name, name, namelen)) + continue; + _clar.active_test = test[i].name; clar_run_test(&test[i], &suite->initialize, &suite->cleanup); @@ -240,7 +256,7 @@ clar_parse_args(int argc, char **argv) case 'x': { /* given suite name */ int offset = (argument[2] == '=') ? 3 : 2, found = 0; char action = argument[1]; - size_t j, len; + size_t j, len, cmplen; argument += offset; len = strlen(argument); @@ -249,7 +265,11 @@ clar_parse_args(int argc, char **argv) clar_usage(argv[0]); for (j = 0; j < _clar_suite_count; ++j) { - if (strncmp(argument, _clar_suites[j].name, len) == 0) { + cmplen = strlen(_clar_suites[j].name); + if (cmplen > len) + cmplen = len; + + if (strncmp(argument, _clar_suites[j].name, cmplen) == 0) { int exact = !strcmp(argument, _clar_suites[j].name); ++found; @@ -258,9 +278,9 @@ clar_parse_args(int argc, char **argv) _clar.report_suite_names = 1; switch (action) { - case 's': clar_run_suite(&_clar_suites[j]); break; - case 'i': _clar_suites[j].enabled = 1; break; - case 'x': _clar_suites[j].enabled = 0; break; + case 's': clar_run_suite(&_clar_suites[j], argument); break; + case 'i': _clar_suites[j].enabled = 1; break; + case 'x': _clar_suites[j].enabled = 0; break; } if (exact) @@ -318,7 +338,7 @@ clar_test(int argc, char **argv) if (!_clar.suites_ran) { size_t i; for (i = 0; i < _clar_suite_count; ++i) - clar_run_suite(&_clar_suites[i]); + 
clar_run_suite(&_clar_suites[i], NULL); } clar_print_shutdown( diff --git a/tests-clar/clone/nonetwork.c b/tests-clar/clone/nonetwork.c index 8aae1fb52..339b1e70d 100644 --- a/tests-clar/clone/nonetwork.c +++ b/tests-clar/clone/nonetwork.c @@ -1,8 +1,9 @@ #include "clar_libgit2.h" #include "git2/clone.h" -#include "repository.h" #include "remote.h" +#include "fileops.h" +#include "repository.h" #define LIVE_REPO_URL "git://github.com/libgit2/TestGitRepository" diff --git a/tests-clar/diff/patch.c b/tests-clar/diff/patch.c index f9e913a74..6390957c9 100644 --- a/tests-clar/diff/patch.c +++ b/tests-clar/diff/patch.c @@ -543,7 +543,7 @@ void test_diff_patch__line_counts_with_eofnl(void) "index 378a7d9..3d0154e 100644\n" "--- a/songof7cities.txt\n" "+++ b/songof7cities.txt\n" - "@@ -42,7 +42,7 @@\n" + "@@ -42,7 +42,7 @@ With peoples undefeated of the dark, enduring blood\n" " \n" " To the sound of trumpets shall their seed restore my Cities\n" " Wealthy and well-weaponed, that once more may I behold\n" diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c index 8bff96cf2..a9f1b4e20 100644 --- a/tests-clar/diff/rename.c +++ b/tests-clar/diff/rename.c @@ -558,7 +558,7 @@ void test_diff_rename__patch(void) git_diff_patch *patch; const git_diff_delta *delta; char *text; - const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n"; + const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six 
honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@ She sends'em abroad on her own affairs\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n"; old_tree = resolve_commit_oid_to_tree(g_repo, sha0); new_tree = resolve_commit_oid_to_tree(g_repo, sha1); diff --git a/tests-clar/diff/submodules.c b/tests-clar/diff/submodules.c index f152af46f..6e52a6319 100644 --- a/tests-clar/diff/submodules.c +++ b/tests-clar/diff/submodules.c @@ -1,5 +1,6 @@ #include "clar_libgit2.h" #include "repository.h" +#include "posix.h" #include "../submodule/submodule_helpers.h" static git_repository *g_repo = NULL; diff --git a/tests-clar/fetchhead/nonetwork.c b/tests-clar/fetchhead/nonetwork.c index ef30679f9..a68ebb0b7 100644 --- a/tests-clar/fetchhead/nonetwork.c +++ b/tests-clar/fetchhead/nonetwork.c @@ -1,6 +1,6 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "fetchhead.h" #include "fetchhead_data.h" diff --git a/tests-clar/merge/merge_helpers.c b/tests-clar/merge/merge_helpers.c index bc31b1f44..e4092787c 100644 --- a/tests-clar/merge/merge_helpers.c +++ b/tests-clar/merge/merge_helpers.c @@ -1,5 +1,5 @@ #include "clar_libgit2.h" -#include "buffer.h" +#include "fileops.h" #include "refs.h" #include "tree.h" #include "merge_helpers.h" diff --git a/tests-clar/odb/alternates.c b/tests-clar/odb/alternates.c index be7bfa9cd..4e876c2b3 100644 --- a/tests-clar/odb/alternates.c +++ b/tests-clar/odb/alternates.c @@ -1,6 +1,6 @@ #include "clar_libgit2.h" #include "odb.h" -#include "repository.h" +#include "filebuf.h" static git_buf destpath, filepath; static const char *paths[] = { diff --git a/tests-clar/online/clone.c b/tests-clar/online/clone.c index aa12e47c9..bc4285a00 100644 --- a/tests-clar/online/clone.c +++ b/tests-clar/online/clone.c @@ -2,8 +2,9 @@ #include "git2/clone.h" #include "git2/cred_helpers.h" -#include "repository.h" 
#include "remote.h" +#include "fileops.h" +#include "refs.h" #define LIVE_REPO_URL "http://github.com/libgit2/TestGitRepository" #define LIVE_EMPTYREPO_URL "http://github.com/libgit2/TestEmptyRepository" diff --git a/tests-clar/online/fetchhead.c b/tests-clar/online/fetchhead.c index e14ae0926..58717eef8 100644 --- a/tests-clar/online/fetchhead.c +++ b/tests-clar/online/fetchhead.c @@ -1,6 +1,6 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "fetchhead.h" #include "../fetchhead/fetchhead_data.h" #include "git2/clone.h" diff --git a/tests-clar/refs/delete.c b/tests-clar/refs/delete.c index 053f41229..973768aeb 100644 --- a/tests-clar/refs/delete.c +++ b/tests-clar/refs/delete.c @@ -1,7 +1,8 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "git2/reflog.h" +#include "git2/refdb.h" #include "reflog.h" #include "ref_helpers.h" @@ -31,7 +32,7 @@ void test_refs_delete__packed_loose(void) git_buf temp_path = GIT_BUF_INIT; /* Ensure the loose reference exists on the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, packed_test_head_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), packed_test_head_name)); cl_assert(git_path_exists(temp_path.ptr)); /* Lookup the reference */ diff --git a/tests-clar/refs/pack.c b/tests-clar/refs/pack.c index 412c4c5fd..d8d5cc6d0 100644 --- a/tests-clar/refs/pack.c +++ b/tests-clar/refs/pack.c @@ -1,8 +1,10 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "git2/reflog.h" +#include "git2/refdb.h" #include "reflog.h" +#include "refs.h" #include "ref_helpers.h" static const char *loose_tag_ref_name = "refs/tags/e90810b"; @@ -33,7 +35,7 @@ void test_refs_pack__empty(void) // create a packfile for an empty folder git_buf temp_path = GIT_BUF_INIT; - cl_git_pass(git_buf_join_n(&temp_path, '/', 3, g_repo->path_repository, GIT_REFS_HEADS_DIR, "empty_dir")); + 
cl_git_pass(git_buf_join_n(&temp_path, '/', 3, git_repository_path(g_repo), GIT_REFS_HEADS_DIR, "empty_dir")); cl_git_pass(git_futils_mkdir_r(temp_path.ptr, NULL, GIT_REFS_DIR_MODE)); git_buf_free(&temp_path); @@ -60,7 +62,7 @@ void test_refs_pack__loose(void) packall(); /* Ensure the packed-refs file exists */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, GIT_PACKEDREFS_FILE)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), GIT_PACKEDREFS_FILE)); cl_assert(git_path_exists(temp_path.ptr)); /* Ensure the known ref can still be looked up but is now packed */ @@ -69,7 +71,7 @@ void test_refs_pack__loose(void) cl_assert_equal_s(reference->name, loose_tag_ref_name); /* Ensure the known ref has been removed from the loose folder structure */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, loose_tag_ref_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), loose_tag_ref_name)); cl_assert(!git_path_exists(temp_path.ptr)); git_reference_free(reference); diff --git a/tests-clar/refs/reflog/reflog.c b/tests-clar/refs/reflog/reflog.c index 1cd0ddd92..095cabf04 100644 --- a/tests-clar/refs/reflog/reflog.c +++ b/tests-clar/refs/reflog/reflog.c @@ -1,6 +1,6 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "git2/reflog.h" #include "reflog.h" diff --git a/tests-clar/refs/rename.c b/tests-clar/refs/rename.c index 5ab84c48e..543bc4d62 100644 --- a/tests-clar/refs/rename.c +++ b/tests-clar/refs/rename.c @@ -1,8 +1,9 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "git2/reflog.h" #include "reflog.h" +#include "refs.h" #include "ref_helpers.h" static const char *loose_tag_ref_name = "refs/tags/e90810b"; @@ -38,7 +39,7 @@ void test_refs_rename__loose(void) const char *new_name = "refs/tags/Nemo/knows/refs.kung-fu"; /* Ensure the ref doesn't exist on the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, 
g_repo->path_repository, new_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), new_name)); cl_assert(!git_path_exists(temp_path.ptr)); /* Retrieval of the reference to rename */ @@ -64,7 +65,7 @@ void test_refs_rename__loose(void) cl_assert(reference_is_packed(new_ref) == 0); /* ...and the ref can be found in the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, new_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), new_name)); cl_assert(git_path_exists(temp_path.ptr)); git_reference_free(new_ref); @@ -80,7 +81,7 @@ void test_refs_rename__packed(void) const char *brand_new_name = "refs/heads/brand_new_name"; /* Ensure the ref doesn't exist on the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, packed_head_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), packed_head_name)); cl_assert(!git_path_exists(temp_path.ptr)); /* The reference can however be looked-up... 
*/ @@ -106,7 +107,7 @@ void test_refs_rename__packed(void) cl_assert(reference_is_packed(new_ref) == 0); /* ...and the ref now happily lives in the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, brand_new_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), brand_new_name)); cl_assert(git_path_exists(temp_path.ptr)); git_reference_free(new_ref); @@ -122,7 +123,7 @@ void test_refs_rename__packed_doesnt_pack_others(void) const char *brand_new_name = "refs/heads/brand_new_name"; /* Ensure the other reference exists on the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, packed_test_head_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), packed_test_head_name)); cl_assert(git_path_exists(temp_path.ptr)); /* Lookup the other reference */ diff --git a/tests-clar/repo/discover.c b/tests-clar/repo/discover.c index 3d9aeedd7..f93ff2462 100644 --- a/tests-clar/repo/discover.c +++ b/tests-clar/repo/discover.c @@ -1,9 +1,9 @@ #include "clar_libgit2.h" #include "odb.h" +#include "fileops.h" #include "repository.h" - #define TEMP_REPO_FOLDER "temprepo/" #define DISCOVER_FOLDER TEMP_REPO_FOLDER "discover.git" diff --git a/tests-clar/status/ignore.c b/tests-clar/status/ignore.c index 2d3898ba4..4f6879cfc 100644 --- a/tests-clar/status/ignore.c +++ b/tests-clar/status/ignore.c @@ -169,7 +169,7 @@ void test_status_ignore__ignore_pattern_ignorecase(void) cl_git_mkfile("empty_standard_repo/A.txt", "Differs in case"); cl_git_pass(git_repository_index(&index, g_repo)); - ignore_case = index->ignore_case; + ignore_case = (git_index_caps(index) & GIT_INDEXCAP_IGNORE_CASE) != 0; git_index_free(index); cl_git_pass(git_status_file(&flags, g_repo, "A.txt")); diff --git a/tests-clar/status/worktree.c b/tests-clar/status/worktree.c index 062a09aeb..13335843b 100644 --- a/tests-clar/status/worktree.c +++ b/tests-clar/status/worktree.c @@ -105,7 +105,7 @@ void 
test_status_worktree__swap_subdir_and_file(void) bool ignore_case; cl_git_pass(git_repository_index(&index, repo)); - ignore_case = index->ignore_case; + ignore_case = (git_index_caps(index) & GIT_INDEXCAP_IGNORE_CASE) != 0; git_index_free(index); /* first alter the contents of the worktree */ From 596b121ae46554cf48b4e561541e1ab975dbef78 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 10 Jun 2013 14:16:49 -0700 Subject: [PATCH 04/12] fix missing file and bad prototype --- src/array.h | 41 +++++++++++++++++++++++++++++++++++++++++ src/diff_driver.h | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 src/array.h diff --git a/src/array.h b/src/array.h new file mode 100644 index 000000000..aadd021f1 --- /dev/null +++ b/src/array.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_array_h__ +#define INCLUDE_array_h__ + +#include "util.h" + +#define git_array_t(type) struct { type *ptr; size_t size, asize; } + +#define git_array_init(a) \ + do { (a).size = (a).asize = 0; (a).ptr = NULL; } while (0) + +#define git_array_clear(a) \ + do { git__free((a).ptr); git_array_init(a); } while (0) + +#define git_array_grow(a) do { \ + void *new_array; size_t new_size = \ + ((a).asize >= 256) ? (a).asize + 256 : ((a).asize >= 8) ? (a).asize * 2 : 8; \ + new_array = git__realloc((a).ptr, new_size * sizeof(*(a).ptr)); \ + if (!new_array) { git_array_clear(a); } \ + else { (a).ptr = new_array; (a).asize = new_size; } \ + } while (0) + +#define GITERR_CHECK_ARRAY(a) GITERR_CHECK_ALLOC((a).ptr) + +#define git_array_alloc(a, el) do { \ + if ((a).size >= (a).asize) git_array_grow(a); \ + (el) = (a).ptr ? &(a).ptr[(a).size++] : NULL; \ + } while (0) + +#define git_array_last(a) ((a).size ? 
&(a).ptr[(a).size - 1] : NULL) + +#define git_array_get(a, i) (((i) < (a).size) ? &(a).ptr[(i)] : NULL) + +#define git_array_size(a) (a).size + +#endif diff --git a/src/diff_driver.h b/src/diff_driver.h index b9881a7ed..af9fa073e 100644 --- a/src/diff_driver.h +++ b/src/diff_driver.h @@ -11,7 +11,7 @@ typedef struct git_diff_driver_registry git_diff_driver_registry; -git_diff_driver_registry *git_diff_driver_registry_new(); +git_diff_driver_registry *git_diff_driver_registry_new(void); void git_diff_driver_registry_free(git_diff_driver_registry *); typedef struct git_diff_driver git_diff_driver; From 2f77d8f15d7c71a92f01850025c91dffe3bcafd4 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 10 Jun 2013 14:16:56 -0700 Subject: [PATCH 05/12] Fix some memory leaks --- src/index.c | 2 ++ src/remote.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/index.c b/src/index.c index 25c38b026..fd55616b8 100644 --- a/src/index.c +++ b/src/index.c @@ -2030,6 +2030,8 @@ int git_index_read_tree(git_index *index, const git_tree *tree) error = git_tree_walk(tree, GIT_TREEWALK_POST, read_tree_cb, &data); index_entries_free(&entries); + git_vector_free(&entries); + git_vector_sort(&index->entries); return error; diff --git a/src/remote.c b/src/remote.c index 943b72bb7..0e8354a11 100644 --- a/src/remote.c +++ b/src/remote.c @@ -1267,8 +1267,10 @@ static int rename_remote_references( return -1; while ((error = git_reference_next(&ref, iter)) == 0) { - if (git__prefixcmp(ref->name, GIT_REFS_REMOTES_DIR)) + if (git__prefixcmp(ref->name, GIT_REFS_REMOTES_DIR)) { + git_reference_free(ref); continue; + } if ((error = rename_one_remote_reference(ref, old_name, new_name)) < 0) { git_reference_iterator_free(iter); From 3eadfecd325d355d3f8a9631d9c89b7e8eede98b Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Mon, 10 Jun 2013 15:24:20 -0700 Subject: [PATCH 06/12] start implementing diff driver registry --- src/diff_driver.c | 44 
++++++++++++++++++++++++++++++++++++++++---- src/repository.c | 2 ++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/diff_driver.c b/src/diff_driver.c index 5438afc67..58a903261 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -16,6 +16,8 @@ #include "map.h" #include "buf_text.h" +GIT__USE_STRMAP; + typedef enum { DIFF_DRIVER_AUTO = 0, DIFF_DRIVER_FALSE = 1, @@ -33,7 +35,7 @@ enum { struct git_diff_driver { git_diff_driver_t type; git_strarray fn_patterns; - int binary; + int binary; /* 0 => treat as text, 1 => treat as binary, -1 => auto */ }; struct git_diff_driver_registry { @@ -49,17 +51,45 @@ static git_diff_driver global_drivers[3] = { git_diff_driver_registry *git_diff_driver_registry_new() { - return git__calloc(1, sizeof(git_diff_driver_registry)); + git_diff_driver_registry *reg = + git__calloc(1, sizeof(git_diff_driver_registry)); + if (!reg) + return NULL; + + if (git_pool_init(&reg->strings, 1, 0) < 0 || + (reg->drivers = git_strmap_alloc()) == NULL) + { + git_diff_driver_registry_free(reg); + return NULL; + } + + return reg; } void git_diff_driver_registry_free(git_diff_driver_registry *reg) { + if (!reg) + return; + + git_strmap_free(reg->drivers); + git_pool_clear(&reg->strings); git__free(reg); } +static int git_diff_driver_load( + git_diff_driver **out, git_repository *repo, const char *name) +{ + GIT_UNUSED(out); + GIT_UNUSED(repo); + GIT_UNUSED(name); + + return GIT_ENOTFOUND; +} + int git_diff_driver_lookup( git_diff_driver **out, git_repository *repo, const char *path) { + int error = 0; const char *value; assert(out); @@ -67,8 +97,8 @@ int git_diff_driver_lookup( if (!repo || !path || !strlen(path)) goto use_auto; - if (git_attr_get(&value, repo, 0, path, "diff") < 0) - return -1; + if ((error = git_attr_get(&value, repo, 0, path, "diff")) < 0) + return error; if (GIT_ATTR_FALSE(value)) { *out = &global_drivers[DIFF_DRIVER_FALSE]; @@ -81,6 +111,12 @@ int git_diff_driver_lookup( } /* otherwise look for driver information
in config and build driver */ + if ((error = git_diff_driver_load(out, repo, value)) < 0) { + if (error != GIT_ENOTFOUND) + return error; + else + giterr_clear(); + } use_auto: *out = &global_drivers[DIFF_DRIVER_AUTO]; diff --git a/src/repository.c b/src/repository.c index 4514fee23..e4451499c 100644 --- a/src/repository.c +++ b/src/repository.c @@ -111,7 +111,9 @@ void git_repository_free(git_repository *repo) git_cache_free(&repo->objects); git_submodule_config_free(repo); + git_diff_driver_registry_free(repo->diff_drivers); + repo->diff_drivers = NULL; git__free(repo->path_repository); git__free(repo->workdir); From 5dc98298a14a9adae3cf8b21fb01f682791c29c7 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 11 Jun 2013 11:22:22 -0700 Subject: [PATCH 07/12] Implement regex pattern diff driver This implements the loading of regular expression pattern lists for diff drivers that search for function context in that way. This also changes the way that diff drivers update options and interface with xdiff APIs to make them a little more flexible. --- docs/diff-internals.md | 1 - include/git2/diff.h | 3 + src/diff_driver.c | 286 ++++++++++++++++++++++++++++++++------- src/diff_driver.h | 21 ++- src/diff_file.c | 51 ++++--- src/diff_file.h | 3 +- src/diff_patch.c | 5 +- src/diff_xdiff.c | 12 +- tests-clar/diff/patch.c | 2 +- tests-clar/diff/rename.c | 2 +- 10 files changed, 299 insertions(+), 87 deletions(-) diff --git a/docs/diff-internals.md b/docs/diff-internals.md index 1983b7939..53e71f5b5 100644 --- a/docs/diff-internals.md +++ b/docs/diff-internals.md @@ -86,4 +86,3 @@ Internal Objects for hunk headers ** At some point, the logic for getting a filtered version of file content or calculating the OID of a file may be moved into the driver. 
- diff --git a/include/git2/diff.h b/include/git2/diff.h index d26456cb0..40e65b1e4 100644 --- a/include/git2/diff.h +++ b/include/git2/diff.h @@ -148,6 +148,9 @@ typedef enum { * Of course, ignore rules are still checked for the directory itself. */ GIT_DIFF_FAST_UNTRACKED_DIRS = (1 << 19), + + /** Treat all files as binary, disabling text diffs */ + GIT_DIFF_FORCE_BINARY = (1 << 20), } git_diff_option_t; /** diff --git a/src/diff_driver.c b/src/diff_driver.c index 58a903261..9d2508024 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -12,17 +12,17 @@ #include "diff_patch.h" #include "diff_driver.h" #include "strmap.h" -#include "pool.h" #include "map.h" #include "buf_text.h" +#include "repository.h" GIT__USE_STRMAP; typedef enum { DIFF_DRIVER_AUTO = 0, - DIFF_DRIVER_FALSE = 1, - DIFF_DRIVER_TRUE = 2, - DIFF_DRIVER_NAMED = 3, + DIFF_DRIVER_BINARY = 1, + DIFF_DRIVER_TEXT = 2, + DIFF_DRIVER_PATTERNLIST = 3, } git_diff_driver_t; enum { @@ -34,19 +34,22 @@ enum { /* data for finding function context for a given file type */ struct git_diff_driver { git_diff_driver_t type; - git_strarray fn_patterns; - int binary; /* 0 => treat as text, 1 => treat as binary, -1 => auto */ + uint32_t binary_flags; + uint32_t other_flags; + git_array_t(regex_t) fn_patterns; + regex_t word_pattern; }; struct git_diff_driver_registry { git_strmap *drivers; - git_pool strings; }; +#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY) + static git_diff_driver global_drivers[3] = { - { DIFF_DRIVER_AUTO, { NULL, 0 }, -1 }, - { DIFF_DRIVER_FALSE, { NULL, 0 }, 1 }, - { DIFF_DRIVER_TRUE, { NULL, 0 }, 0 }, + { DIFF_DRIVER_AUTO, 0, 0, }, + { DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 }, + { DIFF_DRIVER_TEXT, GIT_DIFF_FORCE_TEXT, 0 }, }; git_diff_driver_registry *git_diff_driver_registry_new() @@ -56,9 +59,7 @@ git_diff_driver_registry *git_diff_driver_registry_new() if (!reg) return NULL; - if (git_pool_init(&reg->strings, 1, 0) < 0 || - (reg->drivers = git_strmap_alloc()) ==
NULL) - { + if ((reg->drivers = git_strmap_alloc()) == NULL) { git_diff_driver_registry_free(reg); return NULL; } @@ -68,22 +69,165 @@ git_diff_driver_registry *git_diff_driver_registry_new() void git_diff_driver_registry_free(git_diff_driver_registry *reg) { + git_diff_driver *drv; + if (!reg) return; + git_strmap_foreach_value(reg->drivers, drv, git_diff_driver_free(drv)); git_strmap_free(reg->drivers); - git_pool_clear(&reg->strings); git__free(reg); } -static int git_diff_driver_load( - git_diff_driver **out, git_repository *repo, const char *name) +static int diff_driver_add_funcname( + git_diff_driver *drv, const char *name, int regex_flags) { - GIT_UNUSED(out); - GIT_UNUSED(repo); - GIT_UNUSED(name); + int error; + regex_t re, *re_ptr; - return GIT_ENOTFOUND; + if ((error = regcomp(&re, name, regex_flags)) != 0) { + /* TODO: warning about bad regex instead of failure */ + error = giterr_set_regex(&re, error); + regfree(&re); + return error; + } + + git_array_alloc(drv->fn_patterns, re_ptr); + GITERR_CHECK_ALLOC(re_ptr); + + memcpy(re_ptr, &re, sizeof(re)); + return 0; +} + +static int diff_driver_xfuncname(const git_config_entry *entry, void *payload) +{ + return diff_driver_add_funcname(payload, entry->value, REG_EXTENDED); +} + +static int diff_driver_funcname(const git_config_entry *entry, void *payload) +{ + return diff_driver_add_funcname(payload, entry->value, 0); +} + +static git_diff_driver_registry *git_repository_driver_registry( + git_repository *repo) +{ + if (!repo->diff_drivers) { + git_diff_driver_registry *reg = git_diff_driver_registry_new(); + reg = git__compare_and_swap(&repo->diff_drivers, NULL, reg); + + if (reg != NULL) /* if we race, free losing allocation */ + git_diff_driver_registry_free(reg); + } + + if (!repo->diff_drivers) + giterr_set(GITERR_REPOSITORY, "Unable to create diff driver registry"); + + return repo->diff_drivers; +} + +static int git_diff_driver_load( + git_diff_driver **out, git_repository *repo, const char
*driver_name) +{ + int error = 0, bval; + git_diff_driver_registry *reg; + git_diff_driver *drv; + git_config *cfg; + git_buf name = GIT_BUF_INIT; + const char *val; + + reg = git_repository_driver_registry(repo); + if (!reg) + return -1; + else { + khiter_t pos = git_strmap_lookup_index(reg->drivers, driver_name); + if (git_strmap_valid_index(reg->drivers, pos)) { + *out = git_strmap_value_at(reg->drivers, pos); + return 0; + } + } + + /* if you can't read config for repo, just use default driver */ + if (git_repository_config__weakptr(&cfg, repo) < 0) { + giterr_clear(); + return GIT_ENOTFOUND; + } + + drv = git__calloc(1, sizeof(git_diff_driver)); + GITERR_CHECK_ALLOC(drv); + drv->type = DIFF_DRIVER_AUTO; + + if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0) + goto fail; + if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) { + if (error != GIT_ENOTFOUND) + goto fail; + /* diff.<driver>.binary unspecified, so just continue */ + giterr_clear(); + } else if (git_config_parse_bool(&bval, val) < 0) { + /* TODO: warn that diff.<driver>.binary has invalid value */ + giterr_clear(); + } else if (bval) { + /* if diff.<driver>.binary is true, just return the binary driver */ + git__free(drv); + *out = &global_drivers[DIFF_DRIVER_BINARY]; + return 0; + } else { + /* if diff.<driver>.binary is false, force binary checks off */ + /* but still may have custom function context patterns, etc.
*/ + drv->binary_flags = GIT_DIFF_FORCE_TEXT; + } + + /* TODO: warn if diff.<driver>.command or diff.<driver>.textconv are set */ + + if ((error = git_buf_printf(&name, "diff.%s.xfuncname", driver_name)) < 0) + goto fail; + if ((error = git_config_get_multivar( + cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) { + if (error != GIT_ENOTFOUND) + goto fail; + /* no diff.<driver>.xfuncname values, so just continue */ + giterr_clear(); + } + + if ((error = git_buf_printf(&name, "diff.%s.funcname", driver_name)) < 0) + goto fail; + if ((error = git_config_get_multivar( + cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) { + if (error != GIT_ENOTFOUND) + goto fail; + /* no diff.<driver>.funcname values, so just continue */ + giterr_clear(); + } + + /* if we found any patterns, set driver type to use correct callback */ + if (git_array_size(drv->fn_patterns) > 0) + drv->type = DIFF_DRIVER_PATTERNLIST; + + if ((error = git_buf_printf(&name, "diff.%s.wordregex", driver_name)) < 0) + goto fail; + if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) { + if (error != GIT_ENOTFOUND) + goto fail; + /* no diff.<driver>.wordregex, so just continue */ + giterr_clear(); + } else if ((error = regcomp(&drv->word_pattern, val, REG_EXTENDED)) != 0) { + /* TODO: warning about bad regex instead of failure */ + error = giterr_set_regex(&drv->word_pattern, error); + goto fail; + } + + /* TODO: look up diff.<driver>.algorithm to turn on minimal / patience + * diff in drv->other_flags + */ + + *out = drv; + return 0; + +fail: + git_diff_driver_free(drv); + *out = &global_drivers[DIFF_DRIVER_AUTO]; + return error; } int git_diff_driver_lookup( @@ -101,12 +245,12 @@ int git_diff_driver_lookup( return error; if (GIT_ATTR_FALSE(value)) { - *out = &global_drivers[DIFF_DRIVER_FALSE]; + *out = &global_drivers[DIFF_DRIVER_BINARY]; return 0; } else if (GIT_ATTR_TRUE(value)) { - *out = &global_drivers[DIFF_DRIVER_TEXT]; + *out = &global_drivers[DIFF_DRIVER_TEXT]; return 0; } @@ -125,13 +269,27 @@ use_auto: void
git_diff_driver_free(git_diff_driver *driver) { - GIT_UNUSED(driver); - /* do nothing for now */ + size_t i; + + if (!driver) + return; + + for (i = 0; i < git_array_size(driver->fn_patterns); ++i) /* release every compiled funcname regex before dropping the array */ + regfree(git_array_get(driver->fn_patterns, i)); + git_array_clear(driver->fn_patterns); + + regfree(&driver->word_pattern); + + git__free(driver); } -int git_diff_driver_is_binary(git_diff_driver *driver) +void git_diff_driver_update_options( + uint32_t *option_flags, git_diff_driver *driver) { - return driver ? driver->binary : -1; + if ((*option_flags & FORCE_DIFFABLE) == 0) + *option_flags |= driver->binary_flags; + + *option_flags |= driver->other_flags; } int git_diff_driver_content_is_binary( @@ -153,6 +311,29 @@ int git_diff_driver_content_is_binary( return 0; } +static int diff_context_line__simple( + git_diff_driver *driver, const char *line, long line_len) +{ + GIT_UNUSED(driver); + GIT_UNUSED(line_len); + return (git__isalpha(*line) || *line == '_' || *line == '$'); +} + +static int diff_context_line__pattern_match( + git_diff_driver *driver, const char *line, long line_len) +{ + size_t i; + + GIT_UNUSED(line_len); + + for (i = 0; i < git_array_size(driver->fn_patterns); ++i) { /* true as soon as any funcname pattern matches the line */ + if (!regexec(git_array_get(driver->fn_patterns, i), line, 0, NULL, 0)) + return true; + } + + return false; +} + static long diff_context_find( const char *line, long line_len, @@ -160,37 +341,46 @@ static long diff_context_find( long out_size, void *payload) { - git_diff_driver *driver = payload; - const char *scan; + git_diff_find_context_payload *ctxt = payload; - GIT_UNUSED(driver); + if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0) + return -1; + git_buf_rtrim(&ctxt->line); - if (line_len > 0 && line[line_len - 1] == '\n') - line_len--; - if (line_len > 0 && line[line_len - 1] == '\r') - line_len--; - if (!line_len) + if (!ctxt->line.size) return -1; - if (!git__isalpha(*line) && *line != '_' && *line != '$') + if (!ctxt->match_line || + !ctxt->match_line(ctxt->driver,
ctxt->line.ptr, ctxt->line.size)) return -1; - for (scan = &line[line_len-1]; scan > line && git__isspace(*scan); --scan) - /* search backward for non-space */; - line_len = scan - line; + git_buf_truncate(&ctxt->line, (size_t)out_size); + git_buf_copy_cstr(out, (size_t)out_size, &ctxt->line); - if (line_len >= out_size) - line_len = out_size - 1; - - memcpy(out, line, line_len); - out[line_len] = '\0'; - - return line_len; + return (long)ctxt->line.size; } -git_diff_find_context_fn git_diff_driver_find_content_fn(git_diff_driver *driver) +void git_diff_find_context_init( + git_diff_find_context_fn *findfn_out, + git_diff_find_context_payload *payload_out, + git_diff_driver *driver) { - GIT_UNUSED(driver); - return diff_context_find; + *findfn_out = driver ? diff_context_find : NULL; + + memset(payload_out, 0, sizeof(*payload_out)); + if (driver) { + payload_out->driver = driver; + payload_out->match_line = (driver->type == DIFF_DRIVER_PATTERNLIST) ? + diff_context_line__pattern_match : diff_context_line__simple; + git_buf_init(&payload_out->line, 0); + } +} + +void git_diff_find_context_clear(git_diff_find_context_payload *payload) +{ + if (payload) { + git_buf_free(&payload->line); + payload->driver = NULL; + } } diff --git a/src/diff_driver.h b/src/diff_driver.h index af9fa073e..3db7df000 100644 --- a/src/diff_driver.h +++ b/src/diff_driver.h @@ -8,6 +8,7 @@ #define INCLUDE_diff_driver_h__ #include "common.h" +#include "buffer.h" typedef struct git_diff_driver_registry git_diff_driver_registry; @@ -19,8 +20,8 @@ typedef struct git_diff_driver git_diff_driver; int git_diff_driver_lookup(git_diff_driver **, git_repository *, const char *); void git_diff_driver_free(git_diff_driver *); -/* returns -1 meaning "unknown", 0 meaning not binary, 1 meaning binary */ -int git_diff_driver_is_binary(git_diff_driver *); +/* diff option flags to force off and on for this driver */ +void git_diff_driver_update_options(uint32_t *option_flags, git_diff_driver *); /* returns -1 
meaning "unknown", 0 meaning not binary, 1 meaning binary */ int git_diff_driver_content_is_binary( @@ -29,6 +30,20 @@ int git_diff_driver_content_is_binary( typedef long (*git_diff_find_context_fn)( const char *, long, char *, long, void *); -git_diff_find_context_fn git_diff_driver_find_content_fn(git_diff_driver *); +typedef int (*git_diff_find_context_line)( + git_diff_driver *, const char *, long); + +typedef struct { + git_diff_driver *driver; + git_diff_find_context_line match_line; + git_buf line; +} git_diff_find_context_payload; + +void git_diff_find_context_init( + git_diff_find_context_fn *findfn_out, + git_diff_find_context_payload *payload_out, + git_diff_driver *driver); + +void git_diff_find_context_clear(git_diff_find_context_payload *); #endif diff --git a/src/diff_file.c b/src/diff_file.c index e4f8ca1e8..5bdb9e4bf 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -19,14 +19,9 @@ static bool diff_file_content_binary_by_size(git_diff_file_content *fc) { /* if we have diff opts, check max_size vs file size */ if ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) == 0 && - fc->opts && fc->opts->max_size >= 0) - { - git_off_t threshold = DIFF_MAX_FILESIZE; - if (fc->opts->max_size > 0) - threshold = fc->opts->max_size; - if (fc->file.size > threshold) - fc->file.flags |= GIT_DIFF_FLAG_BINARY; - } + fc->opts_max_size > 0 && + fc->file.size > fc->opts_max_size) + fc->file.flags |= GIT_DIFF_FLAG_BINARY; return ((fc->file.flags & GIT_DIFF_FLAG_BINARY) != 0); } @@ -44,9 +39,14 @@ static void diff_file_content_binary_by_content(git_diff_file_content *fc) } } -static int diff_file_content_init_common(git_diff_file_content *fc) +static int diff_file_content_init_common( + git_diff_file_content *fc, const git_diff_options *opts) { - uint32_t flags = fc->opts ? fc->opts->flags : GIT_DIFF_NORMAL; + fc->opts_flags = opts ? opts->flags : GIT_DIFF_NORMAL; + + if (opts && opts->max_size >= 0) + fc->opts_max_size = opts->max_size ? 
+ opts->max_size : DIFF_MAX_FILESIZE; if (!fc->driver) { if (git_diff_driver_lookup(&fc->driver, fc->repo, "") < 0) @@ -54,20 +54,22 @@ static int diff_file_content_init_common(git_diff_file_content *fc) fc->src = GIT_ITERATOR_TYPE_TREE; } + /* give driver a chance to modify options */ + git_diff_driver_update_options(&fc->opts_flags, fc->driver); + /* make sure file is conceivable mmap-able */ if ((git_off_t)((size_t)fc->file.size) != fc->file.size) fc->file.flags |= GIT_DIFF_FLAG_BINARY; - - /* check if user is forcing is to text diff the file */ - else if (flags & GIT_DIFF_FORCE_TEXT) + /* check if user is forcing text diff the file */ + else if (fc->opts_flags & GIT_DIFF_FORCE_TEXT) { + fc->file.flags &= ~GIT_DIFF_FLAG_BINARY; fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; - - /* otherwise see if diff driver forces a behavior */ - else switch (git_diff_driver_is_binary(fc->driver)) { - case 0: fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; break; - case 1: fc->file.flags |= GIT_DIFF_FLAG_BINARY; break; - default: break; - } + } + /* check if user is forcing binary diff the file */ + else if (fc->opts_flags & GIT_DIFF_FORCE_BINARY) { + fc->file.flags &= ~GIT_DIFF_FLAG_NOT_BINARY; + fc->file.flags |= GIT_DIFF_FLAG_BINARY; + } diff_file_content_binary_by_size(fc); @@ -95,7 +97,6 @@ int diff_file_content_init_from_diff( memset(fc, 0, sizeof(*fc)); fc->repo = diff->repo; - fc->opts = &diff->opts; fc->src = use_old ? 
diff->old_src : diff->new_src; memcpy(&fc->file, file, sizeof(fc->file)); @@ -123,7 +124,7 @@ int diff_file_content_init_from_diff( if (!has_data) fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; - return diff_file_content_init_common(fc); + return diff_file_content_init_common(fc, &diff->opts); } int diff_file_content_init_from_blob( @@ -134,7 +135,6 @@ int diff_file_content_init_from_blob( { memset(fc, 0, sizeof(*fc)); fc->repo = repo; - fc->opts = opts; fc->blob = blob; if (!blob) { @@ -149,7 +149,7 @@ int diff_file_content_init_from_blob( fc->map.data = (char *)git_blob_rawcontent(blob); } - return diff_file_content_init_common(fc); + return diff_file_content_init_common(fc, opts); } int diff_file_content_init_from_raw( @@ -161,7 +161,6 @@ int diff_file_content_init_from_raw( { memset(fc, 0, sizeof(*fc)); fc->repo = repo; - fc->opts = opts; if (!buf) { fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; @@ -175,7 +174,7 @@ int diff_file_content_init_from_raw( fc->map.data = (char *)buf; } - return diff_file_content_init_common(fc); + return diff_file_content_init_common(fc, opts); } static int diff_file_content_commit_to_str( diff --git a/src/diff_file.h b/src/diff_file.h index 51c6878a9..ab7b1dc1f 100644 --- a/src/diff_file.h +++ b/src/diff_file.h @@ -15,9 +15,10 @@ /* expanded information for one side of a delta */ typedef struct { git_repository *repo; - const git_diff_options *opts; git_diff_file file; git_diff_driver *driver; + uint32_t opts_flags; + git_off_t opts_max_size; git_iterator_type_t src; const git_blob *blob; git_map map; diff --git a/src/diff_patch.c b/src/diff_patch.c index d7eb69db6..fe22d678c 100644 --- a/src/diff_patch.c +++ b/src/diff_patch.c @@ -96,8 +96,7 @@ static int diff_patch_load(git_diff_patch *patch, git_diff_output *output) /* if no hunk and data callbacks and user doesn't care if data looks * binary, then there is no need to actually load the data */ - if (patch->ofile.opts && - (patch->ofile.opts->flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0 && + 
if ((patch->ofile.opts_flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0 && output && !output->hunk_cb && !output->data_cb) return 0; @@ -718,6 +717,6 @@ static void diff_output_init( static void diff_output_to_patch(git_diff_output *out, git_diff_patch *patch) { diff_output_init( - out, patch->ofile.opts, + out, NULL, diff_patch_file_cb, diff_patch_hunk_cb, diff_patch_line_cb, patch); } diff --git a/src/diff_xdiff.c b/src/diff_xdiff.c index 1d1c2d54c..91c56f727 100644 --- a/src/diff_xdiff.c +++ b/src/diff_xdiff.c @@ -109,6 +109,7 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) { git_xdiff_output *xo = (git_xdiff_output *)output; git_xdiff_info info; + git_diff_find_context_payload findctxt; mmfile_t old_xdiff_data, new_xdiff_data; memset(&info, 0, sizeof(info)); @@ -117,15 +118,18 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) xo->callback.priv = &info; - xo->config.find_func_priv = patch->ofile.driver; - xo->config.find_func = patch->ofile.driver ? 
- git_diff_driver_find_content_fn(patch->ofile.driver) : NULL; + git_diff_find_context_init( + &xo->config.find_func, &findctxt, patch->ofile.driver); + xo->config.find_func_priv = &findctxt; if (xo->config.find_func != NULL) xo->config.flags |= XDL_EMIT_FUNCNAMES; else xo->config.flags &= ~XDL_EMIT_FUNCNAMES; + /* TODO: check ofile.opts_flags to see if driver-specific per-file + * updates are needed to xo->params.flags + */ old_xdiff_data.ptr = patch->ofile.map.data; old_xdiff_data.size = patch->ofile.map.len; @@ -135,6 +139,8 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) xdl_diff(&old_xdiff_data, &new_xdiff_data, &xo->params, &xo->config, &xo->callback); + git_diff_find_context_clear(&findctxt); + return xo->output.error; } diff --git a/tests-clar/diff/patch.c b/tests-clar/diff/patch.c index 6390957c9..3f14a0de7 100644 --- a/tests-clar/diff/patch.c +++ b/tests-clar/diff/patch.c @@ -543,7 +543,7 @@ void test_diff_patch__line_counts_with_eofnl(void) "index 378a7d9..3d0154e 100644\n" "--- a/songof7cities.txt\n" "+++ b/songof7cities.txt\n" - "@@ -42,7 +42,7 @@ With peoples undefeated of the dark, enduring blood\n" + "@@ -42,7 +42,7 @@ With peoples undefeated of the dark, enduring blood.\n" " \n" " To the sound of trumpets shall their seed restore my Cities\n" " Wealthy and well-weaponed, that once more may I behold\n" diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c index a9f1b4e20..ca3f50676 100644 --- a/tests-clar/diff/rename.c +++ b/tests-clar/diff/rename.c @@ -558,7 +558,7 @@ void test_diff_rename__patch(void) git_diff_patch *patch; const git_diff_delta *delta; char *text; - const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@ She 
sends'em abroad on her own affairs\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n"; + const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@ She sends'em abroad on her own affairs,\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n"; old_tree = resolve_commit_oid_to_tree(g_repo, sha0); new_tree = resolve_commit_oid_to_tree(g_repo, sha1); From 42e6cf7860fba665357a7b1b6a8c5d3f5dc0d634 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Tue, 11 Jun 2013 17:45:14 -0700 Subject: [PATCH 08/12] Add diff drivers tests (and fix bugs) This adds real tests for user-configured diff drivers and in the process found a bunch of bugs. 
--- src/diff_driver.c | 65 ++++++++++++-------- tests-clar/diff/drivers.c | 126 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 25 deletions(-) create mode 100644 tests-clar/diff/drivers.c diff --git a/src/diff_driver.c b/src/diff_driver.c index 9d2508024..9c109e7d7 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -38,6 +38,7 @@ struct git_diff_driver { uint32_t other_flags; git_array_t(regex_t) fn_patterns; regex_t word_pattern; + char name[GIT_FLEX_ARRAY]; }; struct git_diff_driver_registry { @@ -132,15 +133,18 @@ static int git_diff_driver_load( int error = 0, bval; git_diff_driver_registry *reg; git_diff_driver *drv; + size_t namelen = strlen(driver_name); + khiter_t pos; git_config *cfg; git_buf name = GIT_BUF_INIT; const char *val; + bool found_driver = false; reg = git_repository_driver_registry(repo); if (!reg) return -1; else { - khiter_t pos = git_strmap_lookup_index(reg->drivers, driver_name); + pos = git_strmap_lookup_index(reg->drivers, driver_name); if (git_strmap_valid_index(reg->drivers, pos)) { *out = git_strmap_value_at(reg->drivers, pos); return 0; @@ -153,9 +157,10 @@ static int git_diff_driver_load( return GIT_ENOTFOUND; } - drv = git__calloc(1, sizeof(git_diff_driver)); + drv = git__calloc(1, sizeof(git_diff_driver) + namelen + 1); GITERR_CHECK_ALLOC(drv); drv->type = DIFF_DRIVER_AUTO; + memcpy(drv->name, driver_name, namelen); if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0) goto fail; @@ -176,51 +181,62 @@ static int git_diff_driver_load( /* if diff..binary is false, force binary checks off */ /* but still may have custom function context patterns, etc. 
*/ drv->binary_flags = GIT_DIFF_FORCE_TEXT; + found_driver = true; } /* TODO: warn if diff..command or diff..textconv are set */ - if ((error = git_buf_printf(&name, "diff.%s.xfuncname", driver_name)) < 0) - goto fail; + git_buf_truncate(&name, namelen + strlen("diff..")); + git_buf_put(&name, "xfuncname", strlen("xfuncname")); if ((error = git_config_get_multivar( cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) { if (error != GIT_ENOTFOUND) goto fail; - /* no diff..xfuncname values, so just continue */ - giterr_clear(); + giterr_clear(); /* no diff..xfuncname, so just continue */ } - if ((error = git_buf_printf(&name, "diff.%s.funcname", driver_name)) < 0) - goto fail; + git_buf_truncate(&name, namelen + strlen("diff..")); + git_buf_put(&name, "funcname", strlen("funcname")); if ((error = git_config_get_multivar( cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) { if (error != GIT_ENOTFOUND) goto fail; - /* no diff..funcname values, so just continue */ - giterr_clear(); + giterr_clear(); /* no diff..funcname, so just continue */ } /* if we found any patterns, set driver type to use correct callback */ - if (git_array_size(drv->fn_patterns) > 0) + if (git_array_size(drv->fn_patterns) > 0) { drv->type = DIFF_DRIVER_PATTERNLIST; + found_driver = true; + } - if ((error = git_buf_printf(&name, "diff.%s.wordregex", driver_name)) < 0) - goto fail; + git_buf_truncate(&name, namelen + strlen("diff..")); + git_buf_put(&name, "wordregex", strlen("wordregex")); if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) { if (error != GIT_ENOTFOUND) goto fail; - /* no diff..wordregex, so just continue */ - giterr_clear(); + giterr_clear(); /* no diff..wordregex, so just continue */ } else if ((error = regcomp(&drv->word_pattern, val, REG_EXTENDED)) != 0) { /* TODO: warning about bad regex instead of failure */ error = giterr_set_regex(&drv->word_pattern, error); goto fail; + } else { + found_driver = true; } /* TODO: look up diff..algorithm to turn on minimal / 
patience * diff in drv->other_flags */ + /* if no driver config found, fall back on AUTO driver */ + if (!found_driver) + goto fail; + + /* store driver in registry */ + git_strmap_insert(reg->drivers, drv->name, drv, error); + if (error < 0) + goto fail; + *out = drv; return 0; @@ -244,18 +260,15 @@ int git_diff_driver_lookup( if ((error = git_attr_get(&value, repo, 0, path, "diff")) < 0) return error; - if (GIT_ATTR_FALSE(value)) { + if (GIT_ATTR_UNSPECIFIED(value)) + /* just use the auto value */; + else if (GIT_ATTR_FALSE(value)) *out = &global_drivers[DIFF_DRIVER_BINARY]; - return 0; - } - - else if (GIT_ATTR_TRUE(value)) { + else if (GIT_ATTR_TRUE(value)) *out = &global_drivers[DIFF_DRIVER_TEXT]; - return 0; - } /* otherwise look for driver information in config and build driver */ - if ((error = git_diff_driver_load(out, repo, value)) < 0) { + else if ((error = git_diff_driver_load(out, repo, value)) < 0) { if (error != GIT_ENOTFOUND) return error; else @@ -263,7 +276,9 @@ int git_diff_driver_lookup( } use_auto: - *out = &global_drivers[DIFF_DRIVER_AUTO]; + if (!*out) + *out = &global_drivers[DIFF_DRIVER_AUTO]; + return 0; } @@ -326,7 +341,7 @@ static int diff_context_line__pattern_match( GIT_UNUSED(line_len); - for (i = 0; i > git_array_size(driver->fn_patterns); ++i) { + for (i = 0; i < git_array_size(driver->fn_patterns); ++i) { if (!regexec(git_array_get(driver->fn_patterns, i), line, 0, NULL, 0)) return true; } diff --git a/tests-clar/diff/drivers.c b/tests-clar/diff/drivers.c new file mode 100644 index 000000000..8f7b1f21c --- /dev/null +++ b/tests-clar/diff/drivers.c @@ -0,0 +1,126 @@ +#include "clar_libgit2.h" +#include "diff_helpers.h" +#include "repository.h" +#include "diff_driver.h" + +static git_repository *g_repo = NULL; +static git_config *g_cfg = NULL; + +void test_diff_drivers__initialize(void) +{ +} + +void test_diff_drivers__cleanup(void) +{ + git_config_free(g_cfg); + g_cfg = NULL; + + cl_git_sandbox_cleanup(); + g_repo = NULL; +} + +void 
test_diff_drivers__patterns(void) +{ + const char *one_sha = "19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13"; + git_tree *one; + git_diff_list *diff; + git_diff_patch *patch; + char *text; + const char *expected0 = "diff --git a/untimely.txt b/untimely.txt\nindex 9a69d96..57fd0cf 100644\n--- a/untimely.txt\n+++ b/untimely.txt\n@@ -22,3 +22,5 @@ Comes through the blood of the vanguards who\n dreamed--too soon--it had sounded.\r\n \r\n -- Rudyard Kipling\r\n+\r\n+Some new stuff\r\n"; + const char *expected1 = "diff --git a/untimely.txt b/untimely.txt\nindex 9a69d96..57fd0cf 100644\nBinary files a/untimely.txt and b/untimely.txt differ\n"; + const char *expected2 = "diff --git a/untimely.txt b/untimely.txt\nindex 9a69d96..57fd0cf 100644\n--- a/untimely.txt\n+++ b/untimely.txt\n@@ -22,3 +22,5 @@ Heaven delivers on earth the Hour that cannot be\n dreamed--too soon--it had sounded.\r\n \r\n -- Rudyard Kipling\r\n+\r\n+Some new stuff\r\n"; + + g_repo = cl_git_sandbox_init("renames"); + + one = resolve_commit_oid_to_tree(g_repo, one_sha); + + /* no diff */ + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(0, (int)git_diff_num_deltas(diff)); + git_diff_list_free(diff); + + /* default diff */ + + cl_git_append2file("renames/untimely.txt", "\r\nSome new stuff\r\n"); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected0, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + /* attribute diff set to false */ + + cl_git_rewritefile("renames/.gitattributes", "untimely.txt -diff\n"); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + 
cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected1, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + /* attribute diff set to unconfigured value (should use default) */ + + cl_git_rewritefile("renames/.gitattributes", "untimely.txt diff=kipling0\n"); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected0, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + /* let's define that driver */ + + cl_git_pass(git_repository_config(&g_cfg, g_repo)); + cl_git_pass(git_config_set_bool(g_cfg, "diff.kipling0.binary", 1)); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected1, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + /* let's use a real driver with some regular expressions */ + + git_diff_driver_registry_free(g_repo->diff_drivers); + g_repo->diff_drivers = NULL; + + cl_git_pass(git_repository_config(&g_cfg, g_repo)); + cl_git_pass(git_config_set_bool(g_cfg, "diff.kipling0.binary", 0)); + cl_git_pass(git_config_set_string(g_cfg, "diff.kipling0.xfuncname", "^H")); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected2, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + git_tree_free(one); +} + From 54faddd299ccb6187a9747c1d3ee18d33e5edf7a Mon Sep 17 00:00:00 2001 
From: Russell Belfer Date: Wed, 12 Jun 2013 11:54:11 -0700 Subject: [PATCH 09/12] Fix some diff driver memory leaks --- src/diff_driver.c | 36 ++++++++++++++++++++---------------- tests-clar/diff/drivers.c | 17 ++++++++--------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/diff_driver.c b/src/diff_driver.c index 9c109e7d7..5d3274a11 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -163,10 +163,10 @@ static int git_diff_driver_load( memcpy(drv->name, driver_name, namelen); if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0) - goto fail; + goto done; if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) { if (error != GIT_ENOTFOUND) - goto fail; + goto done; /* diff..binary unspecified, so just continue */ giterr_clear(); } else if (git_config_parse_bool(&bval, val) < 0) { @@ -174,9 +174,8 @@ static int git_diff_driver_load( giterr_clear(); } else if (bval) { /* if diff..binary is true, just return the binary driver */ - git__free(drv); *out = &global_drivers[DIFF_DRIVER_BINARY]; - return 0; + goto done; } else { /* if diff..binary is false, force binary checks off */ /* but still may have custom function context patterns, etc. 
*/ @@ -191,7 +190,7 @@ static int git_diff_driver_load( if ((error = git_config_get_multivar( cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) { if (error != GIT_ENOTFOUND) - goto fail; + goto done; giterr_clear(); /* no diff..xfuncname, so just continue */ } @@ -200,7 +199,7 @@ static int git_diff_driver_load( if ((error = git_config_get_multivar( cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) { if (error != GIT_ENOTFOUND) - goto fail; + goto done; giterr_clear(); /* no diff..funcname, so just continue */ } @@ -214,12 +213,12 @@ static int git_diff_driver_load( git_buf_put(&name, "wordregex", strlen("wordregex")); if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) { if (error != GIT_ENOTFOUND) - goto fail; + goto done; giterr_clear(); /* no diff..wordregex, so just continue */ } else if ((error = regcomp(&drv->word_pattern, val, REG_EXTENDED)) != 0) { /* TODO: warning about bad regex instead of failure */ error = giterr_set_regex(&drv->word_pattern, error); - goto fail; + goto done; } else { found_driver = true; } @@ -228,21 +227,26 @@ static int git_diff_driver_load( * diff in drv->other_flags */ - /* if no driver config found, fall back on AUTO driver */ + /* if no driver config found at all, fall back on AUTO driver */ if (!found_driver) - goto fail; + goto done; /* store driver in registry */ git_strmap_insert(reg->drivers, drv->name, drv, error); if (error < 0) - goto fail; + goto done; *out = drv; - return 0; -fail: - git_diff_driver_free(drv); - *out = &global_drivers[DIFF_DRIVER_AUTO]; +done: + git_buf_free(&name); + + if (!*out) + *out = &global_drivers[DIFF_DRIVER_AUTO]; + + if (drv && drv != *out) + git_diff_driver_free(drv); + return error; } @@ -289,7 +293,7 @@ void git_diff_driver_free(git_diff_driver *driver) if (!driver) return; - for (i = 0; i > git_array_size(driver->fn_patterns); ++i) + for (i = 0; i < git_array_size(driver->fn_patterns); ++i) regfree(git_array_get(driver->fn_patterns, i)); 
git_array_clear(driver->fn_patterns); diff --git a/tests-clar/diff/drivers.c b/tests-clar/diff/drivers.c index 8f7b1f21c..06ab2ff14 100644 --- a/tests-clar/diff/drivers.c +++ b/tests-clar/diff/drivers.c @@ -4,7 +4,6 @@ #include "diff_driver.h" static git_repository *g_repo = NULL; -static git_config *g_cfg = NULL; void test_diff_drivers__initialize(void) { @@ -12,15 +11,13 @@ void test_diff_drivers__initialize(void) void test_diff_drivers__cleanup(void) { - git_config_free(g_cfg); - g_cfg = NULL; - cl_git_sandbox_cleanup(); g_repo = NULL; } void test_diff_drivers__patterns(void) { + git_config *cfg; const char *one_sha = "19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13"; git_tree *one; git_diff_list *diff; @@ -87,8 +84,9 @@ void test_diff_drivers__patterns(void) /* let's define that driver */ - cl_git_pass(git_repository_config(&g_cfg, g_repo)); - cl_git_pass(git_config_set_bool(g_cfg, "diff.kipling0.binary", 1)); + cl_git_pass(git_repository_config(&cfg, g_repo)); + cl_git_pass(git_config_set_bool(cfg, "diff.kipling0.binary", 1)); + git_config_free(cfg); cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); @@ -106,9 +104,10 @@ void test_diff_drivers__patterns(void) git_diff_driver_registry_free(g_repo->diff_drivers); g_repo->diff_drivers = NULL; - cl_git_pass(git_repository_config(&g_cfg, g_repo)); - cl_git_pass(git_config_set_bool(g_cfg, "diff.kipling0.binary", 0)); - cl_git_pass(git_config_set_string(g_cfg, "diff.kipling0.xfuncname", "^H")); + cl_git_pass(git_repository_config(&cfg, g_repo)); + cl_git_pass(git_config_set_bool(cfg, "diff.kipling0.binary", 0)); + cl_git_pass(git_config_set_string(cfg, "diff.kipling0.xfuncname", "^H")); + git_config_free(cfg); cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); From f9c824c592d7a23f7cc385c25c95a5d0c5c8687e Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 12 Jun 2013 11:55:27 -0700 
Subject: [PATCH 10/12] Add patch from blobs API This adds two new public APIs: git_diff_patch_from_blobs and git_diff_patch_from_blob_and_buffer, plus it refactors the code for git_diff_blobs and git_diff_blob_to_buffer so that the code is almost entirely shared between these APIs, and adds tests for the new APIs. --- include/git2/diff.h | 47 +++++++- src/diff_patch.c | 207 +++++++++++++++++++++++++---------- tests-clar/diff/blob.c | 238 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 434 insertions(+), 58 deletions(-) diff --git a/include/git2/diff.h b/include/git2/diff.h index 40e65b1e4..8113a56be 100644 --- a/include/git2/diff.h +++ b/include/git2/diff.h @@ -860,7 +860,7 @@ GIT_EXTERN(size_t) git_diff_patch_num_hunks( * @param total_additions Count of addition lines in output, can be NULL. * @param total_deletions Count of deletion lines in output, can be NULL. * @param patch The git_diff_patch object - * @return Number of lines in hunk or -1 if invalid hunk index + * @return 0 on success, <0 on error */ GIT_EXTERN(int) git_diff_patch_line_stats( size_t *total_context, @@ -1000,6 +1000,26 @@ GIT_EXTERN(int) git_diff_blobs( git_diff_data_cb line_cb, void *payload); +/** + * Directly generate a patch from the difference between two blobs. + * + * This is just like `git_diff_blobs()` except it generates a patch object + * for the difference instead of directly making callbacks. You can use the + * standard `git_diff_patch` accessor functions to read the patch data, and + * you must call `git_diff_patch_free()` on the patch when done.
+ * + * @param out The generated patch; NULL on error + * @param old_blob Blob for old side of diff, or NULL for empty blob + * @param new_blob Blob for new side of diff, or NULL for empty blob + * @param options Options for diff, or NULL for default options + * @return 0 on success or error code < 0 + */ +GIT_EXTERN(int) git_diff_patch_from_blobs( + git_diff_patch **out, + const git_blob *old_blob, + const git_blob *new_blob, + const git_diff_options *opts); + /** * Directly run a diff between a blob and a buffer. * @@ -1013,7 +1033,7 @@ GIT_EXTERN(int) git_diff_blobs( * the reverse, with GIT_DELTA_REMOVED and blob content removed. * * @param old_blob Blob for old side of diff, or NULL for empty blob - * @param buffer Raw data for new side of diff + * @param buffer Raw data for new side of diff, or NULL for empty * @param buffer_len Length of raw data for new side of diff * @param options Options for diff, or NULL for default options * @param file_cb Callback for "file"; made once if there is a diff; can be NULL @@ -1032,6 +1052,29 @@ GIT_EXTERN(int) git_diff_blob_to_buffer( git_diff_data_cb data_cb, void *payload); +/** + * Directly generate a patch from the difference between a blob and a buffer. + * + * This is just like `git_diff_blob_to_buffer()` except it generates a patch + * object for the difference instead of directly making callbacks. You can + * use the standard `git_diff_patch` accessor functions to read the patch + * data, and you must call `git_diff_patch_free()` on the patch when done. 
+ * + * @param out The generated patch; NULL on error + * @param old_blob Blob for old side of diff, or NULL for empty blob + * @param buffer Raw data for new side of diff, or NULL for empty + * @param buffer_len Length of raw data for new side of diff + * @param options Options for diff, or NULL for default options + * @return 0 on success or error code < 0 + */ +GIT_EXTERN(int) git_diff_patch_from_blob_and_buffer( + git_diff_patch **out, + const git_blob *old_blob, + const char *buf, + size_t buflen, + const git_diff_options *opts); + + GIT_END_DECL /** @} */ diff --git a/src/diff_patch.c b/src/diff_patch.c index fe22d678c..4c0b9a70c 100644 --- a/src/diff_patch.c +++ b/src/diff_patch.c @@ -265,33 +265,32 @@ int git_diff_foreach( } typedef struct { - git_xdiff_output xo; git_diff_patch patch; git_diff_delta delta; -} diff_single_info; +} diff_patch_with_delta; -static int diff_single_generate(diff_single_info *info) +static int diff_single_generate(diff_patch_with_delta *pd, git_xdiff_output *xo) { int error = 0; - git_diff_patch *patch = &info->patch; + git_diff_patch *patch = &pd->patch; bool has_old = ((patch->ofile.file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); bool has_new = ((patch->nfile.file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); - info->delta.status = has_new ? + pd->delta.status = has_new ? (has_old ? GIT_DELTA_MODIFIED : GIT_DELTA_ADDED) : (has_old ? 
GIT_DELTA_DELETED : GIT_DELTA_UNTRACKED); if (git_oid_equal(&patch->nfile.file.oid, &patch->ofile.file.oid)) - info->delta.status = GIT_DELTA_UNMODIFIED; + pd->delta.status = GIT_DELTA_UNMODIFIED; - patch->delta = &info->delta; + patch->delta = &pd->delta; diff_patch_init_common(patch); - error = diff_patch_file_callback(patch, (git_diff_output *)&info->xo); + error = diff_patch_file_callback(patch, (git_diff_output *)xo); if (!error) - error = diff_patch_generate(patch, (git_diff_output *)&info->xo); + error = diff_patch_generate(patch, (git_diff_output *)xo); if (error == GIT_EUSER) giterr_clear(); /* don't leave error message set invalidly */ @@ -299,6 +298,40 @@ static int diff_single_generate(diff_single_info *info) return error; } +static int diff_patch_from_blobs( + diff_patch_with_delta *pd, + git_xdiff_output *xo, + const git_blob *old_blob, + const git_blob *new_blob, + const git_diff_options *opts) +{ + int error = 0; + git_repository *repo = + new_blob ? git_object_owner((const git_object *)new_blob) : + old_blob ? 
git_object_owner((const git_object *)old_blob) : NULL; + + GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); + + pd->patch.delta = &pd->delta; + + if (!repo) /* return two NULL items as UNMODIFIED delta */ + return 0; + + if (opts && (opts->flags & GIT_DIFF_REVERSE) != 0) { + const git_blob *swap = old_blob; + old_blob = new_blob; + new_blob = swap; + } + + if ((error = diff_file_content_init_from_blob( + &pd->patch.ofile, repo, opts, old_blob)) < 0 || + (error = diff_file_content_init_from_blob( + &pd->patch.nfile, repo, opts, new_blob)) < 0) + return error; + + return diff_single_generate(pd, xo); +} + int git_diff_blobs( const git_blob *old_blob, const git_blob *new_blob, @@ -309,37 +342,85 @@ int git_diff_blobs( void *payload) { int error = 0; - diff_single_info info; + diff_patch_with_delta pd; + git_xdiff_output xo; + + memset(&pd, 0, sizeof(pd)); + memset(&xo, 0, sizeof(xo)); + + diff_output_init( + (git_diff_output *)&xo, opts, file_cb, hunk_cb, data_cb, payload); + git_xdiff_init(&xo, opts); + + error = diff_patch_from_blobs(&pd, &xo, old_blob, new_blob, opts); + + git_diff_patch_free((git_diff_patch *)&pd); + + return error; +} + +int git_diff_patch_from_blobs( + git_diff_patch **out, + const git_blob *old_blob, + const git_blob *new_blob, + const git_diff_options *opts) +{ + int error = 0; + diff_patch_with_delta *pd; + git_xdiff_output xo; + + assert(out); + *out = NULL; + + pd = git__calloc(1, sizeof(*pd)); + GITERR_CHECK_ALLOC(pd); + pd->patch.flags = GIT_DIFF_PATCH_ALLOCATED; + + memset(&xo, 0, sizeof(xo)); + + diff_output_to_patch((git_diff_output *)&xo, &pd->patch); + git_xdiff_init(&xo, opts); + + if (!(error = diff_patch_from_blobs(pd, &xo, old_blob, new_blob, opts))) + *out = (git_diff_patch *)pd; + else + git_diff_patch_free((git_diff_patch *)pd); + + return error; +} + +static int diff_patch_from_blob_and_buffer( + diff_patch_with_delta *pd, + git_xdiff_output *xo, + const git_blob *old_blob, + const char *buf, + size_t 
buflen, + const git_diff_options *opts) +{ + int error = 0; git_repository *repo = - new_blob ? git_object_owner((const git_object *)new_blob) : old_blob ? git_object_owner((const git_object *)old_blob) : NULL; GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); - if (!repo) /* Hmm, given two NULL blobs, silently do no callbacks? */ + pd->patch.delta = &pd->delta; + + if (!repo && !buf) /* return two NULL items as UNMODIFIED delta */ return 0; if (opts && (opts->flags & GIT_DIFF_REVERSE) != 0) { - const git_blob *swap = old_blob; - old_blob = new_blob; - new_blob = swap; + if (!(error = diff_file_content_init_from_raw( + &pd->patch.ofile, repo, opts, buf, buflen))) + error = diff_file_content_init_from_blob( + &pd->patch.nfile, repo, opts, old_blob); + } else { + if (!(error = diff_file_content_init_from_blob( + &pd->patch.ofile, repo, opts, old_blob))) + error = diff_file_content_init_from_raw( + &pd->patch.nfile, repo, opts, buf, buflen); } - memset(&info, 0, sizeof(info)); - - diff_output_init((git_diff_output *)&info.xo, - opts, file_cb, hunk_cb, data_cb, payload); - git_xdiff_init(&info.xo, opts); - - if (!(error = diff_file_content_init_from_blob( - &info.patch.ofile, repo, opts, old_blob)) && - !(error = diff_file_content_init_from_blob( - &info.patch.nfile, repo, opts, new_blob))) - error = diff_single_generate(&info); - - git_diff_patch_free(&info.patch); - - return error; + return diff_single_generate(pd, xo); } int git_diff_blob_to_buffer( @@ -353,36 +434,52 @@ int git_diff_blob_to_buffer( void *payload) { int error = 0; - diff_single_info info; - git_repository *repo = - old_blob ? git_object_owner((const git_object *)old_blob) : NULL; + diff_patch_with_delta pd; + git_xdiff_output xo; - GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); + memset(&pd, 0, sizeof(pd)); + memset(&xo, 0, sizeof(xo)); - if (!repo && !buf) /* Hmm, given NULLs, silently do no callbacks? 
*/ - return 0; + diff_output_init( + (git_diff_output *)&xo, opts, file_cb, hunk_cb, data_cb, payload); + git_xdiff_init(&xo, opts); - memset(&info, 0, sizeof(info)); + error = diff_patch_from_blob_and_buffer( + &pd, &xo, old_blob, buf, buflen, opts); - diff_output_init((git_diff_output *)&info.xo, - opts, file_cb, hunk_cb, data_cb, payload); - git_xdiff_init(&info.xo, opts); + git_diff_patch_free((git_diff_patch *)&pd); - if (opts && (opts->flags & GIT_DIFF_REVERSE) != 0) { - if (!(error = diff_file_content_init_from_raw( - &info.patch.ofile, repo, opts, buf, buflen))) - error = diff_file_content_init_from_blob( - &info.patch.nfile, repo, opts, old_blob); - } else { - if (!(error = diff_file_content_init_from_blob( - &info.patch.ofile, repo, opts, old_blob))) - error = diff_file_content_init_from_raw( - &info.patch.nfile, repo, opts, buf, buflen); - } + return error; +} - error = diff_single_generate(&info); +int git_diff_patch_from_blob_and_buffer( + git_diff_patch **out, + const git_blob *old_blob, + const char *buf, + size_t buflen, + const git_diff_options *opts) +{ + int error = 0; + diff_patch_with_delta *pd; + git_xdiff_output xo; - git_diff_patch_free(&info.patch); + assert(out); + *out = NULL; + + pd = git__calloc(1, sizeof(*pd)); + GITERR_CHECK_ALLOC(pd); + pd->patch.flags = GIT_DIFF_PATCH_ALLOCATED; + + memset(&xo, 0, sizeof(xo)); + + diff_output_to_patch((git_diff_output *)&xo, &pd->patch); + git_xdiff_init(&xo, opts); + + if (!(error = diff_patch_from_blob_and_buffer( + pd, &xo, old_blob, buf, buflen, opts))) + *out = (git_diff_patch *)pd; + else + git_diff_patch_free((git_diff_patch *)pd); return error; } @@ -599,9 +696,7 @@ static int diff_patch_file_cb( float progress, void *payload) { - GIT_UNUSED(delta); - GIT_UNUSED(progress); - GIT_UNUSED(payload); + GIT_UNUSED(delta); GIT_UNUSED(progress); GIT_UNUSED(payload); return 0; } diff --git a/tests-clar/diff/blob.c b/tests-clar/diff/blob.c index 2ac8dbc51..b12186d98 100644 --- a/tests-clar/diff/blob.c 
+++ b/tests-clar/diff/blob.c @@ -120,6 +120,93 @@ void test_diff_blob__can_compare_text_blobs(void) git_blob_free(c); } +void test_diff_blob__can_compare_text_blobs_with_patch(void) +{ + git_blob *a, *b, *c; + git_oid a_oid, b_oid, c_oid; + git_diff_patch *p; + size_t tc, ta, td; + + /* tests/resources/attr/root_test1 */ + cl_git_pass(git_oid_fromstrn(&a_oid, "45141a79", 8)); + cl_git_pass(git_blob_lookup_prefix(&a, g_repo, &a_oid, 4)); + + /* tests/resources/attr/root_test2 */ + cl_git_pass(git_oid_fromstrn(&b_oid, "4d713dc4", 8)); + cl_git_pass(git_blob_lookup_prefix(&b, g_repo, &b_oid, 4)); + + /* tests/resources/attr/root_test3 */ + cl_git_pass(git_oid_fromstrn(&c_oid, "c96bbb2c2557a832", 16)); + cl_git_pass(git_blob_lookup_prefix(&c, g_repo, &c_oid, 8)); + + /* Doing the equivalent of a `git diff -U1` on these files */ + + /* diff on tests/resources/attr/root_test1 */ + cl_git_pass(git_diff_patch_from_blobs(&p, a, b, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(6, git_diff_patch_num_lines_in_hunk(p, 0)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(1, (int)tc); + cl_assert_equal_i(5, (int)ta); + cl_assert_equal_i(0, (int)td); + + git_diff_patch_free(p); + + /* diff on tests/resources/attr/root_test2 */ + cl_git_pass(git_diff_patch_from_blobs(&p, b, c, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(15, git_diff_patch_num_lines_in_hunk(p, 0)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(3, (int)tc); + cl_assert_equal_i(9, (int)ta); + cl_assert_equal_i(3, (int)td); + + git_diff_patch_free(p); + + /* diff on tests/resources/attr/root_test3 */ + cl_git_pass(git_diff_patch_from_blobs(&p, a, c, &opts)); + + 
cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(13, git_diff_patch_num_lines_in_hunk(p, 0)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(0, (int)tc); + cl_assert_equal_i(12, (int)ta); + cl_assert_equal_i(1, (int)td); + + git_diff_patch_free(p); + + /* one more */ + cl_git_pass(git_diff_patch_from_blobs(&p, c, d, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(2, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(5, git_diff_patch_num_lines_in_hunk(p, 0)); + cl_assert_equal_i(9, git_diff_patch_num_lines_in_hunk(p, 1)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(4, (int)tc); + cl_assert_equal_i(6, (int)ta); + cl_assert_equal_i(4, (int)td); + + git_diff_patch_free(p); + + git_blob_free(a); + git_blob_free(b); + git_blob_free(c); +} + void test_diff_blob__can_compare_against_null_blobs(void) { git_blob *e = NULL; @@ -175,6 +262,66 @@ void test_diff_blob__can_compare_against_null_blobs(void) cl_assert_equal_i(0, expected.lines); } +void test_diff_blob__can_compare_against_null_blobs_with_patch(void) +{ + git_blob *e = NULL; + git_diff_patch *p; + int line; + char origin; + + cl_git_pass(git_diff_patch_from_blobs(&p, d, e, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_DELETED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(14, git_diff_patch_num_lines_in_hunk(p, 0)); + + for (line = 0; line < git_diff_patch_num_lines_in_hunk(p, 0); ++line) { + cl_git_pass(git_diff_patch_get_line_in_hunk( + &origin, NULL, NULL, NULL, NULL, p, 0, line)); + cl_assert_equal_i(GIT_DIFF_LINE_DELETION, (int)origin); + } + + git_diff_patch_free(p); + + opts.flags |= GIT_DIFF_REVERSE; + + cl_git_pass(git_diff_patch_from_blobs(&p, 
d, e, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_ADDED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(14, git_diff_patch_num_lines_in_hunk(p, 0)); + + for (line = 0; line < git_diff_patch_num_lines_in_hunk(p, 0); ++line) { + cl_git_pass(git_diff_patch_get_line_in_hunk( + &origin, NULL, NULL, NULL, NULL, p, 0, line)); + cl_assert_equal_i(GIT_DIFF_LINE_ADDITION, (int)origin); + } + + git_diff_patch_free(p); + + opts.flags ^= GIT_DIFF_REVERSE; + + cl_git_pass(git_diff_patch_from_blobs(&p, alien, NULL, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_DELETED, git_diff_patch_delta(p)->status); + cl_assert((git_diff_patch_delta(p)->flags & GIT_DIFF_FLAG_BINARY) != 0); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + + git_diff_patch_free(p); + + cl_git_pass(git_diff_patch_from_blobs(&p, NULL, alien, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_ADDED, git_diff_patch_delta(p)->status); + cl_assert((git_diff_patch_delta(p)->flags & GIT_DIFF_FLAG_BINARY) != 0); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + + git_diff_patch_free(p); +} + static void assert_identical_blobs_comparison(diff_expects *expected) { cl_assert_equal_i(1, expected->files); @@ -206,6 +353,29 @@ void test_diff_blob__can_compare_identical_blobs(void) assert_identical_blobs_comparison(&expected); } +void test_diff_blob__can_compare_identical_blobs_with_patch(void) +{ + git_diff_patch *p; + + cl_git_pass(git_diff_patch_from_blobs(&p, d, d, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_UNMODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + git_diff_patch_free(p); + + cl_git_pass(git_diff_patch_from_blobs(&p, NULL, NULL, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_UNMODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + 
git_diff_patch_free(p); + + cl_git_pass(git_diff_patch_from_blobs(&p, alien, alien, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_UNMODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + git_diff_patch_free(p); +} + static void assert_binary_blobs_comparison(diff_expects *expected) { cl_assert(expected->files_binary > 0); @@ -428,6 +598,74 @@ void test_diff_blob__can_compare_blob_to_buffer(void) git_blob_free(a); } +void test_diff_blob__can_compare_blob_to_buffer_with_patch(void) +{ + git_diff_patch *p; + git_blob *a; + git_oid a_oid; + const char *a_content = "Hello from the root\n"; + const char *b_content = "Hello from the root\n\nSome additional lines\n\nDown here below\n\n"; + size_t tc, ta, td; + + /* tests/resources/attr/root_test1 */ + cl_git_pass(git_oid_fromstrn(&a_oid, "45141a79", 8)); + cl_git_pass(git_blob_lookup_prefix(&a, g_repo, &a_oid, 4)); + + /* diff from blob a to content of b */ + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, a, b_content, strlen(b_content), &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(6, git_diff_patch_num_lines_in_hunk(p, 0)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(1, (int)tc); + cl_assert_equal_i(5, (int)ta); + cl_assert_equal_i(0, (int)td); + + git_diff_patch_free(p); + + /* diff from blob a to content of a */ + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, a, a_content, strlen(a_content), &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_UNMODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + git_diff_patch_free(p); + + /* diff from NULL blob to content of a */ + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, NULL, a_content, strlen(a_content), &opts)); + cl_assert(p != NULL); + 
cl_assert_equal_i(GIT_DELTA_ADDED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(1, git_diff_patch_num_lines_in_hunk(p, 0)); + git_diff_patch_free(p); + + /* diff from blob a to NULL buffer */ + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, a, NULL, 0, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_DELETED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(1, git_diff_patch_num_lines_in_hunk(p, 0)); + git_diff_patch_free(p); + + /* diff with reverse */ + opts.flags ^= GIT_DIFF_REVERSE; + + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, a, NULL, 0, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_ADDED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(1, git_diff_patch_num_lines_in_hunk(p, 0)); + git_diff_patch_free(p); + + git_blob_free(a); +} static void assert_one_modified_with_lines(diff_expects *expected, int lines) { From ef3374a8a81786a7b544ed7eded53c95766eb02f Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 12 Jun 2013 13:46:44 -0700 Subject: [PATCH 11/12] Improvements to git_array This changes the size data to uint32_t, fixes the array growth logic to use a simple 1.5x multiplier, and uses a generic inline function for growing the array to make the git_array_alloc API feel more natural (i.e. it returns a pointer to the new item). 
--- src/array.h | 51 +++++++++++++++++++++++++++++++++++------------ src/diff_driver.c | 2 +- src/diff_patch.c | 4 ++-- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/array.h b/src/array.h index aadd021f1..2d77c71a0 100644 --- a/src/array.h +++ b/src/array.h @@ -9,7 +9,23 @@ #include "util.h" -#define git_array_t(type) struct { type *ptr; size_t size, asize; } +/* + * Use this to declare a typesafe resizable array of items, a la: + * + * git_array_t(int) my_ints = GIT_ARRAY_INIT; + * ... + * int *i = git_array_alloc(my_ints); + * GITERR_CHECK_ALLOC(i); + * ... + * git_array_clear(my_ints); + * + * You may also want to do things like: + * + * typedef git_array_t(my_struct) my_struct_array_t; + */ +#define git_array_t(type) struct { type *ptr; uint32_t size, asize; } + +#define GIT_ARRAY_INIT { NULL, 0, 0 } #define git_array_init(a) \ do { (a).size = (a).asize = 0; (a).ptr = NULL; } while (0) @@ -17,20 +33,29 @@ #define git_array_clear(a) \ do { git__free((a).ptr); git_array_init(a); } while (0) -#define git_array_grow(a) do { \ - void *new_array; size_t new_size = \ - ((a).asize >= 256) ? (a).asize + 256 : ((a).asize >= 8) ? (a).asize * 2 : 8; \ - new_array = git__realloc((a).ptr, new_size * sizeof(*(a).ptr)); \ - if (!new_array) { git_array_clear(a); } \ - else { (a).ptr = new_array; (a).asize = new_size; } \ - } while (0) - #define GITERR_CHECK_ARRAY(a) GITERR_CHECK_ALLOC((a).ptr) -#define git_array_alloc(a, el) do { \ - if ((a).size >= (a).asize) git_array_grow(a); \ - (el) = (a).ptr ? &(a).ptr[(a).size++] : NULL; \ - } while (0) + +typedef git_array_t(void) git_array_generic_t; + +/* use a generic array for growth so this can return the new item */ +GIT_INLINE(void *) git_array_grow(git_array_generic_t *a, size_t item_size) +{ + uint32_t new_size = (a->size < 8) ? 
8 : a->asize * 3 / 2; + void *new_array = git__realloc(a->ptr, new_size * item_size); + if (!new_array) { + git_array_clear(*a); + return NULL; + } else { + a->ptr = new_array; a->asize = new_size; a->size++; + return (((char *)a->ptr) + (a->size - 1) * item_size); + } +} + +#define git_array_alloc(a) \ + ((a).size >= (a).asize) ? \ + git_array_grow((git_array_generic_t *)&(a), sizeof(*(a).ptr)) : \ + (a).ptr ? &(a).ptr[(a).size++] : NULL #define git_array_last(a) ((a).size ? &(a).ptr[(a).size - 1] : NULL) diff --git a/src/diff_driver.c b/src/diff_driver.c index 5d3274a11..ae2b7c319 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -93,7 +93,7 @@ static int diff_driver_add_funcname( return error; } - git_array_alloc(drv->fn_patterns, re_ptr); + re_ptr = git_array_alloc(drv->fn_patterns); GITERR_CHECK_ALLOC(re_ptr); memcpy(re_ptr, &re, sizeof(re)); diff --git a/src/diff_patch.c b/src/diff_patch.c index 4c0b9a70c..05dee5ef7 100644 --- a/src/diff_patch.c +++ b/src/diff_patch.c @@ -712,7 +712,7 @@ static int diff_patch_hunk_cb( GIT_UNUSED(delta); - git_array_alloc(patch->hunks, hunk); + hunk = git_array_alloc(patch->hunks); GITERR_CHECK_ALLOC(hunk); memcpy(&hunk->range, range, sizeof(hunk->range)); @@ -749,7 +749,7 @@ static int diff_patch_line_cb( hunk = git_array_last(patch->hunks); GITERR_CHECK_ALLOC(hunk); - git_array_alloc(patch->lines, line); + line = git_array_alloc(patch->lines); GITERR_CHECK_ALLOC(line); line->ptr = content; From 360f42f4b3f5de31270416220bd799b951202b2d Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Wed, 12 Jun 2013 14:18:09 -0700 Subject: [PATCH 12/12] Fix diff header naming issues This makes the git_diff_patch definition private to diff_patch.c and fixes a number of other header file naming inconsistencies to use `git_` prefixes on functions and structures that are shared between files. 
--- src/diff_file.c | 14 ++--- src/diff_file.h | 12 ++--- src/diff_patch.c | 133 ++++++++++++++++++++++++++++++++++++++++++----- src/diff_patch.h | 49 ++++------------- src/diff_print.c | 22 ++------ src/diff_xdiff.c | 19 ++++--- 6 files changed, 154 insertions(+), 95 deletions(-) diff --git a/src/diff_file.c b/src/diff_file.c index 5bdb9e4bf..4fd1177ae 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -85,7 +85,7 @@ static int diff_file_content_init_common( return 0; } -int diff_file_content_init_from_diff( +int git_diff_file_content__init_from_diff( git_diff_file_content *fc, git_diff_list *diff, size_t delta_index, @@ -127,7 +127,7 @@ int diff_file_content_init_from_diff( return diff_file_content_init_common(fc, &diff->opts); } -int diff_file_content_init_from_blob( +int git_diff_file_content__init_from_blob( git_diff_file_content *fc, git_repository *repo, const git_diff_options *opts, @@ -152,7 +152,7 @@ int diff_file_content_init_from_blob( return diff_file_content_init_common(fc, opts); } -int diff_file_content_init_from_raw( +int git_diff_file_content__init_from_raw( git_diff_file_content *fc, git_repository *repo, const git_diff_options *opts, @@ -385,7 +385,7 @@ static int diff_file_content_load_workdir(git_diff_file_content *fc) return error; } -int diff_file_content_load(git_diff_file_content *fc) +int git_diff_file_content__load(git_diff_file_content *fc) { int error = 0; @@ -409,7 +409,7 @@ int diff_file_content_load(git_diff_file_content *fc) return 0; } -void diff_file_content_unload(git_diff_file_content *fc) +void git_diff_file_content__unload(git_diff_file_content *fc) { if (fc->file.flags & GIT_DIFF_FLAG__FREE_DATA) { git__free(fc->map.data); @@ -433,9 +433,9 @@ void diff_file_content_unload(git_diff_file_content *fc) fc->file.flags &= ~GIT_DIFF_FLAG__LOADED; } -void diff_file_content_clear(git_diff_file_content *fc) +void git_diff_file_content__clear(git_diff_file_content *fc) { - diff_file_content_unload(fc); + 
git_diff_file_content__unload(fc); /* for now, nothing else to do */ } diff --git a/src/diff_file.h b/src/diff_file.h index ab7b1dc1f..afad8510b 100644 --- a/src/diff_file.h +++ b/src/diff_file.h @@ -24,19 +24,19 @@ typedef struct { git_map map; } git_diff_file_content; -extern int diff_file_content_init_from_diff( +extern int git_diff_file_content__init_from_diff( git_diff_file_content *fc, git_diff_list *diff, size_t delta_index, bool use_old); -extern int diff_file_content_init_from_blob( +extern int git_diff_file_content__init_from_blob( git_diff_file_content *fc, git_repository *repo, const git_diff_options *opts, const git_blob *blob); -extern int diff_file_content_init_from_raw( +extern int git_diff_file_content__init_from_raw( git_diff_file_content *fc, git_repository *repo, const git_diff_options *opts, @@ -44,12 +44,12 @@ extern int diff_file_content_init_from_raw( size_t buflen); /* this loads the blob/file-on-disk as needed */ -extern int diff_file_content_load(git_diff_file_content *fc); +extern int git_diff_file_content__load(git_diff_file_content *fc); /* this releases the blob/file-in-memory */ -extern void diff_file_content_unload(git_diff_file_content *fc); +extern void git_diff_file_content__unload(git_diff_file_content *fc); /* this unloads and also releases any other resources */ -extern void diff_file_content_clear(git_diff_file_content *fc); +extern void git_diff_file_content__clear(git_diff_file_content *fc); #endif diff --git a/src/diff_patch.c b/src/diff_patch.c index 05dee5ef7..a1e1fe84c 100644 --- a/src/diff_patch.c +++ b/src/diff_patch.c @@ -11,6 +11,49 @@ #include "diff_patch.h" #include "diff_xdiff.h" +/* cached information about a single span in a diff */ +typedef struct diff_patch_line diff_patch_line; +struct diff_patch_line { + const char *ptr; + size_t len; + size_t lines, oldno, newno; + char origin; +}; + +/* cached information about a hunk in a diff */ +typedef struct diff_patch_hunk diff_patch_hunk; +struct diff_patch_hunk { 
+ git_diff_range range; + char header[128]; + size_t header_len; + size_t line_start; + size_t line_count; +}; + +struct git_diff_patch { + git_refcount rc; + git_diff_list *diff; /* for refcount purposes, maybe NULL for blob diffs */ + git_diff_delta *delta; + size_t delta_index; + git_diff_file_content ofile; + git_diff_file_content nfile; + uint32_t flags; + git_array_t(diff_patch_hunk) hunks; + git_array_t(diff_patch_line) lines; + size_t oldno, newno; + size_t content_size; + git_pool flattened; +}; + +enum { + GIT_DIFF_PATCH_ALLOCATED = (1 << 0), + GIT_DIFF_PATCH_INITIALIZED = (1 << 1), + GIT_DIFF_PATCH_LOADED = (1 << 2), + GIT_DIFF_PATCH_DIFFABLE = (1 << 3), + GIT_DIFF_PATCH_DIFFED = (1 << 4), + GIT_DIFF_PATCH_FLATTENED = (1 << 5), +}; + static void diff_output_init(git_diff_output*, const git_diff_options*, git_diff_file_cb, git_diff_hunk_cb, git_diff_data_cb, void*); @@ -53,9 +96,9 @@ static int diff_patch_init_from_diff( patch->delta = git_vector_get(&diff->deltas, delta_index); patch->delta_index = delta_index; - if ((error = diff_file_content_init_from_diff( + if ((error = git_diff_file_content__init_from_diff( &patch->ofile, diff, delta_index, true)) < 0 || - (error = diff_file_content_init_from_diff( + (error = git_diff_file_content__init_from_diff( &patch->nfile, diff, delta_index, false)) < 0) return error; @@ -110,24 +153,24 @@ static int diff_patch_load(git_diff_patch *patch, git_diff_output *output) * need 2x data size and this minimizes peak memory footprint */ if (patch->ofile.src == GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = diff_file_content_load(&patch->ofile)) < 0 || + if ((error = git_diff_file_content__load(&patch->ofile)) < 0 || (patch->ofile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) goto cleanup; } if (patch->nfile.src == GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = diff_file_content_load(&patch->nfile)) < 0 || + if ((error = git_diff_file_content__load(&patch->nfile)) < 0 || (patch->nfile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) goto 
cleanup; } /* once workdir has been tried, load other data as needed */ if (patch->ofile.src != GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = diff_file_content_load(&patch->ofile)) < 0 || + if ((error = git_diff_file_content__load(&patch->ofile)) < 0 || (patch->ofile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) goto cleanup; } if (patch->nfile.src != GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = diff_file_content_load(&patch->nfile)) < 0 || + if ((error = git_diff_file_content__load(&patch->nfile)) < 0 || (patch->nfile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) goto cleanup; } @@ -198,8 +241,8 @@ static int diff_patch_generate(git_diff_patch *patch, git_diff_output *output) static void diff_patch_free(git_diff_patch *patch) { - diff_file_content_clear(&patch->ofile); - diff_file_content_clear(&patch->nfile); + git_diff_file_content__clear(&patch->ofile); + git_diff_file_content__clear(&patch->nfile); git_array_clear(patch->lines); git_array_clear(patch->hunks); @@ -323,9 +366,9 @@ static int diff_patch_from_blobs( new_blob = swap; } - if ((error = diff_file_content_init_from_blob( + if ((error = git_diff_file_content__init_from_blob( &pd->patch.ofile, repo, opts, old_blob)) < 0 || - (error = diff_file_content_init_from_blob( + (error = git_diff_file_content__init_from_blob( &pd->patch.nfile, repo, opts, new_blob)) < 0) return error; @@ -409,14 +452,14 @@ static int diff_patch_from_blob_and_buffer( return 0; if (opts && (opts->flags & GIT_DIFF_REVERSE) != 0) { - if (!(error = diff_file_content_init_from_raw( + if (!(error = git_diff_file_content__init_from_raw( &pd->patch.ofile, repo, opts, buf, buflen))) - error = diff_file_content_init_from_blob( + error = git_diff_file_content__init_from_blob( &pd->patch.nfile, repo, opts, old_blob); } else { - if (!(error = diff_file_content_init_from_blob( + if (!(error = git_diff_file_content__init_from_blob( &pd->patch.ofile, repo, opts, old_blob))) - error = diff_file_content_init_from_raw( + error = 
git_diff_file_content__init_from_raw( &pd->patch.nfile, repo, opts, buf, buflen); } @@ -690,6 +733,68 @@ notfound: return diff_error_outofrange(thing); } +git_diff_list *git_diff_patch__diff(git_diff_patch *patch) +{ + return patch->diff; +} + +git_diff_driver *git_diff_patch__driver(git_diff_patch *patch) +{ + /* ofile driver is representative for whole patch */ + return patch->ofile.driver; +} + +void git_diff_patch__old_data( + char **ptr, size_t *len, git_diff_patch *patch) +{ + *ptr = patch->ofile.map.data; + *len = patch->ofile.map.len; +} + +void git_diff_patch__new_data( + char **ptr, size_t *len, git_diff_patch *patch) +{ + *ptr = patch->nfile.map.data; + *len = patch->nfile.map.len; +} + +int git_diff_patch__invoke_callbacks( + git_diff_patch *patch, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb line_cb, + void *payload) +{ + int error = 0; + uint32_t i, j; + + if (file_cb) + error = file_cb(patch->delta, 0, payload); + + if (!hunk_cb && !line_cb) + return error; + + for (i = 0; !error && i < git_array_size(patch->hunks); ++i) { + diff_patch_hunk *h = git_array_get(patch->hunks, i); + + error = hunk_cb( + patch->delta, &h->range, h->header, h->header_len, payload); + + if (!line_cb) + continue; + + for (j = 0; !error && j < h->line_count; ++j) { + diff_patch_line *l = + git_array_get(patch->lines, h->line_start + j); + + error = line_cb( + patch->delta, &h->range, l->origin, l->ptr, l->len, payload); + } + } + + return error; +} + static int diff_patch_file_cb( const git_diff_delta *delta, diff --git a/src/diff_patch.h b/src/diff_patch.h index 7de6e1e5b..56af14600 100644 --- a/src/diff_patch.h +++ b/src/diff_patch.h @@ -12,48 +12,19 @@ #include "diff_file.h" #include "array.h" -/* cached information about a single span in a diff */ -typedef struct diff_patch_line diff_patch_line; -struct diff_patch_line { - const char *ptr; - size_t len; - size_t lines, oldno, newno; - char origin; -}; +extern git_diff_list 
*git_diff_patch__diff(git_diff_patch *); -/* cached information about a hunk in a diff */ -typedef struct diff_patch_hunk diff_patch_hunk; -struct diff_patch_hunk { - git_diff_range range; - char header[128]; - size_t header_len; - size_t line_start; - size_t line_count; -}; +extern git_diff_driver *git_diff_patch__driver(git_diff_patch *); -struct git_diff_patch { - git_refcount rc; - git_diff_list *diff; /* for refcount purposes, maybe NULL for blob diffs */ - git_diff_delta *delta; - size_t delta_index; - git_diff_file_content ofile; - git_diff_file_content nfile; - uint32_t flags; - git_array_t(diff_patch_hunk) hunks; - git_array_t(diff_patch_line) lines; - size_t oldno, newno; - size_t content_size; - git_pool flattened; -}; +extern void git_diff_patch__old_data(char **, size_t *, git_diff_patch *); +extern void git_diff_patch__new_data(char **, size_t *, git_diff_patch *); -enum { - GIT_DIFF_PATCH_ALLOCATED = (1 << 0), - GIT_DIFF_PATCH_INITIALIZED = (1 << 1), - GIT_DIFF_PATCH_LOADED = (1 << 2), - GIT_DIFF_PATCH_DIFFABLE = (1 << 3), - GIT_DIFF_PATCH_DIFFED = (1 << 4), - GIT_DIFF_PATCH_FLATTENED = (1 << 5), -}; +extern int git_diff_patch__invoke_callbacks( + git_diff_patch *patch, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb line_cb, + void *payload); typedef struct git_diff_output git_diff_output; struct git_diff_output { diff --git a/src/diff_print.c b/src/diff_print.c index 860876531..244aa6e1d 100644 --- a/src/diff_print.c +++ b/src/diff_print.c @@ -383,29 +383,13 @@ int git_diff_patch_print( int error; git_buf temp = GIT_BUF_INIT; diff_print_info pi; - size_t h, l; assert(patch && print_cb); if (!(error = diff_print_info_init( - &pi, &temp, patch->diff, print_cb, payload))) - error = print_patch_file(patch->delta, 0, &pi); - - for (h = 0; h < git_array_size(patch->hunks) && !error; ++h) { - diff_patch_hunk *hunk = git_array_get(patch->hunks, h); - - error = print_patch_hunk( - patch->delta, &hunk->range, hunk->header, 
hunk->header_len, &pi); - - for (l = 0; l < hunk->line_count && !error; ++l) { - diff_patch_line *line = - git_array_get(patch->lines, hunk->line_start + l); - - error = print_patch_line( - patch->delta, &hunk->range, - line->origin, line->ptr, line->len, &pi); - } - } + &pi, &temp, git_diff_patch__diff(patch), print_cb, payload))) + error = git_diff_patch__invoke_callbacks( + patch, print_patch_file, print_patch_hunk, print_patch_line, &pi); git_buf_free(&temp); diff --git a/src/diff_xdiff.c b/src/diff_xdiff.c index 91c56f727..7694fb996 100644 --- a/src/diff_xdiff.c +++ b/src/diff_xdiff.c @@ -59,6 +59,7 @@ static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len) { git_xdiff_info *info = priv; git_diff_patch *patch = info->patch; + const git_diff_delta *delta = git_diff_patch_delta(patch); git_diff_output *output = &info->xo->output; if (len == 1) { @@ -67,7 +68,7 @@ static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len) return output->error; if (output->hunk_cb != NULL && - output->hunk_cb(patch->delta, &info->range, + output->hunk_cb(delta, &info->range, bufs[0].ptr, bufs[0].size, output->payload)) output->error = GIT_EUSER; } @@ -80,7 +81,7 @@ static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len) GIT_DIFF_LINE_CONTEXT; if (output->data_cb != NULL && - output->data_cb(patch->delta, &info->range, + output->data_cb(delta, &info->range, origin, bufs[1].ptr, bufs[1].size, output->payload)) output->error = GIT_EUSER; } @@ -97,7 +98,7 @@ static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len) GIT_DIFF_LINE_CONTEXT_EOFNL; if (output->data_cb != NULL && - output->data_cb(patch->delta, &info->range, + output->data_cb(delta, &info->range, origin, bufs[2].ptr, bufs[2].size, output->payload)) output->error = GIT_EUSER; } @@ -110,7 +111,7 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) git_xdiff_output *xo = (git_xdiff_output *)output; git_xdiff_info info; git_diff_find_context_payload findctxt; - mmfile_t old_xdiff_data, 
new_xdiff_data; + mmfile_t xd_old_data, xd_new_data; memset(&info, 0, sizeof(info)); info.patch = patch; @@ -119,7 +120,7 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) xo->callback.priv = &info; git_diff_find_context_init( - &xo->config.find_func, &findctxt, patch->ofile.driver); + &xo->config.find_func, &findctxt, git_diff_patch__driver(patch)); xo->config.find_func_priv = &findctxt; if (xo->config.find_func != NULL) @@ -131,12 +132,10 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) * updates are needed to xo->params.flags */ - old_xdiff_data.ptr = patch->ofile.map.data; - old_xdiff_data.size = patch->ofile.map.len; - new_xdiff_data.ptr = patch->nfile.map.data; - new_xdiff_data.size = patch->nfile.map.len; + git_diff_patch__old_data(&xd_old_data.ptr, &xd_old_data.size, patch); + git_diff_patch__new_data(&xd_new_data.ptr, &xd_new_data.size, patch); - xdl_diff(&old_xdiff_data, &new_xdiff_data, + xdl_diff(&xd_old_data, &xd_new_data, &xo->params, &xo->config, &xo->callback); git_diff_find_context_clear(&findctxt);