diff --git a/docs/diff-internals.md b/docs/diff-internals.md new file mode 100644 index 000000000..53e71f5b5 --- /dev/null +++ b/docs/diff-internals.md @@ -0,0 +1,88 @@ +Diff is broken into four phases: + +1. Building a list of things that have changed. These changes are called + deltas (git_diff_delta objects) and are grouped into a git_diff_list. +2. Applying file similarity measurement for rename and copy detection (and + to potentially split files that have changed radically). This step is + optional. +3. Computing the textual diff for each delta. Not all deltas have a + meaningful textual diff. For those that do, the textual diff can + either be generated on the fly and passed to output callbacks or can be + turned into a git_diff_patch object. +4. Formatting the diff and/or patch into standard text formats (such as + patches, raw lists, etc). + +In the source code, step 1 is implemented in `src/diff.c`, step 2 in +`src/diff_tform.c`, step 3 in `src/diff_patch.c`, and step 4 in +`src/diff_print.c`. Additionally, when it comes to accessing file +content, everything goes through diff drivers that are implemented in +`src/diff_driver.c`. + +External Objects +---------------- + +* `git_diff_options` represents user choices about how a diff should be + performed and is passed to most diff generating functions. +* `git_diff_file` represents an item on one side of a possible delta +* `git_diff_delta` represents a pair of items that have changed in some + way - it contains two `git_diff_file` plus a status and other stuff. +* `git_diff_list` is a list of deltas along with information about how + those particular deltas were found. +* `git_diff_patch` represents the actual diff between a pair of items. In + some cases, a delta may not have a corresponding patch, if the objects + are binary, for example. The content of a patch will be a set of hunks + and lines. +* A `hunk` is a range of lines described by a `git_diff_range` (i.e. 
"lines + 10-20 in the old file became lines 12-23 in the new"). It will have a + header that compactly represents that information, and it will have a + number of lines of context surrounding added and deleted lines. +* A `line` is simply a line of data along with a `git_diff_line_t` value + that tells how the data should be interpreted (e.g. context or added). + +Internal Objects +---------------- + +* `git_diff_file_content` is an internal structure that represents the + data on one side of an item to be diffed; it is an augmented + `git_diff_file` with more flags and the actual file data. +** it is created from a repository plus a) a git_diff_file, b) a git_blob, + or c) raw data and size +** there are three main operations on git_diff_file_content: +*** _initialization_ sets up the data structure and does what it can up to, + but not including loading and looking at the actual data +*** _loading_ loads the data, preprocesses it (i.e. applies filters) and + potentially analyzes it (to decide if binary) +*** _free_ releases loaded data and frees any allocated memory + +* The internal structure of a `git_diff_patch` stores the actual diff + between a pair of `git_diff_file_content` items +** it may be "unset" if the items are not diffable +** "empty" if the items are the same +** otherwise it will consist of a set of hunks each of which covers some + number of lines of context, additions and deletions +** a patch is created from two git_diff_file_content items +** a patch is fully instantiated in three phases: +*** initial creation and initialization +*** loading of data and preliminary data examination +*** diffing of data and optional storage of diffs +** (TBD) if a patch is asked to store the diffs and the size of the diff + is significantly smaller than the raw data of the two sides, then the + patch may be flattened using a pool of string data + +* `git_diff_output` is an internal structure that represents an output + target for a `git_diff_patch` +** It 
consists of file, hunk, and line callbacks, plus a payload +** There is a standard flattened output that can be used for plain text output +** Typically we use a `git_xdiff_output` which drives the callbacks via the + xdiff code taken from core Git. + +* `git_diff_driver` is an internal structure that encapsulates the logic + for a given type of file +** a driver is looked up based on the name and mode of a file. +** the driver can then be used to: +*** determine if a file is binary (by attributes, by git_diff_options + settings, or by examining the content) +*** give you a function pointer that is used to evaluate function context + for hunk headers +** At some point, the logic for getting a filtered version of file content + or calculating the OID of a file may be moved into the driver. diff --git a/include/git2/diff.h b/include/git2/diff.h index d26456cb0..8113a56be 100644 --- a/include/git2/diff.h +++ b/include/git2/diff.h @@ -148,6 +148,9 @@ typedef enum { * Of course, ignore rules are still checked for the directory itself. */ GIT_DIFF_FAST_UNTRACKED_DIRS = (1 << 19), + + /** Treat all files as binary, disabling text diffs */ + GIT_DIFF_FORCE_BINARY = (1 << 20), } git_diff_option_t; /** @@ -857,7 +860,7 @@ GIT_EXTERN(size_t) git_diff_patch_num_hunks( * @param total_additions Count of addition lines in output, can be NULL. * @param total_deletions Count of deletion lines in output, can be NULL. * @param patch The git_diff_patch object - * @return Number of lines in hunk or -1 if invalid hunk index + * @return 0 on success, <0 on error */ GIT_EXTERN(int) git_diff_patch_line_stats( size_t *total_context, @@ -997,6 +1000,26 @@ GIT_EXTERN(int) git_diff_blobs( git_diff_data_cb line_cb, void *payload); +/** + * Directly generate a patch from the difference between two blobs. + * + * This is just like `git_diff_blobs()` except it generates a patch object + * for the difference instead of directly making callbacks. 
You can use the + * standard `git_diff_patch` accessor functions to read the patch data, and + * you must call `git_diff_patch_free()` on the patch when done. + * + * @param out The generated patch; NULL on error + * @param old_blob Blob for old side of diff, or NULL for empty blob + * @param new_blob Blob for new side of diff, or NULL for empty blob + * @param opts Options for diff, or NULL for default options + * @return 0 on success or error code < 0 + */ +GIT_EXTERN(int) git_diff_patch_from_blobs( + git_diff_patch **out, + const git_blob *old_blob, + const git_blob *new_blob, + const git_diff_options *opts); + /** * Directly run a diff between a blob and a buffer. * @@ -1010,7 +1033,7 @@ GIT_EXTERN(int) git_diff_blobs( * the reverse, with GIT_DELTA_REMOVED and blob content removed. * * @param old_blob Blob for old side of diff, or NULL for empty blob - * @param buffer Raw data for new side of diff + * @param buffer Raw data for new side of diff, or NULL for empty * @param buffer_len Length of raw data for new side of diff * @param options Options for diff, or NULL for default options * @param file_cb Callback for "file"; made once if there is a diff; can be NULL @@ -1029,6 +1052,29 @@ GIT_EXTERN(int) git_diff_blob_to_buffer( git_diff_data_cb data_cb, void *payload); +/** + * Directly generate a patch from the difference between a blob and a buffer. + * + * This is just like `git_diff_blob_to_buffer()` except it generates a patch + * object for the difference instead of directly making callbacks. You can + * use the standard `git_diff_patch` accessor functions to read the patch + * data, and you must call `git_diff_patch_free()` on the patch when done. 
+ * + * @param out The generated patch; NULL on error + * @param old_blob Blob for old side of diff, or NULL for empty blob + * @param buf Raw data for new side of diff, or NULL for empty + * @param buflen Length of raw data for new side of diff + * @param opts Options for diff, or NULL for default options + * @return 0 on success or error code < 0 + */ +GIT_EXTERN(int) git_diff_patch_from_blob_and_buffer( + git_diff_patch **out, + const git_blob *old_blob, + const char *buf, + size_t buflen, + const git_diff_options *opts); + + GIT_END_DECL /** @} */ diff --git a/src/array.h b/src/array.h new file mode 100644 index 000000000..2d77c71a0 --- /dev/null +++ b/src/array.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_array_h__ +#define INCLUDE_array_h__ + +#include "util.h" + +/* + * Use this to declare a typesafe resizable array of items, a la: + * + * git_array_t(int) my_ints = GIT_ARRAY_INIT; + * ... + * int *i = git_array_alloc(my_ints); + * GITERR_CHECK_ALLOC(i); + * ... + * git_array_clear(my_ints); + * + * You may also want to do things like: + * + * typedef git_array_t(my_struct) my_struct_array_t; + */ +#define git_array_t(type) struct { type *ptr; uint32_t size, asize; } + +#define GIT_ARRAY_INIT { NULL, 0, 0 } + +#define git_array_init(a) \ + do { (a).size = (a).asize = 0; (a).ptr = NULL; } while (0) + +#define git_array_clear(a) \ + do { git__free((a).ptr); git_array_init(a); } while (0) + +#define GITERR_CHECK_ARRAY(a) GITERR_CHECK_ALLOC((a).ptr) + + +typedef git_array_t(void) git_array_generic_t; + +/* use a generic array for growth so this can return the new item */ +GIT_INLINE(void *) git_array_grow(git_array_generic_t *a, size_t item_size) +{ + uint32_t new_size = (a->size < 8) ? 
8 : a->asize * 3 / 2; + void *new_array = git__realloc(a->ptr, new_size * item_size); + if (!new_array) { + git_array_clear(*a); + return NULL; + } else { + a->ptr = new_array; a->asize = new_size; a->size++; + return (((char *)a->ptr) + (a->size - 1) * item_size); + } +} + +#define git_array_alloc(a) \ + (((a).size >= (a).asize) ? \ + git_array_grow((git_array_generic_t *)&(a), sizeof(*(a).ptr)) : \ + ((a).ptr ? &(a).ptr[(a).size++] : NULL)) /* fully parenthesized: safe inside larger expressions such as !git_array_alloc(x) */ + +#define git_array_last(a) ((a).size ? &(a).ptr[(a).size - 1] : NULL) + +#define git_array_get(a, i) (((i) < (a).size) ? &(a).ptr[(i)] : NULL) + +#define git_array_size(a) ((a).size) + +#endif diff --git a/src/blob.c b/src/blob.c index a68c4cc3e..2e4d5f479 100644 --- a/src/blob.c +++ b/src/blob.c @@ -11,6 +11,7 @@ #include "git2/odb_backend.h" #include "common.h" +#include "filebuf.h" #include "blob.h" #include "filter.h" #include "buf_text.h" diff --git a/src/checkout.c b/src/checkout.c index 7a2e68300..ede0be8e8 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -20,6 +20,7 @@ #include "refs.h" #include "repository.h" +#include "index.h" #include "filter.h" #include "blob.h" #include "diff.h" diff --git a/src/clone.c b/src/clone.c index af3298fd0..5b6c6f77d 100644 --- a/src/clone.c +++ b/src/clone.c @@ -21,6 +21,7 @@ #include "fileops.h" #include "refs.h" #include "path.h" +#include "repository.h" static int create_branch( git_reference **branch, diff --git a/src/crlf.c b/src/crlf.c index 81268da83..65039f9cc 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -5,14 +5,16 @@ * a Linking Exception. For full terms see the included COPYING file. 
*/ +#include "git2/attr.h" +#include "git2/blob.h" +#include "git2/index.h" + #include "common.h" #include "fileops.h" #include "hash.h" #include "filter.h" #include "buf_text.h" #include "repository.h" -#include "git2/attr.h" -#include "git2/blob.h" struct crlf_attrs { int crlf_action; diff --git a/src/diff.c b/src/diff.c index 05ef4f16b..97ccb3cbd 100644 --- a/src/diff.c +++ b/src/diff.c @@ -11,6 +11,8 @@ #include "attr_file.h" #include "filter.h" #include "pathspec.h" +#include "index.h" +#include "odb.h" #define DIFF_FLAG_IS_SET(DIFF,FLAG) (((DIFF)->opts.flags & (FLAG)) != 0) #define DIFF_FLAG_ISNT_SET(DIFF,FLAG) (((DIFF)->opts.flags & (FLAG)) == 0) @@ -1170,3 +1172,73 @@ int git_diff_tree_to_workdir( return error; } + +size_t git_diff_num_deltas(git_diff_list *diff) +{ + assert(diff); + return (size_t)diff->deltas.length; +} + +size_t git_diff_num_deltas_of_type(git_diff_list *diff, git_delta_t type) +{ + size_t i, count = 0; + git_diff_delta *delta; + + assert(diff); + + git_vector_foreach(&diff->deltas, i, delta) { + count += (delta->status == type); + } + + return count; +} + +int git_diff__paired_foreach( + git_diff_list *idx2head, + git_diff_list *wd2idx, + int (*cb)(git_diff_delta *i2h, git_diff_delta *w2i, void *payload), + void *payload) +{ + int cmp; + git_diff_delta *i2h, *w2i; + size_t i, j, i_max, j_max; + int (*strcomp)(const char *, const char *); + + i_max = idx2head ? idx2head->deltas.length : 0; + j_max = wd2idx ? wd2idx->deltas.length : 0; + + /* Get appropriate strcmp function */ + strcomp = idx2head ? idx2head->strcomp : wd2idx ? wd2idx->strcomp : NULL; + + /* Assert both iterators use matching ignore-case. If this function ever + * supports merging diffs that are not sorted by the same function, then + * it will need to spool and sort on one of the results before merging + */ + if (idx2head && wd2idx) { + assert(idx2head->strcomp == wd2idx->strcomp); + } + + for (i = 0, j = 0; i < i_max || j < j_max; ) { + i2h = idx2head ? 
GIT_VECTOR_GET(&idx2head->deltas,i) : NULL; + w2i = wd2idx ? GIT_VECTOR_GET(&wd2idx->deltas,j) : NULL; + + cmp = !w2i ? -1 : !i2h ? 1 : + strcomp(i2h->old_file.path, w2i->old_file.path); + + if (cmp < 0) { + if (cb(i2h, NULL, payload)) + return GIT_EUSER; + i++; + } else if (cmp > 0) { + if (cb(NULL, w2i, payload)) + return GIT_EUSER; + j++; + } else { + if (cb(i2h, w2i, payload)) + return GIT_EUSER; + i++; j++; + } + } + + return 0; +} diff --git a/src/diff.h b/src/diff.h index ac8ab2aed..ad12e7731 100644 --- a/src/diff.h +++ b/src/diff.h @@ -29,11 +29,16 @@ enum { GIT_DIFFCAPS_TRUST_NANOSECS = (1 << 5), /* use stat time nanoseconds */ }; +#define DIFF_FLAGS_KNOWN_BINARY (GIT_DIFF_FLAG_BINARY|GIT_DIFF_FLAG_NOT_BINARY) +#define DIFF_FLAGS_NOT_BINARY (GIT_DIFF_FLAG_NOT_BINARY|GIT_DIFF_FLAG__NO_DATA) + enum { GIT_DIFF_FLAG__FREE_PATH = (1 << 7), /* `path` is allocated memory */ GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */ GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */ GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */ + GIT_DIFF_FLAG__FREE_BLOB = (1 << 11), /* release the blob when done */ + GIT_DIFF_FLAG__LOADED = (1 << 12), /* file data has been loaded */ GIT_DIFF_FLAG__TO_DELETE = (1 << 16), /* delete entry during rename det. */ GIT_DIFF_FLAG__TO_SPLIT = (1 << 17), /* split entry during rename det. 
*/ @@ -83,6 +88,12 @@ extern int git_diff__from_iterators( git_iterator *new_iter, const git_diff_options *opts); +extern int git_diff__paired_foreach( + git_diff_list *idx2head, + git_diff_list *wd2idx, + int (*cb)(git_diff_delta *i2h, git_diff_delta *w2i, void *payload), + void *payload); + int git_diff_find_similar__hashsig_for_file( void **out, const git_diff_file *f, const char *path, void *p); diff --git a/src/diff_driver.c b/src/diff_driver.c new file mode 100644 index 000000000..ae2b7c319 --- /dev/null +++ b/src/diff_driver.c @@ -0,0 +1,405 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#include "common.h" + +#include "git2/attr.h" + +#include "diff.h" +#include "diff_patch.h" +#include "diff_driver.h" +#include "strmap.h" +#include "map.h" +#include "buf_text.h" +#include "repository.h" + +GIT__USE_STRMAP; + +typedef enum { + DIFF_DRIVER_AUTO = 0, + DIFF_DRIVER_BINARY = 1, + DIFF_DRIVER_TEXT = 2, + DIFF_DRIVER_PATTERNLIST = 3, +} git_diff_driver_t; + +enum { + DIFF_CONTEXT_FIND_NORMAL = 0, + DIFF_CONTEXT_FIND_ICASE = (1 << 0), + DIFF_CONTEXT_FIND_EXT = (1 << 1), +}; + +/* data for finding function context for a given file type */ +struct git_diff_driver { + git_diff_driver_t type; + uint32_t binary_flags; + uint32_t other_flags; + git_array_t(regex_t) fn_patterns; + regex_t word_pattern; + char name[GIT_FLEX_ARRAY]; +}; + +struct git_diff_driver_registry { + git_strmap *drivers; +}; + +#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY) + +static git_diff_driver global_drivers[3] = { + { DIFF_DRIVER_AUTO, 0, 0, }, + { DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 }, + { DIFF_DRIVER_TEXT, GIT_DIFF_FORCE_TEXT, 0 }, +}; + +git_diff_driver_registry *git_diff_driver_registry_new() +{ + git_diff_driver_registry *reg = + git__calloc(1, sizeof(git_diff_driver_registry)); + 
if (!reg) + return NULL; + + if ((reg->drivers = git_strmap_alloc()) == NULL) { + git_diff_driver_registry_free(reg); + return NULL; + } + + return reg; +} + +void git_diff_driver_registry_free(git_diff_driver_registry *reg) +{ + git_diff_driver *drv; + + if (!reg) + return; + + git_strmap_foreach_value(reg->drivers, drv, git_diff_driver_free(drv)); + git_strmap_free(reg->drivers); + git__free(reg); +} + +/* compile `name` as a regex and append it to the driver's function-context patterns */ +static int diff_driver_add_funcname( + git_diff_driver *drv, const char *name, int regex_flags) +{ + int error; + regex_t re, *re_ptr; + + if ((error = regcomp(&re, name, regex_flags)) != 0) { + /* TODO: warning about bad regex instead of failure */ + error = giterr_set_regex(&re, error); + regfree(&re); + return error; + } + + re_ptr = git_array_alloc(drv->fn_patterns); + if (!re_ptr) + regfree(&re); /* nothing took ownership of the compiled regex; release it before bailing out */ + GITERR_CHECK_ALLOC(re_ptr); + + memcpy(re_ptr, &re, sizeof(re)); + return 0; +} + +static int diff_driver_xfuncname(const git_config_entry *entry, void *payload) +{ + return diff_driver_add_funcname(payload, entry->value, REG_EXTENDED); +} + +static int diff_driver_funcname(const git_config_entry *entry, void *payload) +{ + return diff_driver_add_funcname(payload, entry->value, 0); +} + +static git_diff_driver_registry *git_repository_driver_registry( + git_repository *repo) +{ + if (!repo->diff_drivers) { + git_diff_driver_registry *reg = git_diff_driver_registry_new(); + reg = git__compare_and_swap(&repo->diff_drivers, NULL, reg); + + if (reg != NULL) /* if we race, free losing allocation */ + git_diff_driver_registry_free(reg); + } + + if (!repo->diff_drivers) + giterr_set(GITERR_REPOSITORY, "Unable to create diff driver registry"); + + return repo->diff_drivers; +} + +static int git_diff_driver_load( + git_diff_driver **out, git_repository *repo, const char *driver_name) +{ + int error = 0, bval; + git_diff_driver_registry *reg; + git_diff_driver *drv; + size_t namelen = strlen(driver_name); + khiter_t pos; + git_config *cfg; + git_buf name = GIT_BUF_INIT; + const char *val; + bool 
found_driver = false; + + reg = git_repository_driver_registry(repo); + if (!reg) + return -1; + else { + pos = git_strmap_lookup_index(reg->drivers, driver_name); + if (git_strmap_valid_index(reg->drivers, pos)) { + *out = git_strmap_value_at(reg->drivers, pos); + return 0; + } + } + + /* if you can't read config for repo, just use default driver */ + if (git_repository_config__weakptr(&cfg, repo) < 0) { + giterr_clear(); + return GIT_ENOTFOUND; + } + + drv = git__calloc(1, sizeof(git_diff_driver) + namelen + 1); + GITERR_CHECK_ALLOC(drv); + drv->type = DIFF_DRIVER_AUTO; + memcpy(drv->name, driver_name, namelen); + + if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0) + goto done; + if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) { + if (error != GIT_ENOTFOUND) + goto done; + /* diff..binary unspecified, so just continue */ + giterr_clear(); + } else if (git_config_parse_bool(&bval, val) < 0) { + /* TODO: warn that diff..binary has invalid value */ + giterr_clear(); + } else if (bval) { + /* if diff..binary is true, just return the binary driver */ + *out = &global_drivers[DIFF_DRIVER_BINARY]; + goto done; + } else { + /* if diff..binary is false, force binary checks off */ + /* but still may have custom function context patterns, etc. 
*/ + drv->binary_flags = GIT_DIFF_FORCE_TEXT; + found_driver = true; + } + + /* TODO: warn if diff..command or diff..textconv are set */ + + git_buf_truncate(&name, namelen + strlen("diff..")); + git_buf_put(&name, "xfuncname", strlen("xfuncname")); + if ((error = git_config_get_multivar( + cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) { + if (error != GIT_ENOTFOUND) + goto done; + giterr_clear(); /* no diff..xfuncname, so just continue */ + } + + git_buf_truncate(&name, namelen + strlen("diff..")); + git_buf_put(&name, "funcname", strlen("funcname")); + if ((error = git_config_get_multivar( + cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) { + if (error != GIT_ENOTFOUND) + goto done; + giterr_clear(); /* no diff..funcname, so just continue */ + } + + /* if we found any patterns, set driver type to use correct callback */ + if (git_array_size(drv->fn_patterns) > 0) { + drv->type = DIFF_DRIVER_PATTERNLIST; + found_driver = true; + } + + git_buf_truncate(&name, namelen + strlen("diff..")); + git_buf_put(&name, "wordregex", strlen("wordregex")); + if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) { + if (error != GIT_ENOTFOUND) + goto done; + giterr_clear(); /* no diff..wordregex, so just continue */ + } else if ((error = regcomp(&drv->word_pattern, val, REG_EXTENDED)) != 0) { + /* TODO: warning about bad regex instead of failure */ + error = giterr_set_regex(&drv->word_pattern, error); + goto done; + } else { + found_driver = true; + } + + /* TODO: look up diff..algorithm to turn on minimal / patience + * diff in drv->other_flags + */ + + /* if no driver config found at all, fall back on AUTO driver */ + if (!found_driver) + goto done; + + /* store driver in registry */ + git_strmap_insert(reg->drivers, drv->name, drv, error); + if (error < 0) + goto done; + + *out = drv; + +done: + git_buf_free(&name); + + if (!*out) + *out = &global_drivers[DIFF_DRIVER_AUTO]; + + if (drv && drv != *out) + git_diff_driver_free(drv); + + return error; +} + 
+/* pick the diff driver for `path` from its "diff" attribute; always yields a driver on success, falling back to the AUTO driver */ +int git_diff_driver_lookup( + git_diff_driver **out, git_repository *repo, const char *path) +{ + int error = 0; + const char *value; + + assert(out); + *out = NULL; /* the fallback below tests *out, so it must not hold caller garbage */ + + if (!repo || !path || !strlen(path)) + goto use_auto; + + if ((error = git_attr_get(&value, repo, 0, path, "diff")) < 0) + return error; + + if (GIT_ATTR_UNSPECIFIED(value)) + /* just use the auto value */; + else if (GIT_ATTR_FALSE(value)) + *out = &global_drivers[DIFF_DRIVER_BINARY]; + else if (GIT_ATTR_TRUE(value)) + *out = &global_drivers[DIFF_DRIVER_TEXT]; + + /* otherwise look for driver information in config and build driver */ + else if ((error = git_diff_driver_load(out, repo, value)) < 0) { + if (error != GIT_ENOTFOUND) + return error; + else + giterr_clear(); + } + +use_auto: + if (!*out) + *out = &global_drivers[DIFF_DRIVER_AUTO]; + + return 0; +} + +void git_diff_driver_free(git_diff_driver *driver) +{ + size_t i; + + if (!driver) + return; + + for (i = 0; i < git_array_size(driver->fn_patterns); ++i) + regfree(git_array_get(driver->fn_patterns, i)); + git_array_clear(driver->fn_patterns); + + regfree(&driver->word_pattern); + + git__free(driver); +} + +void git_diff_driver_update_options( + uint32_t *option_flags, git_diff_driver *driver) +{ + if ((*option_flags & FORCE_DIFFABLE) == 0) + *option_flags |= driver->binary_flags; + + *option_flags |= driver->other_flags; +} + +int git_diff_driver_content_is_binary( + git_diff_driver *driver, const char *content, size_t content_len) +{ + const git_buf search = { (char *)content, 0, min(content_len, 4000) }; + + GIT_UNUSED(driver); + + /* TODO: provide encoding / binary detection callbacks that can + * be UTF-8 aware, etc. For now, instead of trying to be smart, + * let's just use the simple NUL-byte detection that core git uses. 
+ */ + + /* previously was: if (git_buf_text_is_binary(&search)) */ + if (git_buf_text_contains_nul(&search)) + return 1; + + return 0; +} + +static int diff_context_line__simple( + git_diff_driver *driver, const char *line, long line_len) +{ + GIT_UNUSED(driver); + GIT_UNUSED(line_len); + return (git__isalpha(*line) || *line == '_' || *line == '$'); +} + +static int diff_context_line__pattern_match( + git_diff_driver *driver, const char *line, long line_len) +{ + size_t i; + + GIT_UNUSED(line_len); + + for (i = 0; i < git_array_size(driver->fn_patterns); ++i) { + if (!regexec(git_array_get(driver->fn_patterns, i), line, 0, NULL, 0)) + return true; + } + + return false; +} + +/* function-context callback: copies the right-trimmed line into `out` if the driver's matcher accepts it; returns the number of bytes copied, or -1 for no match */ +static long diff_context_find( + const char *line, + long line_len, + char *out, + long out_size, + void *payload) +{ + git_diff_find_context_payload *ctxt = payload; + + if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0) + return -1; + git_buf_rtrim(&ctxt->line); + + if (!ctxt->line.size) + return -1; + + if (!ctxt->match_line || + !ctxt->match_line(ctxt->driver, ctxt->line.ptr, ctxt->line.size)) + return -1; + + /* clamp to the caller's buffer and report exactly the bytes copied, so a truncated line never reports a length that includes a terminator byte */ + if (out_size > (long)ctxt->line.size) + out_size = (long)ctxt->line.size; + memcpy(out, ctxt->line.ptr, (size_t)out_size); + + return out_size; +} + +void git_diff_find_context_init( + git_diff_find_context_fn *findfn_out, + git_diff_find_context_payload *payload_out, + git_diff_driver *driver) +{ + *findfn_out = driver ? diff_context_find : NULL; + + memset(payload_out, 0, sizeof(*payload_out)); + if (driver) { + payload_out->driver = driver; + payload_out->match_line = (driver->type == DIFF_DRIVER_PATTERNLIST) ? 
+ diff_context_line__pattern_match : diff_context_line__simple; + git_buf_init(&payload_out->line, 0); + } +} + +void git_diff_find_context_clear(git_diff_find_context_payload *payload) +{ + if (payload) { + git_buf_free(&payload->line); + payload->driver = NULL; + } +} + diff --git a/src/diff_driver.h b/src/diff_driver.h new file mode 100644 index 000000000..3db7df000 --- /dev/null +++ b/src/diff_driver.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_diff_driver_h__ +#define INCLUDE_diff_driver_h__ + +#include "common.h" +#include "buffer.h" + +typedef struct git_diff_driver_registry git_diff_driver_registry; + +git_diff_driver_registry *git_diff_driver_registry_new(void); +void git_diff_driver_registry_free(git_diff_driver_registry *); + +typedef struct git_diff_driver git_diff_driver; + +int git_diff_driver_lookup(git_diff_driver **, git_repository *, const char *); +void git_diff_driver_free(git_diff_driver *); + +/* diff option flags to force off and on for this driver */ +void git_diff_driver_update_options(uint32_t *option_flags, git_diff_driver *); + +/* returns -1 meaning "unknown", 0 meaning not binary, 1 meaning binary */ +int git_diff_driver_content_is_binary( + git_diff_driver *, const char *content, size_t content_len); + +typedef long (*git_diff_find_context_fn)( + const char *, long, char *, long, void *); + +typedef int (*git_diff_find_context_line)( + git_diff_driver *, const char *, long); + +typedef struct { + git_diff_driver *driver; + git_diff_find_context_line match_line; + git_buf line; +} git_diff_find_context_payload; + +void git_diff_find_context_init( + git_diff_find_context_fn *findfn_out, + git_diff_find_context_payload *payload_out, + git_diff_driver *driver); + +void git_diff_find_context_clear(git_diff_find_context_payload *); 
+ +#endif diff --git a/src/diff_file.c b/src/diff_file.c new file mode 100644 index 000000000..4fd1177ae --- /dev/null +++ b/src/diff_file.c @@ -0,0 +1,441 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#include "common.h" +#include "git2/blob.h" +#include "git2/submodule.h" +#include "diff.h" +#include "diff_file.h" +#include "odb.h" +#include "fileops.h" +#include "filter.h" + +#define DIFF_MAX_FILESIZE 0x20000000 + +static bool diff_file_content_binary_by_size(git_diff_file_content *fc) +{ + /* if we have diff opts, check max_size vs file size */ + if ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) == 0 && + fc->opts_max_size > 0 && + fc->file.size > fc->opts_max_size) + fc->file.flags |= GIT_DIFF_FLAG_BINARY; + + return ((fc->file.flags & GIT_DIFF_FLAG_BINARY) != 0); +} + +static void diff_file_content_binary_by_content(git_diff_file_content *fc) +{ + if ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) != 0) + return; + + switch (git_diff_driver_content_is_binary( + fc->driver, fc->map.data, fc->map.len)) { + case 0: fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; break; + case 1: fc->file.flags |= GIT_DIFF_FLAG_BINARY; break; + default: break; + } +} + +static int diff_file_content_init_common( + git_diff_file_content *fc, const git_diff_options *opts) +{ + fc->opts_flags = opts ? opts->flags : GIT_DIFF_NORMAL; + + if (opts && opts->max_size >= 0) + fc->opts_max_size = opts->max_size ? 
+ opts->max_size : DIFF_MAX_FILESIZE; + + if (!fc->driver) { + if (git_diff_driver_lookup(&fc->driver, fc->repo, "") < 0) + return -1; + fc->src = GIT_ITERATOR_TYPE_TREE; + } + + /* give driver a chance to modify options */ + git_diff_driver_update_options(&fc->opts_flags, fc->driver); + + /* make sure file is conceivable mmap-able */ + if ((git_off_t)((size_t)fc->file.size) != fc->file.size) + fc->file.flags |= GIT_DIFF_FLAG_BINARY; + /* check if user is forcing text diff the file */ + else if (fc->opts_flags & GIT_DIFF_FORCE_TEXT) { + fc->file.flags &= ~GIT_DIFF_FLAG_BINARY; + fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; + } + /* check if user is forcing binary diff the file */ + else if (fc->opts_flags & GIT_DIFF_FORCE_BINARY) { + fc->file.flags &= ~GIT_DIFF_FLAG_NOT_BINARY; + fc->file.flags |= GIT_DIFF_FLAG_BINARY; + } + + diff_file_content_binary_by_size(fc); + + if ((fc->file.flags & GIT_DIFF_FLAG__NO_DATA) != 0) { + fc->file.flags |= GIT_DIFF_FLAG__LOADED; + fc->map.len = 0; + fc->map.data = ""; + } + + if ((fc->file.flags & GIT_DIFF_FLAG__LOADED) != 0) + diff_file_content_binary_by_content(fc); + + return 0; +} + +int git_diff_file_content__init_from_diff( + git_diff_file_content *fc, + git_diff_list *diff, + size_t delta_index, + bool use_old) +{ + git_diff_delta *delta = git_vector_get(&diff->deltas, delta_index); + git_diff_file *file = use_old ? &delta->old_file : &delta->new_file; + bool has_data = true; + + memset(fc, 0, sizeof(*fc)); + fc->repo = diff->repo; + fc->src = use_old ? 
diff->old_src : diff->new_src; + memcpy(&fc->file, file, sizeof(fc->file)); + + if (git_diff_driver_lookup(&fc->driver, fc->repo, file->path) < 0) + return -1; + + switch (delta->status) { + case GIT_DELTA_ADDED: + has_data = !use_old; break; + case GIT_DELTA_DELETED: + has_data = use_old; break; + case GIT_DELTA_UNTRACKED: + has_data = !use_old && + (diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) != 0; + break; + case GIT_DELTA_MODIFIED: + case GIT_DELTA_COPIED: + case GIT_DELTA_RENAMED: + break; + default: + has_data = false; + break; + } + + if (!has_data) + fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; + + return diff_file_content_init_common(fc, &diff->opts); +} + +int git_diff_file_content__init_from_blob( + git_diff_file_content *fc, + git_repository *repo, + const git_diff_options *opts, + const git_blob *blob) +{ + memset(fc, 0, sizeof(*fc)); + fc->repo = repo; + fc->blob = blob; + + if (!blob) { + fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; + } else { + fc->file.flags |= GIT_DIFF_FLAG__LOADED | GIT_DIFF_FLAG_VALID_OID; + fc->file.size = git_blob_rawsize(blob); + fc->file.mode = 0644; + git_oid_cpy(&fc->file.oid, git_blob_id(blob)); + + fc->map.len = (size_t)fc->file.size; + fc->map.data = (char *)git_blob_rawcontent(blob); + } + + return diff_file_content_init_common(fc, opts); +} + +int git_diff_file_content__init_from_raw( + git_diff_file_content *fc, + git_repository *repo, + const git_diff_options *opts, + const char *buf, + size_t buflen) +{ + memset(fc, 0, sizeof(*fc)); + fc->repo = repo; + + if (!buf) { + fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; + } else { + fc->file.flags |= GIT_DIFF_FLAG__LOADED | GIT_DIFF_FLAG_VALID_OID; + fc->file.size = buflen; + fc->file.mode = 0644; + git_odb_hash(&fc->file.oid, buf, buflen, GIT_OBJ_BLOB); + + fc->map.len = buflen; + fc->map.data = (char *)buf; + } + + return diff_file_content_init_common(fc, opts); +} + +static int diff_file_content_commit_to_str( + git_diff_file_content *fc, bool check_status) +{ + char 
oid[GIT_OID_HEXSZ+1]; + git_buf content = GIT_BUF_INIT; + const char *status = ""; + + if (check_status) { + int error = 0; + git_submodule *sm = NULL; + unsigned int sm_status = 0; + const git_oid *sm_head; + + if ((error = git_submodule_lookup(&sm, fc->repo, fc->file.path)) < 0 || + (error = git_submodule_status(&sm_status, sm)) < 0) { + /* GIT_EEXISTS means a "submodule" that has not been git added */ + if (error == GIT_EEXISTS) + error = 0; + return error; + } + + /* update OID if we didn't have it previously */ + if ((fc->file.flags & GIT_DIFF_FLAG_VALID_OID) == 0 && + ((sm_head = git_submodule_wd_id(sm)) != NULL || + (sm_head = git_submodule_head_id(sm)) != NULL)) + { + git_oid_cpy(&fc->file.oid, sm_head); + fc->file.flags |= GIT_DIFF_FLAG_VALID_OID; + } + + if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status)) + status = "-dirty"; + } + + git_oid_tostr(oid, sizeof(oid), &fc->file.oid); + if (git_buf_printf(&content, "Subproject commit %s%s\n", oid, status) < 0) + return -1; + + fc->map.len = git_buf_len(&content); + fc->map.data = git_buf_detach(&content); + fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA; + + return 0; +} + +static int diff_file_content_load_blob(git_diff_file_content *fc) +{ + int error = 0; + git_odb_object *odb_obj = NULL; + + if (git_oid_iszero(&fc->file.oid)) + return 0; + + if (fc->file.mode == GIT_FILEMODE_COMMIT) + return diff_file_content_commit_to_str(fc, false); + + /* if we don't know size, try to peek at object header first */ + if (!fc->file.size) { + git_odb *odb; + size_t len; + git_otype type; + + if (!(error = git_repository_odb__weakptr(&odb, fc->repo))) { + error = git_odb__read_header_or_object( + &odb_obj, &len, &type, odb, &fc->file.oid); + git_odb_free(odb); + } + if (error) + return error; + + fc->file.size = len; + } + + if (diff_file_content_binary_by_size(fc)) + return 0; + + if (odb_obj != NULL) { + error = git_object__from_odb_object( + (git_object **)&fc->blob, fc->repo, odb_obj, GIT_OBJ_BLOB); + 
git_odb_object_free(odb_obj); + } else { + error = git_blob_lookup( + (git_blob **)&fc->blob, fc->repo, &fc->file.oid); + } + + if (!error) { + fc->file.flags |= GIT_DIFF_FLAG__FREE_BLOB; + fc->map.data = (void *)git_blob_rawcontent(fc->blob); + fc->map.len = (size_t)git_blob_rawsize(fc->blob); + } + + return error; +} + +static int diff_file_content_load_workdir_symlink( + git_diff_file_content *fc, git_buf *path) +{ + ssize_t alloc_len, read_len; + + /* link path on disk could be UTF-16, so prepare a buffer that is + * big enough to handle some UTF-8 data expansion + */ + alloc_len = (ssize_t)(fc->file.size * 2) + 1; + + fc->map.data = git__calloc(alloc_len, sizeof(char)); + GITERR_CHECK_ALLOC(fc->map.data); + + fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA; + + read_len = p_readlink(git_buf_cstr(path), fc->map.data, alloc_len); + if (read_len < 0) { + giterr_set(GITERR_OS, "Failed to read symlink '%s'", fc->file.path); + return -1; + } + + fc->map.len = read_len; + return 0; +} + +static int diff_file_content_load_workdir_file( + git_diff_file_content *fc, git_buf *path) +{ + int error = 0; + git_vector filters = GIT_VECTOR_INIT; + git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; + git_file fd = git_futils_open_ro(git_buf_cstr(path)); + + if (fd < 0) + return fd; + + if (!fc->file.size && + !(fc->file.size = git_futils_filesize(fd))) + goto cleanup; + + if (diff_file_content_binary_by_size(fc)) + goto cleanup; + + if ((error = git_filters_load( + &filters, fc->repo, fc->file.path, GIT_FILTER_TO_ODB)) < 0) + goto cleanup; + /* error >= is a filter count */ + + if (error == 0) { + if (!(error = git_futils_mmap_ro( + &fc->map, fd, 0, (size_t)fc->file.size))) + fc->file.flags |= GIT_DIFF_FLAG__UNMAP_DATA; + else /* fall through to try readbuffer below */ + giterr_clear(); + } + + if (error != 0) { + error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file.size); + if (error < 0) + goto cleanup; + + if (!filters.length) + git_buf_swap(&filtered, &raw); + else + 
error = git_filters_apply(&filtered, &raw, &filters); + + if (!error) { + fc->map.len = git_buf_len(&filtered); + fc->map.data = git_buf_detach(&filtered); + fc->file.flags |= GIT_DIFF_FLAG__FREE_DATA; + } + + git_buf_free(&raw); + git_buf_free(&filtered); + } + +cleanup: + git_filters_free(&filters); + p_close(fd); + + return error; +} + +static int diff_file_content_load_workdir(git_diff_file_content *fc) +{ + int error = 0; + git_buf path = GIT_BUF_INIT; + + if (fc->file.mode == GIT_FILEMODE_COMMIT) + return diff_file_content_commit_to_str(fc, true); + + if (fc->file.mode == GIT_FILEMODE_TREE) + return 0; + + if (git_buf_joinpath( + &path, git_repository_workdir(fc->repo), fc->file.path) < 0) + return -1; + + if (S_ISLNK(fc->file.mode)) + error = diff_file_content_load_workdir_symlink(fc, &path); + else + error = diff_file_content_load_workdir_file(fc, &path); + + /* once data is loaded, update OID if we didn't have it previously */ + if (!error && (fc->file.flags & GIT_DIFF_FLAG_VALID_OID) == 0) { + error = git_odb_hash( + &fc->file.oid, fc->map.data, fc->map.len, GIT_OBJ_BLOB); + fc->file.flags |= GIT_DIFF_FLAG_VALID_OID; + } + + git_buf_free(&path); + return error; +} + +int git_diff_file_content__load(git_diff_file_content *fc) +{ + int error = 0; + + if ((fc->file.flags & GIT_DIFF_FLAG__LOADED) != 0) + return 0; + + if (fc->file.flags & GIT_DIFF_FLAG_BINARY) + return 0; + + if (fc->src == GIT_ITERATOR_TYPE_WORKDIR) + error = diff_file_content_load_workdir(fc); + else + error = diff_file_content_load_blob(fc); + if (error) + return error; + + fc->file.flags |= GIT_DIFF_FLAG__LOADED; + + diff_file_content_binary_by_content(fc); + + return 0; +} + +void git_diff_file_content__unload(git_diff_file_content *fc) +{ + if (fc->file.flags & GIT_DIFF_FLAG__FREE_DATA) { + git__free(fc->map.data); + fc->map.data = ""; + fc->map.len = 0; + fc->file.flags &= ~GIT_DIFF_FLAG__FREE_DATA; + } + else if (fc->file.flags & GIT_DIFF_FLAG__UNMAP_DATA) { + 
git_futils_mmap_free(&fc->map); + fc->map.data = ""; + fc->map.len = 0; + fc->file.flags &= ~GIT_DIFF_FLAG__UNMAP_DATA; + } + + if (fc->file.flags & GIT_DIFF_FLAG__FREE_BLOB) { + git_blob_free((git_blob *)fc->blob); + fc->blob = NULL; + fc->file.flags &= ~GIT_DIFF_FLAG__FREE_BLOB; + } + + fc->file.flags &= ~GIT_DIFF_FLAG__LOADED; +} + +void git_diff_file_content__clear(git_diff_file_content *fc) +{ + git_diff_file_content__unload(fc); + + /* for now, nothing else to do */ +} diff --git a/src/diff_file.h b/src/diff_file.h new file mode 100644 index 000000000..afad8510b --- /dev/null +++ b/src/diff_file.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_diff_file_h__ +#define INCLUDE_diff_file_h__ + +#include "common.h" +#include "diff.h" +#include "diff_driver.h" +#include "map.h" + +/* expanded information for one side of a delta */ +typedef struct { + git_repository *repo; + git_diff_file file; + git_diff_driver *driver; + uint32_t opts_flags; + git_off_t opts_max_size; + git_iterator_type_t src; + const git_blob *blob; + git_map map; +} git_diff_file_content; + +extern int git_diff_file_content__init_from_diff( + git_diff_file_content *fc, + git_diff_list *diff, + size_t delta_index, + bool use_old); + +extern int git_diff_file_content__init_from_blob( + git_diff_file_content *fc, + git_repository *repo, + const git_diff_options *opts, + const git_blob *blob); + +extern int git_diff_file_content__init_from_raw( + git_diff_file_content *fc, + git_repository *repo, + const git_diff_options *opts, + const char *buf, + size_t buflen); + +/* this loads the blob/file-on-disk as needed */ +extern int git_diff_file_content__load(git_diff_file_content *fc); + +/* this releases the blob/file-in-memory */ +extern void git_diff_file_content__unload(git_diff_file_content 
*fc); + +/* this unloads and also releases any other resources */ +extern void git_diff_file_content__clear(git_diff_file_content *fc); + +#endif diff --git a/src/diff_output.c b/src/diff_output.c deleted file mode 100644 index 8dd110cbf..000000000 --- a/src/diff_output.c +++ /dev/null @@ -1,1895 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ -#include "common.h" -#include "git2/attr.h" -#include "git2/oid.h" -#include "git2/submodule.h" -#include "diff_output.h" -#include -#include "fileops.h" -#include "filter.h" -#include "buf_text.h" - -static int read_next_int(const char **str, int *value) -{ - const char *scan = *str; - int v = 0, digits = 0; - /* find next digit */ - for (scan = *str; *scan && !isdigit(*scan); scan++); - /* parse next number */ - for (; isdigit(*scan); scan++, digits++) - v = (v * 10) + (*scan - '0'); - *str = scan; - *value = v; - return (digits > 0) ? 
0 : -1; -} - -static int parse_hunk_header(git_diff_range *range, const char *header) -{ - /* expect something of the form "@@ -%d[,%d] +%d[,%d] @@" */ - if (*header != '@') - return -1; - if (read_next_int(&header, &range->old_start) < 0) - return -1; - if (*header == ',') { - if (read_next_int(&header, &range->old_lines) < 0) - return -1; - } else - range->old_lines = 1; - if (read_next_int(&header, &range->new_start) < 0) - return -1; - if (*header == ',') { - if (read_next_int(&header, &range->new_lines) < 0) - return -1; - } else - range->new_lines = 1; - if (range->old_start < 0 || range->new_start < 0) - return -1; - - return 0; -} - -#define KNOWN_BINARY_FLAGS (GIT_DIFF_FLAG_BINARY|GIT_DIFF_FLAG_NOT_BINARY) -#define NOT_BINARY_FLAGS (GIT_DIFF_FLAG_NOT_BINARY|GIT_DIFF_FLAG__NO_DATA) - -static int update_file_is_binary_by_attr( - git_repository *repo, git_diff_file *file) -{ - const char *value; - - /* because of blob diffs, cannot assume path is set */ - if (!file->path || !strlen(file->path)) - return 0; - - if (git_attr_get(&value, repo, 0, file->path, "diff") < 0) - return -1; - - if (GIT_ATTR_FALSE(value)) - file->flags |= GIT_DIFF_FLAG_BINARY; - else if (GIT_ATTR_TRUE(value)) - file->flags |= GIT_DIFF_FLAG_NOT_BINARY; - /* otherwise leave file->flags alone */ - - return 0; -} - -static void update_delta_is_binary(git_diff_delta *delta) -{ - if ((delta->old_file.flags & GIT_DIFF_FLAG_BINARY) != 0 || - (delta->new_file.flags & GIT_DIFF_FLAG_BINARY) != 0) - delta->flags |= GIT_DIFF_FLAG_BINARY; - - else if ((delta->old_file.flags & NOT_BINARY_FLAGS) != 0 && - (delta->new_file.flags & NOT_BINARY_FLAGS) != 0) - delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; - - /* otherwise leave delta->flags binary value untouched */ -} - -/* returns if we forced binary setting (and no further checks needed) */ -static bool diff_delta_is_binary_forced( - diff_context *ctxt, - git_diff_delta *delta) -{ - /* return true if binary-ness has already been settled */ - if 
((delta->flags & KNOWN_BINARY_FLAGS) != 0) - return true; - - /* make sure files are conceivably mmap-able */ - if ((git_off_t)((size_t)delta->old_file.size) != delta->old_file.size || - (git_off_t)((size_t)delta->new_file.size) != delta->new_file.size) { - - delta->old_file.flags |= GIT_DIFF_FLAG_BINARY; - delta->new_file.flags |= GIT_DIFF_FLAG_BINARY; - delta->flags |= GIT_DIFF_FLAG_BINARY; - return true; - } - - /* check if user is forcing us to text diff these files */ - if (ctxt->opts && (ctxt->opts->flags & GIT_DIFF_FORCE_TEXT) != 0) { - delta->old_file.flags |= GIT_DIFF_FLAG_NOT_BINARY; - delta->new_file.flags |= GIT_DIFF_FLAG_NOT_BINARY; - delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; - return true; - } - - return false; -} - -static int diff_delta_is_binary_by_attr( - diff_context *ctxt, git_diff_patch *patch) -{ - int error = 0, mirror_new; - git_diff_delta *delta = patch->delta; - - if (diff_delta_is_binary_forced(ctxt, delta)) - return 0; - - /* check diff attribute +, -, or 0 */ - if (update_file_is_binary_by_attr(ctxt->repo, &delta->old_file) < 0) - return -1; - - mirror_new = (delta->new_file.path == delta->old_file.path || - ctxt->diff->strcomp(delta->new_file.path, delta->old_file.path) == 0); - if (mirror_new) - delta->new_file.flags |= (delta->old_file.flags & KNOWN_BINARY_FLAGS); - else - error = update_file_is_binary_by_attr(ctxt->repo, &delta->new_file); - - update_delta_is_binary(delta); - - return error; -} - -static int diff_delta_is_binary_by_content( - diff_context *ctxt, - git_diff_delta *delta, - git_diff_file *file, - const git_map *map) -{ - const git_buf search = { map->data, 0, min(map->len, 4000) }; - - if (diff_delta_is_binary_forced(ctxt, delta)) - return 0; - - /* TODO: provide encoding / binary detection callbacks that can - * be UTF-8 aware, etc. For now, instead of trying to be smart, - * let's just use the simple NUL-byte detection that core git uses. 
- */ - - /* previously was: if (git_buf_text_is_binary(&search)) */ - if (git_buf_text_contains_nul(&search)) - file->flags |= GIT_DIFF_FLAG_BINARY; - else - file->flags |= GIT_DIFF_FLAG_NOT_BINARY; - - update_delta_is_binary(delta); - - return 0; -} - -static int diff_delta_is_binary_by_size( - diff_context *ctxt, git_diff_delta *delta, git_diff_file *file) -{ - git_off_t threshold = MAX_DIFF_FILESIZE; - - if ((file->flags & KNOWN_BINARY_FLAGS) != 0) - return 0; - - if (ctxt && ctxt->opts) { - if (ctxt->opts->max_size < 0) - return 0; - - if (ctxt->opts->max_size > 0) - threshold = ctxt->opts->max_size; - } - - if (file->size > threshold) - file->flags |= GIT_DIFF_FLAG_BINARY; - - update_delta_is_binary(delta); - - return 0; -} - -static void setup_xdiff_options( - const git_diff_options *opts, xdemitconf_t *cfg, xpparam_t *param) -{ - memset(cfg, 0, sizeof(xdemitconf_t)); - memset(param, 0, sizeof(xpparam_t)); - - cfg->ctxlen = - (!opts) ? 3 : opts->context_lines; - cfg->interhunkctxlen = - (!opts) ? 
0 : opts->interhunk_lines; - - if (!opts) - return; - - if (opts->flags & GIT_DIFF_IGNORE_WHITESPACE) - param->flags |= XDF_WHITESPACE_FLAGS; - if (opts->flags & GIT_DIFF_IGNORE_WHITESPACE_CHANGE) - param->flags |= XDF_IGNORE_WHITESPACE_CHANGE; - if (opts->flags & GIT_DIFF_IGNORE_WHITESPACE_EOL) - param->flags |= XDF_IGNORE_WHITESPACE_AT_EOL; -} - - -static int get_blob_content( - diff_context *ctxt, - git_diff_delta *delta, - git_diff_file *file, - git_map *map, - git_blob **blob) -{ - int error; - git_odb_object *odb_obj = NULL; - - if (git_oid_iszero(&file->oid)) - return 0; - - if (file->mode == GIT_FILEMODE_COMMIT) { - char oidstr[GIT_OID_HEXSZ+1]; - git_buf content = GIT_BUF_INIT; - - git_oid_tostr(oidstr, sizeof(oidstr), &file->oid); - git_buf_printf(&content, "Subproject commit %s\n", oidstr); - - map->data = git_buf_detach(&content); - map->len = strlen(map->data); - - file->flags |= GIT_DIFF_FLAG__FREE_DATA; - return 0; - } - - if (!file->size) { - git_odb *odb; - size_t len; - git_otype type; - - /* peek at object header to avoid loading if too large */ - if ((error = git_repository_odb__weakptr(&odb, ctxt->repo)) < 0 || - (error = git_odb__read_header_or_object( - &odb_obj, &len, &type, odb, &file->oid)) < 0) - return error; - - assert(type == GIT_OBJ_BLOB); - - file->size = len; - } - - /* if blob is too large to diff, mark as binary */ - if ((error = diff_delta_is_binary_by_size(ctxt, delta, file)) < 0) - return error; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - return 0; - - if (odb_obj != NULL) { - error = git_object__from_odb_object( - (git_object **)blob, ctxt->repo, odb_obj, GIT_OBJ_BLOB); - git_odb_object_free(odb_obj); - } else - error = git_blob_lookup(blob, ctxt->repo, &file->oid); - - if (error) - return error; - - map->data = (void *)git_blob_rawcontent(*blob); - map->len = (size_t)git_blob_rawsize(*blob); - - return diff_delta_is_binary_by_content(ctxt, delta, file, map); -} - -static int get_workdir_sm_content( - diff_context 
*ctxt, - git_diff_file *file, - git_map *map) -{ - int error = 0; - git_buf content = GIT_BUF_INIT; - git_submodule* sm = NULL; - unsigned int sm_status = 0; - const char* sm_status_text = ""; - char oidstr[GIT_OID_HEXSZ+1]; - - if ((error = git_submodule_lookup(&sm, ctxt->repo, file->path)) < 0 || - (error = git_submodule_status(&sm_status, sm)) < 0) { - - /* GIT_EEXISTS means a "submodule" that has not been git added */ - if (error == GIT_EEXISTS) - error = 0; - return error; - } - - /* update OID if we didn't have it previously */ - if ((file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) { - const git_oid* sm_head; - - if ((sm_head = git_submodule_wd_id(sm)) != NULL || - (sm_head = git_submodule_head_id(sm)) != NULL) { - - git_oid_cpy(&file->oid, sm_head); - file->flags |= GIT_DIFF_FLAG_VALID_OID; - } - } - - git_oid_tostr(oidstr, sizeof(oidstr), &file->oid); - - if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status)) - sm_status_text = "-dirty"; - - git_buf_printf( - &content, "Subproject commit %s%s\n", oidstr, sm_status_text); - - map->data = git_buf_detach(&content); - map->len = strlen(map->data); - - file->flags |= GIT_DIFF_FLAG__FREE_DATA; - - return 0; -} - -static int get_filtered( - git_map *map, git_file fd, git_diff_file *file, git_vector *filters) -{ - int error; - git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; - - if ((error = git_futils_readbuffer_fd(&raw, fd, (size_t)file->size)) < 0) - return error; - - if (!filters->length) - git_buf_swap(&filtered, &raw); - else - error = git_filters_apply(&filtered, &raw, filters); - - if (!error) { - map->len = git_buf_len(&filtered); - map->data = git_buf_detach(&filtered); - - file->flags |= GIT_DIFF_FLAG__FREE_DATA; - } - - git_buf_free(&raw); - git_buf_free(&filtered); - - return error; -} - -static int get_workdir_content( - diff_context *ctxt, - git_diff_delta *delta, - git_diff_file *file, - git_map *map) -{ - int error = 0; - git_buf path = GIT_BUF_INIT; - const char *wd = 
git_repository_workdir(ctxt->repo); - - if (S_ISGITLINK(file->mode)) - return get_workdir_sm_content(ctxt, file, map); - - if (S_ISDIR(file->mode)) - return 0; - - if (git_buf_joinpath(&path, wd, file->path) < 0) - return -1; - - if (S_ISLNK(file->mode)) { - ssize_t alloc_len, read_len; - - file->flags |= GIT_DIFF_FLAG__FREE_DATA; - file->flags |= GIT_DIFF_FLAG_BINARY; - - /* link path on disk could be UTF-16, so prepare a buffer that is - * big enough to handle some UTF-8 data expansion - */ - alloc_len = (ssize_t)(file->size * 2) + 1; - - map->data = git__malloc(alloc_len); - GITERR_CHECK_ALLOC(map->data); - - read_len = p_readlink(path.ptr, map->data, alloc_len); - if (read_len < 0) { - giterr_set(GITERR_OS, "Failed to read symlink '%s'", file->path); - error = -1; - goto cleanup; - } - - map->len = read_len; - } - else { - git_file fd = git_futils_open_ro(path.ptr); - git_vector filters = GIT_VECTOR_INIT; - - if (fd < 0) { - error = fd; - goto cleanup; - } - - if (!file->size && !(file->size = git_futils_filesize(fd))) - goto close_and_cleanup; - - if ((error = diff_delta_is_binary_by_size(ctxt, delta, file)) < 0 || - (delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto close_and_cleanup; - - error = git_filters_load( - &filters, ctxt->repo, file->path, GIT_FILTER_TO_ODB); - if (error < 0) - goto close_and_cleanup; - - if (error == 0) { /* note: git_filters_load returns filter count */ - error = git_futils_mmap_ro(map, fd, 0, (size_t)file->size); - if (!error) - file->flags |= GIT_DIFF_FLAG__UNMAP_DATA; - } - if (error != 0) - error = get_filtered(map, fd, file, &filters); - -close_and_cleanup: - git_filters_free(&filters); - p_close(fd); - } - - /* once data is loaded, update OID if we didn't have it previously */ - if (!error && (file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) { - error = git_odb_hash( - &file->oid, map->data, map->len, GIT_OBJ_BLOB); - if (!error) - file->flags |= GIT_DIFF_FLAG_VALID_OID; - } - - if (!error) - error = 
diff_delta_is_binary_by_content(ctxt, delta, file, map); - -cleanup: - git_buf_free(&path); - return error; -} - -static void release_content(git_diff_file *file, git_map *map, git_blob *blob) -{ - if (blob != NULL) - git_blob_free(blob); - - if (file->flags & GIT_DIFF_FLAG__FREE_DATA) { - git__free(map->data); - map->data = ""; - map->len = 0; - file->flags &= ~GIT_DIFF_FLAG__FREE_DATA; - } - else if (file->flags & GIT_DIFF_FLAG__UNMAP_DATA) { - git_futils_mmap_free(map); - map->data = ""; - map->len = 0; - file->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA; - } -} - - -static int diff_context_init( - diff_context *ctxt, - git_diff_list *diff, - git_repository *repo, - const git_diff_options *opts, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - memset(ctxt, 0, sizeof(diff_context)); - - if (!repo && diff) - repo = diff->repo; - - if (!opts && diff) - opts = &diff->opts; - - ctxt->repo = repo; - ctxt->diff = diff; - ctxt->opts = opts; - ctxt->file_cb = file_cb; - ctxt->hunk_cb = hunk_cb; - ctxt->data_cb = data_cb; - ctxt->payload = payload; - ctxt->error = 0; - - setup_xdiff_options(ctxt->opts, &ctxt->xdiff_config, &ctxt->xdiff_params); - - return 0; -} - -static int diff_delta_file_callback( - diff_context *ctxt, git_diff_delta *delta, size_t idx) -{ - float progress; - - if (!ctxt->file_cb) - return 0; - - progress = ctxt->diff ? 
((float)idx / ctxt->diff->deltas.length) : 1.0f; - - if (ctxt->file_cb(delta, progress, ctxt->payload) != 0) - ctxt->error = GIT_EUSER; - - return ctxt->error; -} - -static void diff_patch_init( - diff_context *ctxt, git_diff_patch *patch) -{ - memset(patch, 0, sizeof(git_diff_patch)); - - patch->diff = ctxt->diff; - patch->ctxt = ctxt; - - if (patch->diff) { - patch->old_src = patch->diff->old_src; - patch->new_src = patch->diff->new_src; - } else { - patch->old_src = patch->new_src = GIT_ITERATOR_TYPE_TREE; - } -} - -static git_diff_patch *diff_patch_alloc( - diff_context *ctxt, git_diff_delta *delta) -{ - git_diff_patch *patch = git__malloc(sizeof(git_diff_patch)); - if (!patch) - return NULL; - - diff_patch_init(ctxt, patch); - - git_diff_list_addref(patch->diff); - - GIT_REFCOUNT_INC(patch); - - patch->delta = delta; - patch->flags = GIT_DIFF_PATCH_ALLOCATED; - - return patch; -} - -static int diff_patch_load( - diff_context *ctxt, git_diff_patch *patch) -{ - int error = 0; - git_diff_delta *delta = patch->delta; - bool check_if_unmodified = false; - - if ((patch->flags & GIT_DIFF_PATCH_LOADED) != 0) - return 0; - - error = diff_delta_is_binary_by_attr(ctxt, patch); - - patch->old_data.data = ""; - patch->old_data.len = 0; - patch->old_blob = NULL; - - patch->new_data.data = ""; - patch->new_data.len = 0; - patch->new_blob = NULL; - - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - - if (!ctxt->hunk_cb && - !ctxt->data_cb && - (ctxt->opts->flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0) - goto cleanup; - - switch (delta->status) { - case GIT_DELTA_ADDED: - delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; - break; - case GIT_DELTA_DELETED: - delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; - break; - case GIT_DELTA_MODIFIED: - case GIT_DELTA_COPIED: - case GIT_DELTA_RENAMED: - break; - case GIT_DELTA_UNTRACKED: - delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; - if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0) - 
delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; - break; - default: - delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; - delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; - break; - } - -#define CHECK_UNMODIFIED (GIT_DIFF_FLAG__NO_DATA | GIT_DIFF_FLAG_VALID_OID) - - check_if_unmodified = - (delta->old_file.flags & CHECK_UNMODIFIED) == 0 && - (delta->new_file.flags & CHECK_UNMODIFIED) == 0; - - /* Always try to load workdir content first, since it may need to be - * filtered (and hence use 2x memory) and we want to minimize the max - * memory footprint during diff. - */ - - if ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && - patch->old_src == GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = get_workdir_content( - ctxt, delta, &delta->old_file, &patch->old_data)) < 0) - goto cleanup; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - } - - if ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && - patch->new_src == GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = get_workdir_content( - ctxt, delta, &delta->new_file, &patch->new_data)) < 0) - goto cleanup; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - } - - if ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && - patch->old_src != GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = get_blob_content( - ctxt, delta, &delta->old_file, - &patch->old_data, &patch->old_blob)) < 0) - goto cleanup; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - } - - if ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && - patch->new_src != GIT_ITERATOR_TYPE_WORKDIR) { - if ((error = get_blob_content( - ctxt, delta, &delta->new_file, - &patch->new_data, &patch->new_blob)) < 0) - goto cleanup; - if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) - goto cleanup; - } - - /* if we did not previously have the definitive oid, we may have - * incorrect status and need to switch this to UNMODIFIED. 
- */ - if (check_if_unmodified && - delta->old_file.mode == delta->new_file.mode && - !git_oid__cmp(&delta->old_file.oid, &delta->new_file.oid)) { - - delta->status = GIT_DELTA_UNMODIFIED; - - if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) - goto cleanup; - } - -cleanup: - if ((delta->flags & KNOWN_BINARY_FLAGS) == 0) - update_delta_is_binary(delta); - - if (!error) { - patch->flags |= GIT_DIFF_PATCH_LOADED; - - /* patch is diffable only for non-binary, modified files where at - * least one side has data and there is actual change in the data - */ - if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0 && - delta->status != GIT_DELTA_UNMODIFIED && - (patch->old_data.len || patch->new_data.len) && - (patch->old_data.len != patch->new_data.len || - !git_oid_equal(&delta->old_file.oid, &delta->new_file.oid))) - patch->flags |= GIT_DIFF_PATCH_DIFFABLE; - } - - return error; -} - -static int diff_patch_cb(void *priv, mmbuffer_t *bufs, int len) -{ - git_diff_patch *patch = priv; - diff_context *ctxt = patch->ctxt; - - if (len == 1) { - ctxt->error = parse_hunk_header(&ctxt->range, bufs[0].ptr); - if (ctxt->error < 0) - return ctxt->error; - - if (ctxt->hunk_cb != NULL && - ctxt->hunk_cb(patch->delta, &ctxt->range, - bufs[0].ptr, bufs[0].size, ctxt->payload)) - ctxt->error = GIT_EUSER; - } - - if (len == 2 || len == 3) { - /* expect " "/"-"/"+", then data */ - char origin = - (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_ADDITION : - (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_DELETION : - GIT_DIFF_LINE_CONTEXT; - - if (ctxt->data_cb != NULL && - ctxt->data_cb(patch->delta, &ctxt->range, - origin, bufs[1].ptr, bufs[1].size, ctxt->payload)) - ctxt->error = GIT_EUSER; - } - - if (len == 3 && !ctxt->error) { - /* If we have a '+' and a third buf, then we have added a line - * without a newline and the old code had one, so DEL_EOFNL. - * If we have a '-' and a third buf, then we have removed a line - * with out a newline but added a blank line, so ADD_EOFNL. 
- */ - char origin = - (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_DEL_EOFNL : - (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_ADD_EOFNL : - GIT_DIFF_LINE_CONTEXT_EOFNL; - - if (ctxt->data_cb != NULL && - ctxt->data_cb(patch->delta, &ctxt->range, - origin, bufs[2].ptr, bufs[2].size, ctxt->payload)) - ctxt->error = GIT_EUSER; - } - - return ctxt->error; -} - -static int diff_patch_generate( - diff_context *ctxt, git_diff_patch *patch) -{ - int error = 0; - xdemitcb_t xdiff_callback; - mmfile_t old_xdiff_data, new_xdiff_data; - - if ((patch->flags & GIT_DIFF_PATCH_DIFFED) != 0) - return 0; - - if ((patch->flags & GIT_DIFF_PATCH_LOADED) == 0) - if ((error = diff_patch_load(ctxt, patch)) < 0) - return error; - - if ((patch->flags & GIT_DIFF_PATCH_DIFFABLE) == 0) - return 0; - - if (!ctxt->file_cb && !ctxt->hunk_cb) - return 0; - - patch->ctxt = ctxt; - - memset(&xdiff_callback, 0, sizeof(xdiff_callback)); - xdiff_callback.outf = diff_patch_cb; - xdiff_callback.priv = patch; - - old_xdiff_data.ptr = patch->old_data.data; - old_xdiff_data.size = patch->old_data.len; - new_xdiff_data.ptr = patch->new_data.data; - new_xdiff_data.size = patch->new_data.len; - - xdl_diff(&old_xdiff_data, &new_xdiff_data, - &ctxt->xdiff_params, &ctxt->xdiff_config, &xdiff_callback); - - error = ctxt->error; - - if (!error) - patch->flags |= GIT_DIFF_PATCH_DIFFED; - - return error; -} - -static void diff_patch_unload(git_diff_patch *patch) -{ - if ((patch->flags & GIT_DIFF_PATCH_DIFFED) != 0) { - patch->flags = (patch->flags & ~GIT_DIFF_PATCH_DIFFED); - - patch->hunks_size = 0; - patch->lines_size = 0; - } - - if ((patch->flags & GIT_DIFF_PATCH_LOADED) != 0) { - patch->flags = (patch->flags & ~GIT_DIFF_PATCH_LOADED); - - release_content( - &patch->delta->old_file, &patch->old_data, patch->old_blob); - release_content( - &patch->delta->new_file, &patch->new_data, patch->new_blob); - } -} - -static void diff_patch_free(git_diff_patch *patch) -{ - diff_patch_unload(patch); - - git__free(patch->lines); - 
patch->lines = NULL; - patch->lines_asize = 0; - - git__free(patch->hunks); - patch->hunks = NULL; - patch->hunks_asize = 0; - - if (!(patch->flags & GIT_DIFF_PATCH_ALLOCATED)) - return; - - patch->flags = 0; - - git_diff_list_free(patch->diff); /* decrements refcount */ - - git__free(patch); -} - -#define MAX_HUNK_STEP 128 -#define MIN_HUNK_STEP 8 -#define MAX_LINE_STEP 256 -#define MIN_LINE_STEP 8 - -static int diff_patch_hunk_cb( - const git_diff_delta *delta, - const git_diff_range *range, - const char *header, - size_t header_len, - void *payload) -{ - git_diff_patch *patch = payload; - diff_patch_hunk *hunk; - - GIT_UNUSED(delta); - - if (patch->hunks_size >= patch->hunks_asize) { - size_t new_size; - diff_patch_hunk *new_hunks; - - if (patch->hunks_asize > MAX_HUNK_STEP) - new_size = patch->hunks_asize + MAX_HUNK_STEP; - else - new_size = patch->hunks_asize * 2; - if (new_size < MIN_HUNK_STEP) - new_size = MIN_HUNK_STEP; - - new_hunks = git__realloc( - patch->hunks, new_size * sizeof(diff_patch_hunk)); - if (!new_hunks) - return -1; - - patch->hunks = new_hunks; - patch->hunks_asize = new_size; - } - - hunk = &patch->hunks[patch->hunks_size++]; - - memcpy(&hunk->range, range, sizeof(hunk->range)); - - assert(header_len + 1 < sizeof(hunk->header)); - memcpy(&hunk->header, header, header_len); - hunk->header[header_len] = '\0'; - hunk->header_len = header_len; - - hunk->line_start = patch->lines_size; - hunk->line_count = 0; - - patch->oldno = range->old_start; - patch->newno = range->new_start; - - return 0; -} - -static int diff_patch_line_cb( - const git_diff_delta *delta, - const git_diff_range *range, - char line_origin, - const char *content, - size_t content_len, - void *payload) -{ - git_diff_patch *patch = payload; - diff_patch_hunk *hunk; - diff_patch_line *line; - - GIT_UNUSED(delta); - GIT_UNUSED(range); - - assert(patch->hunks_size > 0); - assert(patch->hunks != NULL); - - hunk = &patch->hunks[patch->hunks_size - 1]; - - if (patch->lines_size >= 
patch->lines_asize) { - size_t new_size; - diff_patch_line *new_lines; - - if (patch->lines_asize > MAX_LINE_STEP) - new_size = patch->lines_asize + MAX_LINE_STEP; - else - new_size = patch->lines_asize * 2; - if (new_size < MIN_LINE_STEP) - new_size = MIN_LINE_STEP; - - new_lines = git__realloc( - patch->lines, new_size * sizeof(diff_patch_line)); - if (!new_lines) - return -1; - - patch->lines = new_lines; - patch->lines_asize = new_size; - } - - line = &patch->lines[patch->lines_size++]; - - line->ptr = content; - line->len = content_len; - line->origin = line_origin; - - /* do some bookkeeping so we can provide old/new line numbers */ - - for (line->lines = 0; content_len > 0; --content_len) { - if (*content++ == '\n') - ++line->lines; - } - - switch (line_origin) { - case GIT_DIFF_LINE_ADDITION: - case GIT_DIFF_LINE_DEL_EOFNL: - line->oldno = -1; - line->newno = patch->newno; - patch->newno += line->lines; - break; - case GIT_DIFF_LINE_DELETION: - case GIT_DIFF_LINE_ADD_EOFNL: - line->oldno = patch->oldno; - line->newno = -1; - patch->oldno += line->lines; - break; - default: - line->oldno = patch->oldno; - line->newno = patch->newno; - patch->oldno += line->lines; - patch->newno += line->lines; - break; - } - - hunk->line_count++; - - return 0; -} - -static int diff_required(git_diff_list *diff, const char *action) -{ - if (!diff) { - giterr_set(GITERR_INVALID, "Must provide valid diff to %s", action); - return -1; - } - - return 0; -} - -int git_diff_foreach( - git_diff_list *diff, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - int error = 0; - diff_context ctxt; - size_t idx; - git_diff_patch patch; - - if (diff_required(diff, "git_diff_foreach") < 0) - return -1; - - if (diff_context_init( - &ctxt, diff, NULL, NULL, file_cb, hunk_cb, data_cb, payload) < 0) - return -1; - - diff_patch_init(&ctxt, &patch); - - git_vector_foreach(&diff->deltas, idx, patch.delta) { - - /* check flags against patch 
status */ - if (git_diff_delta__should_skip(ctxt.opts, patch.delta)) - continue; - - if (!(error = diff_patch_load(&ctxt, &patch))) { - - /* invoke file callback */ - error = diff_delta_file_callback(&ctxt, patch.delta, idx); - - /* generate diffs and invoke hunk and line callbacks */ - if (!error) - error = diff_patch_generate(&ctxt, &patch); - - diff_patch_unload(&patch); - } - - if (error < 0) - break; - } - - if (error == GIT_EUSER) - giterr_clear(); /* don't let error message leak */ - - return error; -} - - -typedef struct { - git_diff_list *diff; - git_diff_data_cb print_cb; - void *payload; - git_buf *buf; - int oid_strlen; -} diff_print_info; - -static int diff_print_info_init( - diff_print_info *pi, - git_buf *out, git_diff_list *diff, git_diff_data_cb cb, void *payload) -{ - assert(diff && diff->repo); - - pi->diff = diff; - pi->print_cb = cb; - pi->payload = payload; - pi->buf = out; - - if (git_repository__cvar(&pi->oid_strlen, diff->repo, GIT_CVAR_ABBREV) < 0) - return -1; - - pi->oid_strlen += 1; /* for NUL byte */ - - if (pi->oid_strlen < 2) - pi->oid_strlen = 2; - else if (pi->oid_strlen > GIT_OID_HEXSZ + 1) - pi->oid_strlen = GIT_OID_HEXSZ + 1; - - return 0; -} - -static char pick_suffix(int mode) -{ - if (S_ISDIR(mode)) - return '/'; - else if (mode & 0100) //-V536 - /* in git, modes are very regular, so we must have 0100755 mode */ - return '*'; - else - return ' '; -} - -char git_diff_status_char(git_delta_t status) -{ - char code; - - switch (status) { - case GIT_DELTA_ADDED: code = 'A'; break; - case GIT_DELTA_DELETED: code = 'D'; break; - case GIT_DELTA_MODIFIED: code = 'M'; break; - case GIT_DELTA_RENAMED: code = 'R'; break; - case GIT_DELTA_COPIED: code = 'C'; break; - case GIT_DELTA_IGNORED: code = 'I'; break; - case GIT_DELTA_UNTRACKED: code = '?'; break; - default: code = ' '; break; - } - - return code; -} - -static int callback_error(void) -{ - giterr_clear(); - return GIT_EUSER; -} - -static int print_compact( - const git_diff_delta 
*delta, float progress, void *data) -{ - diff_print_info *pi = data; - char old_suffix, new_suffix, code = git_diff_status_char(delta->status); - - GIT_UNUSED(progress); - - if (code == ' ') - return 0; - - old_suffix = pick_suffix(delta->old_file.mode); - new_suffix = pick_suffix(delta->new_file.mode); - - git_buf_clear(pi->buf); - - if (delta->old_file.path != delta->new_file.path && - pi->diff->strcomp(delta->old_file.path,delta->new_file.path) != 0) - git_buf_printf(pi->buf, "%c\t%s%c -> %s%c\n", code, - delta->old_file.path, old_suffix, delta->new_file.path, new_suffix); - else if (delta->old_file.mode != delta->new_file.mode && - delta->old_file.mode != 0 && delta->new_file.mode != 0) - git_buf_printf(pi->buf, "%c\t%s%c (%o -> %o)\n", code, - delta->old_file.path, new_suffix, delta->old_file.mode, delta->new_file.mode); - else if (old_suffix != ' ') - git_buf_printf(pi->buf, "%c\t%s%c\n", code, delta->old_file.path, old_suffix); - else - git_buf_printf(pi->buf, "%c\t%s\n", code, delta->old_file.path); - - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -int git_diff_print_compact( - git_diff_list *diff, - git_diff_data_cb print_cb, - void *payload) -{ - int error; - git_buf buf = GIT_BUF_INIT; - diff_print_info pi; - - if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) - error = git_diff_foreach(diff, print_compact, NULL, NULL, &pi); - - git_buf_free(&buf); - - return error; -} - -static int print_raw( - const git_diff_delta *delta, float progress, void *data) -{ - diff_print_info *pi = data; - char code = git_diff_status_char(delta->status); - char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; - - GIT_UNUSED(progress); - - if (code == ' ') - return 0; - - git_buf_clear(pi->buf); - - git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid); - git_oid_tostr(end_oid, 
pi->oid_strlen, &delta->new_file.oid); - - git_buf_printf( - pi->buf, ":%06o %06o %s... %s... %c", - delta->old_file.mode, delta->new_file.mode, start_oid, end_oid, code); - - if (delta->similarity > 0) - git_buf_printf(pi->buf, "%03u", delta->similarity); - - if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED) - git_buf_printf( - pi->buf, "\t%s %s\n", delta->old_file.path, delta->new_file.path); - else - git_buf_printf( - pi->buf, "\t%s\n", delta->old_file.path ? - delta->old_file.path : delta->new_file.path); - - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -int git_diff_print_raw( - git_diff_list *diff, - git_diff_data_cb print_cb, - void *payload) -{ - int error; - git_buf buf = GIT_BUF_INIT; - diff_print_info pi; - - if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) - error = git_diff_foreach(diff, print_raw, NULL, NULL, &pi); - - git_buf_free(&buf); - - return error; -} - -static int print_oid_range(diff_print_info *pi, const git_diff_delta *delta) -{ - char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; - - git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid); - git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid); - - /* TODO: Match git diff more closely */ - if (delta->old_file.mode == delta->new_file.mode) { - git_buf_printf(pi->buf, "index %s..%s %o\n", - start_oid, end_oid, delta->old_file.mode); - } else { - if (delta->old_file.mode == 0) { - git_buf_printf(pi->buf, "new file mode %o\n", delta->new_file.mode); - } else if (delta->new_file.mode == 0) { - git_buf_printf(pi->buf, "deleted file mode %o\n", delta->old_file.mode); - } else { - git_buf_printf(pi->buf, "old mode %o\n", delta->old_file.mode); - git_buf_printf(pi->buf, "new mode %o\n", delta->new_file.mode); - } - git_buf_printf(pi->buf, "index %s..%s\n", start_oid, 
end_oid); - } - - if (git_buf_oom(pi->buf)) - return -1; - - return 0; -} - -static int print_patch_file( - const git_diff_delta *delta, float progress, void *data) -{ - diff_print_info *pi = data; - const char *oldpfx = pi->diff->opts.old_prefix; - const char *oldpath = delta->old_file.path; - const char *newpfx = pi->diff->opts.new_prefix; - const char *newpath = delta->new_file.path; - - GIT_UNUSED(progress); - - if (S_ISDIR(delta->new_file.mode) || - delta->status == GIT_DELTA_UNMODIFIED || - delta->status == GIT_DELTA_IGNORED || - (delta->status == GIT_DELTA_UNTRACKED && - (pi->diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0)) - return 0; - - if (!oldpfx) - oldpfx = DIFF_OLD_PREFIX_DEFAULT; - - if (!newpfx) - newpfx = DIFF_NEW_PREFIX_DEFAULT; - - git_buf_clear(pi->buf); - git_buf_printf(pi->buf, "diff --git %s%s %s%s\n", oldpfx, delta->old_file.path, newpfx, delta->new_file.path); - - if (print_oid_range(pi, delta) < 0) - return -1; - - if (git_oid_iszero(&delta->old_file.oid)) { - oldpfx = ""; - oldpath = "/dev/null"; - } - if (git_oid_iszero(&delta->new_file.oid)) { - newpfx = ""; - newpath = "/dev/null"; - } - - if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) { - git_buf_printf(pi->buf, "--- %s%s\n", oldpfx, oldpath); - git_buf_printf(pi->buf, "+++ %s%s\n", newpfx, newpath); - } - - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) - return 0; - - git_buf_clear(pi->buf); - git_buf_printf( - pi->buf, "Binary files %s%s and %s%s differ\n", - oldpfx, oldpath, newpfx, newpath); - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_BINARY, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -static int print_patch_hunk( - const git_diff_delta *d, - const git_diff_range *r, - 
const char *header, - size_t header_len, - void *data) -{ - diff_print_info *pi = data; - - if (S_ISDIR(d->new_file.mode)) - return 0; - - git_buf_clear(pi->buf); - if (git_buf_printf(pi->buf, "%.*s", (int)header_len, header) < 0) - return -1; - - if (pi->print_cb(d, r, GIT_DIFF_LINE_HUNK_HDR, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -static int print_patch_line( - const git_diff_delta *delta, - const git_diff_range *range, - char line_origin, /* GIT_DIFF_LINE value from above */ - const char *content, - size_t content_len, - void *data) -{ - diff_print_info *pi = data; - - if (S_ISDIR(delta->new_file.mode)) - return 0; - - git_buf_clear(pi->buf); - - if (line_origin == GIT_DIFF_LINE_ADDITION || - line_origin == GIT_DIFF_LINE_DELETION || - line_origin == GIT_DIFF_LINE_CONTEXT) - git_buf_printf(pi->buf, "%c%.*s", line_origin, (int)content_len, content); - else if (content_len > 0) - git_buf_printf(pi->buf, "%.*s", (int)content_len, content); - - if (git_buf_oom(pi->buf)) - return -1; - - if (pi->print_cb(delta, range, line_origin, - git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) - return callback_error(); - - return 0; -} - -int git_diff_print_patch( - git_diff_list *diff, - git_diff_data_cb print_cb, - void *payload) -{ - int error; - git_buf buf = GIT_BUF_INIT; - diff_print_info pi; - - if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) - error = git_diff_foreach( - diff, print_patch_file, print_patch_hunk, print_patch_line, &pi); - - git_buf_free(&buf); - - return error; -} - -static void set_data_from_blob( - const git_blob *blob, git_map *map, git_diff_file *file) -{ - if (blob) { - file->size = git_blob_rawsize(blob); - git_oid_cpy(&file->oid, git_object_id((const git_object *)blob)); - file->mode = 0644; - - map->len = (size_t)file->size; - map->data = (char *)git_blob_rawcontent(blob); - } else { - file->size = 0; - file->flags |= GIT_DIFF_FLAG__NO_DATA; - - 
map->len = 0; - map->data = ""; - } -} - -static void set_data_from_buffer( - const char *buffer, size_t buffer_len, git_map *map, git_diff_file *file) -{ - file->size = (git_off_t)buffer_len; - file->mode = 0644; - map->len = buffer_len; - - if (!buffer) { - file->flags |= GIT_DIFF_FLAG__NO_DATA; - map->data = NULL; - } else { - map->data = (char *)buffer; - git_odb_hash(&file->oid, buffer, buffer_len, GIT_OBJ_BLOB); - } -} - -typedef struct { - diff_context ctxt; - git_diff_delta delta; - git_diff_patch patch; -} diff_single_data; - -static int diff_single_init( - diff_single_data *data, - git_repository *repo, - const git_diff_options *opts, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); - - memset(data, 0, sizeof(*data)); - - if (diff_context_init( - &data->ctxt, NULL, repo, opts, - file_cb, hunk_cb, data_cb, payload) < 0) - return -1; - - diff_patch_init(&data->ctxt, &data->patch); - - return 0; -} - -static int diff_single_apply(diff_single_data *data) -{ - int error; - git_diff_delta *delta = &data->delta; - bool has_old = ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); - bool has_new = ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); - - /* finish setting up fake git_diff_delta record and loaded data */ - - data->patch.delta = delta; - delta->flags = delta->flags & ~KNOWN_BINARY_FLAGS; - - delta->status = has_new ? - (has_old ? GIT_DELTA_MODIFIED : GIT_DELTA_ADDED) : - (has_old ? 
GIT_DELTA_DELETED : GIT_DELTA_UNTRACKED); - - if (git_oid__cmp(&delta->new_file.oid, &delta->old_file.oid) == 0) - delta->status = GIT_DELTA_UNMODIFIED; - - if ((error = diff_delta_is_binary_by_content( - &data->ctxt, delta, &delta->old_file, &data->patch.old_data)) < 0 || - (error = diff_delta_is_binary_by_content( - &data->ctxt, delta, &delta->new_file, &data->patch.new_data)) < 0) - goto cleanup; - - data->patch.flags |= GIT_DIFF_PATCH_LOADED; - - if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0 && - delta->status != GIT_DELTA_UNMODIFIED) - data->patch.flags |= GIT_DIFF_PATCH_DIFFABLE; - - /* do diffs */ - - if (!(error = diff_delta_file_callback(&data->ctxt, delta, 1))) - error = diff_patch_generate(&data->ctxt, &data->patch); - -cleanup: - if (error == GIT_EUSER) - giterr_clear(); - - diff_patch_unload(&data->patch); - - return error; -} - -int git_diff_blobs( - const git_blob *old_blob, - const git_blob *new_blob, - const git_diff_options *options, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - int error; - diff_single_data d; - git_repository *repo = - new_blob ? git_object_owner((const git_object *)new_blob) : - old_blob ? git_object_owner((const git_object *)old_blob) : NULL; - - if (!repo) /* Hmm, given two NULL blobs, silently do no callbacks? 
*/ - return 0; - - if ((error = diff_single_init( - &d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0) - return error; - - if (options && (options->flags & GIT_DIFF_REVERSE) != 0) { - const git_blob *swap = old_blob; - old_blob = new_blob; - new_blob = swap; - } - - set_data_from_blob(old_blob, &d.patch.old_data, &d.delta.old_file); - set_data_from_blob(new_blob, &d.patch.new_data, &d.delta.new_file); - - return diff_single_apply(&d); -} - -int git_diff_blob_to_buffer( - const git_blob *old_blob, - const char *buf, - size_t buflen, - const git_diff_options *options, - git_diff_file_cb file_cb, - git_diff_hunk_cb hunk_cb, - git_diff_data_cb data_cb, - void *payload) -{ - int error; - diff_single_data d; - git_repository *repo = - old_blob ? git_object_owner((const git_object *)old_blob) : NULL; - - if (!repo && !buf) /* Hmm, given NULLs, silently do no callbacks? */ - return 0; - - if ((error = diff_single_init( - &d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0) - return error; - - if (options && (options->flags & GIT_DIFF_REVERSE) != 0) { - set_data_from_buffer(buf, buflen, &d.patch.old_data, &d.delta.old_file); - set_data_from_blob(old_blob, &d.patch.new_data, &d.delta.new_file); - } else { - set_data_from_blob(old_blob, &d.patch.old_data, &d.delta.old_file); - set_data_from_buffer(buf, buflen, &d.patch.new_data, &d.delta.new_file); - } - - return diff_single_apply(&d); -} - -size_t git_diff_num_deltas(git_diff_list *diff) -{ - assert(diff); - return (size_t)diff->deltas.length; -} - -size_t git_diff_num_deltas_of_type(git_diff_list *diff, git_delta_t type) -{ - size_t i, count = 0; - git_diff_delta *delta; - - assert(diff); - - git_vector_foreach(&diff->deltas, i, delta) { - count += (delta->status == type); - } - - return count; -} - -int git_diff_get_patch( - git_diff_patch **patch_ptr, - const git_diff_delta **delta_ptr, - git_diff_list *diff, - size_t idx) -{ - int error; - diff_context ctxt; - git_diff_delta *delta; - git_diff_patch 
*patch; - - if (patch_ptr) - *patch_ptr = NULL; - if (delta_ptr) - *delta_ptr = NULL; - - if (diff_required(diff, "git_diff_get_patch") < 0) - return -1; - - if (diff_context_init( - &ctxt, diff, NULL, NULL, - NULL, diff_patch_hunk_cb, diff_patch_line_cb, NULL) < 0) - return -1; - - delta = git_vector_get(&diff->deltas, idx); - if (!delta) { - giterr_set(GITERR_INVALID, "Index out of range for delta in diff"); - return GIT_ENOTFOUND; - } - - if (delta_ptr) - *delta_ptr = delta; - - if (!patch_ptr && - ((delta->flags & KNOWN_BINARY_FLAGS) != 0 || - (diff->opts.flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0)) - return 0; - - if (git_diff_delta__should_skip(ctxt.opts, delta)) - return 0; - - /* Don't load the patch if the user doesn't want it */ - if (!patch_ptr) - return 0; - - patch = diff_patch_alloc(&ctxt, delta); - if (!patch) - return -1; - - if (!(error = diff_patch_load(&ctxt, patch))) { - ctxt.payload = patch; - - error = diff_patch_generate(&ctxt, patch); - - if (error == GIT_EUSER) - error = ctxt.error; - } - - if (error) - git_diff_patch_free(patch); - else if (patch_ptr) - *patch_ptr = patch; - - return error; -} - -void git_diff_patch_free(git_diff_patch *patch) -{ - if (patch) - GIT_REFCOUNT_DEC(patch, diff_patch_free); -} - -const git_diff_delta *git_diff_patch_delta(git_diff_patch *patch) -{ - assert(patch); - return patch->delta; -} - -size_t git_diff_patch_num_hunks(git_diff_patch *patch) -{ - assert(patch); - return patch->hunks_size; -} - -int git_diff_patch_line_stats( - size_t *total_ctxt, - size_t *total_adds, - size_t *total_dels, - const git_diff_patch *patch) -{ - size_t totals[3], idx; - - memset(totals, 0, sizeof(totals)); - - for (idx = 0; idx < patch->lines_size; ++idx) { - switch (patch->lines[idx].origin) { - case GIT_DIFF_LINE_CONTEXT: totals[0]++; break; - case GIT_DIFF_LINE_ADDITION: totals[1]++; break; - case GIT_DIFF_LINE_DELETION: totals[2]++; break; - default: - /* diff --stat and --numstat don't count EOFNL marks because - * they 
will always be paired with a ADDITION or DELETION line. - */ - break; - } - } - - if (total_ctxt) - *total_ctxt = totals[0]; - if (total_adds) - *total_adds = totals[1]; - if (total_dels) - *total_dels = totals[2]; - - return 0; -} - -static int diff_error_outofrange(const char *thing) -{ - giterr_set(GITERR_INVALID, "Diff patch %s index out of range", thing); - return GIT_ENOTFOUND; -} - -int git_diff_patch_get_hunk( - const git_diff_range **range, - const char **header, - size_t *header_len, - size_t *lines_in_hunk, - git_diff_patch *patch, - size_t hunk_idx) -{ - diff_patch_hunk *hunk; - - assert(patch); - - if (hunk_idx >= patch->hunks_size) { - if (range) *range = NULL; - if (header) *header = NULL; - if (header_len) *header_len = 0; - if (lines_in_hunk) *lines_in_hunk = 0; - - return diff_error_outofrange("hunk"); - } - - hunk = &patch->hunks[hunk_idx]; - - if (range) *range = &hunk->range; - if (header) *header = hunk->header; - if (header_len) *header_len = hunk->header_len; - if (lines_in_hunk) *lines_in_hunk = hunk->line_count; - - return 0; -} - -int git_diff_patch_num_lines_in_hunk( - git_diff_patch *patch, - size_t hunk_idx) -{ - assert(patch); - - if (hunk_idx >= patch->hunks_size) - return diff_error_outofrange("hunk"); - else - return (int)patch->hunks[hunk_idx].line_count; -} - -int git_diff_patch_get_line_in_hunk( - char *line_origin, - const char **content, - size_t *content_len, - int *old_lineno, - int *new_lineno, - git_diff_patch *patch, - size_t hunk_idx, - size_t line_of_hunk) -{ - diff_patch_hunk *hunk; - diff_patch_line *line; - const char *thing; - - assert(patch); - - if (hunk_idx >= patch->hunks_size) { - thing = "hunk"; - goto notfound; - } - hunk = &patch->hunks[hunk_idx]; - - if (line_of_hunk >= hunk->line_count) { - thing = "link"; - goto notfound; - } - - line = &patch->lines[hunk->line_start + line_of_hunk]; - - if (line_origin) *line_origin = line->origin; - if (content) *content = line->ptr; - if (content_len) *content_len = 
line->len; - if (old_lineno) *old_lineno = (int)line->oldno; - if (new_lineno) *new_lineno = (int)line->newno; - - return 0; - -notfound: - if (line_origin) *line_origin = GIT_DIFF_LINE_CONTEXT; - if (content) *content = NULL; - if (content_len) *content_len = 0; - if (old_lineno) *old_lineno = -1; - if (new_lineno) *new_lineno = -1; - - return diff_error_outofrange(thing); -} - -static int print_to_buffer_cb( - const git_diff_delta *delta, - const git_diff_range *range, - char line_origin, - const char *content, - size_t content_len, - void *payload) -{ - git_buf *output = payload; - GIT_UNUSED(delta); GIT_UNUSED(range); GIT_UNUSED(line_origin); - return git_buf_put(output, content, content_len); -} - -int git_diff_patch_print( - git_diff_patch *patch, - git_diff_data_cb print_cb, - void *payload) -{ - int error; - git_buf temp = GIT_BUF_INIT; - diff_print_info pi; - size_t h, l; - - assert(patch && print_cb); - - if (!(error = diff_print_info_init( - &pi, &temp, patch->diff, print_cb, payload))) - error = print_patch_file(patch->delta, 0, &pi); - - for (h = 0; h < patch->hunks_size && !error; ++h) { - diff_patch_hunk *hunk = &patch->hunks[h]; - - error = print_patch_hunk( - patch->delta, &hunk->range, hunk->header, hunk->header_len, &pi); - - for (l = 0; l < hunk->line_count && !error; ++l) { - diff_patch_line *line = &patch->lines[hunk->line_start + l]; - - error = print_patch_line( - patch->delta, &hunk->range, - line->origin, line->ptr, line->len, &pi); - } - } - - git_buf_free(&temp); - - return error; -} - -int git_diff_patch_to_str( - char **string, - git_diff_patch *patch) -{ - int error; - git_buf output = GIT_BUF_INIT; - - error = git_diff_patch_print(patch, print_to_buffer_cb, &output); - - /* GIT_EUSER means git_buf_put in print_to_buffer_cb returned -1, - * meaning a memory allocation failure, so just map to -1... 
- */ - if (error == GIT_EUSER) - error = -1; - - *string = git_buf_detach(&output); - - return error; -} - -int git_diff__paired_foreach( - git_diff_list *idx2head, - git_diff_list *wd2idx, - int (*cb)(git_diff_delta *i2h, git_diff_delta *w2i, void *payload), - void *payload) -{ - int cmp; - git_diff_delta *i2h, *w2i; - size_t i, j, i_max, j_max; - int (*strcomp)(const char *, const char *); - - i_max = idx2head ? idx2head->deltas.length : 0; - j_max = wd2idx ? wd2idx->deltas.length : 0; - - /* Get appropriate strcmp function */ - strcomp = idx2head ? idx2head->strcomp : wd2idx ? wd2idx->strcomp : NULL; - - /* Assert both iterators use matching ignore-case. If this function ever - * supports merging diffs that are not sorted by the same function, then - * it will need to spool and sort on one of the results before merging - */ - if (idx2head && wd2idx) { - assert(idx2head->strcomp == wd2idx->strcomp); - } - - for (i = 0, j = 0; i < i_max || j < j_max; ) { - i2h = idx2head ? GIT_VECTOR_GET(&idx2head->deltas,i) : NULL; - w2i = wd2idx ? GIT_VECTOR_GET(&wd2idx->deltas,j) : NULL; - - cmp = !w2i ? -1 : !i2h ? 1 : - strcomp(i2h->old_file.path, w2i->old_file.path); - - if (cmp < 0) { - if (cb(i2h, NULL, payload)) - return GIT_EUSER; - i++; - } else if (cmp > 0) { - if (cb(NULL, w2i, payload)) - return GIT_EUSER; - j++; - } else { - if (cb(i2h, w2i, payload)) - return GIT_EUSER; - i++; j++; - } - } - - return 0; -} diff --git a/src/diff_output.h b/src/diff_output.h deleted file mode 100644 index 083355676..000000000 --- a/src/diff_output.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. 
- */ -#ifndef INCLUDE_diff_output_h__ -#define INCLUDE_diff_output_h__ - -#include "git2/blob.h" -#include "diff.h" -#include "map.h" -#include "xdiff/xdiff.h" - -#define MAX_DIFF_FILESIZE 0x20000000 - -enum { - GIT_DIFF_PATCH_ALLOCATED = (1 << 0), - GIT_DIFF_PATCH_PREPPED = (1 << 1), - GIT_DIFF_PATCH_LOADED = (1 << 2), - GIT_DIFF_PATCH_DIFFABLE = (1 << 3), - GIT_DIFF_PATCH_DIFFED = (1 << 4), -}; - -/* context for performing diffs */ -typedef struct { - git_repository *repo; - git_diff_list *diff; - const git_diff_options *opts; - git_diff_file_cb file_cb; - git_diff_hunk_cb hunk_cb; - git_diff_data_cb data_cb; - void *payload; - int error; - git_diff_range range; - xdemitconf_t xdiff_config; - xpparam_t xdiff_params; -} diff_context; - -/* cached information about a single span in a diff */ -typedef struct diff_patch_line diff_patch_line; -struct diff_patch_line { - const char *ptr; - size_t len; - size_t lines, oldno, newno; - char origin; -}; - -/* cached information about a hunk in a diff */ -typedef struct diff_patch_hunk diff_patch_hunk; -struct diff_patch_hunk { - git_diff_range range; - char header[128]; - size_t header_len; - size_t line_start; - size_t line_count; -}; - -struct git_diff_patch { - git_refcount rc; - git_diff_list *diff; /* for refcount purposes, maybe NULL for blob diffs */ - git_diff_delta *delta; - diff_context *ctxt; /* only valid while generating patch */ - git_iterator_type_t old_src; - git_iterator_type_t new_src; - git_blob *old_blob; - git_blob *new_blob; - git_map old_data; - git_map new_data; - uint32_t flags; - diff_patch_hunk *hunks; - size_t hunks_asize, hunks_size; - diff_patch_line *lines; - size_t lines_asize, lines_size; - size_t oldno, newno; -}; - -/* context for performing diff on a single delta */ -typedef struct { - git_diff_patch *patch; - uint32_t prepped : 1; - uint32_t loaded : 1; - uint32_t diffable : 1; - uint32_t diffed : 1; -} diff_delta_context; - -extern int git_diff__paired_foreach( - git_diff_list 
*idx2head, - git_diff_list *wd2idx, - int (*cb)(git_diff_delta *i2h, git_diff_delta *w2i, void *payload), - void *payload); - -#endif diff --git a/src/diff_patch.c b/src/diff_patch.c new file mode 100644 index 000000000..a1e1fe84c --- /dev/null +++ b/src/diff_patch.c @@ -0,0 +1,922 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#include "common.h" +#include "diff.h" +#include "diff_file.h" +#include "diff_driver.h" +#include "diff_patch.h" +#include "diff_xdiff.h" + +/* cached information about a single span in a diff */ +typedef struct diff_patch_line diff_patch_line; +struct diff_patch_line { + const char *ptr; + size_t len; + size_t lines, oldno, newno; + char origin; +}; + +/* cached information about a hunk in a diff */ +typedef struct diff_patch_hunk diff_patch_hunk; +struct diff_patch_hunk { + git_diff_range range; + char header[128]; + size_t header_len; + size_t line_start; + size_t line_count; +}; + +struct git_diff_patch { + git_refcount rc; + git_diff_list *diff; /* for refcount purposes, maybe NULL for blob diffs */ + git_diff_delta *delta; + size_t delta_index; + git_diff_file_content ofile; + git_diff_file_content nfile; + uint32_t flags; + git_array_t(diff_patch_hunk) hunks; + git_array_t(diff_patch_line) lines; + size_t oldno, newno; + size_t content_size; + git_pool flattened; +}; + +enum { + GIT_DIFF_PATCH_ALLOCATED = (1 << 0), + GIT_DIFF_PATCH_INITIALIZED = (1 << 1), + GIT_DIFF_PATCH_LOADED = (1 << 2), + GIT_DIFF_PATCH_DIFFABLE = (1 << 3), + GIT_DIFF_PATCH_DIFFED = (1 << 4), + GIT_DIFF_PATCH_FLATTENED = (1 << 5), +}; + +static void diff_output_init(git_diff_output*, const git_diff_options*, + git_diff_file_cb, git_diff_hunk_cb, git_diff_data_cb, void*); + +static void diff_output_to_patch(git_diff_output *, git_diff_patch *); + +static void 
diff_patch_update_binary(git_diff_patch *patch) +{ + if ((patch->delta->flags & DIFF_FLAGS_KNOWN_BINARY) != 0) + return; + + if ((patch->ofile.file.flags & GIT_DIFF_FLAG_BINARY) != 0 || + (patch->nfile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + patch->delta->flags |= GIT_DIFF_FLAG_BINARY; + + else if ((patch->ofile.file.flags & DIFF_FLAGS_NOT_BINARY) != 0 && + (patch->nfile.file.flags & DIFF_FLAGS_NOT_BINARY) != 0) + patch->delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; +} + +static void diff_patch_init_common(git_diff_patch *patch) +{ + diff_patch_update_binary(patch); + + if ((patch->delta->flags & GIT_DIFF_FLAG_BINARY) != 0) + patch->flags |= GIT_DIFF_PATCH_LOADED; /* set LOADED but not DIFFABLE */ + + patch->flags |= GIT_DIFF_PATCH_INITIALIZED; + + if (patch->diff) + git_diff_list_addref(patch->diff); +} + +static int diff_patch_init_from_diff( + git_diff_patch *patch, git_diff_list *diff, size_t delta_index) +{ + int error = 0; + + memset(patch, 0, sizeof(*patch)); + patch->diff = diff; + patch->delta = git_vector_get(&diff->deltas, delta_index); + patch->delta_index = delta_index; + + if ((error = git_diff_file_content__init_from_diff( + &patch->ofile, diff, delta_index, true)) < 0 || + (error = git_diff_file_content__init_from_diff( + &patch->nfile, diff, delta_index, false)) < 0) + return error; + + diff_patch_init_common(patch); + + return 0; +} + +static int diff_patch_alloc_from_diff( + git_diff_patch **out, + git_diff_list *diff, + size_t delta_index) +{ + int error; + git_diff_patch *patch = git__calloc(1, sizeof(git_diff_patch)); + GITERR_CHECK_ALLOC(patch); + + if (!(error = diff_patch_init_from_diff(patch, diff, delta_index))) { + patch->flags |= GIT_DIFF_PATCH_ALLOCATED; + GIT_REFCOUNT_INC(patch); + } else { + git__free(patch); + patch = NULL; + } + + *out = patch; + return error; +} + +static int diff_patch_load(git_diff_patch *patch, git_diff_output *output) +{ + int error = 0; + bool incomplete_data; + + if ((patch->flags & GIT_DIFF_PATCH_LOADED) != 
0) + return 0; + + /* if no hunk and data callbacks and user doesn't care if data looks + * binary, then there is no need to actually load the data + */ + if ((patch->ofile.opts_flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0 && + output && !output->hunk_cb && !output->data_cb) + return 0; + +#define DIFF_FLAGS_KNOWN_DATA (GIT_DIFF_FLAG__NO_DATA|GIT_DIFF_FLAG_VALID_OID) + + incomplete_data = + ((patch->ofile.file.flags & DIFF_FLAGS_KNOWN_DATA) != 0 && + (patch->nfile.file.flags & DIFF_FLAGS_KNOWN_DATA) != 0); + + /* always try to load workdir content first because filtering may + * need 2x data size and this minimizes peak memory footprint + */ + if (patch->ofile.src == GIT_ITERATOR_TYPE_WORKDIR) { + if ((error = git_diff_file_content__load(&patch->ofile)) < 0 || + (patch->ofile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + goto cleanup; + } + if (patch->nfile.src == GIT_ITERATOR_TYPE_WORKDIR) { + if ((error = git_diff_file_content__load(&patch->nfile)) < 0 || + (patch->nfile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + goto cleanup; + } + + /* once workdir has been tried, load other data as needed */ + if (patch->ofile.src != GIT_ITERATOR_TYPE_WORKDIR) { + if ((error = git_diff_file_content__load(&patch->ofile)) < 0 || + (patch->ofile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + goto cleanup; + } + if (patch->nfile.src != GIT_ITERATOR_TYPE_WORKDIR) { + if ((error = git_diff_file_content__load(&patch->nfile)) < 0 || + (patch->nfile.file.flags & GIT_DIFF_FLAG_BINARY) != 0) + goto cleanup; + } + + /* if we were previously missing an oid, reassess UNMODIFIED state */ + if (incomplete_data && + patch->ofile.file.mode == patch->nfile.file.mode && + git_oid_equal(&patch->ofile.file.oid, &patch->nfile.file.oid)) + patch->delta->status = GIT_DELTA_UNMODIFIED; + +cleanup: + diff_patch_update_binary(patch); + + if (!error) { + /* patch is diffable only for non-binary, modified files where + * at least one side has data and the data actually changed + */ + if ((patch->delta->flags & 
GIT_DIFF_FLAG_BINARY) == 0 && + patch->delta->status != GIT_DELTA_UNMODIFIED && + (patch->ofile.map.len || patch->nfile.map.len) && + (patch->ofile.map.len != patch->nfile.map.len || + !git_oid_equal(&patch->ofile.file.oid, &patch->nfile.file.oid))) + patch->flags |= GIT_DIFF_PATCH_DIFFABLE; + + patch->flags |= GIT_DIFF_PATCH_LOADED; + } + + return error; +} + +static int diff_patch_file_callback( + git_diff_patch *patch, git_diff_output *output) +{ + float progress; + + if (!output->file_cb) + return 0; + + progress = patch->diff ? + ((float)patch->delta_index / patch->diff->deltas.length) : 1.0f; + + if (output->file_cb(patch->delta, progress, output->payload) != 0) + output->error = GIT_EUSER; + + return output->error; +} + +static int diff_patch_generate(git_diff_patch *patch, git_diff_output *output) +{ + int error = 0; + + if ((patch->flags & GIT_DIFF_PATCH_DIFFED) != 0) + return 0; + + if ((patch->flags & GIT_DIFF_PATCH_LOADED) == 0 && + (error = diff_patch_load(patch, output)) < 0) + return error; + + if ((patch->flags & GIT_DIFF_PATCH_DIFFABLE) == 0) + return 0; + + if (output->diff_cb != NULL && + !(error = output->diff_cb(output, patch))) + patch->flags |= GIT_DIFF_PATCH_DIFFED; + + return error; +} + +static void diff_patch_free(git_diff_patch *patch) +{ + git_diff_file_content__clear(&patch->ofile); + git_diff_file_content__clear(&patch->nfile); + + git_array_clear(patch->lines); + git_array_clear(patch->hunks); + + git_diff_list_free(patch->diff); /* decrements refcount */ + patch->diff = NULL; + + git_pool_clear(&patch->flattened); + + if (patch->flags & GIT_DIFF_PATCH_ALLOCATED) + git__free(patch); +} + +static int diff_required(git_diff_list *diff, const char *action) +{ + if (diff) + return 0; + giterr_set(GITERR_INVALID, "Must provide valid diff to %s", action); + return -1; +} + +int git_diff_foreach( + git_diff_list *diff, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb data_cb, + void *payload) +{ + int error = 0; + 
git_xdiff_output xo; + size_t idx; + git_diff_patch patch; + + if (diff_required(diff, "git_diff_foreach") < 0) + return -1; + + diff_output_init((git_diff_output *)&xo, + &diff->opts, file_cb, hunk_cb, data_cb, payload); + git_xdiff_init(&xo, &diff->opts); + + git_vector_foreach(&diff->deltas, idx, patch.delta) { + /* check flags against patch status */ + if (git_diff_delta__should_skip(&diff->opts, patch.delta)) + continue; + + if (!(error = diff_patch_init_from_diff(&patch, diff, idx))) { + + error = diff_patch_file_callback(&patch, (git_diff_output *)&xo); + + if (!error) + error = diff_patch_generate(&patch, (git_diff_output *)&xo); + + git_diff_patch_free(&patch); + } + + if (error < 0) + break; + } + + if (error == GIT_EUSER) + giterr_clear(); /* don't leave error message set invalidly */ + return error; +} + +typedef struct { + git_diff_patch patch; + git_diff_delta delta; +} diff_patch_with_delta; + +static int diff_single_generate(diff_patch_with_delta *pd, git_xdiff_output *xo) +{ + int error = 0; + git_diff_patch *patch = &pd->patch; + bool has_old = ((patch->ofile.file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); + bool has_new = ((patch->nfile.file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); + + pd->delta.status = has_new ? + (has_old ? GIT_DELTA_MODIFIED : GIT_DELTA_ADDED) : + (has_old ? 
GIT_DELTA_DELETED : GIT_DELTA_UNTRACKED); + + if (git_oid_equal(&patch->nfile.file.oid, &patch->ofile.file.oid)) + pd->delta.status = GIT_DELTA_UNMODIFIED; + + patch->delta = &pd->delta; + + diff_patch_init_common(patch); + + error = diff_patch_file_callback(patch, (git_diff_output *)xo); + + if (!error) + error = diff_patch_generate(patch, (git_diff_output *)xo); + + if (error == GIT_EUSER) + giterr_clear(); /* don't leave error message set invalidly */ + + return error; +} + +static int diff_patch_from_blobs( + diff_patch_with_delta *pd, + git_xdiff_output *xo, + const git_blob *old_blob, + const git_blob *new_blob, + const git_diff_options *opts) +{ + int error = 0; + git_repository *repo = + new_blob ? git_object_owner((const git_object *)new_blob) : + old_blob ? git_object_owner((const git_object *)old_blob) : NULL; + + GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); + + pd->patch.delta = &pd->delta; + + if (!repo) /* return two NULL items as UNMODIFIED delta */ + return 0; + + if (opts && (opts->flags & GIT_DIFF_REVERSE) != 0) { + const git_blob *swap = old_blob; + old_blob = new_blob; + new_blob = swap; + } + + if ((error = git_diff_file_content__init_from_blob( + &pd->patch.ofile, repo, opts, old_blob)) < 0 || + (error = git_diff_file_content__init_from_blob( + &pd->patch.nfile, repo, opts, new_blob)) < 0) + return error; + + return diff_single_generate(pd, xo); +} + +int git_diff_blobs( + const git_blob *old_blob, + const git_blob *new_blob, + const git_diff_options *opts, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb data_cb, + void *payload) +{ + int error = 0; + diff_patch_with_delta pd; + git_xdiff_output xo; + + memset(&pd, 0, sizeof(pd)); + memset(&xo, 0, sizeof(xo)); + + diff_output_init( + (git_diff_output *)&xo, opts, file_cb, hunk_cb, data_cb, payload); + git_xdiff_init(&xo, opts); + + error = diff_patch_from_blobs(&pd, &xo, old_blob, new_blob, opts); + + git_diff_patch_free((git_diff_patch 
*)&pd); + + return error; +} + +int git_diff_patch_from_blobs( + git_diff_patch **out, + const git_blob *old_blob, + const git_blob *new_blob, + const git_diff_options *opts) +{ + int error = 0; + diff_patch_with_delta *pd; + git_xdiff_output xo; + + assert(out); + *out = NULL; + + pd = git__calloc(1, sizeof(*pd)); + GITERR_CHECK_ALLOC(pd); + pd->patch.flags = GIT_DIFF_PATCH_ALLOCATED; + + memset(&xo, 0, sizeof(xo)); + + diff_output_to_patch((git_diff_output *)&xo, &pd->patch); + git_xdiff_init(&xo, opts); + + if (!(error = diff_patch_from_blobs(pd, &xo, old_blob, new_blob, opts))) + *out = (git_diff_patch *)pd; + else + git_diff_patch_free((git_diff_patch *)pd); + + return error; +} + +static int diff_patch_from_blob_and_buffer( + diff_patch_with_delta *pd, + git_xdiff_output *xo, + const git_blob *old_blob, + const char *buf, + size_t buflen, + const git_diff_options *opts) +{ + int error = 0; + git_repository *repo = + old_blob ? git_object_owner((const git_object *)old_blob) : NULL; + + GITERR_CHECK_VERSION(opts, GIT_DIFF_OPTIONS_VERSION, "git_diff_options"); + + pd->patch.delta = &pd->delta; + + if (!repo && !buf) /* return two NULL items as UNMODIFIED delta */ + return 0; + + if (opts && (opts->flags & GIT_DIFF_REVERSE) != 0) { + if (!(error = git_diff_file_content__init_from_raw( + &pd->patch.ofile, repo, opts, buf, buflen))) + error = git_diff_file_content__init_from_blob( + &pd->patch.nfile, repo, opts, old_blob); + } else { + if (!(error = git_diff_file_content__init_from_blob( + &pd->patch.ofile, repo, opts, old_blob))) + error = git_diff_file_content__init_from_raw( + &pd->patch.nfile, repo, opts, buf, buflen); + } + + return diff_single_generate(pd, xo); +} + +int git_diff_blob_to_buffer( + const git_blob *old_blob, + const char *buf, + size_t buflen, + const git_diff_options *opts, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb data_cb, + void *payload) +{ + int error = 0; + diff_patch_with_delta pd; + git_xdiff_output xo; 
+ + memset(&pd, 0, sizeof(pd)); + memset(&xo, 0, sizeof(xo)); + + diff_output_init( + (git_diff_output *)&xo, opts, file_cb, hunk_cb, data_cb, payload); + git_xdiff_init(&xo, opts); + + error = diff_patch_from_blob_and_buffer( + &pd, &xo, old_blob, buf, buflen, opts); + + git_diff_patch_free((git_diff_patch *)&pd); + + return error; +} + +int git_diff_patch_from_blob_and_buffer( + git_diff_patch **out, + const git_blob *old_blob, + const char *buf, + size_t buflen, + const git_diff_options *opts) +{ + int error = 0; + diff_patch_with_delta *pd; + git_xdiff_output xo; + + assert(out); + *out = NULL; + + pd = git__calloc(1, sizeof(*pd)); + GITERR_CHECK_ALLOC(pd); + pd->patch.flags = GIT_DIFF_PATCH_ALLOCATED; + + memset(&xo, 0, sizeof(xo)); + + diff_output_to_patch((git_diff_output *)&xo, &pd->patch); + git_xdiff_init(&xo, opts); + + if (!(error = diff_patch_from_blob_and_buffer( + pd, &xo, old_blob, buf, buflen, opts))) + *out = (git_diff_patch *)pd; + else + git_diff_patch_free((git_diff_patch *)pd); + + return error; +} + +int git_diff_get_patch( + git_diff_patch **patch_ptr, + const git_diff_delta **delta_ptr, + git_diff_list *diff, + size_t idx) +{ + int error = 0; + git_xdiff_output xo; + git_diff_delta *delta = NULL; + git_diff_patch *patch = NULL; + + if (patch_ptr) *patch_ptr = NULL; + if (delta_ptr) *delta_ptr = NULL; + + if (diff_required(diff, "git_diff_get_patch") < 0) + return -1; + + delta = git_vector_get(&diff->deltas, idx); + if (!delta) { + giterr_set(GITERR_INVALID, "Index out of range for delta in diff"); + return GIT_ENOTFOUND; + } + + if (delta_ptr) + *delta_ptr = delta; + + if (git_diff_delta__should_skip(&diff->opts, delta)) + return 0; + + /* don't load the patch data unless we need it for binary check */ + if (!patch_ptr && + ((delta->flags & DIFF_FLAGS_KNOWN_BINARY) != 0 || + (diff->opts.flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0)) + return 0; + + if ((error = diff_patch_alloc_from_diff(&patch, diff, idx)) < 0) + return error; + + 
diff_output_to_patch((git_diff_output *)&xo, patch); + git_xdiff_init(&xo, &diff->opts); + + error = diff_patch_file_callback(patch, (git_diff_output *)&xo); + + if (!error) + error = diff_patch_generate(patch, (git_diff_output *)&xo); + + if (!error) { + /* if cumulative diff size is < 0.5 total size, flatten the patch */ + /* unload the file content */ + } + + if (error || !patch_ptr) + git_diff_patch_free(patch); + else + *patch_ptr = patch; + + if (error == GIT_EUSER) + giterr_clear(); /* don't leave error message set invalidly */ + return error; +} + +void git_diff_patch_free(git_diff_patch *patch) +{ + if (patch) + GIT_REFCOUNT_DEC(patch, diff_patch_free); +} + +const git_diff_delta *git_diff_patch_delta(git_diff_patch *patch) +{ + assert(patch); + return patch->delta; +} + +size_t git_diff_patch_num_hunks(git_diff_patch *patch) +{ + assert(patch); + return git_array_size(patch->hunks); +} + +int git_diff_patch_line_stats( + size_t *total_ctxt, + size_t *total_adds, + size_t *total_dels, + const git_diff_patch *patch) +{ + size_t totals[3], idx; + + memset(totals, 0, sizeof(totals)); + + for (idx = 0; idx < git_array_size(patch->lines); ++idx) { + diff_patch_line *line = git_array_get(patch->lines, idx); + if (!line) + continue; + + switch (line->origin) { + case GIT_DIFF_LINE_CONTEXT: totals[0]++; break; + case GIT_DIFF_LINE_ADDITION: totals[1]++; break; + case GIT_DIFF_LINE_DELETION: totals[2]++; break; + default: + /* diff --stat and --numstat don't count EOFNL marks because + * they will always be paired with a ADDITION or DELETION line. 
+ */ + break; + } + } + + if (total_ctxt) + *total_ctxt = totals[0]; + if (total_adds) + *total_adds = totals[1]; + if (total_dels) + *total_dels = totals[2]; + + return 0; +} + +static int diff_error_outofrange(const char *thing) +{ + giterr_set(GITERR_INVALID, "Diff patch %s index out of range", thing); + return GIT_ENOTFOUND; +} + +int git_diff_patch_get_hunk( + const git_diff_range **range, + const char **header, + size_t *header_len, + size_t *lines_in_hunk, + git_diff_patch *patch, + size_t hunk_idx) +{ + diff_patch_hunk *hunk; + assert(patch); + + hunk = git_array_get(patch->hunks, hunk_idx); + + if (!hunk) { + if (range) *range = NULL; + if (header) *header = NULL; + if (header_len) *header_len = 0; + if (lines_in_hunk) *lines_in_hunk = 0; + return diff_error_outofrange("hunk"); + } + + if (range) *range = &hunk->range; + if (header) *header = hunk->header; + if (header_len) *header_len = hunk->header_len; + if (lines_in_hunk) *lines_in_hunk = hunk->line_count; + return 0; +} + +int git_diff_patch_num_lines_in_hunk(git_diff_patch *patch, size_t hunk_idx) +{ + diff_patch_hunk *hunk; + assert(patch); + + if (!(hunk = git_array_get(patch->hunks, hunk_idx))) + return diff_error_outofrange("hunk"); + return (int)hunk->line_count; +} + +int git_diff_patch_get_line_in_hunk( + char *line_origin, + const char **content, + size_t *content_len, + int *old_lineno, + int *new_lineno, + git_diff_patch *patch, + size_t hunk_idx, + size_t line_of_hunk) +{ + diff_patch_hunk *hunk; + diff_patch_line *line; + const char *thing; + + assert(patch); + + if (!(hunk = git_array_get(patch->hunks, hunk_idx))) { + thing = "hunk"; + goto notfound; + } + + if (line_of_hunk >= hunk->line_count || + !(line = git_array_get( + patch->lines, hunk->line_start + line_of_hunk))) { + thing = "line"; + goto notfound; + } + + if (line_origin) *line_origin = line->origin; + if (content) *content = line->ptr; + if (content_len) *content_len = line->len; + if (old_lineno) *old_lineno = 
(int)line->oldno; + if (new_lineno) *new_lineno = (int)line->newno; + + return 0; + +notfound: + if (line_origin) *line_origin = GIT_DIFF_LINE_CONTEXT; + if (content) *content = NULL; + if (content_len) *content_len = 0; + if (old_lineno) *old_lineno = -1; + if (new_lineno) *new_lineno = -1; + + return diff_error_outofrange(thing); +} + +git_diff_list *git_diff_patch__diff(git_diff_patch *patch) +{ + return patch->diff; +} + +git_diff_driver *git_diff_patch__driver(git_diff_patch *patch) +{ + /* ofile driver is representative for whole patch */ + return patch->ofile.driver; +} + +void git_diff_patch__old_data( + char **ptr, size_t *len, git_diff_patch *patch) +{ + *ptr = patch->ofile.map.data; + *len = patch->ofile.map.len; +} + +void git_diff_patch__new_data( + char **ptr, size_t *len, git_diff_patch *patch) +{ + *ptr = patch->nfile.map.data; + *len = patch->nfile.map.len; +} + +int git_diff_patch__invoke_callbacks( + git_diff_patch *patch, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb line_cb, + void *payload) +{ + int error = 0; + uint32_t i, j; + + if (file_cb) + error = file_cb(patch->delta, 0, payload); + + if (!hunk_cb && !line_cb) + return error; + + for (i = 0; !error && i < git_array_size(patch->hunks); ++i) { + diff_patch_hunk *h = git_array_get(patch->hunks, i); + + error = hunk_cb( + patch->delta, &h->range, h->header, h->header_len, payload); + + if (!line_cb) + continue; + + for (j = 0; !error && j < h->line_count; ++j) { + diff_patch_line *l = + git_array_get(patch->lines, h->line_start + j); + + error = line_cb( + patch->delta, &h->range, l->origin, l->ptr, l->len, payload); + } + } + + return error; +} + + +static int diff_patch_file_cb( + const git_diff_delta *delta, + float progress, + void *payload) +{ + GIT_UNUSED(delta); GIT_UNUSED(progress); GIT_UNUSED(payload); + return 0; +} + +static int diff_patch_hunk_cb( + const git_diff_delta *delta, + const git_diff_range *range, + const char *header, + size_t header_len, 
+ void *payload) +{ + git_diff_patch *patch = payload; + diff_patch_hunk *hunk; + + GIT_UNUSED(delta); + + hunk = git_array_alloc(patch->hunks); + GITERR_CHECK_ALLOC(hunk); + + memcpy(&hunk->range, range, sizeof(hunk->range)); + + assert(header_len + 1 < sizeof(hunk->header)); + memcpy(&hunk->header, header, header_len); + hunk->header[header_len] = '\0'; + hunk->header_len = header_len; + + hunk->line_start = git_array_size(patch->lines); + hunk->line_count = 0; + + patch->oldno = range->old_start; + patch->newno = range->new_start; + + return 0; +} + +static int diff_patch_line_cb( + const git_diff_delta *delta, + const git_diff_range *range, + char line_origin, + const char *content, + size_t content_len, + void *payload) +{ + git_diff_patch *patch = payload; + diff_patch_hunk *hunk; + diff_patch_line *line; + + GIT_UNUSED(delta); + GIT_UNUSED(range); + + hunk = git_array_last(patch->hunks); + GITERR_CHECK_ALLOC(hunk); + + line = git_array_alloc(patch->lines); + GITERR_CHECK_ALLOC(line); + + line->ptr = content; + line->len = content_len; + line->origin = line_origin; + + patch->content_size += content_len; + + /* do some bookkeeping so we can provide old/new line numbers */ + + for (line->lines = 0; content_len > 0; --content_len) { + if (*content++ == '\n') + ++line->lines; + } + + switch (line_origin) { + case GIT_DIFF_LINE_ADDITION: + case GIT_DIFF_LINE_DEL_EOFNL: + line->oldno = -1; + line->newno = patch->newno; + patch->newno += line->lines; + break; + case GIT_DIFF_LINE_DELETION: + case GIT_DIFF_LINE_ADD_EOFNL: + line->oldno = patch->oldno; + line->newno = -1; + patch->oldno += line->lines; + break; + default: + line->oldno = patch->oldno; + line->newno = patch->newno; + patch->oldno += line->lines; + patch->newno += line->lines; + break; + } + + hunk->line_count++; + + return 0; +} + +static void diff_output_init( + git_diff_output *out, + const git_diff_options *opts, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb data_cb, + 
void *payload) +{ + GIT_UNUSED(opts); + + memset(out, 0, sizeof(*out)); + + out->file_cb = file_cb; + out->hunk_cb = hunk_cb; + out->data_cb = data_cb; + out->payload = payload; +} + +static void diff_output_to_patch(git_diff_output *out, git_diff_patch *patch) +{ + diff_output_init( + out, NULL, + diff_patch_file_cb, diff_patch_hunk_cb, diff_patch_line_cb, patch); +} diff --git a/src/diff_patch.h b/src/diff_patch.h new file mode 100644 index 000000000..56af14600 --- /dev/null +++ b/src/diff_patch.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_diff_patch_h__ +#define INCLUDE_diff_patch_h__ + +#include "common.h" +#include "diff.h" +#include "diff_file.h" +#include "array.h" + +extern git_diff_list *git_diff_patch__diff(git_diff_patch *); + +extern git_diff_driver *git_diff_patch__driver(git_diff_patch *); + +extern void git_diff_patch__old_data(char **, size_t *, git_diff_patch *); +extern void git_diff_patch__new_data(char **, size_t *, git_diff_patch *); + +extern int git_diff_patch__invoke_callbacks( + git_diff_patch *patch, + git_diff_file_cb file_cb, + git_diff_hunk_cb hunk_cb, + git_diff_data_cb line_cb, + void *payload); + +typedef struct git_diff_output git_diff_output; +struct git_diff_output { + /* these callbacks are issued with the diff data */ + git_diff_file_cb file_cb; + git_diff_hunk_cb hunk_cb; + git_diff_data_cb data_cb; + void *payload; + + /* this records the actual error in cases where it may be obscured */ + int error; + + /* this callback is used to do the diff and drive the other callbacks. + * see diff_xdiff.h for how to use this in practice for now. 
+ */ + int (*diff_cb)(git_diff_output *output, git_diff_patch *patch); +}; + +#endif diff --git a/src/diff_print.c b/src/diff_print.c new file mode 100644 index 000000000..244aa6e1d --- /dev/null +++ b/src/diff_print.c @@ -0,0 +1,417 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#include "common.h" +#include "diff.h" +#include "diff_patch.h" +#include "buffer.h" + +typedef struct { + git_diff_list *diff; + git_diff_data_cb print_cb; + void *payload; + git_buf *buf; + int oid_strlen; +} diff_print_info; + +static int diff_print_info_init( + diff_print_info *pi, + git_buf *out, git_diff_list *diff, git_diff_data_cb cb, void *payload) +{ + assert(diff && diff->repo); + + pi->diff = diff; + pi->print_cb = cb; + pi->payload = payload; + pi->buf = out; + + if (git_repository__cvar(&pi->oid_strlen, diff->repo, GIT_CVAR_ABBREV) < 0) + return -1; + + pi->oid_strlen += 1; /* for NUL byte */ + + if (pi->oid_strlen < 2) + pi->oid_strlen = 2; + else if (pi->oid_strlen > GIT_OID_HEXSZ + 1) + pi->oid_strlen = GIT_OID_HEXSZ + 1; + + return 0; +} + +static char pick_suffix(int mode) +{ + if (S_ISDIR(mode)) + return '/'; + else if (mode & 0100) //-V536 + /* in git, modes are very regular, so we must have 0100755 mode */ + return '*'; + else + return ' '; +} + +char git_diff_status_char(git_delta_t status) +{ + char code; + + switch (status) { + case GIT_DELTA_ADDED: code = 'A'; break; + case GIT_DELTA_DELETED: code = 'D'; break; + case GIT_DELTA_MODIFIED: code = 'M'; break; + case GIT_DELTA_RENAMED: code = 'R'; break; + case GIT_DELTA_COPIED: code = 'C'; break; + case GIT_DELTA_IGNORED: code = 'I'; break; + case GIT_DELTA_UNTRACKED: code = '?'; break; + default: code = ' '; break; + } + + return code; +} + +static int callback_error(void) +{ + giterr_clear(); + return GIT_EUSER; +} + +static int 
print_compact( + const git_diff_delta *delta, float progress, void *data) +{ + diff_print_info *pi = data; + char old_suffix, new_suffix, code = git_diff_status_char(delta->status); + + GIT_UNUSED(progress); + + if (code == ' ') + return 0; + + old_suffix = pick_suffix(delta->old_file.mode); + new_suffix = pick_suffix(delta->new_file.mode); + + git_buf_clear(pi->buf); + + if (delta->old_file.path != delta->new_file.path && + pi->diff->strcomp(delta->old_file.path,delta->new_file.path) != 0) + git_buf_printf(pi->buf, "%c\t%s%c -> %s%c\n", code, + delta->old_file.path, old_suffix, delta->new_file.path, new_suffix); + else if (delta->old_file.mode != delta->new_file.mode && + delta->old_file.mode != 0 && delta->new_file.mode != 0) + git_buf_printf(pi->buf, "%c\t%s%c (%o -> %o)\n", code, + delta->old_file.path, new_suffix, delta->old_file.mode, delta->new_file.mode); + else if (old_suffix != ' ') + git_buf_printf(pi->buf, "%c\t%s%c\n", code, delta->old_file.path, old_suffix); + else + git_buf_printf(pi->buf, "%c\t%s\n", code, delta->old_file.path); + + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +int git_diff_print_compact( + git_diff_list *diff, + git_diff_data_cb print_cb, + void *payload) +{ + int error; + git_buf buf = GIT_BUF_INIT; + diff_print_info pi; + + if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) + error = git_diff_foreach(diff, print_compact, NULL, NULL, &pi); + + git_buf_free(&buf); + + return error; +} + +static int print_raw( + const git_diff_delta *delta, float progress, void *data) +{ + diff_print_info *pi = data; + char code = git_diff_status_char(delta->status); + char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; + + GIT_UNUSED(progress); + + if (code == ' ') + return 0; + + git_buf_clear(pi->buf); + + git_oid_tostr(start_oid, pi->oid_strlen, 
&delta->old_file.oid); + git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid); + + git_buf_printf( + pi->buf, ":%06o %06o %s... %s... %c", + delta->old_file.mode, delta->new_file.mode, start_oid, end_oid, code); + + if (delta->similarity > 0) + git_buf_printf(pi->buf, "%03u", delta->similarity); + + if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED) + git_buf_printf( + pi->buf, "\t%s %s\n", delta->old_file.path, delta->new_file.path); + else + git_buf_printf( + pi->buf, "\t%s\n", delta->old_file.path ? + delta->old_file.path : delta->new_file.path); + + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +int git_diff_print_raw( + git_diff_list *diff, + git_diff_data_cb print_cb, + void *payload) +{ + int error; + git_buf buf = GIT_BUF_INIT; + diff_print_info pi; + + if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) + error = git_diff_foreach(diff, print_raw, NULL, NULL, &pi); + + git_buf_free(&buf); + + return error; +} + +static int print_oid_range(diff_print_info *pi, const git_diff_delta *delta) +{ + char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; + + git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid); + git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid); + + /* TODO: Match git diff more closely */ + if (delta->old_file.mode == delta->new_file.mode) { + git_buf_printf(pi->buf, "index %s..%s %o\n", + start_oid, end_oid, delta->old_file.mode); + } else { + if (delta->old_file.mode == 0) { + git_buf_printf(pi->buf, "new file mode %o\n", delta->new_file.mode); + } else if (delta->new_file.mode == 0) { + git_buf_printf(pi->buf, "deleted file mode %o\n", delta->old_file.mode); + } else { + git_buf_printf(pi->buf, "old mode %o\n", delta->old_file.mode); + git_buf_printf(pi->buf, "new mode %o\n", delta->new_file.mode); + } + 
git_buf_printf(pi->buf, "index %s..%s\n", start_oid, end_oid); + } + + if (git_buf_oom(pi->buf)) + return -1; + + return 0; +} + +static int print_patch_file( + const git_diff_delta *delta, float progress, void *data) +{ + diff_print_info *pi = data; + const char *oldpfx = pi->diff->opts.old_prefix; + const char *oldpath = delta->old_file.path; + const char *newpfx = pi->diff->opts.new_prefix; + const char *newpath = delta->new_file.path; + + GIT_UNUSED(progress); + + if (S_ISDIR(delta->new_file.mode) || + delta->status == GIT_DELTA_UNMODIFIED || + delta->status == GIT_DELTA_IGNORED || + (delta->status == GIT_DELTA_UNTRACKED && + (pi->diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0)) + return 0; + + if (!oldpfx) + oldpfx = DIFF_OLD_PREFIX_DEFAULT; + + if (!newpfx) + newpfx = DIFF_NEW_PREFIX_DEFAULT; + + git_buf_clear(pi->buf); + git_buf_printf(pi->buf, "diff --git %s%s %s%s\n", oldpfx, delta->old_file.path, newpfx, delta->new_file.path); + + if (print_oid_range(pi, delta) < 0) + return -1; + + if (git_oid_iszero(&delta->old_file.oid)) { + oldpfx = ""; + oldpath = "/dev/null"; + } + if (git_oid_iszero(&delta->new_file.oid)) { + newpfx = ""; + newpath = "/dev/null"; + } + + if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) { + git_buf_printf(pi->buf, "--- %s%s\n", oldpfx, oldpath); + git_buf_printf(pi->buf, "+++ %s%s\n", newpfx, newpath); + } + + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) + return 0; + + git_buf_clear(pi->buf); + git_buf_printf( + pi->buf, "Binary files %s%s and %s%s differ\n", + oldpfx, oldpath, newpfx, newpath); + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_BINARY, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +static int print_patch_hunk( + 
const git_diff_delta *d, + const git_diff_range *r, + const char *header, + size_t header_len, + void *data) +{ + diff_print_info *pi = data; + + if (S_ISDIR(d->new_file.mode)) + return 0; + + git_buf_clear(pi->buf); + if (git_buf_printf(pi->buf, "%.*s", (int)header_len, header) < 0) + return -1; + + if (pi->print_cb(d, r, GIT_DIFF_LINE_HUNK_HDR, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +static int print_patch_line( + const git_diff_delta *delta, + const git_diff_range *range, + char line_origin, /* GIT_DIFF_LINE value from above */ + const char *content, + size_t content_len, + void *data) +{ + diff_print_info *pi = data; + + if (S_ISDIR(delta->new_file.mode)) + return 0; + + git_buf_clear(pi->buf); + + if (line_origin == GIT_DIFF_LINE_ADDITION || + line_origin == GIT_DIFF_LINE_DELETION || + line_origin == GIT_DIFF_LINE_CONTEXT) + git_buf_printf(pi->buf, "%c%.*s", line_origin, (int)content_len, content); + else if (content_len > 0) + git_buf_printf(pi->buf, "%.*s", (int)content_len, content); + + if (git_buf_oom(pi->buf)) + return -1; + + if (pi->print_cb(delta, range, line_origin, + git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload)) + return callback_error(); + + return 0; +} + +int git_diff_print_patch( + git_diff_list *diff, + git_diff_data_cb print_cb, + void *payload) +{ + int error; + git_buf buf = GIT_BUF_INIT; + diff_print_info pi; + + if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload))) + error = git_diff_foreach( + diff, print_patch_file, print_patch_hunk, print_patch_line, &pi); + + git_buf_free(&buf); + + return error; +} + + +static int print_to_buffer_cb( + const git_diff_delta *delta, + const git_diff_range *range, + char line_origin, + const char *content, + size_t content_len, + void *payload) +{ + git_buf *output = payload; + GIT_UNUSED(delta); GIT_UNUSED(range); GIT_UNUSED(line_origin); + return git_buf_put(output, content, content_len); +} + +int 
git_diff_patch_print( + git_diff_patch *patch, + git_diff_data_cb print_cb, + void *payload) +{ + int error; + git_buf temp = GIT_BUF_INIT; + diff_print_info pi; + + assert(patch && print_cb); + + if (!(error = diff_print_info_init( + &pi, &temp, git_diff_patch__diff(patch), print_cb, payload))) + error = git_diff_patch__invoke_callbacks( + patch, print_patch_file, print_patch_hunk, print_patch_line, &pi); + + git_buf_free(&temp); + + return error; +} + +int git_diff_patch_to_str( + char **string, + git_diff_patch *patch) +{ + int error; + git_buf output = GIT_BUF_INIT; + + error = git_diff_patch_print(patch, print_to_buffer_cb, &output); + + /* GIT_EUSER means git_buf_put in print_to_buffer_cb returned -1, + * meaning a memory allocation failure, so just map to -1... + */ + if (error == GIT_EUSER) + error = -1; + + *string = git_buf_detach(&output); + + return error; +} diff --git a/src/diff_tform.c b/src/diff_tform.c index bc3acae1d..597c240ae 100644 --- a/src/diff_tform.c +++ b/src/diff_tform.c @@ -5,10 +5,14 @@ * a Linking Exception. For full terms see the included COPYING file. */ #include "common.h" -#include "diff.h" + #include "git2/config.h" #include "git2/blob.h" + +#include "diff.h" #include "hashsig.h" +#include "path.h" +#include "fileops.h" static git_diff_delta *diff_delta__dup( const git_diff_delta *d, git_pool *pool) diff --git a/src/diff_xdiff.c b/src/diff_xdiff.c new file mode 100644 index 000000000..7694fb996 --- /dev/null +++ b/src/diff_xdiff.c @@ -0,0 +1,166 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#include "common.h" +#include "diff.h" +#include "diff_driver.h" +#include "diff_patch.h" +#include "diff_xdiff.h" + +static int git_xdiff_scan_int(const char **str, int *value) +{ + const char *scan = *str; + int v = 0, digits = 0; + /* find next digit */ + for (scan = *str; *scan && !git__isdigit(*scan); scan++); + /* parse next number */ + for (; git__isdigit(*scan); scan++, digits++) + v = (v * 10) + (*scan - '0'); + *str = scan; + *value = v; + return (digits > 0) ? 0 : -1; +} + +static int git_xdiff_parse_hunk(git_diff_range *range, const char *header) +{ + /* expect something of the form "@@ -%d[,%d] +%d[,%d] @@" */ + if (*header != '@') + return -1; + if (git_xdiff_scan_int(&header, &range->old_start) < 0) + return -1; + if (*header == ',') { + if (git_xdiff_scan_int(&header, &range->old_lines) < 0) + return -1; + } else + range->old_lines = 1; + if (git_xdiff_scan_int(&header, &range->new_start) < 0) + return -1; + if (*header == ',') { + if (git_xdiff_scan_int(&header, &range->new_lines) < 0) + return -1; + } else + range->new_lines = 1; + if (range->old_start < 0 || range->new_start < 0) + return -1; + + return 0; +} + +typedef struct { + git_xdiff_output *xo; + git_diff_patch *patch; + git_diff_range range; +} git_xdiff_info; + +static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len) +{ + git_xdiff_info *info = priv; + git_diff_patch *patch = info->patch; + const git_diff_delta *delta = git_diff_patch_delta(patch); + git_diff_output *output = &info->xo->output; + + if (len == 1) { + output->error = git_xdiff_parse_hunk(&info->range, bufs[0].ptr); + if (output->error < 0) + return output->error; + + if (output->hunk_cb != NULL && + output->hunk_cb(delta, &info->range, + bufs[0].ptr, bufs[0].size, output->payload)) + output->error = GIT_EUSER; + } + + if (len == 2 || len == 3) { + /* expect " "/"-"/"+", then data */ + char origin = + (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_ADDITION : + (*bufs[0].ptr == '-') ? 
GIT_DIFF_LINE_DELETION : + GIT_DIFF_LINE_CONTEXT; + + if (output->data_cb != NULL && + output->data_cb(delta, &info->range, + origin, bufs[1].ptr, bufs[1].size, output->payload)) + output->error = GIT_EUSER; + } + + if (len == 3 && !output->error) { + /* If we have a '+' and a third buf, then we have added a line + * without a newline and the old code had one, so DEL_EOFNL. + * If we have a '-' and a third buf, then we have removed a line + * with out a newline but added a blank line, so ADD_EOFNL. + */ + char origin = + (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_DEL_EOFNL : + (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_ADD_EOFNL : + GIT_DIFF_LINE_CONTEXT_EOFNL; + + if (output->data_cb != NULL && + output->data_cb(delta, &info->range, + origin, bufs[2].ptr, bufs[2].size, output->payload)) + output->error = GIT_EUSER; + } + + return output->error; +} + +static int git_xdiff(git_diff_output *output, git_diff_patch *patch) +{ + git_xdiff_output *xo = (git_xdiff_output *)output; + git_xdiff_info info; + git_diff_find_context_payload findctxt; + mmfile_t xd_old_data, xd_new_data; + + memset(&info, 0, sizeof(info)); + info.patch = patch; + info.xo = xo; + + xo->callback.priv = &info; + + git_diff_find_context_init( + &xo->config.find_func, &findctxt, git_diff_patch__driver(patch)); + xo->config.find_func_priv = &findctxt; + + if (xo->config.find_func != NULL) + xo->config.flags |= XDL_EMIT_FUNCNAMES; + else + xo->config.flags &= ~XDL_EMIT_FUNCNAMES; + + /* TODO: check ofile.opts_flags to see if driver-specific per-file + * updates are needed to xo->params.flags + */ + + git_diff_patch__old_data(&xd_old_data.ptr, &xd_old_data.size, patch); + git_diff_patch__new_data(&xd_new_data.ptr, &xd_new_data.size, patch); + + xdl_diff(&xd_old_data, &xd_new_data, + &xo->params, &xo->config, &xo->callback); + + git_diff_find_context_clear(&findctxt); + + return xo->output.error; +} + +void git_xdiff_init(git_xdiff_output *xo, const git_diff_options *opts) +{ + uint32_t flags = opts ? 
opts->flags : GIT_DIFF_NORMAL; + + xo->output.diff_cb = git_xdiff; + + memset(&xo->config, 0, sizeof(xo->config)); + xo->config.ctxlen = opts ? opts->context_lines : 3; + xo->config.interhunkctxlen = opts ? opts->interhunk_lines : 0; + + memset(&xo->params, 0, sizeof(xo->params)); + if (flags & GIT_DIFF_IGNORE_WHITESPACE) + xo->params.flags |= XDF_WHITESPACE_FLAGS; + if (flags & GIT_DIFF_IGNORE_WHITESPACE_CHANGE) + xo->params.flags |= XDF_IGNORE_WHITESPACE_CHANGE; + if (flags & GIT_DIFF_IGNORE_WHITESPACE_EOL) + xo->params.flags |= XDF_IGNORE_WHITESPACE_AT_EOL; + + memset(&xo->callback, 0, sizeof(xo->callback)); + xo->callback.outf = git_xdiff_cb; +} diff --git a/src/diff_xdiff.h b/src/diff_xdiff.h new file mode 100644 index 000000000..c547b00cf --- /dev/null +++ b/src/diff_xdiff.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_diff_xdiff_h__ +#define INCLUDE_diff_xdiff_h__ + +#include "diff.h" +#include "diff_patch.h" +#include "xdiff/xdiff.h" + +/* A git_xdiff_output is a git_diff_output with extra fields necessary + * to use libxdiff. Calling git_xdiff_init() will set the diff_cb field + * of the output to use xdiff to generate the diffs. 
+ */ +typedef struct { + git_diff_output output; + + xdemitconf_t config; + xpparam_t params; + xdemitcb_t callback; +} git_xdiff_output; + +void git_xdiff_init(git_xdiff_output *xo, const git_diff_options *opts); + +#endif diff --git a/src/fetch.c b/src/fetch.c index b5ec69777..03fad5fec 100644 --- a/src/fetch.c +++ b/src/fetch.c @@ -16,6 +16,8 @@ #include "pack.h" #include "fetch.h" #include "netops.h" +#include "repository.h" +#include "refs.h" struct filter_payload { git_remote *remote; diff --git a/src/index.c b/src/index.c index 25c38b026..fd55616b8 100644 --- a/src/index.c +++ b/src/index.c @@ -2030,6 +2030,8 @@ int git_index_read_tree(git_index *index, const git_tree *tree) error = git_tree_walk(tree, GIT_TREEWALK_POST, read_tree_cb, &data); index_entries_free(&entries); + git_vector_free(&entries); + git_vector_sort(&index->entries); return error; diff --git a/src/iterator.c b/src/iterator.c index 4360b99ad..76b0e41d0 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -7,6 +7,7 @@ #include "iterator.h" #include "tree.h" +#include "index.h" #include "ignore.h" #include "buffer.h" #include "git2/submodule.h" diff --git a/src/merge.c b/src/merge.c index 047d96013..82d2e6f37 100644 --- a/src/merge.c +++ b/src/merge.c @@ -24,6 +24,8 @@ #include "blob.h" #include "hashsig.h" #include "oid.h" +#include "index.h" +#include "filebuf.h" #include "git2/types.h" #include "git2/repository.h" diff --git a/src/refdb_fs.c b/src/refdb_fs.c index 4083ba9e5..b9e283ac5 100644 --- a/src/refdb_fs.c +++ b/src/refdb_fs.c @@ -9,6 +9,7 @@ #include "hash.h" #include "repository.h" #include "fileops.h" +#include "filebuf.h" #include "pack.h" #include "reflog.h" #include "refdb.h" diff --git a/src/refs.c b/src/refs.c index 2b545954d..c0e460cc3 100644 --- a/src/refs.c +++ b/src/refs.c @@ -9,6 +9,7 @@ #include "hash.h" #include "repository.h" #include "fileops.h" +#include "filebuf.h" #include "pack.h" #include "reflog.h" #include "refdb.h" diff --git a/src/remote.c b/src/remote.c 
index 943b72bb7..0e8354a11 100644 --- a/src/remote.c +++ b/src/remote.c @@ -1267,8 +1267,10 @@ static int rename_remote_references( return -1; while ((error = git_reference_next(&ref, iter)) == 0) { - if (git__prefixcmp(ref->name, GIT_REFS_REMOTES_DIR)) + if (git__prefixcmp(ref->name, GIT_REFS_REMOTES_DIR)) { + git_reference_free(ref); continue; + } if ((error = rename_one_remote_reference(ref, old_name, new_name)) < 0) { git_reference_iterator_free(iter); diff --git a/src/remote.h b/src/remote.h index c9c26b77d..dce4803ed 100644 --- a/src/remote.h +++ b/src/remote.h @@ -11,7 +11,7 @@ #include "git2/transport.h" #include "refspec.h" -#include "repository.h" +#include "vector.h" #define GIT_REMOTE_ORIGIN "origin" diff --git a/src/repository.c b/src/repository.c index 2e7a334c9..e4451499c 100644 --- a/src/repository.c +++ b/src/repository.c @@ -17,12 +17,15 @@ #include "tag.h" #include "blob.h" #include "fileops.h" +#include "filebuf.h" +#include "index.h" #include "config.h" #include "refs.h" #include "filter.h" #include "odb.h" #include "remote.h" #include "merge.h" +#include "diff_driver.h" #define GIT_FILE_CONTENT_PREFIX "gitdir:" @@ -109,6 +112,9 @@ void git_repository_free(git_repository *repo) git_cache_free(&repo->objects); git_submodule_config_free(repo); + git_diff_driver_registry_free(repo->diff_drivers); + repo->diff_drivers = NULL; + git__free(repo->path_repository); git__free(repo->workdir); git__free(repo->namespace); diff --git a/src/repository.h b/src/repository.h index bd5f63dac..12dc50d51 100644 --- a/src/repository.h +++ b/src/repository.h @@ -14,15 +14,13 @@ #include "git2/object.h" #include "git2/config.h" -#include "index.h" #include "cache.h" #include "refs.h" #include "buffer.h" -#include "odb.h" #include "object.h" #include "attrcache.h" #include "strmap.h" -#include "refdb.h" +#include "diff_driver.h" #define DOT_GIT ".git" #define GIT_DIR DOT_GIT "/" @@ -108,6 +106,7 @@ struct git_repository { git_cache objects; git_attr_cache attrcache; 
git_strmap *submodules; + git_diff_driver_registry *diff_drivers; char *path_repository; char *workdir; diff --git a/src/signature.c b/src/signature.c index e338f0802..0a34ccfaa 100644 --- a/src/signature.c +++ b/src/signature.c @@ -9,6 +9,7 @@ #include "signature.h" #include "repository.h" #include "git2/common.h" +#include "posix.h" void git_signature_free(git_signature *sig) { diff --git a/src/stash.c b/src/stash.c index 19b29be77..1222634d5 100644 --- a/src/stash.c +++ b/src/stash.c @@ -14,6 +14,7 @@ #include "git2/stash.h" #include "git2/status.h" #include "git2/checkout.h" +#include "git2/index.h" #include "signature.h" static int create_error(int error, const char *msg) diff --git a/src/status.c b/src/status.c index 89f3eedb5..712e0d515 100644 --- a/src/status.c +++ b/src/status.c @@ -14,10 +14,10 @@ #include "git2/status.h" #include "repository.h" #include "ignore.h" +#include "index.h" #include "git2/diff.h" #include "diff.h" -#include "diff_output.h" static unsigned int index_delta2status(git_delta_t index_status) { diff --git a/src/submodule.c b/src/submodule.c index 16114d8ac..af488b7f3 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -22,6 +22,8 @@ #include "submodule.h" #include "tree.h" #include "iterator.h" +#include "path.h" +#include "index.h" #define GIT_MODULES_FILE ".gitmodules" diff --git a/src/thread-utils.h b/src/thread-utils.h index 49b5f3b5e..f56f61b50 100644 --- a/src/thread-utils.h +++ b/src/thread-utils.h @@ -7,8 +7,6 @@ #ifndef INCLUDE_thread_utils_h__ #define INCLUDE_thread_utils_h__ -#include "common.h" - /* Common operations even if threading has been disabled */ typedef struct { #if defined(GIT_WIN32) diff --git a/src/tree.c b/src/tree.c index 10d131438..65d01b4d5 100644 --- a/src/tree.c +++ b/src/tree.c @@ -10,6 +10,9 @@ #include "tree.h" #include "git2/repository.h" #include "git2/object.h" +#include "path.h" +#include "tree-cache.h" +#include "index.h" #define DEFAULT_TREE_SIZE 16 #define MAX_FILEMODE_BYTES 6 diff --git 
a/src/util.h b/src/util.h index 5ae87ac10..43ba79240 100644 --- a/src/util.h +++ b/src/util.h @@ -194,6 +194,8 @@ extern int git__strcasecmp(const char *a, const char *b); extern int git__strncmp(const char *a, const char *b, size_t sz); extern int git__strncasecmp(const char *a, const char *b, size_t sz); +#include "thread-utils.h" + typedef struct { git_atomic refcount; void *owner; diff --git a/tests-clar/checkout/index.c b/tests-clar/checkout/index.c index 78ff5ac62..a3a0f8fda 100644 --- a/tests-clar/checkout/index.c +++ b/tests-clar/checkout/index.c @@ -2,6 +2,7 @@ #include "checkout_helpers.h" #include "git2/checkout.h" +#include "fileops.h" #include "repository.h" static git_repository *g_repo; diff --git a/tests-clar/clar.c b/tests-clar/clar.c index fed87c30d..0eae81bf5 100644 --- a/tests-clar/clar.c +++ b/tests-clar/clar.c @@ -183,10 +183,10 @@ clar_run_test( } static void -clar_run_suite(const struct clar_suite *suite) +clar_run_suite(const struct clar_suite *suite, const char *name) { const struct clar_func *test = suite->tests; - size_t i; + size_t i, namelen; if (!suite->enabled) return; @@ -200,7 +200,23 @@ clar_run_suite(const struct clar_suite *suite) _clar.active_suite = suite->name; _clar.suite_errors = 0; + if (name) { + size_t suitelen = strlen(suite->name); + namelen = strlen(name); + if (namelen <= suitelen) { + name = NULL; + } else { + name += suitelen; + while (*name == ':') + ++name; + namelen = strlen(name); + } + } + for (i = 0; i < suite->test_count; ++i) { + if (name && strncmp(test[i].name, name, namelen)) + continue; + _clar.active_test = test[i].name; clar_run_test(&test[i], &suite->initialize, &suite->cleanup); @@ -240,7 +256,7 @@ clar_parse_args(int argc, char **argv) case 'x': { /* given suite name */ int offset = (argument[2] == '=') ? 
3 : 2, found = 0; char action = argument[1]; - size_t j, len; + size_t j, len, cmplen; argument += offset; len = strlen(argument); @@ -249,7 +265,11 @@ clar_parse_args(int argc, char **argv) clar_usage(argv[0]); for (j = 0; j < _clar_suite_count; ++j) { - if (strncmp(argument, _clar_suites[j].name, len) == 0) { + cmplen = strlen(_clar_suites[j].name); + if (cmplen > len) + cmplen = len; + + if (strncmp(argument, _clar_suites[j].name, cmplen) == 0) { int exact = !strcmp(argument, _clar_suites[j].name); ++found; @@ -258,9 +278,9 @@ clar_parse_args(int argc, char **argv) _clar.report_suite_names = 1; switch (action) { - case 's': clar_run_suite(&_clar_suites[j]); break; - case 'i': _clar_suites[j].enabled = 1; break; - case 'x': _clar_suites[j].enabled = 0; break; + case 's': clar_run_suite(&_clar_suites[j], argument); break; + case 'i': _clar_suites[j].enabled = 1; break; + case 'x': _clar_suites[j].enabled = 0; break; } if (exact) @@ -318,7 +338,7 @@ clar_test(int argc, char **argv) if (!_clar.suites_ran) { size_t i; for (i = 0; i < _clar_suite_count; ++i) - clar_run_suite(&_clar_suites[i]); + clar_run_suite(&_clar_suites[i], NULL); } clar_print_shutdown( diff --git a/tests-clar/clone/nonetwork.c b/tests-clar/clone/nonetwork.c index 8aae1fb52..339b1e70d 100644 --- a/tests-clar/clone/nonetwork.c +++ b/tests-clar/clone/nonetwork.c @@ -1,8 +1,9 @@ #include "clar_libgit2.h" #include "git2/clone.h" -#include "repository.h" #include "remote.h" +#include "fileops.h" +#include "repository.h" #define LIVE_REPO_URL "git://github.com/libgit2/TestGitRepository" diff --git a/tests-clar/diff/blob.c b/tests-clar/diff/blob.c index 2ac8dbc51..b12186d98 100644 --- a/tests-clar/diff/blob.c +++ b/tests-clar/diff/blob.c @@ -120,6 +120,93 @@ void test_diff_blob__can_compare_text_blobs(void) git_blob_free(c); } +void test_diff_blob__can_compare_text_blobs_with_patch(void) +{ + git_blob *a, *b, *c; + git_oid a_oid, b_oid, c_oid; + git_diff_patch *p; + size_t tc, ta, td; + + /* 
tests/resources/attr/root_test1 */ + cl_git_pass(git_oid_fromstrn(&a_oid, "45141a79", 8)); + cl_git_pass(git_blob_lookup_prefix(&a, g_repo, &a_oid, 4)); + + /* tests/resources/attr/root_test2 */ + cl_git_pass(git_oid_fromstrn(&b_oid, "4d713dc4", 8)); + cl_git_pass(git_blob_lookup_prefix(&b, g_repo, &b_oid, 4)); + + /* tests/resources/attr/root_test3 */ + cl_git_pass(git_oid_fromstrn(&c_oid, "c96bbb2c2557a832", 16)); + cl_git_pass(git_blob_lookup_prefix(&c, g_repo, &c_oid, 8)); + + /* Doing the equivalent of a `git diff -U1` on these files */ + + /* diff on tests/resources/attr/root_test1 */ + cl_git_pass(git_diff_patch_from_blobs(&p, a, b, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(6, git_diff_patch_num_lines_in_hunk(p, 0)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(1, (int)tc); + cl_assert_equal_i(5, (int)ta); + cl_assert_equal_i(0, (int)td); + + git_diff_patch_free(p); + + /* diff on tests/resources/attr/root_test2 */ + cl_git_pass(git_diff_patch_from_blobs(&p, b, c, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(15, git_diff_patch_num_lines_in_hunk(p, 0)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(3, (int)tc); + cl_assert_equal_i(9, (int)ta); + cl_assert_equal_i(3, (int)td); + + git_diff_patch_free(p); + + /* diff on tests/resources/attr/root_test3 */ + cl_git_pass(git_diff_patch_from_blobs(&p, a, c, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(13, git_diff_patch_num_lines_in_hunk(p, 0)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + 
cl_assert_equal_i(0, (int)tc); + cl_assert_equal_i(12, (int)ta); + cl_assert_equal_i(1, (int)td); + + git_diff_patch_free(p); + + /* one more */ + cl_git_pass(git_diff_patch_from_blobs(&p, c, d, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(2, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(5, git_diff_patch_num_lines_in_hunk(p, 0)); + cl_assert_equal_i(9, git_diff_patch_num_lines_in_hunk(p, 1)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(4, (int)tc); + cl_assert_equal_i(6, (int)ta); + cl_assert_equal_i(4, (int)td); + + git_diff_patch_free(p); + + git_blob_free(a); + git_blob_free(b); + git_blob_free(c); +} + void test_diff_blob__can_compare_against_null_blobs(void) { git_blob *e = NULL; @@ -175,6 +262,66 @@ void test_diff_blob__can_compare_against_null_blobs(void) cl_assert_equal_i(0, expected.lines); } +void test_diff_blob__can_compare_against_null_blobs_with_patch(void) +{ + git_blob *e = NULL; + git_diff_patch *p; + int line; + char origin; + + cl_git_pass(git_diff_patch_from_blobs(&p, d, e, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_DELETED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(14, git_diff_patch_num_lines_in_hunk(p, 0)); + + for (line = 0; line < git_diff_patch_num_lines_in_hunk(p, 0); ++line) { + cl_git_pass(git_diff_patch_get_line_in_hunk( + &origin, NULL, NULL, NULL, NULL, p, 0, line)); + cl_assert_equal_i(GIT_DIFF_LINE_DELETION, (int)origin); + } + + git_diff_patch_free(p); + + opts.flags |= GIT_DIFF_REVERSE; + + cl_git_pass(git_diff_patch_from_blobs(&p, d, e, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_ADDED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(14, git_diff_patch_num_lines_in_hunk(p, 0)); + + for (line = 0; line < 
git_diff_patch_num_lines_in_hunk(p, 0); ++line) { + cl_git_pass(git_diff_patch_get_line_in_hunk( + &origin, NULL, NULL, NULL, NULL, p, 0, line)); + cl_assert_equal_i(GIT_DIFF_LINE_ADDITION, (int)origin); + } + + git_diff_patch_free(p); + + opts.flags ^= GIT_DIFF_REVERSE; + + cl_git_pass(git_diff_patch_from_blobs(&p, alien, NULL, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_DELETED, git_diff_patch_delta(p)->status); + cl_assert((git_diff_patch_delta(p)->flags & GIT_DIFF_FLAG_BINARY) != 0); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + + git_diff_patch_free(p); + + cl_git_pass(git_diff_patch_from_blobs(&p, NULL, alien, &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_ADDED, git_diff_patch_delta(p)->status); + cl_assert((git_diff_patch_delta(p)->flags & GIT_DIFF_FLAG_BINARY) != 0); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + + git_diff_patch_free(p); +} + static void assert_identical_blobs_comparison(diff_expects *expected) { cl_assert_equal_i(1, expected->files); @@ -206,6 +353,29 @@ void test_diff_blob__can_compare_identical_blobs(void) assert_identical_blobs_comparison(&expected); } +void test_diff_blob__can_compare_identical_blobs_with_patch(void) +{ + git_diff_patch *p; + + cl_git_pass(git_diff_patch_from_blobs(&p, d, d, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_UNMODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + git_diff_patch_free(p); + + cl_git_pass(git_diff_patch_from_blobs(&p, NULL, NULL, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_UNMODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + git_diff_patch_free(p); + + cl_git_pass(git_diff_patch_from_blobs(&p, alien, alien, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_UNMODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + 
git_diff_patch_free(p); +} + static void assert_binary_blobs_comparison(diff_expects *expected) { cl_assert(expected->files_binary > 0); @@ -428,6 +598,74 @@ void test_diff_blob__can_compare_blob_to_buffer(void) git_blob_free(a); } +void test_diff_blob__can_compare_blob_to_buffer_with_patch(void) +{ + git_diff_patch *p; + git_blob *a; + git_oid a_oid; + const char *a_content = "Hello from the root\n"; + const char *b_content = "Hello from the root\n\nSome additional lines\n\nDown here below\n\n"; + size_t tc, ta, td; + + /* tests/resources/attr/root_test1 */ + cl_git_pass(git_oid_fromstrn(&a_oid, "45141a79", 8)); + cl_git_pass(git_blob_lookup_prefix(&a, g_repo, &a_oid, 4)); + + /* diff from blob a to content of b */ + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, a, b_content, strlen(b_content), &opts)); + + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_MODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(6, git_diff_patch_num_lines_in_hunk(p, 0)); + + cl_git_pass(git_diff_patch_line_stats(&tc, &ta, &td, p)); + cl_assert_equal_i(1, (int)tc); + cl_assert_equal_i(5, (int)ta); + cl_assert_equal_i(0, (int)td); + + git_diff_patch_free(p); + + /* diff from blob a to content of a */ + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, a, a_content, strlen(a_content), &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_UNMODIFIED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(0, (int)git_diff_patch_num_hunks(p)); + git_diff_patch_free(p); + + /* diff from NULL blob to content of a */ + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, NULL, a_content, strlen(a_content), &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_ADDED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(1, git_diff_patch_num_lines_in_hunk(p, 0)); + git_diff_patch_free(p); + + /* diff from blob a to NULL buffer */ + 
cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, a, NULL, 0, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_DELETED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(1, git_diff_patch_num_lines_in_hunk(p, 0)); + git_diff_patch_free(p); + + /* diff with reverse */ + opts.flags ^= GIT_DIFF_REVERSE; + + cl_git_pass(git_diff_patch_from_blob_and_buffer( + &p, a, NULL, 0, &opts)); + cl_assert(p != NULL); + cl_assert_equal_i(GIT_DELTA_ADDED, git_diff_patch_delta(p)->status); + cl_assert_equal_i(1, (int)git_diff_patch_num_hunks(p)); + cl_assert_equal_i(1, git_diff_patch_num_lines_in_hunk(p, 0)); + git_diff_patch_free(p); + + git_blob_free(a); +} static void assert_one_modified_with_lines(diff_expects *expected, int lines) { diff --git a/tests-clar/diff/drivers.c b/tests-clar/diff/drivers.c new file mode 100644 index 000000000..06ab2ff14 --- /dev/null +++ b/tests-clar/diff/drivers.c @@ -0,0 +1,125 @@ +#include "clar_libgit2.h" +#include "diff_helpers.h" +#include "repository.h" +#include "diff_driver.h" + +static git_repository *g_repo = NULL; + +void test_diff_drivers__initialize(void) +{ +} + +void test_diff_drivers__cleanup(void) +{ + cl_git_sandbox_cleanup(); + g_repo = NULL; +} + +void test_diff_drivers__patterns(void) +{ + git_config *cfg; + const char *one_sha = "19dd32dfb1520a64e5bbaae8dce6ef423dfa2f13"; + git_tree *one; + git_diff_list *diff; + git_diff_patch *patch; + char *text; + const char *expected0 = "diff --git a/untimely.txt b/untimely.txt\nindex 9a69d96..57fd0cf 100644\n--- a/untimely.txt\n+++ b/untimely.txt\n@@ -22,3 +22,5 @@ Comes through the blood of the vanguards who\n dreamed--too soon--it had sounded.\r\n \r\n -- Rudyard Kipling\r\n+\r\n+Some new stuff\r\n"; + const char *expected1 = "diff --git a/untimely.txt b/untimely.txt\nindex 9a69d96..57fd0cf 100644\nBinary files a/untimely.txt and b/untimely.txt differ\n"; + const char *expected2 = "diff --git a/untimely.txt 
b/untimely.txt\nindex 9a69d96..57fd0cf 100644\n--- a/untimely.txt\n+++ b/untimely.txt\n@@ -22,3 +22,5 @@ Heaven delivers on earth the Hour that cannot be\n dreamed--too soon--it had sounded.\r\n \r\n -- Rudyard Kipling\r\n+\r\n+Some new stuff\r\n"; + + g_repo = cl_git_sandbox_init("renames"); + + one = resolve_commit_oid_to_tree(g_repo, one_sha); + + /* no diff */ + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(0, (int)git_diff_num_deltas(diff)); + git_diff_list_free(diff); + + /* default diff */ + + cl_git_append2file("renames/untimely.txt", "\r\nSome new stuff\r\n"); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected0, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + /* attribute diff set to false */ + + cl_git_rewritefile("renames/.gitattributes", "untimely.txt -diff\n"); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected1, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + /* attribute diff set to unconfigured value (should use default) */ + + cl_git_rewritefile("renames/.gitattributes", "untimely.txt diff=kipling0\n"); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected0, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + /* let's define that driver */ + + 
cl_git_pass(git_repository_config(&cfg, g_repo)); + cl_git_pass(git_config_set_bool(cfg, "diff.kipling0.binary", 1)); + git_config_free(cfg); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected1, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + /* let's use a real driver with some regular expressions */ + + git_diff_driver_registry_free(g_repo->diff_drivers); + g_repo->diff_drivers = NULL; + + cl_git_pass(git_repository_config(&cfg, g_repo)); + cl_git_pass(git_config_set_bool(cfg, "diff.kipling0.binary", 0)); + cl_git_pass(git_config_set_string(cfg, "diff.kipling0.xfuncname", "^H")); + git_config_free(cfg); + + cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, one, NULL)); + cl_assert_equal_i(1, (int)git_diff_num_deltas(diff)); + + cl_git_pass(git_diff_get_patch(&patch, NULL, diff, 0)); + cl_git_pass(git_diff_patch_to_str(&text, patch)); + cl_assert_equal_s(expected2, text); + + git__free(text); + git_diff_patch_free(patch); + git_diff_list_free(diff); + + git_tree_free(one); +} + diff --git a/tests-clar/diff/patch.c b/tests-clar/diff/patch.c index f9e913a74..3f14a0de7 100644 --- a/tests-clar/diff/patch.c +++ b/tests-clar/diff/patch.c @@ -543,7 +543,7 @@ void test_diff_patch__line_counts_with_eofnl(void) "index 378a7d9..3d0154e 100644\n" "--- a/songof7cities.txt\n" "+++ b/songof7cities.txt\n" - "@@ -42,7 +42,7 @@\n" + "@@ -42,7 +42,7 @@ With peoples undefeated of the dark, enduring blood.\n" " \n" " To the sound of trumpets shall their seed restore my Cities\n" " Wealthy and well-weaponed, that once more may I behold\n" diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c index 8bff96cf2..ca3f50676 100644 --- a/tests-clar/diff/rename.c +++ b/tests-clar/diff/rename.c @@ -558,7 +558,7 @@ void 
test_diff_rename__patch(void) git_diff_patch *patch; const git_diff_delta *delta; char *text; - const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n"; + const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@ She sends'em abroad on her own affairs,\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n"; old_tree = resolve_commit_oid_to_tree(g_repo, sha0); new_tree = resolve_commit_oid_to_tree(g_repo, sha1); diff --git a/tests-clar/diff/submodules.c b/tests-clar/diff/submodules.c index f152af46f..6e52a6319 100644 --- a/tests-clar/diff/submodules.c +++ b/tests-clar/diff/submodules.c @@ -1,5 +1,6 @@ #include "clar_libgit2.h" #include "repository.h" +#include "posix.h" #include "../submodule/submodule_helpers.h" static git_repository *g_repo = NULL; diff --git a/tests-clar/fetchhead/nonetwork.c b/tests-clar/fetchhead/nonetwork.c index ef30679f9..a68ebb0b7 100644 --- a/tests-clar/fetchhead/nonetwork.c +++ b/tests-clar/fetchhead/nonetwork.c @@ -1,6 +1,6 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "fetchhead.h" #include "fetchhead_data.h" diff --git a/tests-clar/merge/merge_helpers.c b/tests-clar/merge/merge_helpers.c index bc31b1f44..e4092787c 100644 --- 
a/tests-clar/merge/merge_helpers.c +++ b/tests-clar/merge/merge_helpers.c @@ -1,5 +1,5 @@ #include "clar_libgit2.h" -#include "buffer.h" +#include "fileops.h" #include "refs.h" #include "tree.h" #include "merge_helpers.h" diff --git a/tests-clar/odb/alternates.c b/tests-clar/odb/alternates.c index be7bfa9cd..4e876c2b3 100644 --- a/tests-clar/odb/alternates.c +++ b/tests-clar/odb/alternates.c @@ -1,6 +1,6 @@ #include "clar_libgit2.h" #include "odb.h" -#include "repository.h" +#include "filebuf.h" static git_buf destpath, filepath; static const char *paths[] = { diff --git a/tests-clar/online/clone.c b/tests-clar/online/clone.c index aa12e47c9..bc4285a00 100644 --- a/tests-clar/online/clone.c +++ b/tests-clar/online/clone.c @@ -2,8 +2,9 @@ #include "git2/clone.h" #include "git2/cred_helpers.h" -#include "repository.h" #include "remote.h" +#include "fileops.h" +#include "refs.h" #define LIVE_REPO_URL "http://github.com/libgit2/TestGitRepository" #define LIVE_EMPTYREPO_URL "http://github.com/libgit2/TestEmptyRepository" diff --git a/tests-clar/online/fetchhead.c b/tests-clar/online/fetchhead.c index e14ae0926..58717eef8 100644 --- a/tests-clar/online/fetchhead.c +++ b/tests-clar/online/fetchhead.c @@ -1,6 +1,6 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "fetchhead.h" #include "../fetchhead/fetchhead_data.h" #include "git2/clone.h" diff --git a/tests-clar/refs/delete.c b/tests-clar/refs/delete.c index 053f41229..973768aeb 100644 --- a/tests-clar/refs/delete.c +++ b/tests-clar/refs/delete.c @@ -1,7 +1,8 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "git2/reflog.h" +#include "git2/refdb.h" #include "reflog.h" #include "ref_helpers.h" @@ -31,7 +32,7 @@ void test_refs_delete__packed_loose(void) git_buf temp_path = GIT_BUF_INIT; /* Ensure the loose reference exists on the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, packed_test_head_name)); + 
cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), packed_test_head_name)); cl_assert(git_path_exists(temp_path.ptr)); /* Lookup the reference */ diff --git a/tests-clar/refs/pack.c b/tests-clar/refs/pack.c index 412c4c5fd..d8d5cc6d0 100644 --- a/tests-clar/refs/pack.c +++ b/tests-clar/refs/pack.c @@ -1,8 +1,10 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "git2/reflog.h" +#include "git2/refdb.h" #include "reflog.h" +#include "refs.h" #include "ref_helpers.h" static const char *loose_tag_ref_name = "refs/tags/e90810b"; @@ -33,7 +35,7 @@ void test_refs_pack__empty(void) // create a packfile for an empty folder git_buf temp_path = GIT_BUF_INIT; - cl_git_pass(git_buf_join_n(&temp_path, '/', 3, g_repo->path_repository, GIT_REFS_HEADS_DIR, "empty_dir")); + cl_git_pass(git_buf_join_n(&temp_path, '/', 3, git_repository_path(g_repo), GIT_REFS_HEADS_DIR, "empty_dir")); cl_git_pass(git_futils_mkdir_r(temp_path.ptr, NULL, GIT_REFS_DIR_MODE)); git_buf_free(&temp_path); @@ -60,7 +62,7 @@ void test_refs_pack__loose(void) packall(); /* Ensure the packed-refs file exists */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, GIT_PACKEDREFS_FILE)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), GIT_PACKEDREFS_FILE)); cl_assert(git_path_exists(temp_path.ptr)); /* Ensure the known ref can still be looked up but is now packed */ @@ -69,7 +71,7 @@ void test_refs_pack__loose(void) cl_assert_equal_s(reference->name, loose_tag_ref_name); /* Ensure the known ref has been removed from the loose folder structure */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, loose_tag_ref_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), loose_tag_ref_name)); cl_assert(!git_path_exists(temp_path.ptr)); git_reference_free(reference); diff --git a/tests-clar/refs/reflog/reflog.c b/tests-clar/refs/reflog/reflog.c index 1cd0ddd92..095cabf04 100644 --- 
a/tests-clar/refs/reflog/reflog.c +++ b/tests-clar/refs/reflog/reflog.c @@ -1,6 +1,6 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "git2/reflog.h" #include "reflog.h" diff --git a/tests-clar/refs/rename.c b/tests-clar/refs/rename.c index 5ab84c48e..543bc4d62 100644 --- a/tests-clar/refs/rename.c +++ b/tests-clar/refs/rename.c @@ -1,8 +1,9 @@ #include "clar_libgit2.h" -#include "repository.h" +#include "fileops.h" #include "git2/reflog.h" #include "reflog.h" +#include "refs.h" #include "ref_helpers.h" static const char *loose_tag_ref_name = "refs/tags/e90810b"; @@ -38,7 +39,7 @@ void test_refs_rename__loose(void) const char *new_name = "refs/tags/Nemo/knows/refs.kung-fu"; /* Ensure the ref doesn't exist on the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, new_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), new_name)); cl_assert(!git_path_exists(temp_path.ptr)); /* Retrieval of the reference to rename */ @@ -64,7 +65,7 @@ void test_refs_rename__loose(void) cl_assert(reference_is_packed(new_ref) == 0); /* ...and the ref can be found in the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, new_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), new_name)); cl_assert(git_path_exists(temp_path.ptr)); git_reference_free(new_ref); @@ -80,7 +81,7 @@ void test_refs_rename__packed(void) const char *brand_new_name = "refs/heads/brand_new_name"; /* Ensure the ref doesn't exist on the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, packed_head_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), packed_head_name)); cl_assert(!git_path_exists(temp_path.ptr)); /* The reference can however be looked-up... 
*/ @@ -106,7 +107,7 @@ void test_refs_rename__packed(void) cl_assert(reference_is_packed(new_ref) == 0); /* ...and the ref now happily lives in the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, brand_new_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), brand_new_name)); cl_assert(git_path_exists(temp_path.ptr)); git_reference_free(new_ref); @@ -122,7 +123,7 @@ void test_refs_rename__packed_doesnt_pack_others(void) const char *brand_new_name = "refs/heads/brand_new_name"; /* Ensure the other reference exists on the file system */ - cl_git_pass(git_buf_joinpath(&temp_path, g_repo->path_repository, packed_test_head_name)); + cl_git_pass(git_buf_joinpath(&temp_path, git_repository_path(g_repo), packed_test_head_name)); cl_assert(git_path_exists(temp_path.ptr)); /* Lookup the other reference */ diff --git a/tests-clar/repo/discover.c b/tests-clar/repo/discover.c index 3d9aeedd7..f93ff2462 100644 --- a/tests-clar/repo/discover.c +++ b/tests-clar/repo/discover.c @@ -1,9 +1,9 @@ #include "clar_libgit2.h" #include "odb.h" +#include "fileops.h" #include "repository.h" - #define TEMP_REPO_FOLDER "temprepo/" #define DISCOVER_FOLDER TEMP_REPO_FOLDER "discover.git" diff --git a/tests-clar/status/ignore.c b/tests-clar/status/ignore.c index 2d3898ba4..4f6879cfc 100644 --- a/tests-clar/status/ignore.c +++ b/tests-clar/status/ignore.c @@ -169,7 +169,7 @@ void test_status_ignore__ignore_pattern_ignorecase(void) cl_git_mkfile("empty_standard_repo/A.txt", "Differs in case"); cl_git_pass(git_repository_index(&index, g_repo)); - ignore_case = index->ignore_case; + ignore_case = (git_index_caps(index) & GIT_INDEXCAP_IGNORE_CASE) != 0; git_index_free(index); cl_git_pass(git_status_file(&flags, g_repo, "A.txt")); diff --git a/tests-clar/status/worktree.c b/tests-clar/status/worktree.c index 062a09aeb..13335843b 100644 --- a/tests-clar/status/worktree.c +++ b/tests-clar/status/worktree.c @@ -105,7 +105,7 @@ void 
test_status_worktree__swap_subdir_and_file(void) bool ignore_case; cl_git_pass(git_repository_index(&index, repo)); - ignore_case = index->ignore_case; + ignore_case = (git_index_caps(index) & GIT_INDEXCAP_IGNORE_CASE) != 0; git_index_free(index); /* first alter the contents of the worktree */