libgit2/src/diff_file.c
Russell Belfer 74ded02457 Add "as_path" parameters to blob and buffer diffs
This adds parameters to the four functions that allow for blob-to-
blob and blob-to-buffer differencing (either via callbacks or by
making a git_diff_patch object).  These parameters let you say
what filename we should pretend the blob has while doing the diff.
If you pass NULL, there should be no change from the existing
behavior, which is to skip using attributes for file type checks
and just look at content.  With the parameters, you can plug into
the new diff driver functionality and get binary or non-binary
behavior, plus function context regular expressions, etc.

This commit also fixes things so that the git_diff_delta that is
generated by these functions will actually be populated with the
data that we know about the blobs (or buffers) so you can use it
appropriately.  It also fixes a bug in generating patches from
the git_diff_patch objects created via these functions.

Lastly, there is one other behavior change that may matter.  If
there is no difference between the two blobs, these functions no
longer generate any diff callbacks / patches unless you have
passed in GIT_DIFF_INCLUDE_UNMODIFIED.  This is pretty natural,
but could potentially change the behavior of existing usage.
2013-06-17 17:03:34 -07:00

448 lines
11 KiB
C

/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "common.h"
#include "git2/blob.h"
#include "git2/submodule.h"
#include "diff.h"
#include "diff_file.h"
#include "odb.h"
#include "fileops.h"
#include "filter.h"
/* Default size cap (0x20000000 bytes = 512 MiB) applied when the diff
 * options carry a max_size of zero; files larger than the effective cap
 * are flagged binary by diff_file_content_binary_by_size().
 */
#define DIFF_MAX_FILESIZE 0x20000000
/*
 * Flag the file as binary when its size exceeds the configured limit.
 *
 * The size check only runs when the binary/not-binary state is still
 * undecided and a positive size limit was recorded on the content.
 * Returns true if the file carries GIT_DIFF_FLAG_BINARY afterwards
 * (whether set here or earlier).
 */
static bool diff_file_content_binary_by_size(git_diff_file_content *fc)
{
	git_diff_file *file = fc->file;
	bool undecided = ((file->flags & DIFF_FLAGS_KNOWN_BINARY) == 0);

	if (undecided &&
		fc->opts_max_size > 0 &&
		file->size > fc->opts_max_size)
		file->flags |= GIT_DIFF_FLAG_BINARY;

	return (file->flags & GIT_DIFF_FLAG_BINARY) ? true : false;
}
/*
 * Ask the diff driver to classify the loaded data as binary or text.
 *
 * Does nothing when the binary state is already known.  A driver
 * answer of 0 marks the file NOT_BINARY, 1 marks it BINARY, and any
 * other value leaves the flags untouched.
 */
static void diff_file_content_binary_by_content(git_diff_file_content *fc)
{
	int verdict;

	if (fc->file->flags & DIFF_FLAGS_KNOWN_BINARY)
		return;

	verdict = git_diff_driver_content_is_binary(
		fc->driver, fc->map.data, fc->map.len);

	if (verdict == 0)
		fc->file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	else if (verdict == 1)
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	/* any other answer: driver could not decide, leave flags alone */
}
/*
 * Shared tail of the git_diff_file_content__init_from_* functions.
 *
 * Copies the relevant diff options into `fc`, defaults the source type,
 * looks up a diff driver if none is set yet, and applies the forced /
 * size-based binary decisions.  If the content is already loaded (or is
 * known to have no data) the content-based binary check runs as well.
 *
 * Returns 0 on success, -1 if the diff driver lookup fails.
 */
static int diff_file_content_init_common(
	git_diff_file_content *fc, const git_diff_options *opts)
{
	git_diff_file *file;

	if (opts == NULL) {
		fc->opts_flags = GIT_DIFF_NORMAL;
	} else {
		fc->opts_flags = opts->flags;

		/* zero selects the built-in default; a negative value leaves
		 * the limit as it was (no assignment happens)
		 */
		if (opts->max_size == 0)
			fc->opts_max_size = DIFF_MAX_FILESIZE;
		else if (opts->max_size > 0)
			fc->opts_max_size = opts->max_size;
	}

	if (fc->src == GIT_ITERATOR_TYPE_EMPTY)
		fc->src = GIT_ITERATOR_TYPE_TREE;

	if (fc->driver == NULL) {
		if (git_diff_driver_lookup(
				&fc->driver, fc->repo, fc->file->path) < 0)
			return -1;
	}

	/* the driver may adjust the option flags */
	git_diff_driver_update_options(&fc->opts_flags, fc->driver);

	file = fc->file;

	if ((git_off_t)((size_t)file->size) != file->size) {
		/* size does not survive a round trip through size_t, so the
		 * file cannot be mapped into memory: treat it as binary
		 */
		file->flags |= GIT_DIFF_FLAG_BINARY;
	} else if (fc->opts_flags & GIT_DIFF_FORCE_TEXT) {
		/* caller insists on a text diff */
		file->flags &= ~GIT_DIFF_FLAG_BINARY;
		file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	} else if (fc->opts_flags & GIT_DIFF_FORCE_BINARY) {
		/* caller insists on a binary diff */
		file->flags &= ~GIT_DIFF_FLAG_NOT_BINARY;
		file->flags |= GIT_DIFF_FLAG_BINARY;
	}

	(void)diff_file_content_binary_by_size(fc);

	if (fc->flags & GIT_DIFF_FLAG__NO_DATA) {
		/* no data counts as "loaded" with empty content */
		fc->map.data = "";
		fc->map.len = 0;
		fc->flags |= GIT_DIFF_FLAG__LOADED;
	}

	if (fc->flags & GIT_DIFF_FLAG__LOADED)
		diff_file_content_binary_by_content(fc);

	return 0;
}
/*
 * Initialize `fc` for one side of a delta in a diff list.
 *
 * `delta_index` selects the delta inside `diff`; `use_old` picks the
 * old side (true) or the new side (false).  Based on the delta status
 * the side may be marked as having no data:
 *   - ADDED has data only on the new side,
 *   - DELETED has data only on the old side,
 *   - UNTRACKED has data only on the new side and only when
 *     GIT_DIFF_INCLUDE_UNTRACKED_CONTENT is set on the diff,
 *   - MODIFIED / COPIED / RENAMED have data on both sides,
 *   - every other status has no data.
 *
 * Returns 0 on success or -1 if the diff driver lookup fails.
 */
int git_diff_file_content__init_from_diff(
	git_diff_file_content *fc,
	git_diff_list *diff,
	size_t delta_index,
	bool use_old)
{
	git_diff_delta *delta = git_vector_get(&diff->deltas, delta_index);
	bool no_data;

	memset(fc, 0, sizeof(*fc));
	fc->repo = diff->repo;
	if (use_old) {
		fc->file = &delta->old_file;
		fc->src = diff->old_src;
	} else {
		fc->file = &delta->new_file;
		fc->src = diff->new_src;
	}

	if (git_diff_driver_lookup(&fc->driver, fc->repo, fc->file->path) < 0)
		return -1;

	switch (delta->status) {
	case GIT_DELTA_MODIFIED:
	case GIT_DELTA_COPIED:
	case GIT_DELTA_RENAMED:
		no_data = false;
		break;
	case GIT_DELTA_ADDED:
		no_data = use_old;
		break;
	case GIT_DELTA_DELETED:
		no_data = !use_old;
		break;
	case GIT_DELTA_UNTRACKED:
		no_data = use_old ||
			(diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0;
		break;
	default:
		no_data = true;
		break;
	}

	if (no_data)
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;

	return diff_file_content_init_common(fc, &diff->opts);
}
/*
 * Initialize `fc` from an in-memory blob, describing it via `as_file`.
 *
 * With a NULL blob the content is marked as having no data.  Otherwise
 * `as_file` is populated with the blob's raw size, a regular blob file
 * mode and the blob's id, and the content map points directly at the
 * blob's raw content (the content is marked loaded).
 *
 * Returns the result of diff_file_content_init_common().
 */
int git_diff_file_content__init_from_blob(
	git_diff_file_content *fc,
	git_repository *repo,
	const git_diff_options *opts,
	const git_blob *blob,
	git_diff_file *as_file)
{
	memset(fc, 0, sizeof(*fc));
	fc->repo = repo;
	fc->file = as_file;
	fc->blob = blob;

	if (blob != NULL) {
		/* describe the blob in the caller-supplied file record */
		git_oid_cpy(&fc->file->oid, git_blob_id(blob));
		fc->file->mode = GIT_FILEMODE_BLOB;
		fc->file->size = git_blob_rawsize(blob);
		fc->file->flags |= GIT_DIFF_FLAG_VALID_OID;

		/* content is used in place, nothing to load later */
		fc->map.data = (char *)git_blob_rawcontent(blob);
		fc->map.len = (size_t)fc->file->size;
		fc->flags |= GIT_DIFF_FLAG__LOADED;
	} else {
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;
	}

	return diff_file_content_init_common(fc, opts);
}
/*
 * Initialize `fc` from a raw memory buffer, describing it via `as_file`.
 *
 * With a NULL buffer the content is marked as having no data.
 * Otherwise `as_file` receives the buffer length as size, a regular
 * blob file mode and the blob hash of the buffer as its id, and the
 * content map points directly at `buf` (the content is marked loaded).
 *
 * Returns the result of diff_file_content_init_common().
 */
int git_diff_file_content__init_from_raw(
	git_diff_file_content *fc,
	git_repository *repo,
	const git_diff_options *opts,
	const char *buf,
	size_t buflen,
	git_diff_file *as_file)
{
	memset(fc, 0, sizeof(*fc));
	fc->repo = repo;
	fc->file = as_file;

	if (buf != NULL) {
		/* describe the buffer in the caller-supplied file record */
		fc->file->flags |= GIT_DIFF_FLAG_VALID_OID;
		fc->file->mode = GIT_FILEMODE_BLOB;
		fc->file->size = buflen;
		git_odb_hash(&fc->file->oid, buf, buflen, GIT_OBJ_BLOB);

		/* content is used in place, nothing to load later */
		fc->map.data = (char *)buf;
		fc->map.len = buflen;
		fc->flags |= GIT_DIFF_FLAG__LOADED;
	} else {
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;
	}

	return diff_file_content_init_common(fc, opts);
}
/*
 * Build the textual "Subproject commit <oid>" content for a submodule
 * entry and store it as the content data (heap allocated, freed on
 * unload).
 *
 * When `check_status` is true the submodule is looked up first: its
 * status decides whether "-dirty" is appended, and if the file has no
 * valid OID yet the submodule's workdir id (or, failing that, its head
 * id) is adopted.  A lookup/status failure aborts early; GIT_EEXISTS is
 * reported as success without producing any content.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_commit_to_str(
	git_diff_file_content *fc, bool check_status)
{
	char hex[GIT_OID_HEXSZ+1];
	git_buf text = GIT_BUF_INIT;
	const char *suffix = "";

	if (check_status) {
		git_submodule *sm = NULL;
		unsigned int sm_status = 0;
		int error;

		error = git_submodule_lookup(&sm, fc->repo, fc->file->path);
		if (error >= 0)
			error = git_submodule_status(&sm_status, sm);

		if (error < 0) {
			/* GIT_EEXISTS means a "submodule" that has not been git added */
			return (error == GIT_EEXISTS) ? 0 : error;
		}

		/* adopt the submodule's id if we had no OID before */
		if ((fc->file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) {
			const git_oid *sm_head = git_submodule_wd_id(sm);

			if (sm_head == NULL)
				sm_head = git_submodule_head_id(sm);

			if (sm_head != NULL) {
				git_oid_cpy(&fc->file->oid, sm_head);
				fc->file->flags |= GIT_DIFF_FLAG_VALID_OID;
			}
		}

		if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status))
			suffix = "-dirty";
	}

	git_oid_tostr(hex, sizeof(hex), &fc->file->oid);

	if (git_buf_printf(&text, "Subproject commit %s%s\n", hex, suffix) < 0)
		return -1;

	/* hand ownership of the formatted text to the content map */
	fc->map.len = git_buf_len(&text);
	fc->map.data = git_buf_detach(&text);
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	return 0;
}
/*
 * Load content for a file whose data lives in the object database.
 *
 * A zero OID loads nothing; a commit (submodule) entry is rendered as
 * text via diff_file_content_commit_to_str().  When the size is not
 * known yet the object header is peeked at first, which may already
 * hand back the full object.  Files that turn out to be binary by size
 * are not loaded.  On success the blob is kept on `fc` (released on
 * unload) and the content map points at its raw content.
 *
 * Returns 0 on success or an error code from the ODB / blob lookup.
 */
static int diff_file_content_load_blob(git_diff_file_content *fc)
{
	int error = 0;
	git_odb_object *odb_obj = NULL;

	if (git_oid_iszero(&fc->file->oid))
		return 0;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, false);

	/* if we don't know size, try to peek at object header first */
	if (!fc->file->size) {
		git_odb *odb;
		size_t len;
		git_otype type;

		/* the ODB pointer is a weak (borrowed) reference, so it must
		 * NOT be released with git_odb_free() here
		 */
		if (!(error = git_repository_odb__weakptr(&odb, fc->repo)))
			error = git_odb__read_header_or_object(
				&odb_obj, &len, &type, odb, &fc->file->oid);
		if (error)
			return error;

		fc->file->size = len;
	}

	if (diff_file_content_binary_by_size(fc)) {
		/* the header peek may have returned the whole object; do not
		 * leak it when we decide not to load the content
		 */
		if (odb_obj != NULL)
			git_odb_object_free(odb_obj);
		return 0;
	}

	if (odb_obj != NULL) {
		/* reuse the object we already read instead of a second lookup */
		error = git_object__from_odb_object(
			(git_object **)&fc->blob, fc->repo, odb_obj, GIT_OBJ_BLOB);
		git_odb_object_free(odb_obj);
	} else {
		error = git_blob_lookup(
			(git_blob **)&fc->blob, fc->repo, &fc->file->oid);
	}

	if (!error) {
		fc->flags |= GIT_DIFF_FLAG__FREE_BLOB;
		fc->map.data = (void *)git_blob_rawcontent(fc->blob);
		fc->map.len = (size_t)git_blob_rawsize(fc->blob);
	}

	return error;
}
/*
 * Load the target of a working-directory symlink as the file content.
 *
 * The buffer is sized at twice the recorded file size plus one byte,
 * because the on-disk link path could be UTF-16 and expand when
 * converted to UTF-8.  The buffer is owned by `fc` (freed on unload)
 * from the moment it is allocated, even if reading the link fails.
 *
 * Returns 0 on success, -1 on allocation or readlink failure.
 */
static int diff_file_content_load_workdir_symlink(
	git_diff_file_content *fc, git_buf *path)
{
	ssize_t capacity = (ssize_t)(fc->file->size * 2) + 1;
	ssize_t link_len;

	fc->map.data = git__calloc(capacity, sizeof(char));
	GITERR_CHECK_ALLOC(fc->map.data);

	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	link_len = p_readlink(git_buf_cstr(path), fc->map.data, capacity);
	if (link_len >= 0) {
		fc->map.len = link_len;
		return 0;
	}

	giterr_set(GITERR_OS, "Failed to read symlink '%s'", fc->file->path);
	return -1;
}
/*
 * Load the content of a regular working-directory file.
 *
 * If the size is not known it is read from the open descriptor; an
 * empty file or one that is binary by size loads nothing.  With no
 * to-ODB filters configured the file is memory-mapped; if mapping
 * fails, or filters exist, the file is read into a buffer, filtered if
 * needed, and the result is handed to `fc` (freed on unload).
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_workdir_file(
	git_diff_file_content *fc, git_buf *path)
{
	int error = 0;
	git_vector filters = GIT_VECTOR_INIT;
	git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT;
	git_file fd = git_futils_open_ro(git_buf_cstr(path));

	if (fd < 0)
		return fd;

	if (!fc->file->size) {
		git_off_t on_disk = git_futils_filesize(fd);

		/* a negative size signals failure to query the descriptor;
		 * do not record it as the file size or use it as a length.
		 * NOTE(review): presumably the helper has already set the
		 * error message -- confirm.
		 */
		if (on_disk < 0) {
			error = -1;
			goto cleanup;
		}

		fc->file->size = on_disk;

		/* empty file: nothing to load */
		if (!on_disk)
			goto cleanup;
	}

	if (diff_file_content_binary_by_size(fc))
		goto cleanup;

	if ((error = git_filters_load(
			&filters, fc->repo, fc->file->path, GIT_FILTER_TO_ODB)) < 0)
		goto cleanup;
	/* error >= 0 is a filter count */

	if (error == 0) {
		/* no filters: map the file directly */
		if (!(error = git_futils_mmap_ro(
				&fc->map, fd, 0, (size_t)fc->file->size)))
			fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA;
		else /* fall through to try readbuffer below */
			giterr_clear();
	}

	if (error != 0) {
		/* either filters are present or mmap failed: read into memory */
		error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size);
		if (error < 0)
			goto cleanup;

		if (!filters.length)
			git_buf_swap(&filtered, &raw);
		else
			error = git_filters_apply(&filtered, &raw, &filters);

		if (!error) {
			/* content map takes ownership of the filtered data */
			fc->map.len = git_buf_len(&filtered);
			fc->map.data = git_buf_detach(&filtered);
			fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
		}

		git_buf_free(&raw);
		git_buf_free(&filtered);
	}

cleanup:
	git_filters_free(&filters);
	p_close(fd);

	return error;
}
/*
 * Load content for a file that lives in the working directory.
 *
 * Commit (submodule) entries are rendered as text with a status check;
 * tree entries load nothing.  Otherwise the path is resolved against
 * the repository workdir and loaded as a symlink or a regular file.
 * If the file has no valid OID yet, the loaded data is hashed as a
 * blob and the OID is marked valid only when hashing succeeds.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_workdir(git_diff_file_content *fc)
{
	int error = 0;
	git_buf path = GIT_BUF_INIT;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, true);

	if (fc->file->mode == GIT_FILEMODE_TREE)
		return 0;

	if (git_buf_joinpath(
			&path, git_repository_workdir(fc->repo), fc->file->path) < 0)
		return -1;

	if (S_ISLNK(fc->file->mode))
		error = diff_file_content_load_workdir_symlink(fc, &path);
	else
		error = diff_file_content_load_workdir_file(fc, &path);

	/* once data is loaded, update OID if we didn't have it previously */
	if (!error && (fc->file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) {
		error = git_odb_hash(
			&fc->file->oid, fc->map.data, fc->map.len, GIT_OBJ_BLOB);

		/* never advertise an OID that failed to compute */
		if (!error)
			fc->file->flags |= GIT_DIFF_FLAG_VALID_OID;
	}

	git_buf_free(&path);

	return error;
}
/*
 * Ensure the content data for `fc` is loaded.
 *
 * Nothing happens if the content is already loaded or the file is
 * already flagged binary.  Otherwise data comes from the working
 * directory or from a blob depending on the source type; after a
 * successful load the content is marked loaded and the content-based
 * binary check runs.
 *
 * Returns 0 on success or the loader's error code.
 */
int git_diff_file_content__load(git_diff_file_content *fc)
{
	int error;

	if ((fc->flags & GIT_DIFF_FLAG__LOADED) != 0 ||
		(fc->file->flags & GIT_DIFF_FLAG_BINARY) != 0)
		return 0;

	error = (fc->src == GIT_ITERATOR_TYPE_WORKDIR) ?
		diff_file_content_load_workdir(fc) :
		diff_file_content_load_blob(fc);

	if (!error) {
		fc->flags |= GIT_DIFF_FLAG__LOADED;
		diff_file_content_binary_by_content(fc);
	}

	return error;
}
/*
 * Release whatever content data `fc` currently owns.
 *
 * Heap data is freed, or else mapped data is unmapped (heap takes
 * precedence if both flags were somehow set); in either case the map is
 * reset to an empty string.  A blob owned by `fc` is freed as well.
 * The loaded flag is always cleared.
 */
void git_diff_file_content__unload(git_diff_file_content *fc)
{
	bool had_heap = (fc->flags & GIT_DIFF_FLAG__FREE_DATA) != 0;
	bool had_mmap = !had_heap &&
		(fc->flags & GIT_DIFF_FLAG__UNMAP_DATA) != 0;

	if (had_heap) {
		git__free(fc->map.data);
		fc->flags &= ~GIT_DIFF_FLAG__FREE_DATA;
	}

	if (had_mmap) {
		git_futils_mmap_free(&fc->map);
		fc->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA;
	}

	if (had_heap || had_mmap) {
		/* leave the map pointing at valid, empty content */
		fc->map.data = "";
		fc->map.len = 0;
	}

	if ((fc->flags & GIT_DIFF_FLAG__FREE_BLOB) != 0) {
		git_blob_free((git_blob *)fc->blob);
		fc->blob = NULL;
		fc->flags &= ~GIT_DIFF_FLAG__FREE_BLOB;
	}

	fc->flags &= ~GIT_DIFF_FLAG__LOADED;
}
/*
 * Tear down `fc`.  At present this is only an unload of any held
 * content data; there is no additional state to release.
 */
void git_diff_file_content__clear(git_diff_file_content *fc)
{
	/* unloading is currently the whole job */
	git_diff_file_content__unload(fc);
}