diff --git a/CHANGELOG.md b/CHANGELOG.md index eb7ae842b..1340f78f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,9 @@ support for HTTPS connections insead of OpenSSL. and `git_diff_buffers` now accept a new binary callback of type `git_diff_binary_cb` that includes the binary diff information. +* The race condition mitigations described in `racy-git.txt` have been + implemented. + ### API additions * The `git_merge_options` gained a `file_flags` member. diff --git a/src/diff.c b/src/diff.c index d7365ef77..cc93f57cd 100644 --- a/src/diff.c +++ b/src/diff.c @@ -816,11 +816,11 @@ static int maybe_modified( } else if (git_oid_iszero(&nitem->id) && new_is_workdir) { bool use_ctime = ((diff->diffcaps & GIT_DIFFCAPS_TRUST_CTIME) != 0); bool use_nanos = ((diff->diffcaps & GIT_DIFFCAPS_TRUST_NANOSECS) != 0); + git_index *index; + git_iterator_index(&index, info->new_iter); status = GIT_DELTA_UNMODIFIED; - /* TODO: add check against index file st_mtime to avoid racy-git */ - if (S_ISGITLINK(nmode)) { if ((error = maybe_modified_submodule(&status, &noid, diff, info)) < 0) return error; @@ -839,7 +839,8 @@ static int maybe_modified( !diff_time_eq(&oitem->ctime, &nitem->ctime, use_nanos)) || oitem->ino != nitem->ino || oitem->uid != nitem->uid || - oitem->gid != nitem->gid) + oitem->gid != nitem->gid || + (index && nitem->mtime.seconds >= index->stamp.mtime)) { status = GIT_DELTA_MODIFIED; modified_uncertain = true; diff --git a/src/index.c b/src/index.c index 1fb3c48f3..5ce5522f8 100644 --- a/src/index.c +++ b/src/index.c @@ -688,20 +688,59 @@ int git_index__changed_relative_to( return !!git_oid_cmp(&index->checksum, checksum); } +static bool is_racy_timestamp(git_time_t stamp, git_index_entry *entry) +{ + /* Git special-cases submodules in the check */ + if (S_ISGITLINK(entry->mode)) + return false; + + /* If we never read the index, we can't have this race either */ + if (stamp == 0) + return false; + + /* If the timestamp is the same or newer than the index, it's racy */ + return ((int32_t) stamp) <= entry->mtime.seconds; +} + /* * Force the next diff to take a look at those entries which have the * same timestamp as the current index. */ -static void truncate_racily_clean(git_index *index) +static int truncate_racily_clean(git_index *index) { size_t i; + int error; git_index_entry *entry; git_time_t ts = index->stamp.mtime; + git_diff_options diff_opts = GIT_DIFF_OPTIONS_INIT; + git_diff *diff; + /* Nothing to do if there's no repo to talk about */ + if (!INDEX_OWNER(index)) + return 0; + + /* If there's no workdir, we can't know where to even check */ + if (!git_repository_workdir(INDEX_OWNER(index))) + return 0; + + diff_opts.flags |= GIT_DIFF_INCLUDE_TYPECHANGE | GIT_DIFF_IGNORE_SUBMODULES | GIT_DIFF_DISABLE_PATHSPEC_MATCH; git_vector_foreach(&index->entries, i, entry) { - if (entry->mtime.seconds == ts || ts == 0) + if (!is_racy_timestamp(ts, entry)) + continue; + + diff_opts.pathspec.count = 1; + diff_opts.pathspec.strings = (char **) &entry->path; + + if ((error = git_diff_index_to_workdir(&diff, INDEX_OWNER(index), index, &diff_opts)) < 0) + return error; + + if (git_diff_num_deltas(diff) > 0) entry->file_size = 0; + + git_diff_free(diff); } + + return 0; } int git_index_write(git_index *index) diff --git a/src/iterator.c b/src/iterator.c index 7807a1636..d5f7eec34 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -1762,6 +1762,18 @@ int git_iterator_current_workdir_path(git_buf **path, git_iterator *iter) return 0; } +int git_iterator_index(git_index **out, git_iterator *iter) +{ + workdir_iterator *wi = (workdir_iterator *)iter; + + if (iter->type != GIT_ITERATOR_TYPE_WORKDIR) + *out = NULL; + + *out = wi->index; + + return 0; +} + int git_iterator_advance_over_with_status( const git_index_entry **entryptr, git_iterator_status_t *status, diff --git a/src/iterator.h b/src/iterator.h index db1f325a7..57f82416a 100644 --- a/src/iterator.h +++ b/src/iterator.h @@ -11,6 +11,7 @@ #include "git2/index.h" #include "vector.h" #include "buffer.h" +#include "ignore.h" typedef struct git_iterator git_iterator; @@ -286,4 +287,11 @@ typedef enum { extern int git_iterator_advance_over_with_status( const git_index_entry **entry, git_iterator_status_t *status, git_iterator *iter); +/** + * Retrieve the index stored in the iterator. + * + * Only implemented for the workdir iterator + */ +extern int git_iterator_index(git_index **out, git_iterator *iter); + #endif diff --git a/tests/diff/racy.c b/tests/diff/racy.c deleted file mode 100644 index a109f8c3b..000000000 --- a/tests/diff/racy.c +++ /dev/null @@ -1,39 +0,0 @@ -#include "clar_libgit2.h" - -#include "buffer.h" - -static git_repository *g_repo; - -void test_diff_racy__initialize(void) -{ - cl_git_pass(git_repository_init(&g_repo, "diff_racy", false)); -} - -void test_diff_racy__cleanup(void) -{ - cl_git_sandbox_cleanup(); -} - -void test_diff_racy__diff(void) -{ - git_index *index; - git_diff *diff; - git_buf path = GIT_BUF_INIT; - - cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A")); - cl_git_mkfile(path.ptr, "A"); - - /* Put 'A' into the index */ - cl_git_pass(git_repository_index(&index, g_repo)); - cl_git_pass(git_index_add_bypath(index, "A")); - cl_git_pass(git_index_write(index)); - - cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); - cl_assert_equal_i(0, git_diff_num_deltas(diff)); - - /* Change its contents quickly, so we get the same timestamp */ - cl_git_mkfile(path.ptr, "B"); - - cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); - cl_assert_equal_i(1, git_diff_num_deltas(diff)); -} diff --git a/tests/diff/workdir.c b/tests/diff/workdir.c index ecc556ce2..13de6a98b 100644 --- a/tests/diff/workdir.c +++ b/tests/diff/workdir.c @@ -1623,6 +1623,8 @@ void test_diff_workdir__can_update_index(void) /* now if we do it again, we should see fewer OID calculations */ + /* tick again as the index updating from the previous diff might have reset the timestamp */ + tick_index(index); basic_diff_status(&diff, &opts); cl_git_pass(git_diff_get_perfdata(&perf, diff)); diff --git a/tests/index/racy.c b/tests/index/racy.c new file mode 100644 index 000000000..3a4bc439e --- /dev/null +++ b/tests/index/racy.c @@ -0,0 +1,143 @@ +#include "clar_libgit2.h" +#include "../checkout/checkout_helpers.h" + +#include "buffer.h" +#include "index.h" + +static git_repository *g_repo; + +void test_index_racy__initialize(void) +{ + cl_git_pass(git_repository_init(&g_repo, "diff_racy", false)); +} + +void test_index_racy__cleanup(void) +{ + git_repository_free(g_repo); + g_repo = NULL; + + cl_fixture_cleanup("diff_racy"); +} + +void test_index_racy__diff(void) +{ + git_index *index; + git_diff *diff; + git_buf path = GIT_BUF_INIT; + + cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A")); + cl_git_mkfile(path.ptr, "A"); + + /* Put 'A' into the index */ + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_add_bypath(index, "A")); + cl_git_pass(git_index_write(index)); + + cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); + cl_assert_equal_i(0, git_diff_num_deltas(diff)); + git_diff_free(diff); + + /* Change its contents quickly, so we get the same timestamp */ + cl_git_mkfile(path.ptr, "B"); + + cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); + cl_assert_equal_i(1, git_diff_num_deltas(diff)); + + git_index_free(index); + git_diff_free(diff); + git_buf_free(&path); +} + +void test_index_racy__write_index_just_after_file(void) +{ + git_index *index; + git_diff *diff; + git_buf path = GIT_BUF_INIT; + struct timeval times[2]; + + /* Make sure we do have a timestamp */ + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_write(index)); + + cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A")); + cl_git_mkfile(path.ptr, "A"); + /* Force the file's timestamp to be a second after we wrote the index */ + times[0].tv_sec = index->stamp.mtime + 1; + times[0].tv_usec = 0; + times[1].tv_sec = index->stamp.mtime + 1; + times[1].tv_usec = 0; + cl_git_pass(p_utimes(path.ptr, times)); + + /* + * Put 'A' into the index, the size field will be filled, + * because the index' on-disk timestamp does not match the + * file's timestamp. + */ + cl_git_pass(git_index_add_bypath(index, "A")); + cl_git_pass(git_index_write(index)); + + cl_git_mkfile(path.ptr, "B"); + /* + * Pretend this index' modification happend a second after the + * file update, and rewrite the file in that same second. + */ + times[0].tv_sec = index->stamp.mtime + 2; + times[0].tv_usec = 0; + times[1].tv_sec = index->stamp.mtime + 2; + times[0].tv_usec = 0; + + cl_git_pass(p_utimes(git_index_path(index), times)); + cl_git_pass(p_utimes(path.ptr, times)); + + cl_git_pass(git_index_read(index, true)); + + cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); + cl_assert_equal_i(1, git_diff_num_deltas(diff)); + + git_buf_free(&path); + git_diff_free(diff); + git_index_free(index); +} + +void test_index_racy__empty_file_after_smudge(void) +{ + git_index *index; + git_diff *diff; + git_buf path = GIT_BUF_INIT; + int i, found_race = 0; + const git_index_entry *entry; + + /* Make sure we do have a timestamp */ + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_write(index)); + + cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A")); + + /* Make sure writing the file, adding and rewriting happen in the same second */ + for (i = 0; i < 10; i++) { + struct stat st; + cl_git_mkfile(path.ptr, "A"); + + cl_git_pass(git_index_add_bypath(index, "A")); + cl_git_mkfile(path.ptr, "B"); + cl_git_pass(git_index_write(index)); + + cl_git_mkfile(path.ptr, ""); + + cl_git_pass(p_stat(path.ptr, &st)); + cl_assert(entry = git_index_get_bypath(index, "A", 0)); + if (entry->mtime.seconds == (int32_t) st.st_mtime) { + found_race = 1; + break; + } + + } + + if (!found_race) + cl_fail("failed to find race after 10 attempts"); + + cl_assert_equal_i(0, entry->file_size); + + cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); + cl_assert_equal_i(1, git_diff_num_deltas(diff)); +} diff --git a/tests/merge/workdir/dirty.c b/tests/merge/workdir/dirty.c index 30c404b70..4bf984c23 100644 --- a/tests/merge/workdir/dirty.c +++ b/tests/merge/workdir/dirty.c @@ -133,12 +133,25 @@ static void hack_index(char *files[]) struct stat statbuf; git_buf path = GIT_BUF_INIT; git_index_entry *entry; + struct timeval times[2]; + time_t now; size_t i; /* Update the index to suggest that checkout placed these files on * disk, keeping the object id but updating the cache, which will * emulate a Git implementation's different filter. + * + * We set the file's timestamp to before now to pretend that + * it was an old checkout so we don't trigger the racy + * protections would would check the content. */ + + now = time(NULL); + times[0].tv_sec = now - 5; + times[0].tv_usec = 0; + times[1].tv_sec = now - 5; + times[1].tv_usec = 0; + for (i = 0, filename = files[i]; filename; filename = files[++i]) { git_buf_clear(&path); @@ -146,6 +159,7 @@ static void hack_index(char *files[]) git_index_get_bypath(repo_index, filename, 0)); cl_git_pass(git_buf_printf(&path, "%s/%s", TEST_REPO_PATH, filename)); + cl_git_pass(p_utimes(path.ptr, times)); cl_git_pass(p_stat(path.ptr, &statbuf)); entry->ctime.seconds = (git_time_t)statbuf.st_ctime; @@ -245,7 +259,6 @@ static int merge_differently_filtered_files(char *files[]) write_files(files); hack_index(files); - repo_index->stamp.mtime = time(NULL) + 1; cl_git_pass(git_index_write(repo_index)); error = merge_branch(); diff --git a/tests/status/worktree.c b/tests/status/worktree.c index 56f98a882..75c7b71b0 100644 --- a/tests/status/worktree.c +++ b/tests/status/worktree.c @@ -985,6 +985,8 @@ void test_status_worktree__update_stat_cache_0(void) opts.flags &= ~GIT_STATUS_OPT_UPDATE_INDEX; + /* tick again as the index updating from the previous diff might have reset the timestamp */ + tick_index(index); cl_git_pass(git_status_list_new(&status, repo, &opts)); check_status0(status); cl_git_pass(git_status_list_get_perfdata(&perf, status));