From ff47537557f0ac1919e77c5cb21f36f2e98425de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Wed, 17 Jun 2015 14:34:10 +0200 Subject: [PATCH 1/8] diff: check files with the same or newer timestamps When a file on the workdir has the same or a newer timestamp than the index, we need to perform a full check of the contents, as the update of the file may have happened just after we wrote the index. The iterator changes are such that we can reach inside the workdir iterator from the diff, though it may be better to have an accessor instead of moving these structs into the header. --- src/diff.c | 7 ++++--- src/iterator.c | 12 ++++++++++++ src/iterator.h | 8 ++++++++ tests/diff/racy.c | 33 +++++++++++++++++++++++++++++++++ tests/diff/workdir.c | 2 ++ tests/status/worktree.c | 2 ++ 6 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/diff.c b/src/diff.c index d7365ef77..cc93f57cd 100644 --- a/src/diff.c +++ b/src/diff.c @@ -816,11 +816,11 @@ static int maybe_modified( } else if (git_oid_iszero(&nitem->id) && new_is_workdir) { bool use_ctime = ((diff->diffcaps & GIT_DIFFCAPS_TRUST_CTIME) != 0); bool use_nanos = ((diff->diffcaps & GIT_DIFFCAPS_TRUST_NANOSECS) != 0); + git_index *index; + git_iterator_index(&index, info->new_iter); status = GIT_DELTA_UNMODIFIED; - /* TODO: add check against index file st_mtime to avoid racy-git */ - if (S_ISGITLINK(nmode)) { if ((error = maybe_modified_submodule(&status, &noid, diff, info)) < 0) return error; @@ -839,7 +839,8 @@ static int maybe_modified( !diff_time_eq(&oitem->ctime, &nitem->ctime, use_nanos)) || oitem->ino != nitem->ino || oitem->uid != nitem->uid || - oitem->gid != nitem->gid) + oitem->gid != nitem->gid || + (index && nitem->mtime.seconds >= index->stamp.mtime)) { status = GIT_DELTA_MODIFIED; modified_uncertain = true; diff --git a/src/iterator.c b/src/iterator.c index 7807a1636..d5f7eec34 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -1762,6 +1762,18 @@ int git_iterator_current_workdir_path(git_buf **path, git_iterator *iter) return 0; } +int git_iterator_index(git_index **out, git_iterator *iter) +{ + workdir_iterator *wi = (workdir_iterator *)iter; + + if (iter->type != GIT_ITERATOR_TYPE_WORKDIR) + *out = NULL; + + *out = wi->index; + + return 0; +} + int git_iterator_advance_over_with_status( const git_index_entry **entryptr, git_iterator_status_t *status, diff --git a/src/iterator.h b/src/iterator.h index db1f325a7..57f82416a 100644 --- a/src/iterator.h +++ b/src/iterator.h @@ -11,6 +11,7 @@ #include "git2/index.h" #include "vector.h" #include "buffer.h" +#include "ignore.h" typedef struct git_iterator git_iterator; @@ -286,4 +287,11 @@ typedef enum { extern int git_iterator_advance_over_with_status( const git_index_entry **entry, git_iterator_status_t *status, git_iterator *iter); +/** + * Retrieve the index stored in the iterator. + * + * Only implemented for the workdir iterator + */ +extern int git_iterator_index(git_index **out, git_iterator *iter); + #endif diff --git a/tests/diff/racy.c b/tests/diff/racy.c index a109f8c3b..66630cc4d 100644 --- a/tests/diff/racy.c +++ b/tests/diff/racy.c @@ -1,4 +1,5 @@ #include "clar_libgit2.h" +#include "../checkout/checkout_helpers.h" #include "buffer.h" @@ -37,3 +38,35 @@ void test_diff_racy__diff(void) cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); cl_assert_equal_i(1, git_diff_num_deltas(diff)); } + +void test_diff_racy__write_index_just_after_file(void) +{ + git_index *index; + git_diff *diff; + git_buf path = GIT_BUF_INIT; + + /* Make sure we do have a timestamp */ + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_write(index)); + /* The timestamp will be one second before we change the file */ + sleep(1); + + cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A")); + cl_git_mkfile(path.ptr, "A"); + + /* + * Put 'A' into the index, the size field will be filled, + * because the index' on-disk timestamp does not match the + * file's timestamp. Both timestamps will however match after + * writing out the index. + */ + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_add_bypath(index, "A")); + cl_git_pass(git_index_write(index)); + + /* Change its contents quickly, so we get the same timestamp */ + cl_git_mkfile(path.ptr, "B"); + + cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); + cl_assert_equal_i(1, git_diff_num_deltas(diff)); +} diff --git a/tests/diff/workdir.c b/tests/diff/workdir.c index 6b72f3286..3ca82e733 100644 --- a/tests/diff/workdir.c +++ b/tests/diff/workdir.c @@ -1623,6 +1623,8 @@ void test_diff_workdir__can_update_index(void) /* now if we do it again, we should see fewer OID calculations */ + /* tick again as the index updating from the previous diff might have reset the timestamp */ + tick_index(index); basic_diff_status(&diff, &opts); cl_git_pass(git_diff_get_perfdata(&perf, diff)); diff --git a/tests/status/worktree.c b/tests/status/worktree.c index 56f98a882..75c7b71b0 100644 --- a/tests/status/worktree.c +++ b/tests/status/worktree.c @@ -985,6 +985,8 @@ void test_status_worktree__update_stat_cache_0(void) opts.flags &= ~GIT_STATUS_OPT_UPDATE_INDEX; + /* tick again as the index updating from the previous diff might have reset the timestamp */ + tick_index(index); cl_git_pass(git_status_list_new(&status, repo, &opts)); check_status0(status); cl_git_pass(git_status_list_get_perfdata(&perf, status)); From 74975846510794a874d9c8630f44aa14991901fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Thu, 18 Jun 2015 14:22:10 +0200 Subject: [PATCH 2/8] index: check racily clean entries more thoroughly When an entry has a racy timestamp, we need to check whether the file itself has changed since we put its entry in the index. Only then do we smudge the size field to force a check the next time around. --- src/index.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/index.c b/src/index.c index 1fb3c48f3..5ce5522f8 100644 --- a/src/index.c +++ b/src/index.c @@ -688,20 +688,59 @@ int git_index__changed_relative_to( return !!git_oid_cmp(&index->checksum, checksum); } +static bool is_racy_timestamp(git_time_t stamp, git_index_entry *entry) +{ + /* Git special-cases submodules in the check */ + if (S_ISGITLINK(entry->mode)) + return false; + + /* If we never read the index, we can't have this race either */ + if (stamp == 0) + return false; + + /* If the timestamp is the same or newer than the index, it's racy */ + return ((int32_t) stamp) <= entry->mtime.seconds; +} + /* * Force the next diff to take a look at those entries which have the * same timestamp as the current index. */ -static void truncate_racily_clean(git_index *index) +static int truncate_racily_clean(git_index *index) { size_t i; + int error; git_index_entry *entry; git_time_t ts = index->stamp.mtime; + git_diff_options diff_opts = GIT_DIFF_OPTIONS_INIT; + git_diff *diff; + /* Nothing to do if there's no repo to talk about */ + if (!INDEX_OWNER(index)) + return 0; + + /* If there's no workdir, we can't know where to even check */ + if (!git_repository_workdir(INDEX_OWNER(index))) + return 0; + + diff_opts.flags |= GIT_DIFF_INCLUDE_TYPECHANGE | GIT_DIFF_IGNORE_SUBMODULES | GIT_DIFF_DISABLE_PATHSPEC_MATCH; git_vector_foreach(&index->entries, i, entry) { - if (entry->mtime.seconds == ts || ts == 0) + if (!is_racy_timestamp(ts, entry)) + continue; + + diff_opts.pathspec.count = 1; + diff_opts.pathspec.strings = (char **) &entry->path; + + if ((error = git_diff_index_to_workdir(&diff, INDEX_OWNER(index), index, &diff_opts)) < 0) + return error; + + if (git_diff_num_deltas(diff) > 0) entry->file_size = 0; + + git_diff_free(diff); } + + return 0; } int git_index_write(git_index *index) From 6c5eaead49aab97f40305357814f6df720cdd41c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 20 Jun 2015 12:36:58 +0200 Subject: [PATCH 3/8] tests: plug leaks in the racy test --- tests/diff/racy.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/diff/racy.c b/tests/diff/racy.c index 66630cc4d..072144099 100644 --- a/tests/diff/racy.c +++ b/tests/diff/racy.c @@ -12,7 +12,10 @@ void test_diff_racy__initialize(void) void test_diff_racy__cleanup(void) { - cl_git_sandbox_cleanup(); + git_repository_free(g_repo); + g_repo = NULL; + + cl_fixture_cleanup("diff_racy"); } void test_diff_racy__diff(void) @@ -31,12 +34,17 @@ void test_diff_racy__diff(void) cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); cl_assert_equal_i(0, git_diff_num_deltas(diff)); + git_diff_free(diff); /* Change its contents quickly, so we get the same timestamp */ cl_git_mkfile(path.ptr, "B"); cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); cl_assert_equal_i(1, git_diff_num_deltas(diff)); + + git_index_free(index); + git_diff_free(diff); + git_buf_free(&path); } void test_diff_racy__write_index_just_after_file(void) From 26432a9c62957957bfca96634de6b1e77052c1c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 20 Jun 2015 12:37:32 +0200 Subject: [PATCH 4/8] tests: set racy times manually --- tests/diff/racy.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/diff/racy.c b/tests/diff/racy.c index 072144099..a516e26c0 100644 --- a/tests/diff/racy.c +++ b/tests/diff/racy.c @@ -2,6 +2,7 @@ #include "../checkout/checkout_helpers.h" #include "buffer.h" +#include "index.h" static git_repository *g_repo; @@ -52,29 +53,48 @@ void test_diff_racy__write_index_just_after_file(void) git_index *index; git_diff *diff; git_buf path = GIT_BUF_INIT; + struct timeval times[2]; /* Make sure we do have a timestamp */ cl_git_pass(git_repository_index(&index, g_repo)); cl_git_pass(git_index_write(index)); - /* The timestamp will be one second before we change the file */ - sleep(1); cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A")); cl_git_mkfile(path.ptr, "A"); + /* Force the file's timestamp to be a second after we wrote the index */ + times[0].tv_sec = index->stamp.mtime + 1; + times[0].tv_usec = 0; + times[1].tv_sec = index->stamp.mtime + 1; + times[1].tv_usec = 0; + cl_git_pass(p_utimes(path.ptr, times)); /* * Put 'A' into the index, the size field will be filled, * because the index' on-disk timestamp does not match the - * file's timestamp. Both timestamps will however match after - * writing out the index. + * file's timestamp. */ - cl_git_pass(git_repository_index(&index, g_repo)); cl_git_pass(git_index_add_bypath(index, "A")); cl_git_pass(git_index_write(index)); - /* Change its contents quickly, so we get the same timestamp */ cl_git_mkfile(path.ptr, "B"); + /* + * Pretend this index' modification happend a second after the + * file update, and rewrite the file in that same second. + */ + times[0].tv_sec = index->stamp.mtime + 2; + times[0].tv_usec = 0; + times[1].tv_sec = index->stamp.mtime + 2; + times[0].tv_usec = 0; + + cl_git_pass(p_utimes(git_index_path(index), times)); + cl_git_pass(p_utimes(path.ptr, times)); + + cl_git_pass(git_index_read(index, true)); cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); cl_assert_equal_i(1, git_diff_num_deltas(diff)); + + git_buf_free(&path); + git_diff_free(diff); + git_index_free(index); } From 5b05f954242a00077eedcdff8f20a820299deef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 20 Jun 2015 13:17:06 +0200 Subject: [PATCH 5/8] merge: work around write-side racy protection when hacking the index As we attempt to replicate a situation in which an older checkout has put a file on disk with different filtering settings from us, set the timestamp on the entry and file to a second before we're performing the operation so the entry in the index counts as old. This way we can test that we're not looking at the on-disk file when the index has the entry and we detect it as clean. --- tests/merge/workdir/dirty.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/merge/workdir/dirty.c b/tests/merge/workdir/dirty.c index 30c404b70..4bf984c23 100644 --- a/tests/merge/workdir/dirty.c +++ b/tests/merge/workdir/dirty.c @@ -133,12 +133,25 @@ static void hack_index(char *files[]) struct stat statbuf; git_buf path = GIT_BUF_INIT; git_index_entry *entry; + struct timeval times[2]; + time_t now; size_t i; /* Update the index to suggest that checkout placed these files on * disk, keeping the object id but updating the cache, which will * emulate a Git implementation's different filter. + * + * We set the file's timestamp to before now to pretend that + * it was an old checkout so we don't trigger the racy + * protections would would check the content. */ + + now = time(NULL); + times[0].tv_sec = now - 5; + times[0].tv_usec = 0; + times[1].tv_sec = now - 5; + times[1].tv_usec = 0; + for (i = 0, filename = files[i]; filename; filename = files[++i]) { git_buf_clear(&path); @@ -146,6 +159,7 @@ static void hack_index(char *files[]) git_index_get_bypath(repo_index, filename, 0)); cl_git_pass(git_buf_printf(&path, "%s/%s", TEST_REPO_PATH, filename)); + cl_git_pass(p_utimes(path.ptr, times)); cl_git_pass(p_stat(path.ptr, &statbuf)); entry->ctime.seconds = (git_time_t)statbuf.st_ctime; @@ -245,7 +259,6 @@ static int merge_differently_filtered_files(char *files[]) write_files(files); hack_index(files); - repo_index->stamp.mtime = time(NULL) + 1; cl_git_pass(git_index_write(repo_index)); error = merge_branch(); From 27133caf3c912d81657d307577869571b70cc102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 20 Jun 2015 17:20:07 +0200 Subject: [PATCH 6/8] tests: move racy tests to the index They fit there much better, even though we often check by diffing, it's about the behaviour of the index. --- tests/{diff => index}/racy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename tests/{diff => index}/racy.c (93%) diff --git a/tests/diff/racy.c b/tests/index/racy.c similarity index 93% rename from tests/diff/racy.c rename to tests/index/racy.c index a516e26c0..089fe5d7d 100644 --- a/tests/diff/racy.c +++ b/tests/index/racy.c @@ -6,12 +6,12 @@ static git_repository *g_repo; -void test_diff_racy__initialize(void) +void test_index_racy__initialize(void) { cl_git_pass(git_repository_init(&g_repo, "diff_racy", false)); } -void test_diff_racy__cleanup(void) +void test_index_racy__cleanup(void) { git_repository_free(g_repo); g_repo = NULL; @@ -19,7 +19,7 @@ void test_diff_racy__cleanup(void) cl_fixture_cleanup("diff_racy"); } -void test_diff_racy__diff(void) +void test_index_racy__diff(void) { git_index *index; git_diff *diff; @@ -48,7 +48,7 @@ void test_diff_racy__diff(void) git_buf_free(&path); } -void test_diff_racy__write_index_just_after_file(void) +void test_index_racy__write_index_just_after_file(void) { git_index *index; git_diff *diff; From 6e611f7c3c8dc5ffd9ac7ea249d2913cd4f0971c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 22 Jun 2015 14:17:39 +0200 Subject: [PATCH 7/8] index: add a diff test for smudging a file which becomes empty Even though the file is empty and thus the size in the entry matches, we should be able to detect it as a difference. --- tests/index/racy.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/index/racy.c b/tests/index/racy.c index 089fe5d7d..3a4bc439e 100644 --- a/tests/index/racy.c +++ b/tests/index/racy.c @@ -98,3 +98,46 @@ void test_index_racy__write_index_just_after_file(void) git_diff_free(diff); git_index_free(index); } + +void test_index_racy__empty_file_after_smudge(void) +{ + git_index *index; + git_diff *diff; + git_buf path = GIT_BUF_INIT; + int i, found_race = 0; + const git_index_entry *entry; + + /* Make sure we do have a timestamp */ + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_write(index)); + + cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A")); + + /* Make sure writing the file, adding and rewriting happen in the same second */ + for (i = 0; i < 10; i++) { + struct stat st; + cl_git_mkfile(path.ptr, "A"); + + cl_git_pass(git_index_add_bypath(index, "A")); + cl_git_mkfile(path.ptr, "B"); + cl_git_pass(git_index_write(index)); + + cl_git_mkfile(path.ptr, ""); + + cl_git_pass(p_stat(path.ptr, &st)); + cl_assert(entry = git_index_get_bypath(index, "A", 0)); + if (entry->mtime.seconds == (int32_t) st.st_mtime) { + found_race = 1; + break; + } + + } + + if (!found_race) + cl_fail("failed to find race after 10 attempts"); + + cl_assert_equal_i(0, entry->file_size); + + cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); + cl_assert_equal_i(1, git_diff_num_deltas(diff)); +} From bb4896f22c9199e88b25a47ee4389a7e778d9d7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Mon, 22 Jun 2015 14:20:13 +0200 Subject: [PATCH 8/8] Add a note about racy-git in CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8e3e18ac..3250973f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,9 @@ support for HTTPS connections insead of OpenSSL. and `git_diff_buffers` now accept a new binary callback of type `git_diff_binary_cb` that includes the binary diff information. +* The race condition mitigations described in `racy-git.txt` have been + implemented. + ### API additions * The `git_merge_options` gained a `file_flags` member.