From cb0ff012d3cb9658b88ad4f752046286ba5ea442 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 6 Nov 2015 17:15:35 -0500 Subject: [PATCH 1/7] racy-git: do a single index->workdir diff When examining paths that are racily clean, do a single index->workdir diff over the entirety of the racily clean files, instead of a diff per file. --- src/index.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/src/index.c b/src/index.c index 0903770bb..9eb1289c3 100644 --- a/src/index.c +++ b/src/index.c @@ -18,6 +18,7 @@ #include "ignore.h" #include "blob.h" #include "idxmap.h" +#include "diff.h" #include "git2/odb.h" #include "git2/oid.h" @@ -752,7 +753,9 @@ static int truncate_racily_clean(git_index *index) int error; git_index_entry *entry; git_diff_options diff_opts = GIT_DIFF_OPTIONS_INIT; - git_diff *diff; + git_diff *diff = NULL; + git_vector paths = GIT_VECTOR_INIT; + git_diff_delta *delta; /* Nothing to do if there's no repo to talk about */ if (!INDEX_OWNER(index)) @@ -765,21 +768,31 @@ static int truncate_racily_clean(git_index *index) diff_opts.flags |= GIT_DIFF_INCLUDE_TYPECHANGE | GIT_DIFF_IGNORE_SUBMODULES | GIT_DIFF_DISABLE_PATHSPEC_MATCH; git_vector_foreach(&index->entries, i, entry) { if (!is_racy_timestamp(&index->stamp.mtime, entry)) - continue; - - /* TODO: use the (non-fnmatching) filelist iterator */ - diff_opts.pathspec.count = 1; - diff_opts.pathspec.strings = (char **) &entry->path; - - if ((error = git_diff_index_to_workdir(&diff, INDEX_OWNER(index), index, &diff_opts)) < 0) - return error; - - if (git_diff_num_deltas(diff) > 0) - entry->file_size = 0; - - git_diff_free(diff); + git_vector_insert(&paths, (char *)entry->path); } + if (paths.length == 0) + goto done; + + diff_opts.pathspec.count = paths.length; + diff_opts.pathspec.strings = (char **)paths.contents; + + if ((error = git_diff_index_to_workdir(&diff, INDEX_OWNER(index), index, &diff_opts)) < 0) + return error; + + git_vector_foreach(&diff->deltas, i, delta) { + entry = (git_index_entry *)git_index_get_bypath(index, delta->old_file.path, 0); + + /* Ensure that we have a stage 0 for this file (ie, it's not a + * conflict), otherwise smudging it is quite pointless. + */ + if (entry) + entry->file_size = 0; + } + +done: + git_diff_free(diff); + git_vector_free(&paths); return 0; } From d1101263f751a33d70ec585da908bec938f587fd Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 13 Nov 2015 15:32:48 -0500 Subject: [PATCH 2/7] index: don't detect raciness in uptodate entries Keep track of entries that we believe are up-to-date, because we added the index entries since the index was loaded. This prevents us from unnecessarily examining files that we wrote during the cleanup of racy entries (when we smudge racily clean files that have a timestamp newer than or equal to the index's timestamp when we read it). Without keeping track of this, we would examine every file that we just checked out for raciness, since all their timestamps would be newer than the index's timestamp. --- src/index.c | 9 ++++-- tests/index/racy.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/src/index.c b/src/index.c index 9eb1289c3..b3fb077cc 100644 --- a/src/index.c +++ b/src/index.c @@ -767,7 +767,8 @@ static int truncate_racily_clean(git_index *index) diff_opts.flags |= GIT_DIFF_INCLUDE_TYPECHANGE | GIT_DIFF_IGNORE_SUBMODULES | GIT_DIFF_DISABLE_PATHSPEC_MATCH; git_vector_foreach(&index->entries, i, entry) { - if (!is_racy_timestamp(&index->stamp.mtime, entry)) + if ((entry->flags_extended & GIT_IDXENTRY_UPTODATE) == 0 && + is_racy_timestamp(&index->stamp.mtime, entry)) git_vector_insert(&paths, (char *)entry->path); } @@ -1295,6 +1296,9 @@ static int index_insert( else entry->flags |= GIT_IDXENTRY_NAMEMASK; + /* this entry is now up-to-date and should not be checked for raciness */ + entry->flags_extended |= GIT_IDXENTRY_UPTODATE; + if (git_mutex_lock(&index->lock) < 0) { giterr_set(GITERR_OS, "Unable to acquire index lock"); return -1; @@ -2558,7 +2562,8 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry) if (entry->flags & GIT_IDXENTRY_EXTENDED) { struct entry_long *ondisk_ext; ondisk_ext = (struct entry_long *)ondisk; - ondisk_ext->flags_extended = htons(entry->flags_extended); + ondisk_ext->flags_extended = htons(entry->flags_extended & + GIT_IDXENTRY_EXTENDED_FLAGS); path = ondisk_ext->path; } else diff --git a/tests/index/racy.c b/tests/index/racy.c index df25c851c..6ff12a39a 100644 --- a/tests/index/racy.c +++ b/tests/index/racy.c @@ -145,3 +145,80 @@ void test_index_racy__empty_file_after_smudge(void) git_buf_free(&path); git_diff_free(diff); } + +static void setup_uptodate_files(void) +{ + git_buf path = GIT_BUF_INIT; + git_index *index; + git_index_entry new_entry = {0}; + + cl_git_pass(git_repository_index(&index, g_repo)); + + cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A")); + cl_git_mkfile(path.ptr, "A"); + + /* Put 'A' into the index */ + cl_git_pass(git_index_add_bypath(index, "A")); + + /* Put 'B' into the index */ + new_entry.path = "B"; + new_entry.mode = GIT_FILEMODE_BLOB; + cl_git_pass(git_index_add(index, &new_entry)); + + /* Put 'C' into the index */ + new_entry.path = "C"; + new_entry.mode = GIT_FILEMODE_BLOB; + cl_git_pass(git_index_add_frombuffer(index, &new_entry, "hello!\n", 7)); + + git_index_free(index); + git_buf_free(&path); +} + +void test_index_racy__adding_to_index_is_uptodate(void) +{ + git_index *index; + const git_index_entry *entry; + + setup_uptodate_files(); + + cl_git_pass(git_repository_index(&index, g_repo)); + + /* ensure that they're all uptodate */ + cl_assert((entry = git_index_get_bypath(index, "A", 0))); + cl_assert_equal_i(GIT_IDXENTRY_UPTODATE, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_assert((entry = git_index_get_bypath(index, "B", 0))); + cl_assert_equal_i(GIT_IDXENTRY_UPTODATE, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_assert((entry = git_index_get_bypath(index, "C", 0))); + cl_assert_equal_i(GIT_IDXENTRY_UPTODATE, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_git_pass(git_index_write(index)); + + git_index_free(index); +} + +void test_index_racy__reading_clears_uptodate_bit(void) +{ + git_index *index; + const git_index_entry *entry; + + setup_uptodate_files(); + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_write(index)); + + cl_git_pass(git_index_read(index, true)); + + /* ensure that no files are uptodate */ + cl_assert((entry = git_index_get_bypath(index, "A", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_assert((entry = git_index_get_bypath(index, "B", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_assert((entry = git_index_get_bypath(index, "C", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + git_index_free(index); +} From de999f260ffdf0266bc0109cd71621ecf65cad98 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 13 Nov 2015 15:36:45 -0500 Subject: [PATCH 3/7] checkout::crlf test: don't crash when no idx entry When there's no matching index entry (for whatever reason), don't try to dereference the null return value to get at the id. Otherwise when we break something in the index API, the checkout test crashes for confusing reasons and causes us to step through it in a debugger thinking that we had broken much more than we actually did. --- tests/checkout/crlf.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/checkout/crlf.c b/tests/checkout/crlf.c index 8e77d0845..347c22198 100644 --- a/tests/checkout/crlf.c +++ b/tests/checkout/crlf.c @@ -278,6 +278,7 @@ void test_checkout_crlf__autocrlf_true_index_size_is_filtered_size(void) void test_checkout_crlf__with_ident(void) { git_index *index; + git_index_entry *entry; git_blob *blob; git_checkout_options opts = GIT_CHECKOUT_OPTIONS_INIT; opts.checkout_strategy = GIT_CHECKOUT_FORCE; @@ -310,14 +311,14 @@ void test_checkout_crlf__with_ident(void) /* check that blobs have $Id$ */ - cl_git_pass(git_blob_lookup(&blob, g_repo, - & git_index_get_bypath(index, "lf.ident", 0)->id)); + cl_assert((entry = git_index_get_bypath(index, "lf.ident", 0))); + cl_git_pass(git_blob_lookup(&blob, g_repo, &entry->id)); cl_assert_equal_s( ALL_LF_TEXT_RAW "\n$Id$\n", git_blob_rawcontent(blob)); git_blob_free(blob); - cl_git_pass(git_blob_lookup(&blob, g_repo, - & git_index_get_bypath(index, "more2.identcrlf", 0)->id)); + cl_assert((entry = git_index_get_bypath(index, "more2.identcrlf", 0))); + cl_git_pass(git_blob_lookup(&blob, g_repo, &entry->id)); cl_assert_equal_s( "\n$Id$\n" MORE_CRLF_TEXT_AS_LF, git_blob_rawcontent(blob)); git_blob_free(blob); From 956f4da897aa0eec3f66f00cb7a4a47914c24059 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 13 Nov 2015 16:30:39 -0500 Subject: [PATCH 4/7] index: test for smudged entries on write only Test that entries are only smudged when we write the index: the entry smudging is to prevent us from updating an index in a way that it would be impossible to tell that an item was racy. Consider when we load an index: any entries that have the same (or newer) timestamp than the index itself are considered racy, and are subject to further scrutiny. If we *save* that index with the same entries that we loaded, then the index would now have a newer timestamp than the entries, and they would no longer be given that additional scrutiny, failing our racy detection! So test that we smudge those entries only on writing the new index, but that we can detect them (in diff) without having to write. --- tests/index/racy.c | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/tests/index/racy.c b/tests/index/racy.c index 6ff12a39a..fc48f07a8 100644 --- a/tests/index/racy.c +++ b/tests/index/racy.c @@ -100,13 +100,13 @@ void test_index_racy__write_index_just_after_file(void) git_index_free(index); } -void test_index_racy__empty_file_after_smudge(void) + +static void setup_race(void) { - git_index *index; - git_diff *diff; git_buf path = GIT_BUF_INIT; - int i, found_race = 0; + git_index *index; const git_index_entry *entry; + int i, found_race = 0; /* Make sure we do have a timestamp */ cl_git_pass(git_repository_index__weakptr(&index, g_repo)); @@ -131,18 +131,46 @@ void test_index_racy__empty_file_after_smudge(void) found_race = 1; break; } - } if (!found_race) cl_fail("failed to find race after 10 attempts"); + git_buf_free(&path); +} + +void test_index_racy__smudges_index_entry_on_save(void) +{ + git_index *index; + const git_index_entry *entry; + + setup_race(); + + /* write the index, which will smudge anything that had the same timestamp + * as the index when the index was loaded. that way future loads of the + * index (with the new timestamp) will know that these files were not + * clean. + */ + + cl_git_pass(git_repository_index__weakptr(&index, g_repo)); + cl_git_pass(git_index_write(index)); + + cl_assert(entry = git_index_get_bypath(index, "A", 0)); cl_assert_equal_i(0, entry->file_size); +} + +void test_index_racy__detects_diff_of_change_in_identical_timestamp(void) +{ + git_index *index; + git_diff *diff; + + cl_git_pass(git_repository_index__weakptr(&index, g_repo)); + + setup_race(); cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL)); cl_assert_equal_i(1, git_diff_num_deltas(diff)); - git_buf_free(&path); git_diff_free(diff); } @@ -204,7 +232,7 @@ void test_index_racy__reading_clears_uptodate_bit(void) const git_index_entry *entry; setup_uptodate_files(); - + cl_git_pass(git_repository_index(&index, g_repo)); cl_git_pass(git_index_write(index)); From 27bc41cf1798c8937b323a7fe6c7fe459c70f8c1 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 13 Nov 2015 16:31:51 -0500 Subject: [PATCH 5/7] index: clear uptodate bit on save The uptodate bit should have a lifecycle of a single read->write on the index. Once the index is written, the files within it should be scanned for racy timestamps against the new index timestamp. --- src/index.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/index.c b/src/index.c index b3fb077cc..ed4079665 100644 --- a/src/index.c +++ b/src/index.c @@ -2746,6 +2746,15 @@ static int write_tree_extension(git_index *index, git_filebuf *file) return error; } +static void clear_uptodate(git_index *index) +{ + git_index_entry *entry; + size_t i; + + git_vector_foreach(&index->entries, i, entry) + entry->flags_extended &= ~GIT_IDXENTRY_UPTODATE; +} + static int write_index(git_oid *checksum, git_index *index, git_filebuf *file) { git_oid hash_final; @@ -2785,7 +2794,13 @@ static int write_index(git_oid *checksum, git_index *index, git_filebuf *file) git_oid_cpy(checksum, &hash_final); /* write it at the end of the file */ - return git_filebuf_write(file, hash_final.id, GIT_OID_RAWSZ); + if (git_filebuf_write(file, hash_final.id, GIT_OID_RAWSZ) < 0) + return -1; + + /* file entries are no longer up to date */ + clear_uptodate(index); + + return 0; } int git_index_entry_stage(const git_index_entry *entry) From c30051f0d0fd6ff74d6e2fbe0fc5b0209ecf8b85 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 16 Nov 2015 18:05:46 -0500 Subject: [PATCH 6/7] racy: ensure git_index_read_tree clears uptodate Ensure that `git_index_read_tree` clears the uptodate bit on files that it modifies. --- tests/index/racy.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/index/racy.c b/tests/index/racy.c index fc48f07a8..ce395316f 100644 --- a/tests/index/racy.c +++ b/tests/index/racy.c @@ -250,3 +250,31 @@ void test_index_racy__reading_clears_uptodate_bit(void) git_index_free(index); } + +void test_index_racy__read_tree_clears_uptodate_bit(void) +{ + git_index *index; + git_tree *tree; + const git_index_entry *entry; + git_oid id; + + setup_uptodate_files(); + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_write_tree_to(&id, index, g_repo)); + cl_git_pass(git_tree_lookup(&tree, g_repo, &id)); + cl_git_pass(git_index_read_tree(index, tree)); + + /* ensure that no files are uptodate */ + cl_assert((entry = git_index_get_bypath(index, "A", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_assert((entry = git_index_get_bypath(index, "B", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_assert((entry = git_index_get_bypath(index, "C", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + git_tree_free(tree); + git_index_free(index); +} From 5f32c50683cefaf51fa4f8825abfe533f36fe6f3 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 16 Nov 2015 18:06:52 -0500 Subject: [PATCH 7/7] racy: make git_index_read_index handle raciness Ensure that `git_index_read_index` clears the uptodate bit on files that it modifies. Further, do not propagate the cache from an on-disk index into another on-disk index. Although this should not be done, as `git_index_read_index` is used to bring an in-memory index into another index (that may or may not be on-disk), ensure that we do not accidentally bring in these bits when misused. --- src/index.c | 82 ++++++++++++++++++++++++++++------------------ tests/index/racy.c | 48 +++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 32 deletions(-) diff --git a/src/index.c b/src/index.c index ed4079665..bce16348c 100644 --- a/src/index.c +++ b/src/index.c @@ -1003,20 +1003,11 @@ static int index_entry_reuc_init(git_index_reuc_entry **reuc_out, static void index_entry_cpy( git_index_entry *tgt, - git_index *index, - const git_index_entry *src, - bool update_path) + const git_index_entry *src) { const char *tgt_path = tgt->path; memcpy(tgt, src, sizeof(*tgt)); - - /* keep the existing path buffer, but update the path to the one - * given by the caller, if we trust it. - */ tgt->path = tgt_path; - - if (index->ignore_case && update_path) - memcpy((char *)tgt->path, src->path, strlen(tgt->path)); } static int index_entry_dup( @@ -1024,18 +1015,32 @@ static int index_entry_dup( git_index *index, const git_index_entry *src) { - git_index_entry *entry; - - if (!src) { - *out = NULL; - return 0; - } - - if (index_entry_create(&entry, INDEX_OWNER(index), src->path) < 0) + if (index_entry_create(out, INDEX_OWNER(index), src->path) < 0) return -1; - index_entry_cpy(entry, index, src, false); - *out = entry; + index_entry_cpy(*out, src); + return 0; +} + +static void index_entry_cpy_nocache( + git_index_entry *tgt, + const git_index_entry *src) +{ + git_oid_cpy(&tgt->id, &src->id); + tgt->mode = src->mode; + tgt->flags = src->flags; + tgt->flags_extended = (src->flags_extended & GIT_IDXENTRY_EXTENDED_FLAGS); +} + +static int index_entry_dup_nocache( + git_index_entry **out, + git_index *index, + const git_index_entry *src) +{ + if (index_entry_create(out, INDEX_OWNER(index), src->path) < 0) + return -1; + + index_entry_cpy_nocache(*out, src); return 0; } @@ -1331,8 +1336,13 @@ static int index_insert( * and return it in place of the passed in one. */ else if (existing) { - if (replace) - index_entry_cpy(existing, index, entry, trust_path); + if (replace) { + index_entry_cpy(existing, entry); + + if (trust_path) + memcpy((char *)existing->path, entry->path, strlen(entry->path)); + } + index_entry_free(entry); *entry_ptr = entry = existing; } @@ -1709,9 +1719,12 @@ int git_index_conflict_add(git_index *index, assert (index); - if ((ret = index_entry_dup(&entries[0], index, ancestor_entry)) < 0 || - (ret = index_entry_dup(&entries[1], index, our_entry)) < 0 || - (ret = index_entry_dup(&entries[2], index, their_entry)) < 0) + if ((ancestor_entry && + (ret = index_entry_dup(&entries[0], index, ancestor_entry)) < 0) || + (our_entry && + (ret = index_entry_dup(&entries[1], index, our_entry)) < 0) || + (their_entry && + (ret = index_entry_dup(&entries[2], index, their_entry)) < 0)) goto on_error; /* Validate entries */ @@ -2849,7 +2862,7 @@ static int read_tree_cb( entry->mode == old_entry->mode && git_oid_equal(&entry->id, &old_entry->id)) { - index_entry_cpy(entry, data->index, old_entry, false); + index_entry_cpy(entry, old_entry); entry->flags_extended = 0; } @@ -2974,7 +2987,10 @@ int git_index_read_index( goto done; while (true) { - git_index_entry *add_entry = NULL, *remove_entry = NULL; + git_index_entry + *dup_entry = NULL, + *add_entry = NULL, + *remove_entry = NULL; int diff; if (old_entry && new_entry) @@ -2989,8 +3005,7 @@ int git_index_read_index( if (diff < 0) { remove_entry = (git_index_entry *)old_entry; } else if (diff > 0) { - if ((error = index_entry_dup(&add_entry, index, new_entry)) < 0) - goto done; + dup_entry = (git_index_entry *)new_entry; } else { /* Path and stage are equal, if the OID is equal, keep it to * keep the stat cache data. @@ -2998,13 +3013,16 @@ int git_index_read_index( if (git_oid_equal(&old_entry->id, &new_entry->id)) { add_entry = (git_index_entry *)old_entry; } else { - if ((error = index_entry_dup(&add_entry, index, new_entry)) < 0) - goto done; - + dup_entry = (git_index_entry *)new_entry; remove_entry = (git_index_entry *)old_entry; } } + if (dup_entry) { + if ((error = index_entry_dup_nocache(&add_entry, index, dup_entry)) < 0) + goto done; + } + if (add_entry) { if ((error = git_vector_insert(&new_entries, add_entry)) == 0) INSERT_IN_MAP_EX(index, new_entries_map, add_entry, error); diff --git a/tests/index/racy.c b/tests/index/racy.c index ce395316f..f7440c3af 100644 --- a/tests/index/racy.c +++ b/tests/index/racy.c @@ -278,3 +278,51 @@ void test_index_racy__read_tree_clears_uptodate_bit(void) git_tree_free(tree); git_index_free(index); } + +void test_index_racy__read_index_smudges(void) +{ + git_index *index, *newindex; + const git_index_entry *entry; + + /* if we are reading an index into our new index, ensure that any + * racy entries in the index that we're reading are smudged so that + * we don't propagate their timestamps without further investigation. + */ + setup_race(); + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_new(&newindex)); + cl_git_pass(git_index_read_index(newindex, index)); + + cl_assert(entry = git_index_get_bypath(newindex, "A", 0)); + cl_assert_equal_i(0, entry->file_size); + + git_index_free(index); + git_index_free(newindex); +} + +void test_index_racy__read_index_clears_uptodate_bit(void) +{ + git_index *index, *newindex; + const git_index_entry *entry; + git_oid id; + + setup_uptodate_files(); + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_git_pass(git_index_new(&newindex)); + cl_git_pass(git_index_read_index(newindex, index)); + + /* ensure that files brought in from the other index are not uptodate */ + cl_assert((entry = git_index_get_bypath(newindex, "A", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_assert((entry = git_index_get_bypath(newindex, "B", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + cl_assert((entry = git_index_get_bypath(newindex, "C", 0))); + cl_assert_equal_i(0, (entry->flags_extended & GIT_IDXENTRY_UPTODATE)); + + git_index_free(index); + git_index_free(newindex); +}