From 46c84c72600c0aed379e0df659ad1ca518fdd7cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 19 Jun 2015 21:56:42 +0200 Subject: [PATCH 1/3] index: use a better assertion when comparing sizes This will tell us which numbers we were trying to compare, rather than just telling us that they're different. --- tests/index/tests.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/index/tests.c b/tests/index/tests.c index 3c8060a2e..e1ff12ad0 100644 --- a/tests/index/tests.c +++ b/tests/index/tests.c @@ -103,8 +103,8 @@ void test_index_tests__default_test_index(void) git_index_entry *e = entries[test_entries[i].index]; cl_assert_equal_s(e->path, test_entries[i].path); - cl_assert(e->mtime.seconds == test_entries[i].mtime); - cl_assert(e->file_size == test_entries[i].file_size); + cl_assert_equal_i(e->mtime.seconds, test_entries[i].mtime); + cl_assert_equal_i(e->file_size, test_entries[i].file_size); } git_index_free(index); From 5e947c91d49aacc8abb5b3f018737c72c52486a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 19 Jun 2015 22:05:08 +0200 Subject: [PATCH 2/3] index: use the checksum to check whether it's been modified We currently use a timestamp to check whether an index file has been modified since we last read it, but this is racy. If two updates happen in the same second and we read after the first one, we won't detect the second one. Instead read the SHA-1 checksum of the file, which is stored in its last 20 bytes; this gives us a sure-fire way to detect whether the file has changed since we last read it. As we're now keeping track of it, expose an accessor to this data. 
--- include/git2/index.h | 12 +++++++++++ src/index.c | 47 +++++++++++++++++++++++++++++++++++++++----- src/index.h | 1 + 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/include/git2/index.h b/include/git2/index.h index 49bbe1614..7caf3ed78 100644 --- a/include/git2/index.h +++ b/include/git2/index.h @@ -273,6 +273,18 @@ GIT_EXTERN(int) git_index_write(git_index *index); */ GIT_EXTERN(const char *) git_index_path(const git_index *index); +/** + * Get the checksum of the index + * + * This checksum is the SHA-1 hash over the index file (except the + * last 20 bytes which are the checksum itself). In cases where the + * index does not exist on-disk, it will be zeroed out. + * + * @param index an existing index object + * @return a pointer to the checksum of the index + */ +GIT_EXTERN(const git_oid *) git_index_checksum(git_index *index); + /** * Read a tree into the index file with stats * diff --git a/src/index.c b/src/index.c index a931f048a..ba101ea14 100644 --- a/src/index.c +++ b/src/index.c @@ -116,7 +116,7 @@ static int read_header(struct index_header *dest, const void *buffer); static int parse_index(git_index *index, const char *buffer, size_t buffer_size); static bool is_index_extended(git_index *index); -static int write_index(git_index *index, git_filebuf *file); +static int write_index(git_oid *checksum, git_index *index, git_filebuf *file); static void index_entry_free(git_index_entry *entry); static void index_entry_reuc_free(git_index_reuc_entry *reuc); @@ -598,6 +598,38 @@ int git_index_caps(const git_index *index) (index->no_symlinks ? 
GIT_INDEXCAP_NO_SYMLINKS : 0)); } +const git_oid *git_index_checksum(git_index *index) +{ + return &index->checksum; +} + +/** + * Returns 1 for changed, 0 for not changed and <0 for errors + */ +static int compare_checksum(git_index *index) +{ + int fd, error; + ssize_t bytes_read; + git_oid checksum = {{ 0 }}; + + if ((fd = p_open(index->index_file_path, O_RDONLY)) < 0) + return fd; + + if ((error = p_lseek(fd, -20, SEEK_END)) < 0) { + p_close(fd); + giterr_set(GITERR_OS, "failed to seek to end of file"); + return -1; + } + + bytes_read = p_read(fd, &checksum, GIT_OID_RAWSZ); + p_close(fd); + + if (bytes_read < 0) + return -1; + + return !!git_oid_cmp(&checksum, &index->checksum); +} + int git_index_read(git_index *index, int force) { int error = 0, updated; @@ -616,8 +648,8 @@ int git_index_read(git_index *index, int force) return 0; } - updated = git_futils_filestamp_check(&stamp, index->index_file_path); - if (updated < 0) { + if ((updated = git_futils_filestamp_check(&stamp, index->index_file_path) < 0) || + ((updated = compare_checksum(index)) < 0)) { giterr_set( GITERR_INDEX, "Failed to read index: '%s' no longer exists", @@ -2092,6 +2124,8 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) goto done; } + git_oid_cpy(&index->checksum, &checksum_calculated); + #undef seek_forward /* Entries are stored case-sensitively on disk, so re-sort now if @@ -2355,7 +2389,7 @@ static int write_tree_extension(git_index *index, git_filebuf *file) return error; } -static int write_index(git_index *index, git_filebuf *file) +static int write_index(git_oid *checksum, git_index *index, git_filebuf *file) { git_oid hash_final; struct index_header header; @@ -2391,6 +2425,7 @@ static int write_index(git_index *index, git_filebuf *file) /* get out the hash for all the contents we've appended to the file */ git_filebuf_hash(&hash_final, file); + git_oid_cpy(checksum, &hash_final); /* write it at the end of the file */ return 
git_filebuf_write(file, hash_final.id, GIT_OID_RAWSZ); @@ -2953,6 +2988,7 @@ int git_indexwriter_init_for_operation( int git_indexwriter_commit(git_indexwriter *writer) { int error; + git_oid checksum = {{ 0 }}; if (!writer->should_write) return 0; @@ -2962,7 +2998,7 @@ int git_indexwriter_commit(git_indexwriter *writer) git_vector_sort(&writer->index->reuc); - if ((error = write_index(writer->index, &writer->file)) < 0) { + if ((error = write_index(&checksum, writer->index, &writer->file)) < 0) { git_indexwriter_cleanup(writer); return error; } @@ -2977,6 +3013,7 @@ int git_indexwriter_commit(git_indexwriter *writer) } writer->index->on_disk = 1; + git_oid_cpy(&writer->index->checksum, &checksum); git_index_free(writer->index); writer->index = NULL; diff --git a/src/index.h b/src/index.h index 0f6f4e86e..615d703c4 100644 --- a/src/index.h +++ b/src/index.h @@ -22,6 +22,7 @@ struct git_index { char *index_file_path; git_futils_filestamp stamp; + git_oid checksum; /* checksum at the end of the file */ git_vector entries; From 624c949f01ca553fdd0b42fbac439e822c1bdd5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Sat, 20 Jun 2015 16:17:28 +0200 Subject: [PATCH 3/3] index: make relative comparison use the checksum as well This is used by the submodule in order to figure out if the index has changed since it last read it. Using a timestamp is racy, so let's make it use the checksum, just like we now do for reloading the index itself. 
--- src/index.c | 6 ++---- src/index.h | 2 +- src/submodule.c | 5 ++--- src/submodule.h | 2 +- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/index.c b/src/index.c index ba101ea14..1fb3c48f3 100644 --- a/src/index.c +++ b/src/index.c @@ -679,15 +679,13 @@ int git_index_read(git_index *index, int force) } int git_index__changed_relative_to( - git_index *index, const git_futils_filestamp *fs) + git_index *index, const git_oid *checksum) { /* attempt to update index (ignoring errors) */ if (git_index_read(index, false) < 0) giterr_clear(); - return (index->stamp.mtime != fs->mtime || - index->stamp.size != fs->size || - index->stamp.ino != fs->ino); + return !!git_oid_cmp(&index->checksum, checksum); } /* diff --git a/src/index.h b/src/index.h index 615d703c4..9c60b015c 100644 --- a/src/index.h +++ b/src/index.h @@ -81,7 +81,7 @@ GIT_INLINE(const git_futils_filestamp *) git_index__filestamp(git_index *index) return &index->stamp; } -extern int git_index__changed_relative_to(git_index *index, const git_futils_filestamp *fs); +extern int git_index__changed_relative_to(git_index *index, const git_oid *checksum); /* Copy the current entries vector *and* increment the index refcount. * Call `git_index__release_snapshot` when done. 
diff --git a/src/submodule.c b/src/submodule.c index 1139df973..246502e99 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -1946,7 +1946,7 @@ static int submodule_cache_refresh(git_submodule_cache *cache, int refresh) update_index = update_head = update_gitmod = true; else { update_index = - !idx || git_index__changed_relative_to(idx, &cache->index_stamp); + !idx || git_index__changed_relative_to(idx, &cache->index_checksum); update_head = !head || !git_oid_equal(&cache->head_id, git_tree_id(head)); @@ -1984,8 +1984,7 @@ static int submodule_cache_refresh(git_submodule_cache *cache, int refresh) if ((error = submodule_cache_refresh_from_index(cache, idx)) < 0) goto cleanup; - git_futils_filestamp_set( - &cache->index_stamp, git_index__filestamp(idx)); + git_oid_cpy(&cache->index_checksum, git_index_checksum(idx)); } /* add submodule information from HEAD */ diff --git a/src/submodule.h b/src/submodule.h index a6182beca..7a9bf9c92 100644 --- a/src/submodule.h +++ b/src/submodule.h @@ -110,7 +110,7 @@ typedef struct { /* cache invalidation data */ git_oid head_id; - git_futils_filestamp index_stamp; + git_oid index_checksum; git_buf gitmodules_path; git_futils_filestamp gitmodules_stamp; git_futils_filestamp config_stamp;