From 8a5e7aaecf7ce48dfab805debfc92458ab1112c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 22 May 2017 12:53:44 +0200 Subject: [PATCH 01/14] varint: fix computation for remaining buffer space When encoding varints to a buffer, we want to remain sure that the required buffer space does not exceed what is actually available. Our current check does not do the right thing, though, in that it does not honor that our `pos` variable counts the position down instead of up. As such, we will require too much memory for small varints and not enough memory for big varints. Fix the issue by correctly calculating the required size as `(sizeof(varint) - pos)`. Add a test which failed before. --- src/varint.c | 2 +- tests/core/encoding.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/varint.c b/src/varint.c index 2f868607c..beac8c709 100644 --- a/src/varint.c +++ b/src/varint.c @@ -36,7 +36,7 @@ int git_encode_varint(unsigned char *buf, size_t bufsize, uintmax_t value) while (value >>= 7) varint[--pos] = 128 | (--value & 127); if (buf) { - if (bufsize < pos) + if (bufsize < (sizeof(varint) - pos)) return -1; memcpy(buf, varint + pos, sizeof(varint) - pos); } diff --git a/tests/core/encoding.c b/tests/core/encoding.c index 7d91720f4..a677afe2e 100644 --- a/tests/core/encoding.c +++ b/tests/core/encoding.c @@ -29,6 +29,9 @@ void test_core_encoding__encode(void) cl_assert(git_encode_varint(buf, 100, 65) == 1); cl_assert(buf[0] == 'A'); + cl_assert(git_encode_varint(buf, 1, 1) == 1); + cl_assert(!memcmp(buf, "\x01", 1)); + cl_assert(git_encode_varint(buf, 100, 267869656) == 4); cl_assert(!memcmp(buf, "\xfe\xdc\xbaX", 4)); From febe8c14a0cb3530f49a3990c1ff031d45d12ce6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 10 May 2017 14:27:12 +0200 Subject: [PATCH 02/14] index: fix confusion with shared prefix in compressed path names The index version 4 introduced compressed path names for the entries. From the git.git index-format documentation: At the beginning of an entry, an integer N in the variable width encoding [...] is stored, followed by a NUL-terminated string S. Removing N bytes from the end of the path name for the previous entry, and replacing it with the string S yields the path name for this entry. But instead of stripping N bytes from the previous path's string and using the remaining prefix, we were instead simply concatenating the previous path with the current entry path, which is obviously wrong. Fix the issue by correctly copying the first N bytes of the previous entry only and concatenating the result with our current entry's path. --- src/index.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/index.c b/src/index.c index 932a5306a..f5115a62d 100644 --- a/src/index.c +++ b/src/index.c @@ -2355,21 +2355,24 @@ static size_t read_entry( entry.path = (char *)path_ptr; } else { size_t varint_len; - size_t shared = git_decode_varint((const unsigned char *)path_ptr, - &varint_len); - size_t len = strlen(path_ptr + varint_len); + size_t strip_len = git_decode_varint((const unsigned char *)path_ptr, + &varint_len); size_t last_len = strlen(*last); - size_t tmp_path_len; + size_t prefix_len = last_len - strip_len; + size_t suffix_len = strlen(path_ptr + varint_len); + size_t path_len; if (varint_len == 0) return index_error_invalid("incorrect prefix length"); - GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1); - tmp_path = git__malloc(tmp_path_len); + GITERR_CHECK_ALLOC_ADD(&path_len, prefix_len, suffix_len); + GITERR_CHECK_ALLOC_ADD(&path_len, path_len, 1); + tmp_path = git__malloc(path_len); GITERR_CHECK_ALLOC(tmp_path); - memcpy(tmp_path, last, last_len); - memcpy(tmp_path + last_len, path_ptr + varint_len, len); - entry_size = long_entry_size(shared + len); + + memcpy(tmp_path, last, prefix_len); + memcpy(tmp_path + prefix_len, path_ptr + varint_len, suffix_len + 1); + entry_size = long_entry_size(prefix_len + suffix_len); entry.path = tmp_path; } From 11d0be23c481f2df2e03e804c611a0f7d2be1599 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 May 2017 10:01:43 +0200 Subject: [PATCH 03/14] index: set last entry when reading compressed entries To calculate the path of a compressed index entry, we need to know the preceding entry's path. While we do actually set the first predecessor correctly to "", we fail to update this while reading the entries. Fix the issue by updating `last` inside of the loop. Previously, we've been passing a double-pointer to `read_entry`, which it didn't update. As it is more obvious to update the pointer inside the loop itself, though, we can simply convert it to a normal pointer. --- src/index.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/index.c b/src/index.c index f5115a62d..110760c9c 100644 --- a/src/index.c +++ b/src/index.c @@ -2287,7 +2287,7 @@ static size_t read_entry( git_index *index, const void *buffer, size_t buffer_size, - const char **last) + const char *last) { size_t path_length, entry_size; const char *path_ptr; @@ -2357,7 +2357,7 @@ static size_t read_entry( size_t varint_len; size_t strip_len = git_decode_varint((const unsigned char *)path_ptr, &varint_len); - size_t last_len = strlen(*last); + size_t last_len = strlen(last); size_t prefix_len = last_len - strip_len; size_t suffix_len = strlen(path_ptr + varint_len); size_t path_len; @@ -2448,7 +2448,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) unsigned int i; struct index_header header = { 0 }; git_oid checksum_calculated, checksum_expected; - const char **last = NULL; + const char *last = NULL; const char *empty = ""; #define seek_forward(_increase) { \ @@ -2472,7 +2472,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) index->version = header.version; if (index->version >= INDEX_VERSION_NUMBER_COMP) - last = ∅ + last = empty; seek_forward(INDEX_HEADER_SIZE); @@ -2507,6 +2507,9 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) } error = 0; + if (index->version >= INDEX_VERSION_NUMBER_COMP) + last = entry->path; + seek_forward(entry_size); } From 8ceb890b77fc3ef1ab0fb4ef1d669366734cea85 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 19 May 2017 12:35:21 +0200 Subject: [PATCH 04/14] index: set last written index entry in foreach-entry-loop The last written disk entry is currently being written inside of the function `write_disk_entry`. Make behavior a bit more obviously by instead setting it inside of `write_entries` while iterating all entries. --- src/index.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/index.c b/src/index.c index 110760c9c..aaf76c85d 100644 --- a/src/index.c +++ b/src/index.c @@ -2580,7 +2580,7 @@ static bool is_index_extended(git_index *index) return (extended > 0); } -static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last) +static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char *last) { void *mem = NULL; struct entry_short *ondisk; @@ -2592,7 +2592,7 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha path_len = ((struct entry_internal *)entry)->pathlen; if (last) { - const char *last_c = *last; + const char *last_c = last; while (*path_start == *last_c) { if (!*path_start || !*last_c) @@ -2602,7 +2602,6 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha ++same_len; } path_len -= same_len; - *last = entry->path; } if (entry->flags & GIT_IDXENTRY_EXTENDED) @@ -2668,8 +2667,7 @@ static int write_entries(git_index *index, git_filebuf *file) size_t i; git_vector case_sorted, *entries; git_index_entry *entry; - const char **last = NULL; - const char *empty = ""; + const char *last = NULL; /* If index->entries is sorted case-insensitively, then we need * to re-sort it case-sensitively before writing */ @@ -2682,11 +2680,14 @@ static int write_entries(git_index *index, git_filebuf *file) } if (index->version >= INDEX_VERSION_NUMBER_COMP) - last = ∅ + last = ""; - git_vector_foreach(entries, i, entry) + git_vector_foreach(entries, i, entry) { if ((error = write_disk_entry(file, entry, last)) < 0) break; + if (index->version >= INDEX_VERSION_NUMBER_COMP) + last = entry->path; + } if (index->ignore_case) git_vector_free(&case_sorted); From 29f498e0fd0b4f1001ffed209042d8996956e6aa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 19 May 2017 13:38:34 +0200 Subject: [PATCH 05/14] index: move index entry size computation into its own function Create a new function `index_entry_size` which encapsulates the logic to calculate how much space is needed for an index entry, whether it is simple/extended or compressed/uncompressed. This can later be re-used by our code writing index entries. --- src/index.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/index.c b/src/index.c index aaf76c85d..161a9da1f 100644 --- a/src/index.c +++ b/src/index.c @@ -2282,6 +2282,21 @@ out_err: return 0; } +static size_t index_entry_size(size_t path_len, size_t varint_len, uint32_t flags) +{ + if (varint_len) { + if (flags & GIT_IDXENTRY_EXTENDED) + return offsetof(struct entry_long, path) + path_len + 1 + varint_len; + else + return offsetof(struct entry_short, path) + path_len + 1 + varint_len; + } else { + if (flags & GIT_IDXENTRY_EXTENDED) + return long_entry_size(path_len); + else + return short_entry_size(path_len); + } +} + static size_t read_entry( git_index_entry **out, git_index *index, @@ -2344,10 +2359,7 @@ static size_t read_entry( path_length = path_end - path_ptr; } - if (entry.flags & GIT_IDXENTRY_EXTENDED) - entry_size = long_entry_size(path_length); - else - entry_size = short_entry_size(path_length); + entry_size = index_entry_size(path_length, 0, entry.flags); if (INDEX_FOOTER_SIZE + entry_size > buffer_size) return 0; @@ -2372,7 +2384,7 @@ static size_t read_entry( memcpy(tmp_path, last, prefix_len); memcpy(tmp_path + prefix_len, path_ptr + varint_len, suffix_len + 1); - entry_size = long_entry_size(prefix_len + suffix_len); + entry_size = index_entry_size(suffix_len, varint_len, entry.flags); entry.path = tmp_path; } From 46b67034a12555a6ef60d28c02d0d9d15fdc117b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 19 May 2017 13:59:53 +0200 Subject: [PATCH 06/14] index: don't right-pad paths when writing compressed entries Our code to write index entries to disk does not check whether the entry that is to be written should use prefix compression for the path. As such, we were overallocating memory and added bogus right-padding into the resulting index entries. As there is no padding allowed in the index version 4 format, this should actually result in an invalid index. Fix this by re-using the newly extracted `index_entry_size` function. --- src/index.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/index.c b/src/index.c index 161a9da1f..820c9716d 100644 --- a/src/index.c +++ b/src/index.c @@ -2597,6 +2597,7 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha void *mem = NULL; struct entry_short *ondisk; size_t path_len, disk_size; + int varint_len = 0; char *path; const char *path_start = entry->path; size_t same_len = 0; @@ -2614,12 +2615,10 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha ++same_len; } path_len -= same_len; + varint_len = git_encode_varint(NULL, 0, same_len); } - if (entry->flags & GIT_IDXENTRY_EXTENDED) - disk_size = long_entry_size(path_len); - else - disk_size = short_entry_size(path_len); + disk_size = index_entry_size(path_len, varint_len, entry->flags); if (git_filebuf_reserve(file, &mem, disk_size) < 0) return -1; From 350d2c47bcbf9d9c09f2b65c9659f0c5c2d683b5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 19 May 2017 14:22:35 +0200 Subject: [PATCH 07/14] index: remove file-scope entry size macros All index entry size computations are now performed in `index_entry_size`. As such, we do not need the file-scope macros for computing these sizes anymore. Remove them and move the `entry_size` macro into the `index_entry_size` function. --- src/index.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/index.c b/src/index.c index 820c9716d..ed9f47839 100644 --- a/src/index.c +++ b/src/index.c @@ -54,10 +54,6 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr unsigned int flags, git_index_matched_path_cb cb, void *payload); -#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7) -#define short_entry_size(len) entry_size(struct entry_short, len) -#define long_entry_size(len) entry_size(struct entry_long, len) - #define minimal_entry_size (offsetof(struct entry_short, path)) static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ; @@ -2290,10 +2286,12 @@ static size_t index_entry_size(size_t path_len, size_t varint_len, uint32_t flag else return offsetof(struct entry_short, path) + path_len + 1 + varint_len; } else { +#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7) if (flags & GIT_IDXENTRY_EXTENDED) - return long_entry_size(path_len); + return entry_size(struct entry_long, path_len); else - return short_entry_size(path_len); + return entry_size(struct entry_short, path_len); +#undef entry_size } } From 83e0392ceadb7e1d9db09a7f013e28759a666e89 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 19 May 2017 13:39:05 +0200 Subject: [PATCH 08/14] index: also sanity check entry size with compressed entries We have a check in place whether the index has enough data left for the required footer after reading an index entry, but this was only used for uncompressed entries. Move the check down a bit so that it is executed for both compressed and uncompressed index entries. --- src/index.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/index.c b/src/index.c index ed9f47839..bff5d7f73 100644 --- a/src/index.c +++ b/src/index.c @@ -2358,10 +2358,6 @@ static size_t read_entry( } entry_size = index_entry_size(path_length, 0, entry.flags); - - if (INDEX_FOOTER_SIZE + entry_size > buffer_size) - return 0; - entry.path = (char *)path_ptr; } else { size_t varint_len; @@ -2386,6 +2382,9 @@ static size_t read_entry( entry.path = tmp_path; } + if (INDEX_FOOTER_SIZE + entry_size > buffer_size) + return 0; + if (index_entry_dup(out, index, &entry) < 0) { git__free(tmp_path); return 0; From c71dff7e8a1bde298972c10901802608bd4cbb55 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 19 May 2017 13:49:34 +0200 Subject: [PATCH 09/14] index: fix shared prefix computation when writing index entry When using compressed index entries, each entry's path is preceded by a varint encoding how long the shared prefix with the previous index entry actually is. We currently encode a length of `(path_len - same_len)`, which is doubly wrong. First, `path_len` is already set to `path_len - same_len` previously. Second, we want to encode the shared prefix rather than the un-shared suffix length. Fix this by using `same_len` as the varint value instead. --- src/index.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/index.c b/src/index.c index bff5d7f73..860adaf12 100644 --- a/src/index.c +++ b/src/index.c @@ -2661,8 +2661,7 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha if (last) { path += git_encode_varint((unsigned char *) path, - disk_size, - path_len - same_len); + disk_size, same_len); } memcpy(path, path_start, path_len); From 064a60e96fc0ac134c9cba7b99bd31e1afbef886 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 19 May 2017 14:06:15 +0200 Subject: [PATCH 10/14] index: verify we have enough space left when writing index entries In our code writing index entries, we carry around a `disk_size` representing how much memory we have in total and pass this value to `git_encode_varint` to do bounds checks. This does not make much sense, as at the time when passing on this variable it is already out of date. Fix this by subtracting used memory from `disk_size` as we go along. Furthermore, assert we've actually got enough space left to do the final path memcpy. --- src/index.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/index.c b/src/index.c index 860adaf12..c29e90fb0 100644 --- a/src/index.c +++ b/src/index.c @@ -2655,15 +2655,34 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha ondisk_ext->flags_extended = htons(entry->flags_extended & GIT_IDXENTRY_EXTENDED_FLAGS); path = ondisk_ext->path; - } - else + disk_size -= offsetof(struct entry_long, path); + } else { path = ondisk->path; + disk_size -= offsetof(struct entry_short, path); + } if (last) { - path += git_encode_varint((unsigned char *) path, + varint_len = git_encode_varint((unsigned char *) path, disk_size, same_len); + assert(varint_len > 0); + path += varint_len; + disk_size -= varint_len; + + /* + * If using path compression, we are not allowed + * to have additional trailing NULs. + */ + assert(disk_size == path_len + 1); + } else { + /* + * If no path compression is used, we do have + * NULs as padding. As such, simply assert that + * we have enough space left to write the path. + */ + assert(disk_size > path_len); } - memcpy(path, path_start, path_len); + + memcpy(path, path_start, path_len + 1); return 0; } From fea0c81e95d5c22b758df6bedbd13321fa2ab195 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 May 2017 09:09:07 +0200 Subject: [PATCH 11/14] tests: index::version: move up cleanup function The init and cleanup functions for test suites are usually prepended to our actual tests. The index::version test suite does not adhere to this stile. Fix this. --- tests/index/version.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/index/version.c b/tests/index/version.c index 3fd240d3c..688a20f20 100644 --- a/tests/index/version.c +++ b/tests/index/version.c @@ -3,6 +3,12 @@ static git_repository *g_repo = NULL; +void test_index_version__cleanup(void) +{ + cl_git_sandbox_cleanup(); + g_repo = NULL; +} + void test_index_version__can_write_v4(void) { git_index *index; @@ -33,9 +39,3 @@ void test_index_version__can_write_v4(void) git_index_free(index); } - -void test_index_version__cleanup(void) -{ - cl_git_sandbox_cleanup(); - g_repo = NULL; -} From 82368b1bad1eae400b9e1f8f0e7a452c1335d4db Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 May 2017 10:04:42 +0200 Subject: [PATCH 12/14] tests: index::version: add test to read index version v4 While we have a simple test to determine whether we can write an index of version 4, we never verified that we are able to read this kind of index (and in fact, we were not able to do so). Add a new repository which has an index of version 4. This repository is then read from a new test. --- tests/index/version.c | 23 ++++++++++++++++++ tests/resources/indexv4/.gitted/HEAD | Bin 0 -> 23 bytes tests/resources/indexv4/.gitted/config | Bin 0 -> 92 bytes tests/resources/indexv4/.gitted/index | Bin 0 -> 572 bytes .../4c/9109b3e671d851eec87e0e72f6305b582e7e99 | Bin 0 -> 70 bytes .../b0/952dbb50bed5f01e03e31b296184cb183e54a7 | Bin 0 -> 154 bytes .../e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 | Bin 0 -> 15 bytes .../indexv4/.gitted/refs/heads/master | Bin 0 -> 41 bytes tests/resources/indexv4/file.tx | Bin tests/resources/indexv4/file.txt | Bin tests/resources/indexv4/file.txz | Bin tests/resources/indexv4/foo | Bin tests/resources/indexv4/zzz | Bin 13 files changed, 23 insertions(+) create mode 100644 tests/resources/indexv4/.gitted/HEAD create mode 100644 tests/resources/indexv4/.gitted/config create mode 100644 tests/resources/indexv4/.gitted/index create mode 100644 tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 create mode 100644 tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 create mode 100644 tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 create mode 100644 tests/resources/indexv4/.gitted/refs/heads/master create mode 100644 tests/resources/indexv4/file.tx create mode 100644 tests/resources/indexv4/file.txt create mode 100644 tests/resources/indexv4/file.txz create mode 100644 tests/resources/indexv4/foo create mode 100644 tests/resources/indexv4/zzz diff --git a/tests/index/version.c b/tests/index/version.c index 688a20f20..c3cb0cd40 100644 --- a/tests/index/version.c +++ b/tests/index/version.c @@ -9,6 +9,29 @@ void test_index_version__cleanup(void) g_repo = NULL; } +void test_index_version__can_read_v4(void) +{ + const char *paths[] = { + "file.tx", "file.txt", "file.txz", "foo", "zzz", + }; + git_index *index; + size_t i; + + g_repo = cl_git_sandbox_init("indexv4"); + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_assert_equal_sz(git_index_entrycount(index), 5); + + for (i = 0; i < ARRAY_SIZE(paths); i++) { + const git_index_entry *entry = + git_index_get_bypath(index, paths[i], GIT_INDEX_STAGE_NORMAL); + + cl_assert(entry != NULL); + } + + git_index_free(index); +} + void test_index_version__can_write_v4(void) { git_index *index; diff --git a/tests/resources/indexv4/.gitted/HEAD b/tests/resources/indexv4/.gitted/HEAD new file mode 100644 index 0000000000000000000000000000000000000000..cb089cd89a7d7686d284d8761201649346b5aa1c GIT binary patch literal 23 ecmXR)O|w!cN=+-)&qz&7Db~+TEG|hc;sO9;xClW2 literal 0 HcmV?d00001 diff --git a/tests/resources/indexv4/.gitted/config b/tests/resources/indexv4/.gitted/config new file mode 100644 index 0000000000000000000000000000000000000000..515f4836297fdf7567c066983c16e5eff598f7bd GIT binary patch literal 92 zcmXxa!3}^Q3~s^_Nmuv(#Z!#wLX gEz=wWXF<_~p=KR1>|`I9#D_#r5;1VEi5mQGFX>V7!5T>k0@g}7)o#%pii6uj8(V{Fd)hRX7>Di zTqYP2X99CoRTV=>kgF@u>yiwn3I<$06FE0OE4&f-?nE75(KmzW2)(+Qp?)DjKm|)x zeDaeMOEUBG6!c4S3-mMdQc^3*Omq~AD~n4~a}|6t^GYj#>Zk_Th&4$sJ+mY;Jukl~ cm4Trt+qC|}D!;7j`)57g%`ng8_BLg60Qu>j^#A|> literal 0 HcmV?d00001 diff --git a/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 b/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 new file mode 100644 index 0000000000000000000000000000000000000000..cedd594b04b27400efb1359c75aefb3b5464d1c7 GIT binary patch literal 70 zcmV-M0J;Bo0V^p=O;s>7vt%$dFfcPQQAo?oN!2Ti_@% literal 0 HcmV?d00001 diff --git a/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 b/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 new file mode 100644 index 0000000000000000000000000000000000000000..0ddc1d1a944e09d86ed5aaa9498d607c2b3d087a GIT binary patch literal 154 zcmV;L0A>Gp0hP_s3BoWGMq!^b#eEkfZPTU!5#2y=f;9O{Lv0e0E9mwXH*kP+K9{yF zFtySgxB#S_X{4Dt47%}S-vDmZkkIWsbh4lNv4w^)!oGM(h3=M1a4S6+AuhIfrgN^> zyw+3LF4QQadaaZi=qOt$h5LDo|4VW6p62jGB~HXU`UHjO@g_trAIg>&+W)zr){vt3 I0t5s_J^v+3NdN!< literal 0 HcmV?d00001 diff --git a/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 b/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 new file mode 100644 index 0000000000000000000000000000000000000000..711223894375fe1186ac5bfffdc48fb1fa1e65cc GIT binary patch literal 15 Wcmb Date: Fri, 19 May 2017 12:45:48 +0200 Subject: [PATCH 13/14] tests: index::version: verify we write compressed index entries While we do have a test which checks whether a written index of version 4 has the correct version set, we do not check whether this actually enables path compression for index entries. This commit adds a new test by adding a number of index entries with equal path prefixes to the index and subsequently flushing that to disk. With suffix compression enabled by index version 4, only the last few bytes of these paths will actually have to be written to the index, saving a lot of disk space. For the test, differences are about an order of magnitude, allowing us to easily verify without taking a deeper look at actual on-disk contents. --- tests/index/version.c | 60 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/index/version.c b/tests/index/version.c index c3cb0cd40..fc139f6b3 100644 --- a/tests/index/version.c +++ b/tests/index/version.c @@ -62,3 +62,63 @@ void test_index_version__can_write_v4(void) git_index_free(index); } + +void test_index_version__v4_uses_path_compression(void) +{ + git_index_entry entry; + git_index *index; + char path[250], buf[1]; + struct stat st; + char i, j; + + memset(path, 'a', sizeof(path)); + memset(buf, 'a', sizeof(buf)); + + memset(&entry, 0, sizeof(entry)); + entry.path = path; + entry.mode = GIT_FILEMODE_BLOB; + + g_repo = cl_git_sandbox_init("indexv4"); + cl_git_pass(git_repository_index(&index, g_repo)); + + /* write 676 paths of 250 bytes length */ + for (i = 'a'; i <= 'z'; i++) { + for (j = 'a'; j < 'z'; j++) { + path[ARRAY_SIZE(path) - 3] = i; + path[ARRAY_SIZE(path) - 2] = j; + path[ARRAY_SIZE(path) - 1] = '\0'; + cl_git_pass(git_index_add_frombuffer(index, &entry, buf, sizeof(buf))); + } + } + + cl_git_pass(git_index_write(index)); + cl_git_pass(p_stat(git_index_path(index), &st)); + + /* + * Without path compression, the written paths would at + * least take + * + * (entries * pathlen) = len + * (676 * 250) = 169000 + * + * bytes. As index v4 uses suffix-compression and our + * written paths only differ in the last two entries, + * this number will be much smaller, e.g. + * + * (1 * pathlen) + (675 * 2) = len + * 676 + 1350 = 2026 + * + * bytes. + * + * Note that the above calculations do not include + * additional metadata of the index, e.g. OIDs or + * index extensions. Including those we get an index + * of approx. 200kB without compression and 40kB with + * compression. As this is a lot smaller than without + * compression, we can verify that path compression is + * used. + */ + cl_assert_(st.st_size < 75000, "path compression not enabled"); + + git_index_free(index); +} From 92d3ea4e398e8fbe3bf59088dc1a9ce8642b23fe Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 19 May 2017 13:04:32 +0200 Subject: [PATCH 14/14] tests: index::version: improve write test for index v4 The current write test does not trigger some edge-cases in the index version 4 path compression code. Rewrite the test to start off the an empty standard repository, creating index entries with interesting paths itself. This allows for more fine-grained control over checked paths. Furthermore, we now also verify that entry paths are actually reconstructed correctly. --- tests/index/version.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/tests/index/version.c b/tests/index/version.c index fc139f6b3..7ada302b5 100644 --- a/tests/index/version.c +++ b/tests/index/version.c @@ -34,31 +34,44 @@ void test_index_version__can_read_v4(void) void test_index_version__can_write_v4(void) { + const char *paths[] = { + "foo", + "foox", + "foobar", + "foobal", + "x", + "xz", + "xyzzyx" + }; + git_index_entry entry; git_index *index; - const git_index_entry *entry; + size_t i; - g_repo = cl_git_sandbox_init("filemodes"); + g_repo = cl_git_sandbox_init("empty_standard_repo"); cl_git_pass(git_repository_index(&index, g_repo)); - - cl_assert(index->on_disk); - cl_assert(git_index_version(index) == 2); - - cl_assert(git_index_entrycount(index) == 6); - cl_git_pass(git_index_set_version(index, 4)); + for (i = 0; i < ARRAY_SIZE(paths); i++) { + memset(&entry, 0, sizeof(entry)); + entry.path = paths[i]; + entry.mode = GIT_FILEMODE_BLOB; + cl_git_pass(git_index_add_frombuffer(index, &entry, paths[i], + strlen(paths[i]) + 1)); + } + cl_assert_equal_sz(git_index_entrycount(index), ARRAY_SIZE(paths)); + cl_git_pass(git_index_write(index)); git_index_free(index); cl_git_pass(git_repository_index(&index, g_repo)); cl_assert(git_index_version(index) == 4); - entry = git_index_get_bypath(index, "exec_off", 0); - cl_assert(entry); - entry = git_index_get_bypath(index, "exec_off2on_staged", 0); - cl_assert(entry); - entry = git_index_get_bypath(index, "exec_on", 0); - cl_assert(entry); + for (i = 0; i < ARRAY_SIZE(paths); i++) { + const git_index_entry *e; + + cl_assert(e = git_index_get_bypath(index, paths[i], 0)); + cl_assert_equal_s(paths[i], e->path); + } git_index_free(index); }