From aeb5ee5ab50a062aac02ca084b02582430669808 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 17 May 2016 15:40:46 -0400 Subject: [PATCH 1/2] varint: Add varint encoding/decoding This code is ported from git.git Signed-off-by: Junio C Hamano Signed-off-by: David Turner --- src/varint.c | 44 +++++++++++++++++++++++++++++++++++++++++++ src/varint.h | 15 +++++++++++++++ tests/core/encoding.c | 39 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 src/varint.c create mode 100644 src/varint.h create mode 100644 tests/core/encoding.c diff --git a/src/varint.c b/src/varint.c new file mode 100644 index 000000000..2f868607c --- /dev/null +++ b/src/varint.c @@ -0,0 +1,44 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "common.h" +#include "varint.h" + +uintmax_t git_decode_varint(const unsigned char *bufp, size_t *varint_len) +{ + const unsigned char *buf = bufp; + unsigned char c = *buf++; + uintmax_t val = c & 127; + while (c & 128) { + val += 1; + if (!val || MSB(val, 7)) { + /* This is not a valid varint_len, so it signals + the error */ + *varint_len = 0; + return 0; /* overflow */ + } + c = *buf++; + val = (val << 7) + (c & 127); + } + *varint_len = buf - bufp; + return val; +} + +int git_encode_varint(unsigned char *buf, size_t bufsize, uintmax_t value) +{ + unsigned char varint[16]; + unsigned pos = sizeof(varint) - 1; + varint[pos] = value & 127; + while (value >>= 7) + varint[--pos] = 128 | (--value & 127); + if (buf) { + if (bufsize < pos) + return -1; + memcpy(buf, varint + pos, sizeof(varint) - pos); + } + return sizeof(varint) - pos; +} diff --git a/src/varint.h b/src/varint.h new file mode 100644 index 000000000..650ec7d2a --- /dev/null +++ b/src/varint.h @@ -0,0 +1,15 @@ +/* + * Copyright (C) the libgit2 contributors. 
All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_varint_h__ +#define INCLUDE_varint_h__ + +#include <stdint.h> + +extern int git_encode_varint(unsigned char *, size_t, uintmax_t); +extern uintmax_t git_decode_varint(const unsigned char *, size_t *); + +#endif diff --git a/tests/core/encoding.c b/tests/core/encoding.c new file mode 100644 index 000000000..7d91720f4 --- /dev/null +++ b/tests/core/encoding.c @@ -0,0 +1,39 @@ +#include "clar_libgit2.h" +#include "varint.h" + +void test_core_encoding__decode(void) +{ + const unsigned char *buf = (unsigned char *)"AB"; + size_t size; + + cl_assert(git_decode_varint(buf, &size) == 65); + cl_assert(size == 1); + + buf = (unsigned char *)"\xfe\xdc\xbaXY"; + cl_assert(git_decode_varint(buf, &size) == 267869656); + cl_assert(size == 4); + + buf = (unsigned char *)"\xaa\xaa\xfe\xdc\xbaXY"; + cl_assert(git_decode_varint(buf, &size) == 1489279344088ULL); + cl_assert(size == 6); + + buf = (unsigned char *)"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xfe\xdc\xbaXY"; + cl_assert(git_decode_varint(buf, &size) == 0); + cl_assert(size == 0); + +} + +void test_core_encoding__encode(void) +{ + unsigned char buf[100]; + cl_assert(git_encode_varint(buf, 100, 65) == 1); + cl_assert(buf[0] == 'A'); + + cl_assert(git_encode_varint(buf, 100, 267869656) == 4); + cl_assert(!memcmp(buf, "\xfe\xdc\xbaX", 4)); + + cl_assert(git_encode_varint(buf, 100, 1489279344088ULL) == 6); + cl_assert(!memcmp(buf, "\xaa\xaa\xfe\xdc\xbaX", 6)); + + cl_assert(git_encode_varint(buf, 1, 1489279344088ULL) == -1); +} From 5625d86b994fd81f1b0d887890e8168d7b5f46cc Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 17 May 2016 15:40:32 -0400 Subject: [PATCH 2/2] index: support index v4 Support reading and writing index v4. Index v4 uses a very simple compression scheme for pathnames, but is otherwise similar to index v3.
Signed-off-by: David Turner --- CHANGELOG.md | 5 ++ include/git2/index.h | 25 +++++++ src/index.c | 147 +++++++++++++++++++++++++++++++++--------- src/index.h | 2 + tests/index/version.c | 41 ++++++++++++ 5 files changed, 191 insertions(+), 29 deletions(-) create mode 100644 tests/index/version.c diff --git a/CHANGELOG.md b/CHANGELOG.md index 92bc0c1f2..e4fd68dfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ v0.24 + 1 * Do not fail when deleting remotes in the presence of broken global configs which contain branches. +* Support for reading and writing git index v4 files + ### API additions * You can now get the user-agent used by libgit2 using the @@ -49,6 +51,9 @@ v0.24 + 1 * `git_diff_from_buffer` can create a `git_diff` object from the contents of a git-style patch file. +* `git_index_version()` and `git_index_set_version()` to get and set + the index version + ### API removals * `git_blob_create_fromchunks()` has been removed in favour of diff --git a/include/git2/index.h b/include/git2/index.h index 466765be3..e58b3287e 100644 --- a/include/git2/index.h +++ b/include/git2/index.h @@ -251,6 +251,31 @@ GIT_EXTERN(int) git_index_caps(const git_index *index); */ GIT_EXTERN(int) git_index_set_caps(git_index *index, int caps); +/** + * Get index on-disk version. + * + * Valid return values are 2, 3, or 4. If 3 is returned, an index + * with version 2 may be written instead, if the extension data in + * version 3 is not necessary. + * + * @param index An existing index object + * @return the index version + */ +GIT_EXTERN(unsigned int) git_index_version(git_index *index); + +/** + * Set index on-disk version. + * + * Valid values are 2, 3, or 4. If 2 is given, git_index_write may + * write an index with version 3 instead, if necessary to accurately + * represent the index. 
+ * + * @param index An existing index object + * @param version The new version number + * @return 0 on success, -1 on failure + */ +GIT_EXTERN(int) git_index_set_version(git_index *index, unsigned int version); + /** * Update the contents of an existing index object in memory by reading * from the hard disk. diff --git a/src/index.c b/src/index.c index 9908ba64b..bc15959a8 100644 --- a/src/index.c +++ b/src/index.c @@ -19,6 +19,7 @@ #include "blob.h" #include "idxmap.h" #include "diff.h" +#include "varint.h" #include "git2/odb.h" #include "git2/oid.h" @@ -65,8 +66,11 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ; static const size_t INDEX_HEADER_SIZE = 12; -static const unsigned int INDEX_VERSION_NUMBER = 2; +static const unsigned int INDEX_VERSION_NUMBER_DEFAULT = 2; +static const unsigned int INDEX_VERSION_NUMBER_LB = 2; static const unsigned int INDEX_VERSION_NUMBER_EXT = 3; +static const unsigned int INDEX_VERSION_NUMBER_COMP = 4; +static const unsigned int INDEX_VERSION_NUMBER_UB = 4; static const unsigned int INDEX_HEADER_SIG = 0x44495243; static const char INDEX_EXT_TREECACHE_SIG[] = {'T', 'R', 'E', 'E'}; @@ -434,6 +438,7 @@ int git_index_open(git_index **index_out, const char *index_path) index->entries_search = git_index_entry_srch; index->entries_search_path = index_entry_srch_path; index->reuc_search = reuc_srch; + index->version = INDEX_VERSION_NUMBER_DEFAULT; if (index_path != NULL && (error = git_index_read(index, true)) < 0) goto fail; @@ -747,6 +752,28 @@ done: return 0; } +unsigned git_index_version(git_index *index) +{ + assert(index); + + return index->version; +} + +int git_index_set_version(git_index *index, unsigned int version) +{ + assert(index); + + if (version < INDEX_VERSION_NUMBER_LB || + version > INDEX_VERSION_NUMBER_UB) { + giterr_set(GITERR_INDEX, "Invalid version number"); + return -1; + } + + index->version = version; + + return 0; +} + 
int git_index_write(git_index *index) { git_indexwriter writer = GIT_INDEXWRITER_INIT; @@ -2262,12 +2289,15 @@ static size_t read_entry( git_index_entry **out, git_index *index, const void *buffer, - size_t buffer_size) + size_t buffer_size, + const char **last) { size_t path_length, entry_size; const char *path_ptr; struct entry_short source; git_index_entry entry = {{0}}; + bool compressed = index->version >= INDEX_VERSION_NUMBER_COMP; + char *tmp_path = NULL; if (INDEX_FOOTER_SIZE + minimal_entry_size > buffer_size) return 0; @@ -2302,33 +2332,56 @@ static size_t read_entry( } else path_ptr = (const char *) buffer + offsetof(struct entry_short, path); - path_length = entry.flags & GIT_IDXENTRY_NAMEMASK; + if (!compressed) { + path_length = entry.flags & GIT_IDXENTRY_NAMEMASK; - /* if this is a very long string, we must find its - * real length without overflowing */ - if (path_length == 0xFFF) { - const char *path_end; + /* if this is a very long string, we must find its + * real length without overflowing */ + if (path_length == 0xFFF) { + const char *path_end; - path_end = memchr(path_ptr, '\0', buffer_size); - if (path_end == NULL) + path_end = memchr(path_ptr, '\0', buffer_size); + if (path_end == NULL) + return 0; + + path_length = path_end - path_ptr; + } + + if (entry.flags & GIT_IDXENTRY_EXTENDED) + entry_size = long_entry_size(path_length); + else + entry_size = short_entry_size(path_length); + + if (INDEX_FOOTER_SIZE + entry_size > buffer_size) return 0; - path_length = path_end - path_ptr; + entry.path = (char *)path_ptr; + } else { + size_t varint_len; + size_t shared = git_decode_varint((const unsigned char *)path_ptr, + &varint_len); + size_t len = strlen(path_ptr + varint_len); + size_t last_len = strlen(*last); + size_t tmp_path_len; + + if (varint_len == 0) + return index_error_invalid("incorrect prefix length"); + + GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1); + tmp_path = git__malloc(tmp_path_len); + GITERR_CHECK_ALLOC(tmp_path); + 
memcpy(tmp_path, last, last_len); + memcpy(tmp_path + last_len, path_ptr + varint_len, len); + entry_size = long_entry_size(shared + len); + entry.path = tmp_path; } - if (entry.flags & GIT_IDXENTRY_EXTENDED) - entry_size = long_entry_size(path_length); - else - entry_size = short_entry_size(path_length); - - if (INDEX_FOOTER_SIZE + entry_size > buffer_size) - return 0; - - entry.path = (char *)path_ptr; - - if (index_entry_dup(out, index, &entry) < 0) + if (index_entry_dup(out, index, &entry) < 0) { + git__free(tmp_path); return 0; + } + git__free(tmp_path); return entry_size; } @@ -2341,8 +2394,8 @@ static int read_header(struct index_header *dest, const void *buffer) return index_error_invalid("incorrect header signature"); dest->version = ntohl(source->version); - if (dest->version != INDEX_VERSION_NUMBER_EXT && - dest->version != INDEX_VERSION_NUMBER) + if (dest->version < INDEX_VERSION_NUMBER_LB || + dest->version > INDEX_VERSION_NUMBER_UB) return index_error_invalid("incorrect header version"); dest->entry_count = ntohl(source->entry_count); @@ -2395,6 +2448,8 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) unsigned int i; struct index_header header = { 0 }; git_oid checksum_calculated, checksum_expected; + const char **last = NULL; + const char *empty = ""; #define seek_forward(_increase) { \ if (_increase >= buffer_size) { \ @@ -2415,6 +2470,10 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) if ((error = read_header(&header, buffer)) < 0) return error; + index->version = header.version; + if (index->version >= INDEX_VERSION_NUMBER_COMP) + last = &empty; + seek_forward(INDEX_HEADER_SIZE); assert(!index->entries.length); @@ -2427,7 +2486,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) /* Parse all the entries */ for (i = 0; i < header.entry_count && buffer_size > INDEX_FOOTER_SIZE; ++i) { git_index_entry *entry; - size_t entry_size = read_entry(&entry,
index, buffer, buffer_size); + size_t entry_size = read_entry(&entry, index, buffer, buffer_size, last); /* 0 bytes read means an object corruption */ if (entry_size == 0) { @@ -2518,15 +2577,31 @@ static bool is_index_extended(git_index *index) return (extended > 0); } -static int write_disk_entry(git_filebuf *file, git_index_entry *entry) +static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last) { void *mem = NULL; struct entry_short *ondisk; size_t path_len, disk_size; char *path; + const char *path_start = entry->path; + size_t same_len = 0; path_len = ((struct entry_internal *)entry)->pathlen; + if (last) { + const char *last_c = *last; + + while (*path_start == *last_c) { + if (!*path_start || !*last_c) + break; + ++path_start; + ++last_c; + ++same_len; + } + path_len -= same_len; + *last = entry->path; + } + if (entry->flags & GIT_IDXENTRY_EXTENDED) disk_size = long_entry_size(path_len); else @@ -2574,7 +2649,12 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry) else path = ondisk->path; - memcpy(path, entry->path, path_len); + if (last) { + path += git_encode_varint((unsigned char *) path, + disk_size, + path_len - same_len); + } + memcpy(path, path_start, path_len); return 0; } @@ -2585,6 +2665,8 @@ static int write_entries(git_index *index, git_filebuf *file) size_t i; git_vector case_sorted, *entries; git_index_entry *entry; + const char **last = NULL; + const char *empty = ""; /* If index->entries is sorted case-insensitively, then we need * to re-sort it case-sensitively before writing */ @@ -2596,8 +2678,11 @@ static int write_entries(git_index *index, git_filebuf *file) entries = &index->entries; } + if (index->version >= INDEX_VERSION_NUMBER_COMP) + last = &empty; + git_vector_foreach(entries, i, entry) - if ((error = write_disk_entry(file, entry)) < 0) + if ((error = write_disk_entry(file, entry, last)) < 0) break; if (index->ignore_case) @@ -2762,8 +2847,12 @@ static int write_index(git_oid
*checksum, git_index *index, git_filebuf *file) assert(index && file); - is_extended = is_index_extended(index); - index_version_number = is_extended ? INDEX_VERSION_NUMBER_EXT : INDEX_VERSION_NUMBER; + if (index->version <= INDEX_VERSION_NUMBER_EXT) { + is_extended = is_index_extended(index); + index_version_number = is_extended ? INDEX_VERSION_NUMBER_EXT : INDEX_VERSION_NUMBER_LB; + } else { + index_version_number = index->version; + } header.signature = htonl(INDEX_HEADER_SIG); header.version = htonl(index_version_number); diff --git a/src/index.h b/src/index.h index 8b9b49498..9918f140d 100644 --- a/src/index.h +++ b/src/index.h @@ -46,6 +46,8 @@ struct git_index { git_vector_cmp entries_search; git_vector_cmp entries_search_path; git_vector_cmp reuc_search; + + unsigned int version; }; struct git_index_conflict_iterator { diff --git a/tests/index/version.c b/tests/index/version.c new file mode 100644 index 000000000..3fd240d3c --- /dev/null +++ b/tests/index/version.c @@ -0,0 +1,41 @@ +#include "clar_libgit2.h" +#include "index.h" + +static git_repository *g_repo = NULL; + +void test_index_version__can_write_v4(void) +{ + git_index *index; + const git_index_entry *entry; + + g_repo = cl_git_sandbox_init("filemodes"); + cl_git_pass(git_repository_index(&index, g_repo)); + + cl_assert(index->on_disk); + cl_assert(git_index_version(index) == 2); + + cl_assert(git_index_entrycount(index) == 6); + + cl_git_pass(git_index_set_version(index, 4)); + + cl_git_pass(git_index_write(index)); + git_index_free(index); + + cl_git_pass(git_repository_index(&index, g_repo)); + cl_assert(git_index_version(index) == 4); + + entry = git_index_get_bypath(index, "exec_off", 0); + cl_assert(entry); + entry = git_index_get_bypath(index, "exec_off2on_staged", 0); + cl_assert(entry); + entry = git_index_get_bypath(index, "exec_on", 0); + cl_assert(entry); + + git_index_free(index); +} + +void test_index_version__cleanup(void) +{ + cl_git_sandbox_cleanup(); + g_repo = NULL; +}