diff --git a/include/git2/blob.h b/include/git2/blob.h
index a68c78b5a..30055b614 100644
--- a/include/git2/blob.h
+++ b/include/git2/blob.h
@@ -183,6 +183,19 @@ GIT_EXTERN(int) git_blob_create_fromchunks(
  */
 GIT_EXTERN(int) git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *buffer, size_t len);
 
+/**
+ * Determine whether the content of a blob is most likely binary.
+ *
+ * The heuristic used to guess if a file is binary is taken from core git:
+ * searching for NUL bytes and looking for a reasonable ratio of printable
+ * to non-printable characters among the first 4000 bytes.
+ *
+ * @param blob The blob whose content should be analyzed
+ * @return 1 if the content of the blob is detected as binary;
+ *         0 otherwise.
+ */
+GIT_EXTERN(int) git_blob_is_binary(git_blob *blob);
+
 /** @} */
 GIT_END_DECL
 #endif
diff --git a/src/blob.c b/src/blob.c
index b168df137..811bd850f 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -296,3 +296,19 @@ cleanup:
 	git__free(content);
 	return error;
 }
+
+/* Guess whether a blob is binary by inspecting at most its first 4000 bytes. */
+int git_blob_is_binary(git_blob *blob)
+{
+	/* Zero-initialize so no member is left indeterminate; only ptr and
+	 * size are assigned below. */
+	git_buf content = {0};
+
+	assert(blob);
+
+	/* Point at the object's raw data directly; nothing is copied. */
+	content.ptr = blob->odb_object->raw.data;
+	content.size = min(blob->odb_object->raw.len, 4000);
+
+	return git_buf_text_is_binary(&content);
+}
diff --git a/src/indexer.c b/src/indexer.c
index 2fb780412..b9240f30b 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -201,7 +201,7 @@ static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
 	char buffer[64];
 	size_t hdrlen;
 
-	hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), len, type);
+	hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), (size_t)len, type);
 	git_hash_update(ctx, buffer, hdrlen);
 }
 
@@ -269,11 +269,11 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start,
 
 	crc = crc32(0L, Z_NULL, 0);
 	while (size) {
-		ptr = git_mwindow_open(mwf, &w, start, size, &left);
+		ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
 		if (ptr == NULL)
 			return -1;
 
-		len = min(left, size);
+		len = min(left, (size_t)size);
 		crc = crc32(crc, ptr, len);
 		size -= len;
 		start += len;
diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c
index 3a4398bbd..2ce92b9e5 100644
--- a/src/win32/posix_w32.c
+++ b/src/win32/posix_w32.c
@@ -59,7 +59,6 @@ static int do_lstat(
 {
 	WIN32_FILE_ATTRIBUTE_DATA fdata;
 	wchar_t fbuf[GIT_WIN_PATH], lastch;
-	DWORD last_error;
 	int flen;
 
 	flen = git__utf8_to_16(fbuf, GIT_WIN_PATH, file_name);
diff --git a/tests-clar/diff/blob.c b/tests-clar/diff/blob.c
index d7fdba0e6..8300cb716 100644
--- a/tests-clar/diff/blob.c
+++ b/tests-clar/diff/blob.c
@@ -335,3 +335,15 @@ void test_diff_blob__checks_options_version_too_high(void)
 	err = giterr_last();
 	cl_assert_equal_i(GITERR_INVALID, err->klass);
 }
+
+void test_diff_blob__can_correctly_detect_a_binary_blob_as_binary(void)
+{
+	/* alien.png */
+	cl_assert_equal_i(true, git_blob_is_binary(alien));
+}
+
+void test_diff_blob__can_correctly_detect_a_textual_blob_as_non_binary(void)
+{
+	/* tests/resources/attr/root_test4.txt */
+	cl_assert_equal_i(false, git_blob_is_binary(d));
+}