mirror of
https://git.proxmox.com/git/libgit2
synced 2025-07-06 03:04:58 +00:00
Match binary file check of core git in diff
Core git only looks for NUL bytes when deciding whether a file is binary inside diff (although it uses a better algorithm in checkout, when deciding whether CRLF conversion should be done). Libgit2 was using the better algorithm in both places, but that was causing some confusion. For now, this makes diff look only for NUL bytes when deciding by content whether a file is binary.
This commit is contained in:
parent
d0b14cea0e
commit
0d65acade8
@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf)
|
||||
return ((printable >> 7) < nonprintable);
|
||||
}
|
||||
|
||||
bool git_buf_text_contains_nul(const git_buf *buf)
|
||||
{
|
||||
return (strnlen(buf->ptr, buf->size) != buf->size);
|
||||
}
|
||||
|
||||
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
|
||||
{
|
||||
const char *ptr;
|
||||
|
@ -70,6 +70,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
|
||||
*/
|
||||
extern bool git_buf_text_is_binary(const git_buf *buf);
|
||||
|
||||
/**
|
||||
* Check quickly if buffer contains a NUL byte
|
||||
*
|
||||
* @param buf Buffer to check
|
||||
* @return true if buffer contains a NUL byte
|
||||
*/
|
||||
extern bool git_buf_text_contains_nul(const git_buf *buf);
|
||||
|
||||
/**
|
||||
* Check if a buffer begins with a UTF BOM
|
||||
*
|
||||
|
@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content(
|
||||
GIT_UNUSED(ctxt);
|
||||
|
||||
if ((file->flags & KNOWN_BINARY_FLAGS) == 0) {
|
||||
if (git_buf_text_is_binary(&search))
|
||||
/* TODO: provide encoding / binary detection callbacks that can
|
||||
* be UTF-8 aware, etc. For now, instead of trying to be smart,
|
||||
* let's just use the simple NUL-byte detection that core git uses.
|
||||
*/
|
||||
/* previously was: if (git_buf_text_is_binary(&search)) */
|
||||
if (git_buf_text_contains_nul(&search))
|
||||
file->flags |= GIT_DIFF_FILE_BINARY;
|
||||
else
|
||||
file->flags |= GIT_DIFF_FILE_NOT_BINARY;
|
||||
|
@ -704,3 +704,26 @@ void test_core_buffer__base64(void)
|
||||
|
||||
git_buf_free(&buf);
|
||||
}
|
||||
|
||||
void test_core_buffer__classify_with_utf8(void)
|
||||
{
|
||||
char *data0 = "Simple text\n";
|
||||
size_t data0len = 12;
|
||||
char *data1 = "Is that UTF-8 data I see…\nYep!\n";
|
||||
size_t data1len = 31;
|
||||
char *data2 = "Internal NUL!!!\000\n\nI see you!\n";
|
||||
size_t data2len = 29;
|
||||
git_buf b;
|
||||
|
||||
b.ptr = data0; b.size = b.asize = data0len;
|
||||
cl_assert(!git_buf_text_is_binary(&b));
|
||||
cl_assert(!git_buf_text_contains_nul(&b));
|
||||
|
||||
b.ptr = data1; b.size = b.asize = data1len;
|
||||
cl_assert(git_buf_text_is_binary(&b));
|
||||
cl_assert(!git_buf_text_contains_nul(&b));
|
||||
|
||||
b.ptr = data2; b.size = b.asize = data2len;
|
||||
cl_assert(git_buf_text_is_binary(&b));
|
||||
cl_assert(git_buf_text_contains_nul(&b));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user