Match binary file check of core git in diff

Core git just looks for NUL bytes when deciding whether a file is
binary inside diff (although it uses a better algorithm in
checkout, when deciding whether CRLF conversion should be done).
Libgit2 was using the better algorithm in both places, but that
is causing some confusion.  For now, this makes diff just look
for NUL bytes to decide if a file is binary by content in diff.
This commit is contained in:
Russell Belfer 2013-01-11 11:24:26 -08:00
parent d0b14cea0e
commit 0d65acade8
4 changed files with 42 additions and 1 deletions

View File

@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf)
return ((printable >> 7) < nonprintable);
}
/*
 * Report whether the first buf->size bytes at buf->ptr include a NUL byte.
 *
 * The scan uses memchr, which is part of ISO C, instead of strnlen, which
 * is a POSIX extension.  An empty buffer is reported as containing no NUL
 * without passing buf->ptr to the library at all.
 *
 * @param buf Buffer to check
 * @return true if a NUL byte is found within buf->size bytes, else false
 */
bool git_buf_text_contains_nul(const git_buf *buf)
{
	return (buf->size > 0 && memchr(buf->ptr, '\0', buf->size) != NULL);
}
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
{
const char *ptr;

View File

@ -70,6 +70,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
*/
extern bool git_buf_text_is_binary(const git_buf *buf);
/**
 * Check quickly if buffer contains a NUL byte
 *
 * Only the `size` bytes of content starting at the buffer's `ptr` are
 * examined; no other classification of the data is performed.
 *
 * @param buf Buffer to check
 * @return true if buffer contains a NUL byte, false otherwise
 */
extern bool git_buf_text_contains_nul(const git_buf *buf);
/**
* Check if a buffer begins with a UTF BOM
*

View File

@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content(
GIT_UNUSED(ctxt);
if ((file->flags & KNOWN_BINARY_FLAGS) == 0) {
if (git_buf_text_is_binary(&search))
/* TODO: provide encoding / binary detection callbacks that can
* be UTF-8 aware, etc. For now, instead of trying to be smart,
* let's just use the simple NUL-byte detection that core git uses.
*/
/* previously was: if (git_buf_text_is_binary(&search)) */
if (git_buf_text_contains_nul(&search))
file->flags |= GIT_DIFF_FILE_BINARY;
else
file->flags |= GIT_DIFF_FILE_NOT_BINARY;

View File

@ -704,3 +704,26 @@ void test_core_buffer__base64(void)
git_buf_free(&buf);
}
/*
 * Exercise git_buf_text_is_binary and git_buf_text_contains_nul against
 * three fixtures: plain ASCII text, text containing a multi-byte UTF-8
 * character, and text with an embedded NUL byte.
 */
void test_core_buffer__classify_with_utf8(void)
{
	/*
	 * The fixtures are arrays rather than pointers so that sizeof gives
	 * the exact byte count (minus the implicit terminator).  Counting by
	 * hand is error prone: the ellipsis in data1 occupies three bytes in
	 * UTF-8, and strlen would stop at the embedded NUL in data2.
	 */
	char data0[] = "Simple text\n";
	size_t data0len = sizeof(data0) - 1;
	char data1[] = "Is that UTF-8 data I see…\nYep!\n";
	size_t data1len = sizeof(data1) - 1;
	char data2[] = "Internal NUL!!!\000\n\nI see you!\n";
	size_t data2len = sizeof(data2) - 1;
	git_buf b;

	/* plain ASCII: neither check should fire */
	b.ptr = data0; b.size = b.asize = data0len;
	cl_assert(!git_buf_text_is_binary(&b));
	cl_assert(!git_buf_text_contains_nul(&b));

	/* UTF-8 text: flagged by is_binary, but it has no NUL byte */
	b.ptr = data1; b.size = b.asize = data1len;
	cl_assert(git_buf_text_is_binary(&b));
	cl_assert(!git_buf_text_contains_nul(&b));

	/* embedded NUL: both checks should fire */
	b.ptr = data2; b.size = b.asize = data2len;
	cl_assert(git_buf_text_is_binary(&b));
	cl_assert(git_buf_text_contains_nul(&b));
}