Make binary detection work similarly to vanilla git

Main change: Don't treat bytes >= 128 as non-printable (they are common in UTF-8 files)

Signed-off-by: Sven Strickroth <email@cs-ware.de>
This commit is contained in:
Sven Strickroth 2014-11-13 19:30:47 +01:00 committed by Edward Thomson
parent 2136240dbd
commit 0161e096a3
3 changed files with 8 additions and 2 deletions

View File

@ -3,6 +3,9 @@ v0.22 + 1
### Changes or improvements
* Updated binary identification in CRLF filtering to avoid false positives in
UTF-8 files.
* Rename and copy detection is enabled for small files.
### API additions

View File

@ -191,7 +191,10 @@ bool git_buf_text_is_binary(const git_buf *buf)
while (scan < end) {
unsigned char c = *scan++;
if (c > 0x1F && c < 0x7F)
/* Printable characters are SPACE (0x20) and everything above it,
* excluding DEL (0x7F), plus the control characters BS, ESC and FF.
*/
if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
printable++;
else if (c == '\0')
return true;

View File

@ -830,7 +830,7 @@ void test_core_buffer__classify_with_utf8(void)
cl_assert(!git_buf_text_contains_nul(&b));
b.ptr = data1; b.size = b.asize = data1len;
cl_assert(git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_contains_nul(&b));
b.ptr = data2; b.size = b.asize = data2len;