From 0161e096a30912e0721cf3e6446595d3400d55b7 Mon Sep 17 00:00:00 2001 From: Sven Strickroth Date: Thu, 13 Nov 2014 19:30:47 +0100 Subject: [PATCH 1/2] Make binary detection work similar to vanilla git Main change: Don't treat chars > 128 as non-printable (common in UTF-8 files) Signed-off-by: Sven Strickroth --- CHANGELOG.md | 3 +++ src/buf_text.c | 5 ++++- tests/core/buffer.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96bd9a16e..e1c02f965 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ v0.22 + 1 ### Changes or improvements +* Updated binary identification in CRLF filtering to avoid false positives in + UTF-8 files. + * Rename and copy detection is enabled for small files. ### API additions diff --git a/src/buf_text.c b/src/buf_text.c index cead599f4..cb3661edb 100644 --- a/src/buf_text.c +++ b/src/buf_text.c @@ -191,7 +191,10 @@ bool git_buf_text_is_binary(const git_buf *buf) while (scan < end) { unsigned char c = *scan++; - if (c > 0x1F && c < 0x7F) + /* Printable characters are those from SPACE (0x20) upward, excluding DEL, + * and including BS, ESC and FF. 
+ */ + if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014') printable++; else if (c == '\0') return true; diff --git a/tests/core/buffer.c b/tests/core/buffer.c index 87dec4607..d28aa218f 100644 --- a/tests/core/buffer.c +++ b/tests/core/buffer.c @@ -830,7 +830,7 @@ void test_core_buffer__classify_with_utf8(void) cl_assert(!git_buf_text_contains_nul(&b)); b.ptr = data1; b.size = b.asize = data1len; - cl_assert(git_buf_text_is_binary(&b)); + cl_assert(!git_buf_text_is_binary(&b)); cl_assert(!git_buf_text_contains_nul(&b)); b.ptr = data2; b.size = b.asize = data2len; From b4c6a9da9391ed9525010438ced9d125b84c6a3f Mon Sep 17 00:00:00 2001 From: Linquize Date: Sun, 26 Oct 2014 05:45:23 +0100 Subject: [PATCH 2/2] Add files and tests with many UTF-8 chars and few UTF-8 chars --- tests/checkout/crlf.c | 25 ++++++++++++++++++ tests/filter/crlf.h | 5 ++++ .../0e/052888828a954ca17e5882638e3c6a083e75c0 | Bin 0 -> 107 bytes .../9a/6c3533fef19abd6eec8e61206b5c51982b80d9 | Bin 0 -> 58 bytes .../a2/34455d62297f1856c4603686150c59fcb0aafe | Bin 0 -> 189 bytes .../c3/e11722855ff260bd27418988ac1467c4e9e73a | Bin 0 -> 261 bytes .../cd/574f5a2baa4c79504f8837b730fa0b11defe99 | Bin 0 -> 62 bytes .../f4/d25b796d86387205a5498175d66e91d1e5006a | Bin 0 -> 106 bytes tests/resources/crlf/.gitted/refs/heads/utf8 | Bin 41 -> 41 bytes 9 files changed, 30 insertions(+) create mode 100644 tests/resources/crlf/.gitted/objects/0e/052888828a954ca17e5882638e3c6a083e75c0 create mode 100644 tests/resources/crlf/.gitted/objects/9a/6c3533fef19abd6eec8e61206b5c51982b80d9 create mode 100644 tests/resources/crlf/.gitted/objects/a2/34455d62297f1856c4603686150c59fcb0aafe create mode 100644 tests/resources/crlf/.gitted/objects/c3/e11722855ff260bd27418988ac1467c4e9e73a create mode 100644 tests/resources/crlf/.gitted/objects/cd/574f5a2baa4c79504f8837b730fa0b11defe99 create mode 100644 tests/resources/crlf/.gitted/objects/f4/d25b796d86387205a5498175d66e91d1e5006a diff --git a/tests/checkout/crlf.c 
b/tests/checkout/crlf.c index 496f83d5d..b6d4e949a 100644 --- a/tests/checkout/crlf.c +++ b/tests/checkout/crlf.c @@ -106,6 +106,31 @@ void test_checkout_crlf__all_crlf_autocrlf_true(void) check_file_contents("./crlf/all-crlf", ALL_CRLF_TEXT_RAW); } +void test_checkout_crlf__detect_crlf_autocrlf_true_utf8(void) +{ + git_checkout_options opts = GIT_CHECKOUT_OPTIONS_INIT; + opts.checkout_strategy = GIT_CHECKOUT_SAFE_CREATE; + + cl_repo_set_bool(g_repo, "core.autocrlf", true); + + git_repository_set_head(g_repo, "refs/heads/utf8", NULL, NULL); + git_checkout_head(g_repo, &opts); + + if (GIT_EOL_NATIVE == GIT_EOL_LF) + { + check_file_contents("./crlf/few-utf8-chars-lf.txt", FEW_UTF8_LF_RAW); + check_file_contents("./crlf/many-utf8-chars-lf.txt", MANY_UTF8_LF_RAW); + } + else + { + check_file_contents("./crlf/few-utf8-chars-lf.txt", FEW_UTF8_CRLF_RAW); + check_file_contents("./crlf/many-utf8-chars-lf.txt", MANY_UTF8_CRLF_RAW); + } + + check_file_contents("./crlf/few-utf8-chars-crlf.txt", FEW_UTF8_CRLF_RAW); + check_file_contents("./crlf/many-utf8-chars-crlf.txt", MANY_UTF8_CRLF_RAW); +} + void test_checkout_crlf__autocrlf_true_index_size_is_filtered_size(void) { git_index *index; diff --git a/tests/filter/crlf.h b/tests/filter/crlf.h index 9cb98ad4c..786edfc96 100644 --- a/tests/filter/crlf.h +++ b/tests/filter/crlf.h @@ -22,4 +22,9 @@ #define MORE_CRLF_TEXT_AS_LF "crlf\ncrlf\nlf\ncrlf\ncrlf\n" #define MORE_LF_TEXT_AS_LF "lf\nlf\ncrlf\nlf\nlf\n" +#define FEW_UTF8_CRLF_RAW "\xe2\x9a\xbdThe rest is ASCII01.\r\nThe rest is ASCII02.\r\nThe rest is ASCII03.\r\nThe rest is ASCII04.\r\nThe rest is ASCII05.\r\nThe rest is ASCII06.\r\nThe rest is ASCII07.\r\nThe rest is ASCII08.\r\nThe rest is ASCII09.\r\nThe rest is ASCII10.\r\nThe rest is ASCII11.\r\nThe rest is ASCII12.\r\nThe rest is ASCII13.\r\nThe rest is ASCII14.\r\nThe rest is ASCII15.\r\nThe rest is ASCII16.\r\nThe rest is ASCII17.\r\nThe rest is ASCII18.\r\nThe rest is ASCII19.\r\nThe rest is ASCII20.\r\nThe rest is 
ASCII21.\r\nThe rest is ASCII22.\r\n" +#define FEW_UTF8_LF_RAW "\xe2\x9a\xbdThe rest is ASCII01.\nThe rest is ASCII02.\nThe rest is ASCII03.\nThe rest is ASCII04.\nThe rest is ASCII05.\nThe rest is ASCII06.\nThe rest is ASCII07.\nThe rest is ASCII08.\nThe rest is ASCII09.\nThe rest is ASCII10.\nThe rest is ASCII11.\nThe rest is ASCII12.\nThe rest is ASCII13.\nThe rest is ASCII14.\nThe rest is ASCII15.\nThe rest is ASCII16.\nThe rest is ASCII17.\nThe rest is ASCII18.\nThe rest is ASCII19.\nThe rest is ASCII20.\nThe rest is ASCII21.\nThe rest is ASCII22.\n" +#define MANY_UTF8_CRLF_RAW "Lets sing!\r\n\xe2\x99\xab\xe2\x99\xaa\xe2\x99\xac\xe2\x99\xa9\r\nEat food\r\n\xf0\x9f\x8d\x85\xf0\x9f\x8d\x95\r\n" +#define MANY_UTF8_LF_RAW "Lets sing!\n\xe2\x99\xab\xe2\x99\xaa\xe2\x99\xac\xe2\x99\xa9\nEat food\n\xf0\x9f\x8d\x85\xf0\x9f\x8d\x95\n" + #endif diff --git a/tests/resources/crlf/.gitted/objects/0e/052888828a954ca17e5882638e3c6a083e75c0 b/tests/resources/crlf/.gitted/objects/0e/052888828a954ca17e5882638e3c6a083e75c0 new file mode 100644 index 0000000000000000000000000000000000000000..746143f85fbceb0f63fe7c4f6adfec3383afa43a GIT binary patch literal 107 zcmb0{37MT5%ubAoQ@dTYz(`>&5B1@+fN|hH%s!!aS|GO;z*0+-u_f^i9 z_Mgt_vz(XKyG;6*0N2yVU*X+Ri>$72ZFS2Hu+8#b#Uz@4X!ZxSJ)AH~EMF}D+wQ&n HOg;So;|(x` literal 0 HcmV?d00001 diff --git a/tests/resources/crlf/.gitted/objects/9a/6c3533fef19abd6eec8e61206b5c51982b80d9 b/tests/resources/crlf/.gitted/objects/9a/6c3533fef19abd6eec8e61206b5c51982b80d9 new file mode 100644 index 0000000000000000000000000000000000000000..78fc8aeb70ae6140b597378f374170705ab52781 GIT binary patch literal 58 zcmV-A0LA}!0ZYosPf{>3V(>{VDOM=X%u83~dNgzOqnWE7&0O6@VgGux+g zbfhoUX(|o0vzxXsLo%$C*E%2$gLX3Z+FKR65GX0@e6rlJHzdAk;du=sVzjZdgF{RK zy)zgcby8xI=!ksJ28|YUZ@Dh;S>~U6`DNIT|LgdqT9Ghr|^FfcPQQAo_m(M>MONz*MYNwZ+!TAt_bt2+DoLA&j0 z3oUajWVgn2ASr|?t2vkcw6XZn2Km~JD(6=1Gi<&%#}le7EwvnMlx}iHVo@>JXuXn( z5(Yk2jgF?SsXhzqBASx>Y_d4)N)KSyl9LA3@a0l;Wp0~A5$jUV#?otf6E8kx$bxFf 
zP0XvrYRTDf|0wNMK9vFf9p>8&esK%l`!^GZ9;hv|a!if?eVny7?@eE#LUv5x4DE)S zFdg|tsYpTdika6Y=Z&I5=9^_RPVHK{C6r%O5LqQGZ0>YC-YI^sNltZ|_N%&sUt@e8 L3F-g3WAI5WDOM=X%u83~<$5%8^`n`q9?e|yXy!^@F4x2og|z(q U6ke_m^Ltx?cq%U!08z~r+MU}ScK`qY literal 0 HcmV?d00001 diff --git a/tests/resources/crlf/.gitted/objects/f4/d25b796d86387205a5498175d66e91d1e5006a b/tests/resources/crlf/.gitted/objects/f4/d25b796d86387205a5498175d66e91d1e5006a new file mode 100644 index 0000000000000000000000000000000000000000..792b1659d5dccc83f0a48e66e1eb975587e3051b GIT binary patch literal 106 zcmb