From d6d34cd0f49621c6f5ea992820c4564f5e968b73 Mon Sep 17 00:00:00 2001 From: crazymaster Date: Sat, 13 Jul 2013 02:10:16 +0900 Subject: [PATCH 1/7] Add test for multi-byte characters --- tests-clar/object/blob/filter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 042bddab7..50b4af4f4 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -5,7 +5,7 @@ #include "buf_text.h" static git_repository *g_repo = NULL; -#define NUM_TEST_OBJECTS 8 +#define NUM_TEST_OBJECTS 9 static git_oid g_oids[NUM_TEST_OBJECTS]; static const char *g_raw[NUM_TEST_OBJECTS] = { "", @@ -15,9 +15,10 @@ static const char *g_raw[NUM_TEST_OBJECTS] = { "foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r", "123\n\000\001\002\003\004abc\255\254\253\r\n", "\xEF\xBB\xBFThis is UTF-8\n", + "\xEF\xBB\xBFほげほげ\r\nほげほげ\r\n", "\xFE\xFF\x00T\x00h\x00i\x00s\x00!" }; -static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, 12 }; +static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, -1, 12 }; static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = { { 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 2, 0, 6, 0 }, @@ -26,6 +27,7 @@ static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = { { 0, 0, 4, 4, 1, 31, 0 }, { 0, 1, 1, 2, 1, 9, 5 }, { GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 }, + { GIT_BOM_UTF8, 0, 2, 2, 2, 27, 0 }, { GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 }, }; static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { @@ -36,6 +38,7 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { { "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 }, { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }, { "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 }, + { "\xEF\xBB\xBFほげほげ\nほげほげ\n", 0, 29 }, { "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 } }; From 6550565af387119b080a65d71f77f1261752595b Mon Sep 17 00:00:00 2001 From: crazymaster Date: Sat, 13 Jul 2013 03:02:00 +0900 Subject: [PATCH 2/7] Fix gather_stats --- src/buf_text.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/buf_text.c b/src/buf_text.c index 443454b5f..bc8d04680 100644 --- a/src/buf_text.c +++ b/src/buf_text.c @@ -261,29 +261,34 @@ bool git_buf_text_gather_stats( /* Counting loop */ while (scan < end) { unsigned char c = *scan++; - - if ((c > 0x1F && c < 0x7F) || c > 0x9f) - stats->printable++; - else switch (c) { - case '\0': - stats->nul++; - stats->nonprintable++; - break; - case '\n': - stats->lf++; - break; - case '\r': - stats->cr++; - if (scan < end && *scan == '\n') - stats->crlf++; - break; - case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ + if (c == '\r') { + stats->cr++; + if (scan < end && *scan == '\n') + stats->crlf++; + continue; + } + if (c == '\n') { + stats->lf++; + continue; + } + if (c == 127) + /* DEL */ + stats->nonprintable++; + else if (c < 32) { + switch (c) { + /* BS, HT, ESC and FF */ + case '\b': case '\t': case '\033': case '\014': stats->printable++; break; + case 0: + stats->nul++; + /* fall through */ default: stats->nonprintable++; - break; } + } + else + stats->printable++; } return (stats->nul > 0 || From a91e4d6b21e141c2abc76b65b2d4c91d5d3e03cc Mon Sep 17 00:00:00 2001 From: crazymaster Date: Mon, 15 Jul 2013 07:19:42 +0900 Subject: [PATCH 3/7] Replace Japanese characters with the encoded hexadecimal values --- tests-clar/object/blob/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 50b4af4f4..b5e9bb298 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -15,7 +15,7 @@ static const char *g_raw[NUM_TEST_OBJECTS] = { "foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r", "123\n\000\001\002\003\004abc\255\254\253\r\n", "\xEF\xBB\xBFThis is UTF-8\n", - "\xEF\xBB\xBFほげほげ\r\nほげほげ\r\n", + "\xEF\xBB\xBF0xE30x810x0xBB0xE30x810x920xE30x810x0xBB0xE30x810x92\r\n0xE30x810x0xBB0xE30x810x920xE30x810x0xBB0xE30x810x92\r\n", "\xFE\xFF\x00T\x00h\x00i\x00s\x00!" }; static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, -1, 12 }; @@ -38,7 +38,7 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { { "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 }, { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }, { "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 }, - { "\xEF\xBB\xBFほげほげ\nほげほげ\n", 0, 29 }, + { "\xEF\xBB\xBF0xE30x810x0xBB0xE30x810x920xE30x810x0xBB0xE30x810x92\n0xE30x810x0xBB0xE30x810x920xE30x810x0xBB0xE30x810x92\n", 0, 29 }, { "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 } }; From 19bee769d49467704f4d8ee36965c548cb40e3c6 Mon Sep 17 00:00:00 2001 From: crazymaster Date: Mon, 15 Jul 2013 07:39:16 +0900 Subject: [PATCH 4/7] Revert "Replace Japanese characters with the encoded hexadecimal values" This reverts commit a91e4d6b21e141c2abc76b65b2d4c91d5d3e03cc. --- tests-clar/object/blob/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index b5e9bb298..50b4af4f4 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -15,7 +15,7 @@ static const char *g_raw[NUM_TEST_OBJECTS] = { "foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r", "123\n\000\001\002\003\004abc\255\254\253\r\n", "\xEF\xBB\xBFThis is UTF-8\n", - "\xEF\xBB\xBF0xE30x810x0xBB0xE30x810x920xE30x810x0xBB0xE30x810x92\r\n0xE30x810x0xBB0xE30x810x920xE30x810x0xBB0xE30x810x92\r\n", + "\xEF\xBB\xBFほげほげ\r\nほげほげ\r\n", "\xFE\xFF\x00T\x00h\x00i\x00s\x00!" }; static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, -1, 12 }; @@ -38,7 +38,7 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { { "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 }, { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }, { "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 }, - { "\xEF\xBB\xBF0xE30x810x0xBB0xE30x810x920xE30x810x0xBB0xE30x810x92\n0xE30x810x0xBB0xE30x810x920xE30x810x0xBB0xE30x810x92\n", 0, 29 }, + { "\xEF\xBB\xBFほげほげ\nほげほげ\n", 0, 29 }, { "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 } }; From b74d4478df8c6d62c96b3bd067ae1987209583a6 Mon Sep 17 00:00:00 2001 From: crazymaster Date: Mon, 15 Jul 2013 07:41:39 +0900 Subject: [PATCH 5/7] Fix the initial line --- src/buf_text.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/src/buf_text.c b/src/buf_text.c index bc8d04680..472339def 100644 --- a/src/buf_text.c +++ b/src/buf_text.c @@ -261,34 +261,29 @@ bool git_buf_text_gather_stats( /* Counting loop */ while (scan < end) { unsigned char c = *scan++; - if (c == '\r') { - stats->cr++; - if (scan < end && *scan == '\n') - stats->crlf++; - continue; - } - if (c == '\n') { - stats->lf++; - continue; - } - if (c == 127) - /* DEL */ - stats->nonprintable++; - else if (c < 32) { - switch (c) { - /* BS, HT, ESC and FF */ - case '\b': case '\t': case '\033': case '\014': + + if (c > 0x1F && c != 0x7F) + stats->printable++; + else switch (c) { + case '\0': + stats->nul++; + stats->nonprintable++; + break; + case '\n': + stats->lf++; + break; + case '\r': + stats->cr++; + if (scan < end && *scan == '\n') + stats->crlf++; + break; + case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ stats->printable++; break; - case 0: - stats->nul++; - /* fall through */ default: stats->nonprintable++; + break; } - } - else - stats->printable++; } return (stats->nul > 0 || From 2185dd6f99474b69287e6f3cd2e4a24c3a75155b Mon Sep 17 00:00:00 2001 From: crazymaster Date: Mon, 15 Jul 2013 07:59:04 +0900 Subject: [PATCH 6/7] Fix typo --- tests-clar/object/blob/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 50b4af4f4..6293046d3 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -15,7 +15,7 @@ static const char *g_raw[NUM_TEST_OBJECTS] = { "foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r", "123\n\000\001\002\003\004abc\255\254\253\r\n", "\xEF\xBB\xBFThis is UTF-8\n", - "\xEF\xBB\xBFほげほげ\r\nほげほげ\r\n", + "\xEF\xBB\xBF0xE30x810xBB0xE30x810x920xE30x810xBB0xE30x810x92\r\n0xE30x810xBB0xE30x810x920xE30x810xBB0xE30x810x92\r\n", "\xFE\xFF\x00T\x00h\x00i\x00s\x00!" }; static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, -1, 12 }; @@ -38,7 +38,7 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { { "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 }, { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }, { "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 }, - { "\xEF\xBB\xBFほげほげ\nほげほげ\n", 0, 29 }, + { "\xEF\xBB\xBF0xE30x810xBB0xE30x810x920xE30x810xBB0xE30x810x92\n0xE30x810xBB0xE30x810x920xE30x810xBB0xE30x810x92\n", 0, 29 }, { "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 } }; From d0b25d9dff363976eea92509c359ca8e08aaebb5 Mon Sep 17 00:00:00 2001 From: crazymaster Date: Mon, 15 Jul 2013 08:14:00 +0900 Subject: [PATCH 7/7] Fix --- tests-clar/object/blob/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 6293046d3..2b3954d9c 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -15,7 +15,7 @@ static const char *g_raw[NUM_TEST_OBJECTS] = { "foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r", "123\n\000\001\002\003\004abc\255\254\253\r\n", "\xEF\xBB\xBFThis is UTF-8\n", - "\xEF\xBB\xBF0xE30x810xBB0xE30x810x920xE30x810xBB0xE30x810x92\r\n0xE30x810xBB0xE30x810x920xE30x810xBB0xE30x810x92\r\n", + "\xEF\xBB\xBF\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\r\n\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\r\n", "\xFE\xFF\x00T\x00h\x00i\x00s\x00!" }; static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, -1, 12 }; @@ -38,7 +38,7 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { { "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 }, { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }, { "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 }, - { "\xEF\xBB\xBF0xE30x810xBB0xE30x810x920xE30x810xBB0xE30x810x92\n0xE30x810xBB0xE30x810x920xE30x810xBB0xE30x810x92\n", 0, 29 }, + { "\xEF\xBB\xBF\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\n\xE3\x81\xBB\xE3\x81\x92\xE3\x81\xBB\xE3\x81\x92\n", 0, 29 }, { "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 } };