From 006548da91bfe05375ae0e786c6c13e9bad85a40 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 29 May 2015 16:07:51 -0400 Subject: [PATCH 1/3] git__strcasecmp: treat input bytes as unsigned Treat input bytes as unsigned before doing arithmetic on them, lest we look at some non-ASCII byte (like a UTF-8 character) as a negative value and perform the comparison incorrectly. --- src/util.c | 4 ++-- tests/core/string.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/util.c b/src/util.c index 6bb7d03ee..b8d6cf58c 100644 --- a/src/util.c +++ b/src/util.c @@ -173,7 +173,7 @@ int git__strcasecmp(const char *a, const char *b) { while (*a && *b && tolower(*a) == tolower(*b)) ++a, ++b; - return (tolower(*a) - tolower(*b)); + return ((unsigned char)tolower(*a) - (unsigned char)tolower(*b)); } int git__strcasesort_cmp(const char *a, const char *b) @@ -193,7 +193,7 @@ int git__strcasesort_cmp(const char *a, const char *b) } if (*a || *b) - return tolower(*a) - tolower(*b); + return (unsigned char)tolower(*a) - (unsigned char)tolower(*b); return cmp; } diff --git a/tests/core/string.c b/tests/core/string.c index ec9575685..c6c2d95ac 100644 --- a/tests/core/string.c +++ b/tests/core/string.c @@ -39,3 +39,41 @@ void test_core_string__2(void) cl_assert(git__strcasesort_cmp("BAR", "foo") < 0); cl_assert(git__strcasesort_cmp("fooBar", "foobar") < 0); } + +void test_core_string__strcmp(void) +{ + cl_assert(git__strcmp("", "") == 0); + cl_assert(git__strcmp("foo", "foo") == 0); + cl_assert(git__strcmp("Foo", "foo") < 0); + cl_assert(git__strcmp("foo", "FOO") > 0); + cl_assert(git__strcmp("foo", "fOO") > 0); + + cl_assert(strcmp("rt\303\202of", "rt dev\302\266h") > 0); + cl_assert(strcmp("e\342\202\254ghi=", "et") > 0); + cl_assert(strcmp("rt dev\302\266h", "rt\303\202of") < 0); + cl_assert(strcmp("et", "e\342\202\254ghi=") < 0); + + cl_assert(git__strcmp("rt\303\202of", "rt dev\302\266h") > 0); + 
cl_assert(git__strcmp("e\342\202\254ghi=", "et") > 0); + cl_assert(git__strcmp("rt dev\302\266h", "rt\303\202of") < 0); + cl_assert(git__strcmp("et", "e\342\202\254ghi=") < 0); +} + +void test_core_string__strcasecmp(void) +{ + cl_assert(git__strcasecmp("", "") == 0); + cl_assert(git__strcasecmp("foo", "foo") == 0); + cl_assert(git__strcasecmp("foo", "Foo") == 0); + cl_assert(git__strcasecmp("foo", "FOO") == 0); + cl_assert(git__strcasecmp("foo", "fOO") == 0); + + cl_assert(strcasecmp("rt\303\202of", "rt dev\302\266h") > 0); + cl_assert(strcasecmp("e\342\202\254ghi=", "et") > 0); + cl_assert(strcasecmp("rt dev\302\266h", "rt\303\202of") < 0); + cl_assert(strcasecmp("et", "e\342\202\254ghi=") < 0); + + cl_assert(git__strcasecmp("rt\303\202of", "rt dev\302\266h") > 0); + cl_assert(git__strcasecmp("e\342\202\254ghi=", "et") > 0); + cl_assert(git__strcasecmp("rt dev\302\266h", "rt\303\202of") < 0); + cl_assert(git__strcasecmp("et", "e\342\202\254ghi=") < 0); +} From 75a4636f502908ddd406a69a4b065e29b79276da Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 29 May 2015 16:56:38 -0400 Subject: [PATCH 2/3] git__tolower: a tolower() that isn't dumb Some brain damaged tolower() implementations appear to want to take the locale into account, and this may require taking some insanely aggressive lock on the locale and slowing down what should be the most trivial of trivial calls for people who just want to downcase ASCII. 
--- src/config_file.c | 4 ++-- src/fnmatch.c | 10 +++++----- src/iterator.c | 8 ++++---- src/netops.c | 6 +++--- src/path.c | 2 +- src/util.c | 18 +++++++++--------- src/util.h | 9 +++++++++ 7 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/config_file.c b/src/config_file.c index d6a62b85c..52a5376bd 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -156,7 +156,7 @@ int git_config_file_normalize_section(char *start, char *end) if (end && scan >= end) break; if (isalnum(*scan)) - *scan = (char)tolower(*scan); + *scan = (char)git__tolower(*scan); else if (*scan != '-' || scan == start) return GIT_EINVALIDSPEC; } @@ -1083,7 +1083,7 @@ static int parse_section_header(struct reader *reader, char **section_out) goto fail_parse; } - name[name_length++] = (char) tolower(c); + name[name_length++] = (char)git__tolower(c); } while ((c = line[pos++]) != ']'); diff --git a/src/fnmatch.c b/src/fnmatch.c index d7899e3e6..a2945b8db 100644 --- a/src/fnmatch.c +++ b/src/fnmatch.c @@ -164,8 +164,8 @@ p_fnmatchx(const char *pattern, const char *string, int flags, size_t recurs) default: normal: if (c != *string && !((flags & FNM_CASEFOLD) && - (tolower((unsigned char)c) == - tolower((unsigned char)*string)))) + (git__tolower((unsigned char)c) == + git__tolower((unsigned char)*string)))) return (FNM_NOMATCH); ++string; break; @@ -190,7 +190,7 @@ rangematch(const char *pattern, char test, int flags, char **newp) ++pattern; if (flags & FNM_CASEFOLD) - test = (char)tolower((unsigned char)test); + test = (char)git__tolower((unsigned char)test); /* * A right bracket shall lose its special meaning and represent @@ -207,7 +207,7 @@ rangematch(const char *pattern, char test, int flags, char **newp) if (c == '/' && (flags & FNM_PATHNAME)) return (RANGE_NOMATCH); if ((flags & FNM_CASEFOLD)) - c = (char)tolower((unsigned char)c); + c = (char)git__tolower((unsigned char)c); if (*pattern == '-' && (c2 = *(pattern+1)) != EOS && c2 != ']') { pattern += 2; @@ -216,7 +216,7 @@ 
rangematch(const char *pattern, char test, int flags, char **newp) if (c2 == EOS) return (RANGE_ERROR); if (flags & FNM_CASEFOLD) - c2 = (char)tolower((unsigned char)c2); + c2 = (char)git__tolower((unsigned char)c2); if (c <= test && test <= c2) ok = 1; } else if (c == test) diff --git a/src/iterator.c b/src/iterator.c index c5c5fd7ce..52b4cd724 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -1401,10 +1401,10 @@ GIT_INLINE(bool) workdir_path_is_dotgit(const git_buf *path) if (path->ptr[len - 1] == '/') len--; - if (tolower(path->ptr[len - 1]) != 't' || - tolower(path->ptr[len - 2]) != 'i' || - tolower(path->ptr[len - 3]) != 'g' || - tolower(path->ptr[len - 4]) != '.') + if (git__tolower(path->ptr[len - 1]) != 't' || + git__tolower(path->ptr[len - 2]) != 'i' || + git__tolower(path->ptr[len - 3]) != 'g' || + git__tolower(path->ptr[len - 4]) != '.') return false; return (len == 4 || path->ptr[len - 5] == '/'); diff --git a/src/netops.c b/src/netops.c index 6047cf1ac..5e8075597 100644 --- a/src/netops.c +++ b/src/netops.c @@ -84,7 +84,7 @@ void gitno_consume_n(gitno_buffer *buf, size_t cons) int gitno__match_host(const char *pattern, const char *host) { for (;;) { - char c = tolower(*pattern++); + char c = git__tolower(*pattern++); if (c == '\0') return *host ? 
-1 : 0; @@ -102,7 +102,7 @@ int gitno__match_host(const char *pattern, const char *host) */ while(*host) { - char h = tolower(*host); + char h = git__tolower(*host); if (c == h) return gitno__match_host(pattern, host++); if (h == '.') @@ -112,7 +112,7 @@ int gitno__match_host(const char *pattern, const char *host) return -1; } - if (c != tolower(*host++)) + if (c != git__tolower(*host++)) return -1; } diff --git a/src/path.c b/src/path.c index 81b4d51df..d79153599 100644 --- a/src/path.c +++ b/src/path.c @@ -1471,7 +1471,7 @@ static int32_t next_hfs_char(const char **in, size_t *len) * the ASCII range, which is perfectly fine, because the * git folder name can only be composed of ascii characters */ - return tolower(codepoint); + return git__tolower(codepoint); } return 0; /* NULL byte -- end of string */ } diff --git a/src/util.c b/src/util.c index b8d6cf58c..c62826420 100644 --- a/src/util.c +++ b/src/util.c @@ -171,9 +171,9 @@ int git__strcmp(const char *a, const char *b) int git__strcasecmp(const char *a, const char *b) { - while (*a && *b && tolower(*a) == tolower(*b)) + while (*a && *b && git__tolower(*a) == git__tolower(*b)) ++a, ++b; - return ((unsigned char)tolower(*a) - (unsigned char)tolower(*b)); + return ((unsigned char)git__tolower(*a) - (unsigned char)git__tolower(*b)); } int git__strcasesort_cmp(const char *a, const char *b) @@ -182,7 +182,7 @@ int git__strcasesort_cmp(const char *a, const char *b) while (*a && *b) { if (*a != *b) { - if (tolower(*a) != tolower(*b)) + if (git__tolower(*a) != git__tolower(*b)) break; /* use case in sort order even if not in equivalence */ if (!cmp) @@ -193,7 +193,7 @@ int git__strcasesort_cmp(const char *a, const char *b) } if (*a || *b) - return (unsigned char)tolower(*a) - (unsigned char)tolower(*b); + return (unsigned char)git__tolower(*a) - (unsigned char)git__tolower(*b); return cmp; } @@ -212,8 +212,8 @@ int git__strncasecmp(const char *a, const char *b, size_t sz) int al, bl; do { - al = (unsigned 
char)tolower(*a); - bl = (unsigned char)tolower(*b); + al = (unsigned char)git__tolower(*a); + bl = (unsigned char)git__tolower(*b); ++a, ++b; } while (--sz && al && al == bl); @@ -225,7 +225,7 @@ void git__strntolower(char *str, size_t len) size_t i; for (i = 0; i < len; ++i) { - str[i] = (char) tolower(str[i]); + str[i] = (char)git__tolower(str[i]); } } @@ -255,8 +255,8 @@ int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix) int s, p; while(str_n--) { - s = (unsigned char)tolower(*str++); - p = (unsigned char)tolower(*prefix++); + s = (unsigned char)git__tolower(*str++); + p = (unsigned char)git__tolower(*prefix++); if (s != p) return s - p; diff --git a/src/util.h b/src/util.h index be6534580..b2abbe6a6 100644 --- a/src/util.h +++ b/src/util.h @@ -275,6 +275,15 @@ extern char *git__strsep(char **end, const char *sep); extern void git__strntolower(char *str, size_t len); extern void git__strtolower(char *str); +#ifdef GIT_WIN32 +GIT_INLINE(int) git__tolower(int c) +{ + return (c >= 'A' && c <= 'Z') ? 
(c + 32) : c; +} +#else +# define git__tolower(a) tolower(a) +#endif + GIT_INLINE(const char *) git__next_line(const char *s) { while (*s && *s != '\n') s++; From bad33a5dfb4bc0b2dab5d8c9736508429170b24a Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 29 May 2015 17:39:11 -0400 Subject: [PATCH 3/3] git__tolower: test that some non-ASCII downcasing isn't performed --- tests/core/string.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/core/string.c b/tests/core/string.c index c6c2d95ac..90e8fa027 100644 --- a/tests/core/string.c +++ b/tests/core/string.c @@ -52,11 +52,13 @@ void test_core_string__strcmp(void) cl_assert(strcmp("e\342\202\254ghi=", "et") > 0); cl_assert(strcmp("rt dev\302\266h", "rt\303\202of") < 0); cl_assert(strcmp("et", "e\342\202\254ghi=") < 0); + cl_assert(strcmp("\303\215", "\303\255") < 0); cl_assert(git__strcmp("rt\303\202of", "rt dev\302\266h") > 0); cl_assert(git__strcmp("e\342\202\254ghi=", "et") > 0); cl_assert(git__strcmp("rt dev\302\266h", "rt\303\202of") < 0); cl_assert(git__strcmp("et", "e\342\202\254ghi=") < 0); + cl_assert(git__strcmp("\303\215", "\303\255") < 0); } void test_core_string__strcasecmp(void) @@ -71,9 +73,11 @@ void test_core_string__strcasecmp(void) cl_assert(strcasecmp("e\342\202\254ghi=", "et") > 0); cl_assert(strcasecmp("rt dev\302\266h", "rt\303\202of") < 0); cl_assert(strcasecmp("et", "e\342\202\254ghi=") < 0); + cl_assert(strcasecmp("\303\215", "\303\255") < 0); cl_assert(git__strcasecmp("rt\303\202of", "rt dev\302\266h") > 0); cl_assert(git__strcasecmp("e\342\202\254ghi=", "et") > 0); cl_assert(git__strcasecmp("rt dev\302\266h", "rt\303\202of") < 0); cl_assert(git__strcasecmp("et", "e\342\202\254ghi=") < 0); + cl_assert(git__strcasecmp("\303\215", "\303\255") < 0); }