mirror of
https://git.proxmox.com/git/libgit2
synced 2025-05-08 15:52:02 +00:00
path: Use UTF8 iteration for HFS chars
This commit is contained in:
parent
11d67b754d
commit
8e35527de2
124
src/path.c
124
src/path.c
@ -1282,93 +1282,57 @@ GIT_INLINE(bool) verify_dospath(
|
|||||||
component[last] != ':');
|
component[last] != ':');
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Return the next significant character of an HFS+ path component.
 *
 * The input is decoded as UTF-8 one code point at a time. Code points
 * listed in the switch below are skipped entirely, so that a name such
 * as ".g<ZERO WIDTH JOINER>it" yields the same sequence as ".git".
 *
 * @param in  cursor into the component; advanced past every byte consumed
 * @param len bytes remaining at *in; decremented by every byte consumed
 * @return the next code point (ASCII 'A'-'Z' folded to lowercase),
 *         0 when the input is exhausted or a NUL code point is decoded,
 *         -1 if the bytes at the cursor do not decode
 */
static int32_t next_hfs_char(const char **in, size_t *len)
{
	while (*len) {
		int32_t codepoint;
		/*
		 * The decoder never looks at more than four bytes per
		 * sequence, so clamp the remaining length before narrowing
		 * it to int. A plain cast could truncate or go negative for
		 * huge lengths, and a negative length is interpreted as
		 * "NUL-terminated".
		 */
		int avail = (*len > 4) ? 4 : (int)(*len);
		int cp_len = git__utf8_iterate((const uint8_t *)(*in), avail, &codepoint);

		if (cp_len < 0)
			return -1;

		(*in) += cp_len;
		(*len) -= cp_len;

		/* these code points are ignored completely */
		switch (codepoint) {
		case 0x200c: /* ZERO WIDTH NON-JOINER */
		case 0x200d: /* ZERO WIDTH JOINER */
		case 0x200e: /* LEFT-TO-RIGHT MARK */
		case 0x200f: /* RIGHT-TO-LEFT MARK */
		case 0x202a: /* LEFT-TO-RIGHT EMBEDDING */
		case 0x202b: /* RIGHT-TO-LEFT EMBEDDING */
		case 0x202c: /* POP DIRECTIONAL FORMATTING */
		case 0x202d: /* LEFT-TO-RIGHT OVERRIDE */
		case 0x202e: /* RIGHT-TO-LEFT OVERRIDE */
		case 0x206a: /* INHIBIT SYMMETRIC SWAPPING */
		case 0x206b: /* ACTIVATE SYMMETRIC SWAPPING */
		case 0x206c: /* INHIBIT ARABIC FORM SHAPING */
		case 0x206d: /* ACTIVATE ARABIC FORM SHAPING */
		case 0x206e: /* NATIONAL DIGIT SHAPES */
		case 0x206f: /* NOMINAL DIGIT SHAPES */
		case 0xfeff: /* ZERO WIDTH NO-BREAK SPACE */
			continue;
		}

		/*
		 * Fold into lowercase by hand. Only the ASCII range needs
		 * folding because the git folder name is pure ASCII, and
		 * tolower() must not be used here: its argument has to be
		 * representable as unsigned char (code points are not), and
		 * its result depends on the current locale.
		 */
		if (codepoint >= 'A' && codepoint <= 'Z')
			codepoint += 'a' - 'A';

		return codepoint;
	}

	return 0; /* NUL byte -- end of string */
}

/*
 * Check that a path component is not ".git" as HFS+ would see it.
 *
 * @param path start of the component (need not be NUL-terminated)
 * @param len  length of the component in bytes
 * @return false if the component reads exactly ".git" once ignorable
 *         code points are dropped and ASCII case is folded; true
 *         otherwise, including when the component is not decodable
 */
static bool verify_dotgit_hfs(const char *path, size_t len)
{
	if (next_hfs_char(&path, &len) != '.' ||
		next_hfs_char(&path, &len) != 'g' ||
		next_hfs_char(&path, &len) != 'i' ||
		next_hfs_char(&path, &len) != 't' ||
		next_hfs_char(&path, &len) != 0)
		return true;

	return false;
}
|
||||||
|
|
||||||
GIT_INLINE(bool) verify_char(unsigned char c, unsigned int flags)
|
GIT_INLINE(bool) verify_char(unsigned char c, unsigned int flags)
|
||||||
|
76
src/util.c
76
src/util.c
@ -664,3 +664,79 @@ void git__insertsort_r(
|
|||||||
if (freeswap)
|
if (freeswap)
|
||||||
git__free(swapel);
|
git__free(swapel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Sequence length implied by the first byte of a UTF-8 sequence,
 * indexed by that byte. 0 marks bytes that cannot start a sequence.
 */
static const int8_t utf8proc_utf8class[256] = {
	/* 0x00 - 0x7F: single-byte (ASCII) */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 - 0xBF: continuation bytes, never a lead byte */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 - 0xDF: lead byte of a two-byte sequence */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 - 0xEF: lead byte of a three-byte sequence */
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	/* 0xF0 - 0xF7: lead byte of a four-byte sequence; 0xF8 - 0xFF: invalid */
	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * Determine the byte length of the UTF-8 sequence starting at `str`.
 *
 * Only the structure is checked (lead byte class and continuation-byte
 * markers); the decoded value is not validated here.
 *
 * @param str     first byte of the sequence
 * @param str_len bytes available at `str`; negative if the string is
 *                NUL-terminated
 * @return the sequence length (1-4) on success;
 *         -1 if the buffer is empty or `str[0]` cannot start a sequence;
 *         -str_len if the sequence would run past the available bytes;
 *         -i if the byte at offset i is not a continuation byte
 */
int git__utf8_charlen(const uint8_t *str, int str_len)
{
	int length, i;

	/*
	 * Nothing to classify in an empty buffer. Bail out before reading
	 * str[0], and report a real error instead of "-str_len", which
	 * would be 0 and indistinguishable from neither success nor failure.
	 */
	if (str_len == 0)
		return -1;

	length = utf8proc_utf8class[str[0]];
	if (!length)
		return -1;

	/* the sequence is cut short by the end of the buffer */
	if (str_len >= 0 && length > str_len)
		return -str_len;

	/*
	 * Every trailing byte must look like 10xxxxxx. For NUL-terminated
	 * input this also stops at the terminator, which is not a
	 * continuation byte.
	 */
	for (i = 1; i < length; i++) {
		if ((str[i] & 0xC0) != 0x80)
			return -i;
	}

	return length;
}
|
||||||
|
|
||||||
|
/*
 * Decode the UTF-8 sequence at `str` into a single code point.
 *
 * `*dst` is set to -1 up front and only overwritten on success.
 * Returns the number of bytes the sequence occupies, or -1 when the
 * sequence is malformed, overlong, or decodes to a rejected value
 * (UTF-16 surrogates, U+FDD0..U+FDEF, anything at or above 0x110000,
 * or a value whose low 16 bits are 0xFFFE/0xFFFF).
 */
int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst)
{
	int32_t cp = -1;
	int nbytes;

	*dst = -1;

	nbytes = git__utf8_charlen(str, str_len);
	if (nbytes < 0)
		return -1;

	/*
	 * Assemble the payload bits. The bit fields never overlap, so
	 * OR-ing them together gives the same value as adding them.
	 */
	if (nbytes == 1) {
		cp = str[0];
	} else if (nbytes == 2) {
		cp = ((str[0] & 0x1F) << 6) | (str[1] & 0x3F);

		/* overlong encoding of an ASCII value */
		if (cp < 0x80)
			cp = -1;
	} else if (nbytes == 3) {
		cp = ((str[0] & 0x0F) << 12) |
		     ((str[1] & 0x3F) << 6) |
		     (str[2] & 0x3F);

		/* overlong, surrogate half, or U+FDD0..U+FDEF */
		if (cp < 0x800)
			cp = -1;
		else if (cp >= 0xD800 && cp < 0xE000)
			cp = -1;
		else if (cp >= 0xFDD0 && cp < 0xFDF0)
			cp = -1;
	} else if (nbytes == 4) {
		cp = ((str[0] & 0x07) << 18) |
		     ((str[1] & 0x3F) << 12) |
		     ((str[2] & 0x3F) << 6) |
		     (str[3] & 0x3F);

		/* overlong, or beyond the last Unicode plane */
		if (cp < 0x10000 || cp >= 0x110000)
			cp = -1;
	}

	/* nothing decoded, or the low 16 bits are 0xFFFE / 0xFFFF */
	if (cp < 0)
		return -1;
	if ((cp & 0xFFFF) >= 0xFFFE)
		return -1;

	*dst = cp;
	return nbytes;
}
|
||||||
|
11
src/util.h
11
src/util.h
@ -367,6 +367,17 @@ extern int git__date_rfc2822_fmt(char *out, size_t len, const git_time *date);
|
|||||||
*/
|
*/
|
||||||
extern size_t git__unescape(char *str);
|
extern size_t git__unescape(char *str);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Iterate through a UTF-8 string, yielding one
|
||||||
|
* codepoint at a time.
|
||||||
|
*
|
||||||
|
* @param str current position in the string
|
||||||
|
* @param str_len size left in the string; -1 if the string is NUL-terminated
|
||||||
|
* @param dst pointer where to store the current codepoint
|
||||||
|
* @return length in bytes of the read codepoint; -1 if the codepoint was invalid
|
||||||
|
*/
|
||||||
|
extern int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Safely zero-out memory, making sure that the compiler
|
* Safely zero-out memory, making sure that the compiler
|
||||||
* doesn't optimize away the operation.
|
* doesn't optimize away the operation.
|
||||||
|
Loading…
Reference in New Issue
Block a user