mirror of
https://git.proxmox.com/git/libgit2
synced 2026-01-04 15:17:48 +00:00
UTF-8 changes yo
This commit is contained in:
parent
319ad0ba20
commit
3b73a03497
@ -29,6 +29,98 @@ void gitwin_set_utf8(void)
|
||||
_active_codepage = CP_UTF8;
|
||||
}
|
||||
|
||||
/*
 * Split a supplementary code point (U+10000..U+10FFFF) into its UTF-16
 * surrogate pair. 0xd7c0 is 0xd800 - (0x10000 >> 10), so the lead macro
 * folds the 0x10000 offset into a single addition.
 */
#define U16_LEAD(c) ((wchar_t)(((c) >> 10) + 0xd7c0))
#define U16_TRAIL(c) ((wchar_t)(((c) & 0x3ff) | 0xdc00))

/*
 * Leniently convert `srcLength` bytes of UTF-8 at `src` into a
 * NUL-terminated UTF-16 string at `dest`.
 *
 * "Lenient" means trail bytes are not validated: malformed input produces
 * unspecified code units but never reads past `src + srcLength`. A
 * multi-byte sequence that is cut off by the end of the input is replaced
 * with U+FFFD and conversion stops there.
 *
 * Every source byte yields at most one UTF-16 code unit, so `dest` must
 * have room for `srcLength + 1` elements (the extra one is the terminator).
 * `dest` and `src` must be non-NULL and `srcLength` must be non-zero.
 */
void git__utf8_to_16(wchar_t *dest, const char *src, size_t srcLength)
{
	wchar_t *pDest = dest;
	uint32_t ch;
	const uint8_t *pSrc = (const uint8_t *)src;
	const uint8_t *pSrcLimit = pSrc + srcLength;

	assert(dest && src && srcLength > 0);

	if ((pSrcLimit - pSrc) >= 4) {
		pSrcLimit -= 3; /* temporarily reduce pSrcLimit */

		/* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
		do {
			ch = *pSrc++;
			if (ch < 0xc0) {
				/*
				 * ASCII, or a trail byte in lead position which is
				 * treated like a single-byte sequence for better
				 * character boundary resynchronization after illegal
				 * sequences.
				 */
				*pDest++ = (wchar_t)ch;
			} else if (ch < 0xe0) { /* U+0080..U+07FF */
				/* 0x3080 = (0xc0 << 6) + 0x80 */
				*pDest++ = (wchar_t)(uint16_t)((ch << 6) + *pSrc++ - 0x3080);
			} else if (ch < 0xf0) { /* U+0800..U+FFFF */
				/*
				 * No need for (ch & 0xf): the lead byte's marker bits
				 * land above bit 15 after << 12 and are dropped by the
				 * truncation to 16 bits.
				 * 0x2080 = (0x80 << 6) + 0x80
				 */
				ch = (ch << 12) + ((uint32_t)*pSrc++ << 6);
				*pDest++ = (wchar_t)(uint16_t)(ch + *pSrc++ - 0x2080);
			} else /* f0..f4 */ { /* U+10000..U+10FFFF */
				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
				ch = (ch << 18) + ((uint32_t)*pSrc++ << 12);
				ch += (uint32_t)*pSrc++ << 6;
				ch += (uint32_t)*pSrc++ - 0x3c82080u;
				*pDest++ = U16_LEAD(ch);
				*pDest++ = U16_TRAIL(ch);
			}
		} while (pSrc < pSrcLimit);

		pSrcLimit += 3; /* restore original pSrcLimit */
	}

	/* At most 3 bytes remain; every multi-byte read must be bounds-checked. */
	while (pSrc < pSrcLimit) {
		ch = *pSrc++;
		if (ch < 0xc0) {
			/* ASCII, or a stray trail byte passed through (see above) */
			*pDest++ = (wchar_t)ch;
			continue;
		} else if (ch < 0xe0) { /* U+0080..U+07FF */
			if (pSrc < pSrcLimit) {
				/* 0x3080 = (0xc0 << 6) + 0x80 */
				*pDest++ = (wchar_t)(uint16_t)((ch << 6) + *pSrc++ - 0x3080);
				continue;
			}
		} else if (ch < 0xf0) { /* U+0800..U+FFFF */
			if ((pSrcLimit - pSrc) >= 2) {
				/* 0x2080 = (0x80 << 6) + 0x80 */
				ch = (ch << 12) + ((uint32_t)*pSrc++ << 6);
				*pDest++ = (wchar_t)(uint16_t)(ch + *pSrc++ - 0x2080);
				/* pSrc already points past the sequence; no extra skip */
				continue;
			}
		} else /* f0..f4 */ { /* U+10000..U+10FFFF */
			if ((pSrcLimit - pSrc) >= 3) {
				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
				ch = (ch << 18) + ((uint32_t)*pSrc++ << 12);
				ch += (uint32_t)*pSrc++ << 6;
				ch += (uint32_t)*pSrc++ - 0x3c82080u;
				*pDest++ = U16_LEAD(ch);
				*pDest++ = U16_TRAIL(ch);
				/* pSrc already points past the sequence; no extra skip */
				continue;
			}
		}

		/* truncated character at the end */
		*pDest++ = 0xfffd;
		break;
	}

	*pDest++ = 0x0;
}
|
||||
|
||||
wchar_t* gitwin_to_utf16(const char* str)
|
||||
{
|
||||
wchar_t* ret;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user