diff --git a/src/buf_text.c b/src/buf_text.c new file mode 100644 index 000000000..3c5024e6c --- /dev/null +++ b/src/buf_text.c @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#include "buf_text.h" + +int git_buf_text_puts_escaped( + git_buf *buf, + const char *string, + const char *esc_chars, + const char *esc_with) +{ + const char *scan; + size_t total = 0, esc_len = strlen(esc_with), count; + + if (!string) + return 0; + + for (scan = string; *scan; ) { + /* count run of non-escaped characters */ + count = strcspn(scan, esc_chars); + total += count; + scan += count; + /* count run of escaped characters */ + count = strspn(scan, esc_chars); + total += count * (esc_len + 1); + scan += count; + } + + if (git_buf_grow(buf, buf->size + total + 1) < 0) + return -1; + + for (scan = string; *scan; ) { + count = strcspn(scan, esc_chars); + + memmove(buf->ptr + buf->size, scan, count); + scan += count; + buf->size += count; + + for (count = strspn(scan, esc_chars); count > 0; --count) { + /* copy escape sequence */ + memmove(buf->ptr + buf->size, esc_with, esc_len); + buf->size += esc_len; + /* copy character to be escaped */ + buf->ptr[buf->size] = *scan; + buf->size++; + scan++; + } + } + + buf->ptr[buf->size] = '\0'; + + return 0; +} + +void git_buf_text_unescape(git_buf *buf) +{ + buf->size = git__unescape(buf->ptr); +} + +int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings) +{ + size_t i; + const char *str, *pfx; + + git_buf_clear(buf); + + if (!strings || !strings->count) + return 0; + + /* initialize common prefix to first string */ + if (git_buf_sets(buf, strings->strings[0]) < 0) + return -1; + + /* go through the rest of the strings, truncating to shared prefix */ + for (i = 1; i < strings->count; ++i) { + + for (str = strings->strings[i], pfx = buf->ptr; + *str && 
*str == *pfx; str++, pfx++) + /* scanning */; + + git_buf_truncate(buf, pfx - buf->ptr); + + if (!buf->size) + break; + } + + return 0; +} + +bool git_buf_text_is_binary(const git_buf *buf) +{ + const char *scan = buf->ptr, *end = buf->ptr + buf->size; + int printable = 0, nonprintable = 0; + + while (scan < end) { + unsigned char c = *scan++; + + if (c > 0x1F && c < 0x7F) + printable++; + else if (c == '\0') + return true; + else if (!git__isspace(c)) + nonprintable++; + } + + return ((printable >> 7) < nonprintable); +} + +int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset) +{ + const char *ptr; + size_t len; + + *bom = GIT_BOM_NONE; /* callers read *bom even when 0 is returned */ + if (buf->size < offset + 2) /* any BOM needs at least 2 bytes */ + return 0; + + ptr = buf->ptr + offset; + len = buf->size - offset; + + switch (*ptr++) { + case 0: + if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') { + *bom = GIT_BOM_UTF32_BE; + return 4; + } + break; + case '\xEF': + if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') { + *bom = GIT_BOM_UTF8; + return 3; + } + break; + case '\xFE': + if (*ptr == '\xFF') { + *bom = GIT_BOM_UTF16_BE; + return 2; + } + break; + case '\xFF': + if (*ptr != '\xFE') + break; + if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) { + *bom = GIT_BOM_UTF32_LE; + return 4; + } else { + *bom = GIT_BOM_UTF16_LE; + return 2; + } + break; + default: + break; + } + + return 0; +} + +bool git_buf_text_gather_stats( + git_buf_text_stats *stats, const git_buf *buf, bool skip_bom) +{ + const char *scan = buf->ptr, *end = buf->ptr + buf->size; + int skip; + + memset(stats, 0, sizeof(*stats)); + + /* BOM detection */ + skip = git_buf_text_detect_bom(&stats->bom, buf, 0); + if (skip_bom) + scan += skip; + + /* Ignore EOF character */ + if (buf->size > 0 && end[-1] == '\032') + end--; + + /* Counting loop */ + while (scan < end) { + unsigned char c = *scan++; + + if ((c > 0x1F && c < 0x7F) || c > 0x9f) + stats->printable++; + else switch (c) { + case '\0': + 
stats->nul++; + stats->nonprintable++; + break; + case '\n': + stats->lf++; + break; + case '\r': + stats->cr++; + if (scan < end && *scan == '\n') + stats->crlf++; + break; + case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ + stats->printable++; + break; + default: + stats->nonprintable++; + break; + } + } + + return (stats->nul > 0 || + ((stats->printable >> 7) < stats->nonprintable)); +} diff --git a/src/buf_text.h b/src/buf_text.h new file mode 100644 index 000000000..c22499bfe --- /dev/null +++ b/src/buf_text.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_buf_text_h__ +#define INCLUDE_buf_text_h__ + +#include "buffer.h" + +typedef enum { + GIT_BOM_NONE = 0, + GIT_BOM_UTF8 = 1, + GIT_BOM_UTF16_LE = 2, + GIT_BOM_UTF16_BE = 3, + GIT_BOM_UTF32_LE = 4, + GIT_BOM_UTF32_BE = 5 +} git_bom_t; + +typedef struct { + git_bom_t bom; /* BOM found at head of text */ + unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */ + unsigned int printable, nonprintable; /* These are just approximations! */ +} git_buf_text_stats; + +/** + * Append string to buffer, prefixing each character from `esc_chars` with + * `esc_with` string. 
+ * + * @param buf Buffer to append data to + * @param string String to escape and append + * @param esc_chars Characters to be escaped + * @param esc_with String to insert in front of each found character + * @return 0 on success, <0 on failure (probably allocation problem) + */ +extern int git_buf_text_puts_escaped( + git_buf *buf, + const char *string, + const char *esc_chars, + const char *esc_with); + +/** + * Append string escaping characters that are regex special + */ +GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string) +{ + return git_buf_text_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\"); +} + +/** + * Unescape all characters in a buffer in place + * + * I.e. remove backslashes + */ +extern void git_buf_text_unescape(git_buf *buf); + +/** + * Fill buffer with the common prefix of an array of strings + * + * Buffer will be set to empty if there is no common prefix + */ +extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs); + +/** + * Check quickly if buffer looks like it contains binary data + * + * @param buf Buffer to check + * @return true if buffer looks like non-text data + */ +extern bool git_buf_text_is_binary(const git_buf *buf); + +/** + * Check if a buffer begins with a UTF BOM + * + * @param bom Set to the type of BOM detected or GIT_BOM_NONE + * @param buf Buffer in which to check the first bytes for a BOM + * @param offset Offset into buffer to look for BOM + * @return Number of bytes of BOM data (or 0 if no BOM found) + */ +extern int git_buf_text_detect_bom( + git_bom_t *bom, const git_buf *buf, size_t offset); + +/** + * Gather stats for a piece of text + * + * Fill the `stats` structure with counts of unreadable characters, carriage + * returns, etc, so it can be used in heuristics. This automatically skips + * a trailing EOF (\032 character). Also it will look for a BOM at the + * start of the text and can be told to skip that as well. 
+ * + * @param stats Structure to be filled in + * @param buf Text to process + * @param skip_bom Exclude leading BOM from stats if true + * @return Does the buffer heuristically look like binary data + */ +extern bool git_buf_text_gather_stats( + git_buf_text_stats *stats, const git_buf *buf, bool skip_bom); + +#endif diff --git a/src/buffer.c b/src/buffer.c index e55b0a230..0e2c005df 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -31,15 +31,7 @@ void git_buf_init(git_buf *buf, size_t initial_size) git_buf_grow(buf, initial_size); } -int git_buf_grow(git_buf *buf, size_t target_size) -{ - int error = git_buf_try_grow(buf, target_size); - if (error != 0) - buf->ptr = git_buf__oom; - return error; -} - -int git_buf_try_grow(git_buf *buf, size_t target_size) +int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom) { char *new_ptr; size_t new_size; @@ -67,8 +59,12 @@ int git_buf_try_grow(git_buf *buf, size_t target_size) new_size = (new_size + 7) & ~7; new_ptr = git__realloc(new_ptr, new_size); - if (!new_ptr) + + if (!new_ptr) { + if (mark_oom) + buf->ptr = git_buf__oom; return -1; + } buf->asize = new_size; buf->ptr = new_ptr; @@ -141,51 +137,6 @@ int git_buf_puts(git_buf *buf, const char *string) return git_buf_put(buf, string, strlen(string)); } -int git_buf_puts_escaped( - git_buf *buf, const char *string, const char *esc_chars, const char *esc_with) -{ - const char *scan; - size_t total = 0, esc_len = strlen(esc_with), count; - - if (!string) - return 0; - - for (scan = string; *scan; ) { - /* count run of non-escaped characters */ - count = strcspn(scan, esc_chars); - total += count; - scan += count; - /* count run of escaped characters */ - count = strspn(scan, esc_chars); - total += count * (esc_len + 1); - scan += count; - } - - ENSURE_SIZE(buf, buf->size + total + 1); - - for (scan = string; *scan; ) { - count = strcspn(scan, esc_chars); - - memmove(buf->ptr + buf->size, scan, count); - scan += count; - buf->size += count; - - for (count = 
strspn(scan, esc_chars); count > 0; --count) { - /* copy escape sequence */ - memmove(buf->ptr + buf->size, esc_with, esc_len); - buf->size += esc_len; - /* copy character to be escaped */ - buf->ptr[buf->size] = *scan; - buf->size++; - scan++; - } - } - - buf->ptr[buf->size] = '\0'; - - return 0; -} - static const char b64str[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -497,59 +448,6 @@ int git_buf_cmp(const git_buf *a, const git_buf *b) (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; } -int git_buf_common_prefix(git_buf *buf, const git_strarray *strings) -{ - size_t i; - const char *str, *pfx; - - git_buf_clear(buf); - - if (!strings || !strings->count) - return 0; - - /* initialize common prefix to first string */ - if (git_buf_sets(buf, strings->strings[0]) < 0) - return -1; - - /* go through the rest of the strings, truncating to shared prefix */ - for (i = 1; i < strings->count; ++i) { - - for (str = strings->strings[i], pfx = buf->ptr; - *str && *str == *pfx; str++, pfx++) - /* scanning */; - - git_buf_truncate(buf, pfx - buf->ptr); - - if (!buf->size) - break; - } - - return 0; -} - -bool git_buf_is_binary(const git_buf *buf) -{ - size_t i; - int printable = 0, nonprintable = 0; - - for (i = 0; i < buf->size; i++) { - unsigned char c = buf->ptr[i]; - if (c > 0x1F && c < 0x7F) - printable++; - else if (c == '\0') - return true; - else if (!git__isspace(c)) - nonprintable++; - } - - return ((printable >> 7) < nonprintable); -} - -void git_buf_unescape(git_buf *buf) -{ - buf->size = git__unescape(buf->ptr); -} - int git_buf_splice( git_buf *buf, size_t where, diff --git a/src/buffer.h b/src/buffer.h index a2896d486..379216bfc 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -27,30 +27,35 @@ extern char git_buf__oom[]; * For the cases where GIT_BUF_INIT cannot be used to do static * initialization. */ -void git_buf_init(git_buf *buf, size_t initial_size); - -/** - * Grow the buffer to hold at least `target_size` bytes. 
- * - * If the allocation fails, this will return an error and the buffer - * will be marked as invalid for future operations. The existing - * contents of the buffer will be preserved however. - * @return 0 on success or -1 on failure - */ -int git_buf_grow(git_buf *buf, size_t target_size); +extern void git_buf_init(git_buf *buf, size_t initial_size); /** * Attempt to grow the buffer to hold at least `target_size` bytes. * - * This is just like `git_buf_grow` except that even if the allocation - * fails, the git_buf will still be left in a valid state. + * If the allocation fails, this will return an error. If mark_oom is true, + * this will mark the buffer as invalid for future operations; if false, + * existing buffer content will be preserved, but calling code must handle + * that the buffer was not expanded. */ -int git_buf_try_grow(git_buf *buf, size_t target_size); +extern int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom); -void git_buf_free(git_buf *buf); -void git_buf_swap(git_buf *buf_a, git_buf *buf_b); -char *git_buf_detach(git_buf *buf); -void git_buf_attach(git_buf *buf, char *ptr, size_t asize); +/** + * Grow the buffer to hold at least `target_size` bytes. + * + * If the allocation fails, this will return an error and the buffer will be + * marked as invalid for future operations, invalidating contents. + * + * @return 0 on success or -1 on failure + */ +GIT_INLINE(int) git_buf_grow(git_buf *buf, size_t target_size) +{ + return git_buf_try_grow(buf, target_size, true); +} + +extern void git_buf_free(git_buf *buf); +extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b); +extern char *git_buf_detach(git_buf *buf); +extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize); /** * Test if there have been any reallocation failures with this git_buf. 
@@ -91,18 +96,6 @@ void git_buf_rtruncate_at_char(git_buf *path, char separator); int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...); int git_buf_join(git_buf *buf, char separator, const char *str_a, const char *str_b); -/** - * Copy string into buf prefixing every character that is contained in the - * esc_chars string with the esc_with string. - */ -int git_buf_puts_escaped( - git_buf *buf, const char *string, const char *esc_chars, const char *esc_with); - -GIT_INLINE(int) git_buf_puts_escape_regex(git_buf *buf, const char *string) -{ - return git_buf_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\"); -} - /** * Join two strings as paths, inserting a slash between as needed. * @return 0 on success, -1 on failure @@ -146,15 +139,6 @@ void git_buf_rtrim(git_buf *buf); int git_buf_cmp(const git_buf *a, const git_buf *b); -/* Fill buf with the common prefix of a array of strings */ -int git_buf_common_prefix(git_buf *buf, const git_strarray *strings); - -/* Check if buffer looks like it contains binary data */ -bool git_buf_is_binary(const git_buf *buf); - -/* Unescape all characters in a buffer */ -void git_buf_unescape(git_buf *buf); - /* Write data as base64 encoded in buffer */ int git_buf_put_base64(git_buf *buf, const char *data, size_t len); diff --git a/src/config.c b/src/config.c index 5ee0d39ff..6347f7df7 100644 --- a/src/config.c +++ b/src/config.c @@ -10,6 +10,7 @@ #include "config.h" #include "git2/config.h" #include "vector.h" +#include "buf_text.h" #if GIT_WIN32 # include #endif @@ -803,7 +804,7 @@ int git_config_rename_section( int error = -1; struct rename_data data; - git_buf_puts_escape_regex(&pattern, old_section_name); + git_buf_text_puts_escape_regex(&pattern, old_section_name); git_buf_puts(&pattern, "\\..+"); if (git_buf_oom(&pattern)) goto cleanup; diff --git a/src/config_file.c b/src/config_file.c index 7cc812aa4..354a91986 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -10,6 +10,7 @@ #include "fileops.h" #include 
"filebuf.h" #include "buffer.h" +#include "buf_text.h" #include "git2/config.h" #include "git2/types.h" #include "strmap.h" @@ -854,17 +855,14 @@ fail_parse: static int skip_bom(diskfile_backend *cfg) { - static const char utf8_bom[] = { '\xef', '\xbb', '\xbf' }; + git_bom_t bom; + int bom_offset = git_buf_text_detect_bom(&bom, + &cfg->reader.buffer, cfg->reader.read_ptr - cfg->reader.buffer.ptr); - if (cfg->reader.buffer.size < sizeof(utf8_bom)) - return 0; + if (bom == GIT_BOM_UTF8) + cfg->reader.read_ptr += bom_offset; - if (memcmp(cfg->reader.read_ptr, utf8_bom, sizeof(utf8_bom)) == 0) - cfg->reader.read_ptr += sizeof(utf8_bom); - - /* TODO: the reference implementation does pretty stupid - stuff with the BoM - */ + /* TODO: reference implementation is pretty stupid with BoM */ return 0; } diff --git a/src/crlf.c b/src/crlf.c index 5e86b4eb6..80204ebf0 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -148,8 +148,11 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou if (filter->attrs.crlf_action == GIT_CRLF_AUTO || filter->attrs.crlf_action == GIT_CRLF_GUESS) { - git_text_stats stats; - git_text_gather_stats(&stats, source); + git_buf_text_stats stats; + + /* Check heuristics for binary vs text... */ + if (git_buf_text_gather_stats(&stats, source, false)) + return -1; /* * We're currently not going to even try to convert stuff @@ -159,12 +162,6 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou if (stats.cr != stats.crlf) return -1; - /* - * And add some heuristics for binary vs text, of course... 
- */ - if (git_text_is_binary(&stats)) - return -1; - #if 0 if (crlf_action == CRLF_GUESS) { /* diff --git a/src/diff_output.c b/src/diff_output.c index 3d5e03a29..e137fd0f2 100644 --- a/src/diff_output.c +++ b/src/diff_output.c @@ -142,7 +142,7 @@ static int diff_delta_is_binary_by_content( search.ptr = map->data; search.size = min(map->len, 4000); - if (git_buf_is_binary(&search)) + if (git_buf_text_is_binary(&search)) file->flags |= GIT_DIFF_FILE_BINARY; else file->flags |= GIT_DIFF_FILE_NOT_BINARY; diff --git a/src/filter.c b/src/filter.c index f2ab1b85a..6d27c0c46 100644 --- a/src/filter.c +++ b/src/filter.c @@ -13,75 +13,6 @@ #include "git2/config.h" #include "blob.h" -/* Tweaked from Core Git. I wonder what we could use this for... */ -void git_text_gather_stats(git_text_stats *stats, const git_buf *text) -{ - size_t i; - - memset(stats, 0, sizeof(*stats)); - - for (i = 0; i < git_buf_len(text); i++) { - unsigned char c = text->ptr[i]; - - if (c == '\r') { - stats->cr++; - - if (i + 1 < git_buf_len(text) && text->ptr[i + 1] == '\n') - stats->crlf++; - } - - else if (c == '\n') - stats->lf++; - - else if (c == 127) - /* DEL */ - stats->nonprintable++; - - else if (c <= 0x1F || (c >= 0x80 && c <= 0x9F)) { - switch (c) { - /* BS, HT, ESC and FF */ - case '\b': case '\t': case '\033': case '\014': - stats->printable++; - break; - case 0: - stats->nul++; - /* fall through */ - default: - stats->nonprintable++; - } - } - - else - stats->printable++; - } - - /* If file ends with EOF then don't count this EOF as non-printable. */ - if (git_buf_len(text) >= 1 && text->ptr[text->size - 1] == '\032') - stats->nonprintable--; -} - -/* - * Fresh from Core Git - */ -int git_text_is_binary(git_text_stats *stats) -{ - if (stats->nul) - return 1; - - if ((stats->printable >> 7) < stats->nonprintable) - return 1; - /* - * Other heuristics? Average line length might be relevant, - * as might LF vs CR vs CRLF counts.. - * - * NOTE! 
It might be normal to have a low ratio of CRLF to LF - * (somebody starts with a LF-only file and edits it with an editor - * that adds CRLF only to lines that are added..). But do we - * want to support CR-only? Probably not. - */ - return 0; -} - int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode) { int error; diff --git a/src/filter.h b/src/filter.h index b9beb4942..2b5051c69 100644 --- a/src/filter.h +++ b/src/filter.h @@ -9,6 +9,7 @@ #include "common.h" #include "buffer.h" +#include "buf_text.h" #include "git2/odb.h" #include "git2/repository.h" @@ -31,14 +32,6 @@ typedef enum { GIT_CRLF_AUTO, } git_crlf_t; -typedef struct { - /* NUL, CR, LF and CRLF counts */ - unsigned int nul, cr, lf, crlf; - - /* These are just approximations! */ - unsigned int printable, nonprintable; -} git_text_stats; - /* * FILTER API */ @@ -99,24 +92,4 @@ extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo /* Add CRLF, from ODB to worktree */ extern int git_filter_add__crlf_to_workdir(git_vector *filters, git_repository *repo, const char *path); - -/* - * PLAINTEXT API - */ - -/* - * Gather stats for a piece of text - * - * Fill the `stats` structure with information on the number of - * unreadable characters, carriage returns, etc, so it can be - * used in heuristics. 
- */ -extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text); - -/* - * Process `git_text_stats` data generated by `git_text_stat` to see - * if it qualifies as a binary file - */ -extern int git_text_is_binary(git_text_stats *stats); - #endif diff --git a/src/path.c b/src/path.c index 98351bec3..87eded3c4 100644 --- a/src/path.c +++ b/src/path.c @@ -511,7 +511,7 @@ static bool _check_dir_contents( size_t sub_size = strlen(sub); /* leave base valid even if we could not make space for subdir */ - if (git_buf_try_grow(dir, dir_size + sub_size + 2) < 0) + if (git_buf_try_grow(dir, dir_size + sub_size + 2, false) < 0) return false; /* save excursion */ diff --git a/src/pathspec.c b/src/pathspec.c index fc6547afe..993b44667 100644 --- a/src/pathspec.c +++ b/src/pathspec.c @@ -6,6 +6,7 @@ */ #include "pathspec.h" +#include "buf_text.h" #include "attr_file.h" /* what is the common non-wildcard prefix for all items in the pathspec */ @@ -15,7 +16,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec) const char *scan; if (!pathspec || !pathspec->count || - git_buf_common_prefix(&prefix, pathspec) < 0) + git_buf_text_common_prefix(&prefix, pathspec) < 0) return NULL; /* diff prefix will only be leading non-wildcards */ @@ -31,7 +32,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec) return NULL; } - git_buf_unescape(&prefix); + git_buf_text_unescape(&prefix); return git_buf_detach(&prefix); } diff --git a/src/submodule.c b/src/submodule.c index 15158f0d8..c117255d4 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -12,6 +12,7 @@ #include "git2/index.h" #include "git2/submodule.h" #include "buffer.h" +#include "buf_text.h" #include "vector.h" #include "posix.h" #include "config_file.h" @@ -782,7 +783,7 @@ int git_submodule_reload(git_submodule *submodule) git_buf path = GIT_BUF_INIT; git_buf_sets(&path, "submodule\\."); - git_buf_puts_escape_regex(&path, submodule->name); + git_buf_text_puts_escape_regex(&path, submodule->name); 
git_buf_puts(&path, ".*"); if (git_buf_oom(&path)) diff --git a/tests-clar/core/buffer.c b/tests-clar/core/buffer.c index 236bf39da..40fc4c571 100644 --- a/tests-clar/core/buffer.c +++ b/tests-clar/core/buffer.c @@ -1,5 +1,6 @@ #include "clar_libgit2.h" #include "buffer.h" +#include "buf_text.h" #define TESTSTR "Have you seen that? Have you seeeen that??" const char *test_string = TESTSTR; @@ -576,37 +577,37 @@ void test_core_buffer__11(void) t.strings = t1; t.count = 3; - cl_git_pass(git_buf_common_prefix(&a, &t)); + cl_git_pass(git_buf_text_common_prefix(&a, &t)); cl_assert_equal_s(a.ptr, ""); t.strings = t2; t.count = 3; - cl_git_pass(git_buf_common_prefix(&a, &t)); + cl_git_pass(git_buf_text_common_prefix(&a, &t)); cl_assert_equal_s(a.ptr, "some"); t.strings = t3; t.count = 3; - cl_git_pass(git_buf_common_prefix(&a, &t)); + cl_git_pass(git_buf_text_common_prefix(&a, &t)); cl_assert_equal_s(a.ptr, ""); t.strings = t4; t.count = 3; - cl_git_pass(git_buf_common_prefix(&a, &t)); + cl_git_pass(git_buf_text_common_prefix(&a, &t)); cl_assert_equal_s(a.ptr, "happ"); t.strings = t5; t.count = 3; - cl_git_pass(git_buf_common_prefix(&a, &t)); + cl_git_pass(git_buf_text_common_prefix(&a, &t)); cl_assert_equal_s(a.ptr, "happ"); t.strings = t6; t.count = 3; - cl_git_pass(git_buf_common_prefix(&a, &t)); + cl_git_pass(git_buf_text_common_prefix(&a, &t)); cl_assert_equal_s(a.ptr, ""); t.strings = t7; t.count = 3; - cl_git_pass(git_buf_common_prefix(&a, &t)); + cl_git_pass(git_buf_text_common_prefix(&a, &t)); cl_assert_equal_s(a.ptr, ""); git_buf_free(&a); @@ -641,19 +642,19 @@ void test_core_buffer__puts_escaped(void) git_buf a = GIT_BUF_INIT; git_buf_clear(&a); - cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "", "")); + cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "", "")); cl_assert_equal_s("this is a test", a.ptr); git_buf_clear(&a); - cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "t", "\\")); + cl_git_pass(git_buf_text_puts_escaped(&a, "this 
is a test", "t", "\\")); cl_assert_equal_s("\\this is a \\tes\\t", a.ptr); git_buf_clear(&a); - cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "i ", "__")); + cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "i ", "__")); cl_assert_equal_s("th__is__ __is__ a__ test", a.ptr); git_buf_clear(&a); - cl_git_pass(git_buf_puts_escape_regex(&a, "^match\\s*[A-Z]+.*")); + cl_git_pass(git_buf_text_puts_escape_regex(&a, "^match\\s*[A-Z]+.*")); cl_assert_equal_s("\\^match\\\\s\\*\\[A-Z\\]\\+\\.\\*", a.ptr); git_buf_free(&a); @@ -663,7 +664,7 @@ static void assert_unescape(char *expected, char *to_unescape) { git_buf buf = GIT_BUF_INIT; cl_git_pass(git_buf_sets(&buf, to_unescape)); - git_buf_unescape(&buf); + git_buf_text_unescape(&buf); cl_assert_equal_s(expected, buf.ptr); cl_assert_equal_sz(strlen(expected), buf.size); diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c index 785489849..b9bbfff0c 100644 --- a/tests-clar/object/blob/filter.c +++ b/tests-clar/object/blob/filter.c @@ -4,7 +4,7 @@ #include "filter.h" static git_repository *g_repo = NULL; -#define NUM_TEST_OBJECTS 6 +#define NUM_TEST_OBJECTS 8 static git_oid g_oids[NUM_TEST_OBJECTS]; static const char *g_raw[NUM_TEST_OBJECTS] = { "", @@ -12,16 +12,20 @@ static const char *g_raw[NUM_TEST_OBJECTS] = { "foo\rbar\r", "foo\r\nbar\r\n", "foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r", - "123\n\000\001\002\003\004abc\255\254\253\r\n" + "123\n\000\001\002\003\004abc\255\254\253\r\n", + "\xEF\xBB\xBFThis is UTF-8\n", + "\xFE\xFF\x00T\x00h\x00i\x00s\x00!" 
}; -static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17 }; -static git_text_stats g_stats[NUM_TEST_OBJECTS] = { - { 0, 0, 0, 0, 0, 0 }, - { 0, 0, 2, 0, 6, 0 }, - { 0, 2, 0, 0, 6, 0 }, - { 0, 2, 2, 2, 6, 0 }, - { 0, 4, 4, 1, 31, 0 }, - { 1, 1, 2, 1, 9, 5 } +static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, 12 }; +static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = { + { 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 2, 0, 6, 0 }, + { 0, 0, 2, 0, 0, 6, 0 }, + { 0, 0, 2, 2, 2, 6, 0 }, + { 0, 0, 4, 4, 1, 31, 0 }, + { 0, 1, 1, 2, 1, 9, 5 }, + { GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 }, + { GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 }, }; static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { { "", 0, 0 }, @@ -29,7 +33,9 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { { "foo\rbar\r", 0, 8 }, { "foo\nbar\n", 0, 8 }, { "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 }, - { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 } + { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }, + { "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 }, + { "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 } }; void test_object_blob_filter__initialize(void) @@ -76,12 +82,12 @@ void test_object_blob_filter__stats(void) int i; git_blob *blob; git_buf buf = GIT_BUF_INIT; - git_text_stats stats; + git_buf_text_stats stats; for (i = 0; i < NUM_TEST_OBJECTS; i++) { cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); cl_git_pass(git_blob__getbuf(&buf, blob)); - git_text_gather_stats(&stats, &buf); + git_buf_text_gather_stats(&stats, &buf, false); cl_assert(memcmp(&g_stats[i], &stats, sizeof(stats)) == 0); git_blob_free(blob); }