From 4c09e19a3764a1e5f3340dabf8104dfed32e7673 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Mon, 30 Mar 2015 14:07:44 -0700 Subject: [PATCH 01/16] Improvements to ignore performance on Windows. Minimizing the number of directory and file opens minimizes the amount of IO, thus reducing the overall cost of performing ignore operations. --- src/attr.c | 6 +++--- src/attr_file.c | 18 ++++++++++++++++-- src/attr_file.h | 4 +++- src/ignore.c | 6 +++--- src/ignore.h | 2 +- src/iterator.c | 21 ++++++++++++++++----- tests/attr/lookup.c | 6 +++--- 7 files changed, 45 insertions(+), 18 deletions(-) diff --git a/src/attr.c b/src/attr.c index 38420807a..102d0248c 100644 --- a/src/attr.c +++ b/src/attr.c @@ -55,7 +55,7 @@ int git_attr_get( *value = NULL; - if (git_attr_path__init(&path, pathname, git_repository_workdir(repo)) < 0) + if (git_attr_path__init(&path, pathname, git_repository_workdir(repo), GIT_DIR_FLAG_UNKNOWN) < 0) return -1; if ((error = collect_attr_files(repo, NULL, flags, pathname, &files)) < 0) @@ -114,7 +114,7 @@ int git_attr_get_many_with_session( assert(values && repo && names); - if (git_attr_path__init(&path, pathname, git_repository_workdir(repo)) < 0) + if (git_attr_path__init(&path, pathname, git_repository_workdir(repo), GIT_DIR_FLAG_UNKNOWN) < 0) return -1; if ((error = collect_attr_files(repo, attr_session, flags, pathname, &files)) < 0) @@ -193,7 +193,7 @@ int git_attr_foreach( assert(repo && callback); - if (git_attr_path__init(&path, pathname, git_repository_workdir(repo)) < 0) + if (git_attr_path__init(&path, pathname, git_repository_workdir(repo), GIT_DIR_FLAG_UNKNOWN) < 0) return -1; if ((error = collect_attr_files(repo, NULL, flags, pathname, &files)) < 0 || diff --git a/src/attr_file.c b/src/attr_file.c index eed39661f..ef98aacc2 100644 --- a/src/attr_file.c +++ b/src/attr_file.c @@ -457,7 +457,7 @@ git_attr_assignment *git_attr_rule__lookup_assignment( } int git_attr_path__init( - git_attr_path *info, const char *path, const char *base) + 
git_attr_path *info, const char *path, const char *base, git_dir_flag dir_flag) { ssize_t root; @@ -488,7 +488,21 @@ int git_attr_path__init( if (!info->basename || !*info->basename) info->basename = info->path; - info->is_dir = (int)git_path_isdir(info->full.ptr); + switch (dir_flag) + { + case GIT_DIR_FLAG_FALSE: + info->is_dir = 0; + break; + + case GIT_DIR_FLAG_TRUE: + info->is_dir = 1; + break; + + case GIT_DIR_FLAG_UNKNOWN: + default: + info->is_dir = (int)git_path_isdir(info->full.ptr); + break; + } return 0; } diff --git a/src/attr_file.h b/src/attr_file.h index aa9a16de0..388ecf4c0 100644 --- a/src/attr_file.h +++ b/src/attr_file.h @@ -202,8 +202,10 @@ extern bool git_attr_rule__match( extern git_attr_assignment *git_attr_rule__lookup_assignment( git_attr_rule *rule, const char *name); +typedef enum { GIT_DIR_FLAG_TRUE = 1, GIT_DIR_FLAG_FALSE = 0, GIT_DIR_FLAG_UNKNOWN = -1 } git_dir_flag; + extern int git_attr_path__init( - git_attr_path *info, const char *path, const char *base); + git_attr_path *info, const char *path, const char *base, git_dir_flag is_dir); extern void git_attr_path__free(git_attr_path *info); diff --git a/src/ignore.c b/src/ignore.c index 3a5efedce..7ad8500e8 100644 --- a/src/ignore.c +++ b/src/ignore.c @@ -388,7 +388,7 @@ static bool ignore_lookup_in_rules( } int git_ignore__lookup( - int *out, git_ignores *ignores, const char *pathname) + int *out, git_ignores *ignores, const char *pathname, git_dir_flag dir_flag) { unsigned int i; git_attr_file *file; @@ -397,7 +397,7 @@ int git_ignore__lookup( *out = GIT_IGNORE_NOTFOUND; if (git_attr_path__init( - &path, pathname, git_repository_workdir(ignores->repo)) < 0) + &path, pathname, git_repository_workdir(ignores->repo), dir_flag) < 0) return -1; /* first process builtins - success means path was found */ @@ -470,7 +470,7 @@ int git_ignore_path_is_ignored( memset(&path, 0, sizeof(path)); memset(&ignores, 0, sizeof(ignores)); - if ((error = git_attr_path__init(&path, pathname, workdir)) < 
0 || + if ((error = git_attr_path__init(&path, pathname, workdir, GIT_DIR_FLAG_UNKNOWN)) < 0 || (error = git_ignore__for_path(repo, path.path, &ignores)) < 0) goto cleanup; diff --git a/src/ignore.h b/src/ignore.h index 77668c661..d40bd60f9 100644 --- a/src/ignore.h +++ b/src/ignore.h @@ -49,7 +49,7 @@ enum { GIT_IGNORE_TRUE = 1, }; -extern int git_ignore__lookup(int *out, git_ignores *ign, const char *path); +extern int git_ignore__lookup(int *out, git_ignores *ign, const char *path, git_dir_flag dir_flag); /* command line Git sometimes generates an error message if given a * pathspec that contains an exact match to an ignored file (provided diff --git a/src/iterator.c b/src/iterator.c index 9ddacebd1..8bab1aab0 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -1344,6 +1344,16 @@ static int is_submodule(workdir_iterator *wi, git_path_with_stat *ie) return is_submodule; } +GIT_INLINE(git_dir_flag) git_entry__dir_flag(git_index_entry *entry) { +#if defined(GIT_WIN32) && !defined(__MINGW32__) + return (entry && entry->mode) + ? S_ISDIR(entry->mode) ? 
GIT_DIR_FLAG_TRUE : GIT_DIR_FLAG_FALSE + : GIT_DIR_FLAG_UNKNOWN; +#else + return GIT_DIR_FLAG_UNKNOWN; +#endif +} + static int workdir_iterator__enter_dir(fs_iterator *fi) { workdir_iterator *wi = (workdir_iterator *)fi; @@ -1352,9 +1362,10 @@ static int workdir_iterator__enter_dir(fs_iterator *fi) git_path_with_stat *entry; bool found_submodules = false; + git_dir_flag dir_flag = git_entry__dir_flag(&fi->entry); + /* check if this directory is ignored */ - if (git_ignore__lookup( - &ff->is_ignored, &wi->ignores, fi->path.ptr + fi->root_len) < 0) { + if (git_ignore__lookup(&ff->is_ignored, &wi->ignores, fi->path.ptr + fi->root_len, dir_flag) < 0) { giterr_clear(); ff->is_ignored = GIT_IGNORE_NOTFOUND; } @@ -1483,7 +1494,6 @@ int git_iterator_for_workdir_ext( return fs_iterator__initialize(out, &wi->fi, repo_workdir); } - void git_iterator_free(git_iterator *iter) { if (iter == NULL) @@ -1574,8 +1584,9 @@ int git_iterator_current_parent_tree( static void workdir_iterator_update_is_ignored(workdir_iterator *wi) { - if (git_ignore__lookup( - &wi->is_ignored, &wi->ignores, wi->fi.entry.path) < 0) { + git_dir_flag dir_flag = git_entry__dir_flag(&wi->fi.entry); + + if (git_ignore__lookup(&wi->is_ignored, &wi->ignores, wi->fi.entry.path, dir_flag) < 0) { giterr_clear(); wi->is_ignored = GIT_IGNORE_NOTFOUND; } diff --git a/tests/attr/lookup.c b/tests/attr/lookup.c index 030ea075d..71e87cbae 100644 --- a/tests/attr/lookup.c +++ b/tests/attr/lookup.c @@ -13,7 +13,7 @@ void test_attr_lookup__simple(void) cl_assert_equal_s(cl_fixture("attr/attr0"), file->entry->path); cl_assert(file->rules.length == 1); - cl_git_pass(git_attr_path__init(&path, "test", NULL)); + cl_git_pass(git_attr_path__init(&path, "test", NULL, GIT_DIR_FLAG_UNKNOWN)); cl_assert_equal_s("test", path.path); cl_assert_equal_s("test", path.basename); cl_assert(!path.is_dir); @@ -36,7 +36,7 @@ static void run_test_cases(git_attr_file *file, struct attr_expected *cases, int int error; for (c = cases; c->path != 
NULL; c++) { - cl_git_pass(git_attr_path__init(&path, c->path, NULL)); + cl_git_pass(git_attr_path__init(&path, c->path, NULL, GIT_DIR_FLAG_UNKNOWN)); if (force_dir) path.is_dir = 1; @@ -133,7 +133,7 @@ void test_attr_lookup__match_variants(void) cl_assert_equal_s(cl_fixture("attr/attr1"), file->entry->path); cl_assert(file->rules.length == 10); - cl_git_pass(git_attr_path__init(&path, "/testing/for/pat0", NULL)); + cl_git_pass(git_attr_path__init(&path, "/testing/for/pat0", NULL, GIT_DIR_FLAG_UNKNOWN)); cl_assert_equal_s("pat0", path.basename); run_test_cases(file, cases, 0); From 1920ee4ef6096f888a9bb19bc329424d2c7ee656 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Thu, 26 Mar 2015 18:10:24 -0400 Subject: [PATCH 02/16] Improvements to status performance on Windows. Changed win32/path_w32.c to utilize NTFS' FindFirst..FindNext data instead of doing an lstat per file. Avoiding unnecessary directory opens and file scans reduces IO, improving overall performance. Effect is magnified due to NTFS being a kernel mode file system (as opposed to user mode). 
--- src/iterator.c | 18 +++- src/win32/path_w32.c | 225 ++++++++++++++++++++++++++++++++++++++++++ src/win32/path_w32.h | 27 +++++ src/win32/posix.h | 12 ++- src/win32/posix_w32.c | 86 +--------------- 5 files changed, 281 insertions(+), 87 deletions(-) diff --git a/src/iterator.c b/src/iterator.c index 8bab1aab0..80b7d5faa 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -984,6 +984,21 @@ static void fs_iterator__seek_frame_start( ff->index = 0; } +GIT_INLINE(int) path_dirload_with_stat( + const char *path, + size_t prefix_len, + unsigned int flags, + const char *start_stat, + const char *end_stat, + git_vector *contents) +{ +#if defined(GIT_WIN32) && !defined(__MINGW32__) + return git_win32_path_dirload_with_stat(path, prefix_len, flags, start_stat, end_stat, contents); +#else + return git_path_dirload_with_stat(path, prefix_len, flags, start_stat, end_stat, contents); +#endif +} + static int fs_iterator__expand_dir(fs_iterator *fi) { int error; @@ -998,7 +1013,7 @@ static int fs_iterator__expand_dir(fs_iterator *fi) ff = fs_iterator__alloc_frame(fi); GITERR_CHECK_ALLOC(ff); - error = git_path_dirload_with_stat( + error = path_dirload_with_stat( fi->path.ptr, fi->root_len, fi->dirload_flags, fi->base.start, fi->base.end, &ff->entries); @@ -1350,6 +1365,7 @@ GIT_INLINE(git_dir_flag) git_entry__dir_flag(git_index_entry *entry) { ? S_ISDIR(entry->mode) ? 
GIT_DIR_FLAG_TRUE : GIT_DIR_FLAG_FALSE : GIT_DIR_FLAG_UNKNOWN; #else + GIT_UNUSED(entry); return GIT_DIR_FLAG_UNKNOWN; #endif } diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c index d66969c4d..e9bc64a5f 100644 --- a/src/win32/path_w32.c +++ b/src/win32/path_w32.c @@ -9,6 +9,9 @@ #include "path.h" #include "path_w32.h" #include "utf-conv.h" +#include "posix.h" +#include "reparse.h" +#include "dir.h" #define PATH__NT_NAMESPACE L"\\\\?\\" #define PATH__NT_NAMESPACE_LEN 4 @@ -27,6 +30,8 @@ #define path__is_unc(p) \ (((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/')) +#define PATH__MAX_UNC_LEN (32767) + GIT_INLINE(int) path__cwd(wchar_t *path, int size) { int len; @@ -303,3 +308,223 @@ char *git_win32_path_8dot3_name(const char *path) return shortname; } + +#if !defined(__MINGW32__) +int git_win32_path_dirload_with_stat( + const char *path, + size_t prefix_len, + unsigned int flags, + const char *start_stat, + const char *end_stat, + git_vector *contents) +{ + int error = 0; + git_path_with_stat *ps; + git_win32_path pathw; + DIR *dir; + int(*strncomp)(const char *a, const char *b, size_t sz); + size_t cmp_len; + size_t start_len = start_stat ? strlen(start_stat) : 0; + size_t end_len = end_stat ? strlen(end_stat) : 0; + size_t path_size = strlen(path); + const char *repo_path = path + prefix_len; + size_t repo_path_len = strlen(repo_path); + char work_path[PATH__MAX_UNC_LEN]; + git_win32_path target; + size_t path_len; + int fMode; + + if (!git_win32__findfirstfile_filter(pathw, path)) { + error = -1; + giterr_set(GITERR_OS, "Could not parse the path '%s'", path); + goto clean_up_and_exit; + } + + strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 + ? git__strncasecmp + : git__strncmp; + + /* use of FIND_FIRST_EX_LARGE_FETCH flag in the FindFirstFileExW call could benefit perormance + * here when querying large repositories on Windows 7 (0x0600) or newer versions of Windows. 
+ * doing so could introduce compatibility issues on older versions of Windows. */ + dir = git__calloc(1, sizeof(DIR)); + dir->h = FindFirstFileExW(pathw, FindExInfoBasic, &dir->f, FindExSearchNameMatch, NULL, 0); + dir->first = 1; + if (dir->h == INVALID_HANDLE_VALUE) { + error = -1; + giterr_set(GITERR_OS, "Could not open directory '%s'", path); + goto clean_up_and_exit; + } + + if (repo_path_len > PATH__MAX_UNC_LEN) { + error = -1; + giterr_set(GITERR_OS, "Could not open directory '%s'", path); + goto clean_up_and_exit; + } + + memcpy(work_path, repo_path, repo_path_len); + + while (dir) { + if (!git_path_is_dot_or_dotdotW(dir->f.cFileName)) { + path_len = git__utf16_to_8(work_path + repo_path_len, ARRAYSIZE(work_path) - repo_path_len, dir->f.cFileName); + + work_path[path_len + repo_path_len] = '\0'; + path_len = path_len + repo_path_len; + + cmp_len = min(start_len, path_len); + if (!(cmp_len && strncomp(work_path, start_stat, cmp_len) < 0)) { + cmp_len = min(end_len, path_len); + if (!(cmp_len && strncomp(work_path, end_stat, cmp_len) > 0)) { + fMode = S_IREAD; + + if (dir->f.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + fMode |= S_IFDIR; + else + fMode |= S_IFREG; + + if (!(dir->f.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) + fMode |= S_IWRITE; + + ps = git__calloc(1, sizeof(git_path_with_stat) + path_len + 2); + memcpy(ps->path, work_path, path_len + 1); + ps->path_len = path_len; + ps->st.st_atime = filetime_to_time_t(&dir->f.ftLastAccessTime); + ps->st.st_ctime = filetime_to_time_t(&dir->f.ftCreationTime); + ps->st.st_mtime = filetime_to_time_t(&dir->f.ftLastWriteTime); + ps->st.st_size = dir->f.nFileSizeHigh; + ps->st.st_size <<= 32; + ps->st.st_size |= dir->f.nFileSizeLow; + ps->st.st_dev = ps->st.st_rdev = (_getdrive() - 1); + ps->st.st_mode = (mode_t)fMode; + ps->st.st_ino = 0; + ps->st.st_gid = 0; + ps->st.st_uid = 0; + ps->st.st_nlink = 1; + + if (dir->f.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { + if (git_win32_path_readlink_w(target, 
dir->f.cFileName) >= 0) { + ps->st.st_mode = (ps->st.st_mode & ~S_IFMT) | S_IFLNK; + + /* st_size gets the UTF-8 length of the target name, in bytes, + * not counting the NULL terminator */ + if ((ps->st.st_size = git__utf16_to_8(NULL, 0, target)) < 0) { + error = -1; + giterr_set(GITERR_OS, "Could not manage reparse link '%s'", dir->f.cFileName); + goto clean_up_and_exit; + } + } + } + + if (S_ISDIR(ps->st.st_mode)) { + ps->path[ps->path_len++] = '/'; + ps->path[ps->path_len] = '\0'; + } else if (!S_ISREG(ps->st.st_mode) && !S_ISLNK(ps->st.st_mode)) { + git__free(ps); + ps = NULL; + } + + if (ps) + git_vector_insert(contents, ps); + } + } + } + + memset(&dir->f, 0, sizeof(git_path_with_stat)); + dir->first = 0; + + if (!FindNextFileW(dir->h, &dir->f)) { + if (GetLastError() == ERROR_NO_MORE_FILES) + break; + else { + error = -1; + giterr_set(GITERR_OS, "Could not get attributes for file in '%s'", path); + goto clean_up_and_exit; + } + } + } + + /* sort now that directory suffix is added */ + git_vector_sort(contents); + +clean_up_and_exit: + + if (dir) { + FindClose(dir->h); + free(dir); + } + + return error; +} +#endif + +static bool path_is_volume(wchar_t *target, size_t target_len) +{ + return (target_len && wcsncmp(target, L"\\??\\Volume{", 11) == 0); +} + +/* On success, returns the length, in characters, of the path stored in dest. +* On failure, returns a negative value. 
*/ +int git_win32_path_readlink_w(git_win32_path dest, const git_win32_path path) +{ + BYTE buf[MAXIMUM_REPARSE_DATA_BUFFER_SIZE]; + GIT_REPARSE_DATA_BUFFER *reparse_buf = (GIT_REPARSE_DATA_BUFFER *)buf; + HANDLE handle = NULL; + DWORD ioctl_ret; + wchar_t *target; + size_t target_len; + + int error = -1; + + handle = CreateFileW(path, GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, + FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS, NULL); + + if (handle == INVALID_HANDLE_VALUE) { + errno = ENOENT; + return -1; + } + + if (!DeviceIoControl(handle, FSCTL_GET_REPARSE_POINT, NULL, 0, + reparse_buf, sizeof(buf), &ioctl_ret, NULL)) { + errno = EINVAL; + goto on_error; + } + + switch (reparse_buf->ReparseTag) { + case IO_REPARSE_TAG_SYMLINK: + target = reparse_buf->SymbolicLinkReparseBuffer.PathBuffer + + (reparse_buf->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)); + target_len = reparse_buf->SymbolicLinkReparseBuffer.SubstituteNameLength / sizeof(WCHAR); + break; + case IO_REPARSE_TAG_MOUNT_POINT: + target = reparse_buf->MountPointReparseBuffer.PathBuffer + + (reparse_buf->MountPointReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)); + target_len = reparse_buf->MountPointReparseBuffer.SubstituteNameLength / sizeof(WCHAR); + break; + default: + errno = EINVAL; + goto on_error; + } + + if (path_is_volume(target, target_len)) { + /* This path is a reparse point that represents another volume mounted + * at this location, it is not a symbolic link our input was canonical. + */ + errno = EINVAL; + error = -1; + } else if (target_len) { + /* The path may need to have a prefix removed. */ + target_len = git_win32__canonicalize_path(target, target_len); + + /* Need one additional character in the target buffer + * for the terminating NULL. 
*/ + if (GIT_WIN_PATH_UTF16 > target_len) { + wcscpy(dest, target); + error = (int)target_len; + } + } + +on_error: + CloseHandle(handle); + return error; +} diff --git a/src/win32/path_w32.h b/src/win32/path_w32.h index 033afbb0f..57ce732d6 100644 --- a/src/win32/path_w32.h +++ b/src/win32/path_w32.h @@ -8,6 +8,7 @@ #define INCLUDE_git_path_w32_h__ #include "common.h" +#include "vector.h" /* * Provides a large enough buffer to support Windows paths: MAX_PATH is @@ -79,4 +80,30 @@ extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src); */ extern char *git_win32_path_8dot3_name(const char *path); +#if !defined(__MINGW32__) +/** + * Load all directory entries along with stat info into a vector. + * Performed in a single pass per directory for optimized performance on Windows. + * + * This adds four things on top of plain `git_path_dirload`: + * + * 1. Each entry in the vector is a `git_path_with_stat` struct that + * contains both the path and the stat info + * 2. The entries will be sorted alphabetically + * 3. Entries that are directories will be suffixed with a '/' + * 4. Optionally, you can be a start and end prefix and only elements + * after the start and before the end (inclusively) will be stat'ed. 
+ * + * @param path The directory to read from + * @param prefix_len The trailing part of path to prefix to entry paths + * @param flags GIT_PATH_DIR flags from above + * @param start_stat As optimization, only stat values after this prefix + * @param end_stat As optimization, only stat values before this prefix + * @param contents Vector to fill with git_path_with_stat structures + */ +extern int git_win32_path_dirload_with_stat(const char *path, size_t prefix_len, unsigned int flags, const char *start_stat, const char *end_stat, git_vector *contents); +#endif + +extern int git_win32_path_readlink_w(git_win32_path dest, const git_win32_path path); + #endif diff --git a/src/win32/posix.h b/src/win32/posix.h index 4bc6bfe2e..1a1ae76b2 100644 --- a/src/win32/posix.h +++ b/src/win32/posix.h @@ -49,7 +49,15 @@ extern int p_ftruncate(int fd, git_off_t size); */ extern int p_lstat_posixly(const char *filename, struct stat *buf); -extern struct tm * p_localtime_r (const time_t *timer, struct tm *result); -extern struct tm * p_gmtime_r (const time_t *timer, struct tm *result); +extern struct tm * p_localtime_r(const time_t *timer, struct tm *result); +extern struct tm * p_gmtime_r(const time_t *timer, struct tm *result); + +GIT_INLINE(time_t) filetime_to_time_t(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + winTime -= 116444736000000000LL; /* Windows to Unix Epoch conversion */ + winTime /= 10000000; /* Nano to seconds resolution */ + return (time_t)winTime; +} #endif diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c index 544b1ebd5..1c490a8e9 100644 --- a/src/win32/posix_w32.c +++ b/src/win32/posix_w32.c @@ -130,88 +130,6 @@ int p_fsync(int fd) return 0; } -GIT_INLINE(time_t) filetime_to_time_t(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - winTime -= 116444736000000000LL; /* Windows to Unix Epoch conversion */ - winTime /= 10000000; /* Nano to 
seconds resolution */ - return (time_t)winTime; -} - -static bool path_is_volume(wchar_t *target, size_t target_len) -{ - return (target_len && wcsncmp(target, L"\\??\\Volume{", 11) == 0); -} - -/* On success, returns the length, in characters, of the path stored in dest. - * On failure, returns a negative value. */ -static int readlink_w( - git_win32_path dest, - const git_win32_path path) -{ - BYTE buf[MAXIMUM_REPARSE_DATA_BUFFER_SIZE]; - GIT_REPARSE_DATA_BUFFER *reparse_buf = (GIT_REPARSE_DATA_BUFFER *)buf; - HANDLE handle = NULL; - DWORD ioctl_ret; - wchar_t *target; - size_t target_len; - - int error = -1; - - handle = CreateFileW(path, GENERIC_READ, - FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, - FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS, NULL); - - if (handle == INVALID_HANDLE_VALUE) { - errno = ENOENT; - return -1; - } - - if (!DeviceIoControl(handle, FSCTL_GET_REPARSE_POINT, NULL, 0, - reparse_buf, sizeof(buf), &ioctl_ret, NULL)) { - errno = EINVAL; - goto on_error; - } - - switch (reparse_buf->ReparseTag) { - case IO_REPARSE_TAG_SYMLINK: - target = reparse_buf->SymbolicLinkReparseBuffer.PathBuffer + - (reparse_buf->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)); - target_len = reparse_buf->SymbolicLinkReparseBuffer.SubstituteNameLength / sizeof(WCHAR); - break; - case IO_REPARSE_TAG_MOUNT_POINT: - target = reparse_buf->MountPointReparseBuffer.PathBuffer + - (reparse_buf->MountPointReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)); - target_len = reparse_buf->MountPointReparseBuffer.SubstituteNameLength / sizeof(WCHAR); - break; - default: - errno = EINVAL; - goto on_error; - } - - if (path_is_volume(target, target_len)) { - /* This path is a reparse point that represents another volume mounted - * at this location, it is not a symbolic link our input was canonical. - */ - errno = EINVAL; - error = -1; - } else if (target_len) { - /* The path may need to have a prefix removed. 
*/ - target_len = git_win32__canonicalize_path(target, target_len); - - /* Need one additional character in the target buffer - * for the terminating NULL. */ - if (GIT_WIN_PATH_UTF16 > target_len) { - wcscpy(dest, target); - error = (int)target_len; - } - } - -on_error: - CloseHandle(handle); - return error; -} - #define WIN32_IS_WSEP(CH) ((CH) == L'/' || (CH) == L'\\') static int lstat_w( @@ -249,7 +167,7 @@ static int lstat_w( if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { git_win32_path target; - if (readlink_w(target, path) >= 0) { + if (git_win32_path_readlink_w(target, path) >= 0) { buf->st_mode = (buf->st_mode & ~S_IFMT) | S_IFLNK; /* st_size gets the UTF-8 length of the target name, in bytes, @@ -331,7 +249,7 @@ int p_readlink(const char *path, char *buf, size_t bufsiz) * we need to buffer the result on the stack. */ if (git_win32_path_from_utf8(path_w, path) < 0 || - readlink_w(target_w, path_w) < 0 || + git_win32_path_readlink_w(target_w, path_w) < 0 || (len = git_win32_path_to_utf8(target, target_w)) < 0) return -1; From f3c444b87926bf1ae449f1820792bcdaf510f29e Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 27 Apr 2015 17:47:51 -0400 Subject: [PATCH 03/16] win32: abstract file attributes -> struct stat fn --- src/win32/path_w32.c | 47 ++++++---------------------- src/win32/posix.h | 8 ----- src/win32/posix_w32.c | 36 +--------------------- src/win32/w32_util.h | 72 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 81 deletions(-) diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c index e9bc64a5f..a1ecce435 100644 --- a/src/win32/path_w32.c +++ b/src/win32/path_w32.c @@ -330,9 +330,7 @@ int git_win32_path_dirload_with_stat( const char *repo_path = path + prefix_len; size_t repo_path_len = strlen(repo_path); char work_path[PATH__MAX_UNC_LEN]; - git_win32_path target; size_t path_len; - int fMode; if (!git_win32__findfirstfile_filter(pathw, path)) { error = -1; @@ -374,46 +372,19 @@ int 
git_win32_path_dirload_with_stat( cmp_len = min(start_len, path_len); if (!(cmp_len && strncomp(work_path, start_stat, cmp_len) < 0)) { cmp_len = min(end_len, path_len); + if (!(cmp_len && strncomp(work_path, end_stat, cmp_len) > 0)) { - fMode = S_IREAD; - - if (dir->f.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - fMode |= S_IFDIR; - else - fMode |= S_IFREG; - - if (!(dir->f.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) - fMode |= S_IWRITE; - ps = git__calloc(1, sizeof(git_path_with_stat) + path_len + 2); + + if ((error = git_win32__file_attribute_to_stat(&ps->st, + (WIN32_FILE_ATTRIBUTE_DATA *)&dir->f, + NULL)) < 0) { + git__free(ps); + goto clean_up_and_exit; + } + memcpy(ps->path, work_path, path_len + 1); ps->path_len = path_len; - ps->st.st_atime = filetime_to_time_t(&dir->f.ftLastAccessTime); - ps->st.st_ctime = filetime_to_time_t(&dir->f.ftCreationTime); - ps->st.st_mtime = filetime_to_time_t(&dir->f.ftLastWriteTime); - ps->st.st_size = dir->f.nFileSizeHigh; - ps->st.st_size <<= 32; - ps->st.st_size |= dir->f.nFileSizeLow; - ps->st.st_dev = ps->st.st_rdev = (_getdrive() - 1); - ps->st.st_mode = (mode_t)fMode; - ps->st.st_ino = 0; - ps->st.st_gid = 0; - ps->st.st_uid = 0; - ps->st.st_nlink = 1; - - if (dir->f.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if (git_win32_path_readlink_w(target, dir->f.cFileName) >= 0) { - ps->st.st_mode = (ps->st.st_mode & ~S_IFMT) | S_IFLNK; - - /* st_size gets the UTF-8 length of the target name, in bytes, - * not counting the NULL terminator */ - if ((ps->st.st_size = git__utf16_to_8(NULL, 0, target)) < 0) { - error = -1; - giterr_set(GITERR_OS, "Could not manage reparse link '%s'", dir->f.cFileName); - goto clean_up_and_exit; - } - } - } if (S_ISDIR(ps->st.st_mode)) { ps->path[ps->path_len++] = '/'; diff --git a/src/win32/posix.h b/src/win32/posix.h index 1a1ae76b2..bf35c8125 100644 --- a/src/win32/posix.h +++ b/src/win32/posix.h @@ -52,12 +52,4 @@ extern int p_lstat_posixly(const char *filename, struct stat *buf); 
extern struct tm * p_localtime_r(const time_t *timer, struct tm *result); extern struct tm * p_gmtime_r(const time_t *timer, struct tm *result); -GIT_INLINE(time_t) filetime_to_time_t(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - winTime -= 116444736000000000LL; /* Windows to Unix Epoch conversion */ - winTime /= 10000000; /* Nano to seconds resolution */ - return (time_t)winTime; -} - #endif diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c index 1c490a8e9..332ea233c 100644 --- a/src/win32/posix_w32.c +++ b/src/win32/posix_w32.c @@ -140,44 +140,10 @@ static int lstat_w( WIN32_FILE_ATTRIBUTE_DATA fdata; if (GetFileAttributesExW(path, GetFileExInfoStandard, &fdata)) { - int fMode = S_IREAD; - if (!buf) return 0; - if (fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - fMode |= S_IFDIR; - else - fMode |= S_IFREG; - - if (!(fdata.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) - fMode |= S_IWRITE; - - buf->st_ino = 0; - buf->st_gid = 0; - buf->st_uid = 0; - buf->st_nlink = 1; - buf->st_mode = (mode_t)fMode; - buf->st_size = ((git_off_t)fdata.nFileSizeHigh << 32) + fdata.nFileSizeLow; - buf->st_dev = buf->st_rdev = (_getdrive() - 1); - buf->st_atime = filetime_to_time_t(&(fdata.ftLastAccessTime)); - buf->st_mtime = filetime_to_time_t(&(fdata.ftLastWriteTime)); - buf->st_ctime = filetime_to_time_t(&(fdata.ftCreationTime)); - - if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - git_win32_path target; - - if (git_win32_path_readlink_w(target, path) >= 0) { - buf->st_mode = (buf->st_mode & ~S_IFMT) | S_IFLNK; - - /* st_size gets the UTF-8 length of the target name, in bytes, - * not counting the NULL terminator */ - if ((buf->st_size = git__utf16_to_8(NULL, 0, target)) < 0) - return -1; - } - } - - return 0; + return git_win32__file_attribute_to_stat(buf, &fdata, path); } errno = ENOENT; diff --git a/src/win32/w32_util.h b/src/win32/w32_util.h index 9c1b94359..8cb0f5b94 100644 --- 
a/src/win32/w32_util.h +++ b/src/win32/w32_util.h @@ -9,8 +9,21 @@ #define INCLUDE_w32_util_h__ #include "utf-conv.h" +#include "posix.h" #include "path_w32.h" +/* + +#include "common.h" +#include "path.h" +#include "path_w32.h" +#include "utf-conv.h" +#include "posix.h" +#include "reparse.h" +#include "dir.h" +*/ + + GIT_INLINE(bool) git_win32__isalpha(wchar_t c) { return ((c >= L'A' && c <= L'Z') || (c >= L'a' && c <= L'z')); @@ -52,4 +65,63 @@ size_t git_win32__path_trim_end(wchar_t *str, size_t len); */ size_t git_win32__canonicalize_path(wchar_t *str, size_t len); +/** + * Converts a FILETIME structure to a time_t. + * + * @param FILETIME A pointer to a FILETIME + * @return A time_t containing the same time + */ +GIT_INLINE(time_t) git_win32__filetime_to_time_t(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + winTime -= 116444736000000000LL; /* Windows to Unix Epoch conversion */ + winTime /= 10000000; /* Nano to seconds resolution */ + return (time_t)winTime; +} + +GIT_INLINE(int) git_win32__file_attribute_to_stat( + struct stat *st, + const WIN32_FILE_ATTRIBUTE_DATA *attrdata, + const wchar_t *path) +{ + mode_t mode = S_IREAD; + + if (attrdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + mode |= S_IFDIR; + else + mode |= S_IFREG; + + if ((attrdata->dwFileAttributes & FILE_ATTRIBUTE_READONLY) == 0) + mode |= S_IWRITE; + + st->st_ino = 0; + st->st_gid = 0; + st->st_uid = 0; + st->st_nlink = 1; + st->st_mode = mode; + st->st_size = ((git_off_t)attrdata->nFileSizeHigh << 32) + attrdata->nFileSizeLow; + st->st_dev = _getdrive() - 1; + st->st_rdev = st->st_dev; + st->st_atime = git_win32__filetime_to_time_t(&(attrdata->ftLastAccessTime)); + st->st_mtime = git_win32__filetime_to_time_t(&(attrdata->ftLastWriteTime)); + st->st_ctime = git_win32__filetime_to_time_t(&(attrdata->ftCreationTime)); + + if (attrdata->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && path) { + git_win32_path target; + + if 
(git_win32_path_readlink_w(target, path) >= 0) { + st->st_mode = (st->st_mode & ~S_IFMT) | S_IFLNK; + + /* st_size gets the UTF-8 length of the target name, in bytes, + * not counting the NULL terminator */ + if ((st->st_size = git__utf16_to_8(NULL, 0, target)) < 0) { + giterr_set(GITERR_OS, "Could not convert reparse point name for '%s'", path); + return -1; + } + } + } + + return 0; +} + #endif From e05531ddbdb03fb7698f8122a71862ad11afe812 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 27 Apr 2015 18:02:06 -0400 Subject: [PATCH 04/16] win32 dirload: don't heap allocate DIR structure --- src/win32/path_w32.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c index a1ecce435..f793cbd1f 100644 --- a/src/win32/path_w32.c +++ b/src/win32/path_w32.c @@ -321,7 +321,7 @@ int git_win32_path_dirload_with_stat( int error = 0; git_path_with_stat *ps; git_win32_path pathw; - DIR *dir; + DIR dir = {0}; int(*strncomp)(const char *a, const char *b, size_t sz); size_t cmp_len; size_t start_len = start_stat ? strlen(start_stat) : 0; @@ -333,22 +333,21 @@ int git_win32_path_dirload_with_stat( size_t path_len; if (!git_win32__findfirstfile_filter(pathw, path)) { - error = -1; giterr_set(GITERR_OS, "Could not parse the path '%s'", path); - goto clean_up_and_exit; + return -1; } - strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 - ? git__strncasecmp - : git__strncmp; + strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ? + git__strncasecmp : git__strncmp; + /* use of FIND_FIRST_EX_LARGE_FETCH flag in the FindFirstFileExW call could benefit perormance * here when querying large repositories on Windows 7 (0x0600) or newer versions of Windows. * doing so could introduce compatibility issues on older versions of Windows. 
*/ - dir = git__calloc(1, sizeof(DIR)); - dir->h = FindFirstFileExW(pathw, FindExInfoBasic, &dir->f, FindExSearchNameMatch, NULL, 0); - dir->first = 1; - if (dir->h == INVALID_HANDLE_VALUE) { + dir.h = FindFirstFileExW(pathw, FindExInfoBasic, &dir.f, FindExSearchNameMatch, NULL, 0); + dir.first = 1; + + if (dir.h == INVALID_HANDLE_VALUE) { error = -1; giterr_set(GITERR_OS, "Could not open directory '%s'", path); goto clean_up_and_exit; @@ -362,9 +361,9 @@ int git_win32_path_dirload_with_stat( memcpy(work_path, repo_path, repo_path_len); - while (dir) { - if (!git_path_is_dot_or_dotdotW(dir->f.cFileName)) { - path_len = git__utf16_to_8(work_path + repo_path_len, ARRAYSIZE(work_path) - repo_path_len, dir->f.cFileName); + while (1) { + if (!git_path_is_dot_or_dotdotW(dir.f.cFileName)) { + path_len = git__utf16_to_8(work_path + repo_path_len, ARRAYSIZE(work_path) - repo_path_len, dir.f.cFileName); work_path[path_len + repo_path_len] = '\0'; path_len = path_len + repo_path_len; @@ -377,7 +376,7 @@ int git_win32_path_dirload_with_stat( ps = git__calloc(1, sizeof(git_path_with_stat) + path_len + 2); if ((error = git_win32__file_attribute_to_stat(&ps->st, - (WIN32_FILE_ATTRIBUTE_DATA *)&dir->f, + (WIN32_FILE_ATTRIBUTE_DATA *)&dir.f, NULL)) < 0) { git__free(ps); goto clean_up_and_exit; @@ -400,10 +399,10 @@ int git_win32_path_dirload_with_stat( } } - memset(&dir->f, 0, sizeof(git_path_with_stat)); - dir->first = 0; + memset(&dir.f, 0, sizeof(git_path_with_stat)); + dir.first = 0; - if (!FindNextFileW(dir->h, &dir->f)) { + if (!FindNextFileW(dir.h, &dir.f)) { if (GetLastError() == ERROR_NO_MORE_FILES) break; else { @@ -418,11 +417,8 @@ int git_win32_path_dirload_with_stat( git_vector_sort(contents); clean_up_and_exit: - - if (dir) { - FindClose(dir->h); - free(dir); - } + if (dir.h != INVALID_HANDLE_VALUE) + FindClose(dir.h); return error; } From b3f6cef066844fb27e649b8f2c0daca6f596bc81 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Tue, 28 Apr 2015 11:16:42 -0400 
Subject: [PATCH 05/16] dirload: loop conditional; less path mangling --- src/win32/path_w32.c | 169 +++++++++++++++++++++++++++---------------- 1 file changed, 106 insertions(+), 63 deletions(-) diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c index f793cbd1f..eda6e2696 100644 --- a/src/win32/path_w32.c +++ b/src/win32/path_w32.c @@ -32,6 +32,11 @@ #define PATH__MAX_UNC_LEN (32767) +/* Using _FIND_FIRST_EX_LARGE_FETCH may increase performance in Windows 7 + * and better. Prior versions will ignore this. + */ +#define _FIND_FIRST_EX_LARGE_FETCH 2 + GIT_INLINE(int) path__cwd(wchar_t *path, int size) { int len; @@ -309,6 +314,45 @@ char *git_win32_path_8dot3_name(const char *path) return shortname; } +GIT_INLINE(int) path_with_stat_alloc( + git_path_with_stat **out, + const char *parent_path, + size_t parent_path_len, + const char *child_path, + size_t child_path_len, + bool trailing_slash) +{ + git_path_with_stat *ps; + int inner_slash = + (parent_path_len > 0 && parent_path[parent_path_len-1] != '/'); + size_t path_len, ps_size; + + GITERR_CHECK_ALLOC_ADD(&path_len, parent_path_len, inner_slash); + GITERR_CHECK_ALLOC_ADD(&path_len, path_len, child_path_len); + GITERR_CHECK_ALLOC_ADD(&path_len, path_len, trailing_slash ? 1 : 0); + + GITERR_CHECK_ALLOC_ADD(&ps_size, sizeof(git_path_with_stat), path_len); + + ps = git__calloc(1, ps_size); + GITERR_CHECK_ALLOC(ps); + + if (parent_path_len) + memcpy(ps->path, parent_path, parent_path_len); + + if (inner_slash) + ps->path[parent_path_len] = '/'; + + memcpy(&ps->path[parent_path_len + inner_slash], child_path, child_path_len); + + if (trailing_slash) + ps->path[path_len-1] = '/'; + + ps->path_len = path_len; + + *out = ps; + return 0; +} + #if !defined(__MINGW32__) int git_win32_path_dirload_with_stat( const char *path, @@ -326,11 +370,9 @@ int git_win32_path_dirload_with_stat( size_t cmp_len; size_t start_len = start_stat ? strlen(start_stat) : 0; size_t end_len = end_stat ? 
strlen(end_stat) : 0; - size_t path_size = strlen(path); - const char *repo_path = path + prefix_len; - size_t repo_path_len = strlen(repo_path); char work_path[PATH__MAX_UNC_LEN]; - size_t path_len; + const char *suffix; + size_t path_len, work_path_len, suffix_len; if (!git_win32__findfirstfile_filter(pathw, path)) { giterr_set(GITERR_OS, "Could not parse the path '%s'", path); @@ -338,14 +380,23 @@ int git_win32_path_dirload_with_stat( } strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ? - git__strncasecmp : git__strncmp; + git__strncasecmp : git__strncmp; + path_len = strlen(path); + + suffix = path + prefix_len; + suffix_len = path_len - prefix_len; /* use of FIND_FIRST_EX_LARGE_FETCH flag in the FindFirstFileExW call could benefit perormance * here when querying large repositories on Windows 7 (0x0600) or newer versions of Windows. * doing so could introduce compatibility issues on older versions of Windows. */ - dir.h = FindFirstFileExW(pathw, FindExInfoBasic, &dir.f, FindExSearchNameMatch, NULL, 0); - dir.first = 1; + dir.h = FindFirstFileExW( + pathw, + FindExInfoBasic, + &dir.f, + FindExSearchNameMatch, + NULL, + _FIND_FIRST_EX_LARGE_FETCH); if (dir.h == INVALID_HANDLE_VALUE) { error = -1; @@ -353,66 +404,58 @@ int git_win32_path_dirload_with_stat( goto clean_up_and_exit; } - if (repo_path_len > PATH__MAX_UNC_LEN) { + do { + if (git_path_is_dot_or_dotdotW(dir.f.cFileName)) + continue; + + if ((work_path_len = git__utf16_to_8(work_path, PATH__MAX_UNC_LEN, dir.f.cFileName)) < 0) { + error = -1; + giterr_set(GITERR_OS, "Could not convert path to UTF-8 (path too long?)"); + goto clean_up_and_exit; + } + work_path[work_path_len] = '\0'; + + /* TODO: what about junctions to directories? 
*/ + if ((error = path_with_stat_alloc(&ps, + suffix, suffix_len, + work_path, work_path_len, + (dir.f.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)) < 0) + goto clean_up_and_exit; + + /* skip if before start_stat or after end_stat */ + cmp_len = min(start_len, work_path_len); + if (cmp_len && strncomp(ps->path, start_stat, cmp_len) < 0) { + git__free(ps); + continue; + } + + cmp_len = min(end_len, work_path_len); + if (cmp_len && strncomp(ps->path, end_stat, cmp_len) > 0) { + git__free(ps); + continue; + } + + if ((error = git_win32__file_attribute_to_stat(&ps->st, + (WIN32_FILE_ATTRIBUTE_DATA *)&dir.f, + NULL)) < 0) { + git__free(ps); + goto clean_up_and_exit; + } + + if (!S_ISDIR(ps->st.st_mode) && !S_ISREG(ps->st.st_mode) && !S_ISLNK(ps->st.st_mode)) { + git__free(ps); + continue; + } + + git_vector_insert(contents, ps); + } while (FindNextFileW(dir.h, &dir.f)); + + if (GetLastError() != ERROR_NO_MORE_FILES) { error = -1; - giterr_set(GITERR_OS, "Could not open directory '%s'", path); + giterr_set(GITERR_OS, "Could not get attributes for file in '%s'", path); goto clean_up_and_exit; } - memcpy(work_path, repo_path, repo_path_len); - - while (1) { - if (!git_path_is_dot_or_dotdotW(dir.f.cFileName)) { - path_len = git__utf16_to_8(work_path + repo_path_len, ARRAYSIZE(work_path) - repo_path_len, dir.f.cFileName); - - work_path[path_len + repo_path_len] = '\0'; - path_len = path_len + repo_path_len; - - cmp_len = min(start_len, path_len); - if (!(cmp_len && strncomp(work_path, start_stat, cmp_len) < 0)) { - cmp_len = min(end_len, path_len); - - if (!(cmp_len && strncomp(work_path, end_stat, cmp_len) > 0)) { - ps = git__calloc(1, sizeof(git_path_with_stat) + path_len + 2); - - if ((error = git_win32__file_attribute_to_stat(&ps->st, - (WIN32_FILE_ATTRIBUTE_DATA *)&dir.f, - NULL)) < 0) { - git__free(ps); - goto clean_up_and_exit; - } - - memcpy(ps->path, work_path, path_len + 1); - ps->path_len = path_len; - - if (S_ISDIR(ps->st.st_mode)) { - 
ps->path[ps->path_len++] = '/'; - ps->path[ps->path_len] = '\0'; - } else if (!S_ISREG(ps->st.st_mode) && !S_ISLNK(ps->st.st_mode)) { - git__free(ps); - ps = NULL; - } - - if (ps) - git_vector_insert(contents, ps); - } - } - } - - memset(&dir.f, 0, sizeof(git_path_with_stat)); - dir.first = 0; - - if (!FindNextFileW(dir.h, &dir.f)) { - if (GetLastError() == ERROR_NO_MORE_FILES) - break; - else { - error = -1; - giterr_set(GITERR_OS, "Could not get attributes for file in '%s'", path); - goto clean_up_and_exit; - } - } - } - /* sort now that directory suffix is added */ git_vector_sort(contents); From c074d7a4c5293b6b396b5ffa18273f1df94efaed Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Tue, 28 Apr 2015 12:24:08 -0400 Subject: [PATCH 06/16] win32: mimic git_path_dirload_with_stat closely --- src/win32/path_w32.c | 70 ++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c index eda6e2696..2bcf4a349 100644 --- a/src/win32/path_w32.c +++ b/src/win32/path_w32.c @@ -30,8 +30,6 @@ #define path__is_unc(p) \ (((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/')) -#define PATH__MAX_UNC_LEN (32767) - /* Using _FIND_FIRST_EX_LARGE_FETCH may increase performance in Windows 7 * and better. Prior versions will ignore this. 
*/ @@ -318,20 +316,25 @@ GIT_INLINE(int) path_with_stat_alloc( git_path_with_stat **out, const char *parent_path, size_t parent_path_len, - const char *child_path, - size_t child_path_len, + const wchar_t *child_path_utf16, bool trailing_slash) { git_path_with_stat *ps; int inner_slash = (parent_path_len > 0 && parent_path[parent_path_len-1] != '/'); - size_t path_len, ps_size; + size_t path_len, child_path_len, ps_size; + + if ((child_path_len = git__utf16_to_8(NULL, 0, child_path_utf16)) < 0) { + giterr_set(GITERR_OS, "Could not convert path to UTF-8 (path too long?)"); + return -1; + } GITERR_CHECK_ALLOC_ADD(&path_len, parent_path_len, inner_slash); GITERR_CHECK_ALLOC_ADD(&path_len, path_len, child_path_len); GITERR_CHECK_ALLOC_ADD(&path_len, path_len, trailing_slash ? 1 : 0); GITERR_CHECK_ALLOC_ADD(&ps_size, sizeof(git_path_with_stat), path_len); + GITERR_CHECK_ALLOC_ADD(&ps_size, ps_size, 1); ps = git__calloc(1, ps_size); GITERR_CHECK_ALLOC(ps); @@ -342,7 +345,13 @@ GIT_INLINE(int) path_with_stat_alloc( if (inner_slash) ps->path[parent_path_len] = '/'; - memcpy(&ps->path[parent_path_len + inner_slash], child_path, child_path_len); + if (git__utf16_to_8( + &ps->path[parent_path_len + inner_slash], + child_path_len + 1, child_path_utf16) != child_path_len) { + git__free(ps); + giterr_set(GITERR_OS, "Could not convert path to UTF-8 (size changed)"); + return -1; + } if (trailing_slash) ps->path[path_len-1] = '/'; @@ -370,9 +379,8 @@ int git_win32_path_dirload_with_stat( size_t cmp_len; size_t start_len = start_stat ? strlen(start_stat) : 0; size_t end_len = end_stat ? 
strlen(end_stat) : 0; - char work_path[PATH__MAX_UNC_LEN]; const char *suffix; - size_t path_len, work_path_len, suffix_len; + size_t path_len, suffix_len; if (!git_win32__findfirstfile_filter(pathw, path)) { giterr_set(GITERR_OS, "Could not parse the path '%s'", path); @@ -387,9 +395,9 @@ int git_win32_path_dirload_with_stat( suffix = path + prefix_len; suffix_len = path_len - prefix_len; - /* use of FIND_FIRST_EX_LARGE_FETCH flag in the FindFirstFileExW call could benefit perormance - * here when querying large repositories on Windows 7 (0x0600) or newer versions of Windows. - * doing so could introduce compatibility issues on older versions of Windows. */ + /* We use FIND_FIRST_EX_LARGE_FETCH here for a minor perf bump; this + * flag should be ignored on previous version of Windows. + */ dir.h = FindFirstFileExW( pathw, FindExInfoBasic, @@ -408,46 +416,26 @@ int git_win32_path_dirload_with_stat( if (git_path_is_dot_or_dotdotW(dir.f.cFileName)) continue; - if ((work_path_len = git__utf16_to_8(work_path, PATH__MAX_UNC_LEN, dir.f.cFileName)) < 0) { - error = -1; - giterr_set(GITERR_OS, "Could not convert path to UTF-8 (path too long?)"); - goto clean_up_and_exit; - } - work_path[work_path_len] = '\0'; - - /* TODO: what about junctions to directories? 
*/ if ((error = path_with_stat_alloc(&ps, - suffix, suffix_len, - work_path, work_path_len, + suffix, suffix_len, dir.f.cFileName, (dir.f.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)) < 0) goto clean_up_and_exit; - /* skip if before start_stat or after end_stat */ - cmp_len = min(start_len, work_path_len); - if (cmp_len && strncomp(ps->path, start_stat, cmp_len) < 0) { - git__free(ps); - continue; - } + git_vector_insert(contents, ps); - cmp_len = min(end_len, work_path_len); - if (cmp_len && strncomp(ps->path, end_stat, cmp_len) > 0) { - git__free(ps); + /* skip stat if before start_stat or after end_stat */ + cmp_len = min(start_len, ps->path_len); + if (cmp_len && strncomp(ps->path, start_stat, cmp_len) < 0) + continue; + + cmp_len = min(end_len, ps->path_len); + if (cmp_len && strncomp(ps->path, end_stat, cmp_len) > 0) continue; - } if ((error = git_win32__file_attribute_to_stat(&ps->st, (WIN32_FILE_ATTRIBUTE_DATA *)&dir.f, - NULL)) < 0) { - git__free(ps); + NULL)) < 0) goto clean_up_and_exit; - } - - if (!S_ISDIR(ps->st.st_mode) && !S_ISREG(ps->st.st_mode) && !S_ISLNK(ps->st.st_mode)) { - git__free(ps); - continue; - } - - git_vector_insert(contents, ps); } while (FindNextFileW(dir.h, &dir.f)); if (GetLastError() != ERROR_NO_MORE_FILES) { From 544139f50bd7471a62135b29b6a1a2f7c64a1a1c Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Tue, 28 Apr 2015 16:39:47 -0400 Subject: [PATCH 07/16] win32: keep full path for realpath usage --- src/win32/path_w32.c | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c index 2bcf4a349..3dcc19c69 100644 --- a/src/win32/path_w32.c +++ b/src/win32/path_w32.c @@ -362,6 +362,30 @@ GIT_INLINE(int) path_with_stat_alloc( return 0; } +GIT_INLINE(int) path_add_base( + git_win32_path out, + size_t dir_len, + wchar_t *base, + size_t base_len) +{ + size_t out_len; + + if (GIT_ADD_SIZET_OVERFLOW(&out_len, dir_len, base_len) < 0 || + 
GIT_ADD_SIZET_OVERFLOW(&out_len, out_len, 2) < 0) + return -1; + + if (out_len > GIT_WIN_PATH_UTF16) { + giterr_set(GITERR_FILESYSTEM, "invalid path '%.*ls\\%.*ls' (path too long)", dir_len, out, base_len, base); + return -1; + } + + out[dir_len] = '\\'; + memcpy(&out[dir_len+1], base, base_len * sizeof(wchar_t)); + out[out_len-1] = '\0'; + + return 0; +} + #if !defined(__MINGW32__) int git_win32_path_dirload_with_stat( const char *path, @@ -373,16 +397,17 @@ int git_win32_path_dirload_with_stat( { int error = 0; git_path_with_stat *ps; - git_win32_path pathw; + git_win32_path path_filter, file_path; DIR dir = {0}; int(*strncomp)(const char *a, const char *b, size_t sz); - size_t cmp_len; size_t start_len = start_stat ? strlen(start_stat) : 0; size_t end_len = end_stat ? strlen(end_stat) : 0; + size_t path_len, suffix_len, cmp_len; const char *suffix; - size_t path_len, suffix_len; + int root_len; - if (!git_win32__findfirstfile_filter(pathw, path)) { + if ((root_len = git_win32_path_from_utf8(file_path, path)) < 0 || + !git_win32__findfirstfile_filter(path_filter, path)) { giterr_set(GITERR_OS, "Could not parse the path '%s'", path); return -1; } @@ -399,7 +424,7 @@ int git_win32_path_dirload_with_stat( * flag should be ignored on previous version of Windows. 
*/ dir.h = FindFirstFileExW( - pathw, + path_filter, FindExInfoBasic, &dir.f, FindExSearchNameMatch, @@ -416,7 +441,8 @@ int git_win32_path_dirload_with_stat( if (git_path_is_dot_or_dotdotW(dir.f.cFileName)) continue; - if ((error = path_with_stat_alloc(&ps, + if ((error = path_add_base(file_path, root_len, dir.f.cFileName, wcslen(dir.f.cFileName))) < 0 || + (error = path_with_stat_alloc(&ps, suffix, suffix_len, dir.f.cFileName, (dir.f.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)) < 0) goto clean_up_and_exit; @@ -433,9 +459,9 @@ int git_win32_path_dirload_with_stat( continue; if ((error = git_win32__file_attribute_to_stat(&ps->st, - (WIN32_FILE_ATTRIBUTE_DATA *)&dir.f, - NULL)) < 0) + (WIN32_FILE_ATTRIBUTE_DATA *)&dir.f, file_path)) < 0) { goto clean_up_and_exit; + } } while (FindNextFileW(dir.h, &dir.f)); if (GetLastError() != ERROR_NO_MORE_FILES) { From edbfc52cdd8657371c53070c5e09b58e004bb67a Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 29 Apr 2015 11:05:27 -0400 Subject: [PATCH 08/16] git_path: introduce 'git_path_diriter' Introduce a new `git_path_diriter` that can iterate directories efficiently for each platform. 
--- src/path.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++ src/path.h | 32 ++++++++++++ src/posix.h | 1 - src/unix/posix.h | 1 + 4 files changed, 163 insertions(+), 1 deletion(-) diff --git a/src/path.c b/src/path.c index 6a636bbd2..2390b4fd0 100644 --- a/src/path.c +++ b/src/path.c @@ -260,6 +260,20 @@ int git_path_root(const char *path) return -1; /* Not a real error - signals that path is not rooted */ } +void git_path_trim_slashes(git_buf *path) +{ + int ceiling = git_path_root(path->ptr) + 1; + assert(ceiling >= 0); + + while (path->size > (size_t)ceiling) { + if (path->ptr[path->size-1] != '/') + break; + + path->ptr[path->size-1] = '\0'; + path->size--; + } +} + int git_path_join_unrooted( git_buf *path_out, const char *path, const char *base, ssize_t *root_at) { @@ -1181,6 +1195,122 @@ int git_path_with_stat_cmp_icase(const void *a, const void *b) return strcasecmp(psa->path, psb->path); } +int git_path_diriter_init( + git_path_diriter *diriter, + const char *path, + unsigned int flags) +{ + assert(diriter && path); + + memset(diriter, 0, sizeof(git_path_diriter)); + + if (git_buf_puts(&diriter->path, path) < 0) + return -1; + + git_path_mkposix(diriter->path.ptr); + git_path_trim_slashes(&diriter->path); + + if ((diriter->dir = opendir(diriter->path.ptr)) == NULL) { + git_buf_free(&diriter->path); + + giterr_set(GITERR_OS, "Failed to open directory '%s'", path); + return -1; + } + +#ifdef GIT_USE_ICONV + if ((flags & GIT_PATH_DIR_PRECOMPOSE_UNICODE) != 0) + (void)git_path_iconv_init_precompose(&ic); +#endif + + diriter->parent_len = diriter->path.size; + diriter->flags = flags; + + return 0; +} + +int git_path_diriter_next( + const char **out, + size_t *out_len, + git_path_diriter *diriter) +{ + struct dirent *de; + const char *filename; + size_t filename_len; + bool skip_dot = !(diriter->flags & GIT_PATH_DIR_INCLUDE_DOT_AND_DOTDOT); + int error = 0; + + assert(out && out_len && diriter); + + *out = NULL; + *out_len = 0; + + errno = 0; + + do { + 
if ((de = readdir(diriter->dir)) == NULL) { + if (!errno) + return GIT_ITEROVER; + + giterr_set(GITERR_OS, + "Could not read directory '%s'", diriter->path); + return -1; + } + } while (skip_dot && git_path_is_dot_or_dotdot(de->d_name)); + + filename = de->d_name; + filename_len = strlen(filename); + +#ifdef GIT_USE_ICONV + if ((error = git_path_iconv(&diriter->ic, &filename, &filename_len)) < 0) + return error; +#endif + + git_buf_truncate(&diriter->path, diriter->parent_len); + git_buf_putc(&diriter->path, '/'); + git_buf_put(&diriter->path, filename, filename_len); + + if (git_buf_oom(&diriter->path)) + return -1; + + *out = &diriter->path.ptr[diriter->parent_len+1]; + *out_len = filename_len; + + return error; +} + +int git_path_diriter_fullpath( + const char **out, + size_t *out_len, + git_path_diriter *diriter) +{ + assert(out && out_len && diriter); + + *out = diriter->path.ptr; + *out_len = diriter->path.size; + return 0; +} + +int git_path_diriter_stat(struct stat *out, git_path_diriter *diriter) +{ + assert(out && diriter); + + return git_path_lstat(diriter->path.ptr, out); +} + +void git_path_diriter_free(git_path_diriter *diriter) +{ + if (diriter == NULL) + return; + + closedir(diriter->dir); + +#ifdef GIT_USE_ICONV + git_path_iconv_clear(&diriter->ic); +#endif + + git_buf_free(&diriter->path); +} + int git_path_dirload_with_stat( const char *path, size_t prefix_len, diff --git a/src/path.h b/src/path.h index 440b5420c..3a25d4aed 100644 --- a/src/path.h +++ b/src/path.h @@ -273,6 +273,7 @@ extern int git_path_apply_relative(git_buf *target, const char *relpath); enum { GIT_PATH_DIR_IGNORE_CASE = (1u << 0), GIT_PATH_DIR_PRECOMPOSE_UNICODE = (1u << 1), + GIT_PATH_DIR_INCLUDE_DOT_AND_DOTDOT = (1u << 2), }; /** @@ -326,6 +327,37 @@ extern int git_path_walk_up( int (*callback)(void *payload, const char *path), void *payload); +typedef struct git_path_diriter git_path_diriter; + +struct git_path_diriter +{ + git_buf path; + size_t parent_len; + + unsigned 
int flags; + + DIR *dir; +}; + +extern int git_path_diriter_init( + git_path_diriter *diriter, + const char *path, + unsigned int flags); + +extern int git_path_diriter_next( + const char **out, + size_t *out_len, + git_path_diriter *diriter); + +extern int git_path_diriter_fullpath( + const char **out, + size_t *out_len, + git_path_diriter *diriter); + +extern int git_path_diriter_stat(struct stat *out, git_path_diriter *diriter); + +extern void git_path_diriter_free(git_path_diriter *diriter); + /** * Load all directory entries (except '.' and '..') into a vector. * diff --git a/src/posix.h b/src/posix.h index 22f472c90..8785a4c99 100644 --- a/src/posix.h +++ b/src/posix.h @@ -122,7 +122,6 @@ extern int git__page_size(size_t *page_size); #include "strnlen.h" #ifdef NO_READDIR_R -# include GIT_INLINE(int) p_readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) { GIT_UNUSED(entry); diff --git a/src/unix/posix.h b/src/unix/posix.h index e4f3ac67a..8b4f427f7 100644 --- a/src/unix/posix.h +++ b/src/unix/posix.h @@ -8,6 +8,7 @@ #define INCLUDE_posix__unix_h__ #include +#include #include typedef int GIT_SOCKET; From 07bbc045c77d47e8eb245d599f0f753ed62fea9b Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 29 Apr 2015 11:58:10 -0400 Subject: [PATCH 09/16] git_path_dirload: use git_path_diriter --- src/path.c | 187 ++++++++++++++++++++++++++----------------- src/path.h | 10 +-- tests/diff/drivers.c | 2 +- 3 files changed, 116 insertions(+), 83 deletions(-) diff --git a/src/path.c b/src/path.c index 2390b4fd0..72bc917f4 100644 --- a/src/path.c +++ b/src/path.c @@ -1109,80 +1109,6 @@ static int entry_path_alloc( return 0; } -int git_path_dirload( - const char *path, - size_t prefix_len, - size_t alloc_extra, - unsigned int flags, - git_vector *contents) -{ - int error; - DIR *dir; - size_t path_len; - path_dirent_data de_data; - struct dirent *de, *de_buf = (struct dirent *)&de_data; - -#ifdef GIT_USE_ICONV - git_path_iconv_t ic = GIT_PATH_ICONV_INIT; 
-#endif - - GIT_UNUSED(flags); - - assert(path && contents); - - path_len = strlen(path); - - if (!path_len || path_len < prefix_len) { - giterr_set(GITERR_INVALID, "Invalid directory path '%s'", path); - return -1; - } - if ((dir = opendir(path)) == NULL) { - giterr_set(GITERR_OS, "Failed to open directory '%s'", path); - return -1; - } - -#ifdef GIT_USE_ICONV - if ((flags & GIT_PATH_DIR_PRECOMPOSE_UNICODE) != 0) - (void)git_path_iconv_init_precompose(&ic); -#endif - - path += prefix_len; - path_len -= prefix_len; - - while ((error = p_readdir_r(dir, de_buf, &de)) == 0 && de != NULL) { - char *entry_path, *de_path = de->d_name; - size_t de_len = strlen(de_path); - - if (git_path_is_dot_or_dotdot(de_path)) - continue; - -#ifdef GIT_USE_ICONV - if ((error = git_path_iconv(&ic, &de_path, &de_len)) < 0) - break; -#endif - - if ((error = entry_path_alloc(&entry_path, - path, path_len, de_path, de_len, alloc_extra)) < 0) - break; - - if ((error = git_vector_insert(contents, entry_path)) < 0) { - git__free(entry_path); - break; - } - } - - closedir(dir); - -#ifdef GIT_USE_ICONV - git_path_iconv_clear(&ic); -#endif - - if (error != 0) - giterr_set(GITERR_OS, "Failed to process directory entry in '%s'", path); - - return error; -} - int git_path_with_stat_cmp(const void *a, const void *b) { const git_path_with_stat *psa = a, *psb = b; @@ -1311,6 +1237,117 @@ void git_path_diriter_free(git_path_diriter *diriter) git_buf_free(&diriter->path); } +int git_path_dirload( + git_vector *contents, + const char *path, + size_t prefix_len, + unsigned int flags) +{ + git_path_diriter iter = {0}; + const char *name; + size_t name_len; + char *dup; + int error; + + assert(contents && path); + + if ((error = git_path_diriter_init(&iter, path, flags)) < 0) + return error; + + while ((error = git_path_diriter_next(&name, &name_len, &iter)) == 0) { + if ((error = git_path_diriter_fullpath(&name, &name_len, &iter)) < 0) + break; + + assert(name_len > prefix_len); + + dup = git__strndup(name 
+ prefix_len, name_len - prefix_len); + GITERR_CHECK_ALLOC(dup); + + if ((error = git_vector_insert(contents, dup)) < 0) + break; + } + + if (error == GIT_ITEROVER) + error = 0; + + git_path_diriter_free(&iter); + return error; +} + +static int _dirload( + const char *path, + size_t prefix_len, + size_t alloc_extra, + unsigned int flags, + git_vector *contents) +{ + int error; + DIR *dir; + size_t path_len; + path_dirent_data de_data; + struct dirent *de, *de_buf = (struct dirent *)&de_data; + +#ifdef GIT_USE_ICONV + git_path_iconv_t ic = GIT_PATH_ICONV_INIT; +#endif + + GIT_UNUSED(flags); + + assert(path && contents); + + path_len = strlen(path); + + if (!path_len || path_len < prefix_len) { + giterr_set(GITERR_INVALID, "Invalid directory path '%s'", path); + return -1; + } + if ((dir = opendir(path)) == NULL) { + giterr_set(GITERR_OS, "Failed to open directory '%s'", path); + return -1; + } + +#ifdef GIT_USE_ICONV + if ((flags & GIT_PATH_DIR_PRECOMPOSE_UNICODE) != 0) + (void)git_path_iconv_init_precompose(&ic); +#endif + + path += prefix_len; + path_len -= prefix_len; + + while ((error = p_readdir_r(dir, de_buf, &de)) == 0 && de != NULL) { + char *entry_path, *de_path = de->d_name; + size_t de_len = strlen(de_path); + + if (git_path_is_dot_or_dotdot(de_path)) + continue; + +#ifdef GIT_USE_ICONV + if ((error = git_path_iconv(&ic, &de_path, &de_len)) < 0) + break; +#endif + + if ((error = entry_path_alloc(&entry_path, + path, path_len, de_path, de_len, alloc_extra)) < 0) + break; + + if ((error = git_vector_insert(contents, entry_path)) < 0) { + git__free(entry_path); + break; + } + } + + closedir(dir); + +#ifdef GIT_USE_ICONV + git_path_iconv_clear(&ic); +#endif + + if (error != 0) + giterr_set(GITERR_OS, "Failed to process directory entry in '%s'", path); + + return error; +} + int git_path_dirload_with_stat( const char *path, size_t prefix_len, @@ -1330,7 +1367,7 @@ int git_path_dirload_with_stat( if (git_buf_set(&full, path, prefix_len) < 0) return -1; - error 
= git_path_dirload( + error = _dirload( path, prefix_len, sizeof(git_path_with_stat) + 1, flags, contents); if (error < 0) { git_buf_free(&full); diff --git a/src/path.h b/src/path.h index 3a25d4aed..ff743f626 100644 --- a/src/path.h +++ b/src/path.h @@ -366,22 +366,18 @@ extern void git_path_diriter_free(git_path_diriter *diriter); * of strings. That vector can then be sorted, iterated, or whatever. * Remember to free alloc of the allocated strings when you are done. * + * @param contents Vector to fill with directory entry names. * @param path The directory to read from. * @param prefix_len When inserting entries, the trailing part of path * will be prefixed after this length. I.e. given path "/a/b" and * prefix_len 3, the entries will look like "b/e1", "b/e2", etc. - * @param alloc_extra Extra bytes to add to each string allocation in - * case you want to append anything funny. * @param flags Combination of GIT_PATH_DIR flags. - * @param contents Vector to fill with directory entry names. 
*/ extern int git_path_dirload( + git_vector *contents, const char *path, size_t prefix_len, - size_t alloc_extra, - uint32_t flags, - git_vector *contents); - + uint32_t flags); typedef struct { struct stat st; diff --git a/tests/diff/drivers.c b/tests/diff/drivers.c index 8b12368ea..e3a0014db 100644 --- a/tests/diff/drivers.c +++ b/tests/diff/drivers.c @@ -186,7 +186,7 @@ void test_diff_drivers__builtins(void) g_repo = cl_git_sandbox_init("userdiff"); - cl_git_pass(git_path_dirload("userdiff/files", 9, 0, 0, &files)); + cl_git_pass(git_path_dirload(&files, "userdiff/files", 9, 0)); opts.interhunk_lines = 1; opts.context_lines = 1; From 35c1d20750cab4c3c1f86d1668bdb95213a25c4d Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 29 Apr 2015 14:03:20 -0400 Subject: [PATCH 10/16] git_win32_path_dirload_with_stat: removed --- src/iterator.c | 17 +---- src/win32/path_w32.c | 169 ------------------------------------------- src/win32/path_w32.h | 24 ------ 3 files changed, 1 insertion(+), 209 deletions(-) diff --git a/src/iterator.c b/src/iterator.c index 80b7d5faa..93815b478 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -984,21 +984,6 @@ static void fs_iterator__seek_frame_start( ff->index = 0; } -GIT_INLINE(int) path_dirload_with_stat( - const char *path, - size_t prefix_len, - unsigned int flags, - const char *start_stat, - const char *end_stat, - git_vector *contents) -{ -#if defined(GIT_WIN32) && !defined(__MINGW32__) - return git_win32_path_dirload_with_stat(path, prefix_len, flags, start_stat, end_stat, contents); -#else - return git_path_dirload_with_stat(path, prefix_len, flags, start_stat, end_stat, contents); -#endif -} - static int fs_iterator__expand_dir(fs_iterator *fi) { int error; @@ -1013,7 +998,7 @@ static int fs_iterator__expand_dir(fs_iterator *fi) ff = fs_iterator__alloc_frame(fi); GITERR_CHECK_ALLOC(ff); - error = path_dirload_with_stat( + error = git_path_dirload_with_stat( fi->path.ptr, fi->root_len, fi->dirload_flags, fi->base.start, 
fi->base.end, &ff->entries); diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c index 3dcc19c69..c145379f7 100644 --- a/src/win32/path_w32.c +++ b/src/win32/path_w32.c @@ -312,175 +312,6 @@ char *git_win32_path_8dot3_name(const char *path) return shortname; } -GIT_INLINE(int) path_with_stat_alloc( - git_path_with_stat **out, - const char *parent_path, - size_t parent_path_len, - const wchar_t *child_path_utf16, - bool trailing_slash) -{ - git_path_with_stat *ps; - int inner_slash = - (parent_path_len > 0 && parent_path[parent_path_len-1] != '/'); - size_t path_len, child_path_len, ps_size; - - if ((child_path_len = git__utf16_to_8(NULL, 0, child_path_utf16)) < 0) { - giterr_set(GITERR_OS, "Could not convert path to UTF-8 (path too long?)"); - return -1; - } - - GITERR_CHECK_ALLOC_ADD(&path_len, parent_path_len, inner_slash); - GITERR_CHECK_ALLOC_ADD(&path_len, path_len, child_path_len); - GITERR_CHECK_ALLOC_ADD(&path_len, path_len, trailing_slash ? 1 : 0); - - GITERR_CHECK_ALLOC_ADD(&ps_size, sizeof(git_path_with_stat), path_len); - GITERR_CHECK_ALLOC_ADD(&ps_size, ps_size, 1); - - ps = git__calloc(1, ps_size); - GITERR_CHECK_ALLOC(ps); - - if (parent_path_len) - memcpy(ps->path, parent_path, parent_path_len); - - if (inner_slash) - ps->path[parent_path_len] = '/'; - - if (git__utf16_to_8( - &ps->path[parent_path_len + inner_slash], - child_path_len + 1, child_path_utf16) != child_path_len) { - git__free(ps); - giterr_set(GITERR_OS, "Could not convert path to UTF-8 (size changed)"); - return -1; - } - - if (trailing_slash) - ps->path[path_len-1] = '/'; - - ps->path_len = path_len; - - *out = ps; - return 0; -} - -GIT_INLINE(int) path_add_base( - git_win32_path out, - size_t dir_len, - wchar_t *base, - size_t base_len) -{ - size_t out_len; - - if (GIT_ADD_SIZET_OVERFLOW(&out_len, dir_len, base_len) < 0 || - GIT_ADD_SIZET_OVERFLOW(&out_len, out_len, 2) < 0) - return -1; - - if (out_len > GIT_WIN_PATH_UTF16) { - giterr_set(GITERR_FILESYSTEM, "invalid path 
'%.*ls\\%.*ls' (path too long)", dir_len, out, base_len, base); - return -1; - } - - out[dir_len] = '\\'; - memcpy(&out[dir_len+1], base, base_len * sizeof(wchar_t)); - out[out_len-1] = '\0'; - - return 0; -} - -#if !defined(__MINGW32__) -int git_win32_path_dirload_with_stat( - const char *path, - size_t prefix_len, - unsigned int flags, - const char *start_stat, - const char *end_stat, - git_vector *contents) -{ - int error = 0; - git_path_with_stat *ps; - git_win32_path path_filter, file_path; - DIR dir = {0}; - int(*strncomp)(const char *a, const char *b, size_t sz); - size_t start_len = start_stat ? strlen(start_stat) : 0; - size_t end_len = end_stat ? strlen(end_stat) : 0; - size_t path_len, suffix_len, cmp_len; - const char *suffix; - int root_len; - - if ((root_len = git_win32_path_from_utf8(file_path, path)) < 0 || - !git_win32__findfirstfile_filter(path_filter, path)) { - giterr_set(GITERR_OS, "Could not parse the path '%s'", path); - return -1; - } - - strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ? - git__strncasecmp : git__strncmp; - - path_len = strlen(path); - - suffix = path + prefix_len; - suffix_len = path_len - prefix_len; - - /* We use FIND_FIRST_EX_LARGE_FETCH here for a minor perf bump; this - * flag should be ignored on previous version of Windows. 
- */ - dir.h = FindFirstFileExW( - path_filter, - FindExInfoBasic, - &dir.f, - FindExSearchNameMatch, - NULL, - _FIND_FIRST_EX_LARGE_FETCH); - - if (dir.h == INVALID_HANDLE_VALUE) { - error = -1; - giterr_set(GITERR_OS, "Could not open directory '%s'", path); - goto clean_up_and_exit; - } - - do { - if (git_path_is_dot_or_dotdotW(dir.f.cFileName)) - continue; - - if ((error = path_add_base(file_path, root_len, dir.f.cFileName, wcslen(dir.f.cFileName))) < 0 || - (error = path_with_stat_alloc(&ps, - suffix, suffix_len, dir.f.cFileName, - (dir.f.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)) < 0) - goto clean_up_and_exit; - - git_vector_insert(contents, ps); - - /* skip stat if before start_stat or after end_stat */ - cmp_len = min(start_len, ps->path_len); - if (cmp_len && strncomp(ps->path, start_stat, cmp_len) < 0) - continue; - - cmp_len = min(end_len, ps->path_len); - if (cmp_len && strncomp(ps->path, end_stat, cmp_len) > 0) - continue; - - if ((error = git_win32__file_attribute_to_stat(&ps->st, - (WIN32_FILE_ATTRIBUTE_DATA *)&dir.f, file_path)) < 0) { - goto clean_up_and_exit; - } - } while (FindNextFileW(dir.h, &dir.f)); - - if (GetLastError() != ERROR_NO_MORE_FILES) { - error = -1; - giterr_set(GITERR_OS, "Could not get attributes for file in '%s'", path); - goto clean_up_and_exit; - } - - /* sort now that directory suffix is added */ - git_vector_sort(contents); - -clean_up_and_exit: - if (dir.h != INVALID_HANDLE_VALUE) - FindClose(dir.h); - - return error; -} -#endif - static bool path_is_volume(wchar_t *target, size_t target_len) { return (target_len && wcsncmp(target, L"\\??\\Volume{", 11) == 0); diff --git a/src/win32/path_w32.h b/src/win32/path_w32.h index 57ce732d6..3d9f82860 100644 --- a/src/win32/path_w32.h +++ b/src/win32/path_w32.h @@ -80,30 +80,6 @@ extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src); */ extern char *git_win32_path_8dot3_name(const char *path); -#if !defined(__MINGW32__) -/** - * Load all directory 
entries along with stat info into a vector. - * Performed in a single pass per directory for optimized performance on Windows. - * - * This adds four things on top of plain `git_path_dirload`: - * - * 1. Each entry in the vector is a `git_path_with_stat` struct that - * contains both the path and the stat info - * 2. The entries will be sorted alphabetically - * 3. Entries that are directories will be suffixed with a '/' - * 4. Optionally, you can be a start and end prefix and only elements - * after the start and before the end (inclusively) will be stat'ed. - * - * @param path The directory to read from - * @param prefix_len The trailing part of path to prefix to entry paths - * @param flags GIT_PATH_DIR flags from above - * @param start_stat As optimization, only stat values after this prefix - * @param end_stat As optimization, only stat values before this prefix - * @param contents Vector to fill with git_path_with_stat structures - */ -extern int git_win32_path_dirload_with_stat(const char *path, size_t prefix_len, unsigned int flags, const char *start_stat, const char *end_stat, git_vector *contents); -#endif - extern int git_win32_path_readlink_w(git_win32_path dest, const git_win32_path path); #endif From ba8ef18a53b89c02df012cb80b01e195f478aada Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 29 Apr 2015 13:28:29 -0400 Subject: [PATCH 11/16] git_path_dirload_with_stat: use git_path_diriter --- src/path.c | 174 ++++++++++++++++------------------------------------- 1 file changed, 51 insertions(+), 123 deletions(-) diff --git a/src/path.c b/src/path.c index 72bc917f4..7285acc62 100644 --- a/src/path.c +++ b/src/path.c @@ -1228,7 +1228,10 @@ void git_path_diriter_free(git_path_diriter *diriter) if (diriter == NULL) return; - closedir(diriter->dir); + if (diriter->dir) { + closedir(diriter->dir); + diriter->dir = NULL; + } #ifdef GIT_USE_ICONV git_path_iconv_clear(&diriter->ic); @@ -1274,162 +1277,87 @@ int git_path_dirload( return error; } -static 
int _dirload( - const char *path, - size_t prefix_len, - size_t alloc_extra, - unsigned int flags, - git_vector *contents) -{ - int error; - DIR *dir; - size_t path_len; - path_dirent_data de_data; - struct dirent *de, *de_buf = (struct dirent *)&de_data; - -#ifdef GIT_USE_ICONV - git_path_iconv_t ic = GIT_PATH_ICONV_INIT; -#endif - - GIT_UNUSED(flags); - - assert(path && contents); - - path_len = strlen(path); - - if (!path_len || path_len < prefix_len) { - giterr_set(GITERR_INVALID, "Invalid directory path '%s'", path); - return -1; - } - if ((dir = opendir(path)) == NULL) { - giterr_set(GITERR_OS, "Failed to open directory '%s'", path); - return -1; - } - -#ifdef GIT_USE_ICONV - if ((flags & GIT_PATH_DIR_PRECOMPOSE_UNICODE) != 0) - (void)git_path_iconv_init_precompose(&ic); -#endif - - path += prefix_len; - path_len -= prefix_len; - - while ((error = p_readdir_r(dir, de_buf, &de)) == 0 && de != NULL) { - char *entry_path, *de_path = de->d_name; - size_t de_len = strlen(de_path); - - if (git_path_is_dot_or_dotdot(de_path)) - continue; - -#ifdef GIT_USE_ICONV - if ((error = git_path_iconv(&ic, &de_path, &de_len)) < 0) - break; -#endif - - if ((error = entry_path_alloc(&entry_path, - path, path_len, de_path, de_len, alloc_extra)) < 0) - break; - - if ((error = git_vector_insert(contents, entry_path)) < 0) { - git__free(entry_path); - break; - } - } - - closedir(dir); - -#ifdef GIT_USE_ICONV - git_path_iconv_clear(&ic); -#endif - - if (error != 0) - giterr_set(GITERR_OS, "Failed to process directory entry in '%s'", path); - - return error; -} - int git_path_dirload_with_stat( - const char *path, + const char *dirpath, size_t prefix_len, unsigned int flags, const char *start_stat, const char *end_stat, git_vector *contents) { - int error; - unsigned int i; - git_path_with_stat *ps; - git_buf full = GIT_BUF_INIT; + git_path_diriter diriter = {0}; + const char *path; int (*strncomp)(const char *a, const char *b, size_t sz); size_t start_len = start_stat ? 
strlen(start_stat) : 0; - size_t end_len = end_stat ? strlen(end_stat) : 0, cmp_len; - - if (git_buf_set(&full, path, prefix_len) < 0) - return -1; - - error = _dirload( - path, prefix_len, sizeof(git_path_with_stat) + 1, flags, contents); - if (error < 0) { - git_buf_free(&full); - return error; - } + size_t end_len = end_stat ? strlen(end_stat) : 0; + git_path_with_stat *ps; + size_t path_len, cmp_len, ps_size; + int error; strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ? git__strncasecmp : git__strncmp; - /* stat struct at start of git_path_with_stat, so shift path text */ - git_vector_foreach(contents, i, ps) { - size_t path_len = strlen((char *)ps); - memmove(ps->path, ps, path_len + 1); - ps->path_len = path_len; - } + if ((error = git_path_diriter_init(&diriter, dirpath, flags)) < 0) + goto done; + + while ((error = git_path_diriter_next(&path, &path_len, &diriter)) == 0) { + if ((error = git_path_diriter_fullpath(&path, &path_len, &diriter)) < 0) + goto done; + + assert(path_len > prefix_len); + + /* remove the prefix if requested */ + path += prefix_len; + path_len -= prefix_len; - git_vector_foreach(contents, i, ps) { /* skip if before start_stat or after end_stat */ - cmp_len = min(start_len, ps->path_len); - if (cmp_len && strncomp(ps->path, start_stat, cmp_len) < 0) + cmp_len = min(start_len, path_len); + if (cmp_len && strncomp(path, start_stat, cmp_len) < 0) continue; - cmp_len = min(end_len, ps->path_len); - if (cmp_len && strncomp(ps->path, end_stat, cmp_len) > 0) + cmp_len = min(end_len, path_len); + if (cmp_len && strncomp(path, end_stat, cmp_len) > 0) continue; - git_buf_truncate(&full, prefix_len); + GITERR_CHECK_ALLOC_ADD(&ps_size, sizeof(git_path_with_stat), path_len); + GITERR_CHECK_ALLOC_ADD(&ps_size, ps_size, 2); - if ((error = git_buf_joinpath(&full, full.ptr, ps->path)) < 0 || - (error = git_path_lstat(full.ptr, &ps->st)) < 0) { + ps = git__calloc(1, ps_size); + ps->path_len = path_len; + memcpy(ps->path, path, path_len); + + if 
((error = git_path_diriter_stat(&ps->st, &diriter)) < 0) { if (error == GIT_ENOTFOUND) { /* file was removed between readdir and lstat */ - char *entry_path = git_vector_get(contents, i); - git_vector_remove(contents, i--); - git__free(entry_path); - } else { - /* Treat the file as unreadable if we get any other error */ - memset(&ps->st, 0, sizeof(ps->st)); - ps->st.st_mode = GIT_FILEMODE_UNREADABLE; + git__free(ps); + continue; } + /* Treat the file as unreadable if we get any other error */ + memset(&ps->st, 0, sizeof(ps->st)); + ps->st.st_mode = GIT_FILEMODE_UNREADABLE; + giterr_clear(); error = 0; - continue; - } - - if (S_ISDIR(ps->st.st_mode)) { + } else if (S_ISDIR(ps->st.st_mode)) { + /* Suffix directory paths with a '/' */ ps->path[ps->path_len++] = '/'; ps->path[ps->path_len] = '\0'; + } else if(!S_ISREG(ps->st.st_mode) && !S_ISLNK(ps->st.st_mode)) { + /* Ignore wacky things in the filesystem */ } - else if (!S_ISREG(ps->st.st_mode) && !S_ISLNK(ps->st.st_mode)) { - char *entry_path = git_vector_get(contents, i); - git_vector_remove(contents, i--); - git__free(entry_path); - } + + git_vector_insert(contents, ps); } + if (error == GIT_ITEROVER) + error = 0; + /* sort now that directory suffix is added */ git_vector_sort(contents); - git_buf_free(&full); - +done: + git_path_diriter_free(&diriter); return error; } From 7ef005f165518a9f76774c392fa2895dc1b34c96 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 29 Apr 2015 14:04:01 -0400 Subject: [PATCH 12/16] git_path_dirload_with_stat: moved to fs_iterator --- src/iterator.c | 123 ++++++++++++++++++++++++++++++++++++++++++++--- src/path.c | 127 ------------------------------------------------- src/path.h | 36 -------------- 3 files changed, 116 insertions(+), 170 deletions(-) diff --git a/src/iterator.c b/src/iterator.c index 93815b478..7e89b77cc 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -920,12 +920,31 @@ struct fs_iterator { #define FS_MAX_DEPTH 100 +typedef struct { + struct stat st; + 
size_t path_len; + char path[GIT_FLEX_ARRAY]; +} fs_iterator_path_with_stat; + +static int fs_iterator_path_with_stat_cmp(const void *a, const void *b) +{ + const fs_iterator_path_with_stat *psa = a, *psb = b; + return strcmp(psa->path, psb->path); +} + +static int fs_iterator_path_with_stat_cmp_icase(const void *a, const void *b) +{ + const fs_iterator_path_with_stat *psa = a, *psb = b; + return strcasecmp(psa->path, psb->path); +} + static fs_iterator_frame *fs_iterator__alloc_frame(fs_iterator *fi) { fs_iterator_frame *ff = git__calloc(1, sizeof(fs_iterator_frame)); git_vector_cmp entry_compare = CASESELECT( iterator__ignore_case(fi), - git_path_with_stat_cmp_icase, git_path_with_stat_cmp); + fs_iterator_path_with_stat_cmp_icase, + fs_iterator_path_with_stat_cmp); if (ff && git_vector_init(&ff->entries, 0, entry_compare) < 0) { git__free(ff); @@ -967,7 +986,7 @@ static int fs_iterator__advance_over( static int fs_iterator__entry_cmp(const void *i, const void *item) { const fs_iterator *fi = (const fs_iterator *)i; - const git_path_with_stat *ps = item; + const fs_iterator_path_with_stat *ps = item; return fi->base.prefixcomp(fi->base.start, ps->path); } @@ -984,6 +1003,96 @@ static void fs_iterator__seek_frame_start( ff->index = 0; } +static int dirload_with_stat( + const char *dirpath, + size_t prefix_len, + unsigned int flags, + const char *start_stat, + const char *end_stat, + git_vector *contents) +{ + git_path_diriter diriter = {0}; + const char *path; + int (*strncomp)(const char *a, const char *b, size_t sz); + size_t start_len = start_stat ? strlen(start_stat) : 0; + size_t end_len = end_stat ? strlen(end_stat) : 0; + fs_iterator_path_with_stat *ps; + size_t path_len, cmp_len, ps_size; + int error; + + strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ? 
+ git__strncasecmp : git__strncmp; + + if ((error = git_path_diriter_init(&diriter, dirpath, flags)) < 0) + goto done; + + while ((error = git_path_diriter_next(&path, &path_len, &diriter)) == 0) { + if ((error = git_path_diriter_fullpath(&path, &path_len, &diriter)) < 0) + goto done; + + assert(path_len > prefix_len); + + /* remove the prefix if requested */ + path += prefix_len; + path_len -= prefix_len; + + /* skip if before start_stat or after end_stat */ + cmp_len = min(start_len, path_len); + if (cmp_len && strncomp(path, start_stat, cmp_len) < 0) + continue; + cmp_len = min(end_len, path_len); + if (cmp_len && strncomp(path, end_stat, cmp_len) > 0) + continue; + + /* Make sure to append two bytes, one for the path's null + * termination, one for a possible trailing '/' for folders. + */ + GITERR_CHECK_ALLOC_ADD(&ps_size, sizeof(fs_iterator_path_with_stat), path_len); + GITERR_CHECK_ALLOC_ADD(&ps_size, ps_size, 2); + + ps = git__calloc(1, ps_size); + ps->path_len = path_len; + + memcpy(ps->path, path, path_len); + + if ((error = git_path_diriter_stat(&ps->st, &diriter)) < 0) { + if (error == GIT_ENOTFOUND) { + /* file was removed between readdir and lstat */ + git__free(ps); + continue; + } + + /* Treat the file as unreadable if we get any other error */ + memset(&ps->st, 0, sizeof(ps->st)); + ps->st.st_mode = GIT_FILEMODE_UNREADABLE; + + giterr_clear(); + error = 0; + } else if (S_ISDIR(ps->st.st_mode)) { + /* Suffix directory paths with a '/' */ + ps->path[ps->path_len++] = '/'; + ps->path[ps->path_len] = '\0'; + } else if(!S_ISREG(ps->st.st_mode) && !S_ISLNK(ps->st.st_mode)) { + /* Ignore wacky things in the filesystem */ + git__free(ps); + continue; + } + + git_vector_insert(contents, ps); + } + + if (error == GIT_ITEROVER) + error = 0; + + /* sort now that directory suffix is added */ + git_vector_sort(contents); + +done: + git_path_diriter_free(&diriter); + return error; +} + + static int fs_iterator__expand_dir(fs_iterator *fi) { int error; @@ -998,7 
+1107,7 @@ static int fs_iterator__expand_dir(fs_iterator *fi) ff = fs_iterator__alloc_frame(fi); GITERR_CHECK_ALLOC(ff); - error = git_path_dirload_with_stat( + error = dirload_with_stat( fi->path.ptr, fi->root_len, fi->dirload_flags, fi->base.start, fi->base.end, &ff->entries); @@ -1086,7 +1195,7 @@ static int fs_iterator__advance_over( int error = 0; fs_iterator *fi = (fs_iterator *)self; fs_iterator_frame *ff; - git_path_with_stat *next; + fs_iterator_path_with_stat *next; if (entry != NULL) *entry = NULL; @@ -1176,7 +1285,7 @@ static void fs_iterator__free(git_iterator *self) static int fs_iterator__update_entry(fs_iterator *fi) { - git_path_with_stat *ps; + fs_iterator_path_with_stat *ps; memset(&fi->entry, 0, sizeof(fi->entry)); @@ -1307,7 +1416,7 @@ GIT_INLINE(bool) workdir_path_is_dotgit(const git_buf *path) * We consider it a submodule if the path is listed as a submodule in * either the tree or the index. */ -static int is_submodule(workdir_iterator *wi, git_path_with_stat *ie) +static int is_submodule(workdir_iterator *wi, fs_iterator_path_with_stat *ie) { int error, is_submodule = 0; @@ -1360,7 +1469,7 @@ static int workdir_iterator__enter_dir(fs_iterator *fi) workdir_iterator *wi = (workdir_iterator *)fi; fs_iterator_frame *ff = fi->stack; size_t pos; - git_path_with_stat *entry; + fs_iterator_path_with_stat *entry; bool found_submodules = false; git_dir_flag dir_flag = git_entry__dir_flag(&fi->entry); diff --git a/src/path.c b/src/path.c index 7285acc62..d8f3c234e 100644 --- a/src/path.c +++ b/src/path.c @@ -1078,49 +1078,6 @@ int git_path_direach( return error; } -static int entry_path_alloc( - char **out, - const char *path, - size_t path_len, - const char *de_path, - size_t de_len, - size_t alloc_extra) -{ - int need_slash = (path_len > 0 && path[path_len-1] != '/') ? 
1 : 0; - size_t alloc_size; - char *entry_path; - - GITERR_CHECK_ALLOC_ADD(&alloc_size, path_len, de_len); - GITERR_CHECK_ALLOC_ADD(&alloc_size, alloc_size, need_slash); - GITERR_CHECK_ALLOC_ADD(&alloc_size, alloc_size, 1); - GITERR_CHECK_ALLOC_ADD(&alloc_size, alloc_size, alloc_extra); - entry_path = git__calloc(1, alloc_size); - GITERR_CHECK_ALLOC(entry_path); - - if (path_len) - memcpy(entry_path, path, path_len); - - if (need_slash) - entry_path[path_len] = '/'; - - memcpy(&entry_path[path_len + need_slash], de_path, de_len); - - *out = entry_path; - return 0; -} - -int git_path_with_stat_cmp(const void *a, const void *b) -{ - const git_path_with_stat *psa = a, *psb = b; - return strcmp(psa->path, psb->path); -} - -int git_path_with_stat_cmp_icase(const void *a, const void *b) -{ - const git_path_with_stat *psa = a, *psb = b; - return strcasecmp(psa->path, psb->path); -} - int git_path_diriter_init( git_path_diriter *diriter, const char *path, @@ -1277,90 +1234,6 @@ int git_path_dirload( return error; } -int git_path_dirload_with_stat( - const char *dirpath, - size_t prefix_len, - unsigned int flags, - const char *start_stat, - const char *end_stat, - git_vector *contents) -{ - git_path_diriter diriter = {0}; - const char *path; - int (*strncomp)(const char *a, const char *b, size_t sz); - size_t start_len = start_stat ? strlen(start_stat) : 0; - size_t end_len = end_stat ? strlen(end_stat) : 0; - git_path_with_stat *ps; - size_t path_len, cmp_len, ps_size; - int error; - - strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ? 
- git__strncasecmp : git__strncmp; - - if ((error = git_path_diriter_init(&diriter, dirpath, flags)) < 0) - goto done; - - while ((error = git_path_diriter_next(&path, &path_len, &diriter)) == 0) { - if ((error = git_path_diriter_fullpath(&path, &path_len, &diriter)) < 0) - goto done; - - assert(path_len > prefix_len); - - /* remove the prefix if requested */ - path += prefix_len; - path_len -= prefix_len; - - /* skip if before start_stat or after end_stat */ - cmp_len = min(start_len, path_len); - if (cmp_len && strncomp(path, start_stat, cmp_len) < 0) - continue; - cmp_len = min(end_len, path_len); - if (cmp_len && strncomp(path, end_stat, cmp_len) > 0) - continue; - - GITERR_CHECK_ALLOC_ADD(&ps_size, sizeof(git_path_with_stat), path_len); - GITERR_CHECK_ALLOC_ADD(&ps_size, ps_size, 2); - - ps = git__calloc(1, ps_size); - ps->path_len = path_len; - - memcpy(ps->path, path, path_len); - - if ((error = git_path_diriter_stat(&ps->st, &diriter)) < 0) { - if (error == GIT_ENOTFOUND) { - /* file was removed between readdir and lstat */ - git__free(ps); - continue; - } - - /* Treat the file as unreadable if we get any other error */ - memset(&ps->st, 0, sizeof(ps->st)); - ps->st.st_mode = GIT_FILEMODE_UNREADABLE; - - giterr_clear(); - error = 0; - } else if (S_ISDIR(ps->st.st_mode)) { - /* Suffix directory paths with a '/' */ - ps->path[ps->path_len++] = '/'; - ps->path[ps->path_len] = '\0'; - } else if(!S_ISREG(ps->st.st_mode) && !S_ISLNK(ps->st.st_mode)) { - /* Ignore wacky things in the filesystem */ - } - - git_vector_insert(contents, ps); - } - - if (error == GIT_ITEROVER) - error = 0; - - /* sort now that directory suffix is added */ - git_vector_sort(contents); - -done: - git_path_diriter_free(&diriter); - return error; -} - int git_path_from_url_or_path(git_buf *local_path_out, const char *url_or_path) { if (git_path_is_local_file_url(url_or_path)) diff --git a/src/path.h b/src/path.h index ff743f626..4900dceb0 100644 --- a/src/path.h +++ b/src/path.h @@ -379,42 
+379,6 @@ extern int git_path_dirload( size_t prefix_len, uint32_t flags); -typedef struct { - struct stat st; - size_t path_len; - char path[GIT_FLEX_ARRAY]; -} git_path_with_stat; - -extern int git_path_with_stat_cmp(const void *a, const void *b); -extern int git_path_with_stat_cmp_icase(const void *a, const void *b); - -/** - * Load all directory entries along with stat info into a vector. - * - * This adds four things on top of plain `git_path_dirload`: - * - * 1. Each entry in the vector is a `git_path_with_stat` struct that - * contains both the path and the stat info - * 2. The entries will be sorted alphabetically - * 3. Entries that are directories will be suffixed with a '/' - * 4. Optionally, you can be a start and end prefix and only elements - * after the start and before the end (inclusively) will be stat'ed. - * - * @param path The directory to read from - * @param prefix_len The trailing part of path to prefix to entry paths - * @param flags GIT_PATH_DIR flags from above - * @param start_stat As optimization, only stat values after this prefix - * @param end_stat As optimization, only stat values before this prefix - * @param contents Vector to fill with git_path_with_stat structures - */ -extern int git_path_dirload_with_stat( - const char *path, - size_t prefix_len, - uint32_t flags, - const char *start_stat, - const char *end_stat, - git_vector *contents); - enum { GIT_PATH_NOTEQUAL = 0, GIT_PATH_EQUAL = 1, GIT_PATH_PREFIX = 2 }; /* From 5c387b6c5a616d245e51e4ca1935e6ffd78c710e Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 29 Apr 2015 14:31:59 -0400 Subject: [PATCH 13/16] git_path_diriter: next shouldn't take path ptr The _next method shouldn't take a path pointer (and a path_len pointer) as 100% of current users use the full path and ignore the filename. Plus let's add some docs and a unit test. 
--- src/iterator.c | 2 +- src/path.c | 27 ++++++++++++--------- src/path.h | 49 ++++++++++++++++++++++++++++++++++++- tests/core/dirent.c | 59 ++++++++++++++++++++++++++++++++++++++------- 4 files changed, 114 insertions(+), 23 deletions(-) diff --git a/src/iterator.c b/src/iterator.c index 7e89b77cc..52814bae7 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -1026,7 +1026,7 @@ static int dirload_with_stat( if ((error = git_path_diriter_init(&diriter, dirpath, flags)) < 0) goto done; - while ((error = git_path_diriter_next(&path, &path_len, &diriter)) == 0) { + while ((error = git_path_diriter_next(&diriter)) == 0) { if ((error = git_path_diriter_fullpath(&path, &path_len, &diriter)) < 0) goto done; diff --git a/src/path.c b/src/path.c index d8f3c234e..ee566985a 100644 --- a/src/path.c +++ b/src/path.c @@ -1111,10 +1111,7 @@ int git_path_diriter_init( return 0; } -int git_path_diriter_next( - const char **out, - size_t *out_len, - git_path_diriter *diriter) +int git_path_diriter_next(git_path_diriter *diriter) { struct dirent *de; const char *filename; @@ -1122,10 +1119,7 @@ int git_path_diriter_next( bool skip_dot = !(diriter->flags & GIT_PATH_DIR_INCLUDE_DOT_AND_DOTDOT); int error = 0; - assert(out && out_len && diriter); - - *out = NULL; - *out_len = 0; + assert(diriter); errno = 0; @@ -1155,12 +1149,21 @@ int git_path_diriter_next( if (git_buf_oom(&diriter->path)) return -1; - *out = &diriter->path.ptr[diriter->parent_len+1]; - *out_len = filename_len; - return error; } +int git_path_diriter_filename( + const char **out, + size_t *out_len, + git_path_diriter *diriter) +{ + assert(out && out_len && diriter); + + *out = &diriter->path.ptr[diriter->parent_len+1]; + *out_len = diriter->path.size - diriter->parent_len - 1; + return 0; +} + int git_path_diriter_fullpath( const char **out, size_t *out_len, @@ -1214,7 +1217,7 @@ int git_path_dirload( if ((error = git_path_diriter_init(&iter, path, flags)) < 0) return error; - while ((error = 
git_path_diriter_next(&name, &name_len, &iter)) == 0) { + while ((error = git_path_diriter_next(&iter)) == 0) { if ((error = git_path_diriter_fullpath(&name, &name_len, &iter)) < 0) break; diff --git a/src/path.h b/src/path.h index 4900dceb0..927d2fc6e 100644 --- a/src/path.h +++ b/src/path.h @@ -339,23 +339,70 @@ struct git_path_diriter DIR *dir; }; +/** + * Initialize a directory iterator. + * + * @param diriter Pointer to a diriter structure that will be setup. + * @param path The path that will be iterated over + * @param flags Directory reader flags + * @return 0 or an error code + */ extern int git_path_diriter_init( git_path_diriter *diriter, const char *path, unsigned int flags); -extern int git_path_diriter_next( +/** + * Advance the directory iterator. Will return GIT_ITEROVER when + * the iteration has completed successfully. + * + * @param diriter The directory iterator + * @return 0, GIT_ITEROVER, or an error code + */ +extern int git_path_diriter_next(git_path_diriter *diriter); + +/** + * Returns the file name of the current item in the iterator. + * + * @param out Pointer to store the path in + * @param out_len Pointer to store the length of the path in + * @param diriter The directory iterator + * @return 0 or an error code + */ +extern int git_path_diriter_filename( const char **out, size_t *out_len, git_path_diriter *diriter); +/** + * Returns the full path of the current item in the iterator; that + * is the current filename plus the path of the directory that the + * iterator was constructed with. + * + * @param out Pointer to store the path in + * @param out_len Pointer to store the length of the path in + * @param diriter The directory iterator + * @return 0 or an error code + */ extern int git_path_diriter_fullpath( const char **out, size_t *out_len, git_path_diriter *diriter); +/** + * Performs an `lstat` on the current item in the iterator. 
+ * + * @param out Pointer to store the stat data in + * @param diriter The directory iterator + * @return 0 or an error code + */ extern int git_path_diriter_stat(struct stat *out, git_path_diriter *diriter); +/** + * Closes the directory iterator. + * + * @param diriter The directory iterator + */ extern void git_path_diriter_free(git_path_diriter *diriter); /** diff --git a/tests/core/dirent.c b/tests/core/dirent.c index f17260362..d95e44196 100644 --- a/tests/core/dirent.c +++ b/tests/core/dirent.c @@ -67,10 +67,23 @@ static void check_counts(walk_data *d) } } +static int update_count(name_data *data, const char *name) +{ + name_data *n; + + for (n = data; n->name; n++) { + if (!strcmp(n->name, name)) { + n->count++; + return 0; + } + } + + return GIT_ERROR; +} + static int one_entry(void *state, git_buf *path) { walk_data *d = (walk_data *) state; - name_data *n; if (state != state_loc) return GIT_ERROR; @@ -78,14 +91,7 @@ static int one_entry(void *state, git_buf *path) if (path != &d->path) return GIT_ERROR; - for (n = d->names; n->name; n++) { - if (!strcmp(n->name, path->ptr)) { - n->count++; - return 0; - } - } - - return GIT_ERROR; + return update_count(d->names, path->ptr); } @@ -234,3 +240,38 @@ void test_core_dirent__empty_dir(void) cl_must_pass(p_rmdir("empty_dir")); } + +static void handle_next(git_path_diriter *diriter, walk_data *walk) +{ + const char *fullpath, *filename; + size_t fullpath_len, filename_len; + + cl_git_pass(git_path_diriter_fullpath(&fullpath, &fullpath_len, diriter)); + cl_git_pass(git_path_diriter_filename(&filename, &filename_len, diriter)); + + cl_assert_equal_strn(fullpath, "sub/", 4); + cl_assert_equal_s(fullpath+4, filename); + + update_count(walk->names, fullpath); +} + +/* test directory iterator */ +void test_core_dirent__diriter_with_fullname(void) +{ + git_path_diriter diriter = GIT_PATH_DIRITER_INIT; + int error; + + cl_set_cleanup(&dirent_cleanup__cb, &sub); + setup(&sub); + + 
cl_git_pass(git_path_diriter_init(&diriter, sub.path.ptr, 0)); + + while ((error = git_path_diriter_next(&diriter)) == 0) + handle_next(&diriter, &sub); + + cl_assert_equal_i(error, GIT_ITEROVER); + + git_path_diriter_free(&diriter); + + check_counts(&sub); +} From f63a1b729bc74e1e72f80f75843b8f2042f3f81f Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 29 Apr 2015 17:23:02 -0400 Subject: [PATCH 14/16] git_path_diriter: use FindFirstFile in win32 Using FindFirstFile and FindNextFile in win32 allows us to use the directory information that is returned, instead of us having to get the file attributes all over again, which is a distinct cost savings on win32. --- src/iterator.c | 2 +- src/path.c | 188 ++++++++++++++++++++++++++++++++++++++++++- src/path.h | 26 ++++++ src/win32/path_w32.c | 5 -- 4 files changed, 213 insertions(+), 8 deletions(-) diff --git a/src/iterator.c b/src/iterator.c index 52814bae7..c5c5fd7ce 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -1011,7 +1011,7 @@ static int dirload_with_stat( const char *end_stat, git_vector *contents) { - git_path_diriter diriter = {0}; + git_path_diriter diriter = GIT_PATH_DIRITER_INIT; const char *path; int (*strncomp)(const char *a, const char *b, size_t sz); size_t start_len = start_stat ? strlen(start_stat) : 0; diff --git a/src/path.c b/src/path.c index ee566985a..6c9852b79 100644 --- a/src/path.c +++ b/src/path.c @@ -1078,6 +1078,182 @@ int git_path_direach( return error; } +#if defined(GIT_WIN32) && !defined(__MINGW32__) + +/* Using _FIND_FIRST_EX_LARGE_FETCH may increase performance in Windows 7 + * and better. Prior versions will ignore this. 
+ */ +#ifndef FIND_FIRST_EX_LARGE_FETCH +# define FIND_FIRST_EX_LARGE_FETCH 2 +#endif + +int git_path_diriter_init( + git_path_diriter *diriter, + const char *path, + unsigned int flags) +{ + git_win32_path path_filter; + git_buf hack = {0}; + + assert(diriter && path); + + memset(diriter, 0, sizeof(git_path_diriter)); + diriter->handle = INVALID_HANDLE_VALUE; + + if (git_buf_puts(&diriter->path_utf8, path) < 0) + return -1; + + git_path_trim_slashes(&diriter->path_utf8); + + if (diriter->path_utf8.size == 0) { + giterr_set(GITERR_FILESYSTEM, "Could not open directory '%s'", path); + return -1; + } + + if ((diriter->parent_len = git_win32_path_from_utf8(diriter->path, diriter->path_utf8.ptr)) < 0 || + !git_win32__findfirstfile_filter(path_filter, diriter->path_utf8.ptr)) { + giterr_set(GITERR_OS, "Could not parse the directory path '%s'", path); + return -1; + } + + diriter->handle = FindFirstFileExW( + path_filter, + FindExInfoBasic, + &diriter->current, + FindExSearchNameMatch, + NULL, + FIND_FIRST_EX_LARGE_FETCH); + + if (diriter->handle == INVALID_HANDLE_VALUE) { + giterr_set(GITERR_OS, "Could not open directory '%s'", path); + return -1; + } + + diriter->parent_utf8_len = diriter->path_utf8.size; + diriter->flags = flags; + return 0; +} + +static int diriter_update_utf16(git_path_diriter *diriter) +{ + size_t filename_len, path_len; + + filename_len = wcslen(diriter->current.cFileName); + + if (GIT_ADD_SIZET_OVERFLOW(&path_len, diriter->parent_len, filename_len) || + GIT_ADD_SIZET_OVERFLOW(&path_len, path_len, 2)) + return -1; + + if (path_len > GIT_WIN_PATH_UTF16) { + giterr_set(GITERR_FILESYSTEM, + "invalid path '%.*ls\\%ls' (path too long)", + diriter->parent_len, diriter->path, diriter->current.cFileName); + return -1; + } + + diriter->path[diriter->parent_len] = L'\\'; + memcpy(&diriter->path[diriter->parent_len+1], + diriter->current.cFileName, filename_len * sizeof(wchar_t)); + diriter->path[path_len-1] = L'\0'; + + return 0; +} + +static int 
diriter_update_utf8(git_path_diriter *diriter) +{ + git_win32_utf8_path filename_utf8; + wchar_t *filename_utf16; + int filename_utf8_len; + + /* Don't copy the full UTF-16 path into the UTF-8 path, only do the + * UTF16 -> UTF8 conversion of the filename portion. This prevents us + * from trying to encode the parent path differently, which would be + * bad since we do arithmetic based on the already computed parent len. + */ + + filename_utf16 = &diriter->path[diriter->parent_len + 1]; + + if ((filename_utf8_len = git_win32_path_to_utf8(filename_utf8, filename_utf16)) < 0) + return filename_utf8_len; + + git_buf_truncate(&diriter->path_utf8, diriter->parent_utf8_len); + git_buf_putc(&diriter->path_utf8, '/'); + git_buf_put(&diriter->path_utf8, filename_utf8, (size_t)filename_utf8_len); + + if (git_buf_oom(&diriter->path_utf8)) + return -1; + + return 0; +} + +int git_path_diriter_next(git_path_diriter *diriter) +{ + bool skip_dot = !(diriter->flags & GIT_PATH_DIR_INCLUDE_DOT_AND_DOTDOT); + + do { + /* Our first time through, we already have the data from + * FindFirstFileW. Use it, otherwise get the next file. 
+ */ + if (!diriter->needs_next) + diriter->needs_next = 1; + else if (!FindNextFileW(diriter->handle, &diriter->current)) + return GIT_ITEROVER; + } while (skip_dot && git_path_is_dot_or_dotdotW(diriter->current.cFileName)); + + if (diriter_update_utf16(diriter) < 0 || diriter_update_utf8(diriter) < 0) + return -1; + + return 0; +} + +int git_path_diriter_filename( + const char **out, + size_t *out_len, + git_path_diriter *diriter) +{ + assert(out && out_len && diriter); + + assert(diriter->path_utf8.size > diriter->parent_utf8_len); + + *out = &diriter->path_utf8.ptr[diriter->parent_utf8_len+1]; + *out_len = diriter->path_utf8.size - diriter->parent_utf8_len - 1; + return 0; +} + +int git_path_diriter_fullpath( + const char **out, + size_t *out_len, + git_path_diriter *diriter) +{ + assert(out && out_len && diriter); + + *out = diriter->path_utf8.ptr; + *out_len = diriter->path_utf8.size; + return 0; +} + +int git_path_diriter_stat(struct stat *out, git_path_diriter *diriter) +{ + assert(out && diriter); + + return git_win32__file_attribute_to_stat(out, + (WIN32_FILE_ATTRIBUTE_DATA *)&diriter->current, + diriter->path); +} + +void git_path_diriter_free(git_path_diriter *diriter) +{ + if (diriter == NULL) + return; + + if (diriter->handle != INVALID_HANDLE_VALUE) { + FindClose(diriter->handle); + diriter->handle = INVALID_HANDLE_VALUE; + } +} + +#else + int git_path_diriter_init( git_path_diriter *diriter, const char *path, @@ -1090,9 +1266,13 @@ int git_path_diriter_init( if (git_buf_puts(&diriter->path, path) < 0) return -1; - git_path_mkposix(diriter->path.ptr); git_path_trim_slashes(&diriter->path); + if (diriter->path.size == 0) { + giterr_set(GITERR_FILESYSTEM, "Could not open directory '%s'", path); + return -1; + } + if ((diriter->dir = opendir(diriter->path.ptr)) == NULL) { git_buf_free(&diriter->path); @@ -1159,6 +1339,8 @@ int git_path_diriter_filename( { assert(out && out_len && diriter); + assert(diriter->path.size > diriter->parent_len); + *out = 
&diriter->path.ptr[diriter->parent_len+1]; *out_len = diriter->path.size - diriter->parent_len - 1; return 0; @@ -1200,13 +1382,15 @@ void git_path_diriter_free(git_path_diriter *diriter) git_buf_free(&diriter->path); } +#endif + int git_path_dirload( git_vector *contents, const char *path, size_t prefix_len, unsigned int flags) { - git_path_diriter iter = {0}; + git_path_diriter iter = GIT_PATH_DIRITER_INIT; const char *name; size_t name_len; char *dup; diff --git a/src/path.h b/src/path.h index 927d2fc6e..9c2b85a87 100644 --- a/src/path.h +++ b/src/path.h @@ -329,6 +329,28 @@ extern int git_path_walk_up( typedef struct git_path_diriter git_path_diriter; +#if defined(GIT_WIN32) && !defined(__MINGW32__) + +struct git_path_diriter +{ + git_win32_path path; + size_t parent_len; + + git_buf path_utf8; + size_t parent_utf8_len; + + HANDLE handle; + + unsigned int flags; + + WIN32_FIND_DATAW current; + unsigned int needs_next; +}; + +#define GIT_PATH_DIRITER_INIT { {0}, 0, GIT_BUF_INIT, 0, INVALID_HANDLE_VALUE } + +#else + struct git_path_diriter { git_buf path; @@ -339,6 +361,10 @@ struct git_path_diriter DIR *dir; }; +#define GIT_PATH_DIRITER_INIT { GIT_BUF_INIT } + +#endif + /** * Initialize a directory iterator. * diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c index c145379f7..118e8bcc5 100644 --- a/src/win32/path_w32.c +++ b/src/win32/path_w32.c @@ -30,11 +30,6 @@ #define path__is_unc(p) \ (((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/')) -/* Using _FIND_FIRST_EX_LARGE_FETCH may increase performance in Windows 7 - * and better. Prior versions will ignore this. 
- */ -#define _FIND_FIRST_EX_LARGE_FETCH 2 - GIT_INLINE(int) path__cwd(wchar_t *path, int size) { int len; From cd39e4e2f3cc27a2976c9a8e0058f9e32d1f1c8f Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Wed, 29 Apr 2015 18:12:51 -0400 Subject: [PATCH 15/16] git_buf_put_w: introduce utf16->utf8 conversion --- src/path.c | 27 ++++------------------ src/win32/buffer.c | 55 ++++++++++++++++++++++++++++++++++++++++++++ src/win32/buffer.h | 18 +++++++++++++++ src/win32/utf-conv.c | 4 ---- src/win32/utf-conv.h | 4 ++++ 5 files changed, 81 insertions(+), 27 deletions(-) create mode 100644 src/win32/buffer.c create mode 100644 src/win32/buffer.h diff --git a/src/path.c b/src/path.c index 6c9852b79..5b9fb935e 100644 --- a/src/path.c +++ b/src/path.c @@ -10,6 +10,7 @@ #include "repository.h" #ifdef GIT_WIN32 #include "win32/posix.h" +#include "win32/buffer.h" #include "win32/w32_util.h" #else #include @@ -1134,7 +1135,7 @@ int git_path_diriter_init( return 0; } -static int diriter_update_utf16(git_path_diriter *diriter) +static int diriter_update_paths(git_path_diriter *diriter) { size_t filename_len, path_len; @@ -1156,29 +1157,9 @@ static int diriter_update_utf16(git_path_diriter *diriter) diriter->current.cFileName, filename_len * sizeof(wchar_t)); diriter->path[path_len-1] = L'\0'; - return 0; -} - -static int diriter_update_utf8(git_path_diriter *diriter) -{ - git_win32_utf8_path filename_utf8; - wchar_t *filename_utf16; - int filename_utf8_len; - - /* Don't copy the full UTF-16 path into the UTF-8 path, only do the - * UTF16 -> UTF8 conversion of the filename portion. This prevents us - * from trying to encode the parent path differently, which would be - * bad since we do arithmetic based on the already computed parent len. 
- */ - - filename_utf16 = &diriter->path[diriter->parent_len + 1]; - - if ((filename_utf8_len = git_win32_path_to_utf8(filename_utf8, filename_utf16)) < 0) - return filename_utf8_len; - git_buf_truncate(&diriter->path_utf8, diriter->parent_utf8_len); git_buf_putc(&diriter->path_utf8, '/'); - git_buf_put(&diriter->path_utf8, filename_utf8, (size_t)filename_utf8_len); + git_buf_put_w(&diriter->path_utf8, diriter->current.cFileName, filename_len); if (git_buf_oom(&diriter->path_utf8)) return -1; @@ -1200,7 +1181,7 @@ int git_path_diriter_next(git_path_diriter *diriter) return GIT_ITEROVER; } while (skip_dot && git_path_is_dot_or_dotdotW(diriter->current.cFileName)); - if (diriter_update_utf16(diriter) < 0 || diriter_update_utf8(diriter) < 0) + if (diriter_update_paths(diriter) < 0) return -1; return 0; diff --git a/src/win32/buffer.c b/src/win32/buffer.c new file mode 100644 index 000000000..74950189e --- /dev/null +++ b/src/win32/buffer.c @@ -0,0 +1,55 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ + +#include "common.h" +#include "buffer.h" +#include "../buffer.h" +#include "utf-conv.h" + +GIT_INLINE(int) handle_wc_error(void) +{ + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) + errno = ENAMETOOLONG; + else + errno = EINVAL; + + return -1; +} + +int git_buf_put_w(git_buf *buf, const wchar_t *string_w, size_t len_w) +{ + int utf8_len, utf8_write_len; + size_t new_size; + + if (!len_w) + return 0; + + assert(string_w); + + /* Measure the string necessary for conversion */ + if ((utf8_len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, string_w, len_w, NULL, 0, NULL, NULL)) == 0) + return 0; + + assert(utf8_len > 0); + + GITERR_CHECK_ALLOC_ADD(&new_size, buf->size, (size_t)utf8_len); + GITERR_CHECK_ALLOC_ADD(&new_size, new_size, 1); + + if (git_buf_grow(buf, new_size) < 0) + return -1; + + if ((utf8_write_len = WideCharToMultiByte( + CP_UTF8, WC_ERR_INVALID_CHARS, string_w, len_w, &buf->ptr[buf->size], utf8_len, NULL, NULL)) == 0) + return handle_wc_error(); + + assert(utf8_write_len == utf8_len); + + buf->size += utf8_write_len; + buf->ptr[buf->size] = '\0'; + return 0; +} + diff --git a/src/win32/buffer.h b/src/win32/buffer.h new file mode 100644 index 000000000..62243986f --- /dev/null +++ b/src/win32/buffer.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_git_win32_buffer_h__ +#define INCLUDE_git_win32_buffer_h__ + +#include "../buffer.h" + +/** + * Convert a wide character string to UTF-8 and append the results to the + * buffer. 
+ */ +int git_buf_put_w(git_buf *buf, const wchar_t *string_w, size_t len_w); + +#endif diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c index 0dad4eab0..f1b674ea0 100644 --- a/src/win32/utf-conv.c +++ b/src/win32/utf-conv.c @@ -8,10 +8,6 @@ #include "common.h" #include "utf-conv.h" -#ifndef WC_ERR_INVALID_CHARS -# define WC_ERR_INVALID_CHARS 0x80 -#endif - GIT_INLINE(DWORD) get_wc_flags(void) { static char inited = 0; diff --git a/src/win32/utf-conv.h b/src/win32/utf-conv.h index 89cdb96da..33b95f59f 100644 --- a/src/win32/utf-conv.h +++ b/src/win32/utf-conv.h @@ -10,6 +10,10 @@ #include #include "common.h" +#ifndef WC_ERR_INVALID_CHARS +# define WC_ERR_INVALID_CHARS 0x80 +#endif + /** * Converts a UTF-8 string to wide characters. * From be3f104967ad21e949f72ef10a6b5ec00795ffaa Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 1 May 2015 11:27:44 -0400 Subject: [PATCH 16/16] diriter: actually use iconv on mac --- src/path.c | 5 +- src/path.h | 175 ++++++++++++++++++++++++++++------------------------- 2 files changed, 94 insertions(+), 86 deletions(-) diff --git a/src/path.c b/src/path.c index 5b9fb935e..df6762c3a 100644 --- a/src/path.c +++ b/src/path.c @@ -1263,7 +1263,7 @@ int git_path_diriter_init( #ifdef GIT_USE_ICONV if ((flags & GIT_PATH_DIR_PRECOMPOSE_UNICODE) != 0) - (void)git_path_iconv_init_precompose(&ic); + (void)git_path_iconv_init_precompose(&diriter->ic); #endif diriter->parent_len = diriter->path.size; @@ -1299,7 +1299,8 @@ int git_path_diriter_next(git_path_diriter *diriter) filename_len = strlen(filename); #ifdef GIT_USE_ICONV - if ((error = git_path_iconv(&diriter->ic, &filename, &filename_len)) < 0) + if ((diriter->flags & GIT_PATH_DIR_PRECOMPOSE_UNICODE) != 0 && + (error = git_path_iconv(&diriter->ic, (char **)&filename, &filename_len)) < 0) return error; #endif diff --git a/src/path.h b/src/path.h index 9c2b85a87..14237cb46 100644 --- a/src/path.h +++ b/src/path.h @@ -327,6 +327,93 @@ extern int git_path_walk_up( int 
(*callback)(void *payload, const char *path), void *payload); + +enum { GIT_PATH_NOTEQUAL = 0, GIT_PATH_EQUAL = 1, GIT_PATH_PREFIX = 2 }; + +/* + * Determines if a path is equal to or potentially a child of another. + * @param parent The possible parent + * @param child The possible child + */ +GIT_INLINE(int) git_path_equal_or_prefixed( + const char *parent, + const char *child, + ssize_t *prefixlen) +{ + const char *p = parent, *c = child; + int lastslash = 0; + + while (*p && *c) { + lastslash = (*p == '/'); + + if (*p++ != *c++) + return GIT_PATH_NOTEQUAL; + } + + if (*p != '\0') + return GIT_PATH_NOTEQUAL; + + if (*c == '\0') { + if (prefixlen) + *prefixlen = p - parent; + + return GIT_PATH_EQUAL; + } + + if (*c == '/' || lastslash) { + if (prefixlen) + *prefixlen = (p - parent) - lastslash; + + return GIT_PATH_PREFIX; + } + + return GIT_PATH_NOTEQUAL; +} + +/* translate errno to libgit2 error code and set error message */ +extern int git_path_set_error( + int errno_value, const char *path, const char *action); + +/* check if non-ascii characters are present in filename */ +extern bool git_path_has_non_ascii(const char *path, size_t pathlen); + +#define GIT_PATH_REPO_ENCODING "UTF-8" + +#ifdef __APPLE__ +#define GIT_PATH_NATIVE_ENCODING "UTF-8-MAC" +#else +#define GIT_PATH_NATIVE_ENCODING "UTF-8" +#endif + +#ifdef GIT_USE_ICONV + +#include + +typedef struct { + iconv_t map; + git_buf buf; +} git_path_iconv_t; + +#define GIT_PATH_ICONV_INIT { (iconv_t)-1, GIT_BUF_INIT } + +/* Init iconv data for converting decomposed UTF-8 to precomposed */ +extern int git_path_iconv_init_precompose(git_path_iconv_t *ic); + +/* Clear allocated iconv data */ +extern void git_path_iconv_clear(git_path_iconv_t *ic); + +/* + * Rewrite `in` buffer using iconv map if necessary, replacing `in` + * pointer internal iconv buffer if rewrite happened. The `in` pointer + * will be left unchanged if no rewrite was needed. 
+ */ +extern int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen); + +#endif /* GIT_USE_ICONV */ + +extern bool git_path_does_fs_decompose_unicode(const char *root); + + typedef struct git_path_diriter git_path_diriter; #if defined(GIT_WIN32) && !defined(__MINGW32__) @@ -359,6 +446,10 @@ struct git_path_diriter unsigned int flags; DIR *dir; + +#ifdef GIT_USE_ICONV + git_path_iconv_t ic; +#endif }; #define GIT_PATH_DIRITER_INIT { GIT_BUF_INIT } @@ -452,90 +543,6 @@ extern int git_path_dirload( size_t prefix_len, uint32_t flags); -enum { GIT_PATH_NOTEQUAL = 0, GIT_PATH_EQUAL = 1, GIT_PATH_PREFIX = 2 }; - -/* - * Determines if a path is equal to or potentially a child of another. - * @param parent The possible parent - * @param child The possible child - */ -GIT_INLINE(int) git_path_equal_or_prefixed( - const char *parent, - const char *child, - ssize_t *prefixlen) -{ - const char *p = parent, *c = child; - int lastslash = 0; - - while (*p && *c) { - lastslash = (*p == '/'); - - if (*p++ != *c++) - return GIT_PATH_NOTEQUAL; - } - - if (*p != '\0') - return GIT_PATH_NOTEQUAL; - - if (*c == '\0') { - if (prefixlen) - *prefixlen = p - parent; - - return GIT_PATH_EQUAL; - } - - if (*c == '/' || lastslash) { - if (prefixlen) - *prefixlen = (p - parent) - lastslash; - - return GIT_PATH_PREFIX; - } - - return GIT_PATH_NOTEQUAL; -} - -/* translate errno to libgit2 error code and set error message */ -extern int git_path_set_error( - int errno_value, const char *path, const char *action); - -/* check if non-ascii characters are present in filename */ -extern bool git_path_has_non_ascii(const char *path, size_t pathlen); - -#define GIT_PATH_REPO_ENCODING "UTF-8" - -#ifdef __APPLE__ -#define GIT_PATH_NATIVE_ENCODING "UTF-8-MAC" -#else -#define GIT_PATH_NATIVE_ENCODING "UTF-8" -#endif - -#ifdef GIT_USE_ICONV - -#include - -typedef struct { - iconv_t map; - git_buf buf; -} git_path_iconv_t; - -#define GIT_PATH_ICONV_INIT { (iconv_t)-1, GIT_BUF_INIT } - -/* Init iconv 
data for converting decomposed UTF-8 to precomposed */ -extern int git_path_iconv_init_precompose(git_path_iconv_t *ic); - -/* Clear allocated iconv data */ -extern void git_path_iconv_clear(git_path_iconv_t *ic); - -/* - * Rewrite `in` buffer using iconv map if necessary, replacing `in` - * pointer internal iconv buffer if rewrite happened. The `in` pointer - * will be left unchanged if no rewrite was needed. - */ -extern int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen); - -#endif /* GIT_USE_ICONV */ - -extern bool git_path_does_fs_decompose_unicode(const char *root); /* Used for paths to repositories on the filesystem */ extern bool git_path_is_local_file_url(const char *file_url);