mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-25 21:41:28 +00:00 
			
		
		
		
	 f224eddf92
			
		
	
	
		f224eddf92
		
			
		
	
	
	
	
		
			
			Linux 5.17 commit torvalds/linux@5dfbfe71e enables "the idmapping infrastructure to support idmapped mounts of filesystems mounted with an idmapping". Update the OpenZFS accordingly to improve the idmapped mount support. This pull request contains the following changes: - xattr setter functions are fixed to take mnt_ns argument. Without this, cp -p would fail for an idmapped mount in a user namespace. - idmap_util is enhanced/fixed for its use in a user ns context. - One test case added to test idmapped mount in a user ns. Reviewed-by: Christian Brauner <christian@brauner.io> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Youzhong Yang <yyang@mathworks.com> Closes #14097
		
			
				
	
	
		
			809 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			809 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * CDDL HEADER START
 | |
|  *
 | |
|  * The contents of this file are subject to the terms of the
 | |
|  * Common Development and Distribution License (the "License").
 | |
|  * You may not use this file except in compliance with the License.
 | |
|  *
 | |
|  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 | |
|  * or https://opensource.org/licenses/CDDL-1.0.
 | |
|  * See the License for the specific language governing permissions
 | |
|  * and limitations under the License.
 | |
|  *
 | |
|  * When distributing Covered Code, include this CDDL HEADER in each
 | |
|  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 | |
|  * If applicable, add the following below this CDDL HEADER, with the
 | |
|  * fields enclosed by brackets "[]" replaced with your own identifying
 | |
|  * information: Portions Copyright [yyyy] [name of copyright owner]
 | |
|  *
 | |
|  * CDDL HEADER END
 | |
|  */
 | |
| 
 | |
| #ifndef _GNU_SOURCE
 | |
| #define	_GNU_SOURCE
 | |
| #endif
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include <stdlib.h>
 | |
| #include <stdbool.h>
 | |
| #include <stddef.h>
 | |
| #include <string.h>
 | |
| #include <linux/types.h>
 | |
| #include <sys/wait.h>
 | |
| #include <sys/stat.h>
 | |
| #include <sys/mount.h>
 | |
| #include <fcntl.h>
 | |
| #include <errno.h>
 | |
| #include <sched.h>
 | |
| #include <syscall.h>
 | |
| #include <sys/socket.h>
 | |
| 
 | |
| #include <sys/list.h>
 | |
| 
 | |
/* Fallback when no header has supplied UINT_MAX. */
#if !defined(UINT_MAX)
#define	UINT_MAX	4294967295U
#endif

/*
 * __NR_Linux is the per-architecture base that is added to the syscall
 * numbers defined below when the system headers do not provide them.
 */
#if !defined(__NR_Linux)
#if defined(__alpha__)
#define	__NR_Linux 110
#elif defined(_MIPS_SIM)
#if _MIPS_SIM == _MIPS_SIM_ABI32
#define	__NR_Linux 4000
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32
#define	__NR_Linux 6000
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64
#define	__NR_Linux 5000
#endif
#elif defined(__ia64__)
#define	__NR_Linux 1024
#else
#define	__NR_Linux 0
#endif
#endif

/* Syscall numbers, used only when the headers lack them. */
#if !defined(__NR_mount_setattr)
#define	__NR_mount_setattr (442 + __NR_Linux)
#endif

#if !defined(__NR_open_tree)
#define	__NR_open_tree (428 + __NR_Linux)
#endif

#if !defined(__NR_move_mount)
#define	__NR_move_mount (429 + __NR_Linux)
#endif

/* Flag values, used only when the headers lack them. */
#if !defined(MNT_DETACH)
#define	MNT_DETACH 2
#endif

#if !defined(MOVE_MOUNT_F_EMPTY_PATH)
#define	MOVE_MOUNT_F_EMPTY_PATH 0x00000004
#endif

#if !defined(MOUNT_ATTR_IDMAP)
#define	MOUNT_ATTR_IDMAP 0x00100000
#endif

#if !defined(OPEN_TREE_CLONE)
#define	OPEN_TREE_CLONE 1
#endif

#if !defined(OPEN_TREE_CLOEXEC)
#define	OPEN_TREE_CLOEXEC O_CLOEXEC
#endif

#if !defined(AT_RECURSIVE)
#define	AT_RECURSIVE 0x8000
#endif
 | |
| 
 | |
/*
 * Argument block handed to the mount_setattr syscall through
 * sys_mount_setattr().  This tool only ever fills in attr_set and
 * userns_fd; the remaining members are left zero-initialized.
 */
typedef struct {
	__u64 attr_set;		/* mount attributes to set */
	__u64 attr_clr;		/* not set by this tool */
	__u64 propagation;	/* not set by this tool */
	__u64 userns_fd;	/* fd of the user namespace to map with */
} mount_attr_t;
 | |
| 
 | |
| static inline int
 | |
| sys_mount_setattr(int dfd, const char *path, unsigned int flags,
 | |
|     mount_attr_t *attr, size_t size)
 | |
| {
 | |
| 	return (syscall(__NR_mount_setattr, dfd, path, flags, attr, size));
 | |
| }
 | |
| 
 | |
/*
 * Raw wrapper around the open_tree syscall; the arguments are
 * forwarded unchanged and the syscall() result is returned as an int.
 */
static inline int
sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_open_tree, dfd, filename, flags);
	return ((int)rc);
}
 | |
| 
 | |
/*
 * Raw wrapper around the move_mount syscall; the arguments are
 * forwarded unchanged and the syscall() result is returned as an int.
 */
static inline int
sys_move_mount(int from_dfd, const char *from_pathname,
    int to_dfd, const char *to_pathname, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
	    to_pathname, flags);
	return ((int)rc);
}
 | |
| 
 | |
/* Which id space(s) a mapping entry applies to. */
typedef enum idmap_type_t {
	TYPE_UID = 0,	/* user ids only ('u') */
	TYPE_GID = 1,	/* group ids only ('g') */
	TYPE_BOTH = 2	/* both user and group ids ('b') */
} idmap_type_t;
 | |
| 
 | |
| struct idmap_entry {
 | |
| 	__u32 first;
 | |
| 	__u32 lower_first;
 | |
| 	__u32 count;
 | |
| 	idmap_type_t type;
 | |
| 	list_node_t node;
 | |
| };
 | |
| 
 | |
/*
 * Print a printf-style message, followed by a newline, to stderr.
 *
 * errno is saved on entry and restored before returning.  Callers in
 * this file log a failure first and only afterwards consult errno
 * (e.g. "log_errno(...); return (-errno);"), so the stdio calls made
 * here must not be allowed to replace the value left by the call that
 * actually failed.
 */
static void
log_msg(const char *msg, ...)
{
	int saved_errno = errno;
	va_list ap;

	va_start(ap, msg);
	/* a "%m" in msg is expanded here, while errno is still untouched */
	vfprintf(stderr, msg, ap);
	fputc('\n', stderr);
	va_end(ap);

	errno = saved_errno;
}
 | |
| 
 | |
/*
 * Log a message prefixed with "<file>:<line>:<function>: [<errno text>] ".
 * The errno text is produced by the "%m" conversion in the format string.
 */
#define	log_errno(msg, args...)						\
	do {								\
		log_msg("%s:%d:%s: [%m] " msg, __FILE__, __LINE__,	\
		    __func__, ##args);					\
	} while (0)
 | |
| 
 | |
| /*
 | |
|  * Parse the idmapping in the following format
 | |
|  * and add to the list:
 | |
|  *
 | |
|  *   u:nsid_first:hostid_first:count
 | |
|  *   g:nsid_first:hostid_first:count
 | |
|  *   b:nsid_first:hostid_first:count
 | |
|  *
 | |
|  * The delimiter can be : or space character.
 | |
|  *
 | |
|  * Return:
 | |
|  *   0      if success
 | |
|  *   ENOMEM if out of memory
 | |
|  *   EINVAL if wrong arg or input
 | |
|  */
 | |
| static int
 | |
| parse_idmap_entry(list_t *head, char *input)
 | |
| {
 | |
| 	char *token, *savedptr = NULL;
 | |
| 	struct idmap_entry *entry;
 | |
| 	unsigned long ul;
 | |
| 	char *delimiter = (char *)": ";
 | |
| 	char c;
 | |
| 
 | |
| 	if (!input || !head)
 | |
| 		return (EINVAL);
 | |
| 	entry = malloc(sizeof (*entry));
 | |
| 	if (!entry)
 | |
| 		return (ENOMEM);
 | |
| 
 | |
| 	token = strtok_r(input, delimiter, &savedptr);
 | |
| 	if (token)
 | |
| 		c = token[0];
 | |
| 	if (!token || (c != 'b' && c != 'u' && c != 'g'))
 | |
| 		goto errout;
 | |
| 	entry->type = (c == 'b') ? TYPE_BOTH :
 | |
| 	    ((c == 'u') ? TYPE_UID : TYPE_GID);
 | |
| 
 | |
| 	token = strtok_r(NULL, delimiter, &savedptr);
 | |
| 	if (!token)
 | |
| 		goto errout;
 | |
| 	ul = strtoul(token, NULL, 10);
 | |
| 	if (ul > UINT_MAX || errno != 0)
 | |
| 		goto errout;
 | |
| 	entry->first = (__u32)ul;
 | |
| 
 | |
| 	token = strtok_r(NULL, delimiter, &savedptr);
 | |
| 	if (!token)
 | |
| 		goto errout;
 | |
| 	ul = strtoul(token, NULL, 10);
 | |
| 	if (ul > UINT_MAX || errno != 0)
 | |
| 		goto errout;
 | |
| 	entry->lower_first = (__u32)ul;
 | |
| 
 | |
| 	token = strtok_r(NULL, delimiter, &savedptr);
 | |
| 	if (!token)
 | |
| 		goto errout;
 | |
| 	ul = strtoul(token, NULL, 10);
 | |
| 	if (ul > UINT_MAX || errno != 0)
 | |
| 		goto errout;
 | |
| 	entry->count = (__u32)ul;
 | |
| 
 | |
| 	list_insert_tail(head, entry);
 | |
| 
 | |
| 	return (0);
 | |
| 
 | |
| errout:
 | |
| 	free(entry);
 | |
| 	return (EINVAL);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Release all the entries in the list
 | |
|  */
 | |
| static void
 | |
| free_idmap(list_t *head)
 | |
| {
 | |
| 	struct idmap_entry *entry;
 | |
| 
 | |
| 	while ((entry = list_remove_head(head)) != NULL)
 | |
| 		free(entry);
 | |
| 	/* list_destroy() to be done by the caller */
 | |
| }
 | |
| 
 | |
/*
 * Write the whole buffer to fd, retrying on EINTR and continuing after
 * short writes.
 *
 * Return:
 *   buf_size           if everything was written
 *   negative write(2)  result if a write failed (already logged)
 */
static ssize_t
write_buf(int fd, const char *buf, size_t buf_size)
{
	const char *cursor = buf;
	size_t left = buf_size;
	ssize_t done = 0;
	ssize_t n;

	for (;;) {
		n = write(fd, cursor, left);
		if (n < 0) {
			/* interrupted before anything was written: retry */
			if (errno == EINTR)
				continue;
			log_errno("write");
			return (n);
		}
		done += n;
		if (done == buf_size)
			break;
		/* short write: advance past what went out */
		cursor += n;
		left -= n;
	}

	return (done);
}
 | |
| 
 | |
/*
 * Read data from file into buffer.
 *
 * A single read(2) is issued and repeated only when it is interrupted
 * (EINTR); the result may therefore be shorter than buf_size.
 *
 * Return:
 *   the read(2) result: number of bytes read, or a negative value on
 *   failure (already logged)
 */
static ssize_t
read_buf(int fd, char *buf, size_t buf_size)
{
	/* ssize_t, not int: keep the full range of the read(2) result */
	ssize_t nread;

	do {
		nread = read(fd, buf, buf_size);
	} while (nread < 0 && errno == EINTR);

	if (nread < 0)
		log_errno("read");
	return (nread);
}
 | |
| 
 | |
| /*
 | |
|  * Write idmap of the given type in the buffer to the
 | |
|  * process' uid_map or gid_map proc file.
 | |
|  *
 | |
|  * Return:
 | |
|  *   0     if success
 | |
|  *   errno if there's any error
 | |
|  */
 | |
| static int
 | |
| write_idmap(pid_t pid, char *buf, size_t buf_size, idmap_type_t type)
 | |
| {
 | |
| 	char path[PATH_MAX];
 | |
| 	int fd = -EBADF;
 | |
| 	int ret;
 | |
| 
 | |
| 	(void) snprintf(path, sizeof (path), "/proc/%d/%cid_map",
 | |
| 	    pid, type == TYPE_UID ? 'u' : 'g');
 | |
| 	fd = open(path, O_WRONLY | O_CLOEXEC);
 | |
| 	if (fd < 0) {
 | |
| 		ret = errno;
 | |
| 		log_errno("open(%s)", path);
 | |
| 		goto out;
 | |
| 	}
 | |
| 	ret = write_buf(fd, buf, buf_size);
 | |
| 	if (ret < 0)
 | |
| 		ret = errno;
 | |
| 	else
 | |
| 		ret = 0;
 | |
| out:
 | |
| 	if (fd >= 0)
 | |
| 		close(fd);
 | |
| 	return (ret);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Write idmap info in the list to the process
 | |
|  * user namespace, i.e. its /proc/<pid>/uid_map
 | |
|  * and /proc/<pid>/gid_map file.
 | |
|  *
 | |
|  * Return:
 | |
|  *   0     if success
 | |
|  *   errno if it fails
 | |
|  */
 | |
| static int
 | |
| write_pid_idmaps(pid_t pid, list_t *head)
 | |
| {
 | |
| 	char *buf_uids, *buf_gids;
 | |
| 	char *curr_bufu, *curr_bufg;
 | |
| 	/* max 4k to be allowed for each map */
 | |
| 	int size_buf_uids = 4096, size_buf_gids = 4096;
 | |
| 	struct idmap_entry *entry;
 | |
| 	int uid_filled, gid_filled;
 | |
| 	int ret = 0;
 | |
| 	int has_uids = 0, has_gids = 0;
 | |
| 	size_t buf_size;
 | |
| 
 | |
| 	buf_uids = malloc(size_buf_uids);
 | |
| 	if (!buf_uids)
 | |
| 		return (ENOMEM);
 | |
| 	buf_gids = malloc(size_buf_gids);
 | |
| 	if (!buf_gids) {
 | |
| 		free(buf_uids);
 | |
| 		return (ENOMEM);
 | |
| 	}
 | |
| 	curr_bufu = buf_uids;
 | |
| 	curr_bufg = buf_gids;
 | |
| 
 | |
| 	for (entry = list_head(head); entry; entry = list_next(head, entry)) {
 | |
| 		if (entry->type == TYPE_UID || entry->type == TYPE_BOTH) {
 | |
| 			uid_filled = snprintf(curr_bufu, size_buf_uids,
 | |
| 			    "%u %u %u\n", entry->first, entry->lower_first,
 | |
| 			    entry->count);
 | |
| 			if (uid_filled <= 0 || uid_filled >= size_buf_uids) {
 | |
| 				ret = E2BIG;
 | |
| 				goto out;
 | |
| 			}
 | |
| 			curr_bufu += uid_filled;
 | |
| 			size_buf_uids -= uid_filled;
 | |
| 			has_uids = 1;
 | |
| 		}
 | |
| 		if (entry->type == TYPE_GID || entry->type == TYPE_BOTH) {
 | |
| 			gid_filled = snprintf(curr_bufg, size_buf_gids,
 | |
| 			    "%u %u %u\n", entry->first, entry->lower_first,
 | |
| 			    entry->count);
 | |
| 			if (gid_filled <= 0 || gid_filled >= size_buf_gids) {
 | |
| 				ret = E2BIG;
 | |
| 				goto out;
 | |
| 			}
 | |
| 			curr_bufg += gid_filled;
 | |
| 			size_buf_gids -= gid_filled;
 | |
| 			has_gids = 1;
 | |
| 		}
 | |
| 	}
 | |
| 	if (has_uids) {
 | |
| 		buf_size = curr_bufu - buf_uids;
 | |
| 		ret = write_idmap(pid, buf_uids, buf_size, TYPE_UID);
 | |
| 		if (ret)
 | |
| 			goto out;
 | |
| 	}
 | |
| 	if (has_gids) {
 | |
| 		buf_size = curr_bufg - buf_gids;
 | |
| 		ret = write_idmap(pid, buf_gids, buf_size, TYPE_GID);
 | |
| 	}
 | |
| 
 | |
| out:
 | |
| 	free(buf_uids);
 | |
| 	free(buf_gids);
 | |
| 	return (ret);
 | |
| }
 | |
| 
 | |
/*
 * Reap the given child, retrying waitpid() when it is interrupted.
 *
 * Return:
 *   the child's exit code if it exited normally
 *   EXIT_FAILURE if waitpid() fails or the child did not exit normally
 */
static int
wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) < 0) {
		if (errno != EINTR)
			return (EXIT_FAILURE);
	}

	return (WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : EXIT_FAILURE);
}
 | |
| 
 | |
| /*
 | |
|  * Get the file descriptor of the process user namespace
 | |
|  * given its pid.
 | |
|  *
 | |
|  * Return:
 | |
|  *   fd  if success
 | |
|  *   -1  if it fails
 | |
|  */
 | |
| static int
 | |
| userns_fd_from_pid(pid_t pid)
 | |
| {
 | |
| 	int fd;
 | |
| 	char path[PATH_MAX];
 | |
| 
 | |
| 	(void) snprintf(path, sizeof (path), "/proc/%d/ns/user", pid);
 | |
| 	fd = open(path, O_RDONLY | O_CLOEXEC);
 | |
| 	if (fd < 0)
 | |
| 		log_errno("open(%s)", path);
 | |
| 	return (fd);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Get the user namespace file descriptor given a list
 | |
|  * of idmap info.
 | |
|  *
 | |
|  * Return:
 | |
|  *   fd     if success
 | |
|  *   -errno if it fails
 | |
|  */
 | |
| static int
 | |
| userns_fd_from_idmap(list_t *head)
 | |
| {
 | |
| 	pid_t pid;
 | |
| 	int ret, fd;
 | |
| 	int fds[2];
 | |
| 	char c;
 | |
| 	int saved_errno = 0;
 | |
| 
 | |
| 	/* socketpair for bidirectional communication */
 | |
| 	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fds);
 | |
| 	if (ret) {
 | |
| 		log_errno("socketpair");
 | |
| 		return (-errno);
 | |
| 	}
 | |
| 
 | |
| 	pid = fork();
 | |
| 	if (pid < 0) {
 | |
| 		log_errno("fork");
 | |
| 		fd = -errno;
 | |
| 		goto out;
 | |
| 	}
 | |
| 
 | |
| 	if (pid == 0) {
 | |
| 		/* child process */
 | |
| 		ret = unshare(CLONE_NEWUSER);
 | |
| 		if (ret == 0) {
 | |
| 			/* notify the parent of success */
 | |
| 			ret = write_buf(fds[1], "1", 1);
 | |
| 			if (ret < 0)
 | |
| 				saved_errno = errno;
 | |
| 			else {
 | |
| 				/*
 | |
| 				 * Until the parent has written to idmap,
 | |
| 				 * we cannot exit, otherwise the defunct
 | |
| 				 * process is owned by the real root, writing
 | |
| 				 * to its idmap ends up with EPERM in the
 | |
| 				 * context of a user ns
 | |
| 				 */
 | |
| 				ret = read_buf(fds[1], &c, 1);
 | |
| 				if (ret < 0)
 | |
| 					saved_errno = errno;
 | |
| 			}
 | |
| 		} else {
 | |
| 			saved_errno = errno;
 | |
| 			log_errno("unshare");
 | |
| 			ret = write_buf(fds[1], "0", 1);
 | |
| 			if (ret < 0)
 | |
| 				saved_errno = errno;
 | |
| 		}
 | |
| 		exit(saved_errno);
 | |
| 	}
 | |
| 
 | |
| 	/* parent process */
 | |
| 	ret = read_buf(fds[0], &c, 1);
 | |
| 	if (ret == 1 && c == '1') {
 | |
| 		ret = write_pid_idmaps(pid, head);
 | |
| 		if (!ret) {
 | |
| 			fd = userns_fd_from_pid(pid);
 | |
| 			if (fd < 0)
 | |
| 				fd = -errno;
 | |
| 		} else {
 | |
| 			fd = -ret;
 | |
| 		}
 | |
| 		/* Let child know it can exit */
 | |
| 		(void) write_buf(fds[0], "1", 1);
 | |
| 	} else {
 | |
| 		fd = -EBADF;
 | |
| 	}
 | |
| 	(void) wait_for_pid(pid);
 | |
| out:
 | |
| 	close(fds[0]);
 | |
| 	close(fds[1]);
 | |
| 	return (fd);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Check if the operating system supports idmapped mount on the
 | |
|  * given path or not.
 | |
|  *
 | |
|  * Return:
 | |
|  *   true  if supported
 | |
|  *   false if not supported
 | |
|  */
 | |
| static bool
 | |
| is_idmap_supported(char *path)
 | |
| {
 | |
| 	list_t head;
 | |
| 	int ret;
 | |
| 	int tree_fd = -EBADF, path_fd = -EBADF;
 | |
| 	mount_attr_t attr = {
 | |
| 	    .attr_set	= MOUNT_ATTR_IDMAP,
 | |
| 	    .userns_fd  = -EBADF,
 | |
| 	};
 | |
| 
 | |
| 	/* strtok_r() won't be happy with a const string */
 | |
| 	/* To check if idmapped mount can be done in a user ns, map 0 to 0 */
 | |
| 	char *input = strdup("b:0:0:1");
 | |
| 
 | |
| 	if (!input) {
 | |
| 		errno = ENOMEM;
 | |
| 		log_errno("strdup");
 | |
| 		return (false);
 | |
| 	}
 | |
| 
 | |
| 	list_create(&head, sizeof (struct idmap_entry),
 | |
| 	    offsetof(struct idmap_entry, node));
 | |
| 	ret = parse_idmap_entry(&head, input);
 | |
| 	if (ret) {
 | |
| 		errno = ret;
 | |
| 		log_errno("parse_idmap_entry(%s)", input);
 | |
| 		goto out1;
 | |
| 	}
 | |
| 	ret = userns_fd_from_idmap(&head);
 | |
| 	if (ret < 0)
 | |
| 		goto out1;
 | |
| 	attr.userns_fd = ret;
 | |
| 	ret = openat(-EBADF, path, O_DIRECTORY | O_CLOEXEC);
 | |
| 	if (ret < 0) {
 | |
| 		log_errno("openat(%s)", path);
 | |
| 		goto out;
 | |
| 	}
 | |
| 	path_fd = ret;
 | |
| 	ret = sys_open_tree(path_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
 | |
| 	    AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
 | |
| 	if (ret < 0) {
 | |
| 		log_errno("sys_open_tree");
 | |
| 		goto out;
 | |
| 	}
 | |
| 	tree_fd = ret;
 | |
| 	ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH, &attr,
 | |
| 	    sizeof (attr));
 | |
| 	if (ret < 0) {
 | |
| 		log_errno("sys_mount_setattr");
 | |
| 	}
 | |
| out:
 | |
| 	close(attr.userns_fd);
 | |
| out1:
 | |
| 	free_idmap(&head);
 | |
| 	list_destroy(&head);
 | |
| 	if (tree_fd >= 0)
 | |
| 		close(tree_fd);
 | |
| 	if (path_fd >= 0)
 | |
| 		close(path_fd);
 | |
| 	free(input);
 | |
| 	return (ret == 0);
 | |
| }
 | |
| 
 | |
/*
 * Decide whether path is a mount point by comparing it with
 * "<path>/..": the two either live on different devices, or are the
 * very same inode.
 *
 * Return:
 *   true  if it is
 *   false otherwise, including when either lstat() or the allocation
 *         fails
 */
static bool
is_mountpoint(char *path)
{
	struct stat self, up;
	bool mounted = false;
	char *dotdot;

	/* room for path + "/.." + NUL */
	dotdot = malloc(strlen(path)+4);
	if (dotdot == NULL) {
		errno = ENOMEM;
		log_errno("malloc");
		return (false);
	}
	strcpy(dotdot, path);
	strcat(dotdot, "/..");

	if (lstat(path, &self) == 0 && lstat(dotdot, &up) == 0) {
		mounted = (self.st_dev != up.st_dev ||
		    self.st_ino == up.st_ino);
	}

	free(dotdot);
	return (mounted);
}
 | |
| 
 | |
| /*
 | |
|  * Remount the source on the new target folder with the given
 | |
|  * list of idmap info. If target is NULL, the source will be
 | |
|  * unmounted and then remounted if it is a mountpoint, otherwise
 | |
|  * no unmount is done, the source is simply idmap remounted.
 | |
|  *
 | |
|  * Return:
 | |
|  *   0      if success
 | |
|  *   -errno otherwise
 | |
|  */
 | |
| static int
 | |
| do_idmap_mount(list_t *idmap, char *source, char *target, int flags)
 | |
| {
 | |
| 	int ret;
 | |
| 	int tree_fd = -EBADF, source_fd = -EBADF;
 | |
| 	mount_attr_t attr = {
 | |
| 	    .attr_set   = MOUNT_ATTR_IDMAP,
 | |
| 	    .userns_fd  = -EBADF,
 | |
| 	};
 | |
| 
 | |
| 	ret = userns_fd_from_idmap(idmap);
 | |
| 	if (ret < 0)
 | |
| 		goto out1;
 | |
| 	attr.userns_fd = ret;
 | |
| 	ret = openat(-EBADF, source, O_DIRECTORY | O_CLOEXEC);
 | |
| 	if (ret < 0) {
 | |
| 		ret = -errno;
 | |
| 		log_errno("openat(%s)", source);
 | |
| 		goto out;
 | |
| 	}
 | |
| 	source_fd = ret;
 | |
| 	ret = sys_open_tree(source_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
 | |
| 	    AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE | flags);
 | |
| 	if (ret < 0) {
 | |
| 		ret = -errno;
 | |
| 		log_errno("sys_open_tree");
 | |
| 		goto out;
 | |
| 	}
 | |
| 	tree_fd = ret;
 | |
| 	ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH | flags, &attr,
 | |
| 	    sizeof (attr));
 | |
| 	if (ret < 0) {
 | |
| 		ret = -errno;
 | |
| 		log_errno("sys_mount_setattr");
 | |
| 		goto out;
 | |
| 	}
 | |
| 	if (target == NULL && is_mountpoint(source)) {
 | |
| 		ret = umount2(source, MNT_DETACH);
 | |
| 		if (ret < 0) {
 | |
| 			ret = -errno;
 | |
| 			log_errno("umount2(%s)", source);
 | |
| 			goto out;
 | |
| 		}
 | |
| 	}
 | |
| 	ret = sys_move_mount(tree_fd, "", -EBADF, target == NULL ?
 | |
| 	    source : target, MOVE_MOUNT_F_EMPTY_PATH);
 | |
| 	if (ret < 0) {
 | |
| 		ret = -errno;
 | |
| 		log_errno("sys_move_mount(%s)", target == NULL ?
 | |
| 		    source : target);
 | |
| 	}
 | |
| out:
 | |
| 	close(attr.userns_fd);
 | |
| out1:
 | |
| 	if (tree_fd >= 0)
 | |
| 		close(tree_fd);
 | |
| 	if (source_fd >= 0)
 | |
| 		close(source_fd);
 | |
| 	return (ret);
 | |
| }
 | |
| 
 | |
/*
 * Print the command line synopsis and option help to stderr; argv[0]
 * is used as the program name.
 */
static void
print_usage(char *argv[])
{
	FILE *out = stderr;

	fprintf(out, "Usage: %s [-r] [-c] [-m <idmap1>] [-m <idmap2>]"
	    " ... [<source>] [<target>]\n", argv[0]);
	fprintf(out, "\n");
	fprintf(out, "  -r Recursively do idmapped mount.\n");
	fprintf(out, "\n");
	fprintf(out, "  -c Checks if idmapped mount is supported "
	    "on the <source> by the operating system or not.\n");
	fprintf(out, "\n");
	fprintf(out, "  -m <idmap> to specify the idmap info, "
	    "in the following format:\n");
	fprintf(out, "     <id_type>:<nsid_first>:<hostid_first>:<count>\n");
	fprintf(out, "\n");
	fprintf(out, "  <id_type> can be either of 'b', 'u', and 'g'.\n");
	fprintf(out, "\n");
	fprintf(out, "The <source> folder will be mounted at <target> "
	    "with the provided idmap information.\nIf no <target> is "
	    "specified, and <source> is a mount point, "
	    "then <source> will be unmounted and then remounted.\n");
}
 | |
| 
 | |
| int
 | |
| main(int argc, char *argv[])
 | |
| {
 | |
| 	int opt;
 | |
| 	list_t idmap_head;
 | |
| 	int check_supported = 0;
 | |
| 	int ret = EXIT_SUCCESS;
 | |
| 	char *source = NULL, *target = NULL;
 | |
| 	int flags = 0;
 | |
| 
 | |
| 	list_create(&idmap_head, sizeof (struct idmap_entry),
 | |
| 	    offsetof(struct idmap_entry, node));
 | |
| 
 | |
| 	while ((opt = getopt(argc, argv, "rcm:")) != -1) {
 | |
| 		switch (opt) {
 | |
| 		case 'r':
 | |
| 			flags |= AT_RECURSIVE;
 | |
| 			break;
 | |
| 		case 'c':
 | |
| 			check_supported = 1;
 | |
| 			break;
 | |
| 		case 'm':
 | |
| 			ret = parse_idmap_entry(&idmap_head, optarg);
 | |
| 			if (ret) {
 | |
| 				errno = ret;
 | |
| 				log_errno("parse_idmap_entry(%s)", optarg);
 | |
| 				ret = EXIT_FAILURE;
 | |
| 				goto out;
 | |
| 			}
 | |
| 			break;
 | |
| 		default:
 | |
| 			print_usage(argv);
 | |
| 			exit(EXIT_FAILURE);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (check_supported == 0 && list_is_empty(&idmap_head))	{
 | |
| 		print_usage(argv);
 | |
| 		ret = EXIT_FAILURE;
 | |
| 		goto out;
 | |
| 	}
 | |
| 
 | |
| 	if (optind >= argc) {
 | |
| 		fprintf(stderr, "Expected to have <source>, <target>.\n");
 | |
| 		print_usage(argv);
 | |
| 		ret = EXIT_FAILURE;
 | |
| 		goto out;
 | |
| 	}
 | |
| 
 | |
| 	source = argv[optind];
 | |
| 	if (optind < (argc - 1)) {
 | |
| 		target = argv[optind + 1];
 | |
| 	}
 | |
| 
 | |
| 	if (check_supported) {
 | |
| 		free_idmap(&idmap_head);
 | |
| 		list_destroy(&idmap_head);
 | |
| 		if (is_idmap_supported(source)) {
 | |
| 			printf("idmapped mount is supported on [%s].\n",
 | |
| 			    source);
 | |
| 			return (EXIT_SUCCESS);
 | |
| 		} else {
 | |
| 			printf("idmapped mount is NOT supported.\n");
 | |
| 			return (EXIT_FAILURE);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	ret = do_idmap_mount(&idmap_head, source, target, flags);
 | |
| 	if (ret)
 | |
| 		ret = EXIT_FAILURE;
 | |
| out:
 | |
| 	free_idmap(&idmap_head);
 | |
| 	list_destroy(&idmap_head);
 | |
| 
 | |
| 	exit(ret);
 | |
| }
 |