mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-08-31 22:23:05 +00:00

Add helper to utils.c and use it in statmount userns tests.

Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/20250509133240.529330-8-amir73il@gmail.com
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
590 lines
12 KiB
C
590 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#ifndef _GNU_SOURCE
|
|
#define _GNU_SOURCE
|
|
#endif
|
|
#include <fcntl.h>
|
|
#include <sys/types.h>
|
|
#include <dirent.h>
|
|
#include <grp.h>
|
|
#include <linux/limits.h>
|
|
#include <sched.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <sys/eventfd.h>
|
|
#include <sys/fsuid.h>
|
|
#include <sys/prctl.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/xattr.h>
|
|
#include <sys/mount.h>
|
|
|
|
#include "../kselftest.h"
|
|
#include "wrappers.h"
|
|
#include "utils.h"
|
|
|
|
/* create_userns_hierarchy() stops recursing once an entry's level equals this value. */
#define MAX_USERNS_LEVEL 32
|
|
|
|
/*
 * syserror() - report a failed call on stderr and evaluate to -errno.
 *
 * Prints "<strerror(errno)> - <formatted message>\n" (via the "%m"
 * conversion) and yields the negated errno of the failure.
 *
 * errno is captured before printing and restored afterwards, so neither the
 * value of the expression nor the caller's errno depends on what fprintf()
 * does to errno internally.
 */
#define syserror(format, ...)                                        \
	({                                                           \
		int __saved_errno__ = errno;                         \
		fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
		errno = __saved_errno__;                             \
		(-__saved_errno__);                                  \
	})
|
|
|
|
/*
 * syserror_set() - report an error code on stderr and evaluate to that code.
 *
 * @__ret__ is a (typically negative) error code. errno is set to its
 * absolute value so that "%m" prints the matching description, then the
 * formatted message is printed and @__ret__ is yielded unchanged.
 *
 * @__ret__ is evaluated exactly once; the cached copy is used for errno.
 */
#define syserror_set(__ret__, format, ...)                           \
	({                                                           \
		__typeof__(__ret__) __internal_ret__ = (__ret__);    \
		errno = labs(__internal_ret__);                      \
		fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
		__internal_ret__;                                    \
	})
|
|
|
|
/*
 * Length of the string literal @x without its terminating NUL byte.
 * Concatenating with "" makes the macro reject anything that is not a
 * string literal.
 */
#define STRLITERALLEN(x) (sizeof(x "") - sizeof(""))
|
|
|
|
/*
 * INTTYPE_TO_STRLEN() - buffer space needed to print an integer of @type.
 *
 * Evaluates to 2 plus the maximum number of decimal digits of a 1, 2, 4 or
 * 8 byte integer (3, 5, 10 and 20). The two extra bytes are presumably room
 * for a sign and a terminator - confirm against callers.
 *
 * Types wider than 8 bytes are rejected at compile time: the last branch
 * then names an array of negative size. For supported types that branch is
 * a valid one-element array, so no zero-length-array extension is needed.
 */
#define INTTYPE_TO_STRLEN(type)                                  \
	(2 + (sizeof(type) <= 1 ? 3 :                            \
	      sizeof(type) <= 2 ? 5 :                            \
	      sizeof(type) <= 4 ? 10 :                           \
	      sizeof(type) <= 8 ? 20 :                           \
	      sizeof(char[1 - 2 * (sizeof(type) > 8)])))
|
|
|
|
/*
 * list_for_each() - walk every node of a circular list.
 *
 * @__iterator: pointer variable that receives each node in turn
 * @__list:     the list head; it is not visited itself
 *
 * Both arguments are parenthesised on every use so that compound
 * expressions (e.g. a conditional) can be passed safely.
 */
#define list_for_each(__iterator, __list)              \
	for ((__iterator) = (__list)->next;            \
	     (__iterator) != (__list);                 \
	     (__iterator) = (__iterator)->next)
|
|
|
|
/*
 * Kind of id mapping. The values are consecutive because
 * map_ids_from_idmap() iterates from ID_TYPE_UID to ID_TYPE_GID with ++.
 */
typedef enum idmap_type_t {
	ID_TYPE_UID = 0,
	ID_TYPE_GID = 1
} idmap_type_t;
|
|
|
|
struct id_map {
|
|
idmap_type_t map_type;
|
|
__u32 nsid;
|
|
__u32 hostid;
|
|
__u32 range;
|
|
};
|
|
|
|
/*
 * Node of a circular doubly linked list. A list head is a node whose
 * next/prev point back at itself while the list is empty (see list_init()).
 */
struct list {
	void *elem;		/* payload; NULL for a head set up by list_init() */
	struct list *next;	/* following node, or the head */
	struct list *prev;	/* preceding node, or the head */
};
|
|
|
|
struct userns_hierarchy {
|
|
int fd_userns;
|
|
int fd_event;
|
|
unsigned int level;
|
|
struct list id_map;
|
|
};
|
|
|
|
static inline void list_init(struct list *list)
|
|
{
|
|
list->elem = NULL;
|
|
list->next = list->prev = list;
|
|
}
|
|
|
|
static inline int list_empty(const struct list *list)
|
|
{
|
|
return list == list->next;
|
|
}
|
|
|
|
static inline void __list_add(struct list *new, struct list *prev, struct list *next)
|
|
{
|
|
next->prev = new;
|
|
new->next = next;
|
|
new->prev = prev;
|
|
prev->next = new;
|
|
}
|
|
|
|
static inline void list_add_tail(struct list *head, struct list *list)
|
|
{
|
|
__list_add(list, head->prev, head);
|
|
}
|
|
|
|
static inline void list_del(struct list *list)
|
|
{
|
|
struct list *next, *prev;
|
|
|
|
next = list->next;
|
|
prev = list->prev;
|
|
next->prev = prev;
|
|
prev->next = next;
|
|
}
|
|
|
|
/*
 * read() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final read() returned: bytes read, 0, or -1 with
 * errno set to something other than EINTR.
 */
static ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
|
|
|
|
/*
 * write() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final write() returned: bytes written, or -1 with
 * errno set to something other than EINTR. Short writes are not retried.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
|
|
|
|
#define __STACK_SIZE (8 * 1024 * 1024)
/*
 * do_clone() - start @fn(@arg) in a new process created with clone().
 *
 * @flags are passed to clone() with SIGCHLD added. The child runs on a
 * freshly malloc()ed stack of __STACK_SIZE bytes: clone() is handed the
 * highest address of that allocation, __clone2() (ia64) its base and size.
 *
 * Returns the child's pid, -ENOMEM if the stack cannot be allocated, or the
 * negative value returned by clone() with errno describing the failure.
 *
 * The stack is released when no child was created. On success it is left
 * allocated on purpose: callers in this file pass CLONE_VM, in which case
 * the child executes on this very memory.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;
	pid_t pid;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif
	if (pid < 0) {
		/* Keep the errno of the failed clone() across free(). */
		int saved_errno = errno;

		free(stack);
		errno = saved_errno;
	}

	return pid;
}
|
|
|
|
/*
 * Body of the helper process cloned by get_userns_fd_from_idmap(): it only
 * sleeps so that its namespaces stay alive until the parent kills it with
 * SIGKILL. The loop never terminates, so the function never returns.
 */
static int get_userns_fd_cb(void *data)
{
	(void)data;

	for (;;)
		pause();
}
|
|
|
|
/*
 * Reap the child @pid, retrying waitpid() when it is interrupted.
 *
 * Returns the child's exit status if it exited normally, and -1 if
 * waitpid() failed or the child did not terminate through exit.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
|
|
|
|
/*
 * write_id_mapping() - write a prepared mapping to /proc/<pid>/{u,g}id_map.
 *
 * @map_type: selects uid_map (ID_TYPE_UID) or gid_map (ID_TYPE_GID)
 * @pid:      process whose map file is written
 * @buf:      mapping text
 * @buf_size: number of bytes of @buf to write
 *
 * When the caller's effective uid is not 0 and a gid map is requested,
 * "deny" is first written to /proc/<pid>/setgroups (see user_namespaces(7));
 * a missing setgroups file (ENOENT) is tolerated.
 *
 * Returns 0 on success and -1 on failure. Only descriptors that were
 * actually opened are closed, so a failure's errno is not replaced by the
 * EBADF of a close() on an invalid descriptor.
 */
static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
{
	int fd = -EBADF, setgroups_fd = -EBADF;
	int fret = -1;
	int ret;
	ssize_t written;
	char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
		  STRLITERALLEN("/setgroups") + 1];

	if (geteuid() != 0 && map_type == ID_TYPE_GID) {
		ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
		if (ret < 0 || (size_t)ret >= sizeof(path))
			goto out;

		setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
		if (setgroups_fd < 0 && errno != ENOENT) {
			syserror("Failed to open \"%s\"", path);
			goto out;
		}

		if (setgroups_fd >= 0) {
			written = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
			if (written != (ssize_t)STRLITERALLEN("deny\n")) {
				syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
				goto out;
			}
		}
	}

	ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
	if (ret < 0 || (size_t)ret >= sizeof(path))
		goto out;

	fd = open(path, O_WRONLY | O_CLOEXEC);
	if (fd < 0) {
		syserror("Failed to open \"%s\"", path);
		goto out;
	}

	/* The whole mapping has to go out in one write; a short write is an error. */
	written = write_nointr(fd, buf, buf_size);
	if (written < 0 || (size_t)written != buf_size) {
		syserror("Failed to write %cid mapping to \"%s\"",
			 map_type == ID_TYPE_UID ? 'u' : 'g', path);
		goto out;
	}

	fret = 0;
out:
	if (fd >= 0)
		close(fd);
	if (setgroups_fd >= 0)
		close(setgroups_fd);

	return fret;
}
|
|
|
|
static int map_ids_from_idmap(struct list *idmap, pid_t pid)
|
|
{
|
|
int fill, left;
|
|
char mapbuf[4096] = {};
|
|
bool had_entry = false;
|
|
idmap_type_t map_type, u_or_g;
|
|
|
|
if (list_empty(idmap))
|
|
return 0;
|
|
|
|
for (map_type = ID_TYPE_UID, u_or_g = 'u';
|
|
map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
|
|
char *pos = mapbuf;
|
|
int ret;
|
|
struct list *iterator;
|
|
|
|
|
|
list_for_each(iterator, idmap) {
|
|
struct id_map *map = iterator->elem;
|
|
if (map->map_type != map_type)
|
|
continue;
|
|
|
|
had_entry = true;
|
|
|
|
left = 4096 - (pos - mapbuf);
|
|
fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
|
|
/*
|
|
* The kernel only takes <= 4k for writes to
|
|
* /proc/<pid>/{g,u}id_map
|
|
*/
|
|
if (fill <= 0 || fill >= left)
|
|
return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
|
|
|
|
pos += fill;
|
|
}
|
|
if (!had_entry)
|
|
continue;
|
|
|
|
ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
|
|
if (ret < 0)
|
|
return syserror("Failed to write mapping: %s", mapbuf);
|
|
|
|
memset(mapbuf, 0, sizeof(mapbuf));
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int get_userns_fd_from_idmap(struct list *idmap)
|
|
{
|
|
int ret;
|
|
pid_t pid;
|
|
char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
|
|
STRLITERALLEN("/ns/user") + 1];
|
|
|
|
pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
|
|
if (pid < 0)
|
|
return -errno;
|
|
|
|
ret = map_ids_from_idmap(idmap, pid);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
|
|
if (ret < 0 || (size_t)ret >= sizeof(path_ns))
|
|
ret = -EIO;
|
|
else
|
|
ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
|
|
|
|
(void)kill(pid, SIGKILL);
|
|
(void)wait_for_pid(pid);
|
|
return ret;
|
|
}
|
|
|
|
int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
|
|
{
|
|
struct list head, uid_mapl, gid_mapl;
|
|
struct id_map uid_map = {
|
|
.map_type = ID_TYPE_UID,
|
|
.nsid = nsid,
|
|
.hostid = hostid,
|
|
.range = range,
|
|
};
|
|
struct id_map gid_map = {
|
|
.map_type = ID_TYPE_GID,
|
|
.nsid = nsid,
|
|
.hostid = hostid,
|
|
.range = range,
|
|
};
|
|
|
|
list_init(&head);
|
|
uid_mapl.elem = &uid_map;
|
|
gid_mapl.elem = &gid_map;
|
|
list_add_tail(&head, &uid_mapl);
|
|
list_add_tail(&head, &gid_mapl);
|
|
|
|
return get_userns_fd_from_idmap(&head);
|
|
}
|
|
|
|
/*
 * switch_ids() - drop supplementary groups and switch to @uid / @gid.
 *
 * Clears the supplementary group list, sets the real, effective and saved
 * gid and uid, and marks the process dumpable again.
 *
 * Returns true on success. On failure the error is reported on stderr and
 * false is returned. The error code from syserror() must not be returned
 * directly: converted to bool, a non-zero -errno would read as success.
 */
bool switch_ids(uid_t uid, gid_t gid)
{
	if (setgroups(0, NULL)) {
		(void)syserror("failure: setgroups");
		return false;
	}

	if (setresgid(gid, gid, gid)) {
		(void)syserror("failure: setresgid");
		return false;
	}

	if (setresuid(uid, uid, uid)) {
		(void)syserror("failure: setresuid");
		return false;
	}

	/* Ensure we can access proc files from processes we can ptrace. */
	if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0)) {
		(void)syserror("failure: make dumpable");
		return false;
	}

	return true;
}
|
|
|
|
/* Declared early because userns_fd_cb() and create_userns_hierarchy() call each other. */
static int create_userns_hierarchy(struct userns_hierarchy *h);
|
|
|
|
static int userns_fd_cb(void *data)
|
|
{
|
|
struct userns_hierarchy *h = data;
|
|
char c;
|
|
int ret;
|
|
|
|
ret = read_nointr(h->fd_event, &c, 1);
|
|
if (ret < 0)
|
|
return syserror("failure: read from socketpair");
|
|
|
|
/* Only switch ids if someone actually wrote a mapping for us. */
|
|
if (c == '1') {
|
|
if (!switch_ids(0, 0))
|
|
return syserror("failure: switch ids to 0");
|
|
}
|
|
|
|
ret = write_nointr(h->fd_event, "1", 1);
|
|
if (ret < 0)
|
|
return syserror("failure: write to socketpair");
|
|
|
|
ret = create_userns_hierarchy(++h);
|
|
if (ret < 0)
|
|
return syserror("failure: userns level %d", h->level);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int create_userns_hierarchy(struct userns_hierarchy *h)
|
|
{
|
|
int fret = -1;
|
|
char c;
|
|
int fd_socket[2];
|
|
int fd_userns = -EBADF, ret = -1;
|
|
ssize_t bytes;
|
|
pid_t pid;
|
|
char path[256];
|
|
|
|
if (h->level == MAX_USERNS_LEVEL)
|
|
return 0;
|
|
|
|
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
|
|
if (ret < 0)
|
|
return syserror("failure: create socketpair");
|
|
|
|
/* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
|
|
h->fd_event = fd_socket[1];
|
|
pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
|
|
if (pid < 0) {
|
|
syserror("failure: userns level %d", h->level);
|
|
goto out_close;
|
|
}
|
|
|
|
ret = map_ids_from_idmap(&h->id_map, pid);
|
|
if (ret < 0) {
|
|
kill(pid, SIGKILL);
|
|
syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
|
|
goto out_wait;
|
|
}
|
|
|
|
if (!list_empty(&h->id_map))
|
|
bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
|
|
else
|
|
bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
|
|
if (bytes < 0) {
|
|
kill(pid, SIGKILL);
|
|
syserror("failure: write to socketpair");
|
|
goto out_wait;
|
|
}
|
|
|
|
/* Wait for child to set*id() and become dumpable. */
|
|
bytes = read_nointr(fd_socket[0], &c, 1);
|
|
if (bytes < 0) {
|
|
kill(pid, SIGKILL);
|
|
syserror("failure: read from socketpair");
|
|
goto out_wait;
|
|
}
|
|
|
|
snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
|
|
fd_userns = open(path, O_RDONLY | O_CLOEXEC);
|
|
if (fd_userns < 0) {
|
|
kill(pid, SIGKILL);
|
|
syserror("failure: open userns level %d for %d", h->level, pid);
|
|
goto out_wait;
|
|
}
|
|
|
|
fret = 0;
|
|
|
|
out_wait:
|
|
if (!wait_for_pid(pid) && !fret) {
|
|
h->fd_userns = fd_userns;
|
|
fd_userns = -EBADF;
|
|
}
|
|
|
|
out_close:
|
|
if (fd_userns >= 0)
|
|
close(fd_userns);
|
|
close(fd_socket[0]);
|
|
close(fd_socket[1]);
|
|
return fret;
|
|
}
|
|
|
|
/*
 * write_file() - write the string @val to the existing file @path.
 *
 * The string is written with a single write() call; a short write counts
 * as failure. Returns 0 on success and -1 on any failure after printing a
 * message through ksft_print_msg(). The descriptor is closed on every path
 * once it has been opened.
 */
static int write_file(const char *path, const char *val)
{
	size_t len = strlen(val);
	ssize_t written;
	int fd;

	fd = open(path, O_WRONLY);
	if (fd == -1) {
		ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
		return -1;
	}

	written = write(fd, val, len);
	if (written == -1) {
		ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
		goto err_close;
	}
	if ((size_t)written != len) {
		ksft_print_msg("short write to %s\n", path);
		goto err_close;
	}

	if (close(fd) == -1) {
		ksft_print_msg("closing %s\n", path);
		return -1;
	}

	return 0;

err_close:
	close(fd);
	return -1;
}
|
|
|
|
int setup_userns(void)
|
|
{
|
|
int ret;
|
|
char buf[32];
|
|
uid_t uid = getuid();
|
|
gid_t gid = getgid();
|
|
|
|
ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
|
|
if (ret) {
|
|
ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
|
|
strerror(errno));
|
|
return ret;
|
|
}
|
|
|
|
sprintf(buf, "0 %d 1", uid);
|
|
ret = write_file("/proc/self/uid_map", buf);
|
|
if (ret)
|
|
return ret;
|
|
ret = write_file("/proc/self/setgroups", "deny");
|
|
if (ret)
|
|
return ret;
|
|
sprintf(buf, "0 %d 1", gid);
|
|
ret = write_file("/proc/self/gid_map", buf);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
|
|
if (ret) {
|
|
ksft_print_msg("making mount tree private: %s\n", strerror(errno));
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* caps_down - lower all effective caps */
|
|
int caps_down(void)
|
|
{
|
|
bool fret = false;
|
|
cap_t caps = NULL;
|
|
int ret = -1;
|
|
|
|
caps = cap_get_proc();
|
|
if (!caps)
|
|
goto out;
|
|
|
|
ret = cap_clear_flag(caps, CAP_EFFECTIVE);
|
|
if (ret)
|
|
goto out;
|
|
|
|
ret = cap_set_proc(caps);
|
|
if (ret)
|
|
goto out;
|
|
|
|
fret = true;
|
|
|
|
out:
|
|
cap_free(caps);
|
|
return fret;
|
|
}
|
|
|
|
/* cap_down - lower an effective cap */
|
|
int cap_down(cap_value_t down)
|
|
{
|
|
bool fret = false;
|
|
cap_t caps = NULL;
|
|
cap_value_t cap = down;
|
|
int ret = -1;
|
|
|
|
caps = cap_get_proc();
|
|
if (!caps)
|
|
goto out;
|
|
|
|
ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0);
|
|
if (ret)
|
|
goto out;
|
|
|
|
ret = cap_set_proc(caps);
|
|
if (ret)
|
|
goto out;
|
|
|
|
fret = true;
|
|
|
|
out:
|
|
cap_free(caps);
|
|
return fret;
|
|
}
|
|
|
|
uint64_t get_unique_mnt_id(const char *path)
|
|
{
|
|
struct statx sx;
|
|
int ret;
|
|
|
|
ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
|
|
if (ret == -1) {
|
|
ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
|
|
strerror(errno));
|
|
return 0;
|
|
}
|
|
|
|
if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
|
|
ksft_print_msg("no unique mount ID available for %s\n", path);
|
|
return 0;
|
|
}
|
|
|
|
return sx.stx_mnt_id;
|
|
}
|