mainloop: add io_uring support

Users can choose to compile liblxc with io_uring support. This will
cause LXC to use io_uring instead of epoll.
We use both of io_uring's poll modes, one-shot and multi-shot, depending
on the type of handler.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
This commit is contained in:
Christian Brauner 2021-06-04 18:21:04 +02:00
parent 3298b37d53
commit 543d2f838c
No known key found for this signature in database
GPG Key ID: 8EB056D53EECB12D
16 changed files with 677 additions and 189 deletions

View File

@ -671,6 +671,22 @@ AC_CHECK_HEADER([ifaddrs.h],
AC_DEFINE(HAVE_IFADDRS_H, 1, [Have ifaddrs.h]),
AM_CONDITIONAL(HAVE_IFADDRS_H, false))
# Optional liburing support: --enable-liburing=yes|no|auto (default: auto).
# In "auto" mode liburing is enabled only when a sufficiently recent library
# is found; an explicit "yes" turns a missing/old library into a hard error.
AC_ARG_ENABLE([liburing],
	[AS_HELP_STRING([--enable-liburing], [enable liburing support [default=auto]])],
	[enable_liburing=$enableval], [enable_liburing=auto])

# Both sides of the comparison must carry the "x" prefix, otherwise the
# auto-detection branch can never be taken.
if test "x$enable_liburing" = "xauto"; then
	AC_CHECK_LIB([uring],[__io_uring_sqring_wait],[enable_liburing=yes],[enable_liburing=no])
fi

AM_CONDITIONAL([ENABLE_LIBURING], [test "x$enable_liburing" = "xyes"])

AM_COND_IF([ENABLE_LIBURING],
	[AC_CHECK_HEADER([liburing.h],[],[AC_MSG_ERROR([You must install the liburing development package in order to compile lxc])])
	# We use __io_uring_sqring_wait as an indicator whether liburing is new enough to support poll.
	AC_CHECK_LIB([uring],[__io_uring_sqring_wait],[],[AC_MSG_ERROR([The installed liburing development package is too old (it lacks __io_uring_sqring_wait); a newer version is required in order to compile lxc])])
	AC_SUBST([LIBURING_LIBS], [-luring])])
# lookup major()/minor()/makedev()
AC_HEADER_MAJOR

View File

@ -274,7 +274,8 @@ liblxc_la_LIBADD = $(CAP_LIBS) \
$(OPENSSL_LIBS) \
$(SELINUX_LIBS) \
$(SECCOMP_LIBS) \
$(DLOG_LIBS)
$(DLOG_LIBS) \
$(LIBURING_LIBS)
bin_SCRIPTS=
@ -333,7 +334,8 @@ LDADD = liblxc.la \
@OPENSSL_LIBS@ \
@SECCOMP_LIBS@ \
@SELINUX_LIBS@ \
@DLOG_LIBS@
@DLOG_LIBS@ \
@LIBURING_LIBS@
if ENABLE_TOOLS
lxc_attach_SOURCES = tools/lxc_attach.c \

View File

@ -1987,7 +1987,11 @@ static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout,
/* automatically cleaned up now */
descr_ptr = &descr;
ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI,
freezer_cgroup_events_cb,
default_cleanup_handler,
INT_TO_PTR(state_num),
"freezer_cgroup_events_cb");
if (ret < 0)
return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
}
@ -3669,7 +3673,11 @@ static int do_cgroup_freeze(int unified_fd,
if (events_fd < 0)
return log_error_errno(-errno, errno, "Failed to open cgroup.events file");
ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI,
freezer_cgroup_events_cb,
default_cleanup_handler,
INT_TO_PTR(state_num),
"freezer_cgroup_events_cb");
if (ret < 0)
return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
}

View File

@ -37,8 +37,6 @@ lxc_log_define(lxc_monitord, lxc);
sigjmp_buf mark;
static void lxc_monitord_cleanup(void);
/*
* Defines the structure to store the monitor information
* @lxcpath : the path being monitored
@ -113,27 +111,23 @@ static int lxc_monitord_fifo_delete(struct lxc_monitor *mon)
return 0;
}
static void lxc_monitord_sockfd_remove(struct lxc_monitor *mon, int fd)
static int lxc_monitord_sockfd_remove(struct lxc_monitor *mon, int fd)
{
int i;
if (lxc_mainloop_del_handler(&mon->descr, fd))
CRIT("File descriptor %d not found in mainloop", fd);
close(fd);
for (i = 0; i < mon->clientfds_cnt; i++)
if (mon->clientfds[i] == fd)
break;
if (i >= mon->clientfds_cnt) {
CRIT("File descriptor %d not found in clients array", fd);
lxc_monitord_cleanup();
exit(EXIT_FAILURE);
return LXC_MAINLOOP_ERROR;
}
memmove(&mon->clientfds[i], &mon->clientfds[i+1],
(mon->clientfds_cnt - i - 1) * sizeof(mon->clientfds[0]));
mon->clientfds_cnt--;
return LXC_MAINLOOP_DISARM;
}
static int lxc_monitord_sock_handler(int fd, uint32_t events, void *data,
@ -146,12 +140,14 @@ static int lxc_monitord_sock_handler(int fd, uint32_t events, void *data,
char buf[4];
rc = lxc_read_nointr(fd, buf, sizeof(buf));
if (rc > 0 && !strncmp(buf, "quit", 4))
if (rc > 0 && !strncmp(buf, "quit", 4)) {
quit = LXC_MAINLOOP_CLOSE;
return LXC_MAINLOOP_CLOSE;
}
}
if (events & EPOLLHUP)
lxc_monitord_sockfd_remove(mon, fd);
return lxc_monitord_sockfd_remove(mon, fd);
return quit;
}
@ -202,7 +198,9 @@ static int lxc_monitord_sock_accept(int fd, uint32_t events, void *data,
}
ret = lxc_mainloop_add_handler(&mon->descr, clientfd,
lxc_monitord_sock_handler, mon);
lxc_monitord_sock_handler,
default_cleanup_handler,
mon, "lxc_monitord_sock_handler");
if (ret < 0) {
ERROR("Failed to add socket handler");
goto err1;
@ -264,20 +262,14 @@ static int lxc_monitord_create(struct lxc_monitor *mon)
static void lxc_monitord_delete(struct lxc_monitor *mon)
{
int i;
lxc_mainloop_del_handler(&mon->descr, mon->listenfd);
lxc_abstract_unix_close(mon->listenfd);
lxc_monitord_sock_delete(mon);
lxc_mainloop_del_handler(&mon->descr, mon->fifofd);
lxc_monitord_fifo_delete(mon);
close(mon->fifofd);
for (i = 0; i < mon->clientfds_cnt; i++) {
lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]);
for (int i = 0; i < mon->clientfds_cnt; i++)
close(mon->clientfds[i]);
}
mon->clientfds_cnt = 0;
}
@ -310,14 +302,18 @@ static int lxc_monitord_mainloop_add(struct lxc_monitor *mon)
int ret;
ret = lxc_mainloop_add_handler(&mon->descr, mon->fifofd,
lxc_monitord_fifo_handler, mon);
lxc_monitord_fifo_handler,
default_cleanup_handler,
mon, "lxc_monitord_fifo_handler");
if (ret < 0) {
ERROR("Failed to add to mainloop monitor handler for fifo");
return -1;
}
ret = lxc_mainloop_add_handler(&mon->descr, mon->listenfd,
lxc_monitord_sock_accept, mon);
lxc_monitord_sock_accept,
default_cleanup_handler,
mon, "lxc_monitord_sock_accept");
if (ret < 0) {
ERROR("Failed to add to mainloop monitor handler for listen socket");
return -1;
@ -326,11 +322,6 @@ static int lxc_monitord_mainloop_add(struct lxc_monitor *mon)
return 0;
}
static void lxc_monitord_cleanup(void)
{
lxc_monitord_delete(&monitor);
}
static void lxc_monitord_sig_handler(int sig)
{
siglongjmp(mark, 1);
@ -453,11 +444,11 @@ on_signal:
ret = EXIT_SUCCESS;
on_error:
if (monitord_created)
lxc_monitord_cleanup();
if (mainloop_opened)
lxc_mainloop_close(&monitor.descr);
if (monitord_created)
lxc_monitord_delete(&monitor);
exit(ret);
}

View File

@ -1587,8 +1587,10 @@ static int lxc_cmd_seccomp_notify_add_listener_callback(int fd,
goto out;
}
ret = lxc_mainloop_add_handler(descr, recv_fd, seccomp_notify_handler,
handler);
ret = lxc_mainloop_add_handler(descr, recv_fd,
seccomp_notify_handler,
seccomp_notify_cleanup_handler,
handler, "seccomp_notify_handler");
if (ret < 0) {
rsp.ret = -errno;
goto out;
@ -1900,11 +1902,8 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
}
static void lxc_cmd_fd_cleanup(int fd, struct lxc_handler *handler,
struct lxc_async_descr *descr, const lxc_cmd_t cmd)
const lxc_cmd_t cmd)
{
lxc_terminal_free(handler->conf, fd);
lxc_mainloop_del_handler(descr, fd);
if (cmd == LXC_CMD_ADD_STATE_CLIENT) {
struct lxc_list *cur, *next;
@ -1937,11 +1936,25 @@ static void lxc_cmd_fd_cleanup(int fd, struct lxc_handler *handler,
* was already reached by the time we were ready to add it. So
* fallthrough and clean it up.
*/
TRACE("Closing state client fd %d for command \"%s\"", fd, lxc_cmd_str(cmd));
TRACE("Deleted state client fd %d for command \"%s\"", fd, lxc_cmd_str(cmd));
}
TRACE("Closing client fd %d for command \"%s\"", fd, lxc_cmd_str(cmd));
/*
* We're not closing the client fd here. The client will instead be
* notified from the mainloop when it calls the cleanup handler. This will
* cause a slight delay but is semantically cleaner than what we used to do.
*/
}
/*
 * Mainloop cleanup callback for command-socket client connections.
 *
 * @fd   client connection that is being retired
 * @data the struct lxc_handler registered together with the callback
 *
 * Calls lxc_terminal_free() for @fd on the handler's config and then closes
 * the descriptor. Always returns 0.
 */
static int lxc_cmd_cleanup_handler(int fd, void *data)
{
	struct lxc_handler *hdlr = data;
	struct lxc_conf *conf = hdlr->conf;

	lxc_terminal_free(conf, fd);
	close(fd);
	TRACE("Closing client fd %d for \"%s\"", fd, __FUNCTION__);

	return 0;
}
static int lxc_cmd_handler(int fd, uint32_t events, void *data,
@ -1965,20 +1978,20 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data,
__lxc_cmd_rsp_send(fd, &rsp);
}
goto out_close;
goto out;
}
if (ret == 0)
goto out_close;
goto out;
if (ret != sizeof(req)) {
WARN("Failed to receive full command request. Ignoring request for \"%s\"", lxc_cmd_str(req.cmd));
goto out_close;
goto out;
}
if ((req.datalen > LXC_CMD_DATA_MAX) && (req.cmd != LXC_CMD_CONSOLE_LOG)) {
ERROR("Received command data length %d is too large for command \"%s\"", req.datalen, lxc_cmd_str(req.cmd));
goto out_close;
goto out;
}
if (req.datalen > 0) {
@ -1986,7 +1999,7 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data,
ret = lxc_recv_nointr(fd, reqdata, req.datalen, 0);
if (ret != req.datalen) {
WARN("Failed to receive full command request. Ignoring request for \"%s\"", lxc_cmd_str(req.cmd));
goto out_close;
goto out;
}
req.data = reqdata;
@ -1995,20 +2008,20 @@ static int lxc_cmd_handler(int fd, uint32_t events, void *data,
ret = lxc_cmd_process(fd, &req, handler, descr);
if (ret < 0) {
DEBUG("Failed to process command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
goto out_close;
} else if (ret == LXC_CMD_REAP_CLIENT_FD) {
TRACE("Processed command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
goto out_close;
} else {
TRACE("Processed command %s; keeping client fd %d", lxc_cmd_str(req.cmd), fd);
goto out;
}
out:
if (ret == LXC_CMD_REAP_CLIENT_FD) {
TRACE("Processed command %s; cleaning up client fd %d", lxc_cmd_str(req.cmd), fd);
goto out;
}
TRACE("Processed command %s; keeping client fd %d", lxc_cmd_str(req.cmd), fd);
return LXC_MAINLOOP_CONTINUE;
out_close:
lxc_cmd_fd_cleanup(fd, handler, descr, req.cmd);
goto out;
out:
lxc_cmd_fd_cleanup(fd, handler, req.cmd);
return LXC_MAINLOOP_DISARM;
}
static int lxc_cmd_accept(int fd, uint32_t events, void *data,
@ -2029,7 +2042,10 @@ static int lxc_cmd_accept(int fd, uint32_t events, void *data,
if (ret < 0)
return log_error_errno(ret, errno, "Failed to enable necessary credentials on command socket");
ret = lxc_mainloop_add_handler(descr, connection, lxc_cmd_handler, data);
ret = lxc_mainloop_add_oneshot_handler(descr, connection,
lxc_cmd_handler,
lxc_cmd_cleanup_handler,
data, "lxc_cmd_handler");
if (ret)
return log_error(ret, "Failed to add command handler");
@ -2068,7 +2084,10 @@ int lxc_cmd_mainloop_add(const char *name, struct lxc_async_descr *descr,
{
int ret;
ret = lxc_mainloop_add_handler(descr, handler->conf->maincmd_fd, lxc_cmd_accept, handler);
ret = lxc_mainloop_add_handler(descr, handler->conf->maincmd_fd,
lxc_cmd_accept,
default_cleanup_handler,
handler, "lxc_cmd_accept");
if (ret < 0)
return log_error(ret, "Failed to add handler for command socket fd %d", handler->conf->maincmd_fd);

View File

@ -81,6 +81,7 @@ struct lxc_seccomp {
__hidden extern int lxc_seccomp_load(struct lxc_conf *conf);
__hidden extern int lxc_read_seccomp_config(struct lxc_conf *conf);
__hidden extern void lxc_seccomp_free(struct lxc_seccomp *seccomp);
__hidden extern int seccomp_notify_cleanup_handler(int fd, void *data);
__hidden extern int seccomp_notify_handler(int fd, uint32_t events, void *data,
struct lxc_async_descr *descr);
__hidden extern void seccomp_conf_init(struct lxc_conf *conf);
@ -133,7 +134,12 @@ static inline void lxc_seccomp_free(struct lxc_seccomp *seccomp)
static inline int seccomp_notify_handler(int fd, uint32_t events, void *data,
struct lxc_async_descr *descr)
{
return -ENOSYS;
return ret_errno(ENOSYS);
}
/*
 * Stub counterpart of seccomp_notify_cleanup_handler(): always reports
 * ENOSYS. The signature must match the real prototype and
 * lxc_mainloop_cleanup_t, i.e. (int fd, void *data), so that callers which
 * pass it as a cleanup callback keep compiling against the stub.
 */
static inline int seccomp_notify_cleanup_handler(int fd, void *data)
{
	return ret_errno(ENOSYS);
}
static inline void seccomp_conf_init(struct lxc_conf *conf)

View File

@ -747,4 +747,9 @@ enum {
#define PER_LINUX32 0x0008
#endif
/*
 * Return true when every bit set in @mask is also set in @flags.
 * Extra bits in @flags are ignored; an empty @mask always matches.
 */
static inline bool has_exact_flags(__u32 flags, __u32 mask)
{
	__u32 missing = mask & ~flags;

	return missing == 0;
}
#endif /* __LXC_MACRO_H */

View File

@ -8,21 +8,292 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/poll.h>
#include <sys/epoll.h>
#include <unistd.h>
#include "config.h"
#include "log.h"
#include "macro.h"
#include "mainloop.h"
lxc_log_define(mainloop, lxc);
#define CANCEL_RAISED (1 << 0)
#define CANCEL_RECEIVED (1 << 1)
#define CANCEL_SUCCESS (1 << 2)
struct mainloop_handler {
lxc_mainloop_callback_t callback;
struct lxc_list *list;
int fd;
void *data;
lxc_mainloop_callback_t callback;
lxc_mainloop_cleanup_t cleanup;
const char *handler_name;
unsigned int flags;
};
#define MAX_EVENTS 10
int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms)
static int __io_uring_disarm(struct lxc_async_descr *descr,
struct mainloop_handler *handler);
/*
 * Deactivate @handler on the backend of @descr, run its cleanup callback and
 * release the handler together with its list entry.
 *
 * @oneshot is only consulted for the io_uring backend: a oneshot handler
 * that already produced an event is no longer armed, so no removal request
 * is submitted for it. Failures are logged as warnings and never abort the
 * teardown.
 */
static void delete_handler(struct lxc_async_descr *descr,
			   struct mainloop_handler *handler, bool oneshot)
{
	struct lxc_list *entry;
	int rc;

	if (descr->type != LXC_MAINLOOP_IO_URING) {
		rc = epoll_ctl(descr->epfd, EPOLL_CTL_DEL, handler->fd, NULL);
	} else if (oneshot) {
		/*
		 * If we end up here an event for this handler has been
		 * generated before, and since it is a oneshot handler it has
		 * already been deactivated. All that is left to do is to call
		 * the registered cleanup handler and remove the handler from
		 * the list.
		 */
		rc = 0;
	} else {
		rc = __io_uring_disarm(descr, handler);
	}
	if (rc < 0)
		SYSWARN("Failed to delete \"%d\" for \"%s\"", handler->fd, handler->handler_name);

	if (handler->cleanup) {
		rc = handler->cleanup(handler->fd, handler->data);
		if (rc < 0)
			SYSWARN("Failed to call cleanup \"%s\" handler", handler->handler_name);
	}

	/* The list entry's elem is the handler itself; free both. */
	entry = move_ptr(handler->list);
	lxc_list_del(entry);
	free(entry->elem);
	free(entry);
}
#ifndef HAVE_LIBURING
/*
 * Fallbacks compiled when HAVE_LIBURING is not defined. Every io_uring entry
 * point reports ENOSYS through ret_errno().
 */

/* Running an io_uring based mainloop is unsupported in this build. */
static inline int __lxc_mainloop_io_uring(struct lxc_async_descr *descr,
int timeout_ms)
{
return ret_errno(ENOSYS);
}
/* Arming a handler on an io_uring instance is unsupported in this build. */
static int __io_uring_arm(struct lxc_async_descr *descr,
struct mainloop_handler *handler, bool oneshot)
{
return ret_errno(ENOSYS);
}
/* Disarming a handler on an io_uring instance is unsupported in this build. */
static int __io_uring_disarm(struct lxc_async_descr *descr,
struct mainloop_handler *handler)
{
return ret_errno(ENOSYS);
}
/*
 * Opening an io_uring instance is unsupported in this build.
 * lxc_mainloop_open() compares the result against -ENOSYS to decide whether
 * to fall back to epoll.
 */
static inline int __io_uring_open(struct lxc_async_descr *descr)
{
return ret_errno(ENOSYS);
}
#else
/*
 * Set up an io_uring instance for @descr.
 *
 * The struct io_uring itself lives in an anonymous shared mapping; the ring
 * is created with 512 entries and IORING_SETUP_SQPOLL, and its mapped
 * regions are excluded from fork() inheritance.
 *
 * Returns 0 on success. If the mapping cannot be created the mmap() error is
 * returned. If the ring cannot be set up everything acquired so far is
 * released and -ENOSYS is returned so that lxc_mainloop_open() falls back to
 * epoll.
 */
static inline int __io_uring_open(struct lxc_async_descr *descr)
{
	int ret;
	bool ring_initialized = false;

	*descr = (struct lxc_async_descr){
		.epfd = -EBADF,
	};

	descr->ring = mmap(NULL, sizeof(struct io_uring), PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
	if (descr->ring == MAP_FAILED)
		return syserror("Failed to mmap io_uring memory");

	ret = io_uring_queue_init(512, descr->ring, IORING_SETUP_SQPOLL);
	if (ret) {
		/* liburing reports -errno; make the log show the real cause. */
		errno = -ret;
		SYSERROR("Failed to initialize io_uring instance");
		goto on_error;
	}
	ring_initialized = true;

	ret = io_uring_ring_dontfork(descr->ring);
	if (ret) {
		errno = -ret;
		SYSERROR("Failed to prevent inheritance of io_uring mmaped region");
		goto on_error;
	}

	descr->type = LXC_MAINLOOP_IO_URING;
	TRACE("Created io-uring instance");
	return 0;

on_error:
	/* Tear down the ring before unmapping the memory that backs it. */
	if (ring_initialized)
		io_uring_queue_exit(descr->ring);

	ret = munmap(descr->ring, sizeof(struct io_uring));
	if (ret < 0)
		SYSWARN("Failed to unmap io_uring mmaped memory");

	return ret_errno(ENOSYS);
}
/*
 * Submit a poll request for @handler's fd on the io_uring of @descr.
 *
 * @oneshot selects a one-shot poll; otherwise a multishot poll is requested.
 * The handler pointer is stored as the request's user data so completions
 * can be mapped back to it.
 *
 * Returns 0 on success and a negative value when no submission queue entry
 * is available or the submission fails.
 *
 * NOTE(review): the poll mask is always EPOLLIN here; the event mask callers
 * pass to lxc_mainloop_add_handler_events() is not forwarded to this
 * function - confirm this is intended for handlers registered with other
 * events.
 */
static int __io_uring_arm(struct lxc_async_descr *descr,
struct mainloop_handler *handler, bool oneshot)
{
int ret;
struct io_uring_sqe *sqe;
sqe = io_uring_get_sqe(descr->ring);
if (!sqe)
return syserror_set(ENOENT, "Failed to get submission queue entry");
io_uring_prep_poll_add(sqe, handler->fd, EPOLLIN);
/*
* Raise IORING_POLL_ADD_MULTI to set up a multishot poll. The same sqe
* will now produce multiple cqes. A cqe produced from a multishot sqe
* will raise IORING_CQE_F_MORE in cqe->flags.
* Some devices can't be used with IORING_POLL_ADD_MULTI. This can only
* be detected at completion time: the IORING_CQE_F_MORE flag will not
* be raised in cqe->flags. This includes terminal devices. So
* unfortunately we can't use multishot for them although we really
* would like to. Instead we will need to resubmit them. The io_uring
* based mainloop deals with cases where multishot doesn't work and
* resubmits the request. The handler just needs to inform the
* mainloop that it wants to keep the handler.
*/
if (!oneshot)
sqe->len |= IORING_POLL_ADD_MULTI;
io_uring_sqe_set_data(sqe, handler);
ret = io_uring_submit(descr->ring);
if (ret < 0) {
if (!oneshot && ret == -EINVAL) {
/*
* The kernel might not yet support multishot: clear the flag
* and submit once more.
*/
sqe->len &= ~IORING_POLL_ADD_MULTI;
ret = io_uring_submit(descr->ring);
}
}
if (ret < 0)
return syserror_ret(ret, "Failed to add \"%s\" handler", handler->handler_name);
TRACE("Added \"%s\" handler", handler->handler_name);
return 0;
}
/*
 * Submit a poll-remove request for @handler on the io_uring of @descr.
 *
 * CANCEL_RAISED is set on the handler while the request is outstanding and
 * cleared again if the submission fails. Returns the non-negative result of
 * io_uring_submit() on success and a negative value on failure.
 */
static int __io_uring_disarm(struct lxc_async_descr *descr,
			     struct mainloop_handler *handler)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(descr->ring);
	int submitted;

	if (!sqe)
		return syserror_set(ENOENT,
				    "Failed to get submission queue entry");

	io_uring_prep_poll_remove(sqe, handler);
	handler->flags |= CANCEL_RAISED;
	io_uring_sqe_set_data(sqe, handler);

	submitted = io_uring_submit(descr->ring);
	if (submitted >= 0) {
		TRACE("Removed handler \"%s\"", handler->handler_name);
		return submitted;
	}

	/* The request never reached the kernel; undo the bookkeeping. */
	handler->flags &= ~CANCEL_RAISED;
	return syserror_ret(submitted, "Failed to remove \"%s\" handler",
			    handler->handler_name);
}
/*
 * Convert a timeout in milliseconds into a struct __kernel_timespec:
 * whole seconds go to tv_sec, the remainder (in nanoseconds) to tv_nsec.
 */
static void msec_to_ts(struct __kernel_timespec *ts, unsigned int timeout_ms)
{
	enum { MSEC_PER_SEC = 1000, NSEC_PER_MSEC = 1000000 };
	unsigned int whole_secs = timeout_ms / MSEC_PER_SEC;
	unsigned int rest_msecs = timeout_ms % MSEC_PER_SEC;

	ts->tv_sec = whole_secs;
	ts->tv_nsec = rest_msecs * NSEC_PER_MSEC;
}
/*
 * Run the io_uring based mainloop of @descr.
 *
 * @timeout_ms: when >= 0 each wait for a completion is bounded by this many
 * milliseconds; when negative the wait blocks without a timeout.
 *
 * Returns 0 when the wait times out (-ETIME), when a callback returns
 * LXC_MAINLOOP_CLOSE, or when the handler list becomes empty. Returns a
 * negative value when waiting fails, when a oneshot handler cannot be
 * rearmed, or when a callback returns LXC_MAINLOOP_ERROR.
 */
static int __lxc_mainloop_io_uring(struct lxc_async_descr *descr, int timeout_ms)
{
/* Only initialized, and only read, when timeout_ms >= 0. */
struct __kernel_timespec ts;
if (timeout_ms >= 0)
msec_to_ts(&ts, timeout_ms);
for (;;) {
int ret;
__s32 mask = 0;
bool oneshot = false;
struct io_uring_cqe *cqe = NULL;
struct mainloop_handler *handler = NULL;
if (timeout_ms >= 0)
ret = io_uring_wait_cqe_timeout(descr->ring, &cqe, &ts);
else
ret = io_uring_wait_cqe(descr->ring, &cqe);
if (ret < 0) {
/* Interrupted waits are simply retried. */
if (ret == -EINTR)
continue;
/* An expired timeout ends the loop without an error. */
if (ret == -ETIME)
return 0;
return syserror_ret(ret, "Failed to wait for completion");
}
ret = LXC_MAINLOOP_CONTINUE;
/* Without IORING_CQE_F_MORE the request will not produce further cqes. */
oneshot = !(cqe->flags & IORING_CQE_F_MORE);
mask = cqe->res;
handler = io_uring_cqe_get_data(cqe);
/* Everything needed has been copied out of the cqe; mark it consumed. */
io_uring_cqe_seen(descr->ring, cqe);
/*
* The cases below only update the handler's cancellation flags and skip
* the callback. Any other result is passed to the callback as its event
* mask.
*/
switch (mask) {
case -ECANCELED:
handler->flags |= CANCEL_RECEIVED;
TRACE("Canceled \"%s\" handler", handler->handler_name);
goto out;
case -ENOENT:
handler->flags = CANCEL_SUCCESS | CANCEL_RECEIVED;
TRACE("No sqe for \"%s\" handler", handler->handler_name);
goto out;
case -EALREADY:
TRACE("Repeat sqe remove request for \"%s\" handler", handler->handler_name);
goto out;
case 0:
handler->flags |= CANCEL_SUCCESS;
TRACE("Removed \"%s\" handler", handler->handler_name);
goto out;
default:
/*
* We need to always remove the handler for a
* successful oneshot request.
*/
if (oneshot)
handler->flags = CANCEL_SUCCESS | CANCEL_RECEIVED;
}
ret = handler->callback(handler->fd, mask, handler->data, descr);
switch (ret) {
case LXC_MAINLOOP_CONTINUE:
/* We're operating in oneshot mode so we need to rearm. */
if (oneshot && __io_uring_arm(descr, handler, true))
return -1;
break;
case LXC_MAINLOOP_DISARM:
/* The handler is only deleted once both cancellation flags are set. */
if (has_exact_flags(handler->flags, (CANCEL_SUCCESS | CANCEL_RECEIVED)))
delete_handler(descr, handler, oneshot);
break;
case LXC_MAINLOOP_CLOSE:
return log_trace(0, "Closing from \"%s\"", handler->handler_name);
case LXC_MAINLOOP_ERROR:
return syserror_ret(-1, "Closing with error from \"%s\"", handler->handler_name);
}
out:
/* With no handlers left there is nothing more to wait for. */
if (lxc_list_empty(&descr->handlers))
return error_ret(0, "Closing because there are no more handlers");
}
}
#endif
static int __lxc_mainloop_epoll(struct lxc_async_descr *descr, int timeout_ms)
{
int i, nfds, ret;
struct mainloop_handler *handler;
@ -45,10 +316,17 @@ int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms)
*/
ret = handler->callback(handler->fd, events[i].events,
handler->data, descr);
if (ret == LXC_MAINLOOP_ERROR)
return -1;
if (ret == LXC_MAINLOOP_CLOSE)
switch (ret) {
case LXC_MAINLOOP_DISARM:
delete_handler(descr, handler, false);
__fallthrough;
case LXC_MAINLOOP_CONTINUE:
break;
case LXC_MAINLOOP_CLOSE:
return 0;
case LXC_MAINLOOP_ERROR:
return -1;
}
}
if (nfds == 0)
@ -59,76 +337,153 @@ int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms)
}
}
int lxc_mainloop_add_handler_events(struct lxc_async_descr *descr, int fd,
int events,
lxc_mainloop_callback_t callback,
void *data)
/*
 * Run the mainloop of @descr on whichever backend it was opened with:
 * io_uring when descr->type is LXC_MAINLOOP_IO_URING, epoll otherwise.
 * @timeout_ms is handed through unchanged; the backend's result is returned.
 */
int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms)
{
	if (descr->type != LXC_MAINLOOP_IO_URING)
		return __lxc_mainloop_epoll(descr, timeout_ms);

	return __lxc_mainloop_io_uring(descr, timeout_ms);
}
static int __lxc_mainloop_add_handler_events(struct lxc_async_descr *descr,
int fd, int events,
lxc_mainloop_callback_t callback,
lxc_mainloop_cleanup_t cleanup,
void *data, bool oneshot,
const char *handler_name)
{
__do_free struct mainloop_handler *handler = NULL;
__do_free struct lxc_list *item = NULL;
__do_free struct lxc_list *list = NULL;
int ret;
struct epoll_event ev;
if (fd < 0)
return -1;
return ret_errno(EBADF);
handler = malloc(sizeof(*handler));
if (!callback || !cleanup || !events || !handler_name)
return ret_errno(EINVAL);
handler = zalloc(sizeof(*handler));
if (!handler)
return -1;
handler->callback = callback;
handler->fd = fd;
handler->data = data;
ev.events = events;
ev.data.ptr = handler;
if (epoll_ctl(descr->epfd, EPOLL_CTL_ADD, fd, &ev) < 0)
return -errno;
item = malloc(sizeof(*item));
if (!item)
return ret_errno(ENOMEM);
item->elem = move_ptr(handler);
lxc_list_add(&descr->handlers, move_ptr(item));
handler->callback = callback;
handler->cleanup = cleanup;
handler->fd = fd;
handler->data = data;
handler->handler_name = handler_name;
if (descr->type == LXC_MAINLOOP_IO_URING) {
ret = __io_uring_arm(descr, handler, oneshot);
} else {
ev.events = events;
ev.data.ptr = handler;
ret = epoll_ctl(descr->epfd, EPOLL_CTL_ADD, fd, &ev);
}
if (ret < 0)
return -errno;
list = lxc_list_new();
if (!list)
return ret_errno(ENOMEM);
handler->list = list;
lxc_list_add_elem(list, move_ptr(handler));;
lxc_list_add_tail(&descr->handlers, move_ptr(list));
return 0;
}
int lxc_mainloop_add_handler(struct lxc_async_descr *descr, int fd,
lxc_mainloop_callback_t callback, void *data)
int lxc_mainloop_add_handler_events(struct lxc_async_descr *descr, int fd,
int events,
lxc_mainloop_callback_t callback,
lxc_mainloop_cleanup_t cleanup,
void *data, const char *handler_name)
{
return lxc_mainloop_add_handler_events(descr, fd, EPOLLIN, callback,
data);
return __lxc_mainloop_add_handler_events(descr, fd, events,
callback, cleanup,
data, false, handler_name);
}
/*
 * Register @callback for EPOLLIN events on @fd as a non-oneshot handler.
 *
 * @cleanup, @data and @handler_name are passed on unchanged to
 * __lxc_mainloop_add_handler_events(), whose result is returned.
 */
int lxc_mainloop_add_handler(struct lxc_async_descr *descr, int fd,
			     lxc_mainloop_callback_t callback,
			     lxc_mainloop_cleanup_t cleanup,
			     void *data, const char *handler_name)
{
	const int events = EPOLLIN;
	const bool oneshot = false;

	return __lxc_mainloop_add_handler_events(descr, fd, events, callback,
						 cleanup, data, oneshot,
						 handler_name);
}
/*
 * Register @callback for EPOLLIN events on @fd as a oneshot handler.
 *
 * @cleanup, @data and @handler_name are passed on unchanged to
 * __lxc_mainloop_add_handler_events(), whose result is returned.
 */
int lxc_mainloop_add_oneshot_handler(struct lxc_async_descr *descr, int fd,
				     lxc_mainloop_callback_t callback,
				     lxc_mainloop_cleanup_t cleanup,
				     void *data, const char *handler_name)
{
	const int events = EPOLLIN;
	const bool oneshot = true;

	return __lxc_mainloop_add_handler_events(descr, fd, events, callback,
						 cleanup, data, oneshot,
						 handler_name);
}
int lxc_mainloop_del_handler(struct lxc_async_descr *descr, int fd)
{
struct mainloop_handler *handler;
struct lxc_list *iterator;
int ret;
struct lxc_list *iterator = NULL;
lxc_list_for_each(iterator, &descr->handlers) {
handler = iterator->elem;
struct mainloop_handler *handler = iterator->elem;
if (handler->fd == fd) {
/* found */
if (epoll_ctl(descr->epfd, EPOLL_CTL_DEL, fd, NULL))
return -errno;
if (handler->fd != fd)
continue;
if (descr->type == LXC_MAINLOOP_IO_URING)
ret = __io_uring_disarm(descr, handler);
else
ret = epoll_ctl(descr->epfd, EPOLL_CTL_DEL, fd, NULL);
if (ret < 0)
return syserror("Failed to disarm \"%s\"", handler->handler_name);
/*
* For io_uring the deletion happens at completion time. Either
* we get ENOENT if the request was oneshot and it had already
* triggered or we get ECANCELED for the original sqe and 0 for
* the cancellation request.
*/
if (descr->type == LXC_MAINLOOP_EPOLL) {
lxc_list_del(iterator);
free(iterator->elem);
free(iterator);
return 0;
}
return 0;
}
return ret_errno(EINVAL);
}
int lxc_mainloop_open(struct lxc_async_descr *descr)
static inline int __epoll_open(struct lxc_async_descr *descr)
{
*descr = (struct lxc_async_descr){
.epfd = -EBADF,
};
descr->epfd = epoll_create1(EPOLL_CLOEXEC);
if (descr->epfd < 0)
return -errno;
return syserror("Failed to create epoll instance");
descr->type = LXC_MAINLOOP_EPOLL;
TRACE("Created epoll instance");
return 0;
}
int lxc_mainloop_open(struct lxc_async_descr *descr)
{
int ret;
ret = __io_uring_open(descr);
if (ret == -ENOSYS)
ret = __epoll_open(descr);
if (ret < 0)
return syserror("Failed to create mainloop instance");
lxc_list_init(&descr->handlers);
return 0;
@ -148,5 +503,14 @@ void lxc_mainloop_close(struct lxc_async_descr *descr)
iterator = next;
}
close_prot_errno_disarm(descr->epfd);
if (descr->type == LXC_MAINLOOP_IO_URING) {
#ifdef HAVE_LIBURING
io_uring_queue_exit(descr->ring);
munmap(descr->ring, sizeof(struct io_uring));
#else
ERROR("Unsupported io_uring mainloop");
#endif
} else {
close_prot_errno_disarm(descr->epfd);
}
}

View File

@ -9,24 +9,55 @@
#include "list.h"
#include "memory_utils.h"
#ifdef HAVE_LIBURING
#include <liburing.h>
#endif
#define LXC_MAINLOOP_ERROR -1
#define LXC_MAINLOOP_CONTINUE 0
#define LXC_MAINLOOP_CLOSE 1
#define LXC_MAINLOOP_DISARM 2
/* Identifies which backend a struct lxc_async_descr was opened with. */
typedef enum {
/* The descriptor is driven through epoll. */
LXC_MAINLOOP_EPOLL = 1,
/* The descriptor is driven through io_uring. */
LXC_MAINLOOP_IO_URING = 2,
} async_descr_t;
struct lxc_async_descr {
int epfd;
async_descr_t type;
union {
int epfd;
#ifdef HAVE_LIBURING
struct io_uring *ring;
#endif
};
struct lxc_list handlers;
};
/*
 * No-op cleanup callback for handlers that have nothing to release.
 * Both arguments are ignored; always returns 0.
 */
static inline int default_cleanup_handler(int fd, void *data)
{
	(void)fd;
	(void)data;

	return 0;
}
typedef int (*lxc_mainloop_callback_t)(int fd, uint32_t event, void *data,
struct lxc_async_descr *descr);
typedef int (*lxc_mainloop_cleanup_t)(int fd, void *data);
__hidden extern int lxc_mainloop(struct lxc_async_descr *descr, int timeout_ms);
__hidden extern int lxc_mainloop_add_handler_events(struct lxc_async_descr *descr, int fd, int events,
lxc_mainloop_callback_t callback, void *data);
lxc_mainloop_callback_t callback,
lxc_mainloop_cleanup_t cleanup,
void *data, const char *handler_name);
__hidden extern int lxc_mainloop_add_handler(struct lxc_async_descr *descr, int fd,
lxc_mainloop_callback_t callback, void *data);
lxc_mainloop_callback_t callback,
lxc_mainloop_cleanup_t cleanup,
void *data, const char *handler_name);
__hidden extern int lxc_mainloop_add_oneshot_handler(struct lxc_async_descr *descr, int fd,
lxc_mainloop_callback_t callback,
lxc_mainloop_cleanup_t cleanup,
void *data, const char *handler_name);
__hidden extern int lxc_mainloop_del_handler(struct lxc_async_descr *descr, int fd);

View File

@ -1358,6 +1358,23 @@ static void seccomp_notify_default_answer(int fd, struct seccomp_notif *req,
}
#endif
/*
 * Mainloop cleanup callback for seccomp notify file descriptors.
 *
 * @fd   notify fd whose handler is being removed from the mainloop
 * @data the struct lxc_handler registered together with the callback
 *
 * Takes ownership of @fd and closes it. If @fd is the container's main
 * notify fd it is first moved out of the config so nothing else closes it a
 * second time. Always returns 0.
 */
int seccomp_notify_cleanup_handler(int fd, void *data)
{
	struct lxc_handler *hdlr = data;
	struct lxc_conf *conf = hdlr->conf;

	/* TODO: Make sure that we don't need to free any memory in here. */
	if (fd == conf->seccomp.notifier.notify_fd)
		fd = move_fd(conf->seccomp.notifier.notify_fd);

	/*
	 * If this isn't the main notify_fd it means that someone registered a
	 * seccomp notify handler through the command socket (e.g. for attach)
	 * and so we won't touch the container's config.
	 *
	 * In both cases the fd itself has to be closed here: the notify
	 * handler only disarms on EPOLLHUP and no longer closes the fd, so
	 * without this it would leak.
	 */
	close(fd);

	return 0;
}
int seccomp_notify_handler(int fd, uint32_t events, void *data,
struct lxc_async_descr *descr)
{
@ -1384,11 +1401,8 @@ int seccomp_notify_handler(int fd, uint32_t events, void *data,
char *cookie = conf->seccomp.notifier.cookie;
__u64 req_id;
if (events & EPOLLHUP) {
lxc_mainloop_del_handler(descr, fd);
close(fd);
return log_trace(0, "Removing seccomp notifier fd %d", fd);
}
if (events & EPOLLHUP)
return log_trace(LXC_MAINLOOP_DISARM, "Removing seccomp notifier fd %d", fd);
memset(req, 0, conf->seccomp.notifier.sizes.seccomp_notif);
ret = seccomp_notify_receive(fd, req);
@ -1604,9 +1618,11 @@ int lxc_seccomp_setup_proxy(struct lxc_seccomp *seccomp,
return -1;
}
ret = lxc_mainloop_add_handler(descr,
seccomp->notifier.notify_fd,
seccomp_notify_handler, handler);
ret = lxc_mainloop_add_handler(descr, seccomp->notifier.notify_fd,
seccomp_notify_handler,
seccomp_notify_cleanup_handler,
handler,
"seccomp_notify_handler");
if (ret < 0) {
ERROR("Failed to add seccomp notify handler for %d to mainloop",
notify_fd);

View File

@ -398,6 +398,9 @@ static int signal_handler(int fd, uint32_t events, void *data,
if (ret == 0 && info.si_pid == hdlr->pid)
hdlr->init_died = true;
TRACE("Received signal ssi_signo(%d) for ssi_pid(%d), si_signo(%d), si_pid(%d)",
siginfo.ssi_signo, siginfo.ssi_pid, info.si_signo, info.si_pid);
/* Try to figure out a reasonable exit status to report. */
if (hdlr->init_died) {
switch (info.si_code) {
@ -576,12 +579,11 @@ int lxc_set_state(const char *name, struct lxc_handler *handler,
int lxc_poll(const char *name, struct lxc_handler *handler)
{
int ret;
bool has_console = true;
struct lxc_terminal *console = &handler->conf->console;
struct lxc_async_descr descr, descr_console;
if (handler->conf->console.path &&
strequal(handler->conf->console.path, "none"))
has_console = false;
if (!wants_console(console))
console = NULL;
ret = lxc_mainloop_open(&descr);
if (ret < 0) {
@ -589,7 +591,7 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
goto out_sigfd;
}
if (has_console) {
if (console) {
ret = lxc_mainloop_open(&descr_console);
if (ret < 0) {
ERROR("Failed to create console mainloop");
@ -597,7 +599,10 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
}
}
ret = lxc_mainloop_add_handler(&descr, handler->sigfd, signal_handler, handler);
ret = lxc_mainloop_add_handler(&descr, handler->sigfd,
signal_handler,
default_cleanup_handler,
handler, "signal_handler");
if (ret < 0) {
ERROR("Failed to add signal handler for %d to mainloop", handler->sigfd);
goto out_mainloop_console;
@ -609,22 +614,12 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
goto out_mainloop_console;
}
if (has_console) {
struct lxc_terminal *console = &handler->conf->console;
if (console) {
ret = lxc_terminal_mainloop_add(&descr, console);
if (ret < 0) {
ERROR("Failed to add console handlers to mainloop");
goto out_mainloop_console;
}
ret = lxc_terminal_mainloop_add(&descr_console, console);
if (ret < 0) {
ERROR("Failed to add console handlers to console mainloop");
goto out_mainloop_console;
}
handler->conf->console.descr = &descr;
}
ret = lxc_cmd_mainloop_add(name, &descr, handler);
@ -640,11 +635,14 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
if (ret < 0 || !handler->init_died)
goto out_mainloop_console;
if (has_console)
ret = lxc_mainloop(&descr_console, 0);
if (console) {
ret = lxc_terminal_mainloop_add(&descr_console, console);
if (ret == 0)
ret = lxc_mainloop(&descr_console, 0);
}
out_mainloop_console:
if (has_console) {
if (console) {
lxc_mainloop_close(&descr_console);
TRACE("Closed console mainloop");
}

View File

@ -27,10 +27,6 @@
#include <sys/signalfd.h>
#endif
#ifdef HAVE_STRUCT_OPEN_HOW
#include <linux/openat2.h>
#endif
#if HAVE_SYS_PERSONALITY_H
#include <sys/personality.h>
#endif
@ -299,11 +295,7 @@ struct lxc_open_how {
#ifndef HAVE_OPENAT2
static inline int openat2(int dfd, const char *filename, struct lxc_open_how *how, size_t size)
{
/* When struct open_how is updated we should update lxc as well. */
#ifdef HAVE_STRUCT_OPEN_HOW
BUILD_BUG_ON(sizeof(struct lxc_open_how) != sizeof(struct open_how));
#endif
return syscall(__NR_openat2, dfd, filename, (struct open_how *)how, size);
return syscall(__NR_openat2, dfd, filename, how, size);
}
#endif /* HAVE_OPENAT2 */

View File

@ -328,48 +328,27 @@ static int lxc_terminal_write_log_file(struct lxc_terminal *terminal, char *buf,
return bytes_read;
}
int lxc_terminal_io_cb(int fd, uint32_t events, void *data,
struct lxc_async_descr *descr)
static int lxc_terminal_ptx_io(struct lxc_terminal *terminal)
{
struct lxc_terminal *terminal = data;
char buf[LXC_TERMINAL_BUFFER_SIZE];
int r, w, w_log, w_rbuf;
w = r = lxc_read_nointr(fd, buf, sizeof(buf));
if (r <= 0) {
INFO("Terminal client on fd %d has exited", fd);
lxc_mainloop_del_handler(descr, fd);
if (fd == terminal->ptx) {
terminal->ptx = -EBADF;
} else if (fd == terminal->peer) {
lxc_terminal_signal_fini(terminal);
terminal->peer = -EBADF;
} else {
ERROR("Handler received unexpected file descriptor");
}
close(fd);
return LXC_MAINLOOP_CLOSE;
}
if (fd == terminal->peer)
w = lxc_write_nointr(terminal->ptx, buf, r);
w = r = lxc_read_nointr(terminal->ptx, buf, sizeof(buf));
if (r <= 0)
return -1;
w_rbuf = w_log = 0;
if (fd == terminal->ptx) {
/* write to peer first */
if (terminal->peer >= 0)
w = lxc_write_nointr(terminal->peer, buf, r);
/* write to peer first */
if (terminal->peer >= 0)
w = lxc_write_nointr(terminal->peer, buf, r);
/* write to terminal ringbuffer */
if (terminal->buffer_size > 0)
w_rbuf = lxc_ringbuf_write(&terminal->ringbuf, buf, r);
/* write to terminal ringbuffer */
if (terminal->buffer_size > 0)
w_rbuf = lxc_ringbuf_write(&terminal->ringbuf, buf, r);
/* write to terminal log */
if (terminal->log_fd >= 0)
w_log = lxc_terminal_write_log_file(terminal, buf, r);
}
/* write to terminal log */
if (terminal->log_fd >= 0)
w_log = lxc_terminal_write_log_file(terminal, buf, r);
if (w != r)
WARN("Short write on terminal r:%d != w:%d", r, w);
@ -382,6 +361,52 @@ int lxc_terminal_io_cb(int fd, uint32_t events, void *data,
if (w_log < 0)
TRACE("Failed to write %d bytes to terminal log", r);
return 0;
}
/*
 * Forward one chunk of input from the terminal's peer fd to its ptx fd.
 *
 * Reads at most LXC_TERMINAL_BUFFER_SIZE bytes from terminal->peer and writes
 * whatever was read to terminal->ptx.
 *
 * Returns -1 when the read yields no data or fails (result <= 0), 0 otherwise.
 * A short or failed write is only reported via WARN(); it does not change the
 * return value.
 */
static int lxc_terminal_peer_io(struct lxc_terminal *terminal)
{
	char chunk[LXC_TERMINAL_BUFFER_SIZE];
	int nread, nwritten;

	nread = lxc_read_nointr(terminal->peer, chunk, sizeof(chunk));
	if (nread <= 0)
		return -1;

	nwritten = lxc_write_nointr(terminal->ptx, chunk, nread);
	if (nwritten != nread)
		WARN("Short write on terminal r:%d != w:%d", nread, nwritten);

	return 0;
}
/*
 * Mainloop callback for the terminal's ptx fd.
 *
 * @data is the struct lxc_terminal the handler was registered with; @fd,
 * @events and @descr are not used here. The actual I/O is delegated to
 * lxc_terminal_ptx_io().
 *
 * Returns LXC_MAINLOOP_CONTINUE while lxc_terminal_ptx_io() succeeds. When it
 * reports a negative result, an info message is logged and
 * LXC_MAINLOOP_CLOSE is returned.
 */
static int lxc_terminal_ptx_io_handler(int fd, uint32_t events, void *data,
				       struct lxc_async_descr *descr)
{
	struct lxc_terminal *term = data;

	if (lxc_terminal_ptx_io(term) >= 0)
		return LXC_MAINLOOP_CONTINUE;

	return log_info(LXC_MAINLOOP_CLOSE,
			"Terminal client on fd %d has exited",
			term->ptx);
}
/*
 * Mainloop callback for the terminal's peer fd.
 *
 * @data is the struct lxc_terminal the handler was registered with; @fd,
 * @events and @descr are not used here. The actual I/O is delegated to
 * lxc_terminal_peer_io().
 *
 * Returns LXC_MAINLOOP_CONTINUE while lxc_terminal_peer_io() succeeds. When it
 * reports a negative result, an info message is logged and
 * LXC_MAINLOOP_CLOSE is returned.
 */
static int lxc_terminal_peer_io_handler(int fd, uint32_t events, void *data,
					struct lxc_async_descr *descr)
{
	struct lxc_terminal *term = data;

	if (lxc_terminal_peer_io(term) >= 0)
		return LXC_MAINLOOP_CONTINUE;

	return log_info(LXC_MAINLOOP_CLOSE,
			"Terminal client on fd %d has exited",
			term->peer);
}
@ -391,7 +416,9 @@ static int lxc_terminal_mainloop_add_peer(struct lxc_terminal *terminal)
if (terminal->peer >= 0) {
ret = lxc_mainloop_add_handler(terminal->descr, terminal->peer,
lxc_terminal_io_cb, terminal);
lxc_terminal_peer_io_handler,
default_cleanup_handler,
terminal, "lxc_terminal_peer_io_handler");
if (ret < 0) {
WARN("Failed to add terminal peer handler to mainloop");
return -1;
@ -401,8 +428,12 @@ static int lxc_terminal_mainloop_add_peer(struct lxc_terminal *terminal)
if (!terminal->tty_state || terminal->tty_state->sigfd < 0)
return 0;
ret = lxc_mainloop_add_handler(terminal->descr, terminal->tty_state->sigfd,
lxc_terminal_signalfd_cb, terminal->tty_state);
ret = lxc_mainloop_add_handler(terminal->descr,
terminal->tty_state->sigfd,
lxc_terminal_signalfd_cb,
default_cleanup_handler,
terminal->tty_state,
"lxc_terminal_signalfd_cb");
if (ret < 0) {
WARN("Failed to add signal handler to mainloop");
return -1;
@ -422,10 +453,11 @@ int lxc_terminal_mainloop_add(struct lxc_async_descr *descr,
}
ret = lxc_mainloop_add_handler(descr, terminal->ptx,
lxc_terminal_io_cb, terminal);
lxc_terminal_ptx_io_handler,
default_cleanup_handler,
terminal, "lxc_terminal_ptx_io_handler");
if (ret < 0) {
ERROR("Failed to add handler for terminal ptx fd %d to "
"mainloop", terminal->ptx);
ERROR("Failed to add handler for terminal ptx fd %d to mainloop", terminal->ptx);
return -1;
}
@ -1221,7 +1253,9 @@ int lxc_console(struct lxc_container *c, int ttynum,
if (ts->sigfd != -1) {
ret = lxc_mainloop_add_handler(&descr, ts->sigfd,
lxc_terminal_signalfd_cb, ts);
lxc_terminal_signalfd_cb,
default_cleanup_handler,
ts, "lxc_terminal_signalfd_cb");
if (ret < 0) {
ERROR("Failed to add signal handler to mainloop");
goto close_mainloop;
@ -1229,14 +1263,18 @@ int lxc_console(struct lxc_container *c, int ttynum,
}
ret = lxc_mainloop_add_handler(&descr, ts->stdinfd,
lxc_terminal_stdin_cb, ts);
lxc_terminal_stdin_cb,
default_cleanup_handler,
ts, "lxc_terminal_stdin_cb");
if (ret < 0) {
ERROR("Failed to add stdin handler");
goto close_mainloop;
}
ret = lxc_mainloop_add_handler(&descr, ts->ptxfd,
lxc_terminal_ptx_cb, ts);
lxc_terminal_ptx_cb,
default_cleanup_handler,
ts, "lxc_terminal_ptx_cb");
if (ret < 0) {
ERROR("Failed to add ptx handler");
goto close_mainloop;

View File

@ -244,8 +244,6 @@ __hidden extern int lxc_terminal_signalfd_cb(int fd, uint32_t events, void *cbda
__hidden extern int lxc_terminal_write_ringbuffer(struct lxc_terminal *terminal);
__hidden extern int lxc_terminal_create_log_file(struct lxc_terminal *terminal);
__hidden extern int lxc_terminal_io_cb(int fd, uint32_t events, void *data,
struct lxc_async_descr *descr);
__hidden extern int lxc_make_controlling_terminal(int fd);
__hidden extern int lxc_terminal_prepare_login(int fd);

View File

@ -594,7 +594,10 @@ int main(int argc, char *argv[])
goto out;
}
ret = lxc_mainloop_add_handler(&descr, 0, stdin_handler, &in_char);
ret = lxc_mainloop_add_handler(&descr, 0,
stdin_handler,
default_cleanup_handler,
&in_char, "stdin_handler");
if (ret) {
fprintf(stderr, "Failed to add stdin handler\n");
ret = EXIT_FAILURE;

View File

@ -5,7 +5,8 @@ LDADD = ../lxc/liblxc.la \
@OPENSSL_LIBS@ \
@SECCOMP_LIBS@ \
@SELINUX_LIBS@ \
@DLOG_LIBS@
@DLOG_LIBS@ \
@LIBURING_LIBS@
LSM_SOURCES = ../lxc/lsm/lsm.c \
../lxc/lsm/lsm.h \