diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in index ae04e3af3..b45639883 100644 --- a/doc/lxc.container.conf.sgml.in +++ b/doc/lxc.container.conf.sgml.in @@ -1571,6 +1571,53 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + + + + + + This is similar to <option>lxc.cgroup.dir</option>, but must be + used together with <option>lxc.cgroup.dir.monitor</option> and + affects only the container's cgroup path. This option is mutually + exclusive with <option>lxc.cgroup.dir</option>. + Note that the final path the container attaches to may be + extended further by the + <option>lxc.cgroup.dir.container.inner</option> option. + + + + + + + + + + This is the monitor process counterpart to + <option>lxc.cgroup.dir.container</option>. + + + + + + + + + + Specify an additional subdirectory where the cgroup namespace + will be created. With this option, the cgroup limits will be + applied to the outer path specified in + <option>lxc.cgroup.dir.container</option>, which is not accessible + from within the container, making it possible to better enforce + limits for privileged containers in a way they cannot override + them. + This only works in conjunction with the + <option>lxc.cgroup.dir.container</option> and + <option>lxc.cgroup.dir.monitor</option> options and has otherwise + no effect. 
+ + + diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index d3595bcdf..cf0f5fbc5 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -725,6 +725,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char new->container_base_path = container_base_path; new->version = type; new->cgfd_con = -EBADF; + new->cgfd_limit = -EBADF; new->cgfd_mon = -EBADF; newentry = append_null_to_list((void ***)h); @@ -956,13 +957,15 @@ static int cgroup_tree_remove(struct hierarchy **hierarchies, struct hierarchy *h = hierarchies[i]; int ret; - if (!h->container_full_path) + if (!h->container_limit_path) continue; - ret = lxc_rm_rf(h->container_full_path); + ret = lxc_rm_rf(h->container_limit_path); if (ret < 0) - WARN("Failed to destroy \"%s\"", h->container_full_path); + WARN("Failed to destroy \"%s\"", h->container_limit_path); + if (h->container_limit_path != h->container_full_path) + free_disarm(h->container_limit_path); free_disarm(h->container_full_path); } @@ -1089,7 +1092,12 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, goto try_lxc_rm_rf; } - if (conf && conf->cgroup_meta.dir) + if (conf && conf->cgroup_meta.monitor_dir) + pivot_path = must_make_path(h->mountpoint, + h->container_base_path, + conf->cgroup_meta.monitor_dir, + CGROUP_PIVOT, NULL); + else if (conf && conf->cgroup_meta.dir) pivot_path = must_make_path(h->mountpoint, h->container_base_path, conf->cgroup_meta.dir, @@ -1147,7 +1155,8 @@ static int mkdir_eexist_on_last(const char *dir, mode_t mode) } static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree, - const char *cgroup_leaf, bool payload) + const char *cgroup_leaf, bool payload, + const char *cgroup_limit_dir) { __do_free char *path = NULL; int ret, ret_cpuset; @@ -1176,6 +1185,16 @@ static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree, if (h->cgfd_con < 0) return log_error_errno(false, errno, "Failed to open %s", path); 
h->container_full_path = move_ptr(path); + if (cgroup_limit_dir) { + path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL); + h->cgfd_limit = lxc_open_dirfd(path); + if (h->cgfd_limit < 0) + return log_error_errno(false, errno, "Failed to open %s", path); + h->container_limit_path = move_ptr(path); + } else { + h->container_limit_path = h->container_full_path; + h->cgfd_limit = h->cgfd_con; + } } else { h->cgfd_mon = lxc_open_dirfd(path); if (h->cgfd_mon < 0) @@ -1188,11 +1207,15 @@ static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree, static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload) { - __do_free char *full_path = NULL; + __do_free char *full_path = NULL, *__limit_path = NULL; + char *limit_path = NULL; if (payload) { __lxc_unused __do_close int fd = move_fd(h->cgfd_con); full_path = move_ptr(h->container_full_path); + limit_path = move_ptr(h->container_limit_path); + if (limit_path != full_path) + __limit_path = limit_path; /* separate limiting cgroup: owned here and removed on its own below */ } else { __lxc_unused __do_close int fd = move_fd(h->cgfd_mon); full_path = move_ptr(h->monitor_full_path); @@ -1200,6 +1223,39 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload) if (full_path && rmdir(full_path)) SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path); + if (__limit_path && rmdir(__limit_path)) + SYSWARN("Failed to rmdir(\"%s\") cgroup", __limit_path); +} + +/* + * Check that lxc.cgroup.dir is not combined with lxc.cgroup.dir.monitor, + * lxc.cgroup.dir.container or lxc.cgroup.dir.container.inner, and that the + * monitor and container directories are only ever set together. + * Returns true if the configuration is consistent, false otherwise. 
+ */ +static bool check_cgroup_dir_config(struct lxc_conf *conf) +{ + const char *monitor_dir = conf->cgroup_meta.monitor_dir, + *container_dir = conf->cgroup_meta.container_dir, + *namespace_dir = conf->cgroup_meta.namespace_dir; + size_t prefix_len; + + /* none of the new options are set, all is fine */ + if (!monitor_dir && !container_dir && !namespace_dir) + return true; + + /* some are set, make sure lxc.cgroup.dir is not also set */ + if (conf->cgroup_meta.dir) + return log_error_errno(false, EINVAL, + "lxc.cgroup.dir conflicts with lxc.cgroup.dir.container/monitor"); + + /* make sure both monitor and container are set */ + if (!monitor_dir || !container_dir) + return log_error_errno(false, EINVAL, + "lxc.cgroup.dir.container and lxc.cgroup.dir.monitor must both be set"); + + /* namespace_dir may be empty */ + return true; } __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, @@ -1210,7 +1266,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, int idx = 0; int i; size_t len; - char *suffix; + char *suffix = NULL; struct lxc_conf *conf; if (!ops) @@ -1227,7 +1283,13 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, conf = handler->conf; - if (conf->cgroup_meta.dir) { + if (!check_cgroup_dir_config(conf)) + return false; + + if (conf->cgroup_meta.monitor_dir) { + cgroup_tree = NULL; + monitor_cgroup = strdup(conf->cgroup_meta.monitor_dir); + } else if (conf->cgroup_meta.dir) { cgroup_tree = conf->cgroup_meta.dir; monitor_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/", DEFAULT_MONITOR_CGROUP_PREFIX, @@ -1251,14 +1313,16 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, if (!monitor_cgroup) return ret_set_errno(false, ENOMEM); - suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN; - *suffix = '\0'; + if (!conf->cgroup_meta.monitor_dir) { + suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN; + *suffix = '\0'; + } do { - if (idx) + if (idx 
&& suffix) sprintf(suffix, "-%d", idx); for (i = 0; ops->hierarchies[i]; i++) { - if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, monitor_cgroup, false)) + if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, monitor_cgroup, false, NULL)) continue; ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path ?: "(null)"); @@ -1268,9 +1332,9 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, idx++; break; } - } while (ops->hierarchies[i] && idx > 0 && idx < 1000); + } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix); - if (idx == 1000) + if (idx == 1000 || (!suffix && idx != 0)) return ret_set_errno(false, ERANGE); ops->monitor_cgroup = move_ptr(monitor_cgroup); @@ -1284,12 +1348,14 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, struct lxc_handler *handler) { - __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL; + __do_free char *container_cgroup = NULL, + *__cgroup_tree = NULL, + *limiting_cgroup = NULL; const char *cgroup_tree; int idx = 0; int i; size_t len; - char *suffix; + char *suffix = NULL; struct lxc_conf *conf; if (!ops) @@ -1306,7 +1372,20 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, conf = handler->conf; - if (conf->cgroup_meta.dir) { + if (!check_cgroup_dir_config(conf)) + return false; + + if (conf->cgroup_meta.container_dir) { + cgroup_tree = NULL; + + limiting_cgroup = strdup(conf->cgroup_meta.container_dir); + if (!limiting_cgroup) + return ret_set_errno(false, ENOMEM); + + container_cgroup = must_make_path(limiting_cgroup, + conf->cgroup_meta.namespace_dir, + NULL); + } else if (conf->cgroup_meta.dir) { cgroup_tree = conf->cgroup_meta.dir; container_cgroup = must_concat(&len, cgroup_tree, "/", DEFAULT_PAYLOAD_CGROUP_PREFIX, @@ -1330,14 +1409,18 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct 
cgroup_ops *ops, if (!container_cgroup) return ret_set_errno(false, ENOMEM); - suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN; - *suffix = '\0'; + if (!conf->cgroup_meta.container_dir) { + suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN; + *suffix = '\0'; + } do { - if (idx) + if (idx && suffix) sprintf(suffix, "-%d", idx); for (i = 0; ops->hierarchies[i]; i++) { - if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, container_cgroup, true)) + if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, + container_cgroup, true, + limiting_cgroup)) continue; ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)"); @@ -1347,9 +1430,9 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, idx++; break; } - } while (ops->hierarchies[i] && idx > 0 && idx < 1000); + } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix); - if (idx == 1000) + if (idx == 1000 || (!suffix && idx != 0)) return ret_set_errno(false, ERANGE); ops->container_cgroup = move_ptr(container_cgroup); @@ -2039,8 +2122,8 @@ __cgfsng_ops static int cgfsng_unfreeze(struct cgroup_ops *ops, int timeout) return cg_unified_unfreeze(ops, timeout); } -__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, - const char *controller) +static const char *cgfsng_get_cgroup_do(struct cgroup_ops *ops, + const char *controller, bool limiting) { struct hierarchy *h; @@ -2049,11 +2132,28 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", controller ? controller : "(null)"); + if (limiting) + return h->container_limit_path + ? h->container_limit_path + strlen(h->mountpoint) + : NULL; + return h->container_full_path ? 
h->container_full_path + strlen(h->mountpoint) : NULL; } +__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, + const char *controller) +{ + return cgfsng_get_cgroup_do(ops, controller, false); +} + +__cgfsng_ops static const char *cgfsng_get_limiting_cgroup(struct cgroup_ops *ops, + const char *controller) +{ + return cgfsng_get_cgroup_do(ops, controller, true); +} + /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path, * which must be freed by the caller. */ @@ -2382,7 +2482,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, if (p) *p = '\0'; - path = lxc_cmd_get_cgroup_path(name, lxcpath, controller); + path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller); /* not running */ if (!path) return -1; @@ -2547,7 +2647,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, return 0; } - path = lxc_cmd_get_cgroup_path(name, lxcpath, controller); + path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller); /* not running */ if (!path) return -1; @@ -2657,7 +2757,7 @@ static int convert_devpath(const char *invalue, char *dest) * we created the cgroups. */ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, - const char *value) + const char *value, bool is_cpuset) { __do_free char *controller = NULL; char *p; @@ -2683,7 +2783,12 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, if (!h) return log_error_errno(-ENOENT, ENOENT, "Failed to setup limits for the \"%s\" controller. 
The controller seems to be unused by \"cgfsng\" cgroup driver or not enabled on the cgroup hierarchy", controller); - return lxc_write_openat(h->container_full_path, filename, value, strlen(value)); + if (is_cpuset) { + int ret = lxc_write_openat(h->container_full_path, filename, value, strlen(value)); + if (ret) + return ret; + } + return lxc_write_openat(h->container_limit_path, filename, value, strlen(value)); } __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, @@ -2717,7 +2822,7 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, cg = iterator->elem; if (do_devices == !strncmp("devices", cg->subsystem, 7)) { - if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) { + if (cg_legacy_set_data(ops, cg->subsystem, cg->value, strncmp("cpuset", cg->subsystem, 6) == 0)) { if (do_devices && (errno == EACCES || errno == EPERM)) { SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value); continue; @@ -2802,7 +2907,7 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops, ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem, cg->value); } else { - ret = lxc_write_openat(h->container_full_path, + ret = lxc_write_openat(h->container_limit_path, cg->subsystem, cg->value, strlen(cg->value)); if (ret < 0) @@ -2878,7 +2983,7 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops, return log_error_errno(false, ENOMEM, "Failed to finalize bpf program"); ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE, - unified->container_full_path, + unified->container_limit_path, BPF_F_ALLOW_MULTI); if (ret) return log_error_errno(false, ENOMEM, "Failed to attach bpf program"); @@ -3323,6 +3428,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) cgfsng_ops->chown = cgfsng_chown; cgfsng_ops->mount = cgfsng_mount; cgfsng_ops->devices_activate = cgfsng_devices_activate; + cgfsng_ops->get_limiting_cgroup = cgfsng_get_limiting_cgroup; return move_ptr(cgfsng_ops); } diff --git 
a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h index 1e08a017a..c5bf7941a 100644 --- a/src/lxc/cgroups/cgroup.h +++ b/src/lxc/cgroups/cgroup.h @@ -54,7 +54,11 @@ typedef enum { * init's cgroup (if root). * * @container_full_path - * - The full path to the containers cgroup. + * - The full path to the container's cgroup. + * + * @container_limit_path + * - The full path to the container's limiting cgroup. May simply point to + * container_full_path. * * @monitor_full_path * - The full path to the monitor's cgroup. @@ -77,15 +81,18 @@ struct hierarchy { char *mountpoint; char *container_base_path; char *container_full_path; + char *container_limit_path; char *monitor_full_path; int version; /* cgroup2 only */ unsigned int bpf_device_controller:1; - /* monitor cgroup fd */ - int cgfd_con; /* container cgroup fd */ + int cgfd_con; + /* limiting cgroup fd (may be equal to cgfd_con if not separated) */ + int cgfd_limit; + /* monitor cgroup fd */ int cgfd_mon; }; @@ -169,6 +176,7 @@ struct cgroup_ops { bool (*monitor_delegate_controllers)(struct cgroup_ops *ops); bool (*payload_delegate_controllers)(struct cgroup_ops *ops); void (*payload_finalize)(struct cgroup_ops *ops); + const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller); }; extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf); diff --git a/src/lxc/commands.c b/src/lxc/commands.c index 991bca290..5ac3f5d9c 100644 --- a/src/lxc/commands.c +++ b/src/lxc/commands.c @@ -84,6 +84,8 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) [LXC_CMD_UNFREEZE] = "unfreeze", [LXC_CMD_GET_CGROUP2_FD] = "get_cgroup2_fd", [LXC_CMD_GET_INIT_PIDFD] = "get_init_pidfd", + [LXC_CMD_GET_LIMITING_CGROUP] = "get_limiting_cgroup", + [LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd", }; if (cmd >= LXC_CMD_MAX) @@ -142,7 +144,9 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) rsp->data = rspdata; } - if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) { + if (cmd->req.cmd == 
LXC_CMD_GET_CGROUP2_FD || + cmd->req.cmd == LXC_CMD_GET_LIMITING_CGROUP2_FD) + { int cgroup2_fd = move_fd(fd_rsp); rsp->data = INT_TO_PTR(cgroup2_fd); } @@ -483,25 +487,14 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, return 0; } -/* - * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a - * particular subsystem. This is the cgroup path relative to the root - * of the cgroup filesystem. - * - * @name : name of container to connect to - * @lxcpath : the lxcpath in which the container is running - * @subsystem : the subsystem being asked about - * - * Returns the path on success, NULL on failure. The caller must free() the - * returned path. - */ -char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, - const char *subsystem) +static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath, + const char *subsystem, + lxc_cmd_t command) { int ret, stopped; struct lxc_cmd_rr cmd = { .req = { - .cmd = LXC_CMD_GET_CGROUP, + .cmd = command, .data = subsystem, .datalen = 0, }, @@ -525,24 +518,72 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, return cmd.rsp.data; } -static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr) +/* + * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a + * particular subsystem. This is the cgroup path relative to the root + * of the cgroup filesystem. + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * @subsystem : the subsystem being asked about + * + * Returns the path on success, NULL on failure. The caller must free() the + * returned path. 
+ */ +char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, + const char *subsystem) +{ + return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem, + LXC_CMD_GET_CGROUP); +} + +/* + * lxc_cmd_get_limiting_cgroup_path: Calculate a container's limiting cgroup + * path for a particular subsystem. This is the cgroup path relative to the + * root of the cgroup filesystem. This may be the same as the path returned by + * lxc_cmd_get_cgroup_path if the container doesn't have a limiting path prefix + * set. + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * @subsystem : the subsystem being asked about + * + * Returns the path on success, NULL on failure. The caller must free() the + * returned path. + */ +char *lxc_cmd_get_limiting_cgroup_path(const char *name, const char *lxcpath, + const char *subsystem) +{ + return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem, + LXC_CMD_GET_LIMITING_CGROUP); +} + +static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr, + bool limiting_cgroup) { int ret; const char *path; + const void *reqdata; struct lxc_cmd_rsp rsp; struct cgroup_ops *cgroup_ops = handler->cgroup_ops; + const char *(*get_fn)(struct cgroup_ops *ops, const char *controller); if (req->datalen > 0) { ret = validate_string_request(fd, req); if (ret != 0) return ret; - - path = cgroup_ops->get_cgroup(cgroup_ops, req->data); + reqdata = req->data; } else { - path = cgroup_ops->get_cgroup(cgroup_ops, NULL); + reqdata = NULL; } + + get_fn = (limiting_cgroup ? 
cgroup_ops->get_limiting_cgroup + : cgroup_ops->get_cgroup); + + path = get_fn(cgroup_ops, reqdata); + if (!path) return -1; @@ -557,6 +598,20 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, return 0; } +static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr) +{ + return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, false); +} + +static int lxc_cmd_get_limiting_cgroup_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr) +{ + return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, true); +} + /* * lxc_cmd_get_config_item: Get config item the running container * @@ -1366,28 +1421,48 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath) return PTR_TO_INT(cmd.rsp.data); } -static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr) +static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr, + bool limiting_cgroup) { struct lxc_cmd_rsp rsp = { .ret = -EINVAL, }; struct cgroup_ops *ops = handler->cgroup_ops; - int ret; + int ret, send_fd; if (!pure_unified_layout(ops) || !ops->unified) return lxc_cmd_rsp_send(fd, &rsp); + send_fd = limiting_cgroup ? 
ops->unified->cgfd_limit + : ops->unified->cgfd_con; + rsp.ret = 0; - ret = lxc_abstract_unix_send_fds(fd, &ops->unified->cgfd_con, 1, &rsp, - sizeof(rsp)); + ret = lxc_abstract_unix_send_fds(fd, &send_fd, 1, &rsp, sizeof(rsp)); if (ret < 0) return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send cgroup2 fd"); return 0; } +static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr) +{ + return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr, + false); +} + +static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd, + struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr) +{ + return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr, + true); +} + static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, struct lxc_handler *handler, struct lxc_epoll_descr *descr) @@ -1415,6 +1490,8 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, [LXC_CMD_UNFREEZE] = lxc_cmd_unfreeze_callback, [LXC_CMD_GET_CGROUP2_FD] = lxc_cmd_get_cgroup2_fd_callback, [LXC_CMD_GET_INIT_PIDFD] = lxc_cmd_get_init_pidfd_callback, + [LXC_CMD_GET_LIMITING_CGROUP] = lxc_cmd_get_limiting_cgroup_callback, + [LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback, }; if (req->cmd >= LXC_CMD_MAX) diff --git a/src/lxc/commands.h b/src/lxc/commands.h index 9e5248424..878998832 100644 --- a/src/lxc/commands.h +++ b/src/lxc/commands.h @@ -38,6 +38,8 @@ typedef enum { LXC_CMD_UNFREEZE, LXC_CMD_GET_CGROUP2_FD, LXC_CMD_GET_INIT_PIDFD, + LXC_CMD_GET_LIMITING_CGROUP, + LXC_CMD_GET_LIMITING_CGROUP2_FD, LXC_CMD_MAX, } lxc_cmd_t; @@ -129,5 +131,9 @@ extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath, extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout); extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout); extern int lxc_cmd_get_cgroup2_fd(const char *name, const char 
*lxcpath); +extern char *lxc_cmd_get_limiting_cgroup_path(const char *name, + const char *lxcpath, + const char *subsystem); +extern int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath); #endif /* __commands_h */ diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 2f6be9f26..8d480b049 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3832,6 +3832,9 @@ void lxc_conf_free(struct lxc_conf *conf) lxc_clear_apparmor_raw(conf); lxc_clear_namespace(conf); free(conf->cgroup_meta.dir); + free(conf->cgroup_meta.monitor_dir); + free(conf->cgroup_meta.container_dir); + free(conf->cgroup_meta.namespace_dir); free(conf->cgroup_meta.controllers); free(conf->shmount.path_host); free(conf->shmount.path_cont); diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 64885c35e..3ff226b72 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -60,6 +60,9 @@ struct lxc_cgroup { struct /* meta */ { char *controllers; char *dir; + char *monitor_dir; + char *container_dir; + char *namespace_dir; bool relative; }; }; diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 0ca577fa3..59553f23e 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -71,6 +71,9 @@ lxc_config_define(cap_keep); lxc_config_define(cgroup_controller); lxc_config_define(cgroup2_controller); lxc_config_define(cgroup_dir); +lxc_config_define(cgroup_monitor_dir); +lxc_config_define(cgroup_container_dir); +lxc_config_define(cgroup_container_inner_dir); lxc_config_define(cgroup_relative); lxc_config_define(console_buffer_size); lxc_config_define(console_logfile); @@ -170,6 +173,9 @@ static struct lxc_config_t config_jump_table[] = { { "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, }, { "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, }, { "lxc.cgroup2", set_config_cgroup2_controller, get_config_cgroup2_controller, clr_config_cgroup2_controller, }, + { "lxc.cgroup.dir.monitor", set_config_cgroup_monitor_dir, get_config_cgroup_monitor_dir, 
clr_config_cgroup_monitor_dir, }, + { "lxc.cgroup.dir.container", set_config_cgroup_container_dir, get_config_cgroup_container_dir, clr_config_cgroup_container_dir, }, + { "lxc.cgroup.dir.container.inner",set_config_cgroup_container_inner_dir, get_config_cgroup_container_inner_dir, clr_config_cgroup_container_inner_dir,}, { "lxc.cgroup.dir", set_config_cgroup_dir, get_config_cgroup_dir, clr_config_cgroup_dir, }, { "lxc.cgroup.relative", set_config_cgroup_relative, get_config_cgroup_relative, clr_config_cgroup_relative, }, { "lxc.cgroup", set_config_cgroup_controller, get_config_cgroup_controller, clr_config_cgroup_controller, }, @@ -1721,6 +1727,48 @@ static int set_config_cgroup_dir(const char *key, const char *value, return set_config_string_item(&lxc_conf->cgroup_meta.dir, value); } +static int set_config_cgroup_monitor_dir(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + if (lxc_config_value_empty(value)) + return clr_config_cgroup_monitor_dir(key, lxc_conf, NULL); + + return set_config_string_item(&lxc_conf->cgroup_meta.monitor_dir, + value); +} + +static int set_config_cgroup_container_dir(const char *key, const char *value, + struct lxc_conf *lxc_conf, + void *data) +{ + if (lxc_config_value_empty(value)) + return clr_config_cgroup_container_dir(key, lxc_conf, NULL); + + return set_config_string_item(&lxc_conf->cgroup_meta.container_dir, + value); +} + +static int set_config_cgroup_container_inner_dir(const char *key, + const char *value, + struct lxc_conf *lxc_conf, + void *data) +{ + if (lxc_config_value_empty(value)) + return clr_config_cgroup_container_inner_dir(key, lxc_conf, + NULL); + + if (strchr(value, '/') || + strcmp(value, ".") == 0 || + strcmp(value, "..") == 0) + { + ERROR("lxc.cgroup.dir.container.inner must be a single directory name"); + return -1; + } + + return set_config_string_item(&lxc_conf->cgroup_meta.namespace_dir, + value); +} + static int set_config_cgroup_relative(const char *key, const char 
*value, struct lxc_conf *lxc_conf, void *data) { @@ -3644,6 +3692,58 @@ static int get_config_cgroup_dir(const char *key, char *retv, int inlen, return fulllen; } +static int get_config_cgroup_monitor_dir(const char *key, char *retv, int inlen, + struct lxc_conf *lxc_conf, void *data) +{ + int len; + int fulllen = 0; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.monitor_dir); + + return fulllen; +} + +static int get_config_cgroup_container_dir(const char *key, char *retv, + int inlen, + struct lxc_conf *lxc_conf, + void *data) +{ + int len; + int fulllen = 0; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.container_dir); + + return fulllen; +} + +static int get_config_cgroup_container_inner_dir(const char *key, char *retv, + int inlen, + struct lxc_conf *lxc_conf, + void *data) +{ + int len; + int fulllen = 0; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.namespace_dir); + + return fulllen; +} + static inline int get_config_cgroup_relative(const char *key, char *retv, int inlen, struct lxc_conf *lxc_conf, void *data) @@ -4458,6 +4558,30 @@ static int clr_config_cgroup_dir(const char *key, struct lxc_conf *lxc_conf, return 0; } +static int clr_config_cgroup_monitor_dir(const char *key, + struct lxc_conf *lxc_conf, + void *data) +{ + free_disarm(lxc_conf->cgroup_meta.monitor_dir); + return 0; +} + +static int clr_config_cgroup_container_dir(const char *key, + struct lxc_conf *lxc_conf, + void *data) +{ + free_disarm(lxc_conf->cgroup_meta.container_dir); + return 0; +} + +static int clr_config_cgroup_container_inner_dir(const char *key, + struct lxc_conf *lxc_conf, + void *data) +{ + free_disarm(lxc_conf->cgroup_meta.namespace_dir); + return 0; +} + static inline int clr_config_cgroup_relative(const char *key, struct lxc_conf *lxc_conf, void *data) diff --git 
a/src/lxc/criu.c b/src/lxc/criu.c index 1a909bb6c..2485accc0 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -303,7 +303,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, * the handler the restore task created. */ if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) { - path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]); + path = lxc_cmd_get_limiting_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]); if (!path) { ERROR("failed to get cgroup path for %s", controllers[0]); goto err; @@ -311,7 +311,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, } else { const char *p; - p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]); + p = cgroup_ops->get_limiting_cgroup(cgroup_ops, controllers[0]); if (!p) { ERROR("failed to get cgroup path for %s", controllers[0]); goto err; @@ -406,9 +406,9 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, DECLARE_ARG("-t"); DECLARE_ARG(pid); - freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name, - opts->c->config_path, - "freezer"); + freezer_relative = lxc_cmd_get_limiting_cgroup_path(opts->c->name, + opts->c->config_path, + "freezer"); if (!freezer_relative) { ERROR("failed getting freezer path"); goto err;