diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
index ae04e3af3..b45639883 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -1571,6 +1571,53 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+
+
+
+
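+ This is similar to <option>lxc.cgroup.dir</option>, but must be
+ used together with <option>lxc.cgroup.dir.monitor</option> and
+ affects only the container's cgroup path. This option is mutually
+ exclusive with <option>lxc.cgroup.dir</option>.
+ Note that the final path the container attaches to may be
+ extended further by the
+ <option>lxc.cgroup.dir.container.inner</option> option.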
+
+
+
+
+
+
+
+
+
+ This is the monitor process counterpart to
+ <option>lxc.cgroup.dir.container</option>.
+
+
+
+
+
+
+
+
+
+ Specify an additional subdirectory where the cgroup namespace
+ will be created. With this option, the cgroup limits will be
+ applied to the outer path specified in
+ <option>lxc.cgroup.dir.container</option>, which is not accessible
+ from within the container, making it possible to better enforce
+ limits for privileged containers in a way they cannot override
+ them.
+ This only works in conjunction with the
+ <option>lxc.cgroup.dir.container</option> and
+ <option>lxc.cgroup.dir.monitor</option> options and has otherwise
+ no effect.
+
+
+
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index d3595bcdf..cf0f5fbc5 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -725,6 +725,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
new->container_base_path = container_base_path;
new->version = type;
new->cgfd_con = -EBADF;
+ new->cgfd_limit = -EBADF;
new->cgfd_mon = -EBADF;
newentry = append_null_to_list((void ***)h);
@@ -956,13 +957,15 @@ static int cgroup_tree_remove(struct hierarchy **hierarchies,
struct hierarchy *h = hierarchies[i];
int ret;
- if (!h->container_full_path)
+ if (!h->container_limit_path)
continue;
- ret = lxc_rm_rf(h->container_full_path);
+ ret = lxc_rm_rf(h->container_limit_path);
if (ret < 0)
- WARN("Failed to destroy \"%s\"", h->container_full_path);
+ WARN("Failed to destroy \"%s\"", h->container_limit_path);
+ /*
+  * container_limit_path may alias container_full_path. In that case
+  * only drop the alias, so the string is freed exactly once below and
+  * no dangling pointer is left behind in the hierarchy.
+  */
+ if (h->container_limit_path == h->container_full_path)
+ h->container_limit_path = NULL;
+ else
+ free_disarm(h->container_limit_path);
free_disarm(h->container_full_path);
}
@@ -1089,7 +1092,12 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
goto try_lxc_rm_rf;
}
- if (conf && conf->cgroup_meta.dir)
+ if (conf && conf->cgroup_meta.monitor_dir)
+ pivot_path = must_make_path(h->mountpoint,
+ h->container_base_path,
+ conf->cgroup_meta.monitor_dir,
+ CGROUP_PIVOT, NULL);
+ else if (conf && conf->cgroup_meta.dir)
pivot_path = must_make_path(h->mountpoint,
h->container_base_path,
conf->cgroup_meta.dir,
@@ -1147,7 +1155,8 @@ static int mkdir_eexist_on_last(const char *dir, mode_t mode)
}
static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree,
- const char *cgroup_leaf, bool payload)
+ const char *cgroup_leaf, bool payload,
+ const char *cgroup_limit_dir)
{
__do_free char *path = NULL;
int ret, ret_cpuset;
@@ -1176,6 +1185,16 @@ static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree,
if (h->cgfd_con < 0)
return log_error_errno(false, errno, "Failed to open %s", path);
h->container_full_path = move_ptr(path);
+ if (cgroup_limit_dir) {
+ path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL);
+ h->cgfd_limit = lxc_open_dirfd(path);
+ if (h->cgfd_limit < 0)
+ return log_error_errno(false, errno, "Failed to open %s", path);
+ h->container_limit_path = move_ptr(path);
+ } else {
+ h->container_limit_path = h->container_full_path;
+ h->cgfd_limit = h->cgfd_con;
+ }
} else {
h->cgfd_mon = lxc_open_dirfd(path);
if (h->cgfd_mon < 0)
@@ -1188,11 +1207,15 @@ static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree,
+/*
+ * Remove the leaf cgroup of hierarchy @h: the payload cgroup (and, if it is
+ * a separate directory, the limiting cgroup) when @payload is true, the
+ * monitor cgroup otherwise. The corresponding fds and path strings are taken
+ * out of @h and released when the function returns.
+ */
static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload)
{
- __do_free char *full_path = NULL;
+ __do_free char *full_path = NULL, *__limit_path = NULL;
+ char *limit_path = NULL;
if (payload) {
__lxc_unused __do_close int fd = move_fd(h->cgfd_con);
+ __lxc_unused __do_close int fd_limit = -EBADF;
+ /*
+  * cgfd_limit may be the very same descriptor as cgfd_con. Close it
+  * separately only when it is distinct; otherwise just invalidate it
+  * so no stale fd number stays in the hierarchy.
+  */
+ if (h->cgfd_limit == fd)
+ h->cgfd_limit = -EBADF;
+ else
+ fd_limit = move_fd(h->cgfd_limit);
full_path = move_ptr(h->container_full_path);
+ limit_path = move_ptr(h->container_limit_path);
+ /* Take ownership of the limit path only if it is a separate string. */
+ if (limit_path != full_path)
+ __limit_path = limit_path;
} else {
__lxc_unused __do_close int fd = move_fd(h->cgfd_mon);
full_path = move_ptr(h->monitor_full_path);
@@ -1200,6 +1223,39 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload)
if (full_path && rmdir(full_path))
SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path);
+ /*
+  * Only remove the limiting cgroup when it is distinct from full_path;
+  * otherwise the directory was already handled by the rmdir() above and a
+  * second attempt would merely log a spurious failure.
+  */
+ if (__limit_path && rmdir(__limit_path))
+ SYSWARN("Failed to rmdir(\"%s\") cgroup", __limit_path);
+}
+
+/*
+ * Validate the combination of the cgroup directory options.
+ *
+ * If none of lxc.cgroup.dir.monitor, lxc.cgroup.dir.container and
+ * lxc.cgroup.dir.container.inner is set, the configuration is accepted.
+ * Otherwise lxc.cgroup.dir must not be set, and both lxc.cgroup.dir.monitor
+ * and lxc.cgroup.dir.container must be set; lxc.cgroup.dir.container.inner
+ * is optional.
+ *
+ * Returns true if the configuration is acceptable. Otherwise reports the
+ * problem through log_error_errno() with EINVAL and returns false.
+ */
+static bool check_cgroup_dir_config(struct lxc_conf *conf)
+{
+ const char *monitor_dir = conf->cgroup_meta.monitor_dir,
+ *container_dir = conf->cgroup_meta.container_dir,
+ *namespace_dir = conf->cgroup_meta.namespace_dir;
+
+ /* none of the new options are set, all is fine */
+ if (!monitor_dir && !container_dir && !namespace_dir)
+ return true;
+
+ /* some are set, make sure lxc.cgroup.dir is not also set */
+ if (conf->cgroup_meta.dir)
+ return log_error_errno(false, EINVAL,
+ "lxc.cgroup.dir conflicts with lxc.cgroup.dir.container/monitor");
+
+ /* make sure both monitor and container are set */
+ if (!monitor_dir || !container_dir)
+ return log_error_errno(false, EINVAL,
+ "lxc.cgroup.dir.container and lxc.cgroup.dir.monitor must both be set");
+
+ /* namespace_dir may be empty */
+ return true;
}
__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
@@ -1210,7 +1266,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
int idx = 0;
int i;
size_t len;
- char *suffix;
+ char *suffix = NULL;
struct lxc_conf *conf;
if (!ops)
@@ -1227,7 +1283,13 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
conf = handler->conf;
- if (conf->cgroup_meta.dir) {
+ if (!check_cgroup_dir_config(conf))
+ return false;
+
+ if (conf->cgroup_meta.monitor_dir) {
+ cgroup_tree = NULL;
+ monitor_cgroup = strdup(conf->cgroup_meta.monitor_dir);
+ } else if (conf->cgroup_meta.dir) {
cgroup_tree = conf->cgroup_meta.dir;
monitor_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/",
DEFAULT_MONITOR_CGROUP_PREFIX,
@@ -1251,14 +1313,16 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
if (!monitor_cgroup)
return ret_set_errno(false, ENOMEM);
- suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN;
- *suffix = '\0';
+ if (!conf->cgroup_meta.monitor_dir) {
+ suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN;
+ *suffix = '\0';
+ }
do {
- if (idx)
+ if (idx && suffix)
sprintf(suffix, "-%d", idx);
for (i = 0; ops->hierarchies[i]; i++) {
- if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, monitor_cgroup, false))
+ if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, monitor_cgroup, false, NULL))
continue;
ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path ?: "(null)");
@@ -1268,9 +1332,9 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
idx++;
break;
}
- } while (ops->hierarchies[i] && idx > 0 && idx < 1000);
+ } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix);
- if (idx == 1000)
+ if (idx == 1000 || (!suffix && idx != 0))
return ret_set_errno(false, ERANGE);
ops->monitor_cgroup = move_ptr(monitor_cgroup);
@@ -1284,12 +1348,14 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
- __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL;
+ __do_free char *container_cgroup = NULL,
+ *__cgroup_tree = NULL,
+ *limiting_cgroup = NULL;
const char *cgroup_tree;
int idx = 0;
int i;
size_t len;
- char *suffix;
+ char *suffix = NULL;
struct lxc_conf *conf;
if (!ops)
@@ -1306,7 +1372,20 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
conf = handler->conf;
- if (conf->cgroup_meta.dir) {
+ if (!check_cgroup_dir_config(conf))
+ return false;
+
+ if (conf->cgroup_meta.container_dir) {
+ cgroup_tree = NULL;
+
+ limiting_cgroup = strdup(conf->cgroup_meta.container_dir);
+ if (!limiting_cgroup)
+ return ret_set_errno(false, ENOMEM);
+
+ container_cgroup = must_make_path(limiting_cgroup,
+ conf->cgroup_meta.namespace_dir,
+ NULL);
+ } else if (conf->cgroup_meta.dir) {
cgroup_tree = conf->cgroup_meta.dir;
container_cgroup = must_concat(&len, cgroup_tree, "/",
DEFAULT_PAYLOAD_CGROUP_PREFIX,
@@ -1330,14 +1409,18 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
if (!container_cgroup)
return ret_set_errno(false, ENOMEM);
- suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN;
- *suffix = '\0';
+ if (!conf->cgroup_meta.container_dir) {
+ suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN;
+ *suffix = '\0';
+ }
do {
- if (idx)
+ if (idx && suffix)
sprintf(suffix, "-%d", idx);
for (i = 0; ops->hierarchies[i]; i++) {
- if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, container_cgroup, true))
+ if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree,
+ container_cgroup, true,
+ limiting_cgroup))
continue;
ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)");
@@ -1347,9 +1430,9 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
idx++;
break;
}
- } while (ops->hierarchies[i] && idx > 0 && idx < 1000);
+ } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix);
- if (idx == 1000)
+ if (idx == 1000 || (!suffix && idx != 0))
return ret_set_errno(false, ERANGE);
ops->container_cgroup = move_ptr(container_cgroup);
@@ -2039,8 +2122,8 @@ __cgfsng_ops static int cgfsng_unfreeze(struct cgroup_ops *ops, int timeout)
return cg_unified_unfreeze(ops, timeout);
}
-__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
- const char *controller)
+static const char *cgfsng_get_cgroup_do(struct cgroup_ops *ops,
+ const char *controller, bool limiting)
{
struct hierarchy *h;
@@ -2049,11 +2132,28 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
controller ? controller : "(null)");
+ if (limiting)
+ return h->container_limit_path
+ ? h->container_limit_path + strlen(h->mountpoint)
+ : NULL;
+
return h->container_full_path
? h->container_full_path + strlen(h->mountpoint)
: NULL;
}
+/*
+ * Return the container's cgroup path for @controller with the hierarchy's
+ * mountpoint prefix skipped, or NULL if no hierarchy is found for
+ * @controller or the path is not set. The result points into the string
+ * held by the hierarchy (container_full_path).
+ */
+__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
+ const char *controller)
+{
+ return cgfsng_get_cgroup_do(ops, controller, false);
+}
+
+/*
+ * Return the container's limiting cgroup path for @controller with the
+ * hierarchy's mountpoint prefix skipped, or NULL if no hierarchy is found
+ * for @controller or the path is not set. The result points into the string
+ * held by the hierarchy (container_limit_path).
+ */
+__cgfsng_ops static const char *cgfsng_get_limiting_cgroup(struct cgroup_ops *ops,
+ const char *controller)
+{
+ return cgfsng_get_cgroup_do(ops, controller, true);
+}
+
/* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
* which must be freed by the caller.
*/
@@ -2382,7 +2482,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
if (p)
*p = '\0';
- path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+ path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller);
/* not running */
if (!path)
return -1;
@@ -2547,7 +2647,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
return 0;
}
- path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+ path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller);
/* not running */
if (!path)
return -1;
@@ -2657,7 +2757,7 @@ static int convert_devpath(const char *invalue, char *dest)
* we created the cgroups.
*/
static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
- const char *value)
+ const char *value, bool is_cpuset)
{
__do_free char *controller = NULL;
char *p;
@@ -2683,7 +2783,12 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
if (!h)
return log_error_errno(-ENOENT, ENOENT, "Failed to setup limits for the \"%s\" controller. The controller seems to be unused by \"cgfsng\" cgroup driver or not enabled on the cgroup hierarchy", controller);
- return lxc_write_openat(h->container_full_path, filename, value, strlen(value));
+ if (is_cpuset) {
+ int ret = lxc_write_openat(h->container_full_path, filename, value, strlen(value));
+ if (ret)
+ return ret;
+ }
+ return lxc_write_openat(h->container_limit_path, filename, value, strlen(value));
}
__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
@@ -2717,7 +2822,7 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
cg = iterator->elem;
if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
- if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) {
+ if (cg_legacy_set_data(ops, cg->subsystem, cg->value, strncmp("cpuset", cg->subsystem, 6) == 0)) {
if (do_devices && (errno == EACCES || errno == EPERM)) {
SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
continue;
@@ -2802,7 +2907,7 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem,
cg->value);
} else {
- ret = lxc_write_openat(h->container_full_path,
+ ret = lxc_write_openat(h->container_limit_path,
cg->subsystem, cg->value,
strlen(cg->value));
if (ret < 0)
@@ -2878,7 +2983,7 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
return log_error_errno(false, ENOMEM, "Failed to finalize bpf program");
ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
- unified->container_full_path,
+ unified->container_limit_path,
BPF_F_ALLOW_MULTI);
if (ret)
return log_error_errno(false, ENOMEM, "Failed to attach bpf program");
@@ -3323,6 +3428,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
cgfsng_ops->chown = cgfsng_chown;
cgfsng_ops->mount = cgfsng_mount;
cgfsng_ops->devices_activate = cgfsng_devices_activate;
+ cgfsng_ops->get_limiting_cgroup = cgfsng_get_limiting_cgroup;
return move_ptr(cgfsng_ops);
}
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index 1e08a017a..c5bf7941a 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -54,7 +54,11 @@ typedef enum {
* init's cgroup (if root).
*
* @container_full_path
- * - The full path to the containers cgroup.
+ * - The full path to the container's cgroup.
+ *
+ * @container_limit_path
+ * - The full path to the container's limiting cgroup. May simply point to
+ * container_full_path.
*
* @monitor_full_path
* - The full path to the monitor's cgroup.
@@ -77,15 +81,18 @@ struct hierarchy {
char *mountpoint;
char *container_base_path;
char *container_full_path;
+ char *container_limit_path;
char *monitor_full_path;
int version;
/* cgroup2 only */
unsigned int bpf_device_controller:1;
- /* monitor cgroup fd */
- int cgfd_con;
/* container cgroup fd */
+ int cgfd_con;
+ /* limiting cgroup fd (may be equal to cgfd_con if not separated) */
+ int cgfd_limit;
+ /* monitor cgroup fd */
int cgfd_mon;
};
@@ -169,6 +176,7 @@ struct cgroup_ops {
bool (*monitor_delegate_controllers)(struct cgroup_ops *ops);
bool (*payload_delegate_controllers)(struct cgroup_ops *ops);
void (*payload_finalize)(struct cgroup_ops *ops);
+ const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller);
};
extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index 991bca290..5ac3f5d9c 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -84,6 +84,8 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
[LXC_CMD_UNFREEZE] = "unfreeze",
[LXC_CMD_GET_CGROUP2_FD] = "get_cgroup2_fd",
[LXC_CMD_GET_INIT_PIDFD] = "get_init_pidfd",
+ [LXC_CMD_GET_LIMITING_CGROUP] = "get_limiting_cgroup",
+ [LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd",
};
if (cmd >= LXC_CMD_MAX)
@@ -142,7 +144,9 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
rsp->data = rspdata;
}
- if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) {
+ if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD ||
+ cmd->req.cmd == LXC_CMD_GET_LIMITING_CGROUP2_FD)
+ {
int cgroup2_fd = move_fd(fd_rsp);
rsp->data = INT_TO_PTR(cgroup2_fd);
}
@@ -483,25 +487,14 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
return 0;
}
-/*
- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
- * particular subsystem. This is the cgroup path relative to the root
- * of the cgroup filesystem.
- *
- * @name : name of container to connect to
- * @lxcpath : the lxcpath in which the container is running
- * @subsystem : the subsystem being asked about
- *
- * Returns the path on success, NULL on failure. The caller must free() the
- * returned path.
- */
-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
- const char *subsystem)
+static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath,
+ const char *subsystem,
+ lxc_cmd_t command)
{
int ret, stopped;
struct lxc_cmd_rr cmd = {
.req = {
- .cmd = LXC_CMD_GET_CGROUP,
+ .cmd = command,
.data = subsystem,
.datalen = 0,
},
@@ -525,24 +518,72 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
return cmd.rsp.data;
}
-static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
- struct lxc_handler *handler,
- struct lxc_epoll_descr *descr)
+/*
+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
+ * particular subsystem. This is the cgroup path relative to the root
+ * of the cgroup filesystem.
+ *
+ * @name : name of container to connect to
+ * @lxcpath : the lxcpath in which the container is running
+ * @subsystem : the subsystem being asked about
+ *
+ * Returns the path on success, NULL on failure. The caller must free() the
+ * returned path.
+ */
+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+ const char *subsystem)
+{
+ return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem,
+ LXC_CMD_GET_CGROUP);
+}
+
+/*
+ * lxc_cmd_get_limiting_cgroup_path: Calculate a container's limiting cgroup
+ * path for a particular subsystem. This is the cgroup path relative to the
+ * root of the cgroup filesystem. This may be the same as the path returned by
+ * lxc_cmd_get_cgroup_path if the container has no separate limiting cgroup,
+ * i.e. lxc.cgroup.dir.container.inner is not set.
+ *
+ * Sends the LXC_CMD_GET_LIMITING_CGROUP command to the container's monitor.
+ *
+ * @name : name of container to connect to
+ * @lxcpath : the lxcpath in which the container is running
+ * @subsystem : the subsystem being asked about
+ *
+ * Returns the path on success, NULL on failure. The caller must free() the
+ * returned path.
+ */
+char *lxc_cmd_get_limiting_cgroup_path(const char *name, const char *lxcpath,
+ const char *subsystem)
+{
+ return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem,
+ LXC_CMD_GET_LIMITING_CGROUP);
+}
+
+/*
+ * Shared handler for the cgroup path commands.
+ *
+ * Validates the optional controller string in @req, then looks up the
+ * container's cgroup path through @handler's cgroup_ops: the limiting cgroup
+ * (get_limiting_cgroup) when @limiting_cgroup is true, the regular payload
+ * cgroup (get_cgroup) otherwise. Returns -1 if no path is available.
+ */
+static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req,
+ struct lxc_handler *handler,
+ struct lxc_epoll_descr *descr,
+ bool limiting_cgroup)
{
int ret;
const char *path;
+ const void *reqdata;
struct lxc_cmd_rsp rsp;
struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
+ const char *(*get_fn)(struct cgroup_ops *ops, const char *controller);
if (req->datalen > 0) {
ret = validate_string_request(fd, req);
if (ret != 0)
return ret;
-
- path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
+ reqdata = req->data;
} else {
- path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
+ reqdata = NULL;
}
+
+ /* Pick the getter that matches the requested kind of cgroup. */
+ get_fn = (limiting_cgroup ? cgroup_ops->get_limiting_cgroup
+ : cgroup_ops->get_cgroup);
+
+ path = get_fn(cgroup_ops, reqdata);
+
if (!path)
return -1;
@@ -557,6 +598,20 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
return 0;
}
+/*
+ * Command callback that forwards to lxc_cmd_get_cgroup_callback_do() with
+ * limiting_cgroup set to false and returns its result.
+ */
+static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
+ struct lxc_handler *handler,
+ struct lxc_epoll_descr *descr)
+{
+ return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, false);
+}
+
+/*
+ * Command callback registered for LXC_CMD_GET_LIMITING_CGROUP in
+ * lxc_cmd_process(). Forwards to lxc_cmd_get_cgroup_callback_do() with
+ * limiting_cgroup set to true and returns its result.
+ */
+static int lxc_cmd_get_limiting_cgroup_callback(int fd, struct lxc_cmd_req *req,
+ struct lxc_handler *handler,
+ struct lxc_epoll_descr *descr)
+{
+ return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, true);
+}
+
/*
* lxc_cmd_get_config_item: Get config item the running container
*
@@ -1366,28 +1421,48 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
return PTR_TO_INT(cmd.rsp.data);
}
-static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
- struct lxc_handler *handler,
- struct lxc_epoll_descr *descr)
+/*
+ * Shared handler for the cgroup2 fd commands.
+ *
+ * Unless the cgroup layout is pure unified and ops->unified is set, a
+ * response with ret == -EINVAL is sent via lxc_cmd_rsp_send(). Otherwise the
+ * unified hierarchy's cgfd_limit (when @limiting_cgroup is true) or cgfd_con
+ * is sent to the client together with a response whose ret is 0.
+ *
+ * Returns 0 after the fd was sent, LXC_CMD_REAP_CLIENT_FD if sending the fd
+ * failed, or the result of lxc_cmd_rsp_send() in the -EINVAL case.
+ */
+static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req,
+ struct lxc_handler *handler,
+ struct lxc_epoll_descr *descr,
+ bool limiting_cgroup)
{
struct lxc_cmd_rsp rsp = {
.ret = -EINVAL,
};
struct cgroup_ops *ops = handler->cgroup_ops;
- int ret;
+ int ret, send_fd;
if (!pure_unified_layout(ops) || !ops->unified)
return lxc_cmd_rsp_send(fd, &rsp);
+ send_fd = limiting_cgroup ? ops->unified->cgfd_limit
+ : ops->unified->cgfd_con;
+
rsp.ret = 0;
- ret = lxc_abstract_unix_send_fds(fd, &ops->unified->cgfd_con, 1, &rsp,
- sizeof(rsp));
+ ret = lxc_abstract_unix_send_fds(fd, &send_fd, 1, &rsp, sizeof(rsp));
if (ret < 0)
return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send cgroup2 fd");
return 0;
}
+/*
+ * Command callback registered for LXC_CMD_GET_CGROUP2_FD in
+ * lxc_cmd_process(). Forwards to lxc_cmd_get_cgroup2_fd_callback_do() with
+ * limiting_cgroup set to false, i.e. the cgfd_con descriptor is selected.
+ */
+static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req,
+ struct lxc_handler *handler,
+ struct lxc_epoll_descr *descr)
+{
+ return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr,
+ false);
+}
+
+/*
+ * Command callback registered for LXC_CMD_GET_LIMITING_CGROUP2_FD in
+ * lxc_cmd_process(). Forwards to lxc_cmd_get_cgroup2_fd_callback_do() with
+ * limiting_cgroup set to true, i.e. the cgfd_limit descriptor is selected.
+ */
+static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd,
+ struct lxc_cmd_req *req,
+ struct lxc_handler *handler,
+ struct lxc_epoll_descr *descr)
+{
+ return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr,
+ true);
+}
+
static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler,
struct lxc_epoll_descr *descr)
@@ -1415,6 +1490,8 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
[LXC_CMD_UNFREEZE] = lxc_cmd_unfreeze_callback,
[LXC_CMD_GET_CGROUP2_FD] = lxc_cmd_get_cgroup2_fd_callback,
[LXC_CMD_GET_INIT_PIDFD] = lxc_cmd_get_init_pidfd_callback,
+ [LXC_CMD_GET_LIMITING_CGROUP] = lxc_cmd_get_limiting_cgroup_callback,
+ [LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback,
};
if (req->cmd >= LXC_CMD_MAX)
diff --git a/src/lxc/commands.h b/src/lxc/commands.h
index 9e5248424..878998832 100644
--- a/src/lxc/commands.h
+++ b/src/lxc/commands.h
@@ -38,6 +38,8 @@ typedef enum {
LXC_CMD_UNFREEZE,
LXC_CMD_GET_CGROUP2_FD,
LXC_CMD_GET_INIT_PIDFD,
+ LXC_CMD_GET_LIMITING_CGROUP,
+ LXC_CMD_GET_LIMITING_CGROUP2_FD,
LXC_CMD_MAX,
} lxc_cmd_t;
@@ -129,5 +131,9 @@ extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath,
extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout);
extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout);
extern int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath);
+extern char *lxc_cmd_get_limiting_cgroup_path(const char *name,
+ const char *lxcpath,
+ const char *subsystem);
+extern int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath);
#endif /* __commands_h */
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 2f6be9f26..8d480b049 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3832,6 +3832,9 @@ void lxc_conf_free(struct lxc_conf *conf)
lxc_clear_apparmor_raw(conf);
lxc_clear_namespace(conf);
free(conf->cgroup_meta.dir);
+ free(conf->cgroup_meta.monitor_dir);
+ free(conf->cgroup_meta.container_dir);
+ free(conf->cgroup_meta.namespace_dir);
free(conf->cgroup_meta.controllers);
free(conf->shmount.path_host);
free(conf->shmount.path_cont);
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 64885c35e..3ff226b72 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -60,6 +60,9 @@ struct lxc_cgroup {
struct /* meta */ {
char *controllers;
char *dir;
+ char *monitor_dir;
+ char *container_dir;
+ char *namespace_dir;
bool relative;
};
};
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 0ca577fa3..59553f23e 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -71,6 +71,9 @@ lxc_config_define(cap_keep);
lxc_config_define(cgroup_controller);
lxc_config_define(cgroup2_controller);
lxc_config_define(cgroup_dir);
+lxc_config_define(cgroup_monitor_dir);
+lxc_config_define(cgroup_container_dir);
+lxc_config_define(cgroup_container_inner_dir);
lxc_config_define(cgroup_relative);
lxc_config_define(console_buffer_size);
lxc_config_define(console_logfile);
@@ -170,6 +173,9 @@ static struct lxc_config_t config_jump_table[] = {
{ "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, },
{ "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, },
{ "lxc.cgroup2", set_config_cgroup2_controller, get_config_cgroup2_controller, clr_config_cgroup2_controller, },
+ { "lxc.cgroup.dir.monitor", set_config_cgroup_monitor_dir, get_config_cgroup_monitor_dir, clr_config_cgroup_monitor_dir, },
+ { "lxc.cgroup.dir.container", set_config_cgroup_container_dir, get_config_cgroup_container_dir, clr_config_cgroup_container_dir, },
+ { "lxc.cgroup.dir.container.inner",set_config_cgroup_container_inner_dir, get_config_cgroup_container_inner_dir, clr_config_cgroup_container_inner_dir,},
{ "lxc.cgroup.dir", set_config_cgroup_dir, get_config_cgroup_dir, clr_config_cgroup_dir, },
{ "lxc.cgroup.relative", set_config_cgroup_relative, get_config_cgroup_relative, clr_config_cgroup_relative, },
{ "lxc.cgroup", set_config_cgroup_controller, get_config_cgroup_controller, clr_config_cgroup_controller, },
@@ -1721,6 +1727,48 @@ static int set_config_cgroup_dir(const char *key, const char *value,
return set_config_string_item(&lxc_conf->cgroup_meta.dir, value);
}
+/*
+ * Setter for the "lxc.cgroup.dir.monitor" key. An empty @value clears the
+ * setting; otherwise @value is handed to set_config_string_item() for
+ * cgroup_meta.monitor_dir and that call's result is returned.
+ */
+static int set_config_cgroup_monitor_dir(const char *key, const char *value,
+ struct lxc_conf *lxc_conf, void *data)
+{
+ if (lxc_config_value_empty(value))
+ return clr_config_cgroup_monitor_dir(key, lxc_conf, NULL);
+
+ return set_config_string_item(&lxc_conf->cgroup_meta.monitor_dir,
+ value);
+}
+
+/*
+ * Setter for the "lxc.cgroup.dir.container" key. An empty @value clears the
+ * setting; otherwise @value is handed to set_config_string_item() for
+ * cgroup_meta.container_dir and that call's result is returned.
+ */
+static int set_config_cgroup_container_dir(const char *key, const char *value,
+ struct lxc_conf *lxc_conf,
+ void *data)
+{
+ if (lxc_config_value_empty(value))
+ return clr_config_cgroup_container_dir(key, lxc_conf, NULL);
+
+ return set_config_string_item(&lxc_conf->cgroup_meta.container_dir,
+ value);
+}
+
+/*
+ * Setter for the "lxc.cgroup.dir.container.inner" key, stored in
+ * cgroup_meta.namespace_dir. An empty @value clears the setting. A value
+ * that contains '/' or equals "." or ".." is rejected with -1, since only a
+ * single directory name is accepted.
+ */
+static int set_config_cgroup_container_inner_dir(const char *key,
+ const char *value,
+ struct lxc_conf *lxc_conf,
+ void *data)
+{
+ if (lxc_config_value_empty(value))
+ return clr_config_cgroup_container_inner_dir(key, lxc_conf,
+ NULL);
+
+ /* Reject anything that could name more than one path component. */
+ if (strchr(value, '/') ||
+ strcmp(value, ".") == 0 ||
+ strcmp(value, "..") == 0)
+ {
+ ERROR("lxc.cgroup.dir.container.inner must be a single directory name");
+ return -1;
+ }
+
+ return set_config_string_item(&lxc_conf->cgroup_meta.namespace_dir,
+ value);
+}
+
static int set_config_cgroup_relative(const char *key, const char *value,
struct lxc_conf *lxc_conf, void *data)
{
@@ -3644,6 +3692,58 @@ static int get_config_cgroup_dir(const char *key, char *retv, int inlen,
return fulllen;
}
+/*
+ * Getter for the "lxc.cgroup.dir.monitor" key. Zeroes @retv when it is
+ * non-NULL (a NULL @retv forces @inlen to 0), formats
+ * cgroup_meta.monitor_dir with strprint() and returns fulllen.
+ *
+ * NOTE(review): strprint() is presumably a macro that updates len/fulllen;
+ * confirm. monitor_dir may be NULL when the key is unset, and passing NULL
+ * to "%s" is not defined by the C standard - verify this is intended.
+ */
+static int get_config_cgroup_monitor_dir(const char *key, char *retv, int inlen,
+ struct lxc_conf *lxc_conf, void *data)
+{
+ int len;
+ int fulllen = 0;
+
+ if (!retv)
+ inlen = 0;
+ else
+ memset(retv, 0, inlen);
+
+ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.monitor_dir);
+
+ return fulllen;
+}
+
+/*
+ * Getter for the "lxc.cgroup.dir.container" key. Zeroes @retv when it is
+ * non-NULL (a NULL @retv forces @inlen to 0), formats
+ * cgroup_meta.container_dir with strprint() and returns fulllen.
+ *
+ * NOTE(review): strprint() is presumably a macro that updates len/fulllen;
+ * confirm. container_dir may be NULL when the key is unset, and passing NULL
+ * to "%s" is not defined by the C standard - verify this is intended.
+ */
+static int get_config_cgroup_container_dir(const char *key, char *retv,
+ int inlen,
+ struct lxc_conf *lxc_conf,
+ void *data)
+{
+ int len;
+ int fulllen = 0;
+
+ if (!retv)
+ inlen = 0;
+ else
+ memset(retv, 0, inlen);
+
+ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.container_dir);
+
+ return fulllen;
+}
+
+/*
+ * Getter for the "lxc.cgroup.dir.container.inner" key. Zeroes @retv when it
+ * is non-NULL (a NULL @retv forces @inlen to 0), formats
+ * cgroup_meta.namespace_dir with strprint() and returns fulllen.
+ *
+ * NOTE(review): strprint() is presumably a macro that updates len/fulllen;
+ * confirm. namespace_dir may be NULL when the key is unset, and passing NULL
+ * to "%s" is not defined by the C standard - verify this is intended.
+ */
+static int get_config_cgroup_container_inner_dir(const char *key, char *retv,
+ int inlen,
+ struct lxc_conf *lxc_conf,
+ void *data)
+{
+ int len;
+ int fulllen = 0;
+
+ if (!retv)
+ inlen = 0;
+ else
+ memset(retv, 0, inlen);
+
+ strprint(retv, inlen, "%s", lxc_conf->cgroup_meta.namespace_dir);
+
+ return fulllen;
+}
+
static inline int get_config_cgroup_relative(const char *key, char *retv,
int inlen, struct lxc_conf *lxc_conf,
void *data)
@@ -4458,6 +4558,30 @@ static int clr_config_cgroup_dir(const char *key, struct lxc_conf *lxc_conf,
return 0;
}
+/*
+ * Clear handler for the "lxc.cgroup.dir.monitor" key: releases
+ * cgroup_meta.monitor_dir through free_disarm(). Always returns 0.
+ */
+static int clr_config_cgroup_monitor_dir(const char *key,
+ struct lxc_conf *lxc_conf,
+ void *data)
+{
+ free_disarm(lxc_conf->cgroup_meta.monitor_dir);
+ return 0;
+}
+
+/*
+ * Clear handler for the "lxc.cgroup.dir.container" key: releases
+ * cgroup_meta.container_dir through free_disarm(). Always returns 0.
+ */
+static int clr_config_cgroup_container_dir(const char *key,
+ struct lxc_conf *lxc_conf,
+ void *data)
+{
+ free_disarm(lxc_conf->cgroup_meta.container_dir);
+ return 0;
+}
+
+/*
+ * Clear handler for the "lxc.cgroup.dir.container.inner" key: releases
+ * cgroup_meta.namespace_dir through free_disarm(). Always returns 0.
+ */
+static int clr_config_cgroup_container_inner_dir(const char *key,
+ struct lxc_conf *lxc_conf,
+ void *data)
+{
+ free_disarm(lxc_conf->cgroup_meta.namespace_dir);
+ return 0;
+}
+
static inline int clr_config_cgroup_relative(const char *key,
struct lxc_conf *lxc_conf,
void *data)
diff --git a/src/lxc/criu.c b/src/lxc/criu.c
index 1a909bb6c..2485accc0 100644
--- a/src/lxc/criu.c
+++ b/src/lxc/criu.c
@@ -303,7 +303,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
* the handler the restore task created.
*/
if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) {
- path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
+ path = lxc_cmd_get_limiting_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
if (!path) {
ERROR("failed to get cgroup path for %s", controllers[0]);
goto err;
@@ -311,7 +311,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
} else {
const char *p;
- p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
+ p = cgroup_ops->get_limiting_cgroup(cgroup_ops, controllers[0]);
if (!p) {
ERROR("failed to get cgroup path for %s", controllers[0]);
goto err;
@@ -406,9 +406,9 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
DECLARE_ARG("-t");
DECLARE_ARG(pid);
- freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
- opts->c->config_path,
- "freezer");
+ freezer_relative = lxc_cmd_get_limiting_cgroup_path(opts->c->name,
+ opts->c->config_path,
+ "freezer");
if (!freezer_relative) {
ERROR("failed getting freezer path");
goto err;