diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
index 34b8117bb..27b518e03 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -1126,36 +1126,75 @@ dev/null proc/kcore none bind,relative 0 0
/sys as read-write
+
<option>cgroup:mixed</option>:
- mount a tmpfs to /sys/fs/cgroup,
- create directories for all hierarchies to which
- the container is added, create subdirectories
- there with the name of the cgroup, and bind-mount
- the container's own cgroup into that directory.
- The container will be able to write to its own
- cgroup directory, but not the parents, since they
- will be remounted read-only.
+ Mount a tmpfs to /sys/fs/cgroup,
+ create directories for all hierarchies to which the container
+ is added, create subdirectories in those hierarchies with the
+ name of the cgroup, and bind-mount the container's own cgroup
+ into that directory. The container will be able to write to
+ its own cgroup directory, but not the parents, since they will
+ be remounted read-only.
+
- <option>cgroup:ro</option>: similar to
- <option>cgroup:mixed</option>, but everything will
- be mounted read-only.
+ <option>cgroup:mixed:force</option>:
+ The <option>force</option> option will cause LXC to perform
+ the cgroup mounts for the container under all circumstances.
+ Otherwise it is similar to <option>cgroup:mixed</option>.
+ This is mainly useful when cgroup namespaces are enabled,
+ in which case LXC will normally leave mounting cgroups to the init
+ binary of the container since it is perfectly safe to do so.
+
+
+
+ <option>cgroup:ro</option>:
+ similar to <option>cgroup:mixed</option>, but everything will
+ be mounted read-only.
+
+
+
+
+
+ <option>cgroup:ro:force</option>:
+ The <option>force</option> option will cause LXC to perform
+ the cgroup mounts for the container under all circumstances.
+ Otherwise it is similar to <option>cgroup:ro</option>.
+ This is mainly useful when cgroup namespaces are enabled,
+ in which case LXC will normally leave mounting cgroups to the init
+ binary of the container since it is perfectly safe to do so.
+
+
+
<option>cgroup:rw</option>: similar to
- <option>cgroup:mixed</option>, but everything will
- be mounted read-write. Note that the paths leading
- up to the container's own cgroup will be writable,
- but will not be a cgroup filesystem but just part
- of the tmpfs of /sys/fs/cgroup
+ <option>cgroup:mixed</option>, but everything will be mounted
+ read-write. Note that the paths leading up to the container's
+ own cgroup will be writable, but will not be a cgroup
+ filesystem but just part of the tmpfs of
+ /sys/fs/cgroup
+
+
+
+ <option>cgroup:rw:force</option>:
+ The <option>force</option> option will cause LXC to perform
+ the cgroup mounts for the container under all circumstances.
+ Otherwise it is similar to <option>cgroup:rw</option>.
+ This is mainly useful when cgroup namespaces are enabled,
+ in which case LXC will normally leave mounting cgroups to the init
+ binary of the container since it is perfectly safe to do so.
+
+
+
<option>cgroup</option> (without specifier):
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 826ea600b..c13f7fa2f 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2043,44 +2043,49 @@ static int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
static bool cgfsng_mount(void *hdata, const char *root, int type)
{
- int i;
+ int i, ret;
char *tmpfspath = NULL;
bool retval = false;
struct lxc_handler *handler = hdata;
struct cgfsng_handler_data *d = handler->cgroup_data;
- bool has_cgns = false, has_sys_admin = true;
+ bool has_cgns = false, wants_force_mount = false;
if ((type & LXC_AUTO_CGROUP_MASK) == 0)
return true;
+ if (type & LXC_AUTO_CGROUP_FORCE) {
+ type &= ~LXC_AUTO_CGROUP_FORCE;
+ wants_force_mount = true;
+ }
+
+ if (!wants_force_mount){
+ if (!lxc_list_empty(&handler->conf->keepcaps))
+ wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
+ else
+ wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
+ }
+
has_cgns = cgns_supported();
- if (!lxc_list_empty(&handler->conf->keepcaps))
- has_sys_admin = in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
- else
- has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
-
- if (has_cgns && has_sys_admin)
+ if (has_cgns && !wants_force_mount)
return true;
- tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
-
if (type == LXC_AUTO_CGROUP_NOSPEC)
type = LXC_AUTO_CGROUP_MIXED;
else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
type = LXC_AUTO_CGROUP_FULL_MIXED;
/* Mount tmpfs */
- if (safe_mount("cgroup_root", tmpfspath, "tmpfs",
- MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME,
- "size=10240k,mode=755",
- root) < 0)
- goto bad;
+ tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
+ ret = safe_mount("cgroup_root", tmpfspath, "tmpfs",
+ MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+ "size=10240k,mode=755", root);
+ if (ret < 0)
+ goto on_error;
for (i = 0; hierarchies[i]; i++) {
char *controllerpath, *path2;
struct hierarchy *h = hierarchies[i];
char *controller = strrchr(h->mountpoint, '/');
- int r;
if (!controller)
continue;
@@ -2090,49 +2095,56 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
free(controllerpath);
continue;
}
- if (mkdir(controllerpath, 0755) < 0) {
+ ret = mkdir(controllerpath, 0755);
+ if (ret < 0) {
SYSERROR("Error creating cgroup path: %s", controllerpath);
free(controllerpath);
- goto bad;
+ goto on_error;
}
- if (has_cgns && !has_sys_admin) {
+ if (has_cgns && wants_force_mount) {
/* If cgroup namespaces are supported but the container
* will not have CAP_SYS_ADMIN after it has started we
* need to mount the cgroups manually.
*/
- r = cg_mount_in_cgroup_namespace(type, h, controllerpath);
+ ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
free(controllerpath);
- if (r < 0)
- goto bad;
+ if (ret < 0)
+ goto on_error;
+
continue;
}
- if (mount_cgroup_full(type, h, controllerpath, d->container_cgroup) < 0) {
+ ret = mount_cgroup_full(type, h, controllerpath, d->container_cgroup);
+ if (ret < 0) {
free(controllerpath);
- goto bad;
+ goto on_error;
}
+
if (!cg_mount_needs_subdirs(type)) {
free(controllerpath);
continue;
}
- path2 = must_make_path(controllerpath, h->base_cgroup, d->container_cgroup, NULL);
- if (mkdir_p(path2, 0755) < 0) {
+
+ path2 = must_make_path(controllerpath, h->base_cgroup,
+ d->container_cgroup, NULL);
+ ret = mkdir_p(path2, 0755);
+ if (ret < 0) {
free(controllerpath);
free(path2);
- goto bad;
+ goto on_error;
}
- r = do_secondstage_mounts_if_needed(type, h, controllerpath, path2,
- d->container_cgroup);
+ ret = do_secondstage_mounts_if_needed(
+ type, h, controllerpath, path2, d->container_cgroup);
free(controllerpath);
free(path2);
- if (r < 0)
- goto bad;
+ if (ret < 0)
+ goto on_error;
}
retval = true;
-bad:
+on_error:
free(tmpfspath);
return retval;
}
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 01f11422a..98d8d3871 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -715,7 +715,7 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
if (flags & LXC_AUTO_CGROUP_MASK) {
int cg_flags;
- cg_flags = flags & LXC_AUTO_CGROUP_MASK;
+ cg_flags = flags & (LXC_AUTO_CGROUP_MASK & ~LXC_AUTO_CGROUP_FORCE);
/* If the type of cgroup mount was not specified, it depends on the
* container's capabilities as to what makes sense: if we have
* CAP_SYS_ADMIN, the read-only part can be remounted read-write
@@ -737,7 +737,8 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
else
cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
}
-
+ if (flags & LXC_AUTO_CGROUP_FORCE)
+ cg_flags |= LXC_AUTO_CGROUP_FORCE;
if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
SYSERROR("error mounting /sys/fs/cgroup");
return -1;
@@ -3343,7 +3344,7 @@ int lxc_setup(struct lxc_handler *handler)
* before, /sys could not have been mounted
* (is either mounted automatically or via fstab entries)
*/
- if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
+ if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & (LXC_AUTO_CGROUP_MASK), handler) < 0) {
ERROR("failed to setup the automatic mounts for '%s'", name);
return -1;
}
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index c5f27336a..388c0518c 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -233,9 +233,9 @@ enum {
* variants, which is safe. */
LXC_AUTO_CGROUP_NOSPEC = 0x0B0, /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0, /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
- LXC_AUTO_CGROUP_MASK = 0x0F0,
-
- LXC_AUTO_ALL_MASK = 0x0FF, /* all known settings */
+ LXC_AUTO_CGROUP_FORCE = 0x100, /* mount cgroups even when cgroup namespaces are supported */
+ LXC_AUTO_CGROUP_MASK = 0x1F0, /* all known cgroup options, including LXC_AUTO_CGROUP_FORCE */
+ LXC_AUTO_ALL_MASK = 0x1FF, /* all known settings */
};
/*
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 66b7615fe..da90b1982 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -1706,26 +1706,30 @@ static int set_config_mount_auto(const char *key, const char *value,
int mask;
int flag;
} allowed_auto_mounts[] = {
- { "proc", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED },
- { "proc:mixed", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED },
- { "proc:rw", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW },
- { "sys", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED },
- { "sys:ro", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO },
- { "sys:mixed", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED },
- { "sys:rw", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW },
- { "cgroup", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC },
- { "cgroup:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED },
- { "cgroup:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO },
- { "cgroup:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW },
- { "cgroup-full", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_NOSPEC },
- { "cgroup-full:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED },
- { "cgroup-full:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RO },
- { "cgroup-full:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RW },
+ { "proc", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED },
+ { "proc:mixed", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED },
+ { "proc:rw", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW },
+ { "sys", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED },
+ { "sys:ro", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO },
+ { "sys:mixed", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED },
+ { "sys:rw", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW },
+ { "cgroup", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC },
+ { "cgroup:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED },
+ { "cgroup:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO },
+ { "cgroup:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW },
+ { "cgroup:force", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC | LXC_AUTO_CGROUP_FORCE },
+ { "cgroup:mixed:force", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED | LXC_AUTO_CGROUP_FORCE },
+ { "cgroup:ro:force", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO | LXC_AUTO_CGROUP_FORCE },
+ { "cgroup:rw:force", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW | LXC_AUTO_CGROUP_FORCE },
+ { "cgroup-full", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_NOSPEC },
+ { "cgroup-full:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED },
+ { "cgroup-full:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RO },
+ { "cgroup-full:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RW },
/* For adding anything that is just a single on/off, but has no
- * options: keep mask and flag identical and just define the enum
- * value as an unused bit so far
+ * options: keep mask and flag identical and just define the enum
+ * value as an unused bit so far
*/
- { NULL, 0, 0 }
+ { NULL, 0, 0 }
};
if (lxc_config_value_empty(value)) {