conf: add cgroup2, cgroup2:ro, cgroup2:force, cgroup2:ro:force options

We keep running into situations where we want to pre-mount a pure
cgroup2 layout regardless of the layout of the host.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
This commit is contained in:
Christian Brauner 2021-10-21 16:17:59 +02:00
parent 62054cf35e
commit d90d30072c
No known key found for this signature in database
GPG Key ID: 8EB056D53EECB12D
5 changed files with 54 additions and 25 deletions

View File

@ -153,3 +153,10 @@ Whether this LXC instance can handle idmapped mounts for the rootfs.
Whether this LXC instance can handle idmapped mounts for lxc.mount.entry
entries.
## cgroup2\_auto\_mounting
This adds the new options `cgroup2`, `cgroup2:ro`, `cgroup2:force`, and
`cgroup2:ro:force` for the `lxc.mount.auto` configuration key. For example, if
a user specifies `cgroup2:force`, LXC will pre-mount a pure `cgroup2` layout for
the container even if the host is running with a hybrid layout.

View File

@ -47,6 +47,7 @@ static char *api_extensions[] = {
"idmapped_mounts", "idmapped_mounts",
"idmapped_mounts_v2", "idmapped_mounts_v2",
"core_scheduling", "core_scheduling",
"cgroup2_auto_mounting",
}; };
static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);

View File

@ -1487,7 +1487,8 @@ static int __cgroupfs_mount(int cgroup_automount_type, struct hierarchy *h,
flags |= MOUNT_ATTR_RELATIME; flags |= MOUNT_ATTR_RELATIME;
if ((cgroup_automount_type == LXC_AUTO_CGROUP_RO) || if ((cgroup_automount_type == LXC_AUTO_CGROUP_RO) ||
(cgroup_automount_type == LXC_AUTO_CGROUP_FULL_RO)) (cgroup_automount_type == LXC_AUTO_CGROUP_FULL_RO) ||
(cgroup_automount_type == LXC_AUTO_CGROUP2_RO))
flags |= MOUNT_ATTR_RDONLY; flags |= MOUNT_ATTR_RDONLY;
if (is_unified_hierarchy(h)) if (is_unified_hierarchy(h))
@ -1618,6 +1619,12 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
case LXC_AUTO_CGROUP_FULL_MIXED: case LXC_AUTO_CGROUP_FULL_MIXED:
TRACE("Full mixed cgroup mounts requested"); TRACE("Full mixed cgroup mounts requested");
break; break;
case LXC_AUTO_CGROUP2_RW:
TRACE("Read-write cgroup2 mount requested");
break;
case LXC_AUTO_CGROUP2_RO:
TRACE("Read-only cgroup2 mount requested");
break;
default: default:
return log_error_errno(false, EINVAL, "Invalid cgroup mount options specified"); return log_error_errno(false, EINVAL, "Invalid cgroup mount options specified");
} }
@ -1647,9 +1654,14 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
return log_trace(true, "Mounting cgroups not requested or needed"); return log_trace(true, "Mounting cgroups not requested or needed");
/* This is really the codepath that we want. */ /* This is really the codepath that we want. */
if (pure_unified_layout(ops)) { if (pure_unified_layout(ops) ||
(cgroup_automount_type == LXC_AUTO_CGROUP2_RW) ||
(cgroup_automount_type == LXC_AUTO_CGROUP2_RO)) {
__do_close int dfd_mnt_unified = -EBADF; __do_close int dfd_mnt_unified = -EBADF;
if (!ops->unified)
return log_error_errno(false, EINVAL, "No unified cgroup hierarchy mounted on the host");
dfd_mnt_unified = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE, dfd_mnt_unified = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0); PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
if (dfd_mnt_unified < 0) if (dfd_mnt_unified < 0)
@ -1684,6 +1696,11 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
* 10. cgroup-full:rw:force -> Not supported. * 10. cgroup-full:rw:force -> Not supported.
* 11. cgroup-full:ro:force -> Not supported. * 11. cgroup-full:ro:force -> Not supported.
* 12. cgroup-full:mixed:force -> Not supported. * 12. cgroup-full:mixed:force -> Not supported.
*
* 13. cgroup2 -> No-op; init system responsible for mounting.
* 14. cgroup2:ro -> No-op; init system responsible for mounting.
* 15. cgroup2:force -> Mount the cgroup2 filesystem read-write
* 16. cgroup2:ro:force -> Mount the cgroup2 filesystem read-only
*/ */
ret = cgroupfs_mount(cgroup_automount_type, ops->unified, rootfs, dfd_mnt_unified, ""); ret = cgroupfs_mount(cgroup_automount_type, ops->unified, rootfs, dfd_mnt_unified, "");
if (ret < 0) if (ret < 0)
@ -1697,25 +1714,11 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
* Or the user requested to keep the cgroup namespace * Or the user requested to keep the cgroup namespace
* of the host or another container. * of the host or another container.
*/ */
if (wants_force_mount) { errno = EOPNOTSUPP;
/* if (wants_force_mount)
* 1. cgroup:rw:force -> Bind-mount the cgroup2 filesystem writable.
* 2. cgroup:ro:force -> Bind-mount the cgroup2 filesystem read-only.
* 3. cgroup:mixed:force -> bind-mount the cgroup2 filesystem and
* and make the parent directory of the
* container's cgroup read-only but the
* container's cgroup writable.
*
* 10. cgroup-full:rw:force ->
* 11. cgroup-full:ro:force ->
* 12. cgroup-full:mixed:force ->
*/
errno = EOPNOTSUPP;
SYSWARN("Force-mounting the unified cgroup hierarchy without cgroup namespace support is currently not supported"); SYSWARN("Force-mounting the unified cgroup hierarchy without cgroup namespace support is currently not supported");
} else { else
errno = EOPNOTSUPP;
SYSWARN("Mounting the unified cgroup hierarchy without cgroup namespace support is currently not supported"); SYSWARN("Mounting the unified cgroup hierarchy without cgroup namespace support is currently not supported");
}
} }
return syserror_ret(false, "Failed to mount cgroups"); return syserror_ret(false, "Failed to mount cgroups");
@ -1729,15 +1732,15 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
if (can_use_mount_api()) { if (can_use_mount_api()) {
fd_fs = fs_prepare("tmpfs", -EBADF, "", 0, 0); fd_fs = fs_prepare("tmpfs", -EBADF, "", 0, 0);
if (fd_fs < 0) if (fd_fs < 0)
return log_error_errno(-errno, errno, "Failed to create new filesystem context for tmpfs"); return log_error_errno(false, errno, "Failed to create new filesystem context for tmpfs");
ret = fs_set_property(fd_fs, "mode", "0755"); ret = fs_set_property(fd_fs, "mode", "0755");
if (ret < 0) if (ret < 0)
return log_error_errno(-errno, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs); return log_error_errno(false, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
ret = fs_set_property(fd_fs, "size", "10240k"); ret = fs_set_property(fd_fs, "size", "10240k");
if (ret < 0) if (ret < 0)
return log_error_errno(-errno, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs); return log_error_errno(false, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
ret = fs_attach(fd_fs, rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE, ret = fs_attach(fd_fs, rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV,

View File

@ -282,6 +282,18 @@ enum {
/* /sys/fs/cgroup (full mount, parent r/o, own r/w) */ /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
LXC_AUTO_CGROUP_FULL_MIXED = LXC_AUTO_CGROUP_FULL_RO | LXC_AUTO_CGROUP_FULL_MIXED = LXC_AUTO_CGROUP_FULL_RO |
LXC_AUTO_CGROUP_FULL_RW, LXC_AUTO_CGROUP_FULL_RW,
/*
* Mount a pure read-write cgroup2 layout in the container independent
* of the cgroup layout used on the host.
*/
LXC_AUTO_CGROUP2_RW = BIT(8),
/*
* Mount a pure read-only cgroup2 layout in the container independent
* of the cgroup layout used on the host.
*/
LXC_AUTO_CGROUP2_RO = BIT(9),
/* /*
* These are defined in such a way as to retain binary compatibility * These are defined in such a way as to retain binary compatibility
* with earlier versions of this code. If the previous mask is applied, * with earlier versions of this code. If the previous mask is applied,
@ -293,16 +305,18 @@ enum {
/* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */ /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0, LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0,
/* mount cgroups even when cgroup namespaces are supported */ /* mount cgroups even when cgroup namespaces are supported */
LXC_AUTO_CGROUP_FORCE = BIT(8), LXC_AUTO_CGROUP_FORCE = BIT(10),
/* all known cgroup options */ /* all known cgroup options */
LXC_AUTO_CGROUP_MASK = LXC_AUTO_CGROUP_MIXED | LXC_AUTO_CGROUP_MASK = LXC_AUTO_CGROUP_MIXED |
LXC_AUTO_CGROUP_FULL_MIXED | LXC_AUTO_CGROUP_FULL_MIXED |
LXC_AUTO_CGROUP_NOSPEC | LXC_AUTO_CGROUP_NOSPEC |
LXC_AUTO_CGROUP_FULL_NOSPEC | LXC_AUTO_CGROUP_FULL_NOSPEC |
LXC_AUTO_CGROUP_FORCE, LXC_AUTO_CGROUP_FORCE |
LXC_AUTO_CGROUP2_RW |
LXC_AUTO_CGROUP2_RO,
/* shared mount point */ /* shared mount point */
LXC_AUTO_SHMOUNTS = BIT(9), LXC_AUTO_SHMOUNTS = BIT(11),
/* shared mount point mask */ /* shared mount point mask */
LXC_AUTO_SHMOUNTS_MASK = LXC_AUTO_SHMOUNTS, LXC_AUTO_SHMOUNTS_MASK = LXC_AUTO_SHMOUNTS,

View File

@ -2265,6 +2265,10 @@ static int set_config_mount_auto(const char *key, const char *value,
{ "sys:ro", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO }, { "sys:ro", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO },
{ "sys:mixed", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED }, { "sys:mixed", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED },
{ "sys:rw", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW }, { "sys:rw", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW },
{ "cgroup2", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP2_RW | LXC_AUTO_CGROUP_FORCE },
{ "cgroup2:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP2_RO | LXC_AUTO_CGROUP_FORCE },
{ "cgroup2:force", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP2_RW | LXC_AUTO_CGROUP_FORCE },
{ "cgroup2:ro:force", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP2_RO | LXC_AUTO_CGROUP_FORCE },
{ "cgroup", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC }, { "cgroup", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC },
{ "cgroup:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED }, { "cgroup:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED },
{ "cgroup:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO }, { "cgroup:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO },