mirror of
https://git.proxmox.com/git/mirror_lxc
synced 2025-07-13 03:45:30 +00:00
cgfs: don't mount /sys/fs/cgroup readonly
Ubuntu containers have had trouble with automatic cgroup mounting that was not read-write (i.e. lxc.mount.auto = cgroup{,-full}:{ro,mixed}) in containers without CAP_SYS_ADMIN. Ubuntu's mountall program reads /lib/init/fstab, which contains an entry for /sys/fs/cgroup. Since there is no ro option specified for that filesystem, mountall will try to remount it read-write if it is already mounted. Without CAP_SYS_ADMIN, that fails and mountall will interrupt boot and wait for user input on whether to proceed anyway or to manually fix it, effectively hanging container bootup.

This patch makes sure that /sys/fs/cgroup is always a read-write tmpfs, but that the actual cgroup hierarchy paths (/sys/fs/cgroup/$subsystem) are read-only if :ro or :mixed is used. This still has the desired effect within the container (no cgroup escalation possible and programs get errors if they try to do so anyway), while keeping Ubuntu containers happy.

Signed-off-by: Christian Seiler <christian@iwakd.de>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
This commit is contained in:
parent
3c597cee88
commit
b46f055358
@ -811,6 +811,26 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
|
<para>
|
||||||
|
Note that if automatic mounting of the cgroup filesystem
|
||||||
|
is enabled, the tmpfs under
|
||||||
|
<filename>/sys/fs/cgroup</filename> will always be
|
||||||
|
mounted read-write (but for the <option>:mixed</option>
|
||||||
|
and <option>:ro</option> cases, the individual
|
||||||
|
hierarchies,
|
||||||
|
<filename>/sys/fs/cgroup/$hierarchy</filename>, will be
|
||||||
|
read-only). This is in order to work around a quirk in
|
||||||
|
Ubuntu's
|
||||||
|
<citerefentry>
|
||||||
|
<refentrytitle>mountall</refentrytitle>
|
||||||
|
<manvolnum>8</manvolnum>
|
||||||
|
</citerefentry>
|
||||||
|
command that will cause containers to wait for user
|
||||||
|
input at boot if
|
||||||
|
<filename>/sys/fs/cgroup</filename> is mounted read-only
|
||||||
|
and the container can't remount it read-write due to a
|
||||||
|
lack of CAP_SYS_ADMIN.
|
||||||
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Examples:
|
Examples:
|
||||||
</para>
|
</para>
|
||||||
|
@ -1442,6 +1442,24 @@ static bool cgroupfs_mount_cgroup(void *hdata, const char *root, int type)
|
|||||||
goto out_error;
|
goto out_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* for read-only and mixed cases, we have to bind-mount the tmpfs directory
|
||||||
|
* that points to the hierarchy itself (i.e. /sys/fs/cgroup/cpu etc.) onto
|
||||||
|
* itself and then bind-mount it read-only, since we keep the tmpfs itself
|
||||||
|
* read-write (see comment below)
|
||||||
|
*/
|
||||||
|
if (type == LXC_AUTO_CGROUP_MIXED || type == LXC_AUTO_CGROUP_RO) {
|
||||||
|
r = mount(abs_path, abs_path, NULL, MS_BIND, NULL);
|
||||||
|
if (r < 0) {
|
||||||
|
SYSERROR("error bind-mounting %s onto itself", abs_path);
|
||||||
|
goto out_error;
|
||||||
|
}
|
||||||
|
r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
|
||||||
|
if (r < 0) {
|
||||||
|
SYSERROR("error re-mounting %s readonly", abs_path);
|
||||||
|
goto out_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
free(abs_path);
|
free(abs_path);
|
||||||
abs_path = NULL;
|
abs_path = NULL;
|
||||||
|
|
||||||
@ -1487,13 +1505,21 @@ static bool cgroupfs_mount_cgroup(void *hdata, const char *root, int type)
|
|||||||
parts = NULL;
|
parts = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* try to remount the tmpfs readonly, since the container shouldn't
|
/* We used to remount the entire tmpfs readonly if any :ro or
|
||||||
* change anything (this will also make sure that trying to create
|
* :mixed mode was specified. However, Ubuntu's mountall has the
|
||||||
* new cgroups outside the allowed area fails with an error instead
|
* unfortunate behavior of blocking bootup if /sys/fs/cgroup is
|
||||||
* of simply causing this to create directories in the tmpfs itself)
|
* mounted read-only and cannot be remounted read-write.
|
||||||
|
* (mountall reads /lib/init/fstab and tries to (re-)mount all of
|
||||||
|
* these if they are not already mounted with the right options;
|
||||||
|
* it contains an entry for /sys/fs/cgroup. In case it can't do
|
||||||
|
* that, it prompts for the user to either manually fix it or
|
||||||
|
* boot anyway. But without user input, booting of the container
|
||||||
|
* hangs.)
|
||||||
|
*
|
||||||
|
* Instead of remounting the entire tmpfs readonly, we only
|
||||||
|
* remount the paths readonly that are part of the cgroup
|
||||||
|
* hierarchy.
|
||||||
*/
|
*/
|
||||||
if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
|
|
||||||
mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
|
|
||||||
|
|
||||||
free(path);
|
free(path);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user