lxc.mount.auto: improve defaults for cgroup and cgroup-full

If the user specifies cgroup or cgroup-full without a specifier (:ro,
:rw or :mixed), this changes the behavior. Previously, these were
simple aliases for the :mixed variants; now they depend on whether the
container also has CAP_SYS_ADMIN; if it does they resolve to the :rw
variants, if it doesn't to the :mixed variants (as before).

If a container has CAP_SYS_ADMIN privileges, any filesystem can be
remounted read-write from within, so initially mounting the cgroup
filesystems partially read-only as a default creates a false sense of
security. It is better to default to full read-write mounts to show the
administrator what keeping CAP_SYS_ADMIN entails.

If an administrator really wants both CAP_SYS_ADMIN and the :mixed
variant of cgroup or cgroup-full automatic mounts, they can still
specify that explicitly; this commit just changes the default without
specifier.

Signed-off-by: Christian Seiler <christian@iwakd.de>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
This commit is contained in:
Christian Seiler 2014-05-03 20:57:46 +02:00 committed by Serge Hallyn
parent bab88e6894
commit 0769b82a42
5 changed files with 125 additions and 42 deletions

View File

@ -743,8 +743,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
</listitem> </listitem>
<listitem> <listitem>
<para> <para>
<option>cgroup:mixed</option> (or <option>cgroup:mixed</option>:
<option>cgroup</option>):
mount a tmpfs to <filename>/sys/fs/cgroup</filename>, mount a tmpfs to <filename>/sys/fs/cgroup</filename>,
create directories for all hierarchies to which create directories for all hierarchies to which
the container is added, create subdirectories the container is added, create subdirectories
@ -774,8 +773,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
</listitem> </listitem>
<listitem> <listitem>
<para> <para>
<option>cgroup-full:mixed</option> (or <option>cgroup</option> (without specifier):
<option>cgroup-full</option>): defaults to <option>cgroup:rw</option> if the
container retains the CAP_SYS_ADMIN capability,
<option>cgroup:mixed</option> otherwise.
</para>
</listitem>
<listitem>
<para>
<option>cgroup-full:mixed</option>:
mount a tmpfs to <filename>/sys/fs/cgroup</filename>, mount a tmpfs to <filename>/sys/fs/cgroup</filename>,
create directories for all hierarchies to which create directories for all hierarchies to which
the container is added, bind-mount the hierarchies the container is added, bind-mount the hierarchies
@ -810,6 +816,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
do so anyway.) do so anyway.)
</para> </para>
</listitem> </listitem>
<listitem>
<para>
<option>cgroup-full</option> (without specifier):
defaults to <option>cgroup-full:rw</option> if the
container retains the CAP_SYS_ADMIN capability,
<option>cgroup-full:mixed</option> otherwise.
</para>
</listitem>
</itemizedlist> </itemizedlist>
<para> <para>
Note that if automatic mounting of the cgroup filesystem Note that if automatic mounting of the cgroup filesystem

View File

@ -1344,6 +1344,15 @@ static bool cgroupfs_mount_cgroup(void *hdata, const char *root, int type)
return false; return false;
base_info = cgfs_d->info; base_info = cgfs_d->info;
/* If we get passed the _NOSPEC types, we default to _MIXED, since we don't
* have access to the lxc_conf object at this point. It really should be up
* to the caller to fix this, but this doesn't really hurt.
*/
if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
type = LXC_AUTO_CGROUP_FULL_MIXED;
else if (type == LXC_AUTO_CGROUP_NOSPEC)
type = LXC_AUTO_CGROUP_MIXED;
if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) { if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
ERROR("could not mount cgroups into container: invalid type specified internally"); ERROR("could not mount cgroups into container: invalid type specified internally");
errno = EINVAL; errno = EINVAL;

View File

@ -115,6 +115,12 @@ lxc_log_define(lxc_conf, lxc);
#define LO_FLAGS_AUTOCLEAR 4 #define LO_FLAGS_AUTOCLEAR 4
#endif #endif
/* needed for cgroup automount checks, regardless of whether we
* have included linux/capability.h or not */
#ifndef CAP_SYS_ADMIN
#define CAP_SYS_ADMIN 21
#endif
/* Define pivot_root() if missing from the C library */ /* Define pivot_root() if missing from the C library */
#ifndef HAVE_PIVOT_ROOT #ifndef HAVE_PIVOT_ROOT
static int pivot_root(const char * new_root, const char * put_old) static int pivot_root(const char * new_root, const char * put_old)
@ -164,6 +170,9 @@ struct caps_opt {
int value; int value;
}; };
/* Declare this here, since we don't want to reshuffle the whole file. */
static int in_caplist(int cap, struct lxc_list *caps);
static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *); static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *); static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *); static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
@ -743,8 +752,32 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
} }
if (flags & LXC_AUTO_CGROUP_MASK) { if (flags & LXC_AUTO_CGROUP_MASK) {
if (!cgroup_mount(conf->rootfs.mount, handler, int cg_flags;
flags & LXC_AUTO_CGROUP_MASK)) {
cg_flags = flags & LXC_AUTO_CGROUP_MASK;
/* If the type of cgroup mount was not specified, it depends on the
* container's capabilities as to what makes sense: if we have
* CAP_SYS_ADMIN, the read-only part can be remounted read-write
* anyway, so we may as well default to read-write; then the admin
* will not be given a false sense of security. (And if they really
* want mixed r/o r/w, then they can explicitly specify :mixed.)
* OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
* :mixed, because then the container can't remount it read-write. */
if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
int has_sys_admin = 0;
if (!lxc_list_empty(&conf->keepcaps)) {
has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
} else {
has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
}
if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
} else {
cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
}
}
if (!cgroup_mount(conf->rootfs.mount, handler, cg_flags)) {
SYSERROR("error mounting /sys/fs/cgroup"); SYSERROR("error mounting /sys/fs/cgroup");
return -1; return -1;
} }
@ -2192,6 +2225,20 @@ static int parse_cap(const char *cap)
return capid; return capid;
} }
int in_caplist(int cap, struct lxc_list *caps)
{
struct lxc_list *iterator;
int capid;
lxc_list_for_each(iterator, caps) {
capid = parse_cap(iterator->elem);
if (capid == cap)
return 1;
}
return 0;
}
static int setup_caps(struct lxc_list *caps) static int setup_caps(struct lxc_list *caps)
{ {
struct lxc_list *iterator; struct lxc_list *iterator;

View File

@ -240,9 +240,16 @@ enum {
LXC_AUTO_CGROUP_FULL_RO = 0x040, /* /sys/fs/cgroup (full mount, read-only) */ LXC_AUTO_CGROUP_FULL_RO = 0x040, /* /sys/fs/cgroup (full mount, read-only) */
LXC_AUTO_CGROUP_FULL_RW = 0x050, /* /sys/fs/cgroup (full mount, read-write) */ LXC_AUTO_CGROUP_FULL_RW = 0x050, /* /sys/fs/cgroup (full mount, read-write) */
LXC_AUTO_CGROUP_FULL_MIXED = 0x060, /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */ LXC_AUTO_CGROUP_FULL_MIXED = 0x060, /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
LXC_AUTO_CGROUP_MASK = 0x070, /* These are defined in such a way as to retain
* binary compatibility with earlier versions of
* this code. If the previous mask is applied,
* both of these will default back to the _MIXED
* variants, which is safe. */
LXC_AUTO_CGROUP_NOSPEC = 0x0B0, /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0, /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
LXC_AUTO_CGROUP_MASK = 0x0F0,
LXC_AUTO_ALL_MASK = 0x07F, /* all known settings */ LXC_AUTO_ALL_MASK = 0x0FF, /* all known settings */
}; };
/* /*

View File

@ -1347,20 +1347,20 @@ static int config_mount_auto(const char *key, const char *value,
{ {
char *autos, *autoptr, *sptr, *token; char *autos, *autoptr, *sptr, *token;
static struct { const char *token; int mask; int flag; } allowed_auto_mounts[] = { static struct { const char *token; int mask; int flag; } allowed_auto_mounts[] = {
{ "proc", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED }, { "proc", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED },
{ "proc:mixed", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED }, { "proc:mixed", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED },
{ "proc:rw", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW }, { "proc:rw", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW },
{ "sys", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO }, { "sys", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO },
{ "sys:ro", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO }, { "sys:ro", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO },
{ "sys:rw", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW }, { "sys:rw", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW },
{ "cgroup", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED }, { "cgroup", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC },
{ "cgroup:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED }, { "cgroup:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED },
{ "cgroup:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO }, { "cgroup:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO },
{ "cgroup:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW }, { "cgroup:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW },
{ "cgroup-full", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED }, { "cgroup-full", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_NOSPEC },
{ "cgroup-full:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED }, { "cgroup-full:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED },
{ "cgroup-full:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RO }, { "cgroup-full:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RO },
{ "cgroup-full:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RW }, { "cgroup-full:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RW },
/* NB: For adding anything that ist just a single on/off, but has /* NB: For adding anything that ist just a single on/off, but has
* no options: keep mask and flag identical and just define the * no options: keep mask and flag identical and just define the
* enum value as an unused bit so far * enum value as an unused bit so far
@ -1954,6 +1954,7 @@ static int lxc_get_mount_entries(struct lxc_conf *c, char *retv, int inlen)
static int lxc_get_auto_mounts(struct lxc_conf *c, char *retv, int inlen) static int lxc_get_auto_mounts(struct lxc_conf *c, char *retv, int inlen)
{ {
int len, fulllen = 0; int len, fulllen = 0;
const char *sep = "";
if (!retv) if (!retv)
inlen = 0; inlen = 0;
@ -1964,24 +1965,27 @@ static int lxc_get_auto_mounts(struct lxc_conf *c, char *retv, int inlen)
return 0; return 0;
switch (c->auto_mounts & LXC_AUTO_PROC_MASK) { switch (c->auto_mounts & LXC_AUTO_PROC_MASK) {
case LXC_AUTO_PROC_MIXED: strprint(retv, inlen, "proc:mixed\n"); break; case LXC_AUTO_PROC_MIXED: strprint(retv, inlen, "%sproc:mixed", sep); sep = " "; break;
case LXC_AUTO_PROC_RW: strprint(retv, inlen, "proc:rw"); break; case LXC_AUTO_PROC_RW: strprint(retv, inlen, "%sproc:rw", sep); sep = " "; break;
default: break; default: break;
} }
switch (c->auto_mounts & LXC_AUTO_SYS_MASK) { switch (c->auto_mounts & LXC_AUTO_SYS_MASK) {
case LXC_AUTO_SYS_RO: strprint(retv, inlen, "sys:ro"); break; case LXC_AUTO_SYS_RO: strprint(retv, inlen, "%ssys:ro", sep); sep = " "; break;
case LXC_AUTO_SYS_RW: strprint(retv, inlen, "sys:rw"); break; case LXC_AUTO_SYS_RW: strprint(retv, inlen, "%ssys:rw", sep); sep = " "; break;
default: break; default: break;
} }
switch (c->auto_mounts & LXC_AUTO_CGROUP_MASK) { switch (c->auto_mounts & LXC_AUTO_CGROUP_MASK) {
case LXC_AUTO_CGROUP_MIXED: strprint(retv, inlen, "cgroup:mixed"); break; case LXC_AUTO_CGROUP_NOSPEC: strprint(retv, inlen, "%scgroup", sep); sep = " "; break;
case LXC_AUTO_CGROUP_RO: strprint(retv, inlen, "cgroup:ro"); break; case LXC_AUTO_CGROUP_MIXED: strprint(retv, inlen, "%scgroup:mixed", sep); sep = " "; break;
case LXC_AUTO_CGROUP_RW: strprint(retv, inlen, "cgroup:rw"); break; case LXC_AUTO_CGROUP_RO: strprint(retv, inlen, "%scgroup:ro", sep); sep = " "; break;
case LXC_AUTO_CGROUP_FULL_MIXED: strprint(retv, inlen, "cgroup-full:mixed"); break; case LXC_AUTO_CGROUP_RW: strprint(retv, inlen, "%scgroup:rw", sep); sep = " "; break;
case LXC_AUTO_CGROUP_FULL_RO: strprint(retv, inlen, "cgroup-full:ro"); break; case LXC_AUTO_CGROUP_FULL_NOSPEC: strprint(retv, inlen, "%scgroup-full", sep); sep = " "; break;
case LXC_AUTO_CGROUP_FULL_RW: strprint(retv, inlen, "cgroup-full:rw"); break; case LXC_AUTO_CGROUP_FULL_MIXED: strprint(retv, inlen, "%scgroup-full:mixed", sep); sep = " "; break;
case LXC_AUTO_CGROUP_FULL_RO: strprint(retv, inlen, "%scgroup-full:ro", sep); sep = " "; break;
case LXC_AUTO_CGROUP_FULL_RW: strprint(retv, inlen, "%scgroup-full:rw", sep); sep = " "; break;
default: break; default: break;
} }
return fulllen; return fulllen;
} }
@ -2227,22 +2231,24 @@ void write_config(FILE *fout, struct lxc_conf *c)
if (c->auto_mounts & LXC_AUTO_ALL_MASK) { if (c->auto_mounts & LXC_AUTO_ALL_MASK) {
fprintf(fout, "lxc.mount.auto ="); fprintf(fout, "lxc.mount.auto =");
switch (c->auto_mounts & LXC_AUTO_PROC_MASK) { switch (c->auto_mounts & LXC_AUTO_PROC_MASK) {
case LXC_AUTO_PROC_MIXED: fprintf(fout, " proc:mixed"); break; case LXC_AUTO_PROC_MIXED: fprintf(fout, " proc:mixed"); break;
case LXC_AUTO_PROC_RW: fprintf(fout, " proc:rw"); break; case LXC_AUTO_PROC_RW: fprintf(fout, " proc:rw"); break;
default: break; default: break;
} }
switch (c->auto_mounts & LXC_AUTO_SYS_MASK) { switch (c->auto_mounts & LXC_AUTO_SYS_MASK) {
case LXC_AUTO_SYS_RO: fprintf(fout, " sys:ro"); break; case LXC_AUTO_SYS_RO: fprintf(fout, " sys:ro"); break;
case LXC_AUTO_SYS_RW: fprintf(fout, " sys:rw"); break; case LXC_AUTO_SYS_RW: fprintf(fout, " sys:rw"); break;
default: break; default: break;
} }
switch (c->auto_mounts & LXC_AUTO_CGROUP_MASK) { switch (c->auto_mounts & LXC_AUTO_CGROUP_MASK) {
case LXC_AUTO_CGROUP_MIXED: fprintf(fout, " cgroup:mixed"); break; case LXC_AUTO_CGROUP_NOSPEC: fprintf(fout, " cgroup"); break;
case LXC_AUTO_CGROUP_RO: fprintf(fout, " cgroup:ro"); break; case LXC_AUTO_CGROUP_MIXED: fprintf(fout, " cgroup:mixed"); break;
case LXC_AUTO_CGROUP_RW: fprintf(fout, " cgroup:rw"); break; case LXC_AUTO_CGROUP_RO: fprintf(fout, " cgroup:ro"); break;
case LXC_AUTO_CGROUP_FULL_MIXED: fprintf(fout, " cgroup-full:mixed"); break; case LXC_AUTO_CGROUP_RW: fprintf(fout, " cgroup:rw"); break;
case LXC_AUTO_CGROUP_FULL_RO: fprintf(fout, " cgroup-full:ro"); break; case LXC_AUTO_CGROUP_FULL_NOSPEC: fprintf(fout, " cgroup-full"); break;
case LXC_AUTO_CGROUP_FULL_RW: fprintf(fout, " cgroup-full:rw"); break; case LXC_AUTO_CGROUP_FULL_MIXED: fprintf(fout, " cgroup-full:mixed"); break;
case LXC_AUTO_CGROUP_FULL_RO: fprintf(fout, " cgroup-full:ro"); break;
case LXC_AUTO_CGROUP_FULL_RW: fprintf(fout, " cgroup-full:rw"); break;
default: break; default: break;
} }
fprintf(fout, "\n"); fprintf(fout, "\n");