Merge pull request #752 from hallyn/2016-01-04/cgroup.use

2016 01 04/cgroup.use
This commit is contained in:
Serge Hallyn 2016-01-08 13:05:42 -08:00
commit c7ec3de825
7 changed files with 203 additions and 29 deletions

View File

@ -1593,6 +1593,22 @@ mknod errno 0
</listitem> </listitem>
</varlistentry> </varlistentry>
</variablelist> </variablelist>
<variablelist>
<varlistentry>
<term>
<option>LXC_CGNS_AWARE</option>
</term>
<listitem>
<para>
If unset, this version of lxc is not aware of cgroup
namespaces. If set, its value is 1 and lxc is aware
of cgroup namespaces. Note that this does not guarantee that
cgroup namespaces are enabled in the kernel. This variable is
used by the lxcfs mount hook.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect2> </refsect2>
<refsect2> <refsect2>
<title>Logging</title> <title>Logging</title>

View File

@ -957,6 +957,13 @@ int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_fun
WARN("could not change directory to '%s'", new_cwd); WARN("could not change directory to '%s'", new_cwd);
free(cwd); free(cwd);
if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP && cgns_supported()) {
if (unshare(CLONE_NEWCGROUP) != 0) {
SYSERROR("cgroupns unshare: permission denied");
rexit(-1);
}
}
/* now create the real child process */ /* now create the real child process */
{ {
struct attach_clone_payload payload = { struct attach_clone_payload payload = {

View File

@ -210,11 +210,6 @@ static void check_supports_multiple_controllers(pid_t pid)
cgm_supports_multiple_controllers = false; cgm_supports_multiple_controllers = false;
cgm_all_controllers_same = false; cgm_all_controllers_same = false;
if (api_version < CGM_SUPPORTS_MULT_CONTROLLERS) {
cgm_supports_multiple_controllers = false;
return;
}
cgm_supports_multiple_controllers = true; cgm_supports_multiple_controllers = true;
if (pid == -1) if (pid == -1)
@ -544,17 +539,13 @@ static void *cgm_init(const char *name)
{ {
struct cgm_data *d; struct cgm_data *d;
d = malloc(sizeof(*d));
if (!d)
return NULL;
if (!cgm_dbus_connect()) { if (!cgm_dbus_connect()) {
ERROR("Error connecting to cgroup manager"); ERROR("Error connecting to cgroup manager");
return NULL; goto err1;
}
check_supports_multiple_controllers(-1);
d = malloc(sizeof(*d));
if (!d) {
cgm_dbus_disconnect();
return NULL;
} }
memset(d, 0, sizeof(*d)); memset(d, 0, sizeof(*d));
@ -1132,6 +1123,9 @@ static void cull_user_controllers(void)
} }
} }
/*
* return true if inword is in the comma-delimited list cgroup_use
*/
static bool in_comma_list(const char *inword, const char *cgroup_use) static bool in_comma_list(const char *inword, const char *cgroup_use)
{ {
char *e; char *e;
@ -1148,6 +1142,23 @@ static bool in_comma_list(const char *inword, const char *cgroup_use)
return false; return false;
} }
/*
 * Both inlist and checklist are comma-delimited lists of cgroup names.
 * Returns true as soon as one entry of inlist is also found in checklist
 * (membership is decided by in_comma_list()), and false if none is.
 */
static bool any_in_comma_list(const char *inlist, const char *checklist)
{
	char *state = NULL;
	char *word;
	/* strtok_r() writes into its input, so tokenize a stack copy */
	char *scratch = alloca(strlen(inlist) + 1);

	strcpy(scratch, inlist);
	word = strtok_r(scratch, ",", &state);
	while (word) {
		if (in_comma_list(word, checklist))
			return true;
		word = strtok_r(NULL, ",", &state);
	}

	return false;
}
static bool in_subsystem_list(const char *c) static bool in_subsystem_list(const char *c)
{ {
int i; int i;
@ -1202,6 +1213,132 @@ static bool verify_and_prune(const char *cgroup_use)
return true; return true;
} }
/*
 * Remove entry 'which' from the global subsystems array: free its string,
 * move every later entry (including the terminating NULL) down by one
 * slot and decrement nr_subsystems.  An out-of-range index is treated as
 * a coding error and terminates the process.
 */
static void drop_subsystem(int which)
{
	int slot = which;

	if (!(which >= 0 && which < nr_subsystems)) {
		ERROR("code error: dropping invalid subsystem index\n");
		exit(1);
	}

	free(subsystems[which]);

	/* note - the array has nr_subsystems+1 entries, the last one a NULL,
	 * so reading slot+1 pulls the terminator down with the rest */
	while (slot < nr_subsystems) {
		subsystems[slot] = subsystems[slot + 1];
		slot++;
	}

	nr_subsystems--;
}
/*
 * Check whether we can create the cgroups we would want: ask cgmanager to
 * create cgroup 'probe' under 'controller'.
 *
 * Returns true if the create call succeeded.  On failure the cgmanager
 * error is logged and false is returned.  The probe cgroup is not removed
 * here.
 */
static bool subsys_is_writeable(const char *controller, const char *probe)
{
	int32_t existed;
	NihError *nerr;

	if (cgmanager_create_sync(NULL, cgroup_manager, controller,
				  probe, &existed) == 0)
		return true;

	nerr = nih_error_get();
	ERROR("call to cgmanager_create_sync failed: %s", nerr->message);
	nih_free(nerr);
	ERROR("Failed to create %s:%s", controller, probe);

	return false;
}
/*
 * Return true if 's' names a subsystem which we cannot do without:
 * "systemd", "name=systemd" or "freezer".  The comparison is an exact,
 * case-sensitive string match.
 */
static bool is_crucial_subsys(const char *s)
{
	static const char *const crucial[] = {
		"systemd",
		"name=systemd",
		"freezer",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(crucial) / sizeof(crucial[0]); idx++) {
		if (strcmp(s, crucial[idx]) == 0)
			return true;
	}

	return false;
}
/*
 * Return a pointer to the final entry of the comma-separated string 'list',
 * i.e. to the character following its last ','.  If 'list' contains no
 * comma, 'list' itself is returned.  The result points into 'list'; nothing
 * is copied or modified.
 */
static char *get_last_controller_in_list(char *list)
{
	char *comma = strrchr(list, ',');

	return comma ? comma + 1 : list;
}
/*
 * Make sure that all the controllers are writeable.
 * If any are not, then
 *   - if they are crucial subsystems, refuse to start
 *   - else if they are listed in lxc.cgroup.use, refuse to start
 *   - else warn and do not use them
 *
 * Writeability is tested by creating a probe cgroup named after
 * lxc.cgroup.pattern with %n replaced by "lxcprobe-<pid>"; a probe that
 * could be created is removed again.
 *
 * cgroup_use: the lxc.cgroup.use value, or NULL if none was configured.
 *
 * Returns true if every remaining subsystem is writeable (non-writeable,
 * non-required ones having been dropped from the subsystems array), false
 * if a required subsystem is not writeable or on any setup failure.
 */
static bool verify_final_subsystems(const char *cgroup_use)
{
	int i, len;
	bool dropped_any = false;
	bool ret = false;
	const char *cgroup_pattern;
	/* probe stays NULL until allocated so that 'out' can always free it */
	char tmpnam[50], *probe = NULL;

	if (!cgm_dbus_connect()) {
		ERROR("Error connecting to cgroup manager");
		return false;
	}

	cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
	if (!cgroup_pattern) {
		/* NOTE(review): assumes the lookup can yield NULL - confirm */
		ERROR("Failed to read lxc.cgroup.pattern");
		goto out;
	}

	/* keep the snprintf() result apart from the loop index below */
	len = snprintf(tmpnam, sizeof(tmpnam), "lxcprobe-%d", getpid());
	if (len < 0 || (size_t)len >= sizeof(tmpnam)) {
		ERROR("Attack - format string modified?");
		/* leave through 'out' so the dbus connection is released */
		goto out;
	}

	probe = lxc_string_replace("%n", tmpnam, cgroup_pattern);
	if (!probe)
		goto out;

	/* the scan must start at the first subsystem */
	i = 0;
	while (i < nr_subsystems) {
		char *p = get_last_controller_in_list(subsystems[i]);

		if (!subsys_is_writeable(p, probe)) {
			if (is_crucial_subsys(p)) {
				ERROR("Cannot write to crucial subsystem %s\n",
					subsystems[i]);
				goto out;
			}
			if (cgroup_use && any_in_comma_list(subsystems[i], cgroup_use)) {
				ERROR("Cannot write to subsystem %s which is requested in lxc.cgroup.use\n",
					subsystems[i]);
				goto out;
			}
			WARN("Cannot write to subsystem %s, continuing with out it\n",
				subsystems[i]);
			dropped_any = true;
			/* later entries shift down into slot i, so i is
			 * deliberately not advanced here */
			drop_subsystem(i);
		} else {
			/* the probe cgroup was created; clean it up again */
			cgm_remove_cgroup(subsystems[i], probe);
			i++;
		}
	}

	if (dropped_any)
		cgm_all_controllers_same = false;
	ret = true;

out:
	free(probe);
	cgm_dbus_disconnect();
	return ret;
}
static bool collect_subsytems(void) static bool collect_subsytems(void)
{ {
char *line = NULL; char *line = NULL;
@ -1285,7 +1422,7 @@ collected:
/* make sure that cgroup.use can be and is honored */ /* make sure that cgroup.use can be and is honored */
const char *cgroup_use = lxc_global_config_value("lxc.cgroup.use"); const char *cgroup_use = lxc_global_config_value("lxc.cgroup.use");
if (!cgroup_use && errno != 0) if (!cgroup_use && errno != 0)
goto out_good; goto final_verify;
if (cgroup_use) { if (cgroup_use) {
if (!verify_and_prune(cgroup_use)) { if (!verify_and_prune(cgroup_use)) {
free_subsystems(); free_subsystems();
@ -1295,8 +1432,8 @@ collected:
cgm_all_controllers_same = false; cgm_all_controllers_same = false;
} }
out_good: final_verify:
return true; return verify_final_subsystems(cgroup_use);
out_free: out_free:
free(line); free(line);
@ -1313,25 +1450,22 @@ out_free:
*/ */
struct cgroup_ops *cgm_ops_init(void) struct cgroup_ops *cgm_ops_init(void)
{ {
check_supports_multiple_controllers(-1);
if (!collect_subsytems()) if (!collect_subsytems())
return NULL; return NULL;
if (!cgm_dbus_connect())
goto err1;
// root; try to escape to root cgroup if (api_version < CGM_SUPPORTS_MULT_CONTROLLERS)
if (geteuid() == 0 && !cgm_escape()) cgm_supports_multiple_controllers = false;
goto err2;
cgm_dbus_disconnect();
return &cgmanager_ops; // if root, try to escape to root cgroup
if (geteuid() == 0 && !cgm_escape()) {
err2:
cgm_dbus_disconnect();
err1:
free_subsystems(); free_subsystems();
return NULL; return NULL;
} }
return &cgmanager_ops;
}
/* unfreeze is called by the command api after killing a container. */ /* unfreeze is called by the command api after killing a container. */
static bool cgm_unfreeze(void *hdata) static bool cgm_unfreeze(void *hdata)
{ {

View File

@ -34,6 +34,9 @@
#ifndef CLONE_NEWNS #ifndef CLONE_NEWNS
# define CLONE_NEWNS 0x00020000 # define CLONE_NEWNS 0x00020000
#endif #endif
/* Fallback for build environments whose headers do not define CLONE_NEWCGROUP */
#ifndef CLONE_NEWCGROUP
# define CLONE_NEWCGROUP 0x02000000
#endif
#ifndef CLONE_NEWUTS #ifndef CLONE_NEWUTS
# define CLONE_NEWUTS 0x04000000 # define CLONE_NEWUTS 0x04000000
#endif #endif

View File

@ -451,6 +451,9 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char
if (conf->console.log_path && setenv("LXC_CONSOLE_LOGPATH", conf->console.log_path, 1)) { if (conf->console.log_path && setenv("LXC_CONSOLE_LOGPATH", conf->console.log_path, 1)) {
SYSERROR("failed to set environment variable for console log"); SYSERROR("failed to set environment variable for console log");
} }
if (setenv("LXC_CGNS_AWARE", "1", 1)) {
SYSERROR("failed to set LXC_CGNS_AWARE environment variable");
}
/* End of environment variable setup for hooks */ /* End of environment variable setup for hooks */
if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) { if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) {
@ -842,6 +845,11 @@ static int do_start(void *data)
if (handler->backgrounded && null_stdfds() < 0) if (handler->backgrounded && null_stdfds() < 0)
goto out_warn_father; goto out_warn_father;
if (cgns_supported() && unshare(CLONE_NEWCGROUP) != 0) {
SYSERROR("Failed to unshare cgroup namespace");
goto out_warn_father;
}
/* after this call, we are in error because this /* after this call, we are in error because this
* ops should not return as it execs */ * ops should not return as it execs */
handler->ops->start(handler, handler->data); handler->ops->start(handler, handler->data);

View File

@ -1185,6 +1185,11 @@ bool file_exists(const char *f)
return stat(f, &statbuf) == 0; return stat(f, &statbuf) == 0;
} }
/*
 * Return true if /proc/self/ns/cgroup exists, which is taken as the sign
 * that cgroup namespaces are available.
 */
bool cgns_supported(void)
{
	static const char cgroup_ns_path[] = "/proc/self/ns/cgroup";

	return file_exists(cgroup_ns_path);
}
/* historically lxc-init has been under /usr/lib/lxc and under /* historically lxc-init has been under /usr/lib/lxc and under
* /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc. * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
*/ */

View File

@ -273,6 +273,7 @@ int detect_shared_rootfs(void);
int detect_ramfs_rootfs(void); int detect_ramfs_rootfs(void);
char *on_path(char *cmd, const char *rootfs); char *on_path(char *cmd, const char *rootfs);
bool file_exists(const char *f); bool file_exists(const char *f);
bool cgns_supported(void);
char *choose_init(const char *rootfs); char *choose_init(const char *rootfs);
int print_to_file(const char *file, const char *content); int print_to_file(const char *file, const char *content);
bool switch_to_ns(pid_t pid, const char *ns); bool switch_to_ns(pid_t pid, const char *ns);