mirror of
https://git.proxmox.com/git/mirror_lxc
synced 2025-07-14 13:38:33 +00:00
cgroups: rework to handle nested containers with multiple and partial mounts
Currently, if you create a container and use the mountcgroup hook, you get the /lxc/c1/c1.real cgroup mounted to /. If you then try to start containers inside that container, lxc can get confused. This patch addresses that by accepting that the cgroup as found in /proc/self/cgroup can be partially hidden by bind mounts. In this patch: Add optional 'lxc.cgroup.use' to /etc/lxc/lxc.conf to specify which mounted cgroup filesystems lxc should use. So far only the cgroup creation respects this. Keep separate cgroup information for each cgroup mountpoint. So if the caller is in devices cgroup /a but cpuset cgroup /b that should now be ok. Change how we decide whether to ignore failure to set devices cgroup settings. Actually look to see if our current cgroup already has the settings. If not, add them. Finally, the real reason for this patch: in a nested container, /proc/self/cgroup says nothing about where under /sys/fs/cgroup you might find yourself. Handle this by searching for our pid in tasks files, and keep that info in the cgroup handler. Also remove all strdupa from cgroup.c (not android-friendly). Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
This commit is contained in:
parent
070a4b8e68
commit
b98f7d6ed1
1063
src/lxc/cgroup.c
1063
src/lxc/cgroup.c
File diff suppressed because it is too large
Load Diff
@ -24,15 +24,34 @@
|
||||
#define _cgroup_h
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
|
||||
* cgroup_desc: describe a container's cgroup membership
|
||||
*/
|
||||
struct cgroup_desc {
|
||||
char *mntpt; /* where this is mounted */
|
||||
char *subsystems; /* comma-separated list of subsystems, or NULL */
|
||||
char *curcgroup; /* task's current cgroup, full pathname */
|
||||
char *realcgroup; /* the cgroup as known in /proc/self/cgroup */
|
||||
struct cgroup_desc *next;
|
||||
};
|
||||
|
||||
struct lxc_handler;
|
||||
extern int lxc_cgroup_destroy(const char *cgpath);
|
||||
extern void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups);
|
||||
extern char *lxc_cgroup_path_get(const char *subsystem, const char *name,
|
||||
const char *lxcpath);
|
||||
extern int lxc_cgroup_nrtasks(const char *cgpath);
|
||||
extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name);
|
||||
extern int lxc_cgroup_enter(const char *cgpath, pid_t pid);
|
||||
extern int lxc_cgroup_nrtasks(struct lxc_handler *handler);
|
||||
struct cgroup_desc *lxc_cgroup_path_create(const char *name);
|
||||
extern int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid);
|
||||
extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath);
|
||||
extern char *cgroup_path_get(const char *subsystem, const char *cgpath);
|
||||
extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
|
||||
extern int lxc_curcgroup(char *cgroup, int inlen);
|
||||
extern bool get_subsys_mount(char *dest, const char *subsystem);
|
||||
extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
|
||||
/*
|
||||
* Called by commands.c by a container's monitor to find out the
|
||||
* container's cgroup path in a specific subsystem
|
||||
*/
|
||||
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
|
||||
struct lxc_list;
|
||||
extern int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups);
|
||||
extern int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups);
|
||||
#endif
|
||||
|
@ -341,6 +341,7 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
|
||||
return lxc_cmd_rsp_send(fd, &rsp);
|
||||
}
|
||||
|
||||
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
|
||||
/*
|
||||
* lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
|
||||
* particular subsystem. This is the cgroup path relative to the root
|
||||
@ -348,15 +349,21 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
|
||||
*
|
||||
* @name : name of container to connect to
|
||||
* @lxcpath : the lxcpath in which the container is running
|
||||
* @subsystem : the subsystem being asked about
|
||||
*
|
||||
* Returns the path on success, NULL on failure. The caller must free() the
|
||||
* returned path.
|
||||
*/
|
||||
char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath)
|
||||
char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
|
||||
const char *subsystem)
|
||||
{
|
||||
int ret, stopped = 0;
|
||||
struct lxc_cmd_rr cmd = {
|
||||
.req = { .cmd = LXC_CMD_GET_CGROUP },
|
||||
.req = {
|
||||
.cmd = LXC_CMD_GET_CGROUP,
|
||||
.datalen = strlen(subsystem)+1,
|
||||
.data = subsystem,
|
||||
},
|
||||
};
|
||||
|
||||
ret = lxc_cmd(name, &cmd, &stopped, lxcpath);
|
||||
@ -381,10 +388,17 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath)
|
||||
static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
|
||||
struct lxc_handler *handler)
|
||||
{
|
||||
struct lxc_cmd_rsp rsp = {
|
||||
.datalen = strlen(handler->cgroup) + 1,
|
||||
.data = handler->cgroup,
|
||||
};
|
||||
struct lxc_cmd_rsp rsp;
|
||||
char *path;
|
||||
|
||||
if (req->datalen < 1)
|
||||
return -1;
|
||||
|
||||
path = cgroup_get_subsys_path(handler, req->data);
|
||||
if (!path)
|
||||
return -1;
|
||||
rsp.datalen = strlen(path) + 1,
|
||||
rsp.data = path;
|
||||
|
||||
return lxc_cmd_rsp_send(fd, &rsp);
|
||||
}
|
||||
@ -535,7 +549,13 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
|
||||
memset(&rsp, 0, sizeof(rsp));
|
||||
rsp.ret = kill(handler->pid, stopsignal);
|
||||
if (!rsp.ret) {
|
||||
ret = lxc_unfreeze_bypath(handler->cgroup);
|
||||
char *path = cgroup_get_subsys_path(handler, "freezer");
|
||||
if (!path) {
|
||||
ERROR("container %s:%s is not in a freezer cgroup",
|
||||
handler->lxcpath, handler->name);
|
||||
return 0;
|
||||
}
|
||||
ret = lxc_unfreeze_bypath(path);
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
|
@ -69,7 +69,12 @@ struct lxc_cmd_console_rsp_data {
|
||||
extern int lxc_cmd_console_winch(const char *name, const char *lxcpath);
|
||||
extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
|
||||
const char *lxcpath);
|
||||
extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath);
|
||||
/*
|
||||
* Get the 'real' cgroup path (as seen in /proc/self/cgroup) for a container
|
||||
* for a particular subsystem
|
||||
*/
|
||||
extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
|
||||
const char *subsystem);
|
||||
extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
|
||||
extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
|
||||
extern pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath);
|
||||
|
@ -1487,47 +1487,6 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int _setup_cgroup(const char *cgpath, struct lxc_list *cgroups,
|
||||
int devices)
|
||||
{
|
||||
struct lxc_list *iterator;
|
||||
struct lxc_cgroup *cg;
|
||||
int ret = -1;
|
||||
|
||||
if (lxc_list_empty(cgroups))
|
||||
return 0;
|
||||
|
||||
lxc_list_for_each(iterator, cgroups) {
|
||||
cg = iterator->elem;
|
||||
|
||||
if (devices == !strncmp("devices", cg->subsystem, 7)) {
|
||||
if (lxc_cgroup_set_bypath(cgpath, cg->subsystem,
|
||||
cg->value)) {
|
||||
ERROR("Error setting %s to %s for %s\n",
|
||||
cg->subsystem, cg->value, cgpath);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
INFO("cgroup has been setup");
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups)
|
||||
{
|
||||
return _setup_cgroup(cgpath, cgroups, 1);
|
||||
}
|
||||
|
||||
int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
|
||||
{
|
||||
return _setup_cgroup(cgpath, cgroups, 0);
|
||||
}
|
||||
|
||||
static void parse_mntopt(char *opt, unsigned long *flags, char **data)
|
||||
{
|
||||
struct mount_opt *mo;
|
||||
|
@ -301,8 +301,6 @@ struct lxc_conf {
|
||||
int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
|
||||
const char *lxcpath, char *argv[]);
|
||||
|
||||
extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
|
||||
extern int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups);
|
||||
extern int detect_shared_rootfs(void);
|
||||
|
||||
/*
|
||||
|
@ -145,14 +145,17 @@ int lxc_unfreeze(const char *name, const char *lxcpath)
|
||||
|
||||
int lxc_unfreeze_bypath(const char *cgrelpath)
|
||||
{
|
||||
char *cgabspath;
|
||||
int ret;
|
||||
char cgabspath[MAXPATHLEN];
|
||||
int len, ret;
|
||||
|
||||
cgabspath = cgroup_path_get("freezer", cgrelpath);
|
||||
if (!cgabspath)
|
||||
if (!get_subsys_mount(cgabspath, "freezer"))
|
||||
return -1;
|
||||
len = strlen(cgabspath);
|
||||
ret = snprintf(cgabspath+len, MAXPATHLEN-len, "/%s", cgrelpath);
|
||||
if (ret < 0 || ret >= MAXPATHLEN-len) {
|
||||
ERROR("freezer path name too long");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = do_unfreeze(cgabspath, 0, NULL, NULL);
|
||||
free(cgabspath);
|
||||
return ret;
|
||||
return do_unfreeze(cgabspath, 0, NULL, NULL);
|
||||
}
|
||||
|
@ -136,15 +136,17 @@ extern int lxc_unfreeze_bypath(const char *cgpath);
|
||||
*/
|
||||
extern lxc_state_t lxc_state(const char *name, const char *lxcpath);
|
||||
|
||||
struct lxc_handler;
|
||||
/*
|
||||
* Set a specified value for a specified subsystem. The specified
|
||||
* subsystem must be fully specified, eg. "cpu.shares"
|
||||
* @cgpath : the cgroup path of the container
|
||||
* @d : the cgroup descriptor for the container
|
||||
* @filename : the cgroup attribute filename
|
||||
* @value : the value to be set
|
||||
* Returns 0 on success, < 0 otherwise
|
||||
*/
|
||||
extern int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value);
|
||||
extern int lxc_cgroup_set_value(struct lxc_handler *hander, const char *filename,
|
||||
const char *value);
|
||||
|
||||
/*
|
||||
* Set a specified value for a specified subsystem. The specified
|
||||
|
@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler)
|
||||
{
|
||||
int ntasks;
|
||||
|
||||
ntasks = lxc_cgroup_nrtasks(handler->cgroup);
|
||||
ntasks = lxc_cgroup_nrtasks(handler);
|
||||
|
||||
if (ntasks < 0) {
|
||||
ERROR("failed to get the number of tasks");
|
||||
|
@ -374,8 +374,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
|
||||
handler->conf->maincmd_fd = -1;
|
||||
free(handler->name);
|
||||
if (handler->cgroup) {
|
||||
lxc_cgroup_destroy(handler->cgroup);
|
||||
free(handler->cgroup);
|
||||
lxc_cgroup_destroy_desc(handler->cgroup);
|
||||
handler->cgroup = NULL;
|
||||
}
|
||||
free(handler);
|
||||
@ -594,12 +593,11 @@ int save_phys_nics(struct lxc_conf *conf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
|
||||
extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
|
||||
int lxc_spawn(struct lxc_handler *handler)
|
||||
{
|
||||
int failed_before_rename = 0, len;
|
||||
int failed_before_rename = 0;
|
||||
const char *name = handler->name;
|
||||
char *curcgroup = NULL;
|
||||
|
||||
if (lxc_sync_init(handler))
|
||||
return -1;
|
||||
@ -661,18 +659,10 @@ int lxc_spawn(struct lxc_handler *handler)
|
||||
if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
|
||||
failed_before_rename = 1;
|
||||
|
||||
if ((len = lxc_curcgroup(NULL, 0)) > 1) {
|
||||
curcgroup = alloca(len);
|
||||
if (lxc_curcgroup(curcgroup, len) <= 1)
|
||||
curcgroup = NULL;
|
||||
FILE *f = fopen("/tmp/a", "a");
|
||||
fprintf(f, "curcgroup is %s\n", curcgroup);
|
||||
fclose(f);
|
||||
}
|
||||
if ((handler->cgroup = lxc_cgroup_path_create(curcgroup, name)) == NULL)
|
||||
if ((handler->cgroup = lxc_cgroup_path_create(name)) == NULL)
|
||||
goto out_delete_net;
|
||||
|
||||
if (setup_cgroup(handler->cgroup, &handler->conf->cgroup)) {
|
||||
if (setup_cgroup(handler, &handler->conf->cgroup)) {
|
||||
ERROR("failed to setup the cgroups for '%s'", name);
|
||||
goto out_delete_net;
|
||||
}
|
||||
@ -707,16 +697,10 @@ int lxc_spawn(struct lxc_handler *handler)
|
||||
if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
|
||||
goto out_delete_net;
|
||||
|
||||
if (setup_cgroup_devices(handler->cgroup, &handler->conf->cgroup)) {
|
||||
/* an unfortunate special case: startup hooks may have already
|
||||
* setup the cgroup. If a setting fails, and this is the devices
|
||||
* subsystem, *and* we are already in a subset of the cgroup,
|
||||
* then ignore the failure */
|
||||
if (!is_in_subcgroup(handler->pid, "devices", handler->cgroup)) {
|
||||
if (setup_cgroup_devices(handler, &handler->conf->cgroup)) {
|
||||
ERROR("failed to setup the devices cgroup for '%s'", name);
|
||||
goto out_delete_net;
|
||||
}
|
||||
}
|
||||
|
||||
/* Tell the child to complete its initialization and wait for
|
||||
* it to exec or return an error. (the child will never
|
||||
|
@ -37,6 +37,8 @@ struct lxc_operations {
|
||||
int (*post_start)(struct lxc_handler *, void *);
|
||||
};
|
||||
|
||||
struct cgroup_desc;
|
||||
|
||||
struct lxc_handler {
|
||||
pid_t pid;
|
||||
char *name;
|
||||
@ -53,7 +55,7 @@ struct lxc_handler {
|
||||
#endif
|
||||
int pinfd;
|
||||
const char *lxcpath;
|
||||
char *cgroup;
|
||||
struct cgroup_desc *cgroup;
|
||||
};
|
||||
|
||||
extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
|
||||
|
@ -36,102 +36,6 @@
|
||||
fprintf(stderr, "%s:%d " fmt "\n", __FILE__, __LINE__, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* test_same_name: Create the same named container within a group
|
||||
*
|
||||
* @group : name of the container group or NULL for default "lxc"
|
||||
* @name : name of the container
|
||||
*
|
||||
* Note, lxc will append a -<nr> to duplicate container names. This is what
|
||||
* is tested here.
|
||||
*
|
||||
* Returns 0 on success, < 0 on failure
|
||||
*/
|
||||
static int test_same_name(const char *group, const char *name)
|
||||
{
|
||||
int ret = -1;
|
||||
char *cgrelpath1;
|
||||
char *cgrelpath2;
|
||||
char relpath[PATH_MAX+1];
|
||||
|
||||
sprintf(relpath, "%s/%s-1", group ? group : "lxc", name);
|
||||
|
||||
cgrelpath1 = lxc_cgroup_path_create(group, name);
|
||||
if (!cgrelpath1) {
|
||||
TSTERR("lxc_cgroup_path_create returned NULL");
|
||||
goto err1;
|
||||
}
|
||||
cgrelpath2 = lxc_cgroup_path_create(group, name);
|
||||
if (!cgrelpath2) {
|
||||
TSTERR("lxc_cgroup_path_create returned NULL");
|
||||
goto err2;
|
||||
}
|
||||
if (strcmp(cgrelpath2, relpath)) {
|
||||
TSTERR("unexpected name for duplicate %s", cgrelpath2);
|
||||
goto err3;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
err3:
|
||||
lxc_cgroup_destroy(cgrelpath2);
|
||||
free(cgrelpath2);
|
||||
err2:
|
||||
lxc_cgroup_destroy(cgrelpath1);
|
||||
free(cgrelpath1);
|
||||
err1:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* test_basic: Test cgroup functions that don't require a running container
|
||||
*
|
||||
* @group : name of the container group or NULL for default "lxc"
|
||||
* @name : name of the container
|
||||
*
|
||||
* Returns 0 on success, < 0 on failure
|
||||
*/
|
||||
static int test_basic(const char *group, const char *name)
|
||||
{
|
||||
int ret = -1;
|
||||
char *cgabspath;
|
||||
char *cgrelpath;
|
||||
char relpath[PATH_MAX+1];
|
||||
|
||||
sprintf(relpath, "%s/%s", group ? group : "lxc", name);
|
||||
|
||||
cgrelpath = lxc_cgroup_path_create(group, name);
|
||||
if (!cgrelpath) {
|
||||
TSTERR("lxc_cgroup_path_create returned NULL");
|
||||
goto err1;
|
||||
}
|
||||
if (!strstr(cgrelpath, relpath)) {
|
||||
TSTERR("lxc_cgroup_path_create %s not in %s", relpath, cgrelpath);
|
||||
goto err2;
|
||||
}
|
||||
|
||||
cgabspath = cgroup_path_get("freezer", cgrelpath);
|
||||
if (!cgabspath) {
|
||||
TSTERR("cgroup_path_get returned NULL");
|
||||
goto err2;
|
||||
}
|
||||
if (!strstr(cgabspath, relpath)) {
|
||||
TSTERR("cgroup_path_get %s not in %s", relpath, cgabspath);
|
||||
goto err3;
|
||||
}
|
||||
|
||||
ret = lxc_cgroup_destroy(cgrelpath);
|
||||
if (ret < 0) {
|
||||
TSTERR("lxc_cgroup_destroy failed");
|
||||
goto err3;
|
||||
}
|
||||
err3:
|
||||
free(cgabspath);
|
||||
err2:
|
||||
free(cgrelpath);
|
||||
err1:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* test_running_container: test cgroup functions against a running container
|
||||
*
|
||||
@ -141,7 +45,7 @@ err1:
|
||||
static int test_running_container(const char *lxcpath,
|
||||
const char *group, const char *name)
|
||||
{
|
||||
int nrtasks, ret = -1;
|
||||
int ret = -1;
|
||||
struct lxc_container *c = NULL;
|
||||
char *cgrelpath;
|
||||
char *cgabspath;
|
||||
@ -160,7 +64,7 @@ static int test_running_container(const char *lxcpath,
|
||||
goto err2;
|
||||
}
|
||||
|
||||
cgrelpath = lxc_cmd_get_cgroup_path(c->name, c->config_path);
|
||||
cgrelpath = lxc_cmd_get_cgroup_path(c->name, c->config_path, "freezer");
|
||||
if (!cgrelpath) {
|
||||
TSTERR("lxc_cmd_get_cgroup_path returned NULL");
|
||||
goto err2;
|
||||
@ -179,7 +83,7 @@ static int test_running_container(const char *lxcpath,
|
||||
}
|
||||
strcpy(value_save, value);
|
||||
|
||||
ret = lxc_cgroup_set_bypath(cgrelpath, "memory.swappiness", "100");
|
||||
ret = lxc_cgroup_set(c->name, "memory.swappiness", "100", c->config_path);
|
||||
if (ret < 0) {
|
||||
TSTERR("lxc_cgroup_set_bypath failed");
|
||||
goto err3;
|
||||
@ -213,12 +117,6 @@ static int test_running_container(const char *lxcpath,
|
||||
goto err3;
|
||||
}
|
||||
|
||||
nrtasks = lxc_cgroup_nrtasks(cgrelpath);
|
||||
if (nrtasks < 1) {
|
||||
TSTERR("failed getting nrtasks");
|
||||
goto err3;
|
||||
}
|
||||
|
||||
cgabspath = lxc_cgroup_path_get("freezer", c->name, c->config_path);
|
||||
if (!cgabspath) {
|
||||
TSTERR("lxc_cgroup_path_get returned NULL");
|
||||
@ -312,18 +210,6 @@ int main()
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (test_basic(NULL, MYNAME) < 0)
|
||||
goto out;
|
||||
if (test_basic("ab", MYNAME) < 0)
|
||||
goto out;
|
||||
printf("Basic cgroup path tests...Passed\n");
|
||||
|
||||
if (test_same_name(NULL, MYNAME) < 0)
|
||||
goto out;
|
||||
if (test_same_name("ab", MYNAME) < 0)
|
||||
goto out;
|
||||
printf("Same name tests...Passed\n");
|
||||
|
||||
#if TEST_ALREADY_RUNNING_CT
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user