Enable network namespace sharing in lxc-start

Right now lxc-start always does one of two things: it either creates
a new namespace or inherits one from the parent environment.
This patch adds a third option: sharing a namespace with another
container (more precisely, with another process, given by its PID).

In some situations this is handy. For example, sharing a network
namespace makes it possible to migrate services between containers
with little or no downtime.

This patch creates an infrastructure for inheriting any type
of namespace, but only the network namespace is supported for now.
This commit is contained in:
Marek Majkowski 2013-11-06 15:16:33 -08:00
parent 9beb9ce0ef
commit 9f30a19089
6 changed files with 164 additions and 22 deletions

View File

@ -53,6 +53,9 @@ struct lxc_arguments {
/* set to 0 to accept only 1 lxcpath, -1 for unlimited */
int lxcpath_additional;
/* for lxc-start */
const char *share_net;
/* for lxc-checkpoint/restart */
const char *statefile;
int statefd;

View File

@ -2399,6 +2399,9 @@ struct lxc_conf *lxc_conf_init(void)
new->lsm_se_context = NULL;
new->lsm_umount_proc = 0;
for (i = 0; i < LXC_NS_MAX; i++)
new->inherit_ns_fd[i] = -1;
return new;
}

View File

@ -318,6 +318,8 @@ struct lxc_conf {
// store the config file specified values here.
char *logfile; // the logfile as specifed in config
int loglevel; // loglevel as specifed in config (if any)
int inherit_ns_fd[LXC_NS_MAX];
};
int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,

View File

@ -51,6 +51,8 @@
#include "confile.h"
#include "arguments.h"
/*
 * Option id for --share-net, which has a long form only.
 * The expansion is parenthesized so it stays a single operand wherever
 * the macro is used in a larger expression.
 */
#define OPT_SHARE_NET (OPT_USAGE + 1)
lxc_log_define(lxc_start_ui, lxc_start);
static struct lxc_list defines;
@ -101,6 +103,7 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg)
case 'C': args->close_all_fds = 1; break;
case 's': return lxc_config_define_add(&defines, arg);
case 'p': args->pidfile = arg; break;
case OPT_SHARE_NET: args->share_net = arg; break;
}
return 0;
}
@ -113,6 +116,7 @@ static const struct option my_longopts[] = {
{"console-log", required_argument, 0, 'L'},
{"close-all-fds", no_argument, 0, 'C'},
{"pidfile", required_argument, 0, 'p'},
{"share-net", required_argument, 0, OPT_SHARE_NET},
LXC_COMMON_OPTIONS
};
@ -133,7 +137,9 @@ Options :\n\
-C, --close-all-fds If any fds are inherited, close them\n\
If not specified, exit with failure instead\n\
Note: --daemon implies --close-all-fds\n\
-s, --define KEY=VAL Assign VAL to configuration variable KEY\n",
-s, --define KEY=VAL Assign VAL to configuration variable KEY\n\
--share-net=PID Share a network namespace with another container\n\
",
.options = my_longopts,
.parser = my_parser,
.checker = NULL,
@ -249,6 +255,27 @@ int main(int argc, char *argv[])
}
}
if (my_args.share_net != NULL) {
char *eptr;
int fd;
int pid = strtol(my_args.share_net, &eptr, 10);
if (*eptr != '\0') {
SYSERROR("'%s' is not a valid pid number", my_args.share_net);
goto out;
}
char path[MAXPATHLEN];
int ret = snprintf(path, MAXPATHLEN, "/proc/%d/ns/net", pid);
if (ret < 0 || ret >= MAXPATHLEN)
goto out;
fd = open(path, O_RDONLY);
if (fd < 0) {
SYSERROR("failed to open %s", path);
goto out;
}
conf->inherit_ns_fd[LXC_NS_NET] = fd;
}
if (my_args.daemonize) {
c->want_daemonize(c);
}

View File

@ -75,6 +75,78 @@
lxc_log_define(lxc_start, lxc);
/*
 * Namespace lookup table, indexed by the LXC_NS_* constants.
 * Each entry pairs the file name used under /proc/self/ns/ with the
 * CLONE_NEW* flag that selects the same namespace type.
 */
const struct ns_info ns_info[LXC_NS_MAX] = {
	[LXC_NS_MNT]  = { .proc_name = "mnt",  .clone_flag = CLONE_NEWNS },
	[LXC_NS_PID]  = { .proc_name = "pid",  .clone_flag = CLONE_NEWPID },
	[LXC_NS_UTS]  = { .proc_name = "uts",  .clone_flag = CLONE_NEWUTS },
	[LXC_NS_IPC]  = { .proc_name = "ipc",  .clone_flag = CLONE_NEWIPC },
	[LXC_NS_USER] = { .proc_name = "user", .clone_flag = CLONE_NEWUSER },
	[LXC_NS_NET]  = { .proc_name = "net",  .clone_flag = CLONE_NEWNET },
};
/*
 * Close every open descriptor in ns_fd and mark its slot as unused.
 *
 * ns_fd: array of LXC_NS_MAX descriptors; a slot holding a negative value
 *        is treated as empty and left untouched. Every closed slot is
 *        reset to -1.
 *
 * The whole sweep runs between process_lock() and process_unlock().
 */
static void close_ns(int ns_fd[LXC_NS_MAX]) {
	int *slot;

	process_lock();
	for (slot = ns_fd; slot < ns_fd + LXC_NS_MAX; slot++) {
		if (*slot < 0)
			continue;
		close(*slot);
		*slot = -1;
	}
	process_unlock();
}
/*
 * Open a descriptor on the calling process's own namespace files.
 *
 * ns_fd:       output array of LXC_NS_MAX slots. All slots are first set
 *              to -1; the slot of each selected namespace then receives a
 *              descriptor opened on /proc/self/ns/<proc_name>.
 * clone_flags: mask of CLONE_NEW* flags; a namespace is selected when its
 *              ns_info[].clone_flag bit is set in the mask.
 *
 * Returns 0 on success. Returns -1 if /proc/self/ns is not accessible
 * (ns_fd is left unmodified in that case) or if an open() fails; on an
 * open() failure every descriptor opened so far is closed again and the
 * errno of the failed open() is restored before the error is logged.
 */
static int preserve_ns(int ns_fd[LXC_NS_MAX], int clone_flags) {
	char path[MAXPATHLEN];
	int idx, open_errno;

	if (access("/proc/self/ns", X_OK) != 0) {
		ERROR("Does this kernel version support 'attach'?");
		return -1;
	}

	for (idx = 0; idx < LXC_NS_MAX; idx++)
		ns_fd[idx] = -1;

	for (idx = 0; idx < LXC_NS_MAX; idx++) {
		if (!(clone_flags & ns_info[idx].clone_flag))
			continue;

		snprintf(path, MAXPATHLEN, "/proc/self/ns/%s", ns_info[idx].proc_name);

		process_lock();
		ns_fd[idx] = open(path, O_RDONLY | O_CLOEXEC);
		process_unlock();
		if (ns_fd[idx] >= 0)
			continue;

		/* close_ns() may clobber errno, so keep the open() error for the log. */
		open_errno = errno;
		close_ns(ns_fd);
		errno = open_errno;
		SYSERROR("failed to open '%s'", path);
		return -1;
	}

	return 0;
}
/*
 * Enter every namespace for which ns_fd holds a descriptor.
 *
 * ns_fd: array of LXC_NS_MAX descriptors; negative slots are skipped.
 *        The remaining ones are passed to setns(fd, 0) in index order.
 *
 * Returns 0 if every setns() call succeeded, -1 on the first failure
 * (which is logged with the namespace's proc_name). Namespaces entered
 * before the failing one are not reverted.
 */
static int attach_ns(const int ns_fd[LXC_NS_MAX]) {
	int idx = 0;

	while (idx < LXC_NS_MAX) {
		if (ns_fd[idx] >= 0 && setns(ns_fd[idx], 0) != 0) {
			SYSERROR("failed to set namespace '%s'", ns_info[idx].proc_name);
			return -1;
		}
		idx++;
	}

	return 0;
}
static int match_fd(int fd)
{
return (fd == 0 || fd == 1 || fd == 2);
@ -645,6 +717,12 @@ int lxc_spawn(struct lxc_handler *handler)
const char *name = handler->name;
struct cgroup_meta_data *cgroup_meta = NULL;
const char *cgroup_pattern = NULL;
int saved_ns_fd[LXC_NS_MAX];
int preserve_mask = 0, i;
for (i = 0; i < LXC_NS_MAX; i++)
if (handler->conf->inherit_ns_fd[i] > -1)
preserve_mask |= ns_info[i].clone_flag;
if (lxc_sync_init(handler))
return -1;
@ -654,34 +732,40 @@ int lxc_spawn(struct lxc_handler *handler)
INFO("Cloning a new user namespace");
handler->clone_flags |= CLONE_NEWUSER;
}
if (!lxc_list_empty(&handler->conf->network)) {
handler->clone_flags |= CLONE_NEWNET;
if (handler->conf->inherit_ns_fd[LXC_NS_NET] == -1) {
if (!lxc_list_empty(&handler->conf->network)) {
/* Find gateway addresses from the link device, which is
* no longer accessible inside the container. Do this
* before creating network interfaces, since goto
* out_delete_net does not work before lxc_clone. */
if (lxc_find_gateway_addresses(handler)) {
ERROR("failed to find gateway addresses");
lxc_sync_fini(handler);
return -1;
handler->clone_flags |= CLONE_NEWNET;
/* Find gateway addresses from the link device, which is
* no longer accessible inside the container. Do this
* before creating network interfaces, since goto
* out_delete_net does not work before lxc_clone. */
if (lxc_find_gateway_addresses(handler)) {
ERROR("failed to find gateway addresses");
lxc_sync_fini(handler);
return -1;
}
/* that should be done before the clone because we will
* fill the netdev index and use them in the child
*/
if (lxc_create_network(handler)) {
ERROR("failed to create the network IW WAS ERE");
lxc_sync_fini(handler);
return -1;
}
}
/* that should be done before the clone because we will
* fill the netdev index and use them in the child
*/
if (lxc_create_network(handler)) {
ERROR("failed to create the network");
lxc_sync_fini(handler);
return -1;
if (save_phys_nics(handler->conf)) {
ERROR("failed to save physical nic info");
goto out_abort;
}
} else {
INFO("Inheriting a net namespace");
}
if (save_phys_nics(handler->conf)) {
ERROR("failed to save physical nic info");
goto out_abort;
}
cgroup_meta = lxc_cgroup_load_meta();
if (!cgroup_meta) {
@ -716,6 +800,9 @@ int lxc_spawn(struct lxc_handler *handler)
if (handler->pinfd == -1)
INFO("failed to pin the container's rootfs");
preserve_ns(saved_ns_fd, preserve_mask);
attach_ns(handler->conf->inherit_ns_fd);
/* Create a process in a new set of namespaces */
handler->pid = lxc_clone(do_start, handler, handler->clone_flags);
if (handler->pid < 0) {
@ -723,6 +810,8 @@ int lxc_spawn(struct lxc_handler *handler)
goto out_delete_net;
}
attach_ns(saved_ns_fd);
lxc_sync_fini_child(handler);
if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))

View File

@ -27,6 +27,7 @@
#include <lxc/state.h>
#include <sys/param.h>
#include "namespace.h"
struct lxc_conf;
@ -39,6 +40,23 @@ struct lxc_operations {
struct cgroup_desc;
/*
 * Index of each namespace type handled by lxc. The values are used as
 * array indices (ns_info[] and the per-namespace fd arrays), so they must
 * stay contiguous and start at 0.
 */
enum {
LXC_NS_MNT,
LXC_NS_PID,
LXC_NS_UTS,
LXC_NS_IPC,
LXC_NS_USER,
LXC_NS_NET,
/* Number of namespace types; used as the array size, not a valid index. */
LXC_NS_MAX
};
struct ns_info {
const char *proc_name;
int clone_flag;
};
/*
 * Table of namespace descriptions, indexed by LXC_NS_*.
 * This is a declaration only: the table is defined (with its initializer)
 * in exactly one source file. Without `extern`, every file including this
 * header would emit its own tentative definition of the array, which
 * produces duplicate-symbol link errors on toolchains that default to
 * -fno-common.
 */
extern const struct ns_info ns_info[LXC_NS_MAX];
struct lxc_handler {
pid_t pid;
char *name;