commands: add lxc_cmd_state_server()

A LXC container's lifecycle is regulated by the states STARTING, RUNNING,
STOPPING, STOPPED, ABORTING. These states are tracked in the LXC handler and
can be checked via approriate functions in the command socket callback system.
(The freezer stages are not part of a container's lifecycle since they are not
recorded in the LXC handler. This might change in the future but given that the
freezer controller will be removed from future cgroup implementations it is
unlikely.) So far, LXC was using an external helper to track the states of a
container (lxc-monitord). This solution was error prone. For example, the
external state server would hang in various scenarios that seemed to be caused
by either very subtle internal races or irritation of the external state server
by signals.

LXC will switch from an external state monitor (lxc-monitord) which serves as a
state server for state clients to a native implementation using the indiviual
container's command socket. This solution was discussed and outlined by Stéphane
Graber and Christian Brauner during a LX{C,D} sprint.

The LXC handler will gain an additional field to track state clients. In order
for a state client to receive state notifications from the command server he
will need to register himself via the lxc_cmd_state_server() function in the
state client list. The state client list will be served by lxc_set_state()
during the container's lifecycle. lxc_set_state() will also take care of
removing any clients from the state list in the LXC handler once the requested
state has been reached and sent to the client.
In order to prevent races between adding and serving new state clients the state
client list and the state field in the LXC handler will be protected by a lock.

This commit effectively deprecates lxc-monitord. Instead of serving states to
state clients via the lxc-monitord fifo and socket we will now send the state
of the container via the container's command socket.
lxc-monitord is still useable and will - for the sake of the lxc-monitor
command - be kept around so that non-API state clients can still monitor the
container during it's lifecycle.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
This commit is contained in:
Christian Brauner 2017-06-11 01:29:45 +02:00
parent 3f903c04fe
commit dbc9832d21
No known key found for this signature in database
GPG Key ID: 7B3C391EFEA93624
7 changed files with 264 additions and 97 deletions

View File

@ -43,7 +43,9 @@
#include "commands.h"
#include "console.h"
#include "confile.h"
#include "lxclock.h"
#include "mainloop.h"
#include "monitor.h"
#include "af_unix.h"
#include "config.h"
@ -142,6 +144,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
[LXC_CMD_GET_CONFIG_ITEM] = "get_config_item",
[LXC_CMD_GET_NAME] = "get_name",
[LXC_CMD_GET_LXCPATH] = "get_lxcpath",
[LXC_CMD_STATE_SERVER] = "state_server",
};
if (cmd >= LXC_CMD_MAX)
@ -283,7 +286,11 @@ static int lxc_cmd(const char *name, struct lxc_cmd_rr *cmd, int *stopped,
char path[sizeof(((struct sockaddr_un *)0)->sun_path)] = { 0 };
char *offset = &path[1];
size_t len;
int stay_connected = cmd->req.cmd == LXC_CMD_CONSOLE;
bool stay_connected = false;
if (cmd->req.cmd == LXC_CMD_CONSOLE ||
cmd->req.cmd == LXC_CMD_STATE_SERVER)
stay_connected = true;
*stopped = 0;
@ -581,7 +588,7 @@ out:
*
* Returns the state on success, < 0 on failure
*/
lxc_state_t lxc_cmd_get_state(const char *name, const char *lxcpath)
int lxc_cmd_get_state(const char *name, const char *lxcpath)
{
int ret, stopped;
struct lxc_cmd_rr cmd = {
@ -861,6 +868,145 @@ static int lxc_cmd_get_lxcpath_callback(int fd, struct lxc_cmd_req *req,
return lxc_cmd_rsp_send(fd, &rsp);
}
/*
* lxc_cmd_state_server: register a client fd in the handler list
*
* @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running
*
* Returns the lxcpath on success, NULL on failure.
*/
int lxc_cmd_state_server(const char *name, const char *lxcpath,
lxc_state_t states[MAX_STATE])
{
int stopped;
ssize_t ret;
int state = -1;
struct lxc_msg msg = {0};
struct lxc_cmd_rr cmd = {
.req = {
.cmd = LXC_CMD_STATE_SERVER,
.data = states,
.datalen = (sizeof(lxc_state_t) * MAX_STATE)
},
};
/* Lock the whole lxc_cmd_state_server_callback() call to ensure that
* lxc_set_state() doesn't cause us to miss a state.
*/
process_lock();
/* Check if already in requested state. */
state = lxc_getstate(name, lxcpath);
if (state < 0) {
process_unlock();
TRACE("failed to retrieve state of container: %s",
strerror(errno));
return -1;
} else if (states[state]) {
process_unlock();
TRACE("container is %s state", lxc_state2str(state));
return state;
}
if ((state == STARTING) && !states[RUNNING] && !states[STOPPING] && !states[STOPPED]) {
process_unlock();
TRACE("container is in %s state and caller requested to be "
"informed about a previous state",
lxc_state2str(state));
return state;
} else if ((state == RUNNING) && !states[STOPPING] && !states[STOPPED]) {
process_unlock();
TRACE("container is in %s state and caller requested to be "
"informed about a previous state",
lxc_state2str(state));
return state;
} else if ((state == STOPPING) && !states[STOPPED]) {
process_unlock();
TRACE("container is in %s state and caller requested to be "
"informed about a previous state",
lxc_state2str(state));
return state;
} else if ((state == STOPPED) || (state == ABORTING)) {
process_unlock();
TRACE("container is in %s state and caller requested to be "
"informed about a previous state",
lxc_state2str(state));
return state;
}
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
process_unlock();
if (ret < 0) {
ERROR("failed to execute command: %s", strerror(errno));
return -1;
}
/* We should now be guaranteed to get an answer from the state sending
* function.
*/
if (cmd.rsp.ret < 0) {
ERROR("failed to receive socket fd");
return -1;
}
again:
ret = recv(cmd.rsp.ret, &msg, sizeof(msg), 0);
if (ret < 0) {
if (errno == EINTR)
goto again;
ERROR("failed to receive message: %s", strerror(errno));
return -1;
}
if (ret == 0) {
ERROR("length of message was 0");
return -1;
}
TRACE("received state %s from state client %d",
lxc_state2str(msg.value), cmd.rsp.ret);
return msg.value;
}
static int lxc_cmd_state_server_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler)
{
struct lxc_cmd_rsp rsp = {0};
struct state_client *newclient;
struct lxc_list *tmplist;
if (req->datalen < 0) {
TRACE("requested datalen was < 0");
return -1;
}
if (!req->data) {
TRACE("no states requested");
return -1;
}
newclient = malloc(sizeof(*newclient));
if (!newclient)
return -1;
/* copy requested states */
memcpy(newclient->states, req->data, sizeof(newclient->states));
newclient->clientfd = fd;
tmplist = malloc(sizeof(*tmplist));
if (!tmplist) {
free(newclient);
return -1;
}
lxc_list_add_elem(tmplist, newclient);
lxc_list_add_tail(&handler->state_clients, tmplist);
TRACE("added state client %d to state client list", fd);
return lxc_cmd_rsp_send(fd, &rsp);
}
static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler)
{
@ -877,6 +1023,7 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
[LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback,
[LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback,
[LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback,
[LXC_CMD_STATE_SERVER] = lxc_cmd_state_server_callback,
};
if (req->cmd >= LXC_CMD_MAX) {

View File

@ -43,6 +43,7 @@ typedef enum {
LXC_CMD_GET_CONFIG_ITEM,
LXC_CMD_GET_NAME,
LXC_CMD_GET_LXCPATH,
LXC_CMD_STATE_SERVER,
LXC_CMD_MAX,
} lxc_cmd_t;
@ -82,8 +83,10 @@ extern char *lxc_cmd_get_config_item(const char *name, const char *item, const c
extern char *lxc_cmd_get_name(const char *hashed_sock);
extern char *lxc_cmd_get_lxcpath(const char *hashed_sock);
extern pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath);
extern lxc_state_t lxc_cmd_get_state(const char *name, const char *lxcpath);
extern int lxc_cmd_get_state(const char *name, const char *lxcpath);
extern int lxc_cmd_stop(const char *name, const char *lxcpath);
extern int lxc_cmd_state_server(const char *name, const char *lxcpath,
lxc_state_t states[MAX_STATE]);
struct lxc_epoll_descr;
struct lxc_handler;

View File

@ -758,10 +758,10 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
return false;
conf = c->lxc_conf;
daemonize = c->daemonize;
container_mem_unlock(c);
/* initialize handler */
handler = lxc_init_handler(c->name, conf, c->config_path);
container_mem_unlock(c);
if (!handler)
return false;
@ -800,7 +800,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
* the PID file, child will do the free and unlink.
*/
c->pidfile = NULL;
close(c->lxc_conf->maincmd_fd);
close(handler->conf->maincmd_fd);
return wait_on_daemonized_start(c, pid);
}

View File

@ -341,10 +341,64 @@ static int signal_handler(int fd, uint32_t events, void *data,
return 1;
}
int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
int lxc_set_state(const char *name, struct lxc_handler *handler,
lxc_state_t state)
{
ssize_t ret;
struct lxc_list *cur, *next;
struct state_client *client;
struct lxc_msg msg = {.type = lxc_msg_state, .value = state};
process_lock();
/* Only set state under process lock held so that we don't cause
* lxc_cmd_state_server() to miss a state.
*/
handler->state = state;
TRACE("set container state to %s", lxc_state2str(state));
if (lxc_list_empty(&handler->state_clients)) {
TRACE("no state clients registered");
process_unlock();
return 0;
}
strncpy(msg.name, name, sizeof(msg.name));
msg.name[sizeof(msg.name) - 1] = 0;
lxc_list_for_each_safe(cur, &handler->state_clients, next) {
client = cur->elem;
if (!client->states[state]) {
TRACE("state %s not registered for state client %d",
lxc_state2str(state), client->clientfd);
continue;
}
TRACE("sending state %s to state client %d",
lxc_state2str(state), client->clientfd);
again:
ret = send(client->clientfd, &msg, sizeof(msg), 0);
if (ret < 0) {
if (errno == EINTR)
goto again;
ERROR("failed to send message to client");
}
/* kick client from list */
close(client->clientfd);
lxc_list_del(cur);
free(cur->elem);
free(cur);
}
process_unlock();
/* This function will try to connect to the legacy lxc-monitord state
* server and only exists for backwards compatibility.
*/
lxc_monitor_send_state(name, state, handler->lxcpath);
return 0;
}
@ -415,6 +469,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
handler->conf = conf;
handler->lxcpath = lxcpath;
handler->pinfd = -1;
lxc_list_init(&handler->state_clients);
for (i = 0; i < LXC_NS_MAX; i++)
handler->nsfd[i] = -1;
@ -455,12 +510,14 @@ int lxc_init(const char *name, struct lxc_handler *handler)
ERROR("Failed loading seccomp policy.");
goto out_close_maincmd_fd;
}
TRACE("read seccomp policy");
/* Begin by setting the state to STARTING. */
if (lxc_set_state(name, handler, STARTING)) {
ERROR("Failed to set state for container \"%s\" to \"%s\".", name, lxc_state2str(STARTING));
goto out_close_maincmd_fd;
}
TRACE("set container state to \"STARTING\"");
/* Start of environment variable setup for hooks. */
if (name && setenv("LXC_NAME", name, 1))
@ -485,10 +542,13 @@ int lxc_init(const char *name, struct lxc_handler *handler)
SYSERROR("Failed to set environment variable LXC_CGNS_AWARE=1.");
/* End of environment variable setup for hooks. */
TRACE("set environment variables");
if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) {
ERROR("Failed to run lxc.hook.pre-start for container \"%s\".", name);
goto out_aborting;
}
TRACE("ran pre-start hooks");
/* The signal fd has to be created before forking otherwise if the child
* process exits before we setup the signal fd, the event will be lost
@ -499,19 +559,22 @@ int lxc_init(const char *name, struct lxc_handler *handler)
ERROR("Failed to setup SIGCHLD fd handler.");
goto out_delete_tty;
}
TRACE("set up signal fd");
/* Do this after setting up signals since it might unblock SIGWINCH. */
if (lxc_console_create(conf)) {
ERROR("Failed to create console for container \"%s\".", name);
goto out_restore_sigmask;
}
TRACE("created console");
if (lxc_ttys_shift_ids(conf) < 0) {
ERROR("Failed to shift tty into container.");
goto out_restore_sigmask;
}
TRACE("shifted tty ids");
INFO("Container \"%s\" is initialized.", name);
INFO("container \"%s\" is initialized", name);
return 0;
out_restore_sigmask:
@ -529,8 +592,9 @@ out_close_maincmd_fd:
void lxc_fini(const char *name, struct lxc_handler *handler)
{
int i, rc;
struct lxc_list *cur, *next;
pid_t self = getpid();
char *namespaces[LXC_NS_MAX+1];
char *namespaces[LXC_NS_MAX + 1];
size_t namespace_count = 0;
/* The STOPPING state is there for future cleanup code which can take
@ -592,8 +656,23 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
lxc_console_delete(&handler->conf->console);
lxc_delete_tty(&handler->conf->tty_info);
/* close the command socket */
close(handler->conf->maincmd_fd);
handler->conf->maincmd_fd = -1;
/* The command socket is now closed, no more state clients can register
* themselves from now on. So free the list of state clients.
*/
lxc_list_for_each_safe(cur, &handler->state_clients, next) {
struct state_client *client = cur->elem;
/* close state client socket */
close(client->clientfd);
lxc_list_del(cur);
free(cur->elem);
free(cur);
}
free(handler->name);
if (handler->ttysock[0] != -1) {
close(handler->ttysock[0]);

View File

@ -32,16 +32,6 @@
#include "state.h"
#include "namespace.h"
struct lxc_handler;
struct lxc_operations {
int (*start)(struct lxc_handler *, void *);
int (*post_start)(struct lxc_handler *, void *);
};
struct cgroup_desc;
struct lxc_handler {
pid_t pid;
char *name;
@ -60,8 +50,18 @@ struct lxc_handler {
bool backgrounded; // indicates whether should we close std{in,out,err} on start
int nsfd[LXC_NS_MAX];
int netnsfd;
struct lxc_list state_clients;
};
struct lxc_operations {
int (*start)(struct lxc_handler *, void *);
int (*post_start)(struct lxc_handler *, void *);
};
struct state_client {
int clientfd;
lxc_state_t states[MAX_STATE];
};
extern int lxc_poll(const char *name, struct lxc_handler *handler);
extern int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state);

View File

@ -79,7 +79,7 @@ lxc_state_t lxc_getstate(const char *name, const char *lxcpath)
return state;
}
static int fillwaitedstates(const char *strstates, int *states)
static int fillwaitedstates(const char *strstates, lxc_state_t *states)
{
char *token, *saveptr = NULL;
char *strstates_dup = strdup(strstates);
@ -108,90 +108,21 @@ static int fillwaitedstates(const char *strstates, int *states)
extern int lxc_wait(const char *lxcname, const char *states, int timeout,
const char *lxcpath)
{
struct lxc_msg msg;
int state;
int s[MAX_STATE] = {0}, fd = -1, ret = -1;
lxc_state_t s[MAX_STATE] = {0};
if (fillwaitedstates(states, s))
return -1;
/*
* if container present,
* then check if already in requested state
*/
state = lxc_getstate(lxcname, lxcpath);
state = lxc_cmd_state_server(lxcname, lxcpath, s);
if (state < 0) {
goto out_close;
} else if ((state >= 0) && (s[state])) {
ret = 0;
goto out_close;
SYSERROR("failed to receive state from monitor");
return -1;
}
if (lxc_monitord_spawn(lxcpath))
TRACE("retrieved state of container %s", lxc_state2str(state));
if (!s[state])
return -1;
fd = lxc_monitor_open(lxcpath);
if (fd < 0)
return -1;
for (;;) {
int64_t elapsed_time, curtime = 0;
struct timespec tspec;
int stop = 0;
int retval;
if (timeout != -1) {
retval = clock_gettime(CLOCK_REALTIME, &tspec);
if (retval)
goto out_close;
curtime = tspec.tv_sec;
}
if (lxc_monitor_read_timeout(fd, &msg, timeout) < 0) {
/* try again if select interrupted by signal */
if (errno != EINTR)
goto out_close;
}
if (timeout != -1) {
retval = clock_gettime(CLOCK_REALTIME, &tspec);
if (retval)
goto out_close;
elapsed_time = tspec.tv_sec - curtime;
if (timeout - elapsed_time <= 0)
stop = 1;
timeout -= elapsed_time;
}
if (strcmp(lxcname, msg.name)) {
if (stop) {
ret = -2;
goto out_close;
}
continue;
}
switch (msg.type) {
case lxc_msg_state:
if (msg.value < 0 || msg.value >= MAX_STATE)
goto out_close;
if (s[msg.value]) {
ret = 0;
goto out_close;
}
break;
default:
if (stop) {
ret = -2;
goto out_close;
}
/* just ignore garbage */
break;
}
}
out_close:
if (fd >= 0)
lxc_monitor_close(fd);
return ret;
return 0;
}

View File

@ -24,8 +24,15 @@
#define __LXC_STATE_H
typedef enum {
STOPPED, STARTING, RUNNING, STOPPING,
ABORTING, FREEZING, FROZEN, THAWED, MAX_STATE,
STOPPED,
STARTING,
RUNNING,
STOPPING,
ABORTING,
FREEZING,
FROZEN,
THAWED,
MAX_STATE,
} lxc_state_t;
extern int lxc_rmstate(const char *name);