make monitor/monitord more resilient to unexpected termination

Reported-by: Florian Klink <flokli@flokli.de>
Signed-off-by: Dwight Engen <dwight.engen@oracle.com>
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
This commit is contained in:
Dwight Engen 2014-04-02 13:12:38 -04:00 committed by Serge Hallyn
parent 94a77f3fd8
commit 8bf1e61ea3
2 changed files with 25 additions and 7 deletions

View File

@ -75,6 +75,7 @@ static int quit;
static int lxc_monitord_fifo_create(struct lxc_monitor *mon) static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
{ {
struct flock lk;
char fifo_path[PATH_MAX]; char fifo_path[PATH_MAX];
int ret; int ret;
@ -83,8 +84,8 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
return ret; return ret;
ret = mknod(fifo_path, S_IFIFO|S_IRUSR|S_IWUSR, 0); ret = mknod(fifo_path, S_IFIFO|S_IRUSR|S_IWUSR, 0);
if (ret < 0) { if (ret < 0 && errno != EEXIST) {
INFO("monitor fifo %s exists, already running?", fifo_path); INFO("failed to mknod monitor fifo %s %s", fifo_path, strerror(errno));
return -1; return -1;
} }
@ -94,6 +95,17 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
ERROR("failed to open monitor fifo"); ERROR("failed to open monitor fifo");
return -1; return -1;
} }
lk.l_type = F_WRLCK;
lk.l_whence = SEEK_SET;
lk.l_start = 0;
lk.l_len = 0;
if (fcntl(mon->fifofd, F_SETLK, &lk) != 0) {
/* another lxc-monitord is already running, don't start up */
DEBUG("lxc-monitord already running on lxcpath %s", mon->lxcpath);
close(mon->fifofd);
return -1;
}
return 0; return 0;
} }
@ -264,8 +276,8 @@ static void lxc_monitord_delete(struct lxc_monitor *mon)
lxc_monitord_sock_delete(mon); lxc_monitord_sock_delete(mon);
lxc_mainloop_del_handler(&mon->descr, mon->fifofd); lxc_mainloop_del_handler(&mon->descr, mon->fifofd);
close(mon->fifofd);
lxc_monitord_fifo_delete(mon); lxc_monitord_fifo_delete(mon);
close(mon->fifofd);
for (i = 0; i < mon->clientfds_cnt; i++) { for (i = 0; i < mon->clientfds_cnt; i++) {
lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]); lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]);
@ -401,7 +413,7 @@ int main(int argc, char *argv[])
goto out; goto out;
} }
NOTICE("monitoring lxcpath %s", mon.lxcpath); NOTICE("pid:%d monitoring lxcpath %s", getpid(), mon.lxcpath);
for(;;) { for(;;) {
ret = lxc_mainloop(&mon.descr, 1000 * 30); ret = lxc_mainloop(&mon.descr, 1000 * 30);
if (mon.clientfds_cnt <= 0) if (mon.clientfds_cnt <= 0)

View File

@ -95,14 +95,20 @@ static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath)
if (ret < 0) if (ret < 0)
return; return;
fd = open(fifo_path, O_WRONLY); /* open the fifo non-blocking in case the monitor is dead; we don't want
* the open to wait for a reader, since one may never come.
*/
fd = open(fifo_path, O_WRONLY|O_NONBLOCK);
if (fd < 0) { if (fd < 0) {
/* it is normal for this open to fail when there is no monitor /* it is normal for this open to fail with ENXIO when there is no
* running, so we don't log it * monitor running, so we don't log it
*/ */
return; return;
} }
if (fcntl(fd, F_SETFL, O_WRONLY) < 0)
return;
ret = write(fd, msg, sizeof(*msg)); ret = write(fd, msg, sizeof(*msg));
if (ret != sizeof(*msg)) { if (ret != sizeof(*msg)) {
close(fd); close(fd);