From 6a22e862589c9d3e42d2d79fdf7e73a62b8145bd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Nov 2016 20:02:24 +0100 Subject: [PATCH 1/3] configure: check whether gettid() is declared Signed-off-by: Christian Brauner --- configure.ac | 2 +- src/lxc/log.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index cd424c450..2b284cdea 100644 --- a/configure.ac +++ b/configure.ac @@ -628,7 +628,7 @@ AC_CHECK_DECLS([PR_GET_NO_NEW_PRIVS], [], [], [#include ]) AC_CHECK_HEADERS([sys/signalfd.h pty.h ifaddrs.h sys/capability.h sys/personality.h utmpx.h sys/timerfd.h]) # Check for some syscalls functions -AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat]) +AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid]) # Check for some functions AC_CHECK_LIB(pthread, main) diff --git a/src/lxc/log.c b/src/lxc/log.c index a08cc2637..7b89aada9 100644 --- a/src/lxc/log.c +++ b/src/lxc/log.c @@ -45,6 +45,20 @@ #define LXC_LOG_DATEFOMAT_SIZE 15 +#ifndef HAVE_GETTID +static inline pid_t gettid(void) +{ +#ifdef __NR_gettid + return (pid_t)syscall(SYS_gettid); +#else + errno = ENOSYS; + return -1; +#endif +} +#else +extern pid_t gettid(void); +#endif + int lxc_log_fd = -1; static int syslog_enable = 0; int lxc_quiet_specified; From 0dcdbf8a67534897275795edddd5550993ecc003 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Nov 2016 17:28:18 +0100 Subject: [PATCH 2/3] log: fix race localtime_r() can lead to deadlocks because it calls __tzset() and __tzconvert() internally. The deadlock stems from an interaction between these functions and the functions in monitor.c and commands.{c,h}. The latter functions will write to the log independent of the container thread that is currently running. Since the monitor fork()ed it seems to duplicate the mutex states of the time functions mentioned above causing the deadlock. As a short termm fix, I suggest to simply disable receiving the time when monitor.c or command.{c,h} functions are called. This should be ok, since the [lxc monitor] will only emit a few messages and thread-safety is currently more important than beautiful logs. The rest of the log stays the same as it was before. Here is an example output from logs where I printed the pid and tid of the process that is currently writing to the log: lxc 20161125170200.619 INFO lxc_start: 18695-18695: - start.c:lxc_check_inherited:243 - Closed inherited fd: 23. lxc 20161125170200.640 DEBUG lxc_start: 18677-18677: - start.c:__lxc_start:1334 - Not dropping CAP_SYS_BOOT or watching utmp. lxc 20161125170200.640 INFO lxc_cgroup: 18677-18677: - cgroups/cgroup.c:cgroup_init:68 - cgroup driver cgroupfs-ng initing for lxc-test-concurrent-0 ----------> lxc 20150427012246.000 INFO lxc_monitor: 13017-18622: - monitor.c:lxc_monitor_sock_name:178 - using monitor sock name lxc/ad055575fe28ddd5//var/lib/lxc lxc 20161125170200.662 DEBUG lxc_cgfsng: 18677-18677: - cgroups/cgfsng.c:filter_and_set_cpus:478 - No isolated cpus detected. lxc 20161125170200.662 DEBUG lxc_cgfsng: 18677-18677: - cgroups/cgfsng.c:handle_cpuset_hierarchy:648 - "cgroup.clone_children" was already set to "1". Signed-off-by: Christian Brauner --- src/lxc/log.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/lxc/log.c b/src/lxc/log.c index 7b89aada9..c2400197a 100644 --- a/src/lxc/log.c +++ b/src/lxc/log.c @@ -155,7 +155,7 @@ static int log_append_logfile(const struct lxc_log_appender *appender, char buffer[LXC_LOG_BUFFER_SIZE]; struct tm newtime; int n; - int ms; + int ms = 0; int fd_to_use = -1; #ifndef NO_LXC_CONF @@ -169,11 +169,25 @@ static int log_append_logfile(const struct lxc_log_appender *appender, if (fd_to_use == -1) return 0; - if (!localtime_r(&event->timestamp.tv_sec, &newtime)) - return 0; - - strftime(date, sizeof(date), "%Y%m%d%H%M%S", &newtime); - ms = event->timestamp.tv_usec / 1000; + /* localtime_r() can lead to deadlocks because it calls __tzset() and + * __tzconvert() internally. The deadlock stems from an interaction + * between these functions and the functions in monitor.c and + * commands.{c,h}. The latter functions will write to the log + * independent of the container thread that is currently running. Since + * the monitor fork()ed it seems to duplicate the mutex states of the + * time functions mentioned above. + * As a short termm fix, I suggest to simply disable receiving the time + * when monitor.c or command.{c,h} functions are called. This should be + * ok, since the [lxc monitor] will only emit a few messages and + * thread-safety is currently more important than beautiful logs. The + * rest of the log stays the same as it was before. + */ + if (getpid() == gettid()) { + if (!localtime_r(&event->timestamp.tv_sec, &newtime)) + return 0; + strftime(date, sizeof(date), "%Y%m%d%H%M%S", &newtime); + ms = event->timestamp.tv_usec / 1000; + } n = snprintf(buffer, sizeof(buffer), "%15s%s%s %10s.%03d %-8s %s - %s:%s:%d - ", log_prefix, @@ -190,13 +204,9 @@ static int log_append_logfile(const struct lxc_log_appender *appender, return n; if (n < sizeof(buffer) - 1) - n += vsnprintf(buffer + n, sizeof(buffer) - n, event->fmt, - *event->vap); - else { - WARN("truncated next event from %d to %zd bytes", n, - sizeof(buffer)); + n += vsnprintf(buffer + n, sizeof(buffer) - n, event->fmt, *event->vap); + else n = sizeof(buffer) - 1; - } buffer[n] = '\n'; From 450b6d3d684000762dab8817825feb6da55b08f4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Nov 2016 20:23:00 +0100 Subject: [PATCH 3/3] log: use N/A if getpid() != gettid() when threaded Sample log output: lxc 20161125201943.819 INFO lxc_start - start.c:lxc_check_inherited:243 - Closed inherited fd: 54. --> lxc N/A INFO lxc_monitor - monitor.c:lxc_monitor_sock_name:178 - using monitor sock name lxc/ad055575fe28ddd5//var/lib/lxc lxc 20161125201943.958 DEBUG lxc_commands - commands.c:lxc_cmd_handler:893 - peer has disconnected --> lxc N/A DEBUG lxc_commands - commands.c:lxc_cmd_get_state:579 - 'lxc-test-concurrent-0' is in 'RUNNING' state lxc 20161125201943.960 DEBUG lxc_commands - commands.c:lxc_cmd_handler:893 - peer has disconnected lxc 20161125201944.009 INFO lxc_start - start.c:lxc_check_inherited:243 - Closed inherited fd: 3. Signed-off-by: Christian Brauner --- src/lxc/log.c | 62 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/src/lxc/log.c b/src/lxc/log.c index c2400197a..e9f2d7dc6 100644 --- a/src/lxc/log.c +++ b/src/lxc/log.c @@ -151,7 +151,7 @@ static int log_append_stderr(const struct lxc_log_appender *appender, static int log_append_logfile(const struct lxc_log_appender *appender, struct lxc_log_event *event) { - char date[LXC_LOG_DATEFOMAT_SIZE] = "20150427012246"; + char date[LXC_LOG_DATEFOMAT_SIZE] = "N/A"; char buffer[LXC_LOG_BUFFER_SIZE]; struct tm newtime; int n; @@ -169,41 +169,53 @@ static int log_append_logfile(const struct lxc_log_appender *appender, if (fd_to_use == -1) return 0; - /* localtime_r() can lead to deadlocks because it calls __tzset() and - * __tzconvert() internally. The deadlock stems from an interaction - * between these functions and the functions in monitor.c and - * commands.{c,h}. The latter functions will write to the log - * independent of the container thread that is currently running. Since - * the monitor fork()ed it seems to duplicate the mutex states of the - * time functions mentioned above. - * As a short termm fix, I suggest to simply disable receiving the time - * when monitor.c or command.{c,h} functions are called. This should be - * ok, since the [lxc monitor] will only emit a few messages and - * thread-safety is currently more important than beautiful logs. The - * rest of the log stays the same as it was before. + /* localtime_r() can lead to deadlocks when LXC is used multithreaded + * because it calls __tzset() and __tzconvert() internally. The deadlock + * stems from an interaction between these functions and the functions + * in monitor.c and commands.{c,h}. Some of the latter functions will + * write to the log independent of the container thread that is + * currently running. Since the monitor fork()ed it seems to duplicate + * the mutex states of the time functions mentioned above. + * As a short term fix, I suggest to simply disable receiving the time + * when these monitor.c or command.{c,h} functions are called. This + * should be ok, since the [lxc monitor] will only emit a few messages + * and thread-safety is currently more important than beautiful logs. + * The rest of the log stays the same as it was before. */ if (getpid() == gettid()) { if (!localtime_r(&event->timestamp.tv_sec, &newtime)) return 0; strftime(date, sizeof(date), "%Y%m%d%H%M%S", &newtime); ms = event->timestamp.tv_usec / 1000; + n = snprintf(buffer, sizeof(buffer), + "%15s%s%s %14s.%03d %-8s %s - %s:%s:%d - ", + log_prefix, + log_vmname ? " " : "", + log_vmname ? log_vmname : "", + date, + ms, + lxc_log_priority_to_string(event->priority), + event->category, + event->locinfo->file, event->locinfo->func, + event->locinfo->line); + } else { + n = snprintf(buffer, sizeof(buffer), + "%15s%s%s %-14s %3s %-8s %s - %s:%s:%d - ", + log_prefix, + log_vmname ? " " : "", + log_vmname ? log_vmname : "", + date, + "", + lxc_log_priority_to_string(event->priority), + event->category, + event->locinfo->file, event->locinfo->func, + event->locinfo->line); } - n = snprintf(buffer, sizeof(buffer), - "%15s%s%s %10s.%03d %-8s %s - %s:%s:%d - ", - log_prefix, - log_vmname ? " " : "", - log_vmname ? log_vmname : "", - date, - ms, - lxc_log_priority_to_string(event->priority), - event->category, - event->locinfo->file, event->locinfo->func, - event->locinfo->line); if (n < 0) return n; - if (n < sizeof(buffer) - 1) + if ((size_t)n < (sizeof(buffer) - 1)) n += vsnprintf(buffer + n, sizeof(buffer) - n, event->fmt, *event->vap); else n = sizeof(buffer) - 1;