From f749d524f18a8b8ae29df6ccbdb05c949f409cff Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 13:36:36 +0200 Subject: [PATCH 01/13] utils: move memfd_create() definition Signed-off-by: Christian Brauner --- src/lxc/conf.c | 57 ------------------------------------------------- src/lxc/utils.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 88ed2b7a6..3b1e422a3 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -68,10 +68,6 @@ #include <../include/openpty.h> #endif -#ifdef HAVE_LINUX_MEMFD_H -#include -#endif - #include "af_unix.h" #include "caps.h" /* for lxc_caps_last_cap() */ #include "cgroup.h" @@ -181,59 +177,6 @@ static int sethostname(const char * name, size_t len) #define MS_LAZYTIME (1<<25) #endif -/* memfd_create() */ -#ifndef MFD_CLOEXEC -#define MFD_CLOEXEC 0x0001U -#endif - -#ifndef MFD_ALLOW_SEALING -#define MFD_ALLOW_SEALING 0x0002U -#endif - -#ifndef HAVE_MEMFD_CREATE -static int memfd_create(const char *name, unsigned int flags) { - #ifndef __NR_memfd_create - #if defined __i386__ - #define __NR_memfd_create 356 - #elif defined __x86_64__ - #define __NR_memfd_create 319 - #elif defined __arm__ - #define __NR_memfd_create 385 - #elif defined __aarch64__ - #define __NR_memfd_create 279 - #elif defined __s390__ - #define __NR_memfd_create 350 - #elif defined __powerpc__ - #define __NR_memfd_create 360 - #elif defined __sparc__ - #define __NR_memfd_create 348 - #elif defined __blackfin__ - #define __NR_memfd_create 390 - #elif defined __ia64__ - #define __NR_memfd_create 1340 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 - #define __NR_memfd_create 4354 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 - #define __NR_memfd_create 6318 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 - #define __NR_memfd_create 5314 - #endif - #endif - #endif - #ifdef __NR_memfd_create - return syscall(__NR_memfd_create, name, flags); 
- #else - errno = ENOSYS; - return -1; - #endif -} -#else -extern int memfd_create(const char *name, unsigned int flags); -#endif - char *lxchook_names[NUM_LXC_HOOKS] = {"pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy", diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 41c43827a..2005aa61e 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -39,6 +39,10 @@ #include #include +#ifdef HAVE_LINUX_MEMFD_H +#include +#endif + #include "initutils.h" /* Define __S_ISTYPE if missing from the C library. */ @@ -184,6 +188,59 @@ static inline int signalfd(int fd, const sigset_t *mask, int flags) #define LOOP_CTL_GET_FREE 0x4C82 #endif +/* memfd_create() */ +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif + +#ifndef HAVE_MEMFD_CREATE +static inline int memfd_create(const char *name, unsigned int flags) { + #ifndef __NR_memfd_create + #if defined __i386__ + #define __NR_memfd_create 356 + #elif defined __x86_64__ + #define __NR_memfd_create 319 + #elif defined __arm__ + #define __NR_memfd_create 385 + #elif defined __aarch64__ + #define __NR_memfd_create 279 + #elif defined __s390__ + #define __NR_memfd_create 350 + #elif defined __powerpc__ + #define __NR_memfd_create 360 + #elif defined __sparc__ + #define __NR_memfd_create 348 + #elif defined __blackfin__ + #define __NR_memfd_create 390 + #elif defined __ia64__ + #define __NR_memfd_create 1340 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 + #define __NR_memfd_create 4354 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 + #define __NR_memfd_create 6318 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 + #define __NR_memfd_create 5314 + #endif + #endif + #endif + #ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); + #else + errno = ENOSYS; + return -1; + #endif +} +#else +extern int memfd_create(const char *name, unsigned int flags); +#endif + /* Struct to carry child 
pid from lxc_popen() to lxc_pclose(). * Not an opaque struct to allow direct access to the underlying FILE * * (i.e., struct lxc_popen_FILE *file; fgets(buf, sizeof(buf), file->f)) From b499121f1fd0729d84a64458038a01f7009d707c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 19:48:29 +0200 Subject: [PATCH 02/13] utils: add lxc_cloexec() Signed-off-by: Christian Brauner --- src/lxc/utils.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 2005aa61e..881a09d5d 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -241,6 +241,11 @@ static inline int memfd_create(const char *name, unsigned int flags) { extern int memfd_create(const char *name, unsigned int flags); #endif +static inline int lxc_set_cloexec(int fd) +{ + return fcntl(fd, F_SETFD, FD_CLOEXEC); +} + /* Struct to carry child pid from lxc_popen() to lxc_pclose(). * Not an opaque struct to allow direct access to the underlying FILE * * (i.e., struct lxc_popen_FILE *file; fgets(buf, sizeof(buf), file->f)) From 127c6e703b0a10b68aef44e8951b0507d72efd90 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 19:48:57 +0200 Subject: [PATCH 03/13] utils: add lxc_make_tmpfile() Signed-off-by: Christian Brauner --- src/lxc/utils.c | 20 ++++++++++++++++++++ src/lxc/utils.h | 1 + 2 files changed, 21 insertions(+) diff --git a/src/lxc/utils.c b/src/lxc/utils.c index 4c886cadd..e4015bdc8 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -2339,3 +2339,23 @@ bool lxc_nic_exists(char *nic) return true; } + +int lxc_make_tmpfile(char *template, bool rm) +{ + int fd, ret; + + fd = mkstemp(template); + if (fd < 0) + return -1; + + if (!rm) + return fd; + + ret = unlink(template); + if (ret < 0) { + close(fd); + return -1; + } + + return fd; +} diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 881a09d5d..32a181acc 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -466,5 +466,6 @@ typedef __typeof__(((struct statfs *)NULL)->f_type) 
fs_type_magic; extern bool has_fs_type(const char *path, fs_type_magic magic_val); extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val); extern bool lxc_nic_exists(char *nic); +extern int lxc_make_tmpfile(char *template, bool rm); #endif /* __LXC_UTILS_H */ From e46361235c04c10a9dfb5f2485e23a187fae9189 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 21 Oct 2017 16:22:58 +0200 Subject: [PATCH 04/13] utils: add lxc_getpagesize() Signed-off-by: Christian Brauner --- src/lxc/utils.c | 11 +++++++++++ src/lxc/utils.h | 1 + 2 files changed, 12 insertions(+) diff --git a/src/lxc/utils.c b/src/lxc/utils.c index e4015bdc8..95aad8b09 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -2359,3 +2359,14 @@ int lxc_make_tmpfile(char *template, bool rm) return fd; } + +uint64_t lxc_getpagesize(void) +{ + int64_t pgsz; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz <= 0) + pgsz = 1 << 12; + + return pgsz; +} diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 32a181acc..8d32e4aaa 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -467,5 +467,6 @@ extern bool has_fs_type(const char *path, fs_type_magic magic_val); extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val); extern bool lxc_nic_exists(char *nic); extern int lxc_make_tmpfile(char *template, bool rm); +extern uint64_t lxc_getpagesize(void); #endif /* __LXC_UTILS_H */ From f3d05ee66dbf0e8283a2eeb6321e1bc7dfcb3034 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 14:19:31 +0200 Subject: [PATCH 05/13] ringbuf: implement simple and efficient ringbuffer liblxc will use a ringbuffer implementation that employs mmap()ed memory. Specifically, the ringbuffer will create an anonymous memory mapping twice the requested size for the ringbuffer. Afterwards, an in-memory file the requested size for the ringbuffer will be created. 
This in-memory file will then be memory mapped twice into the previously established anonymous memory mapping thereby effectively splitting the anonymous memory mapping in two halves of equal size. This will allow the ringbuffer to get rid of any complex boundary and wrap-around calculation logic. Since the underlying physical memory is the same in both halves of the memory mapping only a single memcpy() call for both reads and writes from and to the ringbuffer is needed. Design Notes: - Since we're using MAP_FIXED memory mappings to map the same in-memory file twice into the anonymous memory mapping the kernel requires us to always operate on properly aligned pages. To guarantee proper page alignment the size of the ringbuffer must always be a multiple of the kernel's page size. This also implies that the minimum size of the ringbuffer must be at least equal to one page size. This additional requirement is reasonably unproblematic. First, any ringbuffer smaller than the size of a single page is very likely useless since the standard page size on Linux is 4096 bytes. - Because liblxc is not able to predict the output a user is going to produce (e.g. users could cat binary files onto the console) and because the ringbuffer is located in a hotpath and needs to be as performant as possible liblxc will not parse the buffer. Use Case: The ringbuffer is needed by liblxc in order to safely log the output of write intensive callers that produce unpredictable output or unpredictable amounts of output. The console output created by a booting system and the user is one of those cases. If a container is allowed to log the console's output to a file, it would be possible for a malicious user to fill up the host filesystem by producing random output on the container's console if quota support is either not enabled or not available for the underlying filesystem. Using a ringbuffer is a reliable and secure way to ensure a fixed-size log. Closes #1857. 
Signed-off-by: Christian Brauner --- src/lxc/Makefile.am | 1 + src/lxc/ringbuf.c | 145 ++++++++++++++++++++++++++++++++++++++++++++ src/lxc/ringbuf.h | 90 +++++++++++++++++++++++++++ 3 files changed, 236 insertions(+) create mode 100644 src/lxc/ringbuf.c create mode 100644 src/lxc/ringbuf.h diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am index b71992d75..fff32ae4f 100644 --- a/src/lxc/Makefile.am +++ b/src/lxc/Makefile.am @@ -116,6 +116,7 @@ liblxc_la_SOURCES = \ log.c log.h \ attach.c attach.h \ criu.c criu.h \ + ringbuf.c ringbuf.h \ \ network.c network.h \ nl.c nl.h \ diff --git a/src/lxc/ringbuf.c b/src/lxc/ringbuf.c new file mode 100644 index 000000000..1299fe709 --- /dev/null +++ b/src/lxc/ringbuf.c @@ -0,0 +1,145 @@ +/* liblxcapi + * + * Copyright © 2017 Christian Brauner . + * Copyright © 2017 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define _GNU_SOURCE +#define __STDC_FORMAT_MACROS +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ringbuf.h" +#include "utils.h" + +int lxc_ringbuf_create(struct lxc_ringbuf *buf, size_t size) +{ + char *tmp; + int ret; + int memfd = -1; + + buf->size = size; + buf->r_off = 0; + buf->w_off = 0; + + /* verify that we are at least given the multiple of a page size */ + if (buf->size % lxc_getpagesize()) + return -EINVAL; + + buf->addr = mmap(NULL, buf->size * 2, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (buf->addr == MAP_FAILED) + return -EINVAL; + + memfd = memfd_create(".lxc_ringbuf", MFD_CLOEXEC); + if (memfd < 0) { + if (errno != ENOSYS) + goto on_error; + + memfd = lxc_make_tmpfile((char *){P_tmpdir"/.lxc_ringbuf_XXXXXX"}, true); + } + if (memfd < 0) + goto on_error; + + ret = ftruncate(memfd, buf->size); + if (ret < 0) + goto on_error; + + tmp = mmap(buf->addr, buf->size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, memfd, 0); + if (tmp == MAP_FAILED || tmp != buf->addr) + goto on_error; + + tmp = mmap(buf->addr + buf->size, buf->size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, memfd, 0); + if (tmp == MAP_FAILED || tmp != (buf->addr + buf->size)) + goto on_error; + + close(memfd); + + return 0; + +on_error: + lxc_ringbuf_release(buf); + if (memfd >= 0) + close(memfd); + return -1; +} + +void lxc_ringbuf_move_read_addr(struct lxc_ringbuf *buf, size_t len) +{ + buf->r_off += len; + + if (buf->r_off < buf->size) + return; + + /* wrap around */ + buf->r_off -= buf->size; + buf->w_off -= buf->size; +} + +/** + * lxc_ringbuf_write - write a message to the ringbuffer + * - The size of the message should never be greater than the size of the whole + * ringbuffer. + * - The write method will always succeed i.e. it will always advance the r_off + * if it detects that there's not enough space available to write the + * message. 
+ */ +int lxc_ringbuf_write(struct lxc_ringbuf *buf, const char *msg, size_t len) +{ + char *w_addr; + uint64_t free; + + /* sanity check: a write should never exceed the ringbuffer's total size */ + if (len > buf->size) + return -EFBIG; + + free = lxc_ringbuf_free(buf); + + /* not enough space left: drop only the (len - free) oldest bytes so r_off never passes w_off */ + if (len > free) + lxc_ringbuf_move_read_addr(buf, len - free); + w_addr = lxc_ringbuf_get_write_addr(buf); + memcpy(w_addr, msg, len); + lxc_ringbuf_move_write_addr(buf, len); + return 0; +} + +int lxc_ringbuf_read(struct lxc_ringbuf *buf, char *out, size_t *len) +{ + uint64_t used; + + /* there's nothing to read */ + if (buf->r_off == buf->w_off) + return -ENODATA; + + /* read maximum amount available */ + used = lxc_ringbuf_used(buf); + if (used < *len) + *len = used; + + /* copy data to reader but don't advance addr; the last copied byte is replaced by a terminator */ + memcpy(out, lxc_ringbuf_get_read_addr(buf), *len); + if (*len > 0) out[*len - 1] = '\0'; /* a zero-length request must not write out[-1] */ + return 0; +} diff --git a/src/lxc/ringbuf.h b/src/lxc/ringbuf.h new file mode 100644 index 000000000..0e8e7922f --- /dev/null +++ b/src/lxc/ringbuf.h @@ -0,0 +1,90 @@ +/* liblxcapi + * + * Copyright © 2017 Christian Brauner . + * Copyright © 2017 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#ifndef __LXC_RINGBUF_H +#define __LXC_RINGBUF_H + +#include +#include +#include +#include + +/** + * lxc_ringbuf - Implements a simple and efficient memory mapped ringbuffer. + * - The "addr" field of struct lxc_ringbuf is considered immutable. Instead the + * read and write offsets r_off and w_off are used to calculate the current + * read and write addresses. There should never be a need to use any of those + * fields directly. Instead use the appropriate helpers below. + * - Callers are expected to synchronize read and write accesses to the + * ringbuffer. + */ +struct lxc_ringbuf { + char *addr; /* start address of the ringbuffer */ + uint64_t size; /* total size of the ringbuffer in bytes */ + uint64_t r_off; /* read offset */ + uint64_t w_off; /* write offset */ +}; + +/** + * lxc_ringbuf_create - Initialize a new ringbuffer. + * + * @param[in] size Size of the new ringbuffer as a power of 2. + */ +extern int lxc_ringbuf_create(struct lxc_ringbuf *buf, size_t size); +extern void lxc_ringbuf_move_read_addr(struct lxc_ringbuf *buf, size_t len); +extern int lxc_ringbuf_write(struct lxc_ringbuf *buf, const char *msg, size_t len); +extern int lxc_ringbuf_read(struct lxc_ringbuf *buf, char *out, size_t *len); + +static inline void lxc_ringbuf_release(struct lxc_ringbuf *buf) +{ + munmap(buf->addr, buf->size * 2); +} + +static inline void lxc_ringbuf_clear(struct lxc_ringbuf *buf) +{ + buf->r_off = 0; + buf->w_off = 0; +} + +static inline uint64_t lxc_ringbuf_used(struct lxc_ringbuf *buf) +{ + return buf->w_off - buf->r_off; +} + +static inline uint64_t lxc_ringbuf_free(struct lxc_ringbuf *buf) +{ + return buf->size - lxc_ringbuf_used(buf); +} + +static inline char *lxc_ringbuf_get_read_addr(struct lxc_ringbuf *buf) +{ + return buf->addr + buf->r_off; +} + +static inline char *lxc_ringbuf_get_write_addr(struct lxc_ringbuf *buf) +{ + return buf->addr + buf->w_off; +} + +static inline void lxc_ringbuf_move_write_addr(struct lxc_ringbuf *buf, size_t len) +{ + 
buf->w_off += len; +} + +#endif /* __LXC_RINGBUF_H */ From b037bc675b33363ace7a244ae6e698d0638d2256 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 19:53:17 +0200 Subject: [PATCH 06/13] utils: add lxc_safe_long_long() Signed-off-by: Christian Brauner --- src/lxc/utils.c | 20 ++++++++++++++++++++ src/lxc/utils.h | 1 + 2 files changed, 21 insertions(+) diff --git a/src/lxc/utils.c b/src/lxc/utils.c index 95aad8b09..f76a8a8a0 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -2003,6 +2003,26 @@ int lxc_safe_long(const char *numstr, long int *converted) return 0; } +int lxc_safe_long_long(const char *numstr, long long int *converted) +{ + char *err = NULL; + signed long long int sli; + + errno = 0; + sli = strtoll(numstr, &err, 0); + if (errno == ERANGE && (sli == LLONG_MAX || sli == LLONG_MIN)) + return -ERANGE; + + if (errno != 0 && sli == 0) + return -EINVAL; + + if (err == numstr || *err != '\0') + return -EINVAL; + + *converted = sli; + return 0; +} + int lxc_switch_uid_gid(uid_t uid, gid_t gid) { if (setgid(gid) < 0) { diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 8d32e4aaa..1449f4df4 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -419,6 +419,7 @@ extern bool task_blocking_signal(pid_t pid, int signal); extern int lxc_safe_uint(const char *numstr, unsigned int *converted); extern int lxc_safe_int(const char *numstr, int *converted); extern int lxc_safe_long(const char *numstr, long int *converted); +extern int lxc_safe_long_long(const char *numstr, long long int *converted); extern int lxc_safe_ulong(const char *numstr, unsigned long *converted); /* Switch to a new uid and gid. 
*/ From e3db0162ff48d45b43ed94ff4ba4e6115df6d572 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 19:53:37 +0200 Subject: [PATCH 07/13] utils: parse_byte_size_string() Signed-off-by: Christian Brauner --- src/lxc/utils.c | 67 ++++++++++++++++++++++++++++++ src/lxc/utils.h | 2 + src/tests/lxc-test-utils.c | 83 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+) diff --git a/src/lxc/utils.c b/src/lxc/utils.c index f76a8a8a0..7c60a4451 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -47,6 +47,7 @@ #include "log.h" #include "lxclock.h" #include "namespace.h" +#include "parse.h" #include "utils.h" #ifndef O_PATH @@ -2390,3 +2391,69 @@ uint64_t lxc_getpagesize(void) return pgsz; } + +int parse_byte_size_string(const char *s, int64_t *converted) +{ + int ret, suffix_len; + long long int conv; + int64_t mltpl, overflow; + char *end; + char dup[LXC_NUMSTRLEN64 + 2]; + char suffix[3]; + + if (!s || !strcmp(s, "") || strlen(s) >= sizeof(dup)) /* too-long input would leave dup unterminated */ + return -EINVAL; + + end = stpncpy(dup, s, sizeof(dup)); + if (*end != '\0') + return -EINVAL; + + if (isdigit(*(end - 1))) + suffix_len = 0; + else if (isalpha(*(end - 1))) + suffix_len = 1; + else + return -EINVAL; + + if ((end - 2) == dup && !isdigit(*(end - 2))) + return -EINVAL; + + if ((end - dup) >= 2 && isalpha(*(end - 2))) { /* one-character input has no second-to-last byte */ + if (suffix_len == 1) + suffix_len++; + else + return -EINVAL; + } + + if (suffix_len > 0) { + memcpy(suffix, end - suffix_len, suffix_len); + *(suffix + suffix_len) = '\0'; + *(end - suffix_len) = '\0'; + } + dup[lxc_char_right_gc(dup, strlen(dup))] = '\0'; + + ret = lxc_safe_long_long(dup, &conv); + if (ret < 0) + return ret; /* already a negative errno; negating it would hide the failure from callers testing ret < 0 */ + + if (suffix_len != 2) { + *converted = conv; + return 0; + } + + if (!strcmp(suffix, "kB")) + mltpl = 1024; + else if (!strcmp(suffix, "MB")) + mltpl = 1024 * 1024; + else if (!strcmp(suffix, "GB")) + mltpl = 1024 * 1024 * 1024; + else + return -EINVAL; + + overflow = conv * mltpl; + if (conv != 0 && (overflow / conv) != mltpl) + return -ERANGE; + + *converted = overflow; + 
return 0; +} diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 1449f4df4..fa9f88cc2 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -421,6 +421,8 @@ extern int lxc_safe_int(const char *numstr, int *converted); extern int lxc_safe_long(const char *numstr, long int *converted); extern int lxc_safe_long_long(const char *numstr, long long int *converted); extern int lxc_safe_ulong(const char *numstr, unsigned long *converted); +/* Handles B, kb, MB, GB. Detects overflows and reports -ERANGE. */ +extern int parse_byte_size_string(const char *s, int64_t *converted); /* Switch to a new uid and gid. */ extern int lxc_switch_uid_gid(uid_t uid, gid_t gid); diff --git a/src/tests/lxc-test-utils.c b/src/tests/lxc-test-utils.c index aba7706ab..4c3c17a78 100644 --- a/src/tests/lxc-test-utils.c +++ b/src/tests/lxc-test-utils.c @@ -380,6 +380,88 @@ void test_lxc_string_in_array(void) lxc_test_assert_abort(lxc_string_in_array("XYZ", (const char *[]){"BERTA", "ARQWE(9", "C8Zhkd", "7U", "XYZ", "UOIZ9", "=)()", NULL})); } +void test_parse_byte_size_string(void) +{ + int ret; + int64_t n; + + ret = parse_byte_size_string("0", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 0) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1 ", &n); + if (ret == 0) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1B", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1kB", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1024) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1MB", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1048576) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1GB", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1073741824) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1TB", &n); + if (ret == 0) + exit(EXIT_FAILURE); + + ret = 
parse_byte_size_string("1 B", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1 kB", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1024) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1 MB", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1048576) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1 GB", &n); + if (ret < 0) + exit(EXIT_FAILURE); + if (n != 1073741824) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("1 TB", &n); + if (ret == 0) + exit(EXIT_FAILURE); + + ret = parse_byte_size_string("asdf", &n); + if (ret == 0) + exit(EXIT_FAILURE); +} + int main(int argc, char *argv[]) { test_lxc_string_replace(); @@ -389,6 +471,7 @@ int main(int argc, char *argv[]) test_lxc_safe_uint(); test_lxc_safe_int(); test_lxc_safe_long(); + test_parse_byte_size_string(); exit(EXIT_SUCCESS); } From 6222c3f48b87940377dec082c300770b1c773611 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 21:20:37 +0200 Subject: [PATCH 08/13] utils: add lxc_find_next_power2() Signed-off-by: Christian Brauner --- src/lxc/utils.c | 18 ++++++++++++++++++ src/lxc/utils.h | 10 ++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/lxc/utils.c b/src/lxc/utils.c index 7c60a4451..19e6c2ee6 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -2457,3 +2457,21 @@ int parse_byte_size_string(const char *s, int64_t *converted) *converted = overflow; return 0; } + +uint64_t lxc_find_next_power2(uint64_t n) +{ + /* 0 is not valid input. We return 0 to the caller since 0 is not a + * valid power of two. 
+ */ + if (n == 0) + return 0; + + if (!(n & (n - 1))) + return n; + + while (n & (n - 1)) + n = n & (n - 1); + + n = n << 1; + return n; +} diff --git a/src/lxc/utils.h b/src/lxc/utils.h index fa9f88cc2..7ef3056a5 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -472,4 +472,14 @@ extern bool lxc_nic_exists(char *nic); extern int lxc_make_tmpfile(char *template, bool rm); extern uint64_t lxc_getpagesize(void); +/* If n is not a power of 2 this function will return the next power of 2 + * greater than that number. Note that this function always returns the *next* + * power of 2 *greater* that number not the *nearest*. For example, passing 1025 + * as argument this function will return 2048 although the closest power of 2 + * would be 1024. + * If the caller passes in 0 they will receive 0 in return since this is invalid + * input and 0 is not a power of 2. + */ +extern uint64_t lxc_find_next_power2(uint64_t n); + #endif /* __LXC_UTILS_H */ From 2ea479c9a6a40a8351b9781a6c87edaf561965c0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 16:20:01 +0200 Subject: [PATCH 09/13] confile_utils: add lxc_get_conf_uint64() Signed-off-by: Christian Brauner --- src/lxc/confile_utils.c | 10 ++++++++++ src/lxc/confile_utils.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c index 59d592d74..9db4bc6d3 100644 --- a/src/lxc/confile_utils.c +++ b/src/lxc/confile_utils.c @@ -672,6 +672,16 @@ int lxc_get_conf_int(struct lxc_conf *c, char *retv, int inlen, int v) return snprintf(retv, inlen, "%d", v); } +int lxc_get_conf_uint64(struct lxc_conf *c, char *retv, int inlen, uint64_t v) +{ + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + return snprintf(retv, inlen, "%"PRIu64, v); +} + bool parse_limit_value(const char **value, rlim_t *res) { char *endptr = NULL; diff --git a/src/lxc/confile_utils.h b/src/lxc/confile_utils.h index 585b4b52f..ee9376e24 100644 --- a/src/lxc/confile_utils.h +++ 
b/src/lxc/confile_utils.h @@ -84,5 +84,7 @@ extern void update_hwaddr(const char *line); extern bool new_hwaddr(char *hwaddr); extern int lxc_get_conf_str(char *retv, int inlen, const char *value); extern int lxc_get_conf_int(struct lxc_conf *c, char *retv, int inlen, int v); +extern int lxc_get_conf_uint64(struct lxc_conf *c, char *retv, int inlen, uint64_t v); extern bool parse_limit_value(const char **value, rlim_t *res); + #endif /* __LXC_CONFILE_UTILS_H */ From a04220de0b0f402b0f312ba514bc382d39d0e05c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 16:20:36 +0200 Subject: [PATCH 10/13] confile: add lxc.console.logsize Closes #1857. Signed-off-by: Christian Brauner --- src/lxc/conf.h | 1 + src/lxc/confile.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 169857f2b..d89aed400 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -152,6 +152,7 @@ struct lxc_console { char name[MAXPATHLEN]; struct termios *tios; struct lxc_tty_state *tty_state; + uint64_t log_size; }; /* diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 3567525da..d94a30719 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -83,6 +83,7 @@ lxc_config_define(cap_keep); lxc_config_define(cgroup_controller); lxc_config_define(cgroup_dir); lxc_config_define(console_logfile); +lxc_config_define(console_logsize); lxc_config_define(console_path); lxc_config_define(environment); lxc_config_define(ephemeral); @@ -148,6 +149,7 @@ static struct lxc_config_t config[] = { { "lxc.cgroup.dir", false, set_config_cgroup_dir, get_config_cgroup_dir, clr_config_cgroup_dir, }, { "lxc.cgroup", false, set_config_cgroup_controller, get_config_cgroup_controller, clr_config_cgroup_controller, }, { "lxc.console.logfile", false, set_config_console_logfile, get_config_console_logfile, clr_config_console_logfile, }, + { "lxc.console.logsize", false, set_config_console_logsize, get_config_console_logsize, 
clr_config_console_logsize, }, { "lxc.console.path", false, set_config_console_path, get_config_console_path, clr_config_console_path, }, { "lxc.environment", false, set_config_environment, get_config_environment, clr_config_environment, }, { "lxc.ephemeral", false, set_config_ephemeral, get_config_ephemeral, clr_config_ephemeral, }, @@ -1790,6 +1792,53 @@ static int set_config_console_logfile(const char *key, const char *value, return set_config_path_item(&lxc_conf->console.log_path, value); } +static int set_config_console_logsize(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + int ret; + int64_t size; + uint64_t logsize, pgsz; + + if (lxc_config_value_empty(value)) { + lxc_conf->console.log_size = 0; + return 0; + } + + /* If the user specified "auto" the default log size is 2^17 = 128 Kib */ + if (!strcmp(value, "auto")) { + lxc_conf->console.log_size = 1 << 17; + return 0; + } + + ret = parse_byte_size_string(value, &size); + if (ret < 0) + return -1; + + if (size < 0) + return -EINVAL; + + /* must be at least a page size */ + pgsz = lxc_getpagesize(); + if ((uint64_t)size < pgsz) { + NOTICE("Requested ringbuffer size for the console is %" PRId64 + " but must be at least %" PRId64 + " bytes. Setting ringbuffer size to %" PRId64 " bytes", + size, pgsz, pgsz); + size = pgsz; + } + + logsize = lxc_find_next_power2((uint64_t)size); + if (logsize == 0) + return -EINVAL; + + if (logsize != size) + NOTICE("Passed size was not a power of 2. 
Rounding log size to " + "next power of two: %" PRIu64 " bytes", logsize); + + lxc_conf->console.log_size = logsize; + return 0; +} + int append_unexp_config_line(const char *line, struct lxc_conf *conf) { size_t len = conf->unexpanded_len, linelen = strlen(line); @@ -3023,6 +3072,12 @@ static int get_config_console_logfile(const char *key, char *retv, int inlen, return lxc_get_conf_str(retv, inlen, c->console.log_path); } +static int get_config_console_logsize(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + return lxc_get_conf_uint64(c, retv, inlen, c->console.log_size); /* report the console log size, not the autodev flag */ +} + static int get_config_seccomp_profile(const char *key, char *retv, int inlen, struct lxc_conf *c, void *data) { @@ -3405,6 +3460,13 @@ static inline int clr_config_console_logfile(const char *key, return 0; } +static inline int clr_config_console_logsize(const char *key, + struct lxc_conf *c, void *data) +{ + c->console.log_size = 0; + return 0; +} + static inline int clr_config_seccomp_profile(const char *key, struct lxc_conf *c, void *data) { From 7f135597a271499cd323899f9a5de099b2469db9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 20:24:54 +0200 Subject: [PATCH 11/13] conf: lxc_setup() -> lxc_setup_child() Closes #1857. 
Signed-off-by: Christian Brauner --- src/lxc/conf.c | 2 +- src/lxc/conf.h | 2 +- src/lxc/start.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 3b1e422a3..f6a0788c7 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3079,7 +3079,7 @@ static bool verify_start_hooks(struct lxc_conf *conf) return true; } -int lxc_setup(struct lxc_handler *handler) +int lxc_setup_child(struct lxc_handler *handler) { int ret; const char *name = handler->name; diff --git a/src/lxc/conf.h b/src/lxc/conf.h index d89aed400..d228c94c1 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -377,7 +377,7 @@ extern int lxc_delete_autodev(struct lxc_handler *handler); extern void lxc_clear_includes(struct lxc_conf *conf); extern int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath); -extern int lxc_setup(struct lxc_handler *handler); +extern int lxc_setup_child(struct lxc_handler *handler); extern int setup_resource_limits(struct lxc_list *limits, pid_t pid); extern int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype); extern int mapped_hostid(unsigned id, struct lxc_conf *conf, diff --git a/src/lxc/start.c b/src/lxc/start.c index 402bba552..28ddb721f 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -937,7 +937,7 @@ static int do_start(void *data) } /* Setup the container, ip, names, utsname, ... */ - ret = lxc_setup(handler); + ret = lxc_setup_child(handler); close(handler->data_sock[0]); close(handler->data_sock[1]); if (ret < 0) { From 732375f5f5363f4eb0d4b4575f509f764c76ea1d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 18 Oct 2017 20:30:08 +0200 Subject: [PATCH 12/13] console: add ringbuffer Closes #1857. 
Signed-off-by: Christian Brauner --- src/lxc/conf.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++ src/lxc/conf.h | 3 +++ src/lxc/console.c | 32 ++++++++++++++++++------ src/lxc/start.c | 5 ++++ 4 files changed, 95 insertions(+), 8 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index f6a0788c7..d2fab9450 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -80,6 +80,7 @@ #include "namespace.h" #include "network.h" #include "parse.h" +#include "ringbuf.h" #include "storage.h" #include "storage/aufs.h" #include "storage/overlay.h" @@ -2428,6 +2429,7 @@ struct lxc_conf *lxc_conf_init(void) new->autodev = 1; new->console.log_path = NULL; new->console.log_fd = -1; + new->console.log_size = 0; new->console.path = NULL; new->console.peer = -1; new->console.peerpty.busy = -1; @@ -2436,6 +2438,7 @@ struct lxc_conf *lxc_conf_init(void) new->console.master = -1; new->console.slave = -1; new->console.name[0] = '\0'; + memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf)); new->maincmd_fd = -1; new->nbd_idx = -1; new->rootfs.mount = strdup(default_rootfs_mount); @@ -3079,6 +3082,64 @@ static bool verify_start_hooks(struct lxc_conf *conf) return true; } +/** + * Note that this function needs to run before the mainloop starts. Since we + * register a handler for the console's masterfd when we create the mainloop + * the console handler needs to see an allocated ringbuffer. 
+ */ +static int lxc_setup_console_ringbuf(struct lxc_console *console) +{ + int ret; + struct lxc_ringbuf *buf = &console->ringbuf; + uint64_t size = console->log_size; + + /* no ringbuffer previously allocated and no ringbuffer requested */ + if (!buf->addr && size <= 0) + return 0; + + /* ringbuffer allocated but no new ringbuffer requested */ + if (buf->addr && size <= 0) { + lxc_ringbuf_release(buf); + buf->addr = NULL; + buf->r_off = 0; + buf->w_off = 0; + buf->size = 0; + TRACE("Deallocated console ringbuffer"); + return 0; + } + + if (size <= 0) + return 0; + + /* check whether the requested size for the ringbuffer has changed */ + if (buf->addr && buf->size != size) { + TRACE("Console ringbuffer size changed from %" PRIu64 + " to %" PRIu64 " bytes. Deallocating console ringbuffer", + buf->size, size); + lxc_ringbuf_release(buf); + } + + ret = lxc_ringbuf_create(buf, size); + if (ret < 0) { + ERROR("Failed to setup %" PRIu64 " byte console ringbuffer", size); + return -1; + } + + TRACE("Allocated %" PRIu64 " byte console ringbuffer", size); + return 0; +} + +int lxc_setup_parent(struct lxc_handler *handler) +{ + int ret; + + ret = lxc_setup_console_ringbuf(&handler->conf->console); + if (ret < 0) + return -1; + + return 0; +} + int lxc_setup_child(struct lxc_handler *handler) { int ret; @@ -3459,6 +3520,8 @@ void lxc_conf_free(struct lxc_conf *conf) current_config = NULL; free(conf->console.log_path); free(conf->console.path); + if (conf->console.log_size > 0 && conf->console.ringbuf.addr) + lxc_ringbuf_release(&conf->console.ringbuf); free(conf->rootfs.mount); free(conf->rootfs.bdev_type); free(conf->rootfs.options); diff --git a/src/lxc/conf.h b/src/lxc/conf.h index d228c94c1..c61f861ed 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -36,6 +36,7 @@ #include #include "list.h" +#include "ringbuf.h" #include "start.h" /* for lxc_handler */ #if HAVE_SCMP_FILTER_CTX @@ -153,6 +154,7 @@ struct lxc_console { struct termios *tios; struct lxc_tty_state 
*tty_state; uint64_t log_size; + struct lxc_ringbuf ringbuf; }; /* @@ -378,6 +380,7 @@ extern void lxc_clear_includes(struct lxc_conf *conf); extern int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath); extern int lxc_setup_child(struct lxc_handler *handler); +extern int lxc_setup_parent(struct lxc_handler *handler); extern int setup_resource_limits(struct lxc_list *limits, pid_t pid); extern int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype); extern int mapped_hostid(unsigned id, struct lxc_conf *conf, diff --git a/src/lxc/console.c b/src/lxc/console.c index c8e545eb4..cbee9f8e8 100644 --- a/src/lxc/console.c +++ b/src/lxc/console.c @@ -51,6 +51,8 @@ #include <../include/openpty.h> #endif +#define LXC_CONSOLE_BUFFER_SIZE 1024 + lxc_log_define(console, lxc); static struct lxc_list lxc_ttys; @@ -167,12 +169,12 @@ static int lxc_console_cb_con(int fd, uint32_t events, void *data, struct lxc_epoll_descr *descr) { struct lxc_console *console = (struct lxc_console *)data; - char buf[1024]; - int r, w; + char buf[LXC_CONSOLE_BUFFER_SIZE]; + int r, w, w_log, w_rbuf; w = r = lxc_read_nointr(fd, buf, sizeof(buf)); if (r <= 0) { - INFO("console client on fd %d has exited", fd); + INFO("Console client on fd %d has exited", fd); lxc_mainloop_del_handler(descr, fd); if (fd == console->peer) { if (console->tty_state) { @@ -190,16 +192,30 @@ static int lxc_console_cb_con(int fd, uint32_t events, void *data, if (fd == console->peer) w = lxc_write_nointr(console->master, buf, r); + w_rbuf = w_log = 0; if (fd == console->master) { - if (console->log_fd >= 0) - w = lxc_write_nointr(console->log_fd, buf, r); - + /* write to peer first */ if (console->peer >= 0) w = lxc_write_nointr(console->peer, buf, r); + + /* write to console ringbuffer */ + if (console->log_size > 0) + w_rbuf = lxc_ringbuf_write(&console->ringbuf, buf, r); + + /* write to console log */ + if (console->log_fd >= 0) + w_log = lxc_write_nointr(console->log_fd, buf, r); } if 
(w != r) - WARN("console short write r:%d w:%d", r, w); + WARN("Console short write r:%d != w:%d", r, w); + + if (w_rbuf < 0) + TRACE("%s - Failed to write %d bytes to console ringbuffer", + strerror(-w_rbuf), r); + + if (w_log < 0) + TRACE("Failed to write %d bytes to console log", r); return 0; } @@ -632,7 +648,7 @@ int lxc_console_cb_tty_master(int fd, uint32_t events, void *cbdata, struct lxc_epoll_descr *descr) { struct lxc_tty_state *ts = cbdata; - char buf[1024]; + char buf[LXC_CONSOLE_BUFFER_SIZE]; int r, w; if (fd != ts->masterfd) diff --git a/src/lxc/start.c b/src/lxc/start.c index 28ddb721f..7748dbf61 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -1254,6 +1254,11 @@ static int lxc_spawn(struct lxc_handler *handler) */ flags &= ~CLONE_NEWNET; } + + ret = lxc_setup_parent(handler); + if (ret < 0) + goto out_delete_net; + handler->pid = lxc_clone(do_start, handler, flags); if (handler->pid < 0) { SYSERROR("Failed to clone a new set of namespaces."); From a2028b8f5ff873ebee3a66cee9c2e99310b235f8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 21 Oct 2017 16:27:34 +0200 Subject: [PATCH 13/13] namespace: use lxc_getpagesize() Signed-off-by: Christian Brauner --- src/lxc/namespace.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/lxc/namespace.c b/src/lxc/namespace.c index 3a5b3bef6..e760c0d9a 100644 --- a/src/lxc/namespace.c +++ b/src/lxc/namespace.c @@ -21,17 +21,18 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include #include #include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include -#include "namespace.h" #include "log.h" +#include "namespace.h" +#include "utils.h" lxc_log_define(lxc_namespace, lxc); @@ -53,7 +54,7 @@ pid_t lxc_clone(int (*fn)(void *), void *arg, int flags) .arg = arg, }; - size_t stack_size = sysconf(_SC_PAGESIZE); + size_t stack_size = lxc_getpagesize(); void *stack = alloca(stack_size); 
pid_t ret;