From 8f3e280e77fc55dfb41c47b8c3672b11bd3336cc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 23 Nov 2016 06:47:07 +0100 Subject: [PATCH 1/3] conf: non-functional changes Signed-off-by: Christian Brauner --- src/lxc/conf.c | 84 ++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index d5ea3c07b..2a4a13e51 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -24,20 +24,34 @@ #define _GNU_SOURCE #include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include #include -#include +#include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include +#include +#include #ifdef HAVE_STATVFS #include @@ -49,37 +63,21 @@ #include <../include/openpty.h> #endif -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "bdev.h" -#include "network.h" -#include "error.h" #include "af_unix.h" +#include "bdev.h" +#include "caps.h" /* for lxc_caps_last_cap() */ +#include "cgroup.h" +#include "conf.h" +#include "error.h" +#include "log.h" +#include "lxcaufs.h" +#include "lxclock.h" +#include "lxcoverlay.h" +#include "lxcseccomp.h" +#include "namespace.h" +#include "network.h" #include "parse.h" #include "utils.h" -#include "conf.h" -#include "log.h" -#include "caps.h" /* for lxc_caps_last_cap() */ -#include "lxcaufs.h" -#include "lxcoverlay.h" -#include "cgroup.h" -#include "lxclock.h" -#include "namespace.h" #include "lsm/lsm.h" #if HAVE_SYS_CAPABILITY_H @@ -96,8 +94,6 @@ #include #endif -#include "lxcseccomp.h" - lxc_log_define(lxc_conf, lxc); #define LINELEN 4096 @@ -135,10 +131,10 @@ lxc_log_define(lxc_conf, lxc); static int pivot_root(const char * new_root, const char * put_old) { 
#ifdef __NR_pivot_root -return syscall(__NR_pivot_root, new_root, put_old); + return syscall(__NR_pivot_root, new_root, put_old); #else -errno = ENOSYS; -return -1; + errno = ENOSYS; + return -1; #endif } #else @@ -150,10 +146,10 @@ extern int pivot_root(const char * new_root, const char * put_old); static int sethostname(const char * name, size_t len) { #ifdef __NR_sethostname -return syscall(__NR_sethostname, name, len); + return syscall(__NR_sethostname, name, len); #else -errno = ENOSYS; -return -1; + errno = ENOSYS; + return -1; #endif } #endif From 413c294f756684cb683f78fc317c9c5c0953de18 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Nov 2016 20:55:38 +0100 Subject: [PATCH 2/3] configure: check for memfd_create() Signed-off-by: Christian Brauner --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 2b284cdea..b1bd9a4ce 100644 --- a/configure.ac +++ b/configure.ac @@ -625,10 +625,10 @@ AC_CHECK_DECLS([PR_SET_NO_NEW_PRIVS], [], [], [#include ]) AC_CHECK_DECLS([PR_GET_NO_NEW_PRIVS], [], [], [#include ]) # Check for some headers -AC_CHECK_HEADERS([sys/signalfd.h pty.h ifaddrs.h sys/capability.h sys/personality.h utmpx.h sys/timerfd.h]) +AC_CHECK_HEADERS([sys/signalfd.h pty.h ifaddrs.h sys/capability.h sys/memfd.h sys/personality.h utmpx.h sys/timerfd.h]) # Check for some syscalls functions -AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid]) +AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create]) # Check for some functions AC_CHECK_LIB(pthread, main) From 5ef5c9a34460a8bd7c562b72bc977b2e7ac47cc8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 23 Nov 2016 06:47:37 +0100 Subject: [PATCH 3/3] conf, criu: add make_anonymous_mount_file() Before we used tmpfile() to write out mount entries for the container. 
This requires a writable /tmp file system, which can be a problem on systems where this file system is not present. This commit switches from tmpfile() to the memfd_create() syscall. It allows us to create an anonymous tmpfs file (somewhat similar to mmap()) which is automatically deleted as soon as all references to it are dropped. If we detect that the syscall is not implemented, we fall back to using tmpfile(). Signed-off-by: Christian Brauner --- src/lxc/conf.c | 97 ++++++++++++++++++++++++++++++++++++++++++++------ src/lxc/conf.h | 2 +- src/lxc/criu.c | 2 +- 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 2a4a13e51..7b66e3747 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -63,6 +62,10 @@ #include <../include/openpty.h> #endif +#ifdef HAVE_LINUX_MEMFD_H +#include +#endif + #include "af_unix.h" #include "bdev.h" #include "caps.h" /* for lxc_caps_last_cap() */ @@ -163,6 +166,59 @@ static int sethostname(const char * name, size_t len) #define MS_PRIVATE (1<<18) #endif +/* memfd_create() */ +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif + +#ifndef HAVE_MEMFD_CREATE +static int memfd_create(const char *name, unsigned int flags) { + #ifndef __NR_memfd_create + #if defined __i386__ + #define __NR_memfd_create 356 + #elif defined __x86_64__ + #define __NR_memfd_create 319 + #elif defined __arm__ + #define __NR_memfd_create 385 + #elif defined __aarch64__ + #define __NR_memfd_create 279 + #elif defined __s390__ + #define __NR_memfd_create 350 + #elif defined __powerpc__ + #define __NR_memfd_create 360 + #elif defined __sparc__ + #define __NR_memfd_create 348 + #elif defined __blackfin__ + #define __NR_memfd_create 390 + #elif defined __ia64__ + #define __NR_memfd_create 1340 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 + 
#define __NR_memfd_create 4354 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 + #define __NR_memfd_create 6318 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 + #define __NR_memfd_create 5314 + #endif + #endif + #endif + #ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); + #else + errno = ENOSYS; + return -1; + #endif +} +#else +extern int memfd_create(const char *name, unsigned int flags); +#endif + char *lxchook_names[NUM_LXC_HOOKS] = { "pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy" }; @@ -1946,34 +2002,53 @@ static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab, return ret; } -FILE *write_mount_file(struct lxc_list *mount) +FILE *make_anonymous_mount_file(struct lxc_list *mount) { - FILE *file; - struct lxc_list *iterator; + int ret; char *mount_entry; + struct lxc_list *iterator; + FILE *file; + int fd = -1; + + fd = memfd_create("lxc_mount_file", MFD_CLOEXEC); + if (fd < 0) { + if (errno != ENOSYS) + return NULL; + file = tmpfile(); + } else { + file = fdopen(fd, "r+"); + } - file = tmpfile(); if (!file) { - ERROR("Could not create temporary file: %s.", strerror(errno)); + if (fd != -1) + close(fd); + ERROR("Could not create mount entry file: %s.", strerror(errno)); return NULL; } lxc_list_for_each(iterator, mount) { mount_entry = iterator->elem; - fprintf(file, "%s\n", mount_entry); + ret = fprintf(file, "%s\n", mount_entry); + if (ret < strlen(mount_entry)) + WARN("Could not write mount entry to anonymous mount file."); + } + + if (fseek(file, 0, SEEK_SET) < 0) { + fclose(file); + return NULL; } - rewind(file); return file; } -static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount, - const char *lxc_name, const char *lxc_path) +static int setup_mount_entries(const struct lxc_rootfs *rootfs, + struct lxc_list *mount, const char *lxc_name, + const char *lxc_path) { FILE *file; int ret; - file = write_mount_file(mount); + file = 
make_anonymous_mount_file(mount); if (!file) return -1; diff --git a/src/lxc/conf.h b/src/lxc/conf.h index ae29d4210..b7d15cbd2 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -452,6 +452,6 @@ extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, extern void tmp_proc_unmount(struct lxc_conf *lxc_conf); void remount_all_slave(void); extern void suggest_default_idmap(void); -FILE *write_mount_file(struct lxc_list *mount); +FILE *make_anonymous_mount_file(struct lxc_list *mount); struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings); #endif diff --git a/src/lxc/criu.c b/src/lxc/criu.c index 50a74000d..125e67428 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -330,7 +330,7 @@ static void exec_criu(struct criu_opts *opts) DECLARE_ARG(opts->user->action_script); } - mnts = write_mount_file(&opts->c->lxc_conf->mount_list); + mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list); if (!mnts) goto err;