mirror of
https://git.proxmox.com/git/mirror_lxc
synced 2025-07-26 23:37:22 +00:00
4802 lines
112 KiB
C
4802 lines
112 KiB
C
/*
|
|
* lxc: linux Container library
|
|
*
|
|
* (C) Copyright IBM Corp. 2007, 2008
|
|
*
|
|
* Authors:
|
|
* Daniel Lezcano <daniel.lezcano at free.fr>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#define _GNU_SOURCE
|
|
#include "config.h"
|
|
|
|
#include <dirent.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <grp.h>
|
|
#include <inttypes.h>
|
|
#include <libgen.h>
|
|
#include <pwd.h>
|
|
#include <stdarg.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
#include <arpa/inet.h>
|
|
#include <linux/loop.h>
|
|
#include <net/if.h>
|
|
#include <netinet/in.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/param.h>
|
|
#include <sys/prctl.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/sysmacros.h>
|
|
#include <sys/syscall.h>
|
|
#include <sys/types.h>
|
|
#include <sys/utsname.h>
|
|
#include <sys/wait.h>
|
|
|
|
/* makedev() */
|
|
#ifdef MAJOR_IN_MKDEV
|
|
# include <sys/mkdev.h>
|
|
#endif
|
|
|
|
#ifdef HAVE_STATVFS
|
|
#include <sys/statvfs.h>
|
|
#endif
|
|
|
|
#if HAVE_PTY_H
|
|
#include <pty.h>
|
|
#else
|
|
#include <../include/openpty.h>
|
|
#endif
|
|
|
|
#ifdef HAVE_LINUX_MEMFD_H
|
|
#include <linux/memfd.h>
|
|
#endif
|
|
|
|
#include "af_unix.h"
|
|
#include "bdev.h"
|
|
#include "caps.h" /* for lxc_caps_last_cap() */
|
|
#include "cgroup.h"
|
|
#include "conf.h"
|
|
#include "error.h"
|
|
#include "log.h"
|
|
#include "lxcaufs.h"
|
|
#include "lxclock.h"
|
|
#include "lxcoverlay.h"
|
|
#include "lxcseccomp.h"
|
|
#include "namespace.h"
|
|
#include "network.h"
|
|
#include "parse.h"
|
|
#include "utils.h"
|
|
#include "lsm/lsm.h"
|
|
|
|
#if HAVE_LIBCAP
|
|
#include <sys/capability.h>
|
|
#endif
|
|
|
|
#if HAVE_SYS_PERSONALITY_H
|
|
#include <sys/personality.h>
|
|
#endif
|
|
|
|
#if IS_BIONIC
|
|
#include <../include/lxcmntent.h>
|
|
#ifndef HAVE_PRLIMIT
|
|
#include <../include/prlimit.h>
|
|
#endif
|
|
#else
|
|
#include <mntent.h>
|
|
#endif
|
|
|
|
lxc_log_define(lxc_conf, lxc);
|
|
|
|
#if HAVE_LIBCAP
|
|
#ifndef CAP_SETFCAP
|
|
#define CAP_SETFCAP 31
|
|
#endif
|
|
|
|
#ifndef CAP_MAC_OVERRIDE
|
|
#define CAP_MAC_OVERRIDE 32
|
|
#endif
|
|
|
|
#ifndef CAP_MAC_ADMIN
|
|
#define CAP_MAC_ADMIN 33
|
|
#endif
|
|
#endif
|
|
|
|
#ifndef PR_CAPBSET_DROP
|
|
#define PR_CAPBSET_DROP 24
|
|
#endif
|
|
|
|
#ifndef LO_FLAGS_AUTOCLEAR
|
|
#define LO_FLAGS_AUTOCLEAR 4
|
|
#endif
|
|
|
|
/* needed for cgroup automount checks, regardless of whether we
|
|
* have included linux/capability.h or not */
|
|
#ifndef CAP_SYS_ADMIN
|
|
#define CAP_SYS_ADMIN 21
|
|
#endif
|
|
|
|
/*
 * pivot_root(): use the C library's version when the build system found one,
 * otherwise provide a thin wrapper around the raw syscall. If the syscall
 * number is unknown too, the wrapper fails with ENOSYS.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
|
|
|
|
/*
 * sethostname(): only defined here when the C library does not provide it.
 * Falls back to the raw syscall, or fails with ENOSYS when the syscall
 * number is not known at build time.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
|
|
|
|
/* Define __S_ISTYPE if missing from the C library */
|
|
#ifndef __S_ISTYPE
|
|
#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
|
|
#endif
|
|
|
|
#ifndef MS_PRIVATE
|
|
#define MS_PRIVATE (1<<18)
|
|
#endif
|
|
|
|
/* memfd_create() */
|
|
#ifndef MFD_CLOEXEC
|
|
#define MFD_CLOEXEC 0x0001U
|
|
#endif
|
|
|
|
#ifndef MFD_ALLOW_SEALING
|
|
#define MFD_ALLOW_SEALING 0x0002U
|
|
#endif
|
|
|
|
#ifdef HAVE_MEMFD_CREATE
extern int memfd_create(const char *name, unsigned int flags);
#else

/*
 * The kernel headers may predate memfd_create(); supply the syscall number
 * for the architectures we know about.
 */
#ifndef __NR_memfd_create
	#if defined __i386__
		#define __NR_memfd_create 356
	#elif defined __x86_64__
		#define __NR_memfd_create 319
	#elif defined __arm__
		#define __NR_memfd_create 385
	#elif defined __aarch64__
		#define __NR_memfd_create 279
	#elif defined __s390__
		#define __NR_memfd_create 350
	#elif defined __powerpc__
		#define __NR_memfd_create 360
	#elif defined __sparc__
		#define __NR_memfd_create 348
	#elif defined __blackfin__
		#define __NR_memfd_create 390
	#elif defined __ia64__
		#define __NR_memfd_create 1340
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32
			#define __NR_memfd_create 4354
		#elif _MIPS_SIM == _MIPS_SIM_NABI32
			#define __NR_memfd_create 6318
		#elif _MIPS_SIM == _MIPS_SIM_ABI64
			#define __NR_memfd_create 5314
		#endif
	#endif
#endif

/*
 * Local memfd_create() wrapper for C libraries that lack one. Issues the raw
 * syscall, or fails with ENOSYS if no syscall number is available.
 */
static int memfd_create(const char *name, unsigned int flags)
{
#ifndef __NR_memfd_create
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_memfd_create, name, flags);
#endif
}

#endif /* HAVE_MEMFD_CREATE */
|
|
|
|
/* Hook names, one per hook slot (NUM_LXC_HOOKS entries). */
char *lxchook_names[NUM_LXC_HOOKS] = {
	"pre-start",
	"pre-mount",
	"mount",
	"autodev",
	"start",
	"stop",
	"post-stop",
	"clone",
	"destroy",
};
|
|
|
|
/* Per-network-type callback; used for both the setup and the teardown tables. */
typedef int (*instantiate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);
|
|
|
|
/* One textual mount option and the MS_* bits it maps to. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* non-zero for the negating spelling of an option ("rw", "suid", ...) */
	int flag;	/* MS_* bit(s) associated with the keyword */
};
|
|
|
|
/* Maps a capability keyword to its numeric CAP_* value. */
struct caps_opt {
	char *name;	/* keyword without the "CAP_" prefix, lower case */
	int value;	/* CAP_* constant */
};
|
|
|
|
/* Maps a resource-limit keyword to its RLIMIT_* value. */
struct limit_opt {
	char *name;	/* keyword, e.g. "nofile" */
	int value;	/* RLIMIT_* constant */
};
|
|
|
|
/*
 * The lxc_conf of the container an API call is currently working on.
 * It is consulted by the error reporting calls. When thread-local storage
 * is available each thread gets its own pointer.
 */
#ifndef HAVE_TLS
struct lxc_conf *current_config;
#else
__thread struct lxc_conf *current_config;
#endif
|
|
|
|
/* Declare this here, since we don't want to reshuffle the whole file. */
|
|
static int in_caplist(int cap, struct lxc_list *caps);
|
|
|
|
static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
|
|
static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
|
|
static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
|
|
static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
|
|
static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
|
|
static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
|
|
|
|
static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
|
|
[LXC_NET_VETH] = instantiate_veth,
|
|
[LXC_NET_MACVLAN] = instantiate_macvlan,
|
|
[LXC_NET_VLAN] = instantiate_vlan,
|
|
[LXC_NET_PHYS] = instantiate_phys,
|
|
[LXC_NET_EMPTY] = instantiate_empty,
|
|
[LXC_NET_NONE] = instantiate_none,
|
|
};
|
|
|
|
static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
|
|
static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
|
|
static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
|
|
static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
|
|
static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
|
|
static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
|
|
|
|
static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
|
|
[LXC_NET_VETH] = shutdown_veth,
|
|
[LXC_NET_MACVLAN] = shutdown_macvlan,
|
|
[LXC_NET_VLAN] = shutdown_vlan,
|
|
[LXC_NET_PHYS] = shutdown_phys,
|
|
[LXC_NET_EMPTY] = shutdown_empty,
|
|
[LXC_NET_NONE] = shutdown_none,
|
|
};
|
|
|
|
/*
 * Known mount option keywords. Entries with .clear set are the negating
 * spelling of an option ("rw" for MS_RDONLY, "suid" for MS_NOSUID, ...).
 * The table ends with an entry whose name is NULL.
 */
static struct mount_opt mount_opt[] = {
	{ .name = "defaults",      .clear = 0, .flag = 0                },
	{ .name = "ro",            .clear = 0, .flag = MS_RDONLY        },
	{ .name = "rw",            .clear = 1, .flag = MS_RDONLY        },
	{ .name = "suid",          .clear = 1, .flag = MS_NOSUID        },
	{ .name = "nosuid",        .clear = 0, .flag = MS_NOSUID        },
	{ .name = "dev",           .clear = 1, .flag = MS_NODEV         },
	{ .name = "nodev",         .clear = 0, .flag = MS_NODEV         },
	{ .name = "exec",          .clear = 1, .flag = MS_NOEXEC        },
	{ .name = "noexec",        .clear = 0, .flag = MS_NOEXEC        },
	{ .name = "sync",          .clear = 0, .flag = MS_SYNCHRONOUS   },
	{ .name = "async",         .clear = 1, .flag = MS_SYNCHRONOUS   },
	{ .name = "dirsync",       .clear = 0, .flag = MS_DIRSYNC       },
	{ .name = "remount",       .clear = 0, .flag = MS_REMOUNT       },
	{ .name = "mand",          .clear = 0, .flag = MS_MANDLOCK      },
	{ .name = "nomand",        .clear = 1, .flag = MS_MANDLOCK      },
	{ .name = "atime",         .clear = 1, .flag = MS_NOATIME       },
	{ .name = "noatime",       .clear = 0, .flag = MS_NOATIME       },
	{ .name = "diratime",      .clear = 1, .flag = MS_NODIRATIME    },
	{ .name = "nodiratime",    .clear = 0, .flag = MS_NODIRATIME    },
	{ .name = "bind",          .clear = 0, .flag = MS_BIND          },
	{ .name = "rbind",         .clear = 0, .flag = MS_BIND|MS_REC   },
	{ .name = "relatime",      .clear = 0, .flag = MS_RELATIME      },
	{ .name = "norelatime",    .clear = 1, .flag = MS_RELATIME      },
	{ .name = "strictatime",   .clear = 0, .flag = MS_STRICTATIME   },
	{ .name = "nostrictatime", .clear = 1, .flag = MS_STRICTATIME   },
	{ .name = NULL,            .clear = 0, .flag = 0                },
};
|
|
|
|
#if HAVE_LIBCAP
|
|
static struct caps_opt caps_opt[] = {
|
|
{ "chown", CAP_CHOWN },
|
|
{ "dac_override", CAP_DAC_OVERRIDE },
|
|
{ "dac_read_search", CAP_DAC_READ_SEARCH },
|
|
{ "fowner", CAP_FOWNER },
|
|
{ "fsetid", CAP_FSETID },
|
|
{ "kill", CAP_KILL },
|
|
{ "setgid", CAP_SETGID },
|
|
{ "setuid", CAP_SETUID },
|
|
{ "setpcap", CAP_SETPCAP },
|
|
{ "linux_immutable", CAP_LINUX_IMMUTABLE },
|
|
{ "net_bind_service", CAP_NET_BIND_SERVICE },
|
|
{ "net_broadcast", CAP_NET_BROADCAST },
|
|
{ "net_admin", CAP_NET_ADMIN },
|
|
{ "net_raw", CAP_NET_RAW },
|
|
{ "ipc_lock", CAP_IPC_LOCK },
|
|
{ "ipc_owner", CAP_IPC_OWNER },
|
|
{ "sys_module", CAP_SYS_MODULE },
|
|
{ "sys_rawio", CAP_SYS_RAWIO },
|
|
{ "sys_chroot", CAP_SYS_CHROOT },
|
|
{ "sys_ptrace", CAP_SYS_PTRACE },
|
|
{ "sys_pacct", CAP_SYS_PACCT },
|
|
{ "sys_admin", CAP_SYS_ADMIN },
|
|
{ "sys_boot", CAP_SYS_BOOT },
|
|
{ "sys_nice", CAP_SYS_NICE },
|
|
{ "sys_resource", CAP_SYS_RESOURCE },
|
|
{ "sys_time", CAP_SYS_TIME },
|
|
{ "sys_tty_config", CAP_SYS_TTY_CONFIG },
|
|
{ "mknod", CAP_MKNOD },
|
|
{ "lease", CAP_LEASE },
|
|
#ifdef CAP_AUDIT_READ
|
|
{ "audit_read", CAP_AUDIT_READ },
|
|
#endif
|
|
#ifdef CAP_AUDIT_WRITE
|
|
{ "audit_write", CAP_AUDIT_WRITE },
|
|
#endif
|
|
#ifdef CAP_AUDIT_CONTROL
|
|
{ "audit_control", CAP_AUDIT_CONTROL },
|
|
#endif
|
|
{ "setfcap", CAP_SETFCAP },
|
|
{ "mac_override", CAP_MAC_OVERRIDE },
|
|
{ "mac_admin", CAP_MAC_ADMIN },
|
|
#ifdef CAP_SYSLOG
|
|
{ "syslog", CAP_SYSLOG },
|
|
#endif
|
|
#ifdef CAP_WAKE_ALARM
|
|
{ "wake_alarm", CAP_WAKE_ALARM },
|
|
#endif
|
|
#ifdef CAP_BLOCK_SUSPEND
|
|
{ "block_suspend", CAP_BLOCK_SUSPEND },
|
|
#endif
|
|
};
|
|
#else
|
|
static struct caps_opt caps_opt[] = {};
|
|
#endif
|
|
|
|
/*
 * Resource limit keyword table. Each entry is compiled in only when the
 * platform headers define the corresponding RLIMIT_* constant.
 */
static struct limit_opt limit_opt[] = {
#ifdef RLIMIT_AS
	{ .name = "as",         .value = RLIMIT_AS         },
#endif
#ifdef RLIMIT_CORE
	{ .name = "core",       .value = RLIMIT_CORE       },
#endif
#ifdef RLIMIT_CPU
	{ .name = "cpu",        .value = RLIMIT_CPU        },
#endif
#ifdef RLIMIT_DATA
	{ .name = "data",       .value = RLIMIT_DATA       },
#endif
#ifdef RLIMIT_FSIZE
	{ .name = "fsize",      .value = RLIMIT_FSIZE      },
#endif
#ifdef RLIMIT_LOCKS
	{ .name = "locks",      .value = RLIMIT_LOCKS      },
#endif
#ifdef RLIMIT_MEMLOCK
	{ .name = "memlock",    .value = RLIMIT_MEMLOCK    },
#endif
#ifdef RLIMIT_MSGQUEUE
	{ .name = "msgqueue",   .value = RLIMIT_MSGQUEUE   },
#endif
#ifdef RLIMIT_NICE
	{ .name = "nice",       .value = RLIMIT_NICE       },
#endif
#ifdef RLIMIT_NOFILE
	{ .name = "nofile",     .value = RLIMIT_NOFILE     },
#endif
#ifdef RLIMIT_NPROC
	{ .name = "nproc",      .value = RLIMIT_NPROC      },
#endif
#ifdef RLIMIT_RSS
	{ .name = "rss",        .value = RLIMIT_RSS        },
#endif
#ifdef RLIMIT_RTPRIO
	{ .name = "rtprio",     .value = RLIMIT_RTPRIO     },
#endif
#ifdef RLIMIT_RTTIME
	{ .name = "rttime",     .value = RLIMIT_RTTIME     },
#endif
#ifdef RLIMIT_SIGPENDING
	{ .name = "sigpending", .value = RLIMIT_SIGPENDING },
#endif
#ifdef RLIMIT_STACK
	{ .name = "stack",      .value = RLIMIT_STACK      },
#endif
};
|
|
|
|
static int run_buffer(char *buffer)
|
|
{
|
|
struct lxc_popen_FILE *f;
|
|
char *output;
|
|
int ret;
|
|
|
|
f = lxc_popen(buffer);
|
|
if (!f) {
|
|
SYSERROR("Failed to popen() %s.", buffer);
|
|
return -1;
|
|
}
|
|
|
|
output = malloc(LXC_LOG_BUFFER_SIZE);
|
|
if (!output) {
|
|
ERROR("Failed to allocate memory for %s.", buffer);
|
|
lxc_pclose(f);
|
|
return -1;
|
|
}
|
|
|
|
while (fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
|
|
DEBUG("Script %s with output: %s.", buffer, output);
|
|
|
|
free(output);
|
|
|
|
ret = lxc_pclose(f);
|
|
if (ret == -1) {
|
|
SYSERROR("Script exited with error.");
|
|
return -1;
|
|
} else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
|
|
ERROR("Script exited with status %d.", WEXITSTATUS(ret));
|
|
return -1;
|
|
} else if (WIFSIGNALED(ret)) {
|
|
ERROR("Script terminated by signal %d.", WTERMSIG(ret));
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it with run_buffer().
 *
 * @argsin may be NULL; otherwise it is a NULL-terminated vector of extra
 * arguments. @lxcpath is accepted but not used when building the command.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be assembled.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook,
			   const char *lxcpath, char **argsin)
{
	char **arg;
	char *cmd;
	size_t total;
	int used;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".",
	     script, name, section);

	/* Fixed part: three separators between the four words ... */
	total = strlen(script) + strlen(name) + strlen(section) + 3;
	/* ... the hook name plus the terminating NUL ... */
	total += strlen(hook) + 1;
	/* ... and every extra argument preceded by a space. */
	for (arg = argsin; arg && *arg; arg++)
		total += strlen(*arg) + 1;

	if (total > INT_MAX)
		return -1;

	cmd = alloca(total);
	if (!cmd) {
		ERROR("Failed to allocate memory.");
		return -1;
	}

	used = snprintf(cmd, total, "%s %s %s %s", script, name, section, hook);
	if (used < 0 || (size_t)used >= total) {
		ERROR("Script name too long.");
		return -1;
	}

	for (arg = argsin; arg && *arg; arg++) {
		int room = total - used;
		int written = snprintf(cmd + used, room, " %s", *arg);

		if (written < 0 || written >= room) {
			ERROR("Script args too long.");
			return -1;
		}
		used += written;
	}

	return run_buffer(cmd);
}
|
|
|
|
/*
 * Build the command line "<script> <name> <section> [args...]" and execute
 * it with run_buffer().
 *
 * The variadic arguments are extra "char *" words appended to the command
 * line; the list MUST be terminated by a NULL pointer.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be assembled.
 */
static int run_script(const char *name, const char *section, const char *script,
		      ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".",
	     script, name, section);

	/* First pass: every extra argument costs its length plus one space. */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	/* Three fixed words, two separators and the terminating NUL. */
	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = alloca(size);
	if (!buffer) {
		ERROR("Failed to allocate memory.");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long.");
		return -1;
	}

	/* Second pass: append the extra arguments. */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long.");
			/* every va_start() must be paired with va_end(), also on error */
			va_end(ap);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	return run_buffer(buffer);
}
|
|
|
|
/*
 * Mount a directory rootfs: recursively bind-mount @rootfs onto @target,
 * applying the flags and data parsed from @options.
 *
 * Returns the result of mount(2), or -1 if the options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Initialised locally so that the free() on the error path below is
	 * well defined no matter where parse_mntopts() bails out.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
|
|
|
|
/*
 * Attach the file @rootfs to the loop device open on @fd.
 *
 * @loinfo is zeroed and then used to request LO_FLAGS_AUTOCLEAR. If setting
 * the status fails after the backing file was attached, the device is
 * detached again so that it is not left bound without autoclear.
 *
 * Returns 0 on success, -1 on failure. @fd stays open in both cases.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* Undo LOOP_SET_FD: without autoclear nobody else would detach it. */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			WARN("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
|
|
|
|
/*
 * Mount a regular-file rootfs image through a loop device.
 *
 * Scans /dev for entries whose name starts with "loop", takes the first one
 * for which LOOP_GET_STATUS64 fails with ENXIO (logged as a free device),
 * attaches @rootfs to it and mounts the device on @target.
 *
 * Returns 0 on success, -1 if /dev cannot be read, no usable device is
 * found, or attaching/mounting fails. Only the first free device is tried.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	char devpath[MAXPATHLEN];
	struct loop_info64 info;
	struct dirent *entry;
	DIR *devdir;
	int lofd = -1;
	int ret = -1;
	int len;

	devdir = opendir("/dev");
	if (devdir == NULL) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	for (entry = readdir(devdir); entry != NULL; entry = readdir(devdir)) {
		/* "." and ".." do not carry the "loop" prefix, so this skips them too. */
		if (strncmp(entry->d_name, "loop", 4) != 0)
			continue;

		len = snprintf(devpath, MAXPATHLEN, "/dev/%s", entry->d_name);
		if (len < 0 || len >= MAXPATHLEN)
			continue;

		lofd = open(devpath, O_RDWR);
		if (lofd < 0)
			continue;

		/* A device that reports a status is skipped. */
		if (ioctl(lofd, LOOP_GET_STATUS64, &info) == 0) {
			close(lofd);
			continue;
		}

		/* Only ENXIO is accepted as the sign of a usable device. */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     entry->d_name);
			close(lofd);
			continue;
		}

		DEBUG("found '%s' free lodev", devpath);

		ret = setup_lodev(rootfs, lofd, &info);
		if (ret == 0)
			ret = mount_unknown_fs(devpath, target, options);
		close(lofd);

		break;
	}

	if (closedir(devdir) != 0)
		WARN("failed to close directory");

	return ret;
}
|
|
|
|
/*
 * Mount a block-device rootfs. The work is delegated entirely to
 * mount_unknown_fs(), whose result is returned unchanged.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc = mount_unknown_fs(rootfs, target, options);

	return rc;
}
|
|
|
|
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing for the
 * duration of the container run, to prevent the container from marking the
 * underlying fs read-only on shutdown. The file is unlinked immediately so
 * that no name pollution happens.
 *
 * Returns -1 on error.
 * Returns -2 if nothing needed to be pinned (no rootfs given, the path
 * cannot be resolved, or it is not a directory).
 * Returns an open fd (>= 0) if we pinned it; the caller owns the fd.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* snprintf() may also fail outright, not only truncate. */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;

	/* The open fd is what pins the fs; the name is not needed any more. */
	(void)unlink(absrootfspin);
	return fd;
}
|
|
|
|
/*
 * When a remount is requested, carry over the restrictive flags (nosuid,
 * nodev, read-only, noexec) that statvfs() reports for the mount.
 *
 * @s is the path inspected; @d is used instead when @s is NULL. @flags is
 * returned unchanged when it does not contain MS_REMOUNT, when no path is
 * available, when statvfs() fails, or when built without statvfs support.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
						unsigned long flags)
{
#ifndef HAVE_STATVFS
	return flags;
#else
	static const unsigned long sticky[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *path = s ? s : d;
	struct statvfs sb;
	size_t i;

	if ((flags & MS_REMOUNT) == 0 || path == NULL)
		return flags;

	if (statvfs(path, &sb) < 0)
		return flags;

	for (i = 0; i < sizeof(sticky) / sizeof(sticky[0]); i++) {
		if (sb.f_flag & sticky[i])
			flags |= sticky[i];
	}

	return flags;
#endif
}
|
|
|
|
static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
|
|
{
|
|
int r;
|
|
int i;
|
|
static struct {
|
|
int match_mask;
|
|
int match_flag;
|
|
const char *source;
|
|
const char *destination;
|
|
const char *fstype;
|
|
unsigned long flags;
|
|
const char *options;
|
|
} default_mounts[] = {
|
|
/* Read-only bind-mounting... In older kernels, doing that required
|
|
* to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
|
|
* one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
|
|
* kernel 2.6.26 onwards. However, this apparently does not work on
|
|
* kernel 3.8. Unfortunately, on that very same kernel, doing the
|
|
* same trick as above doesn't seem to work either, there one needs
|
|
* to ALSO specify MS_BIND for the remount, otherwise the entire
|
|
* fs is remounted read-only or the mount fails because it's busy...
|
|
* MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
|
|
* 2.6.32...
|
|
*/
|
|
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
|
|
/* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
|
|
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
|
|
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
|
|
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
|
|
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
|
|
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
|
|
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
|
|
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
|
|
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
|
|
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
|
|
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
|
|
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
|
|
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
|
|
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
|
|
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
|
|
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
|
|
{ 0, 0, NULL, NULL, NULL, 0, NULL }
|
|
};
|
|
|
|
for (i = 0; default_mounts[i].match_mask; i++) {
|
|
if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
|
|
char *source = NULL;
|
|
char *destination = NULL;
|
|
int saved_errno;
|
|
unsigned long mflags;
|
|
|
|
if (default_mounts[i].source) {
|
|
/* will act like strdup if %r is not present */
|
|
source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
|
|
if (!source) {
|
|
SYSERROR("memory allocation error");
|
|
return -1;
|
|
}
|
|
}
|
|
if (!default_mounts[i].destination) {
|
|
ERROR("BUG: auto mounts destination %d was NULL", i);
|
|
free(source);
|
|
return -1;
|
|
}
|
|
/* will act like strdup if %r is not present */
|
|
destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
|
|
if (!destination) {
|
|
saved_errno = errno;
|
|
SYSERROR("memory allocation error");
|
|
free(source);
|
|
errno = saved_errno;
|
|
return -1;
|
|
}
|
|
mflags = add_required_remount_flags(source, destination,
|
|
default_mounts[i].flags);
|
|
r = safe_mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options, conf->rootfs.path ? conf->rootfs.mount : NULL);
|
|
saved_errno = errno;
|
|
if (r < 0 && errno == ENOENT) {
|
|
INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
|
|
r = 0;
|
|
}
|
|
else if (r < 0)
|
|
SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
|
|
|
|
free(source);
|
|
free(destination);
|
|
if (r < 0) {
|
|
errno = saved_errno;
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (flags & LXC_AUTO_CGROUP_MASK) {
|
|
int cg_flags;
|
|
|
|
cg_flags = flags & LXC_AUTO_CGROUP_MASK;
|
|
/* If the type of cgroup mount was not specified, it depends on the
|
|
* container's capabilities as to what makes sense: if we have
|
|
* CAP_SYS_ADMIN, the read-only part can be remounted read-write
|
|
* anyway, so we may as well default to read-write; then the admin
|
|
* will not be given a false sense of security. (And if they really
|
|
* want mixed r/o r/w, then they can explicitly specify :mixed.)
|
|
* OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
|
|
* :mixed, because then the container can't remount it read-write. */
|
|
if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
|
|
int has_sys_admin = 0;
|
|
if (!lxc_list_empty(&conf->keepcaps)) {
|
|
has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
|
|
} else {
|
|
has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
|
|
}
|
|
if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
|
|
cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
|
|
} else {
|
|
cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
|
|
}
|
|
}
|
|
|
|
if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
|
|
SYSERROR("error mounting /sys/fs/cgroup");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int mount_rootfs(const char *rootfs, const char *target, const char *options)
|
|
{
|
|
char absrootfs[MAXPATHLEN];
|
|
struct stat s;
|
|
int i;
|
|
|
|
typedef int (*rootfs_cb)(const char *, const char *, const char *);
|
|
|
|
struct rootfs_type {
|
|
int type;
|
|
rootfs_cb cb;
|
|
} rtfs_type[] = {
|
|
{ S_IFDIR, mount_rootfs_dir },
|
|
{ S_IFBLK, mount_rootfs_block },
|
|
{ S_IFREG, mount_rootfs_file },
|
|
};
|
|
|
|
if (!realpath(rootfs, absrootfs)) {
|
|
SYSERROR("failed to get real path for '%s'", rootfs);
|
|
return -1;
|
|
}
|
|
|
|
if (access(absrootfs, F_OK)) {
|
|
SYSERROR("'%s' is not accessible", absrootfs);
|
|
return -1;
|
|
}
|
|
|
|
if (stat(absrootfs, &s)) {
|
|
SYSERROR("failed to stat '%s'", absrootfs);
|
|
return -1;
|
|
}
|
|
|
|
for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
|
|
|
|
if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
|
|
continue;
|
|
|
|
return rtfs_type[i].cb(absrootfs, target, options);
|
|
}
|
|
|
|
ERROR("unsupported rootfs type for '%s'", absrootfs);
|
|
return -1;
|
|
}
|
|
|
|
/*
 * Set the hostname to utsname->nodename.
 *
 * A NULL @utsname means there is nothing to configure and counts as
 * success. Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;
	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);
	return 0;
}
|
|
|
|
/* A symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* what the link points to */
	const char *name;	/* link name, relative to /dev */
};

/* The standard fd/stdin/stdout/stderr links into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
|
|
|
|
static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
|
|
{
|
|
char path[MAXPATHLEN];
|
|
int ret,i;
|
|
struct stat s;
|
|
|
|
|
|
for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
|
|
const struct dev_symlinks *d = &dev_symlinks[i];
|
|
ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
|
|
if (ret < 0 || ret >= MAXPATHLEN)
|
|
return -1;
|
|
|
|
/*
|
|
* Stat the path first. If we don't get an error
|
|
* accept it as is and don't try to create it
|
|
*/
|
|
if (!stat(path, &s)) {
|
|
continue;
|
|
}
|
|
|
|
ret = symlink(d->oldpath, path);
|
|
|
|
if (ret && errno != EEXIST) {
|
|
if ( errno == EROFS ) {
|
|
WARN("Warning: Read Only file system while creating %s", path);
|
|
} else {
|
|
SYSERROR("Error creating %s", path);
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * On the first call (*pp == NULL) the string "container_ttys=<name>" is
 * allocated; later calls append " <name>". Returns false if memory cannot
 * be obtained, in which case *pp is left as it was.
 */
static bool append_ptyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t name_len = strlen(name);
	size_t cur_len;
	char *list;

	if (*pp == NULL) {
		list = malloc(name_len + prefix_len + 1);
		if (list == NULL)
			return false;

		memcpy(list, prefix, prefix_len);
		memcpy(list + prefix_len, name, name_len + 1);
		*pp = list;
		return true;
	}

	cur_len = strlen(*pp);
	/* one byte for the separator, one for the terminating NUL */
	list = realloc(*pp, cur_len + name_len + 2);
	if (list == NULL)
		return false;

	list[cur_len] = ' ';
	memcpy(list + cur_len + 1, name, name_len + 1);
	*pp = list;
	return true;
}
|
|
|
|
static int setup_tty(struct lxc_conf *conf)
|
|
{
|
|
const struct lxc_tty_info *tty_info = &conf->tty_info;
|
|
char *ttydir = conf->ttydir;
|
|
char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
|
|
int i, ret;
|
|
|
|
if (!conf->rootfs.path)
|
|
return 0;
|
|
|
|
for (i = 0; i < tty_info->nbtty; i++) {
|
|
|
|
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
|
|
|
|
ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
|
|
if (ret >= sizeof(path)) {
|
|
ERROR("pathname too long for ttys");
|
|
return -1;
|
|
}
|
|
if (ttydir) {
|
|
/* create dev/lxc/tty%d" */
|
|
ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
|
|
if (ret >= sizeof(lxcpath)) {
|
|
ERROR("pathname too long for ttys");
|
|
return -1;
|
|
}
|
|
ret = creat(lxcpath, 0660);
|
|
if (ret==-1 && errno != EEXIST) {
|
|
SYSERROR("error creating %s", lxcpath);
|
|
return -1;
|
|
}
|
|
if (ret >= 0)
|
|
close(ret);
|
|
ret = unlink(path);
|
|
if (ret && errno != ENOENT) {
|
|
SYSERROR("error unlinking %s", path);
|
|
return -1;
|
|
}
|
|
|
|
if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
|
|
WARN("failed to mount '%s'->'%s'",
|
|
pty_info->name, path);
|
|
continue;
|
|
}
|
|
|
|
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
|
|
if (ret >= sizeof(lxcpath)) {
|
|
ERROR("tty pathname too long");
|
|
return -1;
|
|
}
|
|
ret = symlink(lxcpath, path);
|
|
if (ret) {
|
|
SYSERROR("failed to create symlink for tty %d", i+1);
|
|
return -1;
|
|
}
|
|
} else {
|
|
/* If we populated /dev, then we need to create /dev/ttyN */
|
|
if (access(path, F_OK)) {
|
|
ret = creat(path, 0660);
|
|
if (ret==-1) {
|
|
SYSERROR("error creating %s", path);
|
|
/* this isn't fatal, continue */
|
|
} else {
|
|
close(ret);
|
|
}
|
|
}
|
|
if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
|
|
SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
|
|
continue;
|
|
}
|
|
}
|
|
if (!append_ptyname(&conf->pty_names, pty_info->name)) {
|
|
ERROR("Error setting up container_ttys string");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
INFO("%d tty(s) has been setup", tty_info->nbtty);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int setup_rootfs_pivot_root(const char *rootfs)
|
|
{
|
|
int oldroot = -1, newroot = -1;
|
|
|
|
oldroot = open("/", O_DIRECTORY | O_RDONLY);
|
|
if (oldroot < 0) {
|
|
SYSERROR("Error opening old-/ for fchdir");
|
|
return -1;
|
|
}
|
|
newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
|
|
if (newroot < 0) {
|
|
SYSERROR("Error opening new-/ for fchdir");
|
|
goto fail;
|
|
}
|
|
|
|
/* change into new root fs */
|
|
if (fchdir(newroot)) {
|
|
SYSERROR("can't chdir to new rootfs '%s'", rootfs);
|
|
goto fail;
|
|
}
|
|
|
|
/* pivot_root into our new root fs */
|
|
if (pivot_root(".", ".")) {
|
|
SYSERROR("pivot_root syscall failed");
|
|
goto fail;
|
|
}
|
|
|
|
/*
|
|
* at this point the old-root is mounted on top of our new-root
|
|
* To unmounted it we must not be chdir'd into it, so escape back
|
|
* to old-root
|
|
*/
|
|
if (fchdir(oldroot) < 0) {
|
|
SYSERROR("Error entering oldroot");
|
|
goto fail;
|
|
}
|
|
if (umount2(".", MNT_DETACH) < 0) {
|
|
SYSERROR("Error detaching old root");
|
|
goto fail;
|
|
}
|
|
|
|
if (fchdir(newroot) < 0) {
|
|
SYSERROR("Error re-entering newroot");
|
|
goto fail;
|
|
}
|
|
|
|
close(oldroot);
|
|
close(newroot);
|
|
|
|
DEBUG("pivot_root syscall to '%s' successful", rootfs);
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
if (oldroot != -1)
|
|
close(oldroot);
|
|
if (newroot != -1)
|
|
close(newroot);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Just create a path for /dev under $lxcpath/$name and in rootfs
|
|
* If we hit an error, log it but don't fail yet.
|
|
*/
|
|
static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, const char *lxcpath)
|
|
{
|
|
int ret;
|
|
size_t clen;
|
|
char *path;
|
|
|
|
INFO("Mounting container /dev");
|
|
|
|
/* $(rootfs->mount) + "/dev/pts" + '\0' */
|
|
clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
|
|
path = alloca(clen);
|
|
|
|
ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
|
|
if (ret < 0 || ret >= clen)
|
|
return -1;
|
|
|
|
if (!dir_exists(path)) {
|
|
WARN("No /dev in container.");
|
|
WARN("Proceeding without autodev setup");
|
|
return 0;
|
|
}
|
|
|
|
ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
|
|
rootfs->path ? rootfs->mount : NULL);
|
|
if (ret != 0) {
|
|
SYSERROR("Failed mounting tmpfs onto %s\n", path);
|
|
return -1;
|
|
}
|
|
|
|
INFO("Mounted tmpfs onto %s", path);
|
|
|
|
ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
|
|
if (ret < 0 || ret >= clen)
|
|
return -1;
|
|
|
|
/*
|
|
* If we are running on a devtmpfs mapping, dev/pts may already exist.
|
|
* If not, then create it and exit if that fails...
|
|
*/
|
|
if (!dir_exists(path)) {
|
|
ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
|
|
if (ret) {
|
|
SYSERROR("Failed to create /dev/pts in container");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
INFO("Mounted container /dev");
|
|
return 0;
|
|
}
|
|
|
|
/* Description of one device node to create under the container's /dev. */
struct lxc_devs {
	/* node name relative to /dev */
	const char *name;
	/* file type and permission bits handed to mknod() */
	mode_t mode;
	/* device major number */
	int maj;
	/* device minor number */
	int min;
};
|
|
|
|
static const struct lxc_devs lxc_devs[] = {
|
|
{ "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
|
|
{ "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
|
|
{ "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
|
|
{ "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
|
|
{ "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
|
|
{ "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
|
|
{ "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 },
|
|
};
|
|
|
|
static int fill_autodev(const struct lxc_rootfs *rootfs, bool mount_console)
|
|
{
|
|
int ret;
|
|
char path[MAXPATHLEN];
|
|
int i;
|
|
mode_t cmask;
|
|
|
|
INFO("Creating initial consoles under container /dev");
|
|
|
|
ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs->path ? rootfs->mount : "");
|
|
if (ret < 0 || ret >= MAXPATHLEN) {
|
|
ERROR("Error calculating container /dev location");
|
|
return -1;
|
|
}
|
|
|
|
if (!dir_exists(path)) // ignore, just don't try to fill in
|
|
return 0;
|
|
|
|
INFO("Populating container /dev");
|
|
cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
|
|
for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
|
|
const struct lxc_devs *d = &lxc_devs[i];
|
|
|
|
if (!strcmp(d->name, "console") && !mount_console)
|
|
continue;
|
|
|
|
ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
|
|
if (ret < 0 || ret >= MAXPATHLEN)
|
|
return -1;
|
|
ret = mknod(path, d->mode, makedev(d->maj, d->min));
|
|
if (ret && errno != EEXIST) {
|
|
char hostpath[MAXPATHLEN];
|
|
FILE *pathfile;
|
|
|
|
// Unprivileged containers cannot create devices, so
|
|
// bind mount the device from the host
|
|
ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
|
|
if (ret < 0 || ret >= MAXPATHLEN)
|
|
return -1;
|
|
pathfile = fopen(path, "wb");
|
|
if (!pathfile) {
|
|
SYSERROR("Failed to create device mount target '%s'", path);
|
|
return -1;
|
|
}
|
|
fclose(pathfile);
|
|
if (safe_mount(hostpath, path, 0, MS_BIND, NULL,
|
|
rootfs->path ? rootfs->mount : NULL) != 0) {
|
|
SYSERROR("Failed bind mounting device %s from host into container",
|
|
d->name);
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
umask(cmask);
|
|
|
|
INFO("Populated container /dev");
|
|
return 0;
|
|
}
|
|
|
|
static int setup_rootfs(struct lxc_conf *conf)
|
|
{
|
|
const struct lxc_rootfs *rootfs = &conf->rootfs;
|
|
|
|
if (!rootfs->path) {
|
|
if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
|
|
SYSERROR("Failed to make / rslave");
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
if (access(rootfs->mount, F_OK)) {
|
|
SYSERROR("failed to access to '%s', check it is present",
|
|
rootfs->mount);
|
|
return -1;
|
|
}
|
|
|
|
// First try mounting rootfs using a bdev
|
|
struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
|
|
if (bdev && bdev->ops->mount(bdev) == 0) {
|
|
bdev_put(bdev);
|
|
DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
|
|
return 0;
|
|
}
|
|
if (bdev)
|
|
bdev_put(bdev);
|
|
if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
|
|
ERROR("failed to mount rootfs");
|
|
return -1;
|
|
}
|
|
|
|
DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int prepare_ramfs_root(char *root)
|
|
{
|
|
char buf[LXC_LINELEN], *p;
|
|
char nroot[PATH_MAX];
|
|
FILE *f;
|
|
int i;
|
|
char *p2;
|
|
|
|
if (realpath(root, nroot) == NULL)
|
|
return -1;
|
|
|
|
if (chdir("/") == -1)
|
|
return -1;
|
|
|
|
/*
|
|
* We could use here MS_MOVE, but in userns this mount is
|
|
* locked and can't be moved.
|
|
*/
|
|
if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
|
|
SYSERROR("Failed to move %s into /", root);
|
|
return -1;
|
|
}
|
|
|
|
if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
|
|
SYSERROR("Failed to make . rprivate");
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* The following code cleans up inhereted mounts which are not
|
|
* required for CT.
|
|
*
|
|
* The mountinfo file shows not all mounts, if a few points have been
|
|
* unmounted between read operations from the mountinfo. So we need to
|
|
* read mountinfo a few times.
|
|
*
|
|
* This loop can be skipped if a container uses unserns, because all
|
|
* inherited mounts are locked and we should live with all this trash.
|
|
*/
|
|
while (1) {
|
|
int progress = 0;
|
|
|
|
f = fopen("./proc/self/mountinfo", "r");
|
|
if (!f) {
|
|
SYSERROR("Unable to open /proc/self/mountinfo");
|
|
return -1;
|
|
}
|
|
while (fgets(buf, LXC_LINELEN, f)) {
|
|
for (p = buf, i=0; p && i < 4; i++)
|
|
p = strchr(p+1, ' ');
|
|
if (!p)
|
|
continue;
|
|
p2 = strchr(p+1, ' ');
|
|
if (!p2)
|
|
continue;
|
|
|
|
*p2 = '\0';
|
|
*p = '.';
|
|
|
|
if (strcmp(p + 1, "/") == 0)
|
|
continue;
|
|
if (strcmp(p + 1, "/proc") == 0)
|
|
continue;
|
|
|
|
if (umount2(p, MNT_DETACH) == 0)
|
|
progress++;
|
|
}
|
|
fclose(f);
|
|
if (!progress)
|
|
break;
|
|
}
|
|
|
|
/* This also can be skipped if a container uses unserns */
|
|
umount2("./proc", MNT_DETACH);
|
|
|
|
/* It is weird, but chdir("..") moves us in a new root */
|
|
if (chdir("..") == -1) {
|
|
SYSERROR("Unable to change working directory");
|
|
return -1;
|
|
}
|
|
|
|
if (chroot(".") == -1) {
|
|
SYSERROR("Unable to chroot");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int setup_pivot_root(const struct lxc_rootfs *rootfs)
|
|
{
|
|
if (!rootfs->path)
|
|
return 0;
|
|
|
|
if (detect_ramfs_rootfs()) {
|
|
if (prepare_ramfs_root(rootfs->mount))
|
|
return -1;
|
|
} else if (setup_rootfs_pivot_root(rootfs->mount)) {
|
|
ERROR("failed to setup pivot root");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int setup_pts(int pts)
|
|
{
|
|
char target[PATH_MAX];
|
|
|
|
if (!pts)
|
|
return 0;
|
|
|
|
if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
|
|
SYSERROR("failed to umount 'dev/pts'");
|
|
return -1;
|
|
}
|
|
|
|
if (mkdir("/dev/pts", 0755)) {
|
|
if ( errno != EEXIST ) {
|
|
SYSERROR("failed to create '/dev/pts'");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
|
|
"newinstance,ptmxmode=0666,mode=0620,gid=5")) {
|
|
SYSERROR("failed to mount a new instance of '/dev/pts'");
|
|
return -1;
|
|
}
|
|
|
|
if (access("/dev/ptmx", F_OK)) {
|
|
if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
|
|
goto out;
|
|
SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
|
|
return -1;
|
|
}
|
|
|
|
if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
|
|
goto out;
|
|
|
|
/* fallback here, /dev/pts/ptmx exists just mount bind */
|
|
if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
|
|
SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
|
|
return -1;
|
|
}
|
|
|
|
INFO("created new pts instance");
|
|
|
|
out:
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Apply the execution domain @persona via personality().
 *
 * A value of -1 means "leave unchanged". When HAVE_SYS_PERSONALITY_H is not
 * set the function does nothing. Returns 0 on success, -1 on failure.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
|
|
|
|
static int setup_dev_console(const struct lxc_rootfs *rootfs,
|
|
const struct lxc_console *console)
|
|
{
|
|
char path[MAXPATHLEN];
|
|
int ret, fd;
|
|
|
|
ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
|
|
if (ret >= sizeof(path)) {
|
|
ERROR("console path too long");
|
|
return -1;
|
|
}
|
|
|
|
fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
|
|
if (fd < 0) {
|
|
if (errno != EEXIST) {
|
|
SYSERROR("failed to create console");
|
|
return -1;
|
|
}
|
|
} else {
|
|
close(fd);
|
|
}
|
|
|
|
if (console->master < 0) {
|
|
INFO("no console");
|
|
return 0;
|
|
}
|
|
|
|
if (chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH)) {
|
|
SYSERROR("failed to set mode '0%o' to '%s'",
|
|
S_IXUSR | S_IXGRP | S_IXOTH, console->name);
|
|
return -1;
|
|
}
|
|
|
|
if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) {
|
|
ERROR("failed to mount '%s' on '%s'", console->name, path);
|
|
return -1;
|
|
}
|
|
|
|
INFO("console has been setup");
|
|
return 0;
|
|
}
|
|
|
|
static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
|
|
const struct lxc_console *console,
|
|
char *ttydir)
|
|
{
|
|
char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
|
|
int ret;
|
|
|
|
/* create rootfs/dev/<ttydir> directory */
|
|
ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
|
|
ttydir);
|
|
if (ret >= sizeof(path))
|
|
return -1;
|
|
ret = mkdir(path, 0755);
|
|
if (ret && errno != EEXIST) {
|
|
SYSERROR("failed with errno %d to create %s", errno, path);
|
|
return -1;
|
|
}
|
|
INFO("created %s", path);
|
|
|
|
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
|
|
rootfs->mount, ttydir);
|
|
if (ret >= sizeof(lxcpath)) {
|
|
ERROR("console path too long");
|
|
return -1;
|
|
}
|
|
|
|
snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
|
|
ret = unlink(path);
|
|
if (ret && errno != ENOENT) {
|
|
SYSERROR("error unlinking %s", path);
|
|
return -1;
|
|
}
|
|
|
|
ret = creat(lxcpath, 0660);
|
|
if (ret==-1 && errno != EEXIST) {
|
|
SYSERROR("error %d creating %s", errno, lxcpath);
|
|
return -1;
|
|
}
|
|
if (ret >= 0)
|
|
close(ret);
|
|
|
|
if (console->master < 0) {
|
|
INFO("no console");
|
|
return 0;
|
|
}
|
|
|
|
if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) {
|
|
ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
|
|
return -1;
|
|
}
|
|
|
|
/* create symlink from rootfs/dev/console to 'lxc/console' */
|
|
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
|
|
if (ret >= sizeof(lxcpath)) {
|
|
ERROR("lxc/console path too long");
|
|
return -1;
|
|
}
|
|
ret = symlink(lxcpath, path);
|
|
if (ret) {
|
|
SYSERROR("failed to create symlink for console");
|
|
return -1;
|
|
}
|
|
|
|
INFO("console has been setup on %s", lxcpath);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int setup_console(const struct lxc_rootfs *rootfs,
|
|
const struct lxc_console *console,
|
|
char *ttydir)
|
|
{
|
|
/* We don't have a rootfs, /dev/console will be shared */
|
|
if (!rootfs->path)
|
|
return 0;
|
|
if (!ttydir)
|
|
return setup_dev_console(rootfs, console);
|
|
|
|
return setup_ttydir_console(rootfs, console, ttydir);
|
|
}
|
|
|
|
static int setup_kmsg(const struct lxc_rootfs *rootfs,
|
|
const struct lxc_console *console)
|
|
{
|
|
char kpath[MAXPATHLEN];
|
|
int ret;
|
|
|
|
if (!rootfs->path)
|
|
return 0;
|
|
ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
|
|
if (ret < 0 || ret >= sizeof(kpath))
|
|
return -1;
|
|
|
|
ret = unlink(kpath);
|
|
if (ret && errno != ENOENT) {
|
|
SYSERROR("error unlinking %s", kpath);
|
|
return -1;
|
|
}
|
|
|
|
ret = symlink("console", kpath);
|
|
if (ret) {
|
|
SYSERROR("failed to create symlink for kmsg");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void parse_mntopt(char *opt, unsigned long *flags, char **data)
|
|
{
|
|
struct mount_opt *mo;
|
|
|
|
/* If opt is found in mount_opt, set or clear flags.
|
|
* Otherwise append it to data. */
|
|
|
|
for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
|
|
if (!strncmp(opt, mo->name, strlen(mo->name))) {
|
|
if (mo->clear)
|
|
*flags &= ~mo->flag;
|
|
else
|
|
*flags |= mo->flag;
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (strlen(*data))
|
|
strcat(*data, ",");
|
|
strcat(*data, opt);
|
|
}
|
|
|
|
/*
 * Split the comma-separated option string @mntopts into mount flags and a
 * data string.
 *
 * *@mntflags receives the accumulated flags; *@mntdata receives a newly
 * allocated string holding the remaining options, or NULL when there are
 * none (the caller frees it). Both outputs are reset before anything else,
 * so they are well defined even on failure. A NULL @mntopts yields 0 flags
 * and no data.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *data, *tok;
	char *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() writes into its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never get longer than the input */
	data = malloc(strlen(copy) + 1);
	if (!data) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	data[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &data);
		tok = strtok_r(NULL, ",", &state);
	}

	if (data[0] != '\0')
		*mntdata = data;
	else
		free(data);

	free(copy);

	return 0;
}
|
|
|
|
/*
 * Terminate @word in place at its first space or tab. A string without
 * either is left unchanged.
 */
static void null_endofword(char *word)
{
	/* strcspn() stops at the terminating NUL when no blank is found */
	word[strcspn(word, " \t")] = '\0';
}
|
|
|
|
/*
 * Skip @nfields blank-terminated fields in @src and return a pointer to what
 * follows. Exactly one separator (space or tab) is consumed per field. If
 * the string ends first, a pointer to its terminating NUL is returned.
 */
static char *get_field(char *src, int nfields)
{
	while (nfields-- > 0) {
		/* jump to the separator ending the current field */
		src += strcspn(src, " \t");
		if (*src == '\0')
			break;
		/* step over that single separator */
		src++;
	}

	return src;
}
|
|
|
|
/*
 * Perform one mount and, for bind or remount requests, a follow-up remount
 * so that the requested flags actually take effect.
 *
 * The first mount goes through safe_mount() with MS_REMOUNT masked out.
 * When HAVE_STATVFS is available, the nosuid/nodev/ro/noexec flags reported
 * for @fsname are added to the remount flags (nodev only when @dev is 0);
 * for a pure bind mount the remount is skipped if nothing new is required.
 *
 * With @optional set, mount failures are logged and reported as success.
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional, int dev, const char *rootfs)
{
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (safe_mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data, rootfs)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		/* read-only explicitly asked for by the caller */
		unsigned long rqd_flags = mountflags & MS_RDONLY;

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname ? fsname : "(none)", target ? target : "(none)");

#ifdef HAVE_STATVFS
		if (statvfs(fsname, &sb) == 0) {
			unsigned long required_flags = rqd_flags;

			if (sb.f_flag & MS_NOSUID)
				required_flags |= MS_NOSUID;
			if ((sb.f_flag & MS_NODEV) && !dev)
				required_flags |= MS_NODEV;
			if (sb.f_flag & MS_RDONLY)
				required_flags |= MS_RDONLY;
			if (sb.f_flag & MS_NOEXEC)
				required_flags |= MS_NOEXEC;

			DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);

			/*
			 * For a bind mount request where required_flags adds
			 * nothing beyond mountflags and read-only was not
			 * requested, there is no need to remount.
			 */
			if (!(mountflags & MS_REMOUNT) &&
			    !(required_flags & ~mountflags) && rqd_flags == 0) {
				DEBUG("mountflags already was %lu, skipping remount",
				      mountflags);
				goto skipremount;
			}

			mountflags |= required_flags;
		}
#endif

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) < 0) {
			if (!optional) {
				SYSERROR("failed to mount '%s' on '%s'",
					 fsname, target);
				return -1;
			}

			INFO("failed to mount '%s' on '%s' (optional): %s",
			     fsname, target, strerror(errno));
			return 0;
		}
	}

#ifdef HAVE_STATVFS
skipremount:
#endif
	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
|
|
|
|
/*
 * Remove the LXC-only options 'optional', 'create=dir' and 'create=file'
 * from mntent->mnt_opts, editing the string in place.
 *
 * Only the first occurrence of each option is removed. When the removed
 * option is the last one in the list, the comma that preceded it is dropped
 * as well so no dangling separator is left behind.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const lxc_only_opts[] = {
		"create=dir",
		"create=file",
		"optional",
		NULL
	};
	size_t i;

	for (i = 0; lxc_only_opts[i]; i++) {
		char *start = strstr(mntent->mnt_opts, lxc_only_opts[i]);
		char *comma;

		if (!start)
			continue;

		comma = strchr(start, ',');
		if (comma) {
			/* pull the rest of the list (including NUL) forward */
			memmove(start, comma + 1, strlen(comma + 1) + 1);
			continue;
		}

		/* last option: cut here, taking the separator before it too */
		if (start > mntent->mnt_opts && start[-1] == ',')
			start--;
		*start = '\0';
	}
}
|
|
|
|
/*
 * Create the mount target @path as requested by the entry's options.
 *
 * - overlay/aufs entries get their directories prepared by ovl_mkdir() /
 *   aufs_mkdir(); a failure there returns -1 immediately.
 * - "create=dir": @path is created as a directory (with parents).
 * - "create=file": if @path does not exist, its parent directory is created
 *   and an empty file is created at @path.
 *
 * Returns 0 on success, -1 if the target could not be created.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char* path, const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	int ret = 0;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		if (ovl_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	} else if (strncmp(mntent->mnt_type, "aufs", 4) == 0) {
		if (aufs_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so keep the strdup()'d pointer around
		 * and free that one, never dirname()'s return value.
		 */
		char *pathcopy = strdup(path);

		if (!pathcopy || mkdir_p(dirname(pathcopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		} else {
			fclose(pathfile);
		}
	}

	return ret;
}
|
|
|
|
/* rootfs, lxc_name, and lxc_path can be NULL when the container is created
|
|
* without a rootfs. */
|
|
static inline int mount_entry_on_generic(struct mntent *mntent,
|
|
const char* path, const struct lxc_rootfs *rootfs,
|
|
const char *lxc_name, const char *lxc_path)
|
|
{
|
|
unsigned long mntflags;
|
|
char *mntdata;
|
|
int ret;
|
|
bool optional = hasmntopt(mntent, "optional") != NULL;
|
|
bool dev = hasmntopt(mntent, "dev") != NULL;
|
|
|
|
char *rootfs_path = NULL;
|
|
if (rootfs && rootfs->path)
|
|
rootfs_path = rootfs->mount;
|
|
|
|
ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name, lxc_path);
|
|
|
|
if (ret < 0)
|
|
return optional ? 0 : -1;
|
|
|
|
cull_mntent_opt(mntent);
|
|
|
|
if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
|
|
free(mntdata);
|
|
return -1;
|
|
}
|
|
|
|
ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
|
|
mntdata, optional, dev, rootfs_path);
|
|
|
|
free(mntdata);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Mount an entry for a container that has no rootfs: the mount point is
 * taken as an absolute host path, with a leading '/' added if missing.
 * Returns the result of mount_entry_on_generic(), or -1 if the path does
 * not fit.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	char path[MAXPATHLEN];
	const char *lead = (mntent->mnt_dir[0] == '/') ? "" : "/";
	int len;

	/* without a rootfs every mount point is absolute on the host */
	len = snprintf(path, sizeof(path), "%s%s", lead, mntent->mnt_dir);
	if (len < 0 || (size_t)len >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	return mount_entry_on_generic(mntent, path, NULL, NULL, NULL);
}
|
|
|
|
static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
|
|
const struct lxc_rootfs *rootfs,
|
|
const char *lxc_name,
|
|
const char *lxc_path)
|
|
{
|
|
char *aux;
|
|
char path[MAXPATHLEN];
|
|
int r, ret = 0, offset;
|
|
const char *lxcpath;
|
|
|
|
lxcpath = lxc_global_config_value("lxc.lxcpath");
|
|
if (!lxcpath) {
|
|
ERROR("Out of memory");
|
|
return -1;
|
|
}
|
|
|
|
/* if rootfs->path is a blockdev path, allow container fstab to
|
|
* use $lxcpath/CN/rootfs as the target prefix */
|
|
r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
|
|
if (r < 0 || r >= MAXPATHLEN)
|
|
goto skipvarlib;
|
|
|
|
aux = strstr(mntent->mnt_dir, path);
|
|
if (aux) {
|
|
offset = strlen(path);
|
|
goto skipabs;
|
|
}
|
|
|
|
skipvarlib:
|
|
aux = strstr(mntent->mnt_dir, rootfs->path);
|
|
if (!aux) {
|
|
WARN("ignoring mount point '%s'", mntent->mnt_dir);
|
|
return ret;
|
|
}
|
|
offset = strlen(rootfs->path);
|
|
|
|
skipabs:
|
|
|
|
r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
|
|
aux + offset);
|
|
if (r < 0 || r >= MAXPATHLEN) {
|
|
WARN("pathnme too long for '%s'", mntent->mnt_dir);
|
|
return -1;
|
|
}
|
|
|
|
return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
|
|
}
|
|
|
|
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
|
|
const struct lxc_rootfs *rootfs,
|
|
const char *lxc_name,
|
|
const char *lxc_path)
|
|
{
|
|
char path[MAXPATHLEN];
|
|
int ret;
|
|
|
|
/* relative to root mount point */
|
|
ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
|
|
if (ret < 0 || ret >= sizeof(path)) {
|
|
ERROR("path name too long");
|
|
return -1;
|
|
}
|
|
|
|
return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
|
|
}
|
|
|
|
static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
|
|
const char *lxc_name, const char *lxc_path)
|
|
{
|
|
struct mntent mntent;
|
|
char buf[4096];
|
|
int ret = -1;
|
|
|
|
while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
|
|
|
|
if (!rootfs->path) {
|
|
if (mount_entry_on_systemfs(&mntent))
|
|
goto out;
|
|
continue;
|
|
}
|
|
|
|
/* We have a separate root, mounts are relative to it */
|
|
if (mntent.mnt_dir[0] != '/') {
|
|
if (mount_entry_on_relative_rootfs(&mntent, rootfs, lxc_name, lxc_path))
|
|
goto out;
|
|
continue;
|
|
}
|
|
|
|
if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name, lxc_path))
|
|
goto out;
|
|
}
|
|
|
|
ret = 0;
|
|
|
|
INFO("mount points have been setup");
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Mount all entries of the fstab file @fstab. A NULL @fstab means there is
 * nothing to do. Returns 0 on success, -1 on failure.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	FILE *fp;
	int rc;

	if (!fstab)
		return 0;

	fp = setmntent(fstab, "r");
	if (!fp) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, fp, lxc_name, lxc_path);
	endmntent(fp);

	return rc;
}
|
|
|
|
FILE *make_anonymous_mount_file(struct lxc_list *mount)
|
|
{
|
|
int ret;
|
|
char *mount_entry;
|
|
struct lxc_list *iterator;
|
|
FILE *file;
|
|
int fd = -1;
|
|
|
|
fd = memfd_create("lxc_mount_file", MFD_CLOEXEC);
|
|
if (fd < 0) {
|
|
if (errno != ENOSYS)
|
|
return NULL;
|
|
file = tmpfile();
|
|
} else {
|
|
file = fdopen(fd, "r+");
|
|
}
|
|
|
|
if (!file) {
|
|
int saved_errno = errno;
|
|
if (fd != -1)
|
|
close(fd);
|
|
ERROR("Could not create mount entry file: %s.", strerror(saved_errno));
|
|
return NULL;
|
|
}
|
|
|
|
lxc_list_for_each(iterator, mount) {
|
|
mount_entry = iterator->elem;
|
|
ret = fprintf(file, "%s\n", mount_entry);
|
|
if (ret < strlen(mount_entry))
|
|
WARN("Could not write mount entry to anonymous mount file.");
|
|
}
|
|
|
|
if (fseek(file, 0, SEEK_SET) < 0) {
|
|
fclose(file);
|
|
return NULL;
|
|
}
|
|
|
|
return file;
|
|
}
|
|
|
|
/*
 * Mount the entries held in the list @mount by first dumping them into an
 * anonymous file and then processing that file like an fstab.
 * Returns 0 on success, -1 on failure.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs,
			       struct lxc_list *mount, const char *lxc_name,
			       const char *lxc_path)
{
	int rc;
	FILE *fp = make_anonymous_mount_file(mount);

	if (!fp)
		return -1;

	rc = mount_file_entries(rootfs, fp, lxc_name, lxc_path);
	fclose(fp);

	return rc;
}
|
|
|
|
static int parse_cap(const char *cap)
|
|
{
|
|
char *ptr = NULL;
|
|
size_t i;
|
|
int capid = -1;
|
|
|
|
if (!strcmp(cap, "none"))
|
|
return -2;
|
|
|
|
for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
|
|
|
|
if (strcmp(cap, caps_opt[i].name))
|
|
continue;
|
|
|
|
capid = caps_opt[i].value;
|
|
break;
|
|
}
|
|
|
|
if (capid < 0) {
|
|
/* try to see if it's numeric, so the user may specify
|
|
* capabilities that the running kernel knows about but
|
|
* we don't */
|
|
errno = 0;
|
|
capid = strtol(cap, &ptr, 10);
|
|
if (!ptr || *ptr != '\0' || errno != 0)
|
|
/* not a valid number */
|
|
capid = -1;
|
|
else if (capid > lxc_caps_last_cap())
|
|
/* we have a number but it's not a valid
|
|
* capability */
|
|
capid = -1;
|
|
}
|
|
|
|
return capid;
|
|
}
|
|
|
|
int in_caplist(int cap, struct lxc_list *caps)
|
|
{
|
|
struct lxc_list *iterator;
|
|
int capid;
|
|
|
|
lxc_list_for_each(iterator, caps) {
|
|
capid = parse_cap(iterator->elem);
|
|
if (capid == cap)
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int setup_caps(struct lxc_list *caps)
|
|
{
|
|
struct lxc_list *iterator;
|
|
char *drop_entry;
|
|
int capid;
|
|
|
|
lxc_list_for_each(iterator, caps) {
|
|
|
|
drop_entry = iterator->elem;
|
|
|
|
capid = parse_cap(drop_entry);
|
|
|
|
if (capid < 0) {
|
|
ERROR("unknown capability %s", drop_entry);
|
|
return -1;
|
|
}
|
|
|
|
DEBUG("drop capability '%s' (%d)", drop_entry, capid);
|
|
|
|
if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
|
|
SYSERROR("failed to remove %s capability", drop_entry);
|
|
return -1;
|
|
}
|
|
|
|
}
|
|
|
|
DEBUG("capabilities have been setup");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int dropcaps_except(struct lxc_list *caps)
|
|
{
|
|
struct lxc_list *iterator;
|
|
char *keep_entry;
|
|
int i, capid;
|
|
int numcaps = lxc_caps_last_cap() + 1;
|
|
INFO("found %d capabilities", numcaps);
|
|
|
|
if (numcaps <= 0 || numcaps > 200)
|
|
return -1;
|
|
|
|
// caplist[i] is 1 if we keep capability i
|
|
int *caplist = alloca(numcaps * sizeof(int));
|
|
memset(caplist, 0, numcaps * sizeof(int));
|
|
|
|
lxc_list_for_each(iterator, caps) {
|
|
|
|
keep_entry = iterator->elem;
|
|
|
|
capid = parse_cap(keep_entry);
|
|
|
|
if (capid == -2)
|
|
continue;
|
|
|
|
if (capid < 0) {
|
|
ERROR("unknown capability %s", keep_entry);
|
|
return -1;
|
|
}
|
|
|
|
DEBUG("keep capability '%s' (%d)", keep_entry, capid);
|
|
|
|
caplist[capid] = 1;
|
|
}
|
|
for (i=0; i<numcaps; i++) {
|
|
if (caplist[i])
|
|
continue;
|
|
if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
|
|
SYSERROR("failed to remove capability %d", i);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
DEBUG("capabilities have been setup");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int setup_hw_addr(char *hwaddr, const char *ifname)
|
|
{
|
|
struct sockaddr sockaddr;
|
|
struct ifreq ifr;
|
|
int ret, fd, saved_errno;
|
|
|
|
ret = lxc_convert_mac(hwaddr, &sockaddr);
|
|
if (ret) {
|
|
ERROR("mac address '%s' conversion failed : %s",
|
|
hwaddr, strerror(-ret));
|
|
return -1;
|
|
}
|
|
|
|
memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
|
|
ifr.ifr_name[IFNAMSIZ-1] = '\0';
|
|
memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
|
|
|
|
fd = socket(AF_INET, SOCK_DGRAM, 0);
|
|
if (fd < 0) {
|
|
ERROR("socket failure : %s", strerror(errno));
|
|
return -1;
|
|
}
|
|
|
|
ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
|
|
saved_errno = errno;
|
|
close(fd);
|
|
if (ret)
|
|
ERROR("ioctl failure : %s", strerror(saved_errno));
|
|
|
|
DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
|
|
{
|
|
struct lxc_list *iterator;
|
|
struct lxc_inetdev *inetdev;
|
|
int err;
|
|
|
|
lxc_list_for_each(iterator, ip) {
|
|
|
|
inetdev = iterator->elem;
|
|
|
|
err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
|
|
&inetdev->bcast, inetdev->prefix);
|
|
if (err) {
|
|
ERROR("failed to setup_ipv4_addr ifindex %d : %s",
|
|
ifindex, strerror(-err));
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
|
|
{
|
|
struct lxc_list *iterator;
|
|
struct lxc_inet6dev *inet6dev;
|
|
int err;
|
|
|
|
lxc_list_for_each(iterator, ip) {
|
|
|
|
inet6dev = iterator->elem;
|
|
|
|
err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
|
|
&inet6dev->mcast, &inet6dev->acast,
|
|
inet6dev->prefix);
|
|
if (err) {
|
|
ERROR("failed to setup_ipv6_addr ifindex %d : %s",
|
|
ifindex, strerror(-err));
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int setup_netdev(struct lxc_netdev *netdev)
|
|
{
|
|
char ifname[IFNAMSIZ];
|
|
char *current_ifname = ifname;
|
|
int err;
|
|
|
|
/* empty network namespace */
|
|
if (!netdev->ifindex) {
|
|
if (netdev->flags & IFF_UP) {
|
|
err = lxc_netdev_up("lo");
|
|
if (err) {
|
|
ERROR("failed to set the loopback up : %s",
|
|
strerror(-err));
|
|
return -1;
|
|
}
|
|
}
|
|
if (netdev->type != LXC_NET_VETH)
|
|
return 0;
|
|
netdev->ifindex = if_nametoindex(netdev->name);
|
|
}
|
|
|
|
/* get the new ifindex in case of physical netdev */
|
|
if (netdev->type == LXC_NET_PHYS) {
|
|
if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
|
|
ERROR("failed to get ifindex for %s",
|
|
netdev->link);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* retrieve the name of the interface */
|
|
if (!if_indextoname(netdev->ifindex, current_ifname)) {
|
|
ERROR("no interface corresponding to index '%d'",
|
|
netdev->ifindex);
|
|
return -1;
|
|
}
|
|
|
|
/* default: let the system to choose one interface name */
|
|
if (!netdev->name)
|
|
netdev->name = netdev->type == LXC_NET_PHYS ?
|
|
netdev->link : "eth%d";
|
|
|
|
/* rename the interface name */
|
|
if (strcmp(ifname, netdev->name) != 0) {
|
|
err = lxc_netdev_rename_by_name(ifname, netdev->name);
|
|
if (err) {
|
|
ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
|
|
strerror(-err));
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* Re-read the name of the interface because its name has changed
|
|
* and would be automatically allocated by the system
|
|
*/
|
|
if (!if_indextoname(netdev->ifindex, current_ifname)) {
|
|
ERROR("no interface corresponding to index '%d'",
|
|
netdev->ifindex);
|
|
return -1;
|
|
}
|
|
|
|
/* set a mac address */
|
|
if (netdev->hwaddr) {
|
|
if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
|
|
ERROR("failed to setup hw address for '%s'",
|
|
current_ifname);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* setup ipv4 addresses on the interface */
|
|
if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
|
|
ERROR("failed to setup ip addresses for '%s'",
|
|
ifname);
|
|
return -1;
|
|
}
|
|
|
|
/* setup ipv6 addresses on the interface */
|
|
if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
|
|
ERROR("failed to setup ipv6 addresses for '%s'",
|
|
ifname);
|
|
return -1;
|
|
}
|
|
|
|
/* set the network device up */
|
|
if (netdev->flags & IFF_UP) {
|
|
int err;
|
|
|
|
err = lxc_netdev_up(current_ifname);
|
|
if (err) {
|
|
ERROR("failed to set '%s' up : %s", current_ifname,
|
|
strerror(-err));
|
|
return -1;
|
|
}
|
|
|
|
/* the network is up, make the loopback up too */
|
|
err = lxc_netdev_up("lo");
|
|
if (err) {
|
|
ERROR("failed to set the loopback up : %s",
|
|
strerror(-err));
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* We can only set up the default routes after bringing
|
|
* up the interface, sine bringing up the interface adds
|
|
* the link-local routes and we can't add a default
|
|
* route if the gateway is not reachable. */
|
|
|
|
/* setup ipv4 gateway on the interface */
|
|
if (netdev->ipv4_gateway) {
|
|
if (!(netdev->flags & IFF_UP)) {
|
|
ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
|
|
return -1;
|
|
}
|
|
|
|
if (lxc_list_empty(&netdev->ipv4)) {
|
|
ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
|
|
return -1;
|
|
}
|
|
|
|
err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
|
|
if (err) {
|
|
err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
|
|
if (err) {
|
|
ERROR("failed to add ipv4 dest for '%s': %s",
|
|
ifname, strerror(-err));
|
|
}
|
|
|
|
err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
|
|
if (err) {
|
|
ERROR("failed to setup ipv4 gateway for '%s': %s",
|
|
ifname, strerror(-err));
|
|
if (netdev->ipv4_gateway_auto) {
|
|
char buf[INET_ADDRSTRLEN];
|
|
inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
|
|
ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
|
|
}
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* setup ipv6 gateway on the interface */
|
|
if (netdev->ipv6_gateway) {
|
|
if (!(netdev->flags & IFF_UP)) {
|
|
ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
|
|
return -1;
|
|
}
|
|
|
|
if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
|
|
ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
|
|
return -1;
|
|
}
|
|
|
|
err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
|
|
if (err) {
|
|
err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
|
|
if (err) {
|
|
ERROR("failed to add ipv6 dest for '%s': %s",
|
|
ifname, strerror(-err));
|
|
}
|
|
|
|
err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
|
|
if (err) {
|
|
ERROR("failed to setup ipv6 gateway for '%s': %s",
|
|
ifname, strerror(-err));
|
|
if (netdev->ipv6_gateway_auto) {
|
|
char buf[INET6_ADDRSTRLEN];
|
|
inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
|
|
ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
|
|
}
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
DEBUG("'%s' has been setup", current_ifname);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int setup_network(struct lxc_list *network)
|
|
{
|
|
struct lxc_list *iterator;
|
|
struct lxc_netdev *netdev;
|
|
|
|
lxc_list_for_each(iterator, network) {
|
|
|
|
netdev = iterator->elem;
|
|
|
|
if (setup_netdev(netdev)) {
|
|
ERROR("failed to setup netdev");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (!lxc_list_empty(network))
|
|
INFO("network has been setup");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int parse_resource(const char *res) {
|
|
size_t i;
|
|
int resid = -1;
|
|
|
|
for (i = 0; i < sizeof(limit_opt)/sizeof(limit_opt[0]); ++i) {
|
|
if (strcmp(res, limit_opt[i].name) == 0)
|
|
return limit_opt[i].value;
|
|
}
|
|
|
|
/* try to see if it's numeric, so the user may specify
|
|
* resources that the running kernel knows about but
|
|
* we don't */
|
|
if (lxc_safe_int(res, &resid) == 0)
|
|
return resid;
|
|
return -1;
|
|
}
|
|
|
|
int setup_resource_limits(struct lxc_list *limits, pid_t pid) {
|
|
struct lxc_list *it;
|
|
struct lxc_limit *lim;
|
|
int resid;
|
|
|
|
lxc_list_for_each(it, limits) {
|
|
lim = it->elem;
|
|
|
|
resid = parse_resource(lim->resource);
|
|
if (resid < 0) {
|
|
ERROR("unknown resource %s", lim->resource);
|
|
return -1;
|
|
}
|
|
|
|
if (prlimit(pid, resid, &lim->limit, NULL) != 0) {
|
|
ERROR("failed to set limit %s: %s", lim->resource, strerror(errno));
|
|
return -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* try to move physical nics to the init netns */
|
|
void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
|
|
{
|
|
int i, oldfd;
|
|
char ifname[IFNAMSIZ];
|
|
|
|
if (netnsfd < 0 || conf->num_savednics == 0)
|
|
return;
|
|
|
|
INFO("Running to reset %d nic names.", conf->num_savednics);
|
|
|
|
oldfd = lxc_preserve_ns(getpid(), "net");
|
|
if (oldfd < 0) {
|
|
SYSERROR("Failed to open monitor netns fd.");
|
|
return;
|
|
}
|
|
|
|
if (setns(netnsfd, 0) != 0) {
|
|
SYSERROR("Failed to enter container netns to reset nics");
|
|
close(oldfd);
|
|
return;
|
|
}
|
|
for (i=0; i<conf->num_savednics; i++) {
|
|
struct saved_nic *s = &conf->saved_nics[i];
|
|
/* retrieve the name of the interface */
|
|
if (!if_indextoname(s->ifindex, ifname)) {
|
|
WARN("no interface corresponding to index '%d'", s->ifindex);
|
|
continue;
|
|
}
|
|
if (lxc_netdev_move_by_name(ifname, 1, s->orig_name))
|
|
WARN("Error moving nic name:%s back to host netns", ifname);
|
|
free(s->orig_name);
|
|
}
|
|
conf->num_savednics = 0;
|
|
|
|
if (setns(oldfd, 0) != 0)
|
|
SYSERROR("Failed to re-enter monitor's netns");
|
|
close(oldfd);
|
|
}
|
|
|
|
static char *default_rootfs_mount = LXCROOTFSMOUNT;
|
|
|
|
struct lxc_conf *lxc_conf_init(void)
|
|
{
|
|
struct lxc_conf *new;
|
|
int i;
|
|
|
|
new = malloc(sizeof(*new));
|
|
if (!new) {
|
|
ERROR("lxc_conf_init : %m");
|
|
return NULL;
|
|
}
|
|
memset(new, 0, sizeof(*new));
|
|
|
|
new->loglevel = LXC_LOG_PRIORITY_NOTSET;
|
|
new->personality = -1;
|
|
new->autodev = 1;
|
|
new->console.log_path = NULL;
|
|
new->console.log_fd = -1;
|
|
new->console.path = NULL;
|
|
new->console.peer = -1;
|
|
new->console.peerpty.busy = -1;
|
|
new->console.peerpty.master = -1;
|
|
new->console.peerpty.slave = -1;
|
|
new->console.master = -1;
|
|
new->console.slave = -1;
|
|
new->console.name[0] = '\0';
|
|
new->maincmd_fd = -1;
|
|
new->nbd_idx = -1;
|
|
new->rootfs.mount = strdup(default_rootfs_mount);
|
|
if (!new->rootfs.mount) {
|
|
ERROR("lxc_conf_init : %m");
|
|
free(new);
|
|
return NULL;
|
|
}
|
|
new->kmsg = 0;
|
|
new->logfd = -1;
|
|
lxc_list_init(&new->cgroup);
|
|
lxc_list_init(&new->network);
|
|
lxc_list_init(&new->mount_list);
|
|
lxc_list_init(&new->caps);
|
|
lxc_list_init(&new->keepcaps);
|
|
lxc_list_init(&new->id_map);
|
|
lxc_list_init(&new->includes);
|
|
lxc_list_init(&new->aliens);
|
|
lxc_list_init(&new->environment);
|
|
lxc_list_init(&new->limits);
|
|
for (i=0; i<NUM_LXC_HOOKS; i++)
|
|
lxc_list_init(&new->hooks[i]);
|
|
lxc_list_init(&new->groups);
|
|
new->lsm_aa_profile = NULL;
|
|
new->lsm_se_context = NULL;
|
|
new->tmp_umount_proc = 0;
|
|
|
|
for (i = 0; i < LXC_NS_MAX; i++)
|
|
new->inherit_ns_fd[i] = -1;
|
|
|
|
/* if running in a new user namespace, init and COMMAND
|
|
* default to running as UID/GID 0 when using lxc-execute */
|
|
new->init_uid = 0;
|
|
new->init_gid = 0;
|
|
|
|
return new;
|
|
}
|
|
|
|
static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
char veth1buf[IFNAMSIZ], *veth1;
|
|
char veth2buf[IFNAMSIZ], *veth2;
|
|
int bridge_index, err;
|
|
unsigned int mtu = 0;
|
|
|
|
if (netdev->priv.veth_attr.pair) {
|
|
veth1 = netdev->priv.veth_attr.pair;
|
|
if (handler->conf->reboot)
|
|
lxc_netdev_delete_by_name(veth1);
|
|
} else {
|
|
err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
|
|
if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
|
|
ERROR("veth1 name too long");
|
|
return -1;
|
|
}
|
|
veth1 = lxc_mkifname(veth1buf);
|
|
if (!veth1) {
|
|
ERROR("failed to allocate a temporary name");
|
|
return -1;
|
|
}
|
|
/* store away for deconf */
|
|
memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
|
|
}
|
|
|
|
snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
|
|
veth2 = lxc_mkifname(veth2buf);
|
|
if (!veth2) {
|
|
ERROR("failed to allocate a temporary name");
|
|
goto out_delete;
|
|
}
|
|
|
|
err = lxc_veth_create(veth1, veth2);
|
|
if (err) {
|
|
ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
|
|
strerror(-err));
|
|
goto out_delete;
|
|
}
|
|
|
|
/* changing the high byte of the mac address to 0xfe, the bridge interface
|
|
* will always keep the host's mac address and not take the mac address
|
|
* of a container */
|
|
err = setup_private_host_hw_addr(veth1);
|
|
if (err) {
|
|
ERROR("failed to change mac address of host interface '%s': %s",
|
|
veth1, strerror(-err));
|
|
goto out_delete;
|
|
}
|
|
|
|
netdev->ifindex = if_nametoindex(veth2);
|
|
if (!netdev->ifindex) {
|
|
ERROR("failed to retrieve the index for %s", veth2);
|
|
goto out_delete;
|
|
}
|
|
|
|
if (netdev->mtu) {
|
|
if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
|
|
WARN("Failed to parse mtu from.");
|
|
else
|
|
INFO("Retrieved mtu %d", mtu);
|
|
} else if (netdev->link) {
|
|
bridge_index = if_nametoindex(netdev->link);
|
|
if (bridge_index) {
|
|
mtu = netdev_get_mtu(bridge_index);
|
|
INFO("Retrieved mtu %d from %s", mtu, netdev->link);
|
|
} else {
|
|
mtu = netdev_get_mtu(netdev->ifindex);
|
|
INFO("Retrieved mtu %d from %s", mtu, veth2);
|
|
}
|
|
}
|
|
|
|
if (mtu) {
|
|
err = lxc_netdev_set_mtu(veth1, mtu);
|
|
if (!err)
|
|
err = lxc_netdev_set_mtu(veth2, mtu);
|
|
if (err) {
|
|
ERROR("failed to set mtu '%i' for veth pair (%s and %s): %s",
|
|
mtu, veth1, veth2, strerror(-err));
|
|
goto out_delete;
|
|
}
|
|
}
|
|
|
|
if (netdev->link) {
|
|
err = lxc_bridge_attach(handler->lxcpath, handler->name, netdev->link, veth1);
|
|
if (err) {
|
|
ERROR("failed to attach '%s' to the bridge '%s': %s",
|
|
veth1, netdev->link, strerror(-err));
|
|
goto out_delete;
|
|
}
|
|
INFO("Attached '%s': to the bridge '%s': ", veth1, netdev->link);
|
|
}
|
|
|
|
err = lxc_netdev_up(veth1);
|
|
if (err) {
|
|
ERROR("failed to set %s up : %s", veth1, strerror(-err));
|
|
goto out_delete;
|
|
}
|
|
|
|
if (netdev->upscript) {
|
|
err = run_script(handler->name, "net", netdev->upscript, "up",
|
|
"veth", veth1, (char*) NULL);
|
|
if (err)
|
|
goto out_delete;
|
|
}
|
|
|
|
DEBUG("instantiated veth '%s/%s', index is '%d'",
|
|
veth1, veth2, netdev->ifindex);
|
|
|
|
return 0;
|
|
|
|
out_delete:
|
|
lxc_netdev_delete_by_name(veth1);
|
|
if (!netdev->priv.veth_attr.pair)
|
|
free(veth1);
|
|
free(veth2);
|
|
return -1;
|
|
}
|
|
|
|
static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
char *veth1;
|
|
int err;
|
|
|
|
if (netdev->priv.veth_attr.pair)
|
|
veth1 = netdev->priv.veth_attr.pair;
|
|
else
|
|
veth1 = netdev->priv.veth_attr.veth1;
|
|
|
|
if (netdev->downscript) {
|
|
err = run_script(handler->name, "net", netdev->downscript,
|
|
"down", "veth", veth1, (char*) NULL);
|
|
if (err)
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
char peerbuf[IFNAMSIZ], *peer;
|
|
int err;
|
|
|
|
if (!netdev->link) {
|
|
ERROR("no link specified for macvlan netdev");
|
|
return -1;
|
|
}
|
|
|
|
err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
|
|
if (err >= sizeof(peerbuf))
|
|
return -1;
|
|
|
|
peer = lxc_mkifname(peerbuf);
|
|
if (!peer) {
|
|
ERROR("failed to make a temporary name");
|
|
return -1;
|
|
}
|
|
|
|
err = lxc_macvlan_create(netdev->link, peer,
|
|
netdev->priv.macvlan_attr.mode);
|
|
if (err) {
|
|
ERROR("failed to create macvlan interface '%s' on '%s' : %s",
|
|
peer, netdev->link, strerror(-err));
|
|
goto out;
|
|
}
|
|
|
|
netdev->ifindex = if_nametoindex(peer);
|
|
if (!netdev->ifindex) {
|
|
ERROR("failed to retrieve the index for %s", peer);
|
|
goto out;
|
|
}
|
|
|
|
if (netdev->upscript) {
|
|
err = run_script(handler->name, "net", netdev->upscript, "up",
|
|
"macvlan", netdev->link, (char*) NULL);
|
|
if (err)
|
|
goto out;
|
|
}
|
|
|
|
DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
|
|
peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
|
|
|
|
return 0;
|
|
out:
|
|
lxc_netdev_delete_by_name(peer);
|
|
free(peer);
|
|
return -1;
|
|
}
|
|
|
|
static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
int err;
|
|
|
|
if (netdev->downscript) {
|
|
err = run_script(handler->name, "net", netdev->downscript,
|
|
"down", "macvlan", netdev->link,
|
|
(char*) NULL);
|
|
if (err)
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* XXX: merge with instantiate_macvlan */
|
|
static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
char peer[IFNAMSIZ];
|
|
int err;
|
|
static uint16_t vlan_cntr = 0;
|
|
unsigned int mtu = 0;
|
|
|
|
if (!netdev->link) {
|
|
ERROR("no link specified for vlan netdev");
|
|
return -1;
|
|
}
|
|
|
|
err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
|
|
if (err >= sizeof(peer)) {
|
|
ERROR("peer name too long");
|
|
return -1;
|
|
}
|
|
|
|
err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
|
|
if (err) {
|
|
ERROR("failed to create vlan interface '%s' on '%s' : %s",
|
|
peer, netdev->link, strerror(-err));
|
|
return -1;
|
|
}
|
|
|
|
netdev->ifindex = if_nametoindex(peer);
|
|
if (!netdev->ifindex) {
|
|
ERROR("failed to retrieve the ifindex for %s", peer);
|
|
lxc_netdev_delete_by_name(peer);
|
|
return -1;
|
|
}
|
|
|
|
DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
|
|
netdev->ifindex);
|
|
if (netdev->mtu) {
|
|
if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
|
|
ERROR("Failed to retrieve mtu from: '%d'/'%s'.",
|
|
netdev->ifindex, netdev->name);
|
|
return -1;
|
|
}
|
|
err = lxc_netdev_set_mtu(peer, mtu);
|
|
if (err) {
|
|
ERROR("failed to set mtu '%s' for %s : %s",
|
|
netdev->mtu, peer, strerror(-err));
|
|
lxc_netdev_delete_by_name(peer);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Shutdown handler for vlan devices: performs no action and always
 * returns 0. Neither argument is used.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
|
|
|
|
static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
if (!netdev->link) {
|
|
ERROR("no link specified for the physical interface");
|
|
return -1;
|
|
}
|
|
|
|
netdev->ifindex = if_nametoindex(netdev->link);
|
|
if (!netdev->ifindex) {
|
|
ERROR("failed to retrieve the index for %s", netdev->link);
|
|
return -1;
|
|
}
|
|
|
|
if (netdev->upscript) {
|
|
int err;
|
|
err = run_script(handler->name, "net", netdev->upscript,
|
|
"up", "phys", netdev->link, (char*) NULL);
|
|
if (err)
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
int err;
|
|
|
|
if (netdev->downscript) {
|
|
err = run_script(handler->name, "net", netdev->downscript,
|
|
"down", "phys", netdev->link, (char*) NULL);
|
|
if (err)
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
netdev->ifindex = 0;
|
|
return 0;
|
|
}
|
|
|
|
static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
netdev->ifindex = 0;
|
|
if (netdev->upscript) {
|
|
int err;
|
|
err = run_script(handler->name, "net", netdev->upscript,
|
|
"up", "empty", (char*) NULL);
|
|
if (err)
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
|
|
{
|
|
int err;
|
|
|
|
if (netdev->downscript) {
|
|
err = run_script(handler->name, "net", netdev->downscript,
|
|
"down", "empty", (char*) NULL);
|
|
if (err)
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Shutdown handler for the "none" network type: performs no action and
 * always returns 0. Neither argument is used.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
|
|
|
|
int lxc_requests_empty_network(struct lxc_handler *handler)
|
|
{
|
|
struct lxc_list *network = &handler->conf->network;
|
|
struct lxc_list *iterator;
|
|
struct lxc_netdev *netdev;
|
|
bool found_none = false, found_nic = false;
|
|
|
|
if (lxc_list_empty(network))
|
|
return 0;
|
|
|
|
lxc_list_for_each(iterator, network) {
|
|
|
|
netdev = iterator->elem;
|
|
|
|
if (netdev->type == LXC_NET_NONE)
|
|
found_none = true;
|
|
else
|
|
found_nic = true;
|
|
}
|
|
if (found_none && !found_nic)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
int lxc_create_network(struct lxc_handler *handler)
|
|
{
|
|
struct lxc_list *network = &handler->conf->network;
|
|
struct lxc_list *iterator;
|
|
struct lxc_netdev *netdev;
|
|
int am_root = (getuid() == 0);
|
|
|
|
if (!am_root)
|
|
return 0;
|
|
|
|
lxc_list_for_each(iterator, network) {
|
|
|
|
netdev = iterator->elem;
|
|
|
|
if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
|
|
ERROR("invalid network configuration type '%d'",
|
|
netdev->type);
|
|
return -1;
|
|
}
|
|
|
|
if (netdev_conf[netdev->type](handler, netdev)) {
|
|
ERROR("failed to create netdev");
|
|
return -1;
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool lxc_delete_network(struct lxc_handler *handler)
|
|
{
|
|
int ret;
|
|
struct lxc_list *network = &handler->conf->network;
|
|
struct lxc_list *iterator;
|
|
struct lxc_netdev *netdev;
|
|
bool deleted_all = true;
|
|
|
|
lxc_list_for_each(iterator, network) {
|
|
netdev = iterator->elem;
|
|
|
|
if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
|
|
if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
|
|
WARN("Failed to rename interface with index %d "
|
|
"to its initial name \"%s\".",
|
|
netdev->ifindex, netdev->link);
|
|
continue;
|
|
}
|
|
|
|
if (netdev_deconf[netdev->type](handler, netdev)) {
|
|
WARN("Failed to destroy netdev");
|
|
}
|
|
|
|
/* Recent kernel remove the virtual interfaces when the network
|
|
* namespace is destroyed but in case we did not moved the
|
|
* interface to the network namespace, we have to destroy it
|
|
*/
|
|
if (netdev->ifindex != 0) {
|
|
ret = lxc_netdev_delete_by_index(netdev->ifindex);
|
|
if (-ret == ENODEV) {
|
|
INFO("Interface \"%s\" with index %d already "
|
|
"deleted or existing in different network "
|
|
"namespace.",
|
|
netdev->name ? netdev->name : "(null)",
|
|
netdev->ifindex);
|
|
} else if (ret < 0) {
|
|
deleted_all = false;
|
|
WARN("Failed to remove interface \"%s\" with "
|
|
"index %d: %s.",
|
|
netdev->name ? netdev->name : "(null)",
|
|
netdev->ifindex, strerror(-ret));
|
|
} else {
|
|
INFO("Removed interface \"%s\" with index %d.",
|
|
netdev->name ? netdev->name : "(null)",
|
|
netdev->ifindex);
|
|
}
|
|
}
|
|
|
|
/* Explicitly delete host veth device to prevent lingering
|
|
* devices. We had issues in LXD around this.
|
|
*/
|
|
if (netdev->type == LXC_NET_VETH && !am_unpriv()) {
|
|
char *hostveth;
|
|
if (netdev->priv.veth_attr.pair) {
|
|
hostveth = netdev->priv.veth_attr.pair;
|
|
ret = lxc_netdev_delete_by_name(hostveth);
|
|
if (ret < 0) {
|
|
WARN("Failed to remove interface \"%s\" from host: %s.", hostveth, strerror(-ret));
|
|
} else {
|
|
INFO("Removed interface \"%s\" from host.", hostveth);
|
|
}
|
|
} else if (strlen(netdev->priv.veth_attr.veth1) > 0) {
|
|
hostveth = netdev->priv.veth_attr.veth1;
|
|
ret = lxc_netdev_delete_by_name(hostveth);
|
|
if (ret < 0) {
|
|
WARN("Failed to remove \"%s\" from host: %s.", hostveth, strerror(-ret));
|
|
} else {
|
|
INFO("Removed interface \"%s\" from host.", hostveth);
|
|
memset((void *)&netdev->priv.veth_attr.veth1, 0, sizeof(netdev->priv.veth_attr.veth1));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return deleted_all;
|
|
}
|
|
|
|
#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
|
|
|
|
/* lxc-user-nic returns "interface_name:interface_name\n" */
|
|
#define MAX_BUFFER_SIZE IFNAMSIZ * 2 + 2
|
|
static int unpriv_assign_nic(const char *lxcpath, char *lxcname,
|
|
struct lxc_netdev *netdev, pid_t pid)
|
|
{
|
|
pid_t child;
|
|
int bytes, pipefd[2];
|
|
char *token, *saveptr = NULL;
|
|
char buffer[MAX_BUFFER_SIZE];
|
|
char netdev_link[IFNAMSIZ + 1];
|
|
|
|
if (netdev->type != LXC_NET_VETH) {
|
|
ERROR("nic type %d not support for unprivileged use",
|
|
netdev->type);
|
|
return -1;
|
|
}
|
|
|
|
if (pipe(pipefd) < 0) {
|
|
SYSERROR("pipe failed");
|
|
return -1;
|
|
}
|
|
|
|
child = fork();
|
|
if (child < 0) {
|
|
SYSERROR("fork");
|
|
close(pipefd[0]);
|
|
close(pipefd[1]);
|
|
return -1;
|
|
}
|
|
|
|
if (child == 0) { // child
|
|
/* Call lxc-user-nic pid type bridge. */
|
|
int ret;
|
|
char pidstr[LXC_NUMSTRLEN64];
|
|
|
|
close(pipefd[0]); /* Close the read-end of the pipe. */
|
|
|
|
/* Redirect stdout to write-end of the pipe. */
|
|
ret = dup2(pipefd[1], STDOUT_FILENO);
|
|
close(pipefd[1]); /* Close the write-end of the pipe. */
|
|
if (ret < 0) {
|
|
SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor.");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
if (netdev->link)
|
|
strncpy(netdev_link, netdev->link, IFNAMSIZ);
|
|
else
|
|
strncpy(netdev_link, "none", IFNAMSIZ);
|
|
|
|
ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
|
|
if (ret < 0 || ret >= LXC_NUMSTRLEN64)
|
|
exit(EXIT_FAILURE);
|
|
pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
|
|
|
|
INFO("Execing lxc-user-nic %s %s %s veth %s %s", lxcpath,
|
|
lxcname, pidstr, netdev_link, netdev->name);
|
|
execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, lxcpath, lxcname,
|
|
pidstr, "veth", netdev_link, netdev->name, NULL);
|
|
|
|
SYSERROR("Failed to exec lxc-user-nic.");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/* close the write-end of the pipe */
|
|
close(pipefd[1]);
|
|
|
|
bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
|
|
if (bytes < 0)
|
|
SYSERROR("Failed to read from pipe file descriptor.");
|
|
buffer[bytes - 1] = '\0';
|
|
|
|
if (wait_for_pid(child) != 0) {
|
|
close(pipefd[0]);
|
|
return -1;
|
|
}
|
|
|
|
/* close the read-end of the pipe */
|
|
close(pipefd[0]);
|
|
|
|
/* fill netdev->name field */
|
|
token = strtok_r(buffer, ":", &saveptr);
|
|
if (!token)
|
|
return -1;
|
|
|
|
netdev->name = malloc(IFNAMSIZ + 1);
|
|
if (!netdev->name) {
|
|
SYSERROR("Failed to allocate memory.");
|
|
return -1;
|
|
}
|
|
memset(netdev->name, 0, IFNAMSIZ + 1);
|
|
strncpy(netdev->name, token, IFNAMSIZ);
|
|
|
|
/* fill netdev->veth_attr.pair field */
|
|
token = strtok_r(NULL, ":", &saveptr);
|
|
if (!token)
|
|
return -1;
|
|
|
|
netdev->priv.veth_attr.pair = strdup(token);
|
|
if (!netdev->priv.veth_attr.pair) {
|
|
ERROR("Failed to allocate memory.");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int lxc_assign_network(const char *lxcpath, char *lxcname,
|
|
struct lxc_list *network, pid_t pid)
|
|
{
|
|
struct lxc_list *iterator;
|
|
struct lxc_netdev *netdev;
|
|
char ifname[IFNAMSIZ];
|
|
int am_root = (getuid() == 0);
|
|
int err;
|
|
|
|
lxc_list_for_each(iterator, network) {
|
|
|
|
netdev = iterator->elem;
|
|
|
|
if (netdev->type == LXC_NET_VETH && !am_root) {
|
|
if (unpriv_assign_nic(lxcpath, lxcname, netdev, pid))
|
|
return -1;
|
|
// lxc-user-nic has moved the nic to the new ns.
|
|
// unpriv_assign_nic() fills in netdev->name.
|
|
// netdev->ifindex will be filed in at setup_netdev.
|
|
continue;
|
|
}
|
|
|
|
/* empty network namespace, nothing to move */
|
|
if (!netdev->ifindex)
|
|
continue;
|
|
|
|
/* retrieve the name of the interface */
|
|
if (!if_indextoname(netdev->ifindex, ifname)) {
|
|
ERROR("no interface corresponding to index '%d'", netdev->ifindex);
|
|
return -1;
|
|
}
|
|
|
|
err = lxc_netdev_move_by_name(ifname, pid, NULL);
|
|
if (err) {
|
|
ERROR("failed to move '%s' to the container : %s",
|
|
netdev->link, strerror(-err));
|
|
return -1;
|
|
}
|
|
|
|
DEBUG("move '%s'/'%s' to '%d': .", ifname, netdev->name, pid);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
|
|
size_t buf_size)
|
|
{
|
|
char path[PATH_MAX];
|
|
int ret, closeret;
|
|
FILE *f;
|
|
|
|
ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
|
|
if (ret < 0 || ret >= PATH_MAX) {
|
|
fprintf(stderr, "%s: path name too long\n", __func__);
|
|
return -E2BIG;
|
|
}
|
|
f = fopen(path, "w");
|
|
if (!f) {
|
|
perror("open");
|
|
return -EINVAL;
|
|
}
|
|
ret = fwrite(buf, buf_size, 1, f);
|
|
if (ret < 0)
|
|
SYSERROR("writing id mapping");
|
|
closeret = fclose(f);
|
|
if (closeret)
|
|
SYSERROR("writing id mapping");
|
|
return ret < 0 ? ret : closeret;
|
|
}
|
|
|
|
int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
|
|
{
|
|
struct lxc_list *iterator;
|
|
struct id_map *map;
|
|
int ret = 0, use_shadow = 0;
|
|
enum idtype type;
|
|
char *buf = NULL, *pos, *cmdpath = NULL;
|
|
|
|
/*
|
|
* If newuidmap exists, that is, if shadow is handing out subuid
|
|
* ranges, then insist that root also reserve ranges in subuid. This
|
|
* will protected it by preventing another user from being handed the
|
|
* range by shadow.
|
|
*/
|
|
cmdpath = on_path("newuidmap", NULL);
|
|
if (cmdpath) {
|
|
use_shadow = 1;
|
|
free(cmdpath);
|
|
}
|
|
|
|
if (!use_shadow && geteuid()) {
|
|
ERROR("Missing newuidmap/newgidmap");
|
|
return -1;
|
|
}
|
|
|
|
for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
|
|
int left, fill;
|
|
int had_entry = 0;
|
|
if (!buf) {
|
|
buf = pos = malloc(4096);
|
|
if (!buf)
|
|
return -ENOMEM;
|
|
}
|
|
pos = buf;
|
|
if (use_shadow)
|
|
pos += sprintf(buf, "new%cidmap %d",
|
|
type == ID_TYPE_UID ? 'u' : 'g',
|
|
pid);
|
|
|
|
lxc_list_for_each(iterator, idmap) {
|
|
/* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
|
|
map = iterator->elem;
|
|
if (map->idtype != type)
|
|
continue;
|
|
|
|
had_entry = 1;
|
|
left = 4096 - (pos - buf);
|
|
fill = snprintf(pos, left, "%s%lu %lu %lu%s",
|
|
use_shadow ? " " : "",
|
|
map->nsid, map->hostid, map->range,
|
|
use_shadow ? "" : "\n");
|
|
if (fill <= 0 || fill >= left)
|
|
SYSERROR("snprintf failed, too many mappings");
|
|
pos += fill;
|
|
}
|
|
if (!had_entry)
|
|
continue;
|
|
|
|
if (!use_shadow) {
|
|
ret = write_id_mapping(type, pid, buf, pos-buf);
|
|
} else {
|
|
left = 4096 - (pos - buf);
|
|
fill = snprintf(pos, left, "\n");
|
|
if (fill <= 0 || fill >= left)
|
|
SYSERROR("snprintf failed, too many mappings");
|
|
pos += fill;
|
|
ret = system(buf);
|
|
}
|
|
|
|
if (ret)
|
|
break;
|
|
}
|
|
|
|
free(buf);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* return the host uid/gid to which the container root is mapped in
|
|
* *val.
|
|
* Return true if id was found, false otherwise.
|
|
*/
|
|
bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
|
|
unsigned long *val)
|
|
{
|
|
struct lxc_list *it;
|
|
struct id_map *map;
|
|
|
|
lxc_list_for_each(it, &conf->id_map) {
|
|
map = it->elem;
|
|
if (map->idtype != idtype)
|
|
continue;
|
|
if (map->nsid != 0)
|
|
continue;
|
|
*val = map->hostid;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
|
|
{
|
|
struct lxc_list *it;
|
|
struct id_map *map;
|
|
lxc_list_for_each(it, &conf->id_map) {
|
|
map = it->elem;
|
|
if (map->idtype != idtype)
|
|
continue;
|
|
if (id >= map->hostid && id < map->hostid + map->range)
|
|
return (id - map->hostid) + map->nsid;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
|
|
{
|
|
struct lxc_list *it;
|
|
struct id_map *map;
|
|
unsigned int freeid = 0;
|
|
again:
|
|
lxc_list_for_each(it, &conf->id_map) {
|
|
map = it->elem;
|
|
if (map->idtype != idtype)
|
|
continue;
|
|
if (freeid >= map->nsid && freeid < map->nsid + map->range) {
|
|
freeid = map->nsid + map->range;
|
|
goto again;
|
|
}
|
|
}
|
|
return freeid;
|
|
}
|
|
|
|
int lxc_find_gateway_addresses(struct lxc_handler *handler)
|
|
{
|
|
struct lxc_list *network = &handler->conf->network;
|
|
struct lxc_list *iterator;
|
|
struct lxc_netdev *netdev;
|
|
int link_index;
|
|
|
|
lxc_list_for_each(iterator, network) {
|
|
netdev = iterator->elem;
|
|
|
|
if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
|
|
continue;
|
|
|
|
if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
|
|
ERROR("gateway = auto only supported for "
|
|
"veth and macvlan");
|
|
return -1;
|
|
}
|
|
|
|
if (!netdev->link) {
|
|
ERROR("gateway = auto needs a link interface");
|
|
return -1;
|
|
}
|
|
|
|
link_index = if_nametoindex(netdev->link);
|
|
if (!link_index)
|
|
return -EINVAL;
|
|
|
|
if (netdev->ipv4_gateway_auto) {
|
|
if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
|
|
ERROR("failed to automatically find ipv4 gateway "
|
|
"address from link interface '%s'", netdev->link);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (netdev->ipv6_gateway_auto) {
|
|
if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
|
|
ERROR("failed to automatically find ipv6 gateway "
|
|
"address from link interface '%s'", netdev->link);
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int lxc_create_tty(const char *name, struct lxc_conf *conf)
|
|
{
|
|
struct lxc_tty_info *tty_info = &conf->tty_info;
|
|
int i, ret;
|
|
|
|
/* no tty in the configuration */
|
|
if (!conf->tty)
|
|
return 0;
|
|
|
|
tty_info->pty_info =
|
|
malloc(sizeof(*tty_info->pty_info)*conf->tty);
|
|
if (!tty_info->pty_info) {
|
|
SYSERROR("failed to allocate pty_info");
|
|
return -1;
|
|
}
|
|
|
|
for (i = 0; i < conf->tty; i++) {
|
|
|
|
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
|
|
|
|
process_lock();
|
|
ret = openpty(&pty_info->master, &pty_info->slave,
|
|
pty_info->name, NULL, NULL);
|
|
process_unlock();
|
|
if (ret) {
|
|
SYSERROR("failed to create pty #%d", i);
|
|
tty_info->nbtty = i;
|
|
lxc_delete_tty(tty_info);
|
|
return -1;
|
|
}
|
|
|
|
DEBUG("allocated pty '%s' (%d/%d)",
|
|
pty_info->name, pty_info->master, pty_info->slave);
|
|
|
|
/* Prevent leaking the file descriptors to the container */
|
|
fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
|
|
fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
|
|
|
|
pty_info->busy = 0;
|
|
}
|
|
|
|
tty_info->nbtty = conf->tty;
|
|
|
|
INFO("tty's configured");
|
|
|
|
return 0;
|
|
}
|
|
|
|
void lxc_delete_tty(struct lxc_tty_info *tty_info)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < tty_info->nbtty; i++) {
|
|
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
|
|
|
|
close(pty_info->master);
|
|
close(pty_info->slave);
|
|
}
|
|
|
|
free(tty_info->pty_info);
|
|
tty_info->pty_info = NULL;
|
|
tty_info->nbtty = 0;
|
|
}
|
|
|
|
/*
|
|
* chown_mapped_root: for an unprivileged user with uid/gid X to
|
|
* chown a dir to subuid/subgid Y, he needs to run chown as root
|
|
* in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
|
|
* nsid Y is mapped to hostuid/hostgid X. That way, the container
|
|
* root is privileged with respect to hostuid/hostgid X, allowing
|
|
* him to do the chown.
|
|
*/
|
|
int chown_mapped_root(char *path, struct lxc_conf *conf)
|
|
{
|
|
uid_t rootuid;
|
|
gid_t rootgid;
|
|
pid_t pid;
|
|
unsigned long val;
|
|
char *chownpath = path;
|
|
|
|
if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
|
|
ERROR("No mapping for container root");
|
|
return -1;
|
|
}
|
|
rootuid = (uid_t) val;
|
|
if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
|
|
ERROR("No mapping for container root");
|
|
return -1;
|
|
}
|
|
rootgid = (gid_t) val;
|
|
|
|
/*
|
|
* In case of overlay, we want only the writeable layer
|
|
* to be chowned
|
|
*/
|
|
if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
|
|
chownpath = strchr(path, ':');
|
|
if (!chownpath) {
|
|
ERROR("Bad overlay path: %s", path);
|
|
return -1;
|
|
}
|
|
chownpath = strchr(chownpath+1, ':');
|
|
if (!chownpath) {
|
|
ERROR("Bad overlay path: %s", path);
|
|
return -1;
|
|
}
|
|
chownpath++;
|
|
}
|
|
path = chownpath;
|
|
if (geteuid() == 0) {
|
|
if (chown(path, rootuid, rootgid) < 0) {
|
|
ERROR("Error chowning %s", path);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
if (rootuid == geteuid()) {
|
|
// nothing to do
|
|
INFO("%s: container root is our uid; no need to chown" ,__func__);
|
|
return 0;
|
|
}
|
|
|
|
pid = fork();
|
|
if (pid < 0) {
|
|
SYSERROR("Failed forking");
|
|
return -1;
|
|
}
|
|
if (!pid) {
|
|
int hostuid = geteuid(), hostgid = getegid(), ret;
|
|
struct stat sb;
|
|
char map1[100], map2[100], map3[100], map4[100], map5[100];
|
|
char ugid[100];
|
|
char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
|
|
"-m", map3, "-m", map5,
|
|
"--", "chown", ugid, path, NULL };
|
|
char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
|
|
"-m", map3, "-m", map4, "-m", map5,
|
|
"--", "chown", ugid, path, NULL };
|
|
|
|
// save the current gid of "path"
|
|
if (stat(path, &sb) < 0) {
|
|
ERROR("Error stat %s", path);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* A file has to be group-owned by a gid mapped into the
|
|
* container, or the container won't be privileged over it.
|
|
*/
|
|
if (sb.st_uid == geteuid() &&
|
|
mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
|
|
chown(path, -1, hostgid) < 0) {
|
|
ERROR("Failed chgrping %s", path);
|
|
return -1;
|
|
}
|
|
|
|
// "u:0:rootuid:1"
|
|
ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
|
|
if (ret < 0 || ret >= 100) {
|
|
ERROR("Error uid printing map string");
|
|
return -1;
|
|
}
|
|
|
|
// "u:hostuid:hostuid:1"
|
|
ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
|
|
if (ret < 0 || ret >= 100) {
|
|
ERROR("Error uid printing map string");
|
|
return -1;
|
|
}
|
|
|
|
// "g:0:rootgid:1"
|
|
ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
|
|
if (ret < 0 || ret >= 100) {
|
|
ERROR("Error gid printing map string");
|
|
return -1;
|
|
}
|
|
|
|
// "g:pathgid:rootgid+pathgid:1"
|
|
ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
|
|
rootgid + (gid_t)sb.st_gid);
|
|
if (ret < 0 || ret >= 100) {
|
|
ERROR("Error gid printing map string");
|
|
return -1;
|
|
}
|
|
|
|
// "g:hostgid:hostgid:1"
|
|
ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
|
|
if (ret < 0 || ret >= 100) {
|
|
ERROR("Error gid printing map string");
|
|
return -1;
|
|
}
|
|
|
|
// "0:pathgid" (chown)
|
|
ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
|
|
if (ret < 0 || ret >= 100) {
|
|
ERROR("Error owner printing format string for chown");
|
|
return -1;
|
|
}
|
|
|
|
if (hostgid == sb.st_gid)
|
|
ret = execvp("lxc-usernsexec", args1);
|
|
else
|
|
ret = execvp("lxc-usernsexec", args2);
|
|
SYSERROR("Failed executing usernsexec");
|
|
exit(1);
|
|
}
|
|
return wait_for_pid(pid);
|
|
}
|
|
|
|
int ttys_shift_ids(struct lxc_conf *c)
|
|
{
|
|
if (lxc_list_empty(&c->id_map))
|
|
return 0;
|
|
|
|
if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
|
|
ERROR("Failed to chown %s", c->console.name);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* NOTE: not to be called from inside the container namespace! */
|
|
int tmp_proc_mount(struct lxc_conf *lxc_conf)
|
|
{
|
|
int mounted;
|
|
|
|
mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : "");
|
|
if (mounted == -1) {
|
|
SYSERROR("failed to mount /proc in the container.");
|
|
/* continue only if there is no rootfs */
|
|
if (lxc_conf->rootfs.path)
|
|
return -1;
|
|
} else if (mounted == 1) {
|
|
lxc_conf->tmp_umount_proc = 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void tmp_proc_unmount(struct lxc_conf *lxc_conf)
|
|
{
|
|
if (lxc_conf->tmp_umount_proc == 1) {
|
|
umount("/proc");
|
|
lxc_conf->tmp_umount_proc = 0;
|
|
}
|
|
}
|
|
|
|
/*
 * Walk /proc/self/mountinfo and call mount(MS_SLAVE) on every entry whose
 * options field contains "shared".
 *
 * All failures are logged and otherwise ignored: container startup is
 * meant to continue regardless.
 */
void remount_all_slave(void)
{
	char *buf = NULL;
	size_t buflen = 0;
	FILE *mountinfo;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&buf, &buflen, mountinfo) != -1) {
		char *mntpoint, *propagation;

		/* NOTE(review): field 4 is presumably the mount point column */
		mntpoint = get_field(buf, 4);
		if (!mntpoint)
			continue;

		/* two fields further on: presumably the optional/propagation field */
		propagation = get_field(mntpoint, 2);
		if (!propagation)
			continue;

		null_endofword(propagation);
		if (strstr(propagation, "shared") != NULL) {
			null_endofword(mntpoint);
			if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL)) {
				SYSERROR("Failed to make %s rslave", mntpoint);
				ERROR("Continuing...");
			}
		}
	}

	fclose(mountinfo);
	free(buf);
}
|
|
|
|
void lxc_execute_bind_init(struct lxc_conf *conf)
|
|
{
|
|
int ret;
|
|
char path[PATH_MAX], destpath[PATH_MAX], *p;
|
|
|
|
/* If init exists in the container, don't bind mount a static one */
|
|
p = choose_init(conf->rootfs.mount);
|
|
if (p) {
|
|
free(p);
|
|
return;
|
|
}
|
|
|
|
ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
|
|
if (ret < 0 || ret >= PATH_MAX) {
|
|
WARN("Path name too long searching for lxc.init.static");
|
|
return;
|
|
}
|
|
|
|
if (!file_exists(path)) {
|
|
INFO("%s does not exist on host", path);
|
|
return;
|
|
}
|
|
|
|
ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
|
|
if (ret < 0 || ret >= PATH_MAX) {
|
|
WARN("Path name too long for container's lxc.init.static");
|
|
return;
|
|
}
|
|
|
|
if (!file_exists(destpath)) {
|
|
FILE * pathfile = fopen(destpath, "wb");
|
|
if (!pathfile) {
|
|
SYSERROR("Failed to create mount target '%s'", destpath);
|
|
return;
|
|
}
|
|
fclose(pathfile);
|
|
}
|
|
|
|
ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
|
|
if (ret < 0)
|
|
SYSERROR("Failed to bind lxc.init.static into container");
|
|
INFO("lxc.init.static bound into container at %s", path);
|
|
}
|
|
|
|
/*
|
|
* This does the work of remounting / if it is shared, calling the
|
|
* container pre-mount hooks, and mounting the rootfs.
|
|
*/
|
|
int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
|
|
{
|
|
if (conf->rootfs_setup) {
|
|
/*
|
|
* rootfs was set up in another namespace. bind-mount it
|
|
* to give us a mount in our own ns so we can pivot_root to it
|
|
*/
|
|
const char *path = conf->rootfs.mount;
|
|
if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
|
|
ERROR("Failed to bind-mount container / onto itself");
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
remount_all_slave();
|
|
|
|
if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
|
|
ERROR("failed to run pre-mount hooks for container '%s'.", name);
|
|
return -1;
|
|
}
|
|
|
|
if (setup_rootfs(conf)) {
|
|
ERROR("failed to setup rootfs for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
conf->rootfs_setup = true;
|
|
return 0;
|
|
}
|
|
|
|
static bool verify_start_hooks(struct lxc_conf *conf)
|
|
{
|
|
struct lxc_list *it;
|
|
char path[MAXPATHLEN];
|
|
lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
|
|
char *hookname = it->elem;
|
|
struct stat st;
|
|
int ret;
|
|
|
|
ret = snprintf(path, MAXPATHLEN, "%s%s",
|
|
conf->rootfs.path ? conf->rootfs.mount : "", hookname);
|
|
if (ret < 0 || ret >= MAXPATHLEN)
|
|
return false;
|
|
ret = stat(path, &st);
|
|
if (ret) {
|
|
SYSERROR("Start hook %s not found in container",
|
|
hookname);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Pass file descriptor fd over the unix socket sock using
 * lxc_abstract_unix_send_fd(), without any payload data.
 *
 * Returns 0 on success, -1 (after logging) on failure.
 */
static int send_fd(int sock, int fd)
{
	if (lxc_abstract_unix_send_fd(sock, fd, NULL, 0) >= 0)
		return 0;

	SYSERROR("Error sending tty fd to parent");
	return -1;
}
|
|
|
|
static int send_ttys_to_parent(struct lxc_handler *handler)
|
|
{
|
|
struct lxc_conf *conf = handler->conf;
|
|
const struct lxc_tty_info *tty_info = &conf->tty_info;
|
|
int i;
|
|
int sock = handler->ttysock[0];
|
|
|
|
for (i = 0; i < tty_info->nbtty; i++) {
|
|
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
|
|
if (send_fd(sock, pty_info->slave) < 0)
|
|
goto bad;
|
|
close(pty_info->slave);
|
|
pty_info->slave = -1;
|
|
if (send_fd(sock, pty_info->master) < 0)
|
|
goto bad;
|
|
close(pty_info->master);
|
|
pty_info->master = -1;
|
|
}
|
|
|
|
close(handler->ttysock[0]);
|
|
close(handler->ttysock[1]);
|
|
|
|
return 0;
|
|
|
|
bad:
|
|
ERROR("Error writing tty fd to parent");
|
|
return -1;
|
|
}
|
|
|
|
int lxc_setup(struct lxc_handler *handler)
|
|
{
|
|
const char *name = handler->name;
|
|
struct lxc_conf *lxc_conf = handler->conf;
|
|
const char *lxcpath = handler->lxcpath;
|
|
|
|
if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
|
|
ERROR("Error setting up rootfs mount after spawn");
|
|
return -1;
|
|
}
|
|
|
|
if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
|
|
if (setup_utsname(lxc_conf->utsname)) {
|
|
ERROR("failed to setup the utsname for '%s'", name);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (setup_network(&lxc_conf->network)) {
|
|
ERROR("failed to setup the network for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
if (lxc_conf->autodev > 0) {
|
|
if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
|
|
ERROR("failed to mount /dev in the container");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* do automatic mounts (mainly /proc and /sys), but exclude
|
|
* those that need to wait until other stuff has finished
|
|
*/
|
|
if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
|
|
ERROR("failed to setup the automatic mounts for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath)) {
|
|
ERROR("failed to setup the mounts for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name, lxcpath)) {
|
|
ERROR("failed to setup the mount entries for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
/* Make sure any start hooks are in the container */
|
|
if (!verify_start_hooks(lxc_conf))
|
|
return -1;
|
|
|
|
if (lxc_conf->is_execute)
|
|
lxc_execute_bind_init(lxc_conf);
|
|
|
|
/* now mount only cgroup, if wanted;
|
|
* before, /sys could not have been mounted
|
|
* (is either mounted automatically or via fstab entries)
|
|
*/
|
|
if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
|
|
ERROR("failed to setup the automatic mounts for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
|
|
ERROR("failed to run mount hooks for container '%s'.", name);
|
|
return -1;
|
|
}
|
|
|
|
if (lxc_conf->autodev > 0) {
|
|
bool mount_console = lxc_conf->console.path && !strcmp(lxc_conf->console.path, "none");
|
|
|
|
if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
|
|
ERROR("failed to run autodev hooks for container '%s'.", name);
|
|
return -1;
|
|
}
|
|
if (fill_autodev(&lxc_conf->rootfs, mount_console)) {
|
|
ERROR("failed to populate /dev in the container");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
|
|
ERROR("failed to setup the console for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
if (lxc_conf->kmsg) {
|
|
if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
|
|
ERROR("failed to setup kmsg for '%s'", name);
|
|
}
|
|
|
|
if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
|
|
ERROR("failed to setup /dev symlinks for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
/* mount /proc if it's not already there */
|
|
if (tmp_proc_mount(lxc_conf) < 0) {
|
|
ERROR("failed to LSM mount proc for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
if (setup_pivot_root(&lxc_conf->rootfs)) {
|
|
ERROR("failed to set rootfs for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
if (setup_pts(lxc_conf->pts)) {
|
|
ERROR("failed to setup the new pts instance");
|
|
return -1;
|
|
}
|
|
|
|
if (lxc_create_tty(name, lxc_conf)) {
|
|
ERROR("failed to create the ttys");
|
|
return -1;
|
|
}
|
|
|
|
if (send_ttys_to_parent(handler) < 0) {
|
|
ERROR("failure sending console info to parent");
|
|
return -1;
|
|
}
|
|
|
|
|
|
if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
|
|
ERROR("failed to setup the ttys for '%s'", name);
|
|
return -1;
|
|
}
|
|
|
|
if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
|
|
SYSERROR("failed to set environment variable for container ptys");
|
|
|
|
|
|
if (setup_personality(lxc_conf->personality)) {
|
|
ERROR("failed to setup personality");
|
|
return -1;
|
|
}
|
|
|
|
if (!lxc_list_empty(&lxc_conf->keepcaps)) {
|
|
if (!lxc_list_empty(&lxc_conf->caps)) {
|
|
ERROR("Container requests lxc.cap.drop and lxc.cap.keep: either use lxc.cap.drop or lxc.cap.keep, not both.");
|
|
return -1;
|
|
}
|
|
if (dropcaps_except(&lxc_conf->keepcaps)) {
|
|
ERROR("failed to keep requested caps");
|
|
return -1;
|
|
}
|
|
} else if (setup_caps(&lxc_conf->caps)) {
|
|
ERROR("failed to drop capabilities");
|
|
return -1;
|
|
}
|
|
|
|
NOTICE("'%s' is setup.", name);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
|
|
const char *lxcpath, char *argv[])
|
|
{
|
|
int which = -1;
|
|
struct lxc_list *it;
|
|
|
|
if (strcmp(hook, "pre-start") == 0)
|
|
which = LXCHOOK_PRESTART;
|
|
else if (strcmp(hook, "pre-mount") == 0)
|
|
which = LXCHOOK_PREMOUNT;
|
|
else if (strcmp(hook, "mount") == 0)
|
|
which = LXCHOOK_MOUNT;
|
|
else if (strcmp(hook, "autodev") == 0)
|
|
which = LXCHOOK_AUTODEV;
|
|
else if (strcmp(hook, "start") == 0)
|
|
which = LXCHOOK_START;
|
|
else if (strcmp(hook, "stop") == 0)
|
|
which = LXCHOOK_STOP;
|
|
else if (strcmp(hook, "post-stop") == 0)
|
|
which = LXCHOOK_POSTSTOP;
|
|
else if (strcmp(hook, "clone") == 0)
|
|
which = LXCHOOK_CLONE;
|
|
else if (strcmp(hook, "destroy") == 0)
|
|
which = LXCHOOK_DESTROY;
|
|
else
|
|
return -1;
|
|
lxc_list_for_each(it, &conf->hooks[which]) {
|
|
int ret;
|
|
char *hookname = it->elem;
|
|
ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void lxc_remove_nic(struct lxc_list *it)
|
|
{
|
|
struct lxc_netdev *netdev = it->elem;
|
|
struct lxc_list *it2,*next;
|
|
|
|
lxc_list_del(it);
|
|
|
|
free(netdev->link);
|
|
free(netdev->name);
|
|
if (netdev->type == LXC_NET_VETH)
|
|
free(netdev->priv.veth_attr.pair);
|
|
free(netdev->upscript);
|
|
free(netdev->hwaddr);
|
|
free(netdev->mtu);
|
|
free(netdev->ipv4_gateway);
|
|
free(netdev->ipv6_gateway);
|
|
lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
|
|
lxc_list_del(it2);
|
|
free(it2->elem);
|
|
free(it2);
|
|
}
|
|
lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
|
|
lxc_list_del(it2);
|
|
free(it2->elem);
|
|
free(it2);
|
|
}
|
|
free(netdev);
|
|
free(it);
|
|
}
|
|
|
|
/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
|
|
int lxc_clear_nic(struct lxc_conf *c, const char *key)
|
|
{
|
|
char *p1;
|
|
int ret, idx, i;
|
|
struct lxc_list *it;
|
|
struct lxc_netdev *netdev;
|
|
|
|
p1 = strchr(key, '.');
|
|
if (!p1 || *(p1+1) == '\0')
|
|
p1 = NULL;
|
|
|
|
ret = sscanf(key, "%d", &idx);
|
|
if (ret != 1) return -1;
|
|
if (idx < 0)
|
|
return -1;
|
|
|
|
i = 0;
|
|
lxc_list_for_each(it, &c->network) {
|
|
if (i == idx)
|
|
break;
|
|
i++;
|
|
}
|
|
if (i < idx) // we don't have that many nics defined
|
|
return -1;
|
|
|
|
if (!it || !it->elem)
|
|
return -1;
|
|
|
|
netdev = it->elem;
|
|
|
|
if (!p1) {
|
|
lxc_remove_nic(it);
|
|
} else if (strcmp(p1, ".ipv4") == 0) {
|
|
struct lxc_list *it2,*next;
|
|
lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
|
|
lxc_list_del(it2);
|
|
free(it2->elem);
|
|
free(it2);
|
|
}
|
|
} else if (strcmp(p1, ".ipv6") == 0) {
|
|
struct lxc_list *it2,*next;
|
|
lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
|
|
lxc_list_del(it2);
|
|
free(it2->elem);
|
|
free(it2);
|
|
}
|
|
}
|
|
else return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_config_network(struct lxc_conf *c)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
lxc_list_for_each_safe(it, &c->network, next) {
|
|
lxc_remove_nic(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_config_caps(struct lxc_conf *c)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
|
|
lxc_list_for_each_safe(it, &c->caps, next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int lxc_free_idmap(struct lxc_list *id_map) {
|
|
struct lxc_list *it, *next;
|
|
|
|
lxc_list_for_each_safe(it, id_map, next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_idmaps(struct lxc_conf *c)
|
|
{
|
|
return lxc_free_idmap(&c->id_map);
|
|
}
|
|
|
|
int lxc_clear_config_keepcaps(struct lxc_conf *c)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
|
|
lxc_list_for_each_safe(it, &c->keepcaps, next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
bool all = false;
|
|
const char *k = NULL;
|
|
|
|
if (strcmp(key, "lxc.cgroup") == 0)
|
|
all = true;
|
|
else if (strncmp(key, "lxc.cgroup.", sizeof("lxc.cgroup.")-1) == 0)
|
|
k = key + sizeof("lxc.cgroup.")-1;
|
|
else
|
|
return -1;
|
|
|
|
lxc_list_for_each_safe(it, &c->cgroup, next) {
|
|
struct lxc_cgroup *cg = it->elem;
|
|
if (!all && strcmp(cg->subsystem, k) != 0)
|
|
continue;
|
|
lxc_list_del(it);
|
|
free(cg->subsystem);
|
|
free(cg->value);
|
|
free(cg);
|
|
free(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_limits(struct lxc_conf *c, const char *key)
|
|
{
|
|
struct lxc_list *it, *next;
|
|
bool all = false;
|
|
const char *k = NULL;
|
|
|
|
if (strcmp(key, "lxc.limit") == 0)
|
|
all = true;
|
|
else if (strncmp(key, "lxc.limit.", sizeof("lxc.limit.")-1) == 0)
|
|
k = key + sizeof("lxc.limit.")-1;
|
|
else
|
|
return -1;
|
|
|
|
lxc_list_for_each_safe(it, &c->limits, next) {
|
|
struct lxc_limit *lim = it->elem;
|
|
if (!all && strcmp(lim->resource, k) != 0)
|
|
continue;
|
|
lxc_list_del(it);
|
|
free(lim->resource);
|
|
free(lim);
|
|
free(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_groups(struct lxc_conf *c)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
|
|
lxc_list_for_each_safe(it, &c->groups, next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_environment(struct lxc_conf *c)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
|
|
lxc_list_for_each_safe(it, &c->environment, next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
int lxc_clear_mount_entries(struct lxc_conf *c)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
|
|
lxc_list_for_each_safe(it, &c->mount_list, next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_automounts(struct lxc_conf *c)
|
|
{
|
|
c->auto_mounts = 0;
|
|
return 0;
|
|
}
|
|
|
|
int lxc_clear_hooks(struct lxc_conf *c, const char *key)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
bool all = false, done = false;
|
|
const char *k = NULL;
|
|
int i;
|
|
|
|
if (strcmp(key, "lxc.hook") == 0)
|
|
all = true;
|
|
else if (strncmp(key, "lxc.hook.", sizeof("lxc.hook.")-1) == 0)
|
|
k = key + sizeof("lxc.hook.")-1;
|
|
else
|
|
return -1;
|
|
|
|
for (i=0; i<NUM_LXC_HOOKS; i++) {
|
|
if (all || strcmp(k, lxchook_names[i]) == 0) {
|
|
lxc_list_for_each_safe(it, &c->hooks[i], next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
done = true;
|
|
}
|
|
}
|
|
|
|
if (!done) {
|
|
ERROR("Invalid hook key: %s", key);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void lxc_clear_saved_nics(struct lxc_conf *conf)
|
|
{
|
|
int i;
|
|
|
|
if (!conf->saved_nics)
|
|
return;
|
|
for (i=0; i < conf->num_savednics; i++)
|
|
free(conf->saved_nics[i].orig_name);
|
|
free(conf->saved_nics);
|
|
}
|
|
|
|
static inline void lxc_clear_aliens(struct lxc_conf *conf)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
|
|
lxc_list_for_each_safe(it, &conf->aliens, next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
}
|
|
|
|
static inline void lxc_clear_includes(struct lxc_conf *conf)
|
|
{
|
|
struct lxc_list *it,*next;
|
|
|
|
lxc_list_for_each_safe(it, &conf->includes, next) {
|
|
lxc_list_del(it);
|
|
free(it->elem);
|
|
free(it);
|
|
}
|
|
}
|
|
|
|
void lxc_conf_free(struct lxc_conf *conf)
|
|
{
|
|
if (!conf)
|
|
return;
|
|
if (current_config == conf)
|
|
current_config = NULL;
|
|
free(conf->console.log_path);
|
|
free(conf->console.path);
|
|
free(conf->rootfs.mount);
|
|
free(conf->rootfs.bdev_type);
|
|
free(conf->rootfs.options);
|
|
free(conf->rootfs.path);
|
|
free(conf->logfile);
|
|
if (conf->logfd != -1)
|
|
close(conf->logfd);
|
|
free(conf->utsname);
|
|
free(conf->ttydir);
|
|
free(conf->fstab);
|
|
free(conf->rcfile);
|
|
free(conf->init_cmd);
|
|
free(conf->unexpanded_config);
|
|
free(conf->pty_names);
|
|
free(conf->syslog);
|
|
lxc_clear_config_network(conf);
|
|
free(conf->lsm_aa_profile);
|
|
free(conf->lsm_se_context);
|
|
lxc_seccomp_free(conf);
|
|
lxc_clear_config_caps(conf);
|
|
lxc_clear_config_keepcaps(conf);
|
|
lxc_clear_cgroups(conf, "lxc.cgroup");
|
|
lxc_clear_hooks(conf, "lxc.hook");
|
|
lxc_clear_mount_entries(conf);
|
|
lxc_clear_saved_nics(conf);
|
|
lxc_clear_idmaps(conf);
|
|
lxc_clear_groups(conf);
|
|
lxc_clear_includes(conf);
|
|
lxc_clear_aliens(conf);
|
|
lxc_clear_environment(conf);
|
|
lxc_clear_limits(conf, "lxc.limit");
|
|
free(conf);
|
|
}
|
|
|
|
/*
 * Argument bundle passed to run_userns_fn() through its void pointer
 * parameter.
 */
struct userns_fn_data {
|
|
/* callback that run_userns_fn() invokes after it has read its go-ahead byte */
int (*fn)(void *);
|
|
/* opaque argument handed to fn */
void *arg;
|
|
/* pipe fds: run_userns_fn() closes p[1] and reads one byte from p[0] */
int p[2];
|
|
};
|
|
|
|
static int run_userns_fn(void *data)
|
|
{
|
|
struct userns_fn_data *d = data;
|
|
char c;
|
|
// we're not sharing with the parent any more, if it was a thread
|
|
|
|
close(d->p[1]);
|
|
if (read(d->p[0], &c, 1) != 1)
|
|
return -1;
|
|
close(d->p[0]);
|
|
return d->fn(d->arg);
|
|
}
|
|
|
|
/*
|
|
* Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
|
|
* if they are not already there.
|
|
*/
|
|
static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
|
|
uid_t uid, gid_t gid)
|
|
{
|
|
int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
|
|
int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
|
|
struct lxc_list *new = NULL, *tmp, *it, *next;
|
|
struct id_map *entry;
|
|
|
|
new = malloc(sizeof(*new));
|
|
if (!new) {
|
|
ERROR("Out of memory building id map");
|
|
return NULL;
|
|
}
|
|
lxc_list_init(new);
|
|
|
|
if (hostuid_mapped < 0) {
|
|
hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
|
|
if (hostuid_mapped < 0)
|
|
goto err;
|
|
tmp = malloc(sizeof(*tmp));
|
|
if (!tmp)
|
|
goto err;
|
|
entry = malloc(sizeof(*entry));
|
|
if (!entry) {
|
|
free(tmp);
|
|
goto err;
|
|
}
|
|
tmp->elem = entry;
|
|
entry->idtype = ID_TYPE_UID;
|
|
entry->nsid = hostuid_mapped;
|
|
entry->hostid = (unsigned long) uid;
|
|
entry->range = 1;
|
|
lxc_list_add_tail(new, tmp);
|
|
}
|
|
if (hostgid_mapped < 0) {
|
|
hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
|
|
if (hostgid_mapped < 0)
|
|
goto err;
|
|
tmp = malloc(sizeof(*tmp));
|
|
if (!tmp)
|
|
goto err;
|
|
entry = malloc(sizeof(*entry));
|
|
if (!entry) {
|
|
free(tmp);
|
|
goto err;
|
|
}
|
|
tmp->elem = entry;
|
|
entry->idtype = ID_TYPE_GID;
|
|
entry->nsid = hostgid_mapped;
|
|
entry->hostid = (unsigned long) gid;
|
|
entry->range = 1;
|
|
lxc_list_add_tail(new, tmp);
|
|
}
|
|
lxc_list_for_each_safe(it, &conf->id_map, next) {
|
|
tmp = malloc(sizeof(*tmp));
|
|
if (!tmp)
|
|
goto err;
|
|
entry = malloc(sizeof(*entry));
|
|
if (!entry) {
|
|
free(tmp);
|
|
goto err;
|
|
}
|
|
memset(entry, 0, sizeof(*entry));
|
|
memcpy(entry, it->elem, sizeof(*entry));
|
|
tmp->elem = entry;
|
|
lxc_list_add_tail(new, tmp);
|
|
}
|
|
|
|
return new;
|
|
|
|
err:
|
|
ERROR("Out of memory building a new uid/gid map");
|
|
if (new)
|
|
lxc_free_idmap(new);
|
|
free(new);
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Run a function in a new user namespace.
|
|
* The caller's euid/egid will be mapped in if it is not already.
|
|
*/
|
|
int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
|
|
{
|
|
int ret, pid;
|
|
struct userns_fn_data d;
|
|
char c = '1';
|
|
int p[2];
|
|
struct lxc_list *idmap;
|
|
|
|
ret = pipe(p);
|
|
if (ret < 0) {
|
|
SYSERROR("opening pipe");
|
|
return -1;
|
|
}
|
|
d.fn = fn;
|
|
d.arg = data;
|
|
d.p[0] = p[0];
|
|
d.p[1] = p[1];
|
|
pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
|
|
if (pid < 0)
|
|
goto err;
|
|
close(p[0]);
|
|
p[0] = -1;
|
|
|
|
if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
|
|
ERROR("Error adding self to container uid/gid map");
|
|
goto err;
|
|
}
|
|
|
|
ret = lxc_map_ids(idmap, pid);
|
|
lxc_free_idmap(idmap);
|
|
free(idmap);
|
|
if (ret) {
|
|
ERROR("Error setting up child mappings");
|
|
goto err;
|
|
}
|
|
|
|
// kick the child
|
|
if (write(p[1], &c, 1) != 1) {
|
|
SYSERROR("writing to pipe to child");
|
|
goto err;
|
|
}
|
|
|
|
ret = wait_for_pid(pid);
|
|
|
|
close(p[1]);
|
|
return ret;
|
|
|
|
err:
|
|
if (p[0] != -1)
|
|
close(p[0]);
|
|
close(p[1]);
|
|
return -1;
|
|
}
|
|
|
|
/*
 * Look up the user name belonging to the effective uid.
 *
 * Returns a newly allocated copy of the name that the caller must free,
 * or NULL when the lookup (or the allocation) fails.
 *
 * Not thread-safe, do not use from api without first forking.
 */
static char* getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
|
|
|
|
/*
 * Look up the group name belonging to the effective gid.
 *
 * Returns a newly allocated copy of the name that the caller must free,
 * or NULL when the lookup (or the allocation) fails.
 *
 * Not thread-safe, do not use from api without first forking.
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
|
|
|
|
/* not thread-safe, do not use from api without first forking */
|
|
void suggest_default_idmap(void)
|
|
{
|
|
FILE *f;
|
|
unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
|
|
char *line = NULL;
|
|
char *uname, *gname;
|
|
size_t len = 0;
|
|
|
|
if (!(uname = getuname()))
|
|
return;
|
|
|
|
if (!(gname = getgname())) {
|
|
free(uname);
|
|
return;
|
|
}
|
|
|
|
f = fopen(subuidfile, "r");
|
|
if (!f) {
|
|
ERROR("Your system is not configured with subuids");
|
|
free(gname);
|
|
free(uname);
|
|
return;
|
|
}
|
|
while (getline(&line, &len, f) != -1) {
|
|
size_t no_newline = 0;
|
|
char *p = strchr(line, ':'), *p2;
|
|
if (*line == '#')
|
|
continue;
|
|
if (!p)
|
|
continue;
|
|
*p = '\0';
|
|
p++;
|
|
if (strcmp(line, uname))
|
|
continue;
|
|
p2 = strchr(p, ':');
|
|
if (!p2)
|
|
continue;
|
|
*p2 = '\0';
|
|
p2++;
|
|
if (!*p2)
|
|
continue;
|
|
no_newline = strcspn(p2, "\n");
|
|
p2[no_newline] = '\0';
|
|
|
|
if (lxc_safe_uint(p, &uid) < 0)
|
|
WARN("Could not parse UID.");
|
|
if (lxc_safe_uint(p2, &urange) < 0)
|
|
WARN("Could not parse UID range.");
|
|
}
|
|
fclose(f);
|
|
|
|
f = fopen(subgidfile, "r");
|
|
if (!f) {
|
|
ERROR("Your system is not configured with subgids");
|
|
free(gname);
|
|
free(uname);
|
|
return;
|
|
}
|
|
while (getline(&line, &len, f) != -1) {
|
|
size_t no_newline = 0;
|
|
char *p = strchr(line, ':'), *p2;
|
|
if (*line == '#')
|
|
continue;
|
|
if (!p)
|
|
continue;
|
|
*p = '\0';
|
|
p++;
|
|
if (strcmp(line, uname))
|
|
continue;
|
|
p2 = strchr(p, ':');
|
|
if (!p2)
|
|
continue;
|
|
*p2 = '\0';
|
|
p2++;
|
|
if (!*p2)
|
|
continue;
|
|
no_newline = strcspn(p2, "\n");
|
|
p2[no_newline] = '\0';
|
|
|
|
if (lxc_safe_uint(p, &gid) < 0)
|
|
WARN("Could not parse GID.");
|
|
if (lxc_safe_uint(p2, &grange) < 0)
|
|
WARN("Could not parse GID range.");
|
|
}
|
|
fclose(f);
|
|
|
|
free(line);
|
|
|
|
if (!urange || !grange) {
|
|
ERROR("You do not have subuids or subgids allocated");
|
|
ERROR("Unprivileged containers require subuids and subgids");
|
|
return;
|
|
}
|
|
|
|
ERROR("You must either run as root, or define uid mappings");
|
|
ERROR("To pass uid mappings to lxc-create, you could create");
|
|
ERROR("~/.config/lxc/default.conf:");
|
|
ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
|
|
ERROR("lxc.id_map = u 0 %u %u", uid, urange);
|
|
ERROR("lxc.id_map = g 0 %u %u", gid, grange);
|
|
|
|
free(gname);
|
|
free(uname);
|
|
}
|
|
|
|
static void free_cgroup_settings(struct lxc_list *result)
|
|
{
|
|
struct lxc_list *iterator, *next;
|
|
|
|
lxc_list_for_each_safe(iterator, result, next) {
|
|
lxc_list_del(iterator);
|
|
free(iterator);
|
|
}
|
|
free(result);
|
|
}
|
|
|
|
/*
|
|
* Return the list of cgroup_settings sorted according to the following rules
|
|
* 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
|
|
*/
|
|
struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings)
|
|
{
|
|
struct lxc_list *result;
|
|
struct lxc_list *memsw_limit = NULL;
|
|
struct lxc_list *it = NULL;
|
|
struct lxc_cgroup *cg = NULL;
|
|
struct lxc_list *item = NULL;
|
|
|
|
result = malloc(sizeof(*result));
|
|
if (!result) {
|
|
ERROR("failed to allocate memory to sort cgroup settings");
|
|
return NULL;
|
|
}
|
|
lxc_list_init(result);
|
|
|
|
/*Iterate over the cgroup settings and copy them to the output list*/
|
|
lxc_list_for_each(it, cgroup_settings) {
|
|
item = malloc(sizeof(*item));
|
|
if (!item) {
|
|
ERROR("failed to allocate memory to sort cgroup settings");
|
|
free_cgroup_settings(result);
|
|
return NULL;
|
|
}
|
|
item->elem = it->elem;
|
|
cg = it->elem;
|
|
if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
|
|
/* Store the memsw_limit location */
|
|
memsw_limit = item;
|
|
} else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 && memsw_limit != NULL) {
|
|
/* lxc.cgroup.memory.memsw.limit_in_bytes is found before
|
|
* lxc.cgroup.memory.limit_in_bytes, swap these two items */
|
|
item->elem = memsw_limit->elem;
|
|
memsw_limit->elem = it->elem;
|
|
}
|
|
lxc_list_add_tail(result, item);
|
|
}
|
|
|
|
return result;
|
|
}
|