diff --git a/configure.ac b/configure.ac index 0103579a0..8c07d9791 100644 --- a/configure.ac +++ b/configure.ac @@ -598,6 +598,10 @@ AM_CONDITIONAL([IS_BIONIC], [test "x$is_bionic" = "xyes"]) # Some systems lack PR_CAPBSET_DROP definition => HAVE_DECL_PR_CAPBSET_DROP AC_CHECK_DECLS([PR_CAPBSET_DROP], [], [], [#include ]) +# Some systems lack PR_{G,S}ET_NO_NEW_PRIVS definition => HAVE_DECL_PR_{G,S}ET_NO_NEW_PRIVS +AC_CHECK_DECLS([PR_SET_NO_NEW_PRIVS], [], [], [#include ]) +AC_CHECK_DECLS([PR_GET_NO_NEW_PRIVS], [], [], [#include ]) + # Check for some headers AC_CHECK_HEADERS([sys/signalfd.h pty.h ifaddrs.h sys/capability.h sys/personality.h utmpx.h sys/timerfd.h]) diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in index 1b740a57e..fcccd8ba9 100644 --- a/doc/lxc.container.conf.sgml.in +++ b/doc/lxc.container.conf.sgml.in @@ -1310,6 +1310,34 @@ mknod errno 0 + + PR_SET_NO_NEW_PRIVS + + With PR_SET_NO_NEW_PRIVS active execve() promises not to grant + privileges to do anything that could not have been done without + the execve() call (for example, rendering the set-user-ID and + set-group-ID mode bits, and file capabilities non-functional). + Once set, this bit cannot be unset. The setting of this bit is + inherited by children created by fork() and clone(), and preserved + across execve(). + Note that PR_SET_NO_NEW_PRIVS is applied after the container has + changed into its intended AppArmor profile or SElinux context. + + + + + + + + + Specify whether the PR_SET_NO_NEW_PRIVS flag should be set for the + container. Set to 1 to activate. 
+ + + + + + UID mappings
diff --git a/src/lxc/attach.c b/src/lxc/attach.c
index 0d9e3d047..c74141050 100644
--- a/src/lxc/attach.c
+++ b/src/lxc/attach.c
@@ -39,10 +39,18 @@
 #include
 #include
 
 #if !HAVE_DECL_PR_CAPBSET_DROP
 #define PR_CAPBSET_DROP 24
 #endif
 
+#if !HAVE_DECL_PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+
+#if !HAVE_DECL_PR_GET_NO_NEW_PRIVS
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
 #include "namespace.h"
 #include "log.h"
 #include "attach.h"
@@ -657,8 +665,8 @@ static int attach_child_main(void* data);
 /* define default options if no options are supplied by the user */
 static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
 
-static bool fetch_seccomp(const char *name, const char *lxcpath,
-		struct lxc_proc_context_info *i, lxc_attach_options_t *options)
+static bool fetch_seccomp(struct lxc_proc_context_info *i,
+			  lxc_attach_options_t *options)
 {
 	struct lxc_container *c;
 	char *path;
@@ -666,12 +674,9 @@ static bool fetch_seccomp(const char *name, const char *lxcpath,
 	if (!(options->namespaces & CLONE_NEWNS) || !(options->attach_flags & LXC_ATTACH_LSM))
 		return true;
 
-	c = lxc_container_new(name, lxcpath);
-	if (!c)
-		return false;
-	i->container = c;
+	c = i->container;
 
-	/* Initialize an empty lxc_conf */
+	/* Remove current setting. */
 	if (!c->set_config_item(c, "lxc.seccomp", "")) {
 		return false;
 	}
@@ -695,6 +700,37 @@ static bool fetch_seccomp(const char *name, const char *lxcpath,
 		return false;
 	}
 
+	INFO("Retrieved seccomp policy.");
+	return true;
+}
+
+static bool no_new_privs(struct lxc_proc_context_info *ctx,
+			 lxc_attach_options_t *options)
+{
+	struct lxc_container *c;
+	char *val;
+
+	c = ctx->container;
+
+	/* Remove current setting. */
+	if (!c->set_config_item(c, "lxc.no_new_privs", "")) {
+		return false;
+	}
+
+	/* Retrieve currently active setting. */
+	val = c->get_running_config_item(c, "lxc.no_new_privs");
+	if (!val) {
+		INFO("Failed to get running config item for lxc.no_new_privs.");
+		return false;
+	}
+
+	/* Set currently active setting. */
+	if (!c->set_config_item(c, "lxc.no_new_privs", val)) {
+		free(val);
+		return false;
+	}
+	free(val);
+
 	return true;
 }
 
@@ -744,9 +780,16 @@ int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_fun
 	}
 	init_ctx->personality = personality;
 
-	if (!fetch_seccomp(name, lxcpath, init_ctx, options))
+	init_ctx->container = lxc_container_new(name, lxcpath);
+	if (!init_ctx->container)
+		return -1;
+
+	if (!fetch_seccomp(init_ctx, options))
 		WARN("Failed to get seccomp policy");
 
+	if (!no_new_privs(init_ctx, options))
+		WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set.");
+
 	cwd = getcwd(NULL, 0);
 
 	/* determine which namespaces the container was created with
@@ -1146,6 +1189,19 @@ static int attach_child_main(void* data)
 	shutdown(ipc_socket, SHUT_RDWR);
 	close(ipc_socket);
 
+	if ((init_ctx->container && init_ctx->container->lxc_conf &&
+	     init_ctx->container->lxc_conf->no_new_privs) ||
+	    (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
+		if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+			SYSERROR("PR_SET_NO_NEW_PRIVS could not be set. "
+				 "Process can use execve() gainable "
+				 "privileges.");
+			rexit(-1);
+		}
+		INFO("PR_SET_NO_NEW_PRIVS is set. 
Process cannot use execve() " + "gainable privileges."); + } + /* set new apparmor profile/selinux context */ if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_LSM) && init_ctx->lsm_label) { int on_exec; @@ -1161,7 +1217,6 @@ static int attach_child_main(void* data) ERROR("Loading seccomp policy"); rexit(-1); } - lxc_proc_put_context_info(init_ctx); /* The following is done after the communication socket is diff --git a/src/lxc/attach_options.h b/src/lxc/attach_options.h index 3c54e7ca6..1df69924c 100644 --- a/src/lxc/attach_options.h +++ b/src/lxc/attach_options.h @@ -49,6 +49,8 @@ enum { /* the following are off by default */ LXC_ATTACH_REMOUNT_PROC_SYS = 0x00010000, //!< Remount /proc filesystem LXC_ATTACH_LSM_NOW = 0x00020000, //!< FIXME: unknown + /* Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges. */ + LXC_ATTACH_NO_NEW_PRIVS = 0x00040000, //!< PR_SET_NO_NEW_PRIVS /* we have 16 bits for things that are on by default * and 16 bits that are off by default, that should diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 69a72ea50..e48466730 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -382,6 +382,9 @@ struct lxc_conf { /* The facility to pass to syslog. Let's users establish as what type of * program liblxc is supposed to write to the syslog. */ char *syslog; + + /* Whether PR_SET_NO_NEW_PRIVS will be set for the container. 
*/
+	bool no_new_privs;
 };
 
 #ifdef HAVE_TLS
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 9ad05e588..8f370f6cf 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -114,6 +114,7 @@ static int config_init_cmd(const char *, const char *, struct lxc_conf *);
 static int config_init_uid(const char *, const char *, struct lxc_conf *);
 static int config_init_gid(const char *, const char *, struct lxc_conf *);
 static int config_ephemeral(const char *, const char *, struct lxc_conf *);
+static int config_no_new_privs(const char *, const char *, struct lxc_conf *);
 
 static struct lxc_config_t config[] = {
 
@@ -187,6 +188,7 @@ static struct lxc_config_t config[] = {
 	{ "lxc.init_gid", config_init_gid },
 	{ "lxc.ephemeral", config_ephemeral },
 	{ "lxc.syslog", config_syslog },
+	{ "lxc.no_new_privs", config_no_new_privs },
 };
 
 struct signame {
@@ -2562,6 +2564,8 @@ int lxc_get_config_item(struct lxc_conf *c, const char *key, char *retv,
 		return lxc_get_conf_int(c, retv, inlen, c->ephemeral);
 	else if (strcmp(key, "lxc.syslog") == 0)
 		v = c->syslog;
+	else if (strcmp(key, "lxc.no_new_privs") == 0)
+		return lxc_get_conf_int(c, retv, inlen, c->no_new_privs);
 	else
 		return -1;
 	if (!v)
@@ -2954,3 +2958,38 @@ static int config_syslog(const char *key, const char *value,
 	lxc_log_syslog(facility);
 	return config_string_item(&lxc_conf->syslog, value);
 }
+
+/* Handler for the lxc.no_new_privs config key.
+ *
+ * An empty value clears the setting (a key is reset by passing "").
+ * Otherwise the value must be the integer 0 or 1; anything else is rejected.
+ * Returns 0 on success and -1 on an invalid value.
+ */
+static int config_no_new_privs(const char *key, const char *value,
+				    struct lxc_conf *lxc_conf)
+{
+	char *end;
+	long v;
+	bool converted;
+
+	/* atoi() mapped "" to 0; keep that so clearing the key still works. */
+	if (!value || *value == '\0') {
+		lxc_conf->no_new_privs = false;
+		return 0;
+	}
+
+	/* strtol() lets us reject input such as "yes" or "1abc", which atoi()
+	 * silently read as 0 and 1. */
+	v = strtol(value, &end, 10);
+	converted = end != value;
+	while (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r')
+		end++;
+
+	if (!converted || *end != '\0' || (v != 0 && v != 1)) {
+		ERROR("Wrong value for lxc.no_new_privs. Can only be set to 0 or 1");
+		return -1;
+	}
+	lxc_conf->no_new_privs = v ? true : false;
+
+	return 0;
+}
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 2411626de..ecc7b08f6 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -50,10 +50,18 @@
 #include
 #endif
 
 #if !HAVE_DECL_PR_CAPBSET_DROP
 #define PR_CAPBSET_DROP 24
 #endif
 
+#if !HAVE_DECL_PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+
+#if !HAVE_DECL_PR_GET_NO_NEW_PRIVS
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
 #include "af_unix.h"
 #include "bdev.h"
 #include "caps.h"
@@ -850,6 +858,16 @@ static int do_start(void *data)
 	if (lsm_process_label_set(NULL, handler->conf, 1, 1) < 0)
 		goto out_warn_father;
 
+	/* Set PR_SET_NO_NEW_PRIVS after we changed the lsm label. If we do it
+	 * before we aren't allowed anymore. */
+	if (handler->conf->no_new_privs) {
+		if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+			SYSERROR("Could not set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges.");
+			goto out_warn_father;
+		}
+		DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges.");
+	}
+
 	/* Some init's such as busybox will set sane tty settings on stdin,
 	 * stdout, stderr which it thinks is the console. 
We already set them * the way we wanted on the real terminal, and we want init to do its diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am index cffc74204..2e7dd5602 100644 --- a/src/tests/Makefile.am +++ b/src/tests/Makefile.am @@ -53,8 +53,11 @@ bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \ lxc-test-reboot lxc-test-list lxc-test-attach lxc-test-device-add-remove \ lxc-test-apparmor lxc-test-utils -bin_SCRIPTS = lxc-test-automount lxc-test-autostart lxc-test-cloneconfig \ - lxc-test-createconfig +bin_SCRIPTS = lxc-test-automount \ + lxc-test-autostart \ + lxc-test-cloneconfig \ + lxc-test-createconfig \ + lxc-test-no-new-privs if DISTRO_UBUNTU bin_SCRIPTS += \ @@ -91,6 +94,7 @@ EXTRA_DIST = \ lxc-test-checkpoint-restore \ lxc-test-cloneconfig \ lxc-test-createconfig \ + lxc-test-no-new-privs \ lxc-test-snapdeps \ lxc-test-symlink \ lxc-test-ubuntu \ diff --git a/src/tests/lxc-test-no-new-privs b/src/tests/lxc-test-no-new-privs new file mode 100755 index 000000000..d10e0449a --- /dev/null +++ b/src/tests/lxc-test-no-new-privs @@ -0,0 +1,104 @@ +#!/bin/bash + +# lxc: linux Container library + +# Authors: +# Christian Brauner +# +# This is a test script for PR_SET_NO_NEW_PRIVS + +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+set -eux
+
+DONE=0
+cleanup() {
+	cd /
+	lxc-destroy -n c1 -f || true
+	if [ $DONE -eq 0 ]; then
+		echo "FAIL"
+		exit 1
+	fi
+	echo "PASS"
+}
+
+trap cleanup EXIT SIGHUP SIGINT SIGTERM
+
+mkdir -p /etc/lxc/
+cat > /etc/lxc/default.conf << EOF
+lxc.network.type = veth
+lxc.network.link = lxcbr0
+EOF
+
+ARCH=i386
+if type dpkg >/dev/null 2>&1; then
+	ARCH=$(dpkg --print-architecture)
+fi
+
+lxc-create -t download -n c1 -- -d ubuntu -r xenial -a $ARCH
+echo "lxc.no_new_privs = 1" >> /var/lib/lxc/c1/config
+
+lxc-start -n c1
+p1=$(lxc-info -n c1 -p -H)
+[ "$p1" != "-1" ] || { echo "Failed to start container c1"; false; }
+sleep 5s
+lxc-attach -n c1 --clear-env -- apt update -y
+lxc-attach -n c1 --clear-env -- apt install -y gcc make
+
+# Here documents don't seem to like sudo -i.
+lxc-attach -n c1 --clear-env -- /bin/bash -c "cat <<EOF > /nnptest.c
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[])
+{
+	printf(\"%d\n\", geteuid());
+}
+EOF"
+lxc-attach -n c1 --clear-env -- cat /nnptest.c
+lxc-attach -n c1 --clear-env -- make -C / nnptest
+lxc-attach -n c1 --clear-env -- chmod u+s /nnptest
+
+# Check that lxc-attach obeys PR_SET_NO_NEW_PRIVS when it is set.
+NNP_EUID=$(lxc-attach -n c1 --clear-env -- sudo -u ubuntu /nnptest)
+if [ "$NNP_EUID" -ne 1000 ]; then
+	exit 1
+fi
+lxc-stop -n c1 -k
+
+# Check that lxc-attach obeys PR_SET_NO_NEW_PRIVS when it is not set.
+sed -i 's/lxc.no_new_privs = 1/lxc.no_new_privs = 0/' /var/lib/lxc/c1/config
+lxc-start -n c1
+NNP_EUID=$(lxc-attach -n c1 --clear-env -- sudo -u ubuntu /nnptest)
+if [ "$NNP_EUID" -ne 0 ]; then
+	exit 1
+fi
+lxc-stop -n c1 -k
+
+# Check that lxc-execute and lxc-start obey PR_SET_NO_NEW_PRIVS when it is not set.
+NNP_EUID=$(lxc-execute -n c1 -- sudo -u ubuntu /nnptest)
+if [ "$NNP_EUID" -ne 0 ]; then
+	exit 1
+fi
+
+# Check that lxc-execute and lxc-start obey PR_SET_NO_NEW_PRIVS when it is set.
+sed -i 's/lxc.no_new_privs = 0/lxc.no_new_privs = 1/' /var/lib/lxc/c1/config
+NNP_EUID=$(lxc-execute -n c1 -- sudo -u ubuntu /nnptest)
+if [ "$NNP_EUID" -ne 1000 ]; then
+	exit 1
+fi
+
+DONE=1