mirror of
https://git.proxmox.com/git/mirror_lxc
synced 2025-07-25 09:51:52 +00:00
seccomp: support 'all' arch sections (plus bugfixes)
seccomp_ctx is already a void*, so don't use 'scmp_filter_ctx *' Separately track the native arch from the arch a rule is aimed at. Clearly ignore irrelevant architectures (i.e. arm rules on x86) Don't try to load seccomp (and don't fail) if we are already seccomp-confined. Otherwise nested containers fail. Make it clear that the extra seccomp ctx is only for compat calls on 64-bit arch. (This will be extended to arm64 when libseccomp supports it). Power may will complicate this (if ever it is supported) and require a new rethink and rewrite. NOTE - currently when starting a 32-bit container on 64-bit host, rules pertaining to 32-bit syscalls (as opposed to once which have the same syscall #) appear to be ignored. I can reproduce that without lxc, so either there is a bug in seccomp or a fundamental misunderstanding in how I"m merging the contexts. Rereading the seccomp_rule_add manpage suggests that keeping the seccond seccomp context may not be necessary, but this is not something I care to test right now. If it's true, then the code could be simplified, and it may solve my concerns about power. With this patch I'm able to start nested containers (with seccomp policies defined) including 32-bit and 32-bit-in-64-bit. [ this patch does not yet add the default seccomp policy ] Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> Acked-by: Stéphane Graber <stgraber@ubuntu.com>
This commit is contained in:
parent
d74b6771c0
commit
d58c6ad0a6
@ -312,7 +312,7 @@ struct lxc_conf {
|
||||
int tmp_umount_proc;
|
||||
char *seccomp; // filename with the seccomp rules
|
||||
#if HAVE_SCMP_FILTER_CTX
|
||||
scmp_filter_ctx *seccomp_ctx;
|
||||
scmp_filter_ctx seccomp_ctx;
|
||||
#endif
|
||||
int maincmd_fd;
|
||||
int autodev; // if 1, mount and fill a /dev at start
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <seccomp.h>
|
||||
#include <errno.h>
|
||||
#include <seccomp.h>
|
||||
#include <sys/utsname.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "lxcseccomp.h"
|
||||
@ -112,6 +113,98 @@ static uint32_t get_and_clear_v2_action(char *line, uint32_t def_action)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
|
||||
enum lxc_hostarch_t {
|
||||
lxc_seccomp_arch_all = 0,
|
||||
lxc_seccomp_arch_native,
|
||||
lxc_seccomp_arch_i386,
|
||||
lxc_seccomp_arch_amd64,
|
||||
lxc_seccomp_arch_arm,
|
||||
lxc_seccomp_arch_unknown = 999,
|
||||
};
|
||||
|
||||
int get_hostarch(void)
|
||||
{
|
||||
struct utsname uts;
|
||||
if (uname(&uts) < 0) {
|
||||
SYSERROR("Failed to read host arch");
|
||||
return -1;
|
||||
}
|
||||
if (strcmp(uts.machine, "i686") == 0)
|
||||
return lxc_seccomp_arch_i386;
|
||||
else if (strcmp(uts.machine, "x86_64") == 0)
|
||||
return lxc_seccomp_arch_amd64;
|
||||
else if (strncmp(uts.machine, "armv7", 5) == 0)
|
||||
return lxc_seccomp_arch_arm;
|
||||
return lxc_seccomp_arch_unknown;
|
||||
}
|
||||
|
||||
scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action)
|
||||
{
|
||||
scmp_filter_ctx ctx;
|
||||
int ret;
|
||||
uint32_t arch;
|
||||
|
||||
switch(n_arch) {
|
||||
case lxc_seccomp_arch_i386: arch = SCMP_ARCH_X86; break;
|
||||
case lxc_seccomp_arch_amd64: arch = SCMP_ARCH_X86_64; break;
|
||||
case lxc_seccomp_arch_arm: arch = SCMP_ARCH_ARM; break;
|
||||
default: return NULL;
|
||||
}
|
||||
|
||||
if ((ctx = seccomp_init(default_policy_action)) == NULL) {
|
||||
ERROR("Error initializing seccomp context");
|
||||
return NULL;
|
||||
}
|
||||
if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
|
||||
ERROR("failed to turn off n-new-privs");
|
||||
seccomp_release(ctx);
|
||||
return NULL;
|
||||
}
|
||||
ret = seccomp_arch_add(ctx, arch);
|
||||
if (ret != 0) {
|
||||
ERROR("Seccomp error %d (%s) adding arch: %d", ret,
|
||||
strerror(ret), (int)n_arch);
|
||||
seccomp_release(ctx);
|
||||
return NULL;
|
||||
}
|
||||
if (seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE) != 0) {
|
||||
ERROR("Seccomp error removing native arch");
|
||||
seccomp_release(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
|
||||
uint32_t action)
|
||||
{
|
||||
int nr, ret;
|
||||
|
||||
if (arch && !seccomp_arch_exist(ctx, arch)) {
|
||||
ERROR("BUG: seccomp: rule and context arch do not match (arch %d)", arch);
|
||||
return false;
|
||||
}
|
||||
nr = seccomp_syscall_resolve_name_arch(arch, line);
|
||||
if (nr == __NR_SCMP_ERROR) {
|
||||
WARN("Seccomp: failed to resolve syscall: %s", line);
|
||||
WARN("This syscall will NOT be blacklisted");
|
||||
return true;
|
||||
}
|
||||
if (nr < 0) {
|
||||
WARN("Seccomp: got negative # for syscall: %s", line);
|
||||
WARN("This syscall will NOT be blacklisted");
|
||||
return true;
|
||||
}
|
||||
ret = seccomp_rule_add_exact(ctx, action, nr, 0);
|
||||
if (ret < 0) {
|
||||
ERROR("failed (%d) loading rule for %s (nr %d action %d)", ret, line, nr, action);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* v2 consists of
|
||||
* [x86]
|
||||
@ -128,13 +221,14 @@ static uint32_t get_and_clear_v2_action(char *line, uint32_t def_action)
|
||||
*/
|
||||
static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
|
||||
{
|
||||
#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
|
||||
char *p;
|
||||
int ret;
|
||||
scmp_filter_ctx *ctx = NULL;
|
||||
scmp_filter_ctx compat_ctx = NULL;
|
||||
bool blacklist = false;
|
||||
uint32_t default_policy_action = -1, default_rule_action = -1, action;
|
||||
uint32_t arch = SCMP_ARCH_NATIVE;
|
||||
enum lxc_hostarch_t native_arch = get_hostarch(),
|
||||
cur_rule_arch = native_arch;
|
||||
|
||||
if (strncmp(line, "blacklist", 9) == 0)
|
||||
blacklist = true;
|
||||
@ -162,6 +256,14 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
|
||||
default_rule_action = SCMP_ACT_ALLOW;
|
||||
}
|
||||
|
||||
if (native_arch == lxc_seccomp_arch_amd64) {
|
||||
cur_rule_arch = lxc_seccomp_arch_all;
|
||||
compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
|
||||
default_policy_action);
|
||||
if (!compat_ctx)
|
||||
goto bad;
|
||||
}
|
||||
|
||||
if (default_policy_action != SCMP_ACT_KILL) {
|
||||
ret = seccomp_reset(conf->seccomp_ctx, default_policy_action);
|
||||
if (ret != 0) {
|
||||
@ -175,7 +277,6 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
|
||||
}
|
||||
|
||||
while (fgets(line, 1024, f)) {
|
||||
int nr;
|
||||
|
||||
if (line[0] == '#')
|
||||
continue;
|
||||
@ -186,72 +287,112 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
|
||||
if (line[0] == '[') {
|
||||
// read the architecture for next set of rules
|
||||
if (strcmp(line, "[x86]") == 0 ||
|
||||
strcmp(line, "[X86]") == 0)
|
||||
strcmp(line, "[X86]") == 0) {
|
||||
if (native_arch != lxc_seccomp_arch_i386 &&
|
||||
native_arch != lxc_seccomp_arch_amd64) {
|
||||
cur_rule_arch = lxc_seccomp_arch_unknown;
|
||||
continue;
|
||||
}
|
||||
cur_rule_arch = lxc_seccomp_arch_i386;
|
||||
arch = SCMP_ARCH_X86;
|
||||
else if (strcmp(line, "[X86_64]") == 0 ||
|
||||
strcmp(line, "[x86_64]") == 0)
|
||||
if (native_arch == lxc_seccomp_arch_amd64) {
|
||||
if (compat_ctx)
|
||||
continue;
|
||||
compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
|
||||
default_policy_action);
|
||||
if (!compat_ctx)
|
||||
goto bad;
|
||||
}
|
||||
} else if (strcmp(line, "[X86_64]") == 0 ||
|
||||
strcmp(line, "[x86_64]") == 0) {
|
||||
if (native_arch != lxc_seccomp_arch_amd64) {
|
||||
cur_rule_arch = lxc_seccomp_arch_unknown;
|
||||
continue;
|
||||
}
|
||||
cur_rule_arch = lxc_seccomp_arch_amd64;
|
||||
arch = SCMP_ARCH_X86_64;
|
||||
} else if (strcmp(line, "[all]") == 0 ||
|
||||
strcmp(line, "[ALL]") == 0) {
|
||||
cur_rule_arch = lxc_seccomp_arch_all;
|
||||
if (native_arch == lxc_seccomp_arch_amd64 && !compat_ctx) {
|
||||
if (compat_ctx)
|
||||
continue;
|
||||
compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
|
||||
default_policy_action);
|
||||
if (!compat_ctx)
|
||||
goto bad;
|
||||
}
|
||||
}
|
||||
#ifdef SCMP_ARCH_ARM
|
||||
else if (strcmp(line, "[arm]") == 0 ||
|
||||
strcmp(line, "[ARM]") == 0)
|
||||
strcmp(line, "[ARM]") == 0) {
|
||||
if (native_arch != lxc_seccomp_arch_arm) {
|
||||
cur_rule_arch = lxc_seccomp_arch_unknown;
|
||||
continue;
|
||||
}
|
||||
cur_rule_arch = lxc_seccomp_arch_arm;
|
||||
arch = SCMP_ARCH_ARM;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
goto bad_arch;
|
||||
if (ctx) {
|
||||
ERROR("Only two arch sections per policy supported");
|
||||
goto bad_arch;
|
||||
}
|
||||
if ((ctx = seccomp_init(default_policy_action)) == NULL) {
|
||||
ERROR("Error initializing seccomp context");
|
||||
return -1;
|
||||
}
|
||||
if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
|
||||
ERROR("failed to turn off n-new-privs");
|
||||
seccomp_release(ctx);
|
||||
return -1;
|
||||
}
|
||||
ret = seccomp_arch_add(ctx, arch);
|
||||
if (ret == -EEXIST) {
|
||||
seccomp_release(ctx);
|
||||
ctx = NULL;
|
||||
continue;
|
||||
}
|
||||
if (ret != 0) {
|
||||
ERROR("Error %d adding arch: %s", ret, line);
|
||||
goto bad_arch;
|
||||
}
|
||||
if (seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE) != 0) {
|
||||
ERROR("Error removing native arch from %s", line);
|
||||
goto bad_arch;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/* irrelevant arch - i.e. arm on i386 */
|
||||
if (cur_rule_arch == lxc_seccomp_arch_unknown)
|
||||
continue;
|
||||
|
||||
/* read optional action which follows the syscall */
|
||||
action = get_and_clear_v2_action(line, default_rule_action);
|
||||
if (action == -1) {
|
||||
ERROR("Failed to interpret action");
|
||||
goto bad_rule;
|
||||
}
|
||||
nr = seccomp_syscall_resolve_name_arch(arch, line);
|
||||
if (nr < 0) {
|
||||
WARN("Seccomp: failed to resolve syscall: %s (returned %d)",
|
||||
line, nr);
|
||||
WARN("This syscall will NOT be blacklisted");
|
||||
continue;
|
||||
|
||||
if (cur_rule_arch == lxc_seccomp_arch_all)
|
||||
arch = SCMP_ARCH_NATIVE;
|
||||
|
||||
/*
|
||||
* TODO generalize - if !is_compat_only(native_arch, cur_rule_arch)
|
||||
*/
|
||||
if (!(cur_rule_arch == lxc_seccomp_arch_i386 &&
|
||||
native_arch == lxc_seccomp_arch_amd64)) {
|
||||
INFO("Adding non-compat rule for %s action %d", line, action);
|
||||
if (!do_resolve_add_rule(arch, line, conf->seccomp_ctx, action))
|
||||
goto bad_rule;
|
||||
}
|
||||
ret = seccomp_rule_add(ctx ? ctx : conf->seccomp_ctx,
|
||||
action, nr, 0);
|
||||
if (ret < 0) {
|
||||
ERROR("failed (%d) loading rule for %s", ret, line);
|
||||
goto bad_rule;
|
||||
|
||||
/*
|
||||
* TODO generalize - if need_compat(native_arch, cur_rule_arch)
|
||||
*/
|
||||
if (native_arch == lxc_seccomp_arch_amd64 &&
|
||||
cur_rule_arch != lxc_seccomp_arch_amd64) {
|
||||
int nr1, nr2;
|
||||
INFO("Adding compat rule for %s action %d", line, action);
|
||||
nr1 = seccomp_syscall_resolve_name_arch(lxc_seccomp_arch_amd64, line);
|
||||
nr2 = seccomp_syscall_resolve_name_arch(lxc_seccomp_arch_i386, line);
|
||||
if (nr1 == nr2) {
|
||||
/* sigh - if the syscall # is the same for 32- and 64-bit, then
|
||||
* we get EFAULT if we try to aplly to compat_ctx. So apply to
|
||||
* the noncompat ctx. We may already have done so, but that's ok
|
||||
*/
|
||||
if (!do_resolve_add_rule(arch, line, conf->seccomp_ctx, action))
|
||||
goto bad_rule;
|
||||
continue;
|
||||
}
|
||||
if (!do_resolve_add_rule(SCMP_ARCH_X86, line,
|
||||
compat_ctx, action))
|
||||
goto bad_rule;
|
||||
}
|
||||
}
|
||||
if (ctx) {
|
||||
if (seccomp_merge(conf->seccomp_ctx, ctx) != 0) {
|
||||
seccomp_release(ctx);
|
||||
ERROR("Error merging seccomp contexts");
|
||||
return -1;
|
||||
|
||||
if (compat_ctx) {
|
||||
INFO("Merging in the compat seccomp ctx into the main one");
|
||||
if (seccomp_merge(conf->seccomp_ctx, compat_ctx) != 0) {
|
||||
ERROR("Error merging i386 seccomp contexts");
|
||||
goto bad;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -259,13 +400,17 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
|
||||
bad_arch:
|
||||
ERROR("Unsupported arch: %s", line);
|
||||
bad_rule:
|
||||
if (ctx)
|
||||
seccomp_release(ctx);
|
||||
bad:
|
||||
if (compat_ctx)
|
||||
seccomp_release(compat_ctx);
|
||||
return -1;
|
||||
#else
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
#else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
|
||||
static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
#endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
|
||||
|
||||
/*
|
||||
* The first line of the config file has a policy language version
|
||||
@ -304,6 +449,32 @@ static int parse_config(FILE *f, struct lxc_conf *conf)
|
||||
return parse_config_v2(f, line, conf);
|
||||
}
|
||||
|
||||
static bool seccomp_already_confined(void)
|
||||
{
|
||||
FILE *f = fopen("/proc/self/status", "r");
|
||||
char line[1024];
|
||||
bool bret = false;
|
||||
int ret, v;
|
||||
|
||||
if (!f)
|
||||
return false;
|
||||
|
||||
while (fgets(line, 1024, f)) {
|
||||
if (strncmp(line, "Seccomp:", 8) == 0) {
|
||||
ret = sscanf(line+8, "%d", &v);
|
||||
if (ret != 1)
|
||||
continue;
|
||||
if (v != 0)
|
||||
bret = true;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
fclose(f);
|
||||
return bret;
|
||||
}
|
||||
|
||||
int lxc_read_seccomp_config(struct lxc_conf *conf)
|
||||
{
|
||||
FILE *f;
|
||||
@ -312,6 +483,10 @@ int lxc_read_seccomp_config(struct lxc_conf *conf)
|
||||
if (!conf->seccomp)
|
||||
return 0;
|
||||
|
||||
if (seccomp_already_confined()) {
|
||||
INFO("Already confined by seccomp; not loading a new policy");
|
||||
return 0;
|
||||
}
|
||||
#if HAVE_SCMP_FILTER_CTX
|
||||
/* XXX for debug, pass in SCMP_ACT_TRAP */
|
||||
conf->seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
|
||||
@ -350,6 +525,10 @@ int lxc_seccomp_load(struct lxc_conf *conf)
|
||||
int ret;
|
||||
if (!conf->seccomp)
|
||||
return 0;
|
||||
if (seccomp_already_confined()) {
|
||||
INFO("Already confined by seccomp; not loading a new policy");
|
||||
return 0;
|
||||
}
|
||||
ret = seccomp_load(
|
||||
#if HAVE_SCMP_FILTER_CTX
|
||||
conf->seccomp_ctx
|
||||
|
Loading…
Reference in New Issue
Block a user