diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in index 022ef869f..095562704 100644 --- a/doc/lxc.container.conf.sgml.in +++ b/doc/lxc.container.conf.sgml.in @@ -338,6 +338,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + Core Scheduling + + Core scheduling defines whether the container payload + is marked as being schedulable on the same core. Doing so will cause + the kernel scheduler to ensure that tasks that are not in the same + group never run simultaneously on a core. This can serve as an extra + security measure to prevent the container payload from using + cross-hyperthread attacks. + + + + + + + + + The only allowed values are 0 and 1. Set this to 1 to create a + core scheduling domain for the container or 0 to not create one. + If not set explicitly, no core scheduling domain will be created + for the container. + + + + + + Proc diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h index bdffa649f..90f972463 100644 --- a/src/lxc/api_extensions.h +++ b/src/lxc/api_extensions.h @@ -46,6 +46,7 @@ static char *api_extensions[] = { "seccomp_proxy_send_notify_fd", "idmapped_mounts", "idmapped_mounts_v2", + "core_scheduling", }; static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); diff --git a/src/lxc/conf.c b/src/lxc/conf.c index e645d10be..45b13b086 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3382,6 +3382,8 @@ struct lxc_conf *lxc_conf_init(void) new->transient_procfs_mnt = false; new->shmount.path_host = NULL; new->shmount.path_cont = NULL; + new->sched_core = false; + new->sched_core_cookie = INVALID_SCHED_CORE_COOKIE; /* if running in a new user namespace, init and COMMAND * default to running as UID/GID 0 when using lxc-execute */ diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 636b9017d..12c26d98d 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -523,6 +523,9 @@ struct lxc_conf { } shmount; struct timens_offsets timens; + + bool 
sched_core; + __u64 sched_core_cookie; }; __hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size) diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 4a74f8daf..5d2c99f8a 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -154,6 +154,7 @@ lxc_config_define(tty_dir); lxc_config_define(uts_name); lxc_config_define(sysctl); lxc_config_define(proc); +lxc_config_define(sched_core); static int set_config_unsupported_key(const char *key, const char *value, struct lxc_conf *lxc_conf, void *data) @@ -207,6 +208,7 @@ static struct lxc_config_t config_jump_table[] = { { "lxc.console.path", true, set_config_console_path, get_config_console_path, clr_config_console_path, }, { "lxc.console.rotate", true, set_config_console_rotate, get_config_console_rotate, clr_config_console_rotate, }, { "lxc.console.size", true, set_config_console_size, get_config_console_size, clr_config_console_size, }, + { "lxc.sched.core", true, set_config_sched_core, get_config_sched_core, clr_config_sched_core, }, { "lxc.environment", true, set_config_environment, get_config_environment, clr_config_environment, }, { "lxc.ephemeral", true, set_config_ephemeral, get_config_ephemeral, clr_config_ephemeral, }, { "lxc.execute.cmd", true, set_config_execute_cmd, get_config_execute_cmd, clr_config_execute_cmd, }, @@ -6583,3 +6585,35 @@ int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, int inlen) return fulllen; } + +static int set_config_sched_core(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + int ret; + unsigned int nr; + + if (lxc_config_value_empty(value)) + return clr_config_sched_core(key, lxc_conf, data); + + ret = lxc_safe_uint(value, &nr); + if (ret) + return ret_errno(EINVAL); + + if (nr != 0 && nr != 1) + return ret_errno(EINVAL); + + lxc_conf->sched_core = (nr == 1); + return 0; +} + +static int get_config_sched_core(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ 
+ return lxc_get_conf_bool(c, retv, inlen, c->sched_core); +} + +static int clr_config_sched_core(const char *key, struct lxc_conf *c, void *data) +{ + c->sched_core = false; + return 0; +} diff --git a/src/lxc/start.c b/src/lxc/start.c index a9d3d5ead..f783f2e53 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -1553,6 +1553,32 @@ static inline int do_share_ns(void *arg) return 0; } +static int core_scheduling(struct lxc_handler *handler) +{ + struct lxc_conf *conf = handler->conf; + int ret; + + if (!conf->sched_core) + return log_trace(0, "No new core scheduling domain requested"); + + ret = core_scheduling_cookie_create_thread(handler->pid); + if (ret < 0) { + if (ret == -EINVAL) + return sysinfo("The kernel does not support core scheduling"); + + return syserror("Failed to create new core scheduling domain"); + } + + conf->sched_core_cookie = core_scheduling_cookie_get(handler->pid); + if (conf->sched_core_cookie == INVALID_SCHED_CORE_COOKIE) + return syserror("Failed to retrieve core scheduling domain cookie"); + + TRACE("Created new core scheduling domain with cookie %llu", + (long long unsigned int)conf->sched_core_cookie); + + return 0; +} + /* lxc_spawn() performs crucial setup tasks and clone()s the new process which * exec()s the requested container binary. * Note that lxc_spawn() runs in the parent namespaces. Any operations performed @@ -1709,6 +1735,10 @@ static int lxc_spawn(struct lxc_handler *handler) handler->clone_flags &= ~CLONE_PIDFD; TRACE("Cloned child process %d", handler->pid); + ret = core_scheduling(handler); + if (ret < 0) + goto out_delete_net; + /* Verify that we can actually make use of pidfds. 
*/ if (!lxc_can_use_pidfd(handler->pidfd)) close_prot_errno_disarm(handler->pidfd); diff --git a/src/lxc/syscall_wrappers.h b/src/lxc/syscall_wrappers.h index f1004d264..6e90f572d 100644 --- a/src/lxc/syscall_wrappers.h +++ b/src/lxc/syscall_wrappers.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -320,4 +321,70 @@ static inline int personality(unsigned long persona) } #endif +/* arg1 of prctl() */ +#ifndef PR_SCHED_CORE +#define PR_SCHED_CORE 62 +#endif + +/* arg2 of prctl() */ +#ifndef PR_SCHED_CORE_GET +#define PR_SCHED_CORE_GET 0 +#endif + +#ifndef PR_SCHED_CORE_CREATE +#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +#endif + +#ifndef PR_SCHED_CORE_SHARE_TO +#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +#endif + +#ifndef PR_SCHED_CORE_SHARE_FROM +#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +#endif + +#ifndef PR_SCHED_CORE_MAX +#define PR_SCHED_CORE_MAX 4 +#endif + +/* arg3 of prctl() */ +#ifndef PR_SCHED_CORE_SCOPE_THREAD +#define PR_SCHED_CORE_SCOPE_THREAD 0 +#endif + +#ifndef PR_SCHED_CORE_SCOPE_THREAD_GROUP +#define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 +#endif + +#ifndef PR_SCHED_CORE_SCOPE_PROCESS_GROUP +#define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +#endif + +#define INVALID_SCHED_CORE_COOKIE ((__u64)-1) + +static inline __u64 core_scheduling_cookie_get(pid_t pid) +{ + __u64 cookie; + int ret; + + ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid, + PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&cookie); + if (ret) + return INVALID_SCHED_CORE_COOKIE; + + return cookie; +} + +static inline int core_scheduling_cookie_create_thread(pid_t pid) +{ + int ret; + + ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid, + PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0); + if (ret) + return -errno; + + return 0; +} + #endif /* __LXC_SYSCALL_WRAPPER_H */ diff --git a/src/tests/parse_config_file.c b/src/tests/parse_config_file.c index e7468a96a..d19e24ec9 100644 --- 
a/src/tests/parse_config_file.c +++ b/src/tests/parse_config_file.c @@ -925,6 +925,11 @@ int main(int argc, char *argv[]) goto non_test_error; } + if (set_get_compare_clear_save_load(c, "lxc.sched.core", "1", tmpf, true) < 0) { + lxc_error("%s\n", "lxc.sched.core"); + goto non_test_error; + } + fret = EXIT_SUCCESS; non_test_error: