diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
index 022ef869f..095562704 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -338,6 +338,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Core Scheduling
+
+ Core scheduling defines whether the container payload
+ is marked as being schedulable on the same core. Doing so will cause
+ the kernel scheduler to ensure that tasks that are not in the same
+ group never run simultaneously on a core. This can serve as an extra
+ security measure to prevent the container payload from using
+ cross-hyperthread attacks.
+
+
+
+
+
+
+
+
+ The only allowed values are 0 and 1. Set this to 1 to create a
+ core scheduling domain for the container or 0 to not create one.
+ If not set explicitly, no core scheduling domain will be created
+ for the container.
+
+
+
+
+
+
Proc
diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h
index bdffa649f..90f972463 100644
--- a/src/lxc/api_extensions.h
+++ b/src/lxc/api_extensions.h
@@ -46,6 +46,7 @@ static char *api_extensions[] = {
"seccomp_proxy_send_notify_fd",
"idmapped_mounts",
"idmapped_mounts_v2",
+ "core_scheduling",
};
static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index e645d10be..45b13b086 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3382,6 +3382,8 @@ struct lxc_conf *lxc_conf_init(void)
new->transient_procfs_mnt = false;
new->shmount.path_host = NULL;
new->shmount.path_cont = NULL;
+ new->sched_core = false;
+ new->sched_core_cookie = INVALID_SCHED_CORE_COOKIE;
/* if running in a new user namespace, init and COMMAND
* default to running as UID/GID 0 when using lxc-execute */
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 636b9017d..12c26d98d 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -523,6 +523,9 @@ struct lxc_conf {
} shmount;
struct timens_offsets timens;
+
+ bool sched_core;
+ __u64 sched_core_cookie;
};
__hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size)
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 4a74f8daf..5d2c99f8a 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -154,6 +154,7 @@ lxc_config_define(tty_dir);
lxc_config_define(uts_name);
lxc_config_define(sysctl);
lxc_config_define(proc);
+lxc_config_define(sched_core);
static int set_config_unsupported_key(const char *key, const char *value,
struct lxc_conf *lxc_conf, void *data)
@@ -207,6 +208,7 @@ static struct lxc_config_t config_jump_table[] = {
{ "lxc.console.path", true, set_config_console_path, get_config_console_path, clr_config_console_path, },
{ "lxc.console.rotate", true, set_config_console_rotate, get_config_console_rotate, clr_config_console_rotate, },
{ "lxc.console.size", true, set_config_console_size, get_config_console_size, clr_config_console_size, },
+ { "lxc.sched.core", true, set_config_sched_core, get_config_sched_core, clr_config_sched_core, },
{ "lxc.environment", true, set_config_environment, get_config_environment, clr_config_environment, },
{ "lxc.ephemeral", true, set_config_ephemeral, get_config_ephemeral, clr_config_ephemeral, },
{ "lxc.execute.cmd", true, set_config_execute_cmd, get_config_execute_cmd, clr_config_execute_cmd, },
@@ -6583,3 +6585,35 @@ int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, int inlen)
return fulllen;
}
+/*
+ * Setter for the "lxc.sched.core" configuration key.
+ *
+ * An empty value is forwarded to clr_config_sched_core(). Any other value
+ * must be accepted by lxc_safe_uint() and be exactly 0 or 1; everything else
+ * is rejected through ret_errno(EINVAL). On success lxc_conf->sched_core is
+ * set to true for 1 and false for 0, and 0 is returned.
+ */
+static int set_config_sched_core(const char *key, const char *value,
+ struct lxc_conf *lxc_conf, void *data)
+{
+ int ret;
+ unsigned int nr;
+
+ if (lxc_config_value_empty(value)) /* empty value resets the key */
+ return clr_config_sched_core(key, lxc_conf, data);
+
+ ret = lxc_safe_uint(value, &nr);
+ if (ret) /* any non-zero result is treated as a parse failure */
+ return ret_errno(EINVAL);
+
+ if (nr != 0 && nr != 1) /* the option is a boolean switch */
+ return ret_errno(EINVAL);
+
+ lxc_conf->sched_core = (nr == 1);
+ return 0;
+}
+/*
+ * Getter for the "lxc.sched.core" configuration key.
+ *
+ * Hands the current c->sched_core flag to lxc_get_conf_bool() together with
+ * the caller's buffer (retv, inlen) and returns whatever that helper returns.
+ * key and data are not used.
+ */
+static int get_config_sched_core(const char *key, char *retv, int inlen,
+ struct lxc_conf *c, void *data)
+{
+ return lxc_get_conf_bool(c, retv, inlen, c->sched_core);
+}
+/*
+ * Clear handler for the "lxc.sched.core" configuration key.
+ *
+ * Resets c->sched_core to false, the same value lxc_conf_init() assigns, and
+ * always returns 0. key and data are not used.
+ */
+static int clr_config_sched_core(const char *key, struct lxc_conf *c, void *data)
+{
+ c->sched_core = false;
+ return 0;
+}
diff --git a/src/lxc/start.c b/src/lxc/start.c
index a9d3d5ead..f783f2e53 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1553,6 +1553,32 @@ static inline int do_share_ns(void *arg)
return 0;
}
+static int core_scheduling(struct lxc_handler *handler)
+{
+ struct lxc_conf *conf = handler->conf;
+ int ret;
+ /*
+ * Give the process handler->pid a new core scheduling cookie when the
+ * configuration requests it (conf->sched_core) and record that cookie in
+ * conf->sched_core_cookie. When the option is off this only logs a trace
+ * message and returns 0.
+ *
+ * Returns 0 on success; on failure the value of sysinfo()/syserror() is
+ * returned, and lxc_spawn() treats any negative value as fatal.
+ *
+ * NOTE(review): the "kernel does not support core scheduling" case is
+ * logged at info level, yet whether startup continues depends on what
+ * sysinfo() evaluates to - presumably a negative errno like syserror();
+ * confirm that aborting the start is the intended outcome.
+ */
+ if (!conf->sched_core)
+ return log_trace(0, "No new core scheduling domain requested");
+
+ ret = core_scheduling_cookie_create_thread(handler->pid);
+ if (ret < 0) {
+ if (ret == -EINVAL) /* wrapper returns -errno, i.e. prctl() failed with EINVAL */
+ return sysinfo("The kernel does not support core scheduling");
+
+ return syserror("Failed to create new core scheduling domain");
+ }
+
+ conf->sched_core_cookie = core_scheduling_cookie_get(handler->pid); /* read back the cookie that was just created */
+ if (conf->sched_core_cookie == INVALID_SCHED_CORE_COOKIE)
+ return syserror("Failed to retrieve core scheduling domain cookie");
+
+ TRACE("Created new core scheduling domain with cookie %llu",
+ (long long unsigned int)conf->sched_core_cookie);
+
+ return 0;
+}
+
/* lxc_spawn() performs crucial setup tasks and clone()s the new process which
* exec()s the requested container binary.
* Note that lxc_spawn() runs in the parent namespaces. Any operations performed
@@ -1709,6 +1735,10 @@ static int lxc_spawn(struct lxc_handler *handler)
handler->clone_flags &= ~CLONE_PIDFD;
TRACE("Cloned child process %d", handler->pid);
+ ret = core_scheduling(handler);
+ if (ret < 0)
+ goto out_delete_net;
+
/* Verify that we can actually make use of pidfds. */
if (!lxc_can_use_pidfd(handler->pidfd))
close_prot_errno_disarm(handler->pidfd);
diff --git a/src/lxc/syscall_wrappers.h b/src/lxc/syscall_wrappers.h
index f1004d264..6e90f572d 100644
--- a/src/lxc/syscall_wrappers.h
+++ b/src/lxc/syscall_wrappers.h
@@ -10,6 +10,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -320,4 +321,70 @@ static inline int personality(unsigned long persona)
}
#endif
+/*
+ * Fallback definitions for the core scheduling prctl() interface. Every
+ * constant is guarded by #ifndef, so it is only defined here when the
+ * included headers did not already provide it.
+ *
+ * arg1 of prctl()
+ */
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+#endif
+
+/* arg2 of prctl(): the core scheduling command */
+#ifndef PR_SCHED_CORE_GET
+#define PR_SCHED_CORE_GET 0 /* read core_sched cookie of pid */
+#endif
+
+#ifndef PR_SCHED_CORE_CREATE
+#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_TO
+#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_FROM
+#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie from pid */
+#endif
+
+#ifndef PR_SCHED_CORE_MAX
+#define PR_SCHED_CORE_MAX 4
+#endif
+
+/*
+ * Scope selectors. The wrappers in this header pass the pid as arg3 of
+ * prctl() and one of these values as arg4.
+ */
+#ifndef PR_SCHED_CORE_SCOPE_THREAD
+#define PR_SCHED_CORE_SCOPE_THREAD 0
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_THREAD_GROUP
+#define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_PROCESS_GROUP
+#define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+#endif
+/*
+ * Sentinel cookie value (all bits set): returned by
+ * core_scheduling_cookie_get() when its prctl() call fails.
+ */
+#define INVALID_SCHED_CORE_COOKIE ((__u64)-1)
+/*
+ * Query the core scheduling cookie of @pid through
+ * prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, ...) using thread scope.
+ *
+ * Returns the cookie stored into the local variable by the call, or
+ * INVALID_SCHED_CORE_COOKIE when prctl() returns non-zero. errno is not
+ * touched here, so it still holds whatever prctl() set.
+ */
+static inline __u64 core_scheduling_cookie_get(pid_t pid)
+{
+ __u64 cookie;
+ int ret;
+
+ ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid,
+ PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&cookie); /* last argument: address receiving the cookie */
+ if (ret) /* cookie is left unset on failure, so do not return it */
+ return INVALID_SCHED_CORE_COOKIE;
+
+ return cookie;
+}
+/*
+ * Create a new core scheduling cookie for @pid through
+ * prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, ...).
+ *
+ * NOTE(review): despite the "_thread" suffix, the scope passed to the kernel
+ * is PR_SCHED_CORE_SCOPE_THREAD_GROUP, not PR_SCHED_CORE_SCOPE_THREAD.
+ * Confirm which scope is intended and consider renaming to match.
+ *
+ * Returns 0 on success and -errno when prctl() returns non-zero.
+ */
+static inline int core_scheduling_cookie_create_thread(pid_t pid)
+{
+ int ret;
+
+ ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid,
+ PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0); /* last argument is passed as 0 for CREATE */
+ if (ret)
+ return -errno;
+
+ return 0;
+}
+
#endif /* __LXC_SYSCALL_WRAPPER_H */
diff --git a/src/tests/parse_config_file.c b/src/tests/parse_config_file.c
index e7468a96a..d19e24ec9 100644
--- a/src/tests/parse_config_file.c
+++ b/src/tests/parse_config_file.c
@@ -925,6 +925,11 @@ int main(int argc, char *argv[])
goto non_test_error;
}
+ if (set_get_compare_clear_save_load(c, "lxc.sched.core", "1", tmpf, true) < 0) {
+ lxc_error("%s\n", "lxc.sched.core");
+ goto non_test_error;
+ }
+
fret = EXIT_SUCCESS;
non_test_error: