mirror of
https://git.proxmox.com/git/mirror_lxc
synced 2025-07-27 00:09:52 +00:00
raw_syscalls: add lxc_raw_clone{_cb}()
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
This commit is contained in:
parent
13be27338c
commit
38e5c2dbba
@ -59,6 +59,7 @@
|
||||
#include "macro.h"
|
||||
#include "mainloop.h"
|
||||
#include "namespace.h"
|
||||
#include "raw_syscalls.h"
|
||||
#include "terminal.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
@ -75,80 +75,6 @@ pid_t lxc_clone(int (*fn)(void *), void *arg, int flags)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * lxc_raw_clone() - create a child process through the raw clone system call
 *
 * This is based on raw_clone in systemd but adapted to our needs. No stack is
 * handed to the kernel (the stack argument is always NULL), so the child keeps
 * running on a copy-on-write copy of the caller's stack. CLONE_VM is tricky
 * and doesn't really matter to us, so it is rejected.
 *
 * @flags: CLONE_* flags for the new process. SIGCHLD is always OR-ed in.
 *         CLONE_VM, CLONE_PARENT_SETTID, CLONE_CHILD_SETTID,
 *         CLONE_CHILD_CLEARTID and CLONE_SETTLS are not supported.
 *
 * The nice thing about this is that we get fork() behavior: the function
 * returns 0 in the child and the child pid in the parent.
 *
 * If an unsupported flag is requested, errno is set to EINVAL and -EINVAL is
 * returned. Otherwise the value produced by the system call is returned.
 * errno is only written by this function on the unsupported-flag path.
 */
pid_t lxc_raw_clone(unsigned long flags)
{
	/*
	 * These flags don't interest us at all so we don't jump through any
	 * hoops of retrieving them and passing them to the kernel.
	 */
	if (flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
		     CLONE_CHILD_CLEARTID | CLONE_SETTLS)) {
		/* Set errno only when we actually fail. */
		errno = EINVAL;
		return -EINVAL;
	}

#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
	/*
	 * On s390/s390x and cris the order of the first and second arguments
	 * of the system call is reversed.
	 */
	return (int)syscall(__NR_clone, NULL, flags | SIGCHLD);
#elif defined(__sparc__) && defined(__arch64__)
	{
		/*
		 * sparc64 always returns the other process id in %o0, and a
		 * boolean flag whether this is the child or the parent in %o1.
		 * Inline assembly is needed to get the flag returned in %o1.
		 */
		int in_child;
		int child_pid;
		asm volatile("mov %2, %%g1\n\t"
			     "mov %3, %%o0\n\t"
			     "mov 0 , %%o1\n\t"
			     "t 0x6d\n\t"
			     "mov %%o1, %0\n\t"
			     "mov %%o0, %1"
			     : "=r"(in_child), "=r"(child_pid)
			     : "i"(__NR_clone), "r"(flags | SIGCHLD)
			     : "%o1", "%o0", "%g1");

		if (in_child)
			return 0;
		else
			return child_pid;
	}
#elif defined(__ia64__)
	/*
	 * On ia64 the stack and stack size are passed as separate arguments.
	 * syscall() is variadic, so the stack size is widened explicitly
	 * instead of being passed as a plain int.
	 */
	return (int)syscall(__NR_clone, flags | SIGCHLD, NULL, (unsigned long)0);
#else
	return (int)syscall(__NR_clone, flags | SIGCHLD, NULL);
#endif
}
|
||||
|
||||
/**
 * lxc_raw_clone_cb() - run a callback in a child made by lxc_raw_clone()
 *
 * @fn:    callback executed in the child; its return value is used as the
 *         child's exit status
 * @args:  opaque pointer handed to @fn
 * @flags: forwarded unchanged to lxc_raw_clone()
 *
 * Returns the pid of the child in the parent, or -1 when lxc_raw_clone()
 * reported a negative value. The child never returns from this function.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags)
{
	pid_t child = lxc_raw_clone(flags);

	if (child == 0) {
		/*
		 * Child: leave through _exit() rather than exit(). exit() is
		 * not thread-safe and might mess with the parent's signal
		 * handlers and other stuff when exec() fails.
		 */
		int status = fn(args);

		_exit(status);
	}

	/* Parent: collapse every failure value to -1. */
	return (child < 0) ? -1 : child;
}
|
||||
|
||||
/* Leave the user namespace at the first position in the array of structs so
|
||||
* that we always attach to it first when iterating over the struct and using
|
||||
* setns() to switch namespaces. This especially affects lxc_attach(): Suppose
|
||||
|
@ -128,55 +128,13 @@ int clone(int (*fn)(void *), void *child_stack,
|
||||
* corresponding libc wrapper. glibc currently does not run pthread_atfork()
|
||||
* handlers but does not guarantee that they are not. Other libcs might or
|
||||
* might not run pthread_atfork() handlers. If you require guarantees please
|
||||
* refer to the lxc_raw_clone*() functions below.
|
||||
* refer to the lxc_raw_clone*() functions in raw_syscalls.{c,h}.
|
||||
*
|
||||
* - should call lxc_raw_getpid():
|
||||
* The child should use lxc_raw_getpid() to retrieve its pid.
|
||||
*/
|
||||
extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags);
|
||||
|
||||
/**
|
||||
* lxc_raw_clone() - create a new process
|
||||
*
|
||||
* - fork() behavior:
|
||||
* This function returns 0 in the child and > 0 in the parent.
|
||||
*
|
||||
* - copy-on-write:
|
||||
* This function does not allocate a new stack and relies on copy-on-write
|
||||
* semantics.
|
||||
*
|
||||
* - supports subset of CLONE_* flags:
|
||||
* lxc_raw_clone() intentionally only supports a subset of the flags available
|
||||
* to the actual system call. Please refer to the implementation for which flags
|
||||
* cannot be used. Also, please don't assume that just because a flag isn't
|
||||
* explicitly checked for as being unsupported that it is supported. If in
|
||||
* doubt or not sufficiently familiar with process creation in the kernel and
|
||||
* interactions with libcs this function should be used.
|
||||
*
|
||||
* - no pthread_atfork() handlers:
|
||||
* This function circumvents - as much as this is possible - any libc
|
||||
* wrappers and thus does not run any pthread_atfork() handlers. Make sure
|
||||
* that this is safe to do in the context you are trying to call this
|
||||
* function.
|
||||
*
|
||||
* - must call lxc_raw_getpid():
|
||||
* The child must use lxc_raw_getpid() to retrieve its pid.
|
||||
*/
|
||||
extern pid_t lxc_raw_clone(unsigned long flags);
|
||||
/**
|
||||
* lxc_raw_clone_cb() - create a new process
|
||||
*
|
||||
* - non-fork() behavior:
|
||||
* Returns the pid of the child or -1 on error. Pass in a callback
|
||||
* function via the "fn" argument that gets executed in the child process. The
|
||||
* "args" argument is passed to "fn".
|
||||
*
|
||||
* All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb()
|
||||
* as well.
|
||||
*/
|
||||
extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args,
|
||||
unsigned long flags);
|
||||
|
||||
extern int lxc_namespace_2_cloneflag(const char *namespace);
|
||||
extern int lxc_namespace_2_ns_idx(const char *namespace);
|
||||
extern int lxc_namespace_2_std_identifiers(char *namespaces);
|
||||
|
@ -2,12 +2,16 @@
|
||||
#define _GNU_SOURCE 1
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "macro.h"
|
||||
#include "raw_syscalls.h"
|
||||
|
||||
int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
|
||||
char *const envp[], int flags)
|
||||
@ -19,3 +23,78 @@ int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * lxc_raw_clone() - create a child process through the raw clone system call
 *
 * This is based on raw_clone in systemd but adapted to our needs. No stack is
 * handed to the kernel (the stack argument is always NULL), so the child keeps
 * running on a copy-on-write copy of the caller's stack. CLONE_VM is tricky
 * and doesn't really matter to us, so it is rejected.
 *
 * @flags: CLONE_* flags for the new process. SIGCHLD is always OR-ed in.
 *         CLONE_VM, CLONE_PARENT_SETTID, CLONE_CHILD_SETTID,
 *         CLONE_CHILD_CLEARTID and CLONE_SETTLS are not supported.
 *
 * The nice thing about this is that we get fork() behavior: the function
 * returns 0 in the child and the child pid in the parent.
 *
 * If an unsupported flag is requested, errno is set to EINVAL and -EINVAL is
 * returned. Otherwise the value produced by the system call is returned.
 * errno is only written by this function on the unsupported-flag path.
 */
pid_t lxc_raw_clone(unsigned long flags)
{
	/*
	 * These flags don't interest us at all so we don't jump through any
	 * hoops of retrieving them and passing them to the kernel.
	 */
	if (flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
		     CLONE_CHILD_CLEARTID | CLONE_SETTLS)) {
		/* Set errno only when we actually fail. */
		errno = EINVAL;
		return -EINVAL;
	}

#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
	/*
	 * On s390/s390x and cris the order of the first and second arguments
	 * of the system call is reversed.
	 */
	return (int)syscall(__NR_clone, NULL, flags | SIGCHLD);
#elif defined(__sparc__) && defined(__arch64__)
	{
		/*
		 * sparc64 always returns the other process id in %o0, and a
		 * boolean flag whether this is the child or the parent in %o1.
		 * Inline assembly is needed to get the flag returned in %o1.
		 */
		int in_child;
		int child_pid;
		asm volatile("mov %2, %%g1\n\t"
			     "mov %3, %%o0\n\t"
			     "mov 0 , %%o1\n\t"
			     "t 0x6d\n\t"
			     "mov %%o1, %0\n\t"
			     "mov %%o0, %1"
			     : "=r"(in_child), "=r"(child_pid)
			     : "i"(__NR_clone), "r"(flags | SIGCHLD)
			     : "%o1", "%o0", "%g1");

		if (in_child)
			return 0;
		else
			return child_pid;
	}
#elif defined(__ia64__)
	/* On ia64 the stack and stack size are passed as separate arguments. */
	return (int)syscall(__NR_clone, flags | SIGCHLD, NULL, prctl_arg(0));
#else
	return (int)syscall(__NR_clone, flags | SIGCHLD, NULL);
#endif
}
|
||||
|
||||
/*
 * lxc_raw_clone_cb() - run a callback in a child made by lxc_raw_clone()
 *
 * @fn:    callback executed in the child; its return value is used as the
 *         child's exit status
 * @args:  opaque pointer handed to @fn
 * @flags: forwarded unchanged to lxc_raw_clone()
 *
 * Returns the pid of the child in the parent, or -1 when lxc_raw_clone()
 * reported a negative value. The child never returns from this function.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags)
{
	pid_t child = lxc_raw_clone(flags);

	if (child == 0) {
		/*
		 * Child: leave through _exit() rather than exit(). exit() is
		 * not thread-safe and might mess with the parent's signal
		 * handlers and other stuff when exec() fails.
		 */
		int status = fn(args);

		_exit(status);
	}

	/* Parent: collapse every failure value to -1. */
	return (child < 0) ? -1 : child;
}
|
||||
|
@ -23,8 +23,52 @@
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE 1
|
||||
#endif
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
/*
|
||||
* lxc_raw_clone() - create a new process
|
||||
*
|
||||
* - fork() behavior:
|
||||
* This function returns 0 in the child and > 0 in the parent.
|
||||
*
|
||||
* - copy-on-write:
|
||||
* This function does not allocate a new stack and relies on copy-on-write
|
||||
* semantics.
|
||||
*
|
||||
* - supports subset of CLONE_* flags:
|
||||
* lxc_raw_clone() intentionally only supports a subset of the flags available
|
||||
* to the actual system call. Please refer to the implementation for which flags
|
||||
* cannot be used. Also, please don't assume that just because a flag isn't
|
||||
* explicitly checked for as being unsupported that it is supported. If in
|
||||
* doubt or not sufficiently familiar with process creation in the kernel and
|
||||
* interactions with libcs this function should be used.
|
||||
*
|
||||
* - no pthread_atfork() handlers:
|
||||
* This function circumvents - as much as this is possible - any libc
|
||||
* wrappers and thus does not run any pthread_atfork() handlers. Make sure
|
||||
* that this is safe to do in the context you are trying to call this
|
||||
* function.
|
||||
*
|
||||
* - must call lxc_raw_getpid():
|
||||
* The child must use lxc_raw_getpid() to retrieve its pid.
|
||||
*/
|
||||
extern pid_t lxc_raw_clone(unsigned long flags);
|
||||
|
||||
/*
|
||||
* lxc_raw_clone_cb() - create a new process
|
||||
*
|
||||
* - non-fork() behavior:
|
||||
* Returns the pid of the child or -1 on error. Pass in a callback
|
||||
* function via the "fn" argument that gets executed in the child process.
|
||||
* The "args" argument is passed to "fn".
|
||||
*
|
||||
* All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb()
|
||||
* as well.
|
||||
*/
|
||||
extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags);
|
||||
|
||||
extern int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
|
||||
char *const envp[], int flags);
|
||||
|
@ -70,6 +70,7 @@
|
||||
#include "monitor.h"
|
||||
#include "namespace.h"
|
||||
#include "network.h"
|
||||
#include "raw_syscalls.h"
|
||||
#include "start.h"
|
||||
#include "storage/storage.h"
|
||||
#include "storage/storage_utils.h"
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include "lxclock.h"
|
||||
#include "namespace.h"
|
||||
#include "parse.h"
|
||||
#include "raw_syscalls.h"
|
||||
#include "syscall_wrappers.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
@ -38,7 +38,7 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "lxctest.h"
|
||||
#include "namespace.h"
|
||||
#include "raw_syscalls.h"
|
||||
#include "utils.h"
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
|
Loading…
Reference in New Issue
Block a user