mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-08-29 02:59:13 +00:00
syscall_user_dispatch: Add PR_SYS_DISPATCH_INCLUSIVE_ON
There are two possible scenarios for syscall filtering:
- having a trusted/allowed range of PCs, and intercepting everything else
- or the opposite: a single untrusted/intercepted range and allowing everything else (this is relevant for any kind of sandboxing scenario, or monitoring behavior of a single library)

The current API only allows the former use case due to the allowed range wrap-around check. Add PR_SYS_DISPATCH_INCLUSIVE_ON that enables the second use case. Add PR_SYS_DISPATCH_EXCLUSIVE_ON alias for PR_SYS_DISPATCH_ON to make it clear how it's different from the new PR_SYS_DISPATCH_INCLUSIVE_ON.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/97947cc8e205ff49675826d7b0327ef2e2c66eea.1747839857.git.dvyukov@google.com
This commit is contained in:
parent
b89732c8c8
commit
a2fc422ed7
@ -53,20 +53,25 @@ following prctl:
|
||||
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, <op>, <offset>, <length>, [selector])
|
||||
|
||||
<op> is either PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF, to enable and
|
||||
disable the mechanism globally for that thread. When
|
||||
PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
|
||||
<op> is either PR_SYS_DISPATCH_EXCLUSIVE_ON/PR_SYS_DISPATCH_INCLUSIVE_ON
|
||||
or PR_SYS_DISPATCH_OFF, to enable and disable the mechanism globally for
|
||||
that thread. When PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
|
||||
|
||||
[<offset>, <offset>+<length>) delimit a memory region interval
|
||||
from which syscalls are always executed directly, regardless of the
|
||||
userspace selector. This provides a fast path for the C library, which
|
||||
includes the most common syscall dispatchers in the native code
|
||||
applications, and also provides a way for the signal handler to return
|
||||
For PR_SYS_DISPATCH_EXCLUSIVE_ON, [<offset>, <offset>+<length>) delimits
|
||||
a memory region interval from which syscalls are always executed directly,
|
||||
regardless of the userspace selector. This provides a fast path for the
|
||||
C library, which includes the most common syscall dispatchers in the native
|
||||
code applications, and also provides a way for the signal handler to return
|
||||
without triggering a nested SIGSYS on (rt\_)sigreturn. Users of this
|
||||
interface should make sure that at least the signal trampoline code is
|
||||
included in this region. In addition, for syscalls that implement the
|
||||
trampoline code on the vDSO, that trampoline is never intercepted.
|
||||
|
||||
For PR_SYS_DISPATCH_INCLUSIVE_ON, [<offset>, <offset>+<length>) delimits
|
||||
a memory region interval from which syscalls are dispatched based on
|
||||
the userspace selector. Syscalls from outside of the range are always
|
||||
executed directly.
|
||||
|
||||
[selector] is a pointer to a char-sized region in the process memory
|
||||
region, that provides a quick way to enable or disable syscall redirection
|
||||
thread-wide, without the need to invoke the kernel directly. selector
|
||||
|
@ -255,7 +255,12 @@ struct prctl_mm_map {
|
||||
/* Dispatch syscalls to a userspace handler */
|
||||
#define PR_SET_SYSCALL_USER_DISPATCH 59
|
||||
# define PR_SYS_DISPATCH_OFF 0
|
||||
# define PR_SYS_DISPATCH_ON 1
|
||||
/* Enable dispatch except for the specified range */
|
||||
# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
|
||||
/* Enable dispatch for the specified range */
|
||||
# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
|
||||
/* Legacy name for backwards compatibility */
|
||||
# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON
|
||||
/* The control values for the user space selector when dispatch is enabled */
|
||||
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
|
||||
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
|
||||
|
@ -78,7 +78,7 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon
|
||||
if (offset || len || selector)
|
||||
return -EINVAL;
|
||||
break;
|
||||
case PR_SYS_DISPATCH_ON:
|
||||
case PR_SYS_DISPATCH_EXCLUSIVE_ON:
|
||||
/*
|
||||
* Validate the direct dispatcher region just for basic
|
||||
* sanity against overflow and a 0-sized dispatcher
|
||||
@ -87,30 +87,40 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon
|
||||
*/
|
||||
if (offset && offset + len <= offset)
|
||||
return -EINVAL;
|
||||
|
||||
break;
|
||||
case PR_SYS_DISPATCH_INCLUSIVE_ON:
|
||||
if (len == 0 || offset + len <= offset)
|
||||
return -EINVAL;
|
||||
/*
|
||||
* access_ok() will clear memory tags for tagged addresses
|
||||
* if current has memory tagging enabled.
|
||||
|
||||
* To enable a tracer to set a tracee's selector the
|
||||
* selector address must be untagged for access_ok(),
|
||||
* otherwise an untagged tracer will always fail to set a
|
||||
* tagged tracee's selector.
|
||||
* Invert the range, the check in syscall_user_dispatch()
|
||||
* supports wrap-around.
|
||||
*/
|
||||
if (selector && !access_ok(untagged_addr(selector), sizeof(*selector)))
|
||||
return -EFAULT;
|
||||
|
||||
offset = offset + len;
|
||||
len = -len;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* access_ok() will clear memory tags for tagged addresses
|
||||
* if current has memory tagging enabled.
|
||||
*
|
||||
* To enable a tracer to set a tracee's selector the
|
||||
* selector address must be untagged for access_ok(),
|
||||
* otherwise an untagged tracer will always fail to set a
|
||||
* tagged tracee's selector.
|
||||
*/
|
||||
if (mode != PR_SYS_DISPATCH_OFF && selector &&
|
||||
!access_ok(untagged_addr(selector), sizeof(*selector)))
|
||||
return -EFAULT;
|
||||
|
||||
task->syscall_dispatch.selector = selector;
|
||||
task->syscall_dispatch.offset = offset;
|
||||
task->syscall_dispatch.len = len;
|
||||
task->syscall_dispatch.on_dispatch = false;
|
||||
|
||||
if (mode == PR_SYS_DISPATCH_ON)
|
||||
if (mode != PR_SYS_DISPATCH_OFF)
|
||||
set_task_syscall_work(task, SYSCALL_USER_DISPATCH);
|
||||
else
|
||||
clear_task_syscall_work(task, SYSCALL_USER_DISPATCH);
|
||||
|
@ -255,7 +255,12 @@ struct prctl_mm_map {
|
||||
/* Dispatch syscalls to a userspace handler */
|
||||
#define PR_SET_SYSCALL_USER_DISPATCH 59
|
||||
# define PR_SYS_DISPATCH_OFF 0
|
||||
# define PR_SYS_DISPATCH_ON 1
|
||||
/* Enable dispatch except for the specified range */
|
||||
# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
|
||||
/* Enable dispatch for the specified range */
|
||||
# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
|
||||
/* Legacy name for backwards compatibility */
|
||||
# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON
|
||||
/* The control values for the user space selector when dispatch is enabled */
|
||||
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
|
||||
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
|
||||
|
Loading…
Reference in New Issue
Block a user