mirror of
https://git.proxmox.com/git/mirror_ubuntu-kernels.git
synced 2025-11-07 08:20:49 +00:00
This set of changes removes tracehook.h, moves modification of all of
the ptrace fields inside of siglock to remove races, adds a missing
permission check to ptrace.c
The removal of tracehook.h is quite significant as it has been a major
source of confusion in recent years. Much of that confusion was
around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled
making the semantics clearer).
For people who don't know tracehook.h is a vestiage of an attempt to
implement uprobes like functionality that was never fully merged, and
was later superseeded by uprobes when uprobes was merged. For many
years now we have been removing what tracehook functionaly a little
bit at a time. To the point where now anything left in tracehook.h is
some weird strange thing that is difficult to understand.
Eric W. Biederman (15):
ptrace: Move ptrace_report_syscall into ptrace.h
ptrace/arm: Rename tracehook_report_syscall report_syscall
ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h
ptrace: Remove arch_syscall_{enter,exit}_tracehook
ptrace: Remove tracehook_signal_handler
task_work: Remove unnecessary include from posix_timers.h
task_work: Introduce task_work_pending
task_work: Call tracehook_notify_signal from get_signal on all architectures
task_work: Decouple TIF_NOTIFY_SIGNAL and task_work
signal: Move set_notify_signal and clear_notify_signal into sched/signal.h
resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume
resume_user_mode: Move to resume_user_mode.h
tracehook: Remove tracehook.h
ptrace: Move setting/clearing ptrace_message into ptrace_stop
ptrace: Return the signal to continue with from ptrace_stop
Jann Horn (1):
ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE
Yang Li (1):
ptrace: Remove duplicated include in ptrace.c
MAINTAINERS | 1 -
arch/Kconfig | 5 +-
arch/alpha/kernel/ptrace.c | 5 +-
arch/alpha/kernel/signal.c | 4 +-
arch/arc/kernel/ptrace.c | 5 +-
arch/arc/kernel/signal.c | 4 +-
arch/arm/kernel/ptrace.c | 12 +-
arch/arm/kernel/signal.c | 4 +-
arch/arm64/kernel/ptrace.c | 14 +--
arch/arm64/kernel/signal.c | 4 +-
arch/csky/kernel/ptrace.c | 5 +-
arch/csky/kernel/signal.c | 4 +-
arch/h8300/kernel/ptrace.c | 5 +-
arch/h8300/kernel/signal.c | 4 +-
arch/hexagon/kernel/process.c | 4 +-
arch/hexagon/kernel/signal.c | 1 -
arch/hexagon/kernel/traps.c | 6 +-
arch/ia64/kernel/process.c | 4 +-
arch/ia64/kernel/ptrace.c | 6 +-
arch/ia64/kernel/signal.c | 1 -
arch/m68k/kernel/ptrace.c | 5 +-
arch/m68k/kernel/signal.c | 4 +-
arch/microblaze/kernel/ptrace.c | 5 +-
arch/microblaze/kernel/signal.c | 4 +-
arch/mips/kernel/ptrace.c | 5 +-
arch/mips/kernel/signal.c | 4 +-
arch/nds32/include/asm/syscall.h | 2 +-
arch/nds32/kernel/ptrace.c | 5 +-
arch/nds32/kernel/signal.c | 4 +-
arch/nios2/kernel/ptrace.c | 5 +-
arch/nios2/kernel/signal.c | 4 +-
arch/openrisc/kernel/ptrace.c | 5 +-
arch/openrisc/kernel/signal.c | 4 +-
arch/parisc/kernel/ptrace.c | 7 +-
arch/parisc/kernel/signal.c | 4 +-
arch/powerpc/kernel/ptrace/ptrace.c | 8 +-
arch/powerpc/kernel/signal.c | 4 +-
arch/riscv/kernel/ptrace.c | 5 +-
arch/riscv/kernel/signal.c | 4 +-
arch/s390/include/asm/entry-common.h | 1 -
arch/s390/kernel/ptrace.c | 1 -
arch/s390/kernel/signal.c | 5 +-
arch/sh/kernel/ptrace_32.c | 5 +-
arch/sh/kernel/signal_32.c | 4 +-
arch/sparc/kernel/ptrace_32.c | 5 +-
arch/sparc/kernel/ptrace_64.c | 5 +-
arch/sparc/kernel/signal32.c | 1 -
arch/sparc/kernel/signal_32.c | 4 +-
arch/sparc/kernel/signal_64.c | 4 +-
arch/um/kernel/process.c | 4 +-
arch/um/kernel/ptrace.c | 5 +-
arch/x86/kernel/ptrace.c | 1 -
arch/x86/kernel/signal.c | 5 +-
arch/x86/mm/tlb.c | 1 +
arch/xtensa/kernel/ptrace.c | 5 +-
arch/xtensa/kernel/signal.c | 4 +-
block/blk-cgroup.c | 2 +-
fs/coredump.c | 1 -
fs/exec.c | 1 -
fs/io-wq.c | 6 +-
fs/io_uring.c | 11 +-
fs/proc/array.c | 1 -
fs/proc/base.c | 1 -
include/asm-generic/syscall.h | 2 +-
include/linux/entry-common.h | 47 +-------
include/linux/entry-kvm.h | 2 +-
include/linux/posix-timers.h | 1 -
include/linux/ptrace.h | 81 ++++++++++++-
include/linux/resume_user_mode.h | 64 ++++++++++
include/linux/sched/signal.h | 17 +++
include/linux/task_work.h | 5 +
include/linux/tracehook.h | 226 -----------------------------------
include/uapi/linux/ptrace.h | 2 +-
kernel/entry/common.c | 19 +--
kernel/entry/kvm.c | 9 +-
kernel/exit.c | 3 +-
kernel/livepatch/transition.c | 1 -
kernel/ptrace.c | 47 +++++---
kernel/seccomp.c | 1 -
kernel/signal.c | 62 +++++-----
kernel/task_work.c | 4 +-
kernel/time/posix-cpu-timers.c | 1 +
mm/memcontrol.c | 2 +-
security/apparmor/domain.c | 1 -
security/selinux/hooks.c | 1 -
85 files changed, 372 insertions(+), 495 deletions(-)
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEEgjlraLDcwBA2B+6cC/v6Eiajj0AFAmJCQkoACgkQC/v6Eiaj
j0DCWQ/5AZVFU+hX32obUNCLackHTwgcCtSOs3JNBmNA/zL/htPiYYG0ghkvtlDR
Dw5J5DnxC6P7PVAdAqrpvx2uX2FebHYU0bRlyLx8LYUEP5dhyNicxX9jA882Z+vw
Ud0Ue9EojwGWS76dC9YoKUj3slThMATbhA2r4GVEoof8fSNJaBxQIqath44t0FwU
DinWa+tIOvZANGBZr6CUUINNIgqBIZCH/R4h6ArBhMlJpuQ5Ufk2kAaiWFwZCkX4
0LuuAwbKsCKkF8eap5I2KrIg/7zZVgxAg9O3cHOzzm8OPbKzRnNnQClcDe8perqp
S6e/f3MgpE+eavd1EiLxevZ660cJChnmikXVVh8ZYYoefaMKGqBaBSsB38bNcLjY
3+f2dB+TNBFRnZs1aCujK3tWBT9QyjZDKtCBfzxDNWBpXGLhHH6j6lA5Lj+Cef5K
/HNHFb+FuqedlFZh5m1Y+piFQ70hTgCa2u8b+FSOubI2hW9Zd+WzINV0ANaZ2LvZ
4YGtcyDNk1q1+c87lxP9xMRl/xi6rNg+B9T2MCo4IUnHgpSVP6VEB3osgUmrrrN0
eQlUI154G/AaDlqXLgmn1xhRmlPGfmenkxpok1AuzxvNJsfLKnpEwQSc13g3oiZr
disZQxNY0kBO2Nv3G323Z6PLinhbiIIFez6cJzK5v0YJ2WtO3pY=
=uEro
-----END PGP SIGNATURE-----
Merge tag 'ptrace-cleanups-for-v5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull ptrace cleanups from Eric Biederman:
"This set of changes removes tracehook.h, moves modification of all of
the ptrace fields inside of siglock to remove races, adds a missing
permission check to ptrace.c
The removal of tracehook.h is quite significant as it has been a major
source of confusion in recent years. Much of that confusion was around
task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled making the
semantics clearer).
For people who don't know tracehook.h is a vestiage of an attempt to
implement uprobes like functionality that was never fully merged, and
was later superseeded by uprobes when uprobes was merged. For many
years now we have been removing what tracehook functionaly a little
bit at a time. To the point where anything left in tracehook.h was
some weird strange thing that was difficult to understand"
* tag 'ptrace-cleanups-for-v5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
ptrace: Remove duplicated include in ptrace.c
ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE
ptrace: Return the signal to continue with from ptrace_stop
ptrace: Move setting/clearing ptrace_message into ptrace_stop
tracehook: Remove tracehook.h
resume_user_mode: Move to resume_user_mode.h
resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume
signal: Move set_notify_signal and clear_notify_signal into sched/signal.h
task_work: Decouple TIF_NOTIFY_SIGNAL and task_work
task_work: Call tracehook_notify_signal from get_signal on all architectures
task_work: Introduce task_work_pending
task_work: Remove unnecessary include from posix_timers.h
ptrace: Remove tracehook_signal_handler
ptrace: Remove arch_syscall_{enter,exit}_tracehook
ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h
ptrace/arm: Rename tracehook_report_syscall report_syscall
ptrace: Move ptrace_report_syscall into ptrace.h
492 lines
12 KiB
C
492 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <linux/context_tracking.h>
|
|
#include <linux/entry-common.h>
|
|
#include <linux/resume_user_mode.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/jump_label.h>
|
|
#include <linux/livepatch.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/tick.h>
|
|
|
|
#include "common.h"
|
|
|
|
#define CREATE_TRACE_POINTS
|
|
#include <trace/events/syscalls.h>
|
|
|
|
/* See comment for enter_from_user_mode() in entry-common.h */
|
|
static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
|
|
{
|
|
arch_check_user_regs(regs);
|
|
lockdep_hardirqs_off(CALLER_ADDR0);
|
|
|
|
CT_WARN_ON(ct_state() != CONTEXT_USER);
|
|
user_exit_irqoff();
|
|
|
|
instrumentation_begin();
|
|
trace_hardirqs_off_finish();
|
|
instrumentation_end();
|
|
}
|
|
|
|
void noinstr enter_from_user_mode(struct pt_regs *regs)
|
|
{
|
|
__enter_from_user_mode(regs);
|
|
}
|
|
|
|
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
|
|
{
|
|
if (unlikely(audit_context())) {
|
|
unsigned long args[6];
|
|
|
|
syscall_get_arguments(current, regs, args);
|
|
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
|
|
}
|
|
}
|
|
|
|
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
|
|
unsigned long work)
|
|
{
|
|
long ret = 0;
|
|
|
|
/*
|
|
* Handle Syscall User Dispatch. This must comes first, since
|
|
* the ABI here can be something that doesn't make sense for
|
|
* other syscall_work features.
|
|
*/
|
|
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
|
|
if (syscall_user_dispatch(regs))
|
|
return -1L;
|
|
}
|
|
|
|
/* Handle ptrace */
|
|
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
|
|
ret = ptrace_report_syscall_entry(regs);
|
|
if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
|
|
return -1L;
|
|
}
|
|
|
|
/* Do seccomp after ptrace, to catch any tracer changes. */
|
|
if (work & SYSCALL_WORK_SECCOMP) {
|
|
ret = __secure_computing(NULL);
|
|
if (ret == -1L)
|
|
return ret;
|
|
}
|
|
|
|
/* Either of the above might have changed the syscall number */
|
|
syscall = syscall_get_nr(current, regs);
|
|
|
|
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
|
|
trace_sys_enter(regs, syscall);
|
|
|
|
syscall_enter_audit(regs, syscall);
|
|
|
|
return ret ? : syscall;
|
|
}
|
|
|
|
static __always_inline long
|
|
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
|
|
{
|
|
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
|
|
|
|
if (work & SYSCALL_WORK_ENTER)
|
|
syscall = syscall_trace_enter(regs, syscall, work);
|
|
|
|
return syscall;
|
|
}
|
|
|
|
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
|
|
{
|
|
return __syscall_enter_from_user_work(regs, syscall);
|
|
}
|
|
|
|
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
|
|
{
|
|
long ret;
|
|
|
|
__enter_from_user_mode(regs);
|
|
|
|
instrumentation_begin();
|
|
local_irq_enable();
|
|
ret = __syscall_enter_from_user_work(regs, syscall);
|
|
instrumentation_end();
|
|
|
|
return ret;
|
|
}
|
|
|
|
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
|
|
{
|
|
__enter_from_user_mode(regs);
|
|
instrumentation_begin();
|
|
local_irq_enable();
|
|
instrumentation_end();
|
|
}
|
|
|
|
/* See comment for exit_to_user_mode() in entry-common.h */
|
|
static __always_inline void __exit_to_user_mode(void)
|
|
{
|
|
instrumentation_begin();
|
|
trace_hardirqs_on_prepare();
|
|
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
|
|
instrumentation_end();
|
|
|
|
user_enter_irqoff();
|
|
arch_exit_to_user_mode();
|
|
lockdep_hardirqs_on(CALLER_ADDR0);
|
|
}
|
|
|
|
void noinstr exit_to_user_mode(void)
|
|
{
|
|
__exit_to_user_mode();
|
|
}
|
|
|
|
/* Workaround to allow gradual conversion of architecture code */
|
|
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
|
|
|
|
#ifdef CONFIG_RT_DELAYED_SIGNALS
|
|
static inline void raise_delayed_signal(void)
|
|
{
|
|
if (unlikely(current->forced_info.si_signo)) {
|
|
force_sig_info(¤t->forced_info);
|
|
current->forced_info.si_signo = 0;
|
|
}
|
|
}
|
|
#else
|
|
static inline void raise_delayed_signal(void) { }
|
|
#endif
|
|
|
|
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
|
|
unsigned long ti_work)
|
|
{
|
|
/*
|
|
* Before returning to user space ensure that all pending work
|
|
* items have been completed.
|
|
*/
|
|
while (ti_work & EXIT_TO_USER_MODE_WORK) {
|
|
|
|
local_irq_enable_exit_to_user(ti_work);
|
|
|
|
if (ti_work & _TIF_NEED_RESCHED)
|
|
schedule();
|
|
|
|
raise_delayed_signal();
|
|
|
|
if (ti_work & _TIF_UPROBE)
|
|
uprobe_notify_resume(regs);
|
|
|
|
if (ti_work & _TIF_PATCH_PENDING)
|
|
klp_update_patch_state(current);
|
|
|
|
if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
|
|
arch_do_signal_or_restart(regs);
|
|
|
|
if (ti_work & _TIF_NOTIFY_RESUME)
|
|
resume_user_mode_work(regs);
|
|
|
|
/* Architecture specific TIF work */
|
|
arch_exit_to_user_mode_work(regs, ti_work);
|
|
|
|
/*
|
|
* Disable interrupts and reevaluate the work flags as they
|
|
* might have changed while interrupts and preemption was
|
|
* enabled above.
|
|
*/
|
|
local_irq_disable_exit_to_user();
|
|
|
|
/* Check if any of the above work has queued a deferred wakeup */
|
|
tick_nohz_user_enter_prepare();
|
|
|
|
ti_work = read_thread_flags();
|
|
}
|
|
|
|
/* Return the latest work state for arch_exit_to_user_mode() */
|
|
return ti_work;
|
|
}
|
|
|
|
static void exit_to_user_mode_prepare(struct pt_regs *regs)
|
|
{
|
|
unsigned long ti_work = read_thread_flags();
|
|
|
|
lockdep_assert_irqs_disabled();
|
|
|
|
/* Flush pending rcuog wakeup before the last need_resched() check */
|
|
tick_nohz_user_enter_prepare();
|
|
|
|
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
|
|
ti_work = exit_to_user_mode_loop(regs, ti_work);
|
|
|
|
arch_exit_to_user_mode_prepare(regs, ti_work);
|
|
|
|
/* Ensure that the address limit is intact and no locks are held */
|
|
addr_limit_user_check();
|
|
kmap_assert_nomap();
|
|
lockdep_assert_irqs_disabled();
|
|
lockdep_sys_exit();
|
|
}
|
|
|
|
/*
|
|
* If SYSCALL_EMU is set, then the only reason to report is when
|
|
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
|
|
* instruction has been already reported in syscall_enter_from_user_mode().
|
|
*/
|
|
static inline bool report_single_step(unsigned long work)
|
|
{
|
|
if (work & SYSCALL_WORK_SYSCALL_EMU)
|
|
return false;
|
|
|
|
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
|
|
}
|
|
|
|
static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
|
|
{
|
|
bool step;
|
|
|
|
/*
|
|
* If the syscall was rolled back due to syscall user dispatching,
|
|
* then the tracers below are not invoked for the same reason as
|
|
* the entry side was not invoked in syscall_trace_enter(): The ABI
|
|
* of these syscalls is unknown.
|
|
*/
|
|
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
|
|
if (unlikely(current->syscall_dispatch.on_dispatch)) {
|
|
current->syscall_dispatch.on_dispatch = false;
|
|
return;
|
|
}
|
|
}
|
|
|
|
audit_syscall_exit(regs);
|
|
|
|
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
|
|
trace_sys_exit(regs, syscall_get_return_value(current, regs));
|
|
|
|
step = report_single_step(work);
|
|
if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
|
|
ptrace_report_syscall_exit(regs, step);
|
|
}
|
|
|
|
/*
|
|
* Syscall specific exit to user mode preparation. Runs with interrupts
|
|
* enabled.
|
|
*/
|
|
static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
|
|
{
|
|
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
|
|
unsigned long nr = syscall_get_nr(current, regs);
|
|
|
|
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
|
|
|
|
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
|
|
if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
|
|
local_irq_enable();
|
|
}
|
|
|
|
rseq_syscall(regs);
|
|
|
|
/*
|
|
* Do one-time syscall specific work. If these work items are
|
|
* enabled, we want to run them exactly once per syscall exit with
|
|
* interrupts enabled.
|
|
*/
|
|
if (unlikely(work & SYSCALL_WORK_EXIT))
|
|
syscall_exit_work(regs, work);
|
|
}
|
|
|
|
static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
|
|
{
|
|
syscall_exit_to_user_mode_prepare(regs);
|
|
local_irq_disable_exit_to_user();
|
|
exit_to_user_mode_prepare(regs);
|
|
}
|
|
|
|
void syscall_exit_to_user_mode_work(struct pt_regs *regs)
|
|
{
|
|
__syscall_exit_to_user_mode_work(regs);
|
|
}
|
|
|
|
__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
|
|
{
|
|
instrumentation_begin();
|
|
__syscall_exit_to_user_mode_work(regs);
|
|
instrumentation_end();
|
|
__exit_to_user_mode();
|
|
}
|
|
|
|
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
|
|
{
|
|
__enter_from_user_mode(regs);
|
|
}
|
|
|
|
noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
|
|
{
|
|
instrumentation_begin();
|
|
exit_to_user_mode_prepare(regs);
|
|
instrumentation_end();
|
|
__exit_to_user_mode();
|
|
}
|
|
|
|
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
|
|
{
|
|
irqentry_state_t ret = {
|
|
.exit_rcu = false,
|
|
};
|
|
|
|
if (user_mode(regs)) {
|
|
irqentry_enter_from_user_mode(regs);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* If this entry hit the idle task invoke rcu_irq_enter() whether
|
|
* RCU is watching or not.
|
|
*
|
|
* Interrupts can nest when the first interrupt invokes softirq
|
|
* processing on return which enables interrupts.
|
|
*
|
|
* Scheduler ticks in the idle task can mark quiescent state and
|
|
* terminate a grace period, if and only if the timer interrupt is
|
|
* not nested into another interrupt.
|
|
*
|
|
* Checking for rcu_is_watching() here would prevent the nesting
|
|
* interrupt to invoke rcu_irq_enter(). If that nested interrupt is
|
|
* the tick then rcu_flavor_sched_clock_irq() would wrongfully
|
|
* assume that it is the first interrupt and eventually claim
|
|
* quiescent state and end grace periods prematurely.
|
|
*
|
|
* Unconditionally invoke rcu_irq_enter() so RCU state stays
|
|
* consistent.
|
|
*
|
|
* TINY_RCU does not support EQS, so let the compiler eliminate
|
|
* this part when enabled.
|
|
*/
|
|
if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
|
|
/*
|
|
* If RCU is not watching then the same careful
|
|
* sequence vs. lockdep and tracing is required
|
|
* as in irqentry_enter_from_user_mode().
|
|
*/
|
|
lockdep_hardirqs_off(CALLER_ADDR0);
|
|
rcu_irq_enter();
|
|
instrumentation_begin();
|
|
trace_hardirqs_off_finish();
|
|
instrumentation_end();
|
|
|
|
ret.exit_rcu = true;
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* If RCU is watching then RCU only wants to check whether it needs
|
|
* to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
|
|
* already contains a warning when RCU is not watching, so no point
|
|
* in having another one here.
|
|
*/
|
|
lockdep_hardirqs_off(CALLER_ADDR0);
|
|
instrumentation_begin();
|
|
rcu_irq_enter_check_tick();
|
|
trace_hardirqs_off_finish();
|
|
instrumentation_end();
|
|
|
|
return ret;
|
|
}
|
|
|
|
void raw_irqentry_exit_cond_resched(void)
|
|
{
|
|
if (!preempt_count()) {
|
|
/* Sanity check RCU and thread stack */
|
|
rcu_irq_exit_check_preempt();
|
|
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
|
|
WARN_ON_ONCE(!on_thread_stack());
|
|
if (need_resched())
|
|
preempt_schedule_irq();
|
|
}
|
|
}
|
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
|
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
|
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
|
|
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
|
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
|
void dynamic_irqentry_exit_cond_resched(void)
|
|
{
|
|
if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
|
|
return;
|
|
raw_irqentry_exit_cond_resched();
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
|
|
{
|
|
lockdep_assert_irqs_disabled();
|
|
|
|
/* Check whether this returns to user mode */
|
|
if (user_mode(regs)) {
|
|
irqentry_exit_to_user_mode(regs);
|
|
} else if (!regs_irqs_disabled(regs)) {
|
|
/*
|
|
* If RCU was not watching on entry this needs to be done
|
|
* carefully and needs the same ordering of lockdep/tracing
|
|
* and RCU as the return to user mode path.
|
|
*/
|
|
if (state.exit_rcu) {
|
|
instrumentation_begin();
|
|
/* Tell the tracer that IRET will enable interrupts */
|
|
trace_hardirqs_on_prepare();
|
|
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
|
|
instrumentation_end();
|
|
rcu_irq_exit();
|
|
lockdep_hardirqs_on(CALLER_ADDR0);
|
|
return;
|
|
}
|
|
|
|
instrumentation_begin();
|
|
if (IS_ENABLED(CONFIG_PREEMPTION))
|
|
irqentry_exit_cond_resched();
|
|
|
|
/* Covers both tracing and lockdep */
|
|
trace_hardirqs_on();
|
|
instrumentation_end();
|
|
} else {
|
|
/*
|
|
* IRQ flags state is correct already. Just tell RCU if it
|
|
* was not watching on entry.
|
|
*/
|
|
if (state.exit_rcu)
|
|
rcu_irq_exit();
|
|
}
|
|
}
|
|
|
|
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
|
|
{
|
|
irqentry_state_t irq_state;
|
|
|
|
irq_state.lockdep = lockdep_hardirqs_enabled();
|
|
|
|
__nmi_enter();
|
|
lockdep_hardirqs_off(CALLER_ADDR0);
|
|
lockdep_hardirq_enter();
|
|
rcu_nmi_enter();
|
|
|
|
instrumentation_begin();
|
|
trace_hardirqs_off_finish();
|
|
ftrace_nmi_enter();
|
|
instrumentation_end();
|
|
|
|
return irq_state;
|
|
}
|
|
|
|
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
|
|
{
|
|
instrumentation_begin();
|
|
ftrace_nmi_exit();
|
|
if (irq_state.lockdep) {
|
|
trace_hardirqs_on_prepare();
|
|
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
|
|
}
|
|
instrumentation_end();
|
|
|
|
rcu_nmi_exit();
|
|
lockdep_hardirq_exit();
|
|
if (irq_state.lockdep)
|
|
lockdep_hardirqs_on(CALLER_ADDR0);
|
|
__nmi_exit();
|
|
}
|