mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-08-27 06:50:37 +00:00

Cache the results of the unwind to ensure the unwind is only performed once, even when called by multiple tracers. The cache nr_entries gets cleared every time the task exits the kernel. When a stacktrace is requested, nr_entries gets set to the number of entries in the stacktrace. If another stacktrace is requested, if nr_entries is not zero, then it contains the same stacktrace that would be retrieved so it is not processed again and the entries is given to the caller. Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Indu Bhagat <indu.bhagat@oracle.com> Cc: "Jose E. Marchesi" <jemarch@gnu.org> Cc: Beau Belgrave <beaub@linux.microsoft.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Jens Axboe <axboe@kernel.dk> Cc: Florian Weimer <fweimer@redhat.com> Cc: Sam James <sam@gentoo.org> Link: https://lore.kernel.org/20250729182405.319691167@kernel.org Reviewed-by: Jens Remus <jremus@linux.ibm.com> Reviewed-By: Indu Bhagat <indu.bhagat@oracle.com> Co-developed-by: Steven Rostedt (Google) <rostedt@goodmis.org> Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
216 lines
7.0 KiB
C
216 lines
7.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __LINUX_ENTRYCOMMON_H
|
|
#define __LINUX_ENTRYCOMMON_H
|
|
|
|
#include <linux/irq-entry-common.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/seccomp.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/livepatch.h>
|
|
#include <linux/resume_user_mode.h>
|
|
|
|
#include <asm/entry-common.h>
|
|
#include <asm/syscall.h>
|
|
|
|
#ifndef _TIF_UPROBE
|
|
# define _TIF_UPROBE (0)
|
|
#endif
|
|
|
|
/*
|
|
* SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
|
|
*/
|
|
#ifndef ARCH_SYSCALL_WORK_ENTER
|
|
# define ARCH_SYSCALL_WORK_ENTER (0)
|
|
#endif
|
|
|
|
/*
|
|
* SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
|
|
*/
|
|
#ifndef ARCH_SYSCALL_WORK_EXIT
|
|
# define ARCH_SYSCALL_WORK_EXIT (0)
|
|
#endif
|
|
|
|
#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \
|
|
SYSCALL_WORK_SYSCALL_TRACEPOINT | \
|
|
SYSCALL_WORK_SYSCALL_TRACE | \
|
|
SYSCALL_WORK_SYSCALL_EMU | \
|
|
SYSCALL_WORK_SYSCALL_AUDIT | \
|
|
SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
|
|
ARCH_SYSCALL_WORK_ENTER)
|
|
#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \
|
|
SYSCALL_WORK_SYSCALL_TRACE | \
|
|
SYSCALL_WORK_SYSCALL_AUDIT | \
|
|
SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
|
|
SYSCALL_WORK_SYSCALL_EXIT_TRAP | \
|
|
ARCH_SYSCALL_WORK_EXIT)
|
|
|
|
/**
|
|
* syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
|
|
* @regs: Pointer to currents pt_regs
|
|
*
|
|
* Invoked from architecture specific syscall entry code with interrupts
|
|
* disabled. The calling code has to be non-instrumentable. When the
|
|
* function returns all state is correct, interrupts are enabled and the
|
|
* subsequent functions can be instrumented.
|
|
*
|
|
* This handles lockdep, RCU (context tracking) and tracing state, i.e.
|
|
* the functionality provided by enter_from_user_mode().
|
|
*
|
|
* This is invoked when there is extra architecture specific functionality
|
|
* to be done between establishing state and handling user mode entry work.
|
|
*/
|
|
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
|
|
|
|
long syscall_trace_enter(struct pt_regs *regs, long syscall,
|
|
unsigned long work);
|
|
|
|
/**
|
|
* syscall_enter_from_user_mode_work - Check and handle work before invoking
|
|
* a syscall
|
|
* @regs: Pointer to currents pt_regs
|
|
* @syscall: The syscall number
|
|
*
|
|
* Invoked from architecture specific syscall entry code with interrupts
|
|
* enabled after invoking syscall_enter_from_user_mode_prepare() and extra
|
|
* architecture specific work.
|
|
*
|
|
* Returns: The original or a modified syscall number
|
|
*
|
|
* If the returned syscall number is -1 then the syscall should be
|
|
* skipped. In this case the caller may invoke syscall_set_error() or
|
|
* syscall_set_return_value() first. If neither of those are called and -1
|
|
* is returned, then the syscall will fail with ENOSYS.
|
|
*
|
|
* It handles the following work items:
|
|
*
|
|
* 1) syscall_work flag dependent invocations of
|
|
* ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
|
|
* 2) Invocation of audit_syscall_entry()
|
|
*/
|
|
static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
|
|
{
|
|
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
|
|
|
|
if (work & SYSCALL_WORK_ENTER)
|
|
syscall = syscall_trace_enter(regs, syscall, work);
|
|
|
|
return syscall;
|
|
}
|
|
|
|
/**
|
|
* syscall_enter_from_user_mode - Establish state and check and handle work
|
|
* before invoking a syscall
|
|
* @regs: Pointer to currents pt_regs
|
|
* @syscall: The syscall number
|
|
*
|
|
* Invoked from architecture specific syscall entry code with interrupts
|
|
* disabled. The calling code has to be non-instrumentable. When the
|
|
* function returns all state is correct, interrupts are enabled and the
|
|
* subsequent functions can be instrumented.
|
|
*
|
|
* This is combination of syscall_enter_from_user_mode_prepare() and
|
|
* syscall_enter_from_user_mode_work().
|
|
*
|
|
* Returns: The original or a modified syscall number. See
|
|
* syscall_enter_from_user_mode_work() for further explanation.
|
|
*/
|
|
static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
|
|
{
|
|
long ret;
|
|
|
|
enter_from_user_mode(regs);
|
|
|
|
instrumentation_begin();
|
|
local_irq_enable();
|
|
ret = syscall_enter_from_user_mode_work(regs, syscall);
|
|
instrumentation_end();
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* syscall_exit_work - Handle work before returning to user mode
|
|
* @regs: Pointer to current pt_regs
|
|
* @work: Current thread syscall work
|
|
*
|
|
* Do one-time syscall specific work.
|
|
*/
|
|
void syscall_exit_work(struct pt_regs *regs, unsigned long work);
|
|
|
|
/**
|
|
* syscall_exit_to_user_mode_work - Handle work before returning to user mode
|
|
* @regs: Pointer to currents pt_regs
|
|
*
|
|
* Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
|
|
* exit_to_user_mode() to perform the final transition to user mode.
|
|
*
|
|
* Calling convention is the same as for syscall_exit_to_user_mode() and it
|
|
* returns with all work handled and interrupts disabled. The caller must
|
|
* invoke exit_to_user_mode() before actually switching to user mode to
|
|
* make the final state transitions. Interrupts must stay disabled between
|
|
* return from this function and the invocation of exit_to_user_mode().
|
|
*/
|
|
static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
|
|
{
|
|
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
|
|
unsigned long nr = syscall_get_nr(current, regs);
|
|
|
|
CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
|
|
|
|
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
|
|
if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
|
|
local_irq_enable();
|
|
}
|
|
|
|
rseq_syscall(regs);
|
|
|
|
/*
|
|
* Do one-time syscall specific work. If these work items are
|
|
* enabled, we want to run them exactly once per syscall exit with
|
|
* interrupts enabled.
|
|
*/
|
|
if (unlikely(work & SYSCALL_WORK_EXIT))
|
|
syscall_exit_work(regs, work);
|
|
local_irq_disable_exit_to_user();
|
|
exit_to_user_mode_prepare(regs);
|
|
}
|
|
|
|
/**
|
|
* syscall_exit_to_user_mode - Handle work before returning to user mode
|
|
* @regs: Pointer to currents pt_regs
|
|
*
|
|
* Invoked with interrupts enabled and fully valid regs. Returns with all
|
|
* work handled, interrupts disabled such that the caller can immediately
|
|
* switch to user mode. Called from architecture specific syscall and ret
|
|
* from fork code.
|
|
*
|
|
* The call order is:
|
|
* 1) One-time syscall exit work:
|
|
* - rseq syscall exit
|
|
* - audit
|
|
* - syscall tracing
|
|
* - ptrace (single stepping)
|
|
*
|
|
* 2) Preparatory work
|
|
* - Exit to user mode loop (common TIF handling). Invokes
|
|
* arch_exit_to_user_mode_work() for architecture specific TIF work
|
|
* - Architecture specific one time work arch_exit_to_user_mode_prepare()
|
|
* - Address limit and lockdep checks
|
|
*
|
|
* 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
|
|
* functionality in exit_to_user_mode().
|
|
*
|
|
* This is a combination of syscall_exit_to_user_mode_work() (1,2) and
|
|
* exit_to_user_mode(). This function is preferred unless there is a
|
|
* compelling architectural reason to use the separate functions.
|
|
*/
|
|
static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
|
|
{
|
|
instrumentation_begin();
|
|
syscall_exit_to_user_mode_work(regs);
|
|
instrumentation_end();
|
|
exit_to_user_mode();
|
|
}
|
|
|
|
#endif
|