linux-loongson/include/linux/hrtimer_defs.h
Frederic Weisbecker 53dac34539 hrtimers: Force migrate away hrtimers queued after CPUHP_AP_HRTIMERS_DYING
hrtimers are migrated away from the dying CPU to any online target at
the CPUHP_AP_HRTIMERS_DYING stage in order not to delay bandwidth timers
handling tasks involved in the CPU hotplug forward progress.

However wakeups can still be performed by the outgoing CPU after
CPUHP_AP_HRTIMERS_DYING. Those can result again in bandwidth timers being
armed. Depending on several considerations (crystal ball power management
based election, earliest timer already enqueued, timer migration enabled or
not), the target may eventually be the current CPU even if offline. If that
happens, the timer is eventually ignored.

The most notable example is RCU which had to deal with each and every of
those wake-ups by deferring them to an online CPU, along with related
workarounds:

_ e787644caf (rcu: Defer RCU kthreads wakeup when CPU is dying)
_ 9139f93209 (rcu/nocb: Fix RT throttling hrtimer armed from offline CPU)
_ f7345ccc62 (rcu/nocb: Fix rcuog wake-up from offline softirq)

The problem isn't confined to RCU though as the stop machine kthread
(which runs CPUHP_AP_HRTIMERS_DYING) reports its completion at the end
of its work through cpu_stop_signal_done() and performs a wake up that
eventually arms the deadline server timer:

   WARNING: CPU: 94 PID: 588 at kernel/time/hrtimer.c:1086 hrtimer_start_range_ns+0x289/0x2d0
   CPU: 94 UID: 0 PID: 588 Comm: migration/94 Not tainted
   Stopper: multi_cpu_stop+0x0/0x120 <- stop_machine_cpuslocked+0x66/0xc0
   RIP: 0010:hrtimer_start_range_ns+0x289/0x2d0
   Call Trace:
   <TASK>
     start_dl_timer
     enqueue_dl_entity
     dl_server_start
     enqueue_task_fair
     enqueue_task
     ttwu_do_activate
     try_to_wake_up
     complete
     cpu_stopper_thread

Instead of providing yet another bandaid to work around the situation, fix
it in the hrtimers infrastructure instead: always migrate away a timer to
an online target whenever it is enqueued from an offline CPU.

This will also allow to revert all the above RCU disgraceful hacks.

Fixes: 5c0930ccaa ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
Reported-by: Vlad Poenaru <vlad.wing@gmail.com>
Reported-by: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/all/20250117232433.24027-1-frederic@kernel.org
Closes: 20241213203739.1519801-1-usamaarif642@gmail.com
2025-01-23 20:06:35 +01:00

133 lines
4.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HRTIMER_DEFS_H
#define _LINUX_HRTIMER_DEFS_H
#include <linux/ktime.h>
#include <linux/timerqueue.h>
#include <linux/seqlock.h>
#ifdef CONFIG_HIGH_RES_TIMERS
/*
* The resolution of the clocks. The resolution value is returned in
* the clock_getres() system call to give application programmers an
* idea of the (in)accuracy of timers. Timer values are rounded up to
* this resolution values.
*/
# define HIGH_RES_NSEC 1
# define KTIME_HIGH_RES (HIGH_RES_NSEC)
# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
#else
# define MONOTONIC_RES_NSEC LOW_RES_NSEC
# define KTIME_MONOTONIC_RES KTIME_LOW_RES
#endif
#ifdef CONFIG_64BIT
# define __hrtimer_clock_base_align ____cacheline_aligned
#else
# define __hrtimer_clock_base_align
#endif
/**
* struct hrtimer_clock_base - the timer base for a specific clock
* @cpu_base: per cpu clock base
* @index: clock type index for per_cpu support when moving a
* timer to a base on another cpu.
* @clockid: clock id for per_cpu support
* @seq: seqcount around __run_hrtimer
* @running: pointer to the currently running hrtimer
* @active: red black tree root node for the active timers
* @get_time: function to retrieve the current time of the clock
* @offset: offset of this clock to the monotonic base
*/
struct hrtimer_clock_base {
struct hrtimer_cpu_base *cpu_base;
unsigned int index;
clockid_t clockid;
seqcount_raw_spinlock_t seq;
struct hrtimer *running;
struct timerqueue_head active;
ktime_t (*get_time)(void);
ktime_t offset;
} __hrtimer_clock_base_align;
enum hrtimer_base_type {
HRTIMER_BASE_MONOTONIC,
HRTIMER_BASE_REALTIME,
HRTIMER_BASE_BOOTTIME,
HRTIMER_BASE_TAI,
HRTIMER_BASE_MONOTONIC_SOFT,
HRTIMER_BASE_REALTIME_SOFT,
HRTIMER_BASE_BOOTTIME_SOFT,
HRTIMER_BASE_TAI_SOFT,
HRTIMER_MAX_CLOCK_BASES,
};
/**
* struct hrtimer_cpu_base - the per cpu clock bases
* @lock: lock protecting the base and associated clock bases
* and timers
* @cpu: cpu number
* @active_bases: Bitfield to mark bases with active timers
* @clock_was_set_seq: Sequence counter of clock was set events
* @hres_active: State of high resolution mode
* @in_hrtirq: hrtimer_interrupt() is currently executing
* @hang_detected: The last hrtimer interrupt detected a hang
* @softirq_activated: displays, if the softirq is raised - update of softirq
* related settings is not required then.
* @nr_events: Total number of hrtimer interrupt events
* @nr_retries: Total number of hrtimer interrupt retries
* @nr_hangs: Total number of hrtimer interrupt hangs
* @max_hang_time: Maximum time spent in hrtimer_interrupt
* @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
* expired
* @online: CPU is online from an hrtimers point of view
* @timer_waiters: A hrtimer_cancel() invocation waits for the timer
* callback to finish.
* @expires_next: absolute time of the next event, is required for remote
* hrtimer enqueue; it is the total first expiry time (hard
* and soft hrtimer are taken into account)
* @next_timer: Pointer to the first expiring timer
* @softirq_expires_next: Time to check, if soft queues needs also to be expired
* @softirq_next_timer: Pointer to the first expiring softirq based timer
* @clock_base: array of clock bases for this cpu
*
* Note: next_timer is just an optimization for __remove_hrtimer().
* Do not dereference the pointer because it is not reliable on
* cross cpu removals.
*/
struct hrtimer_cpu_base {
raw_spinlock_t lock;
unsigned int cpu;
unsigned int active_bases;
unsigned int clock_was_set_seq;
unsigned int hres_active : 1,
in_hrtirq : 1,
hang_detected : 1,
softirq_activated : 1,
online : 1;
#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int nr_events;
unsigned short nr_retries;
unsigned short nr_hangs;
unsigned int max_hang_time;
#endif
#ifdef CONFIG_PREEMPT_RT
spinlock_t softirq_expiry_lock;
atomic_t timer_waiters;
#endif
ktime_t expires_next;
struct hrtimer *next_timer;
ktime_t softirq_expires_next;
struct hrtimer *softirq_next_timer;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
call_single_data_t csd;
} ____cacheline_aligned;
#endif