mirror of
https://git.proxmox.com/git/mirror_ubuntu-kernels.git
synced 2025-12-10 01:46:56 +00:00
Many architectures' switch_mm() (e.g. arm64) do not have an smp_mb()
which the core scheduler code has depended upon since commit:
commit 223baf9d17 ("sched: Fix performance regression introduced by mm_cid")
If switch_mm() doesn't call smp_mb(), sched_mm_cid_remote_clear() can
unset the actively used cid when it fails to observe active task after it
sets lazy_put.
There *is* a memory barrier between storing to rq->curr and _return to
userspace_ (as required by membarrier), but the rseq mm_cid has stricter
requirements: the barrier needs to be issued between store to rq->curr
and switch_mm_cid(), which happens earlier than:
- spin_unlock(),
- switch_to().
So it's fine when the architecture switch_mm() happens to have that
barrier already, but less so when the architecture only provides the
full barrier in switch_to() or spin_unlock().
It is a bug in the rseq switch_mm_cid() implementation. All architectures
that don't have memory barriers in switch_mm(), but rather have the full
barrier either in finish_lock_switch() or switch_to() have them too late
for the needs of switch_mm_cid().
Introduce a new smp_mb__after_switch_mm(), defined as smp_mb() in the
generic barrier.h header, and use it in switch_mm_cid() for scheduler
transitions where switch_mm() is expected to provide a memory barrier.
Architectures can override smp_mb__after_switch_mm() if their
switch_mm() implementation provides an implicit memory barrier.
Override it with a no-op on x86 which implicitly provide this memory
barrier by writing to CR3.
Fixes: 223baf9d17 ("sched: Fix performance regression introduced by mm_cid")
Reported-by: levi.yun <yeoreum.yun@arm.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> # for arm64
Acked-by: Dave Hansen <dave.hansen@linux.intel.com> # for x86
Cc: <stable@vger.kernel.org> # 6.4.x
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240415152114.59122-2-mathieu.desnoyers@efficios.com
88 lines
2.4 KiB
C
88 lines
2.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_X86_BARRIER_H
|
|
#define _ASM_X86_BARRIER_H
|
|
|
|
#include <asm/alternative.h>
|
|
#include <asm/nops.h>
|
|
|
|
/*
|
|
* Force strict CPU ordering.
|
|
* And yes, this might be required on UP too when we're talking
|
|
* to devices.
|
|
*/
|
|
|
|
#ifdef CONFIG_X86_32
|
|
#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \
|
|
X86_FEATURE_XMM2) ::: "memory", "cc")
|
|
#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \
|
|
X86_FEATURE_XMM2) ::: "memory", "cc")
|
|
#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \
|
|
X86_FEATURE_XMM2) ::: "memory", "cc")
|
|
#else
|
|
#define __mb() asm volatile("mfence":::"memory")
|
|
#define __rmb() asm volatile("lfence":::"memory")
|
|
#define __wmb() asm volatile("sfence" ::: "memory")
|
|
#endif
|
|
|
|
/**
|
|
* array_index_mask_nospec() - generate a mask that is ~0UL when the
|
|
* bounds check succeeds and 0 otherwise
|
|
* @index: array element index
|
|
* @size: number of elements in array
|
|
*
|
|
* Returns:
|
|
* 0 - (index < size)
|
|
*/
|
|
static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
|
|
unsigned long size)
|
|
{
|
|
unsigned long mask;
|
|
|
|
asm volatile ("cmp %1,%2; sbb %0,%0;"
|
|
:"=r" (mask)
|
|
:"g"(size),"r" (index)
|
|
:"cc");
|
|
return mask;
|
|
}
|
|
|
|
/* Override the default implementation from linux/nospec.h. */
|
|
#define array_index_mask_nospec array_index_mask_nospec
|
|
|
|
/* Prevent speculative execution past this barrier. */
|
|
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
|
|
|
|
#define __dma_rmb() barrier()
|
|
#define __dma_wmb() barrier()
|
|
|
|
#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
|
|
|
|
#define __smp_rmb() dma_rmb()
|
|
#define __smp_wmb() barrier()
|
|
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
|
|
|
#define __smp_store_release(p, v) \
|
|
do { \
|
|
compiletime_assert_atomic_type(*p); \
|
|
barrier(); \
|
|
WRITE_ONCE(*p, v); \
|
|
} while (0)
|
|
|
|
#define __smp_load_acquire(p) \
|
|
({ \
|
|
typeof(*p) ___p1 = READ_ONCE(*p); \
|
|
compiletime_assert_atomic_type(*p); \
|
|
barrier(); \
|
|
___p1; \
|
|
})
|
|
|
|
/* Atomic operations are already serializing on x86 */
|
|
#define __smp_mb__before_atomic() do { } while (0)
|
|
#define __smp_mb__after_atomic() do { } while (0)
|
|
|
|
/* Writing to CR3 provides a full memory barrier in switch_mm(). */
|
|
#define smp_mb__after_switch_mm() do { } while (0)
|
|
|
|
#include <asm-generic/barrier.h>
|
|
|
|
#endif /* _ASM_X86_BARRIER_H */
|