mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-08-30 21:52:21 +00:00

The use of rcuref_t for reference counting introduces a performance bottleneck when accessed concurrently by multiple threads during futex operations. Replace rcuref_t with specially crafted per-CPU reference counters. The lifetime logic remains the same. The newly allocated private hash starts in FR_PERCPU state. In this state, each futex operation that requires the private hash uses a per-CPU counter (an unsigned int) for incrementing or decrementing the reference count. When the private hash is about to be replaced, the per-CPU counters are migrated to an atomic_t counter, mm_struct::futex_atomic. The migration process: - Waiting for one RCU grace period to ensure all users observe the current private hash. This can be skipped if a grace period has elapsed since the private hash was assigned. - futex_private_hash::state is set to FR_ATOMIC, forcing all users to use mm_struct::futex_atomic for reference counting. - After an RCU grace period, all users are guaranteed to be using the atomic counter. The per-CPU counters can now be summed up and added to the atomic_t counter. If the resulting count is zero, the hash can be safely replaced. Otherwise, active users still hold a valid reference. - Once the atomic reference count drops to zero, the next futex operation will switch to the new private hash. call_rcu_hurry() is used to speed up the transition, which otherwise might be delayed with RCU_LAZY. There is nothing wrong with using call_rcu(). The side effects would be that on auto scaling the new hash is used later and the SET_SLOTS prctl() will block longer. [bigeasy: commit description + mm get/ put_async] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20250710110011.384614-3-bigeasy@linutronix.de
121 lines
3.4 KiB
C
121 lines
3.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_FUTEX_H
|
|
#define _LINUX_FUTEX_H
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/ktime.h>
|
|
#include <linux/mm_types.h>
|
|
|
|
#include <uapi/linux/futex.h>
|
|
|
|
struct inode;
|
|
struct task_struct;
|
|
|
|
/*
|
|
* Futexes are matched on equal values of this key.
|
|
* The key type depends on whether it's a shared or private mapping.
|
|
* Don't rearrange members without looking at hash_futex().
|
|
*
|
|
* offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
|
|
* The two low-order bits of offset encode the kind of key:
|
|
* 00 : Private process futex (PTHREAD_PROCESS_PRIVATE)
|
|
* (no reference on an inode or mm)
|
|
* 01 : Shared futex (PTHREAD_PROCESS_SHARED)
|
|
* mapped on a file (reference on the underlying inode)
|
|
* 10 : Shared futex (PTHREAD_PROCESS_SHARED)
|
|
* (but private mapping on an mm, and reference taken on it)
|
|
*/
|
|
|
|
/* Bit 0 of the key offset: set when the key holds a reference on an inode. */
#define FUT_OFF_INODE		(1 << 0)
/* Bit 1 of the key offset: set when the key holds a reference on an mm. */
#define FUT_OFF_MMSHARED	(1 << 1)
|
|
|
|
union futex_key {
|
|
struct {
|
|
u64 i_seq;
|
|
unsigned long pgoff;
|
|
unsigned int offset;
|
|
/* unsigned int node; */
|
|
} shared;
|
|
struct {
|
|
union {
|
|
struct mm_struct *mm;
|
|
u64 __tmp;
|
|
};
|
|
unsigned long address;
|
|
unsigned int offset;
|
|
/* unsigned int node; */
|
|
} private;
|
|
struct {
|
|
u64 ptr;
|
|
unsigned long word;
|
|
unsigned int offset;
|
|
unsigned int node; /* NOT hashed! */
|
|
} both;
|
|
};
|
|
|
|
/*
 * Compound literal yielding a futex_key whose both.ptr is explicitly zero.
 * The other members of "both" are omitted from the designated initializer and
 * are therefore zero-initialized by the C initialization rules as well.
 */
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
|
|
|
|
#ifdef CONFIG_FUTEX
|
|
/*
 * Futex state values of a task; futex_init_task() stores FUTEX_STATE_OK in
 * task_struct::futex_state.
 */
enum {
	FUTEX_STATE_OK		= 0,
	FUTEX_STATE_EXITING	= 1,
	FUTEX_STATE_DEAD	= 2,
};
|
|
|
|
static inline void futex_init_task(struct task_struct *tsk)
|
|
{
|
|
tsk->robust_list = NULL;
|
|
#ifdef CONFIG_COMPAT
|
|
tsk->compat_robust_list = NULL;
|
|
#endif
|
|
INIT_LIST_HEAD(&tsk->pi_state_list);
|
|
tsk->pi_state_cache = NULL;
|
|
tsk->futex_state = FUTEX_STATE_OK;
|
|
mutex_init(&tsk->futex_exit_mutex);
|
|
}
|
|
|
|
/*
 * Per-task futex hooks. Only declared here; the definitions are not part of
 * this header.
 */
void futex_exit_recursive(struct task_struct *tsk);
void futex_exit_release(struct task_struct *tsk);
void futex_exec_release(struct task_struct *tsk);
|
|
|
|
/*
 * Futex operation entry point. Only declared here; the definition is not part
 * of this header.
 */
long do_futex(u32 __user *uaddr, int op, u32 val,
	      ktime_t *timeout, u32 __user *uaddr2,
	      u32 val2, u32 val3);

/*
 * Futex hash prctl() handler. NOTE(review): the parameter names suggest they
 * carry prctl() arguments 2-4 - confirm against the caller.
 */
int futex_hash_prctl(unsigned long arg2, unsigned long arg3,
		     unsigned long arg4);
|
|
|
|
#ifdef CONFIG_FUTEX_PRIVATE_HASH
|
|
/*
 * Private futex hash API, available with CONFIG_FUTEX_PRIVATE_HASH. Only
 * declared here; the definitions are not part of this header.
 */
int  futex_hash_allocate_default(void);
void futex_hash_free(struct mm_struct *mm);
int  futex_mm_init(struct mm_struct *mm);
|
|
|
|
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
|
|
/*
 * No-op stand-ins for the private futex hash API, used when
 * CONFIG_FUTEX_PRIVATE_HASH is disabled. Their signatures mirror the real
 * prototypes so callers compile identically in both configurations.
 */

/* Nothing to allocate; always reports success (0). */
static inline int futex_hash_allocate_default(void)
{
	return 0;
}

/*
 * Nothing to free. Returns void like the CONFIG_FUTEX_PRIVATE_HASH
 * prototype; an int return here would let code that uses the result build
 * only when the private hash is disabled.
 */
static inline void futex_hash_free(struct mm_struct *mm)
{
}

/* Nothing to set up in @mm; always reports success (0). */
static inline int futex_mm_init(struct mm_struct *mm)
{
	return 0;
}
|
|
#endif /* CONFIG_FUTEX_PRIVATE_HASH */
|
|
|
|
#else /* !CONFIG_FUTEX */
|
|
/*
 * Without CONFIG_FUTEX the per-task futex hooks have nothing to do; they are
 * provided as empty inline functions.
 */
static inline void futex_init_task(struct task_struct *tsk)
{
}

static inline void futex_exit_recursive(struct task_struct *tsk)
{
}

static inline void futex_exit_release(struct task_struct *tsk)
{
}

static inline void futex_exec_release(struct task_struct *tsk)
{
}
|
|
static inline long do_futex(u32 __user *uaddr, int op, u32 val,
|
|
ktime_t *timeout, u32 __user *uaddr2,
|
|
u32 val2, u32 val3)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
/*
 * Stub used when CONFIG_FUTEX is disabled: ignores all arguments and always
 * fails with -EINVAL.
 */
static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3,
				   unsigned long arg4)
{
	return -EINVAL;
}
|
|
/* Stub used when CONFIG_FUTEX is disabled: nothing to allocate, returns 0. */
static inline int futex_hash_allocate_default(void) { return 0; }
|
|
/*
 * Stub used when CONFIG_FUTEX is disabled: nothing to free. Returns void
 * like the CONFIG_FUTEX_PRIVATE_HASH prototype of futex_hash_free(), so the
 * function has a single signature in every configuration.
 */
static inline void futex_hash_free(struct mm_struct *mm)
{
}

/* Stub used when CONFIG_FUTEX is disabled: nothing to set up, returns 0. */
static inline int futex_mm_init(struct mm_struct *mm)
{
	return 0;
}
|
|
|
|
#endif
|
|
|
|
#endif
|