linux-loongson/include/linux/futex.h
Peter Zijlstra 56180dd20c futex: Use RCU-based per-CPU reference counting instead of rcuref_t
The use of rcuref_t for reference counting introduces a performance bottleneck
when accessed concurrently by multiple threads during futex operations.

Replace rcuref_t with special crafted per-CPU reference counters. The
lifetime logic remains the same.

The newly allocate private hash starts in FR_PERCPU state. In this state, each
futex operation that requires the private hash uses a per-CPU counter (an
unsigned int) for incrementing or decrementing the reference count.

When the private hash is about to be replaced, the per-CPU counters are
migrated to a atomic_t counter mm_struct::futex_atomic.
The migration process:
- Waiting for one RCU grace period to ensure all users observe the
  current private hash. This can be skipped if a grace period elapsed
  since the private hash was assigned.

- futex_private_hash::state is set to FR_ATOMIC, forcing all users to
  use mm_struct::futex_atomic for reference counting.

- After a RCU grace period, all users are guaranteed to be using the
  atomic counter. The per-CPU counters can now be summed up and added to
  the atomic_t counter. If the resulting count is zero, the hash can be
  safely replaced. Otherwise, active users still hold a valid reference.

- Once the atomic reference count drops to zero, the next futex
  operation will switch to the new private hash.

call_rcu_hurry() is used to speed up transition which otherwise might be
delay with RCU_LAZY. There is nothing wrong with using call_rcu(). The
side effects would be that on auto scaling the new hash is used later
and the SET_SLOTS prctl() will block longer.

[bigeasy: commit description + mm get/ put_async]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250710110011.384614-3-bigeasy@linutronix.de
2025-07-11 16:02:00 +02:00

121 lines
3.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FUTEX_H
#define _LINUX_FUTEX_H
#include <linux/sched.h>
#include <linux/ktime.h>
#include <linux/mm_types.h>
#include <uapi/linux/futex.h>
struct inode;
struct task_struct;
/*
* Futexes are matched on equal values of this key.
* The key type depends on whether it's a shared or private mapping.
* Don't rearrange members without looking at hash_futex().
*
* offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
* We use the two low order bits of offset to tell what is the kind of key :
* 00 : Private process futex (PTHREAD_PROCESS_PRIVATE)
* (no reference on an inode or mm)
* 01 : Shared futex (PTHREAD_PROCESS_SHARED)
* mapped on a file (reference on the underlying inode)
* 10 : Shared futex (PTHREAD_PROCESS_SHARED)
* (but private mapping on an mm, and reference taken on it)
*/
#define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */
#define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */
union futex_key {
struct {
u64 i_seq;
unsigned long pgoff;
unsigned int offset;
/* unsigned int node; */
} shared;
struct {
union {
struct mm_struct *mm;
u64 __tmp;
};
unsigned long address;
unsigned int offset;
/* unsigned int node; */
} private;
struct {
u64 ptr;
unsigned long word;
unsigned int offset;
unsigned int node; /* NOT hashed! */
} both;
};
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
#ifdef CONFIG_FUTEX
enum {
FUTEX_STATE_OK,
FUTEX_STATE_EXITING,
FUTEX_STATE_DEAD,
};
static inline void futex_init_task(struct task_struct *tsk)
{
tsk->robust_list = NULL;
#ifdef CONFIG_COMPAT
tsk->compat_robust_list = NULL;
#endif
INIT_LIST_HEAD(&tsk->pi_state_list);
tsk->pi_state_cache = NULL;
tsk->futex_state = FUTEX_STATE_OK;
mutex_init(&tsk->futex_exit_mutex);
}
void futex_exit_recursive(struct task_struct *tsk);
void futex_exit_release(struct task_struct *tsk);
void futex_exec_release(struct task_struct *tsk);
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
#ifdef CONFIG_FUTEX_PRIVATE_HASH
int futex_hash_allocate_default(void);
void futex_hash_free(struct mm_struct *mm);
int futex_mm_init(struct mm_struct *mm);
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
static inline int futex_hash_allocate_default(void) { return 0; }
static inline int futex_hash_free(struct mm_struct *mm) { return 0; }
static inline int futex_mm_init(struct mm_struct *mm) { return 0; }
#endif /* CONFIG_FUTEX_PRIVATE_HASH */
#else /* !CONFIG_FUTEX */
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_exit_recursive(struct task_struct *tsk) { }
static inline void futex_exit_release(struct task_struct *tsk) { }
static inline void futex_exec_release(struct task_struct *tsk) { }
static inline long do_futex(u32 __user *uaddr, int op, u32 val,
ktime_t *timeout, u32 __user *uaddr2,
u32 val2, u32 val3)
{
return -EINVAL;
}
static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
{
return -EINVAL;
}
static inline int futex_hash_allocate_default(void)
{
return 0;
}
static inline int futex_hash_free(struct mm_struct *mm) { return 0; }
static inline int futex_mm_init(struct mm_struct *mm) { return 0; }
#endif
#endif