diff --git a/include/linux/futex.h b/include/linux/futex.h index b70df27d7e85..8f1be08bef18 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,11 @@ #include #include +#include #include struct inode; -struct mm_struct; struct task_struct; /* @@ -77,7 +77,22 @@ void futex_exec_release(struct task_struct *tsk); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -#else +int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4); + +#ifdef CONFIG_FUTEX_PRIVATE_HASH +void futex_hash_free(struct mm_struct *mm); + +static inline void futex_mm_init(struct mm_struct *mm) +{ + mm->futex_phash = NULL; +} + +#else /* !CONFIG_FUTEX_PRIVATE_HASH */ +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } +#endif /* CONFIG_FUTEX_PRIVATE_HASH */ + +#else /* !CONFIG_FUTEX */ static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_exit_recursive(struct task_struct *tsk) { } static inline void futex_exit_release(struct task_struct *tsk) { } @@ -88,6 +103,13 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val, { return -EINVAL; } +static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) +{ + return -EINVAL; +} +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } + #endif #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 56d07edd01f9..a4b5661e4177 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -31,6 +31,7 @@ #define INIT_PASID 0 struct address_space; +struct futex_private_hash; struct mem_cgroup; /* @@ -1031,7 +1032,9 @@ struct mm_struct { */ seqcount_t mm_lock_seq; #endif - +#ifdef CONFIG_FUTEX_PRIVATE_HASH + struct futex_private_hash *futex_phash; +#endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ 
unsigned long hiwater_vm; /* High-water virtual memory usage */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 15c18ef4eb11..3b93fb906e3c 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -364,4 +364,9 @@ struct prctl_mm_map { # define PR_TIMER_CREATE_RESTORE_IDS_ON 1 # define PR_TIMER_CREATE_RESTORE_IDS_GET 2 +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 + #endif /* _LINUX_PRCTL_H */ diff --git a/init/Kconfig b/init/Kconfig index 63f5974b9fa6..4b84da2b2ec4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1699,6 +1699,11 @@ config FUTEX_PI depends on FUTEX && RT_MUTEXES default y +config FUTEX_PRIVATE_HASH + bool + depends on FUTEX && !BASE_SMALL && MMU + default y + config EPOLL bool "Enable eventpoll support" if EXPERT default y diff --git a/kernel/fork.c b/kernel/fork.c index c4b26cd8998b..831dfec45054 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1305,6 +1305,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); + futex_mm_init(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS) mm->pmd_huge_pte = NULL; #endif @@ -1387,6 +1388,7 @@ static inline void __mmput(struct mm_struct *mm) if (mm->binfmt) module_put(mm->binfmt->module); lru_gen_del_mm(mm); + futex_hash_free(mm); mmdrop(mm); } diff --git a/kernel/futex/core.c b/kernel/futex/core.c index afc66780f84f..818df7420a1a 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "futex.h" #include "../locking/rtmutex_common.h" @@ -55,6 +56,12 @@ static struct { #define futex_queues (__futex_data.queues) #define futex_hashmask (__futex_data.hashmask) +struct futex_private_hash { + unsigned int hash_mask; + void *mm; + bool custom; + struct 
futex_hash_bucket queues[]; +}; /* * Fault injections for futexes. @@ -107,9 +114,17 @@ late_initcall(fail_futex_debugfs); #endif /* CONFIG_FAIL_FUTEX */ -struct futex_private_hash *futex_private_hash(void) +static struct futex_hash_bucket * +__futex_hash(union futex_key *key, struct futex_private_hash *fph); + +#ifdef CONFIG_FUTEX_PRIVATE_HASH +static inline bool futex_key_is_private(union futex_key *key) { - return NULL; + /* + * Relies on get_futex_key() to set either bit for shared + * futexes -- see comment with union futex_key. + */ + return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED)); } bool futex_private_hash_get(struct futex_private_hash *fph) @@ -117,21 +132,8 @@ bool futex_private_hash_get(struct futex_private_hash *fph) return false; } -void futex_private_hash_put(struct futex_private_hash *fph) { } - -/** - * futex_hash - Return the hash bucket in the global hash - * @key: Pointer to the futex key for which the hash is calculated - * - * We hash on the keys returned from get_futex_key (see below) and return the - * corresponding hash bucket in the global hash. 
- */ -struct futex_hash_bucket *futex_hash(union futex_key *key) +void futex_private_hash_put(struct futex_private_hash *fph) { - u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, - key->both.offset); - - return &futex_queues[hash & futex_hashmask]; } /** @@ -144,6 +146,84 @@ struct futex_hash_bucket *futex_hash(union futex_key *key) void futex_hash_get(struct futex_hash_bucket *hb) { } void futex_hash_put(struct futex_hash_bucket *hb) { } +static struct futex_hash_bucket * +__futex_hash_private(union futex_key *key, struct futex_private_hash *fph) +{ + u32 hash; + + if (!futex_key_is_private(key)) + return NULL; + + if (!fph) + fph = key->private.mm->futex_phash; + if (!fph || !fph->hash_mask) + return NULL; + + hash = jhash2((void *)&key->private.address, + sizeof(key->private.address) / 4, + key->both.offset); + return &fph->queues[hash & fph->hash_mask]; +} + +struct futex_private_hash *futex_private_hash(void) +{ + struct mm_struct *mm = current->mm; + struct futex_private_hash *fph; + + fph = mm->futex_phash; + return fph; +} + +struct futex_hash_bucket *futex_hash(union futex_key *key) +{ + struct futex_hash_bucket *hb; + + hb = __futex_hash(key, NULL); + return hb; +} + +#else /* !CONFIG_FUTEX_PRIVATE_HASH */ + +static struct futex_hash_bucket * +__futex_hash_private(union futex_key *key, struct futex_private_hash *fph) +{ + return NULL; +} + +struct futex_hash_bucket *futex_hash(union futex_key *key) +{ + return __futex_hash(key, NULL); +} + +#endif /* CONFIG_FUTEX_PRIVATE_HASH */ + +/** + * __futex_hash - Return the hash bucket + * @key: Pointer to the futex key for which the hash is calculated + * @fph: Pointer to private hash if known + * + * We hash on the keys returned from get_futex_key (see below) and return the + * corresponding hash bucket. + * If the FUTEX is PROCESS_PRIVATE then a per-process hash bucket (from the + * private hash) is returned if existing. Otherwise a hash bucket from the + * global hash is returned. 
+ */ +static struct futex_hash_bucket * +__futex_hash(union futex_key *key, struct futex_private_hash *fph) +{ + struct futex_hash_bucket *hb; + u32 hash; + + hb = __futex_hash_private(key, fph); + if (hb) + return hb; + + hash = jhash2((u32 *)key, + offsetof(typeof(*key), both.offset) / 4, + key->both.offset); + return &futex_queues[hash & futex_hashmask]; +} + /** * futex_setup_timer - set up the sleeping hrtimer. * @time: ptr to the given timeout value @@ -985,6 +1065,13 @@ static void exit_pi_state_list(struct task_struct *curr) struct futex_pi_state *pi_state; union futex_key key = FUTEX_KEY_INIT; + /* + * Ensure the hash remains stable (no resize) during the while loop + * below. The hb pointer is acquired under the pi_lock so we can't block + * on the mutex. + */ + WARN_ON(curr != current); + guard(private_hash)(); /* * We are a ZOMBIE and nobody can enqueue itself on * pi_state_list anymore, but we have to be careful @@ -1160,13 +1247,98 @@ void futex_exit_release(struct task_struct *tsk) futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } -static void futex_hash_bucket_init(struct futex_hash_bucket *fhb) +static void futex_hash_bucket_init(struct futex_hash_bucket *fhb, + struct futex_private_hash *fph) { +#ifdef CONFIG_FUTEX_PRIVATE_HASH + fhb->priv = fph; +#endif atomic_set(&fhb->waiters, 0); plist_head_init(&fhb->chain); spin_lock_init(&fhb->lock); } +#ifdef CONFIG_FUTEX_PRIVATE_HASH +void futex_hash_free(struct mm_struct *mm) +{ + kvfree(mm->futex_phash); +} + +static int futex_hash_allocate(unsigned int hash_slots, bool custom) +{ + struct mm_struct *mm = current->mm; + struct futex_private_hash *fph; + int i; + + if (hash_slots && (hash_slots == 1 || !is_power_of_2(hash_slots))) + return -EINVAL; + + if (mm->futex_phash) + return -EALREADY; + + if (!thread_group_empty(current)) + return -EINVAL; + + fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); + if (!fph) + return -ENOMEM; + + fph->hash_mask = hash_slots ? 
hash_slots - 1 : 0; + fph->custom = custom; + fph->mm = mm; + + for (i = 0; i < hash_slots; i++) + futex_hash_bucket_init(&fph->queues[i], fph); + + mm->futex_phash = fph; + return 0; +} + +static int futex_hash_get_slots(void) +{ + struct futex_private_hash *fph; + + fph = current->mm->futex_phash; + if (fph && fph->hash_mask) + return fph->hash_mask + 1; + return 0; +} + +#else + +static int futex_hash_allocate(unsigned int hash_slots, bool custom) +{ + return -EINVAL; +} + +static int futex_hash_get_slots(void) +{ + return 0; +} +#endif + +int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) +{ + int ret; + + switch (arg2) { + case PR_FUTEX_HASH_SET_SLOTS: + if (arg4 != 0) + return -EINVAL; + ret = futex_hash_allocate(arg3, true); + break; + + case PR_FUTEX_HASH_GET_SLOTS: + ret = futex_hash_get_slots(); + break; + + default: + ret = -EINVAL; + break; + } + return ret; +} + static int __init futex_init(void) { unsigned long hashsize, i; @@ -1185,7 +1357,7 @@ static int __init futex_init(void) hashsize = 1UL << futex_shift; for (i = 0; i < hashsize; i++) - futex_hash_bucket_init(&futex_queues[i]); + futex_hash_bucket_init(&futex_queues[i], NULL); futex_hashmask = hashsize - 1; return 0; diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 26e69333cb74..899aed5acde1 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -118,6 +118,7 @@ struct futex_hash_bucket { atomic_t waiters; spinlock_t lock; struct plist_head chain; + struct futex_private_hash *priv; } ____cacheline_aligned_in_smp; /* @@ -204,6 +205,7 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, int flags, u64 range_ns); extern struct futex_hash_bucket *futex_hash(union futex_key *key); +#ifdef CONFIG_FUTEX_PRIVATE_HASH extern void futex_hash_get(struct futex_hash_bucket *hb); extern void futex_hash_put(struct futex_hash_bucket *hb); @@ -211,6 +213,14 @@ extern struct futex_private_hash *futex_private_hash(void); extern bool 
futex_private_hash_get(struct futex_private_hash *fph); extern void futex_private_hash_put(struct futex_private_hash *fph); +#else /* !CONFIG_FUTEX_PRIVATE_HASH */ +static inline void futex_hash_get(struct futex_hash_bucket *hb) { } +static inline void futex_hash_put(struct futex_hash_bucket *hb) { } +static inline struct futex_private_hash *futex_private_hash(void) { return NULL; } +static inline bool futex_private_hash_get(struct futex_private_hash *fph) { return false; } /* stub takes the same argument as the extern prototype */ +static inline void futex_private_hash_put(struct futex_private_hash *fph) { } +#endif + DEFINE_CLASS(hb, struct futex_hash_bucket *, if (_T) futex_hash_put(_T), futex_hash(key), union futex_key *key); diff --git a/kernel/sys.c b/kernel/sys.c index c434968e9f5d..adc0de0aa364 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -2820,6 +2821,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = posixtimer_create_prctl(arg2); break; + case PR_FUTEX_HASH: + error = futex_hash_prctl(arg2, arg3, arg4); + break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error = -EINVAL;