mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-09-01 15:14:52 +00:00

To enable SLAB_TYPESAFE_BY_RCU for vma cache we need to ensure that object reuse before RCU grace period is over will be detected by lock_vma_under_rcu(). Current checks are sufficient as long as vma is detached before it is freed. The only place this is not currently happening is in exit_mmap(). Add the missing vma_mark_detached() in exit_mmap(). Another issue which might trick lock_vma_under_rcu() during vma reuse is vm_area_dup(), which copies the entire content of the vma into a new one, overriding new vma's vm_refcnt and temporarily making it appear as attached. This might trick a racing lock_vma_under_rcu() to operate on a reused vma if it found the vma before it got reused. To prevent this situation, we should ensure that vm_refcnt stays at detached state (0) when it is copied and advances to attached state only after it is added into the vma tree. Introduce vm_area_init_from() which preserves new vma's vm_refcnt and use it in vm_area_dup(). Since all vmas are in detached state with no current readers when they are freed, lock_vma_under_rcu() will not be able to take vm_refcnt after vma got detached even if vma is reused. vma_mark_attached() is modified to include a release fence to ensure all stores to the vma happen before vm_refcnt gets initialized. Finally, make vm_area_cachep SLAB_TYPESAFE_BY_RCU. This will facilitate vm_area_struct reuse and will minimize the number of call_rcu() calls. 
[surenb@google.com: remove atomic_set_release() usage in tools/] Link: https://lkml.kernel.org/r/20250217054351.2973666-1-surenb@google.com Link: https://lkml.kernel.org/r/20250213224655.1680278-18-surenb@google.com Signed-off-by: Suren Baghdasaryan <surenb@google.com> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Tested-by: Shivank Garg <shivankg@amd.com> Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Christian Brauner <brauner@kernel.org> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Liam R. Howlett <Liam.Howlett@Oracle.com> Cc: Lokesh Gidra <lokeshgidra@google.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Mateusz Guzik <mjguzik@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Minchan Kim <minchan@google.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Pasha Tatashin <pasha.tatashin@soleen.com> Cc: "Paul E . McKenney" <paulmck@kernel.org> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Shakeel Butt <shakeel.butt@linux.dev> Cc: Sourav Panda <souravpanda@google.com> Cc: Wei Yang <richard.weiyang@gmail.com> Cc: Will Deacon <will@kernel.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
158 lines
4.2 KiB
C
158 lines
4.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _TOOLS_LINUX_REFCOUNT_H
|
|
#define _TOOLS_LINUX_REFCOUNT_H
|
|
|
|
/*
|
|
* Variant of atomic_t specialized for reference counts.
|
|
*
|
|
* The interface matches the atomic_t interface (to aid in porting) but only
|
|
* provides the few functions one should use for reference counting.
|
|
*
|
|
* It differs in that the counter saturates at UINT_MAX and will not move once
|
|
* there. This avoids wrapping the counter and causing 'spurious'
|
|
* use-after-free issues.
|
|
*
|
|
* Memory ordering rules are slightly relaxed wrt regular atomic_t functions
|
|
* and provide only what is strictly required for refcounts.
|
|
*
|
|
* The increments are fully relaxed; these will not provide ordering. The
|
|
* rationale is that whatever is used to obtain the object we're increasing the
|
|
* reference count on will provide the ordering. For locked data structures,
|
|
* it's the lock acquire, for RCU/lockless data structures it's the dependent
|
|
* load.
|
|
*
|
|
* Do note that inc_not_zero() provides a control dependency which will order
|
|
* future stores against the inc, this ensures we'll never modify the object
|
|
* if we did not in fact acquire a reference.
|
|
*
|
|
* The decrements will provide release order, such that all the prior loads and
|
|
* stores will be issued before, it also provides a control dependency, which
|
|
* will order us against the subsequent free().
|
|
*
|
|
* The control dependency is against the load of the cmpxchg (ll/sc) that
|
|
* succeeded. This means the stores aren't fully ordered, but this is fine
|
|
* because the 1->0 transition indicates no concurrency.
|
|
*
|
|
* Note that the allocator is responsible for ordering things between free()
|
|
* and alloc().
|
|
*
|
|
*/
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/kernel.h>
|
|
|
|
/*
 * Diagnostics used by the refcount helpers below.
 *
 * Without NDEBUG, REFCOUNT_WARN() turns a true condition into BUG_ON() and
 * the bool-returning helpers are annotated __must_check.  With NDEBUG the
 * condition is still evaluated (so side effects are kept) but its result is
 * discarded, and __refcount_check expands to nothing.
 *
 * Note that the message string is not used by either variant.
 */
#ifndef NDEBUG
#define REFCOUNT_WARN(cond, str) BUG_ON(cond)
#define __refcount_check __must_check
#else
#define REFCOUNT_WARN(cond, str) (void)(cond)
#define __refcount_check
#endif
|
|
|
|
typedef struct refcount_struct {
|
|
atomic_t refs;
|
|
} refcount_t;
|
|
|
|
#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
|
|
|
|
/*
 * Store n into the reference counter of r, overwriting whatever value it
 * held before.
 */
static inline void refcount_set(refcount_t *r, unsigned int n)
{
	atomic_t *counter = &r->refs;

	atomic_set(counter, n);
}
|
|
|
|
/*
 * Store n into the reference counter of r.
 *
 * Despite the name, this variant issues the same plain atomic_set() store
 * as refcount_set(); the function itself adds no release barrier.
 */
static inline void refcount_set_release(refcount_t *r, unsigned int n)
{
	atomic_t *counter = &r->refs;

	atomic_set(counter, n);
}
|
|
|
|
static inline unsigned int refcount_read(const refcount_t *r)
|
|
{
|
|
return atomic_read(&r->refs);
|
|
}
|
|
|
|
/*
|
|
* Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
|
|
*
|
|
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
|
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
|
* and thereby orders future stores. See the comment on top.
|
|
*/
|
|
static inline __refcount_check
|
|
bool refcount_inc_not_zero(refcount_t *r)
|
|
{
|
|
unsigned int old, new, val = atomic_read(&r->refs);
|
|
|
|
for (;;) {
|
|
new = val + 1;
|
|
|
|
if (!val)
|
|
return false;
|
|
|
|
if (unlikely(!new))
|
|
return true;
|
|
|
|
old = atomic_cmpxchg_relaxed(&r->refs, val, new);
|
|
if (old == val)
|
|
break;
|
|
|
|
val = old;
|
|
}
|
|
|
|
REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
|
|
*
|
|
* Provides no memory ordering, it is assumed the caller already has a
|
|
* reference on the object, will WARN when this is not so.
|
|
*/
|
|
static inline void refcount_inc(refcount_t *r)
|
|
{
|
|
REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
|
|
}
|
|
|
|
/*
|
|
* Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
|
|
* decrement when saturated at UINT_MAX.
|
|
*
|
|
* Provides release memory ordering, such that prior loads and stores are done
|
|
* before, and provides a control dependency such that free() must come after.
|
|
* See the comment on top.
|
|
*/
|
|
static inline __refcount_check
|
|
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
|
|
{
|
|
unsigned int old, new, val = atomic_read(&r->refs);
|
|
|
|
for (;;) {
|
|
if (unlikely(val == UINT_MAX))
|
|
return false;
|
|
|
|
new = val - i;
|
|
if (new > val) {
|
|
REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
|
|
return false;
|
|
}
|
|
|
|
old = atomic_cmpxchg_release(&r->refs, val, new);
|
|
if (old == val)
|
|
break;
|
|
|
|
val = old;
|
|
}
|
|
|
|
return !new;
|
|
}
|
|
|
|
static inline __refcount_check
|
|
bool refcount_dec_and_test(refcount_t *r)
|
|
{
|
|
return refcount_sub_and_test(1, r);
|
|
}
|
|
|
|
|
|
#endif /* _TOOLS_LINUX_REFCOUNT_H */
|