
Currently, we crash the kernel when a decompression failure occurs in
zswap (either because of memory corruption, or a bug in the compression
algorithm). This is overkill. We should only SIGBUS the unfortunate
process asking for the zswap entry on zswap load, and skip the corrupted
entry in zswap writeback. See [1] for a recent upstream discussion about
this.

The zswap writeback case is relatively straightforward to fix. For the
zswap_load() case, we change the return behavior:

* Return 0 on success.
* Return -ENOENT (with the folio locked) if zswap does not own the
  swapped out content.
* Return -EIO if zswap owns the swapped out content, but encounters a
  decompression failure for some reason. The folio will be unlocked, but
  not be marked up-to-date, which will eventually cause the process
  requesting the page to SIGBUS (see the handling of not-up-to-date
  folios in do_swap_page() in mm/memory.c), without crashing the kernel.
* Return -EINVAL if we encounter a large folio, as large folios should
  not be swapped in while zswap is being used. Similar to the -EIO case,
  we also unlock the folio but do not mark it as up-to-date to SIGBUS
  the faulting process.

As a side effect, we require one extra zswap tree traversal in the load
and writeback paths. Quick benchmarking on a kernel build test shows no
performance difference:

With the new scheme:
real: mean: 125.1s, stdev: 0.12s
user: mean: 3265.23s, stdev: 9.62s
sys: mean: 2156.41s, stdev: 13.98s

The old scheme:
real: mean: 125.78s, stdev: 0.45s
user: mean: 3287.18s, stdev: 5.95s
sys: mean: 2177.08s, stdev: 26.52s

[nphamcs@gmail.com: fix documentation of zswap_load()]
Link: https://lkml.kernel.org/r/20250306222453.1269456-1-nphamcs@gmail.com
Link: https://lore.kernel.org/all/ZsiLElTykamcYZ6J@casper.infradead.org/ [1]
Link: https://lkml.kernel.org/r/20250306205011.784787-1-nphamcs@gmail.com
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Suggested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
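Seen from the caller's side, the new contract means the swap-in path can
key entirely off the return value. The sketch below is illustrative only:
the helper name swap_read_folio_sketch and the stubbed-out disk read are
hypothetical, while the real dispatch lives in swap_read_folio() in
mm/page_io.c and the SIGBUS handling in do_swap_page() in mm/memory.c.

#include <linux/mm_types.h>
#include <linux/zswap.h>

/* Hypothetical caller on the swap-in path; illustrative only. */
static void swap_read_folio_sketch(struct folio *folio)
{
        int ret = zswap_load(folio);

        if (ret != -ENOENT) {
                /*
                 * zswap owned the swapped out content. On 0 (success) the
                 * folio holds the decompressed data; on -EIO or -EINVAL
                 * the folio was unlocked without being marked up-to-date,
                 * so do_swap_page() will SIGBUS the faulting process
                 * rather than crash the kernel. Either way, nothing more
                 * to do here.
                 */
                return;
        }

        /*
         * Not in zswap: the folio is still locked, so fall through to a
         * real read from the backing swap device (stubbed out here).
         */
        /* read_from_swap_device(folio); -- hypothetical */
}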
75 lines
1.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_ZSWAP_H
#define _LINUX_ZSWAP_H

#include <linux/types.h>
#include <linux/mm_types.h>

struct lruvec;

extern atomic_long_t zswap_stored_pages;

#ifdef CONFIG_ZSWAP

struct zswap_lruvec_state {
        /*
         * Number of pages swapped in from disk, i.e. not found in the zswap pool.
         *
         * This is consumed and subtracted from the lru size in
         * zswap_shrinker_count() to penalize past overshrinking that led to disk
         * swapins. The idea is that had we considered this many more pages in the
         * LRU active/protected and not written them back, we would not have had to
         * swap them in.
         */
        atomic_long_t nr_disk_swapins;
};

unsigned long zswap_total_pages(void);
bool zswap_store(struct folio *folio);
int zswap_load(struct folio *folio);
void zswap_invalidate(swp_entry_t swp);
int zswap_swapon(int type, unsigned long nr_pages);
void zswap_swapoff(int type);
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
void zswap_lruvec_state_init(struct lruvec *lruvec);
void zswap_folio_swapin(struct folio *folio);
bool zswap_is_enabled(void);
bool zswap_never_enabled(void);

#else

struct zswap_lruvec_state {};

static inline bool zswap_store(struct folio *folio)
{
        return false;
}

static inline int zswap_load(struct folio *folio)
{
        return -ENOENT;
}

static inline void zswap_invalidate(swp_entry_t swp) {}
static inline int zswap_swapon(int type, unsigned long nr_pages)
{
        return 0;
}
static inline void zswap_swapoff(int type) {}
static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
static inline void zswap_folio_swapin(struct folio *folio) {}

static inline bool zswap_is_enabled(void)
{
        return false;
}

static inline bool zswap_never_enabled(void)
{
        return true;
}

#endif

#endif /* _LINUX_ZSWAP_H */
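For context on the nr_disk_swapins field documented in the struct above,
here is a deliberately simplified model of the feedback loop it drives.
The _sketch helpers are hypothetical; the real accounting lives in
zswap_folio_swapin() and zswap_shrinker_count() in mm/zswap.c and is more
careful about concurrent updates.

#include <linux/atomic.h>
#include <linux/minmax.h>
#include <linux/mmzone.h>       /* struct lruvec */
#include <linux/zswap.h>

/* Sketch: a disk swapin means zswap shrank this LRU too aggressively. */
static void note_disk_swapin_sketch(struct lruvec *lruvec)
{
        atomic_long_inc(&lruvec->zswap_lruvec_state.nr_disk_swapins);
}

/* Sketch: the shrinker consumes the penalty, reporting a smaller LRU. */
static unsigned long shrinker_count_sketch(struct lruvec *lruvec,
                                           unsigned long lru_size)
{
        atomic_long_t *nr = &lruvec->zswap_lruvec_state.nr_disk_swapins;
        long penalty = min_t(long, atomic_long_read(nr), (long)lru_size);

        atomic_long_sub(penalty, nr);   /* apply each penalty only once */
        return lru_size - penalty;
}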