mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-08-28 09:22:08 +00:00

This patch introduces the use of the Intel QAT to offload EROFS data
decompression, aiming to improve the decompression performance.

A 285MiB dataset is used with the following command to create EROFS
images with different cluster sizes:
    $ mkfs.erofs -zdeflate,level=9 -C{4096,16384,65536,131072,262144}

Fio is used to test the following read patterns:
    $ fio -filename=testfile -bs=4k -rw=read -name=job1
    $ fio -filename=testfile -bs=4k -rw=randread -name=job1
    $ fio -filename=testfile -bs=4k -rw=randread --io_size=14m -name=job1

Here are some performance numbers for reference:

Processors: Intel(R) Xeon(R) 6766E (144 cores)
Memory:     512 GiB

|-----------------------------------------------------------------------------|
|           | Cluster size | sequential read | randread  | small randread(5%) |
|-----------|--------------|-----------------|-----------|--------------------|
| Intel QAT | 4096         | 538 MiB/s       | 112 MiB/s | 20.76 MiB/s        |
| Intel QAT | 16384        | 699 MiB/s       | 158 MiB/s | 21.02 MiB/s        |
| Intel QAT | 65536        | 917 MiB/s       | 278 MiB/s | 20.90 MiB/s        |
| Intel QAT | 131072       | 1056 MiB/s      | 351 MiB/s | 23.36 MiB/s        |
| Intel QAT | 262144       | 1145 MiB/s      | 431 MiB/s | 26.66 MiB/s        |
| deflate   | 4096         | 499 MiB/s       | 108 MiB/s | 21.50 MiB/s        |
| deflate   | 16384        | 422 MiB/s       | 125 MiB/s | 18.94 MiB/s        |
| deflate   | 65536        | 452 MiB/s       | 159 MiB/s | 13.02 MiB/s        |
| deflate   | 131072       | 452 MiB/s       | 177 MiB/s | 11.44 MiB/s        |
| deflate   | 262144       | 466 MiB/s       | 194 MiB/s | 10.60 MiB/s        |

Signed-off-by: Bo Liu <liubo03@inspur.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20250522094931.28956-1-liubo03@inspur.com
[ Gao Xiang: refine the commit message. ]
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
90 lines
2.9 KiB
C
90 lines
2.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2019 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
|
|
#ifndef __EROFS_FS_COMPRESS_H
|
|
#define __EROFS_FS_COMPRESS_H
|
|
|
|
#include "internal.h"
|
|
|
|
struct z_erofs_decompress_req {
|
|
struct super_block *sb;
|
|
struct page **in, **out;
|
|
unsigned int inpages, outpages;
|
|
unsigned short pageofs_in, pageofs_out;
|
|
unsigned int inputsize, outputsize;
|
|
|
|
unsigned int alg; /* the algorithm for decompression */
|
|
bool inplace_io, partial_decoding, fillgaps;
|
|
gfp_t gfp; /* allocation flags for extra temporary buffers */
|
|
};
|
|
|
|
/*
 * Operations table exported by each decompression backend (see the
 * z_erofs_*_decomp declarations and the z_erofs_decomp[] array below).
 */
struct z_erofs_decompressor {
	/* takes the superblock, its on-disk form and a (data, size) blob */
	int (*config)(struct super_block *sb, struct erofs_super_block *dsb,
		      void *data, int size);
	/* handles one request; @pagepool is a caller-supplied page pool */
	int (*decompress)(struct z_erofs_decompress_req *rq,
			  struct page **pagepool);
	int (*init)(void);
	void (*exit)(void);
	char *name;
};
|
|
|
|
/*
 * Sentinel values; both keep the two lowest bits clear.
 *
 * Z_EROFS_SHORTLIVED_PAGE is the page->private value tested by
 * z_erofs_is_shortlived_page().  Z_EROFS_PREALLOCATED_FOLIO is a pointer
 * sentinel whose users live outside this header.
 */
#define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
#define Z_EROFS_PREALLOCATED_FOLIO	((void *)(-2UL << 2))
|
|
|
|
/*
|
|
* Currently, short-lived pages are pages directly from buddy system
|
|
* with specific page->private (Z_EROFS_SHORTLIVED_PAGE).
|
|
* In the future world of Memdescs, it should be type 0 (Misc) memory
|
|
* which type can be checked with a new helper.
|
|
*/
|
|
static inline bool z_erofs_is_shortlived_page(struct page *page)
|
|
{
|
|
return page->private == Z_EROFS_SHORTLIVED_PAGE;
|
|
}
|
|
|
|
/*
 * Hand a short-lived page over to @pagepool.
 *
 * If @page is a short-lived page (see z_erofs_is_shortlived_page()), it is
 * passed to erofs_pagepool_add() and true is returned.  Any other page is
 * left untouched and false is returned.
 */
static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
					      struct page *page)
{
	if (!z_erofs_is_shortlived_page(page))
		return false;
	erofs_pagepool_add(pagepool, page);
	return true;
}
|
|
|
|
/* Decompressor backends; these are defined outside this header. */
extern const struct z_erofs_decompressor z_erofs_lzma_decomp;
extern const struct z_erofs_decompressor z_erofs_deflate_decomp;
extern const struct z_erofs_decompressor z_erofs_zstd_decomp;
/* Array of pointers to decompressor backends. */
extern const struct z_erofs_decompressor *z_erofs_decomp[];
|
|
|
|
struct z_erofs_stream_dctx {
|
|
struct z_erofs_decompress_req *rq;
|
|
int no, ni; /* the current {en,de}coded page # */
|
|
|
|
unsigned int avail_out; /* remaining bytes in the decoded buffer */
|
|
unsigned int inbuf_pos, inbuf_sz;
|
|
/* current status of the encoded buffer */
|
|
u8 *kin, *kout; /* buffer mapped pointers */
|
|
void *bounce; /* bounce buffer for inplace I/Os */
|
|
bool bounced; /* is the bounce buffer used now? */
|
|
};
|
|
|
|
int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst,
|
|
void **src, struct page **pgpl);
|
|
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
|
|
unsigned int padbufsize);
|
|
int __init z_erofs_init_decompressor(void);
|
|
void z_erofs_exit_decompressor(void);
|
|
/*
 * Crypto-engine (accelerator) interface.
 *
 * The first two prototypes are declared unconditionally.  The remaining two
 * helpers are real functions only when CONFIG_EROFS_FS_ZIP_ACCEL is set;
 * otherwise they collapse into inline stubs: disabling engines does nothing
 * and showing engines writes nothing to @buf and returns 0.
 */
int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
			      struct page **pgpl);
int z_erofs_crypto_enable_engine(const char *name, int len);
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
void z_erofs_crypto_disable_all_engines(void);
int z_erofs_crypto_show_engines(char *buf, int size, char sep);
#else
static inline void z_erofs_crypto_disable_all_engines(void) {}
static inline int z_erofs_crypto_show_engines(char *buf, int size, char sep)
{
	return 0;
}
#endif
|
|
#endif
|