linux-loongson/fs/erofs/decompressor_crypto.c
Bo Liu b4a29efc51 erofs: support DEFLATE decompression by using Intel QAT
This patch adds support for offloading EROFS DEFLATE decompression to
Intel QAT in order to improve decompression performance.

A 285MiB dataset is used with the following command to create EROFS
images with different cluster sizes:
     $ mkfs.erofs -zdeflate,level=9 -C{4096,16384,65536,131072,262144}

Fio is used to test the following read patterns:
     $ fio -filename=testfile -bs=4k -rw=read -name=job1
     $ fio -filename=testfile -bs=4k -rw=randread -name=job1
     $ fio -filename=testfile -bs=4k -rw=randread --io_size=14m -name=job1

Here are some performance numbers for reference:

Processors: Intel(R) Xeon(R) 6766E (144 cores)
Memory:     512 GiB

|-----------------------------------------------------------------------------|
|           | Cluster size | sequential read | randread  | small randread(5%) |
|-----------|--------------|-----------------|-----------|--------------------|
| Intel QAT |    4096      |    538  MiB/s   | 112 MiB/s |     20.76 MiB/s    |
| Intel QAT |    16384     |    699  MiB/s   | 158 MiB/s |     21.02 MiB/s    |
| Intel QAT |    65536     |    917  MiB/s   | 278 MiB/s |     20.90 MiB/s    |
| Intel QAT |    131072    |    1056 MiB/s   | 351 MiB/s |     23.36 MiB/s    |
| Intel QAT |    262144    |    1145 MiB/s   | 431 MiB/s |     26.66 MiB/s    |
| deflate   |    4096      |    499  MiB/s   | 108 MiB/s |     21.50 MiB/s    |
| deflate   |    16384     |    422  MiB/s   | 125 MiB/s |     18.94 MiB/s    |
| deflate   |    65536     |    452  MiB/s   | 159 MiB/s |     13.02 MiB/s    |
| deflate   |    131072    |    452  MiB/s   | 177 MiB/s |     11.44 MiB/s    |
| deflate   |    262144    |    466  MiB/s   | 194 MiB/s |     10.60 MiB/s    |

Signed-off-by: Bo Liu <liubo03@inspur.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20250522094931.28956-1-liubo03@inspur.com
[ Gao Xiang: refine the commit message. ]
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2025-05-25 15:27:40 +08:00

182 lines
4.2 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/scatterlist.h>
#include <crypto/acompress.h>
#include "compress.h"
static int __z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
struct crypto_acomp *tfm)
{
struct sg_table st_src, st_dst;
struct acomp_req *req;
struct crypto_wait wait;
u8 *headpage;
int ret;
headpage = kmap_local_page(*rq->in);
ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
min_t(unsigned int, rq->inputsize,
rq->sb->s_blocksize - rq->pageofs_in));
kunmap_local(headpage);
if (ret)
return ret;
req = acomp_request_alloc(tfm);
if (!req)
return -ENOMEM;
ret = sg_alloc_table_from_pages_segment(&st_src, rq->in, rq->inpages,
rq->pageofs_in, rq->inputsize, UINT_MAX, GFP_KERNEL);
if (ret < 0)
goto failed_src_alloc;
ret = sg_alloc_table_from_pages_segment(&st_dst, rq->out, rq->outpages,
rq->pageofs_out, rq->outputsize, UINT_MAX, GFP_KERNEL);
if (ret < 0)
goto failed_dst_alloc;
acomp_request_set_params(req, st_src.sgl,
st_dst.sgl, rq->inputsize, rq->outputsize);
crypto_init_wait(&wait);
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &wait);
ret = crypto_wait_req(crypto_acomp_decompress(req), &wait);
if (ret) {
erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
ret, rq->inputsize, rq->pageofs_in, rq->outputsize);
ret = -EIO;
}
sg_free_table(&st_dst);
failed_dst_alloc:
sg_free_table(&st_src);
failed_src_alloc:
acomp_request_free(req);
return ret;
}
/*
 * One candidate crypto (acomp) engine for a compression algorithm.
 * An entry whose crypto_name is NULL terminates a list of engines.
 */
struct z_erofs_crypto_engine {
/* Algorithm/driver name passed to crypto_alloc_acomp(); NULL in the sentinel. */
char *crypto_name;
/* Allocated transform while the engine is enabled, NULL otherwise. */
struct crypto_acomp *tfm;
};
/*
 * Table of candidate engines, indexed by Z_EROFS_COMPRESSION_* value.
 * Each slot points to a list that ends with an empty `{}` entry, i.e. one
 * whose crypto_name is NULL; the loops in this file stop on that entry.
 * Only the DEFLATE list names an engine ("qat_deflate"); the LZ4, LZMA and
 * ZSTD lists contain just the terminator.
 */
struct z_erofs_crypto_engine *z_erofs_crypto[Z_EROFS_COMPRESSION_MAX] = {
[Z_EROFS_COMPRESSION_LZ4] = (struct z_erofs_crypto_engine[]) {
{},
},
[Z_EROFS_COMPRESSION_LZMA] = (struct z_erofs_crypto_engine[]) {
{},
},
[Z_EROFS_COMPRESSION_DEFLATE] = (struct z_erofs_crypto_engine[]) {
{ .crypto_name = "qat_deflate", },
{},
},
[Z_EROFS_COMPRESSION_ZSTD] = (struct z_erofs_crypto_engine[]) {
{},
},
};
/*
 * Guards the tfm pointers stored in z_erofs_crypto[]: held for reading
 * around decompression, and for writing while engines are enabled or
 * disabled.
 */
static DECLARE_RWSEM(z_erofs_crypto_rwsem);
/*
 * Look up a usable transform for compression algorithm @alg.
 *
 * Walks the engine list of z_erofs_crypto[@alg] up to its NULL-named
 * terminator and returns the first transform that is set, or NULL when no
 * engine of that algorithm is enabled.  The caller visible in this file
 * holds z_erofs_crypto_rwsem for reading across the call.
 */
static struct crypto_acomp *z_erofs_crypto_get_engine(int alg)
{
	struct z_erofs_crypto_engine *cur = z_erofs_crypto[alg];

	while (cur->crypto_name) {
		if (cur->tfm)
			return cur->tfm;
		++cur;
	}
	return NULL;
}
/*
 * Decompress @rq with an enabled crypto engine, if there is one.
 *
 * Every empty slot in rq->out is first filled with a page obtained from
 * __erofs_allocpage() (using @pgpl and rq->gfp) and tagged
 * Z_EROFS_SHORTLIVED_PAGE, then the request is handed to
 * __z_erofs_crypto_decompress().  z_erofs_crypto_rwsem is held for reading
 * for the whole operation so the selected transform cannot be freed by a
 * concurrent enable/disable.
 *
 * Returns -EOPNOTSUPP when no engine is enabled for rq->alg, -ENOMEM when a
 * page cannot be allocated (pages already placed in rq->out are left there),
 * otherwise the result of __z_erofs_crypto_decompress().
 */
int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
			      struct page **pgpl)
{
	struct crypto_acomp *engine;
	int idx, ret;

	down_read(&z_erofs_crypto_rwsem);

	engine = z_erofs_crypto_get_engine(rq->alg);
	if (!engine) {
		ret = -EOPNOTSUPP;
		goto unlock;
	}

	for (idx = 0; idx < rq->outpages; idx++) {
		struct page *filler;

		/* Slots that already carry a page are used as they are. */
		if (rq->out[idx])
			continue;

		filler = __erofs_allocpage(pgpl, rq->gfp, true);
		if (!filler) {
			ret = -ENOMEM;
			goto unlock;
		}
		set_page_private(filler, Z_EROFS_SHORTLIVED_PAGE);
		rq->out[idx] = filler;
	}

	ret = __z_erofs_crypto_decompress(rq, engine);
unlock:
	up_read(&z_erofs_crypto_rwsem);
	return ret;
}
/*
 * Enable the crypto engine called @name.
 *
 * @name: engine name; only the first @len bytes are considered, so it does
 *        not have to be NUL-terminated at @len.
 * @len:  length of the name in bytes.
 *
 * Every table entry whose crypto_name is exactly @len bytes long and equal
 * to @name gets a transform allocated with crypto_alloc_acomp(), unless it
 * already has one.  The match is deliberately exact: a bare prefix such as
 * "qat", or an empty name (@len <= 0), enables nothing.  With a plain
 * strncmp() over @len bytes a prefix would match, and a zero-length name
 * would match every engine in the table.
 *
 * Returns 0 on success, including when no table entry has that name, or
 * -EOPNOTSUPP if allocating the transform fails.  Takes
 * z_erofs_crypto_rwsem for writing.
 */
int z_erofs_crypto_enable_engine(const char *name, int len)
{
	struct z_erofs_crypto_engine *e;
	struct crypto_acomp *tfm;
	int alg, err = 0;

	down_write(&z_erofs_crypto_rwsem);
	for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
		for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) {
			/* Lengths must agree before the bytes are compared. */
			if (len <= 0 ||
			    strlen(e->crypto_name) != (size_t)len ||
			    strncmp(name, e->crypto_name, len))
				continue;

			/* Already enabled: nothing to do for this algorithm. */
			if (e->tfm)
				break;

			tfm = crypto_alloc_acomp(e->crypto_name, 0, 0);
			if (IS_ERR(tfm)) {
				err = -EOPNOTSUPP;
				goto out;
			}
			e->tfm = tfm;
			break;
		}
	}
out:
	up_write(&z_erofs_crypto_rwsem);
	return err;
}
/*
 * Release the transform of every enabled engine and mark it disabled.
 *
 * Runs with z_erofs_crypto_rwsem held for writing, so it cannot overlap a
 * decompression that holds the semaphore for reading.
 */
void z_erofs_crypto_disable_all_engines(void)
{
	struct z_erofs_crypto_engine *slot;
	int alg;

	down_write(&z_erofs_crypto_rwsem);
	for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
		for (slot = z_erofs_crypto[alg]; slot->crypto_name; ++slot) {
			if (slot->tfm) {
				crypto_free_acomp(slot->tfm);
				slot->tfm = NULL;
			}
		}
	}
	up_write(&z_erofs_crypto_rwsem);
}
/*
 * Write the names of all currently enabled engines into @buf.
 *
 * @buf:  destination buffer.
 * @size: capacity of @buf in bytes.
 * @sep:  character appended after each engine name.
 *
 * Each enabled engine is emitted as its crypto_name followed by @sep, using
 * scnprintf() so the output never exceeds @size.  Returns the number of
 * characters written.
 *
 * NOTE(review): the tfm pointers are read here without taking
 * z_erofs_crypto_rwsem; confirm that a lockless snapshot is intended.
 */
int z_erofs_crypto_show_engines(char *buf, int size, char sep)
{
	struct z_erofs_crypto_engine *slot;
	int written = 0;
	int alg;

	for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
		for (slot = z_erofs_crypto[alg]; slot->crypto_name; ++slot) {
			if (slot->tfm)
				written += scnprintf(buf + written,
						     size - written, "%s%c",
						     slot->crypto_name, sep);
		}
	}
	return written;
}