vdev_file: make FLUSH and TRIM asynchronous

zfs_file_fsync() and zfs_file_deallocate() are both blocking ops, so the
zio_taskq thread is active and blocked both while waiting for the IO
call and then while calling zio_execute() for the next stage. This is a
particular issue for FLUSH, as the z_flush_iss queue typically only has
one thread; multiple flushes arriving at once can cause long delays if
the underlying fsync() response is particularly slow.

To fix this, we dispatch both FLUSH and TRIM to the z_vdev_file taskq,
just as we do for reads and writes. Further, we return all results
through zio_interrupt(), so neither the issue nor the file taskqs are
blocked.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #17064
This commit is contained in:
Rob Norris 2025-02-23 06:16:54 +11:00 committed by Ameer Hamza
parent e085d66f7a
commit 7ea899be04

View File

@ -248,11 +248,22 @@ vdev_file_io_fsync(void *arg)
zio_interrupt(zio);
}
/*
 * Taskq callback: ask the backing file to deallocate the byte range
 * described by the zio (io_offset, io_size), store the result in
 * io_error, and then hand the zio to zio_interrupt().
 *
 * arg is the zio_t being processed, passed as an opaque pointer.
 */
static void
vdev_file_io_deallocate(void *arg)
{
	zio_t *trim_zio = arg;
	vdev_file_t *file_state = trim_zio->io_vd->vdev_tsd;

	/* The call blocks; its return value becomes the zio's error. */
	trim_zio->io_error = zfs_file_deallocate(
	    file_state->vf_file,
	    trim_zio->io_offset,
	    trim_zio->io_size);

	zio_interrupt(trim_zio);
}
static void
vdev_file_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
vdev_file_t *vf = vd->vdev_tsd;
if (zio->io_type == ZIO_TYPE_FLUSH) {
/* XXPOLICY */
@ -263,33 +274,23 @@ vdev_file_io_start(zio_t *zio)
}
if (zfs_nocacheflush) {
zio_execute(zio);
zio_interrupt(zio);
return;
}
#ifdef __linux__
/*
* We cannot safely call vfs_fsync() when PF_FSTRANS
* is set in the current context. Filesystems like
* XFS include sanity checks to verify it is not
* already set, see xfs_vm_writepage(). Therefore
* the sync must be dispatched to a different context.
*/
if (__spl_pf_fstrans_check()) {
VERIFY3U(taskq_dispatch(vdev_file_taskq,
vdev_file_io_fsync, zio, TQ_SLEEP), !=,
TASKQID_INVALID);
return;
}
#endif
VERIFY3U(taskq_dispatch(vdev_file_taskq,
vdev_file_io_fsync, zio, TQ_SLEEP), !=, TASKQID_INVALID);
vdev_file_io_fsync(zio);
return;
} else if (zio->io_type == ZIO_TYPE_TRIM) {
}
if (zio->io_type == ZIO_TYPE_TRIM) {
ASSERT3U(zio->io_size, !=, 0);
zio->io_error = zfs_file_deallocate(vf->vf_file,
zio->io_offset, zio->io_size);
zio_execute(zio);
VERIFY3U(taskq_dispatch(vdev_file_taskq,
vdev_file_io_deallocate, zio, TQ_SLEEP), !=,
TASKQID_INVALID);
return;
}