diff --git a/block/file-posix.c b/block/file-posix.c index 44e16dda87..56d1972d15 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -194,6 +194,7 @@ static int fd_open(BlockDriverState *bs) } static int64_t raw_getlength(BlockDriverState *bs); +static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs); typedef struct RawPosixAIOData { BlockDriverState *bs; @@ -804,6 +805,13 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, #endif s->needs_alignment = raw_needs_alignment(bs); + bs->supported_write_flags = BDRV_REQ_FUA; + if (s->use_linux_aio && !laio_has_fua()) { + bs->supported_write_flags &= ~BDRV_REQ_FUA; + } else if (s->use_linux_io_uring && !luring_has_fua()) { + bs->supported_write_flags &= ~BDRV_REQ_FUA; + } + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; if (S_ISREG(st.st_mode)) { /* When extending regular files, we get zeros from the OS */ @@ -2477,7 +2485,8 @@ static inline bool raw_check_linux_aio(BDRVRawState *s) #endif static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, - uint64_t bytes, QEMUIOVector *qiov, int type) + uint64_t bytes, QEMUIOVector *qiov, int type, + int flags) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; @@ -2508,13 +2517,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, #ifdef CONFIG_LINUX_IO_URING } else if (raw_check_linux_io_uring(s)) { assert(qiov->size == bytes); - ret = luring_co_submit(bs, s->fd, offset, qiov, type); + ret = luring_co_submit(bs, s->fd, offset, qiov, type, flags); goto out; #endif #ifdef CONFIG_LINUX_AIO } else if (raw_check_linux_aio(s)) { assert(qiov->size == bytes); - ret = laio_co_submit(s->fd, offset, qiov, type, + ret = laio_co_submit(s->fd, offset, qiov, type, flags, s->aio_max_batch); goto out; #endif @@ -2534,6 +2543,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, assert(qiov->size == bytes); ret = raw_thread_pool_submit(handle_aiocb_rw, &acb); + if (ret == 0 && (flags & BDRV_REQ_FUA)) { + /* TODO Use pwritev2() instead if it's available */ + ret = raw_co_flush_to_disk(bs); + } goto out; /* Avoid the compiler err of unused label */ out: @@ -2571,14 +2584,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { - return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ); + return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags); } static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { - return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE); + return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags); } static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) @@ -2600,12 +2613,12 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) #ifdef CONFIG_LINUX_IO_URING if (raw_check_linux_io_uring(s)) { - return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); + return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH, 0); } #endif #ifdef CONFIG_LINUX_AIO if (s->has_laio_fdsync && raw_check_linux_aio(s)) { - return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0); + return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0, 0); } #endif return raw_thread_pool_submit(handle_aiocb_flush, &acb); @@ -3540,7 +3553,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs, } trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS); - return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND); + return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND, 0); } #endif diff --git a/block/io_uring.c b/block/io_uring.c index f52b66b340..dd4f304910 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -335,15 +335,24 @@ static void luring_deferred_fn(void *opaque) * */ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, - uint64_t offset, int type) + uint64_t offset, int type, BdrvRequestFlags flags) { int ret; struct io_uring_sqe *sqes = &luringcb->sqeq; switch (type) { case QEMU_AIO_WRITE: +#ifdef HAVE_IO_URING_PREP_WRITEV2 + { + int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0; + io_uring_prep_writev2(sqes, fd, luringcb->qiov->iov, + luringcb->qiov->niov, offset, luring_flags); + } +#else + assert(flags == 0); io_uring_prep_writev(sqes, fd, luringcb->qiov->iov, luringcb->qiov->niov, offset); +#endif break; case QEMU_AIO_ZONE_APPEND: io_uring_prep_writev(sqes, fd, luringcb->qiov->iov, @@ -380,7 +389,8 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, } int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, - QEMUIOVector *qiov, int type) + QEMUIOVector *qiov, int type, + BdrvRequestFlags flags) { int ret; AioContext *ctx = qemu_get_current_aio_context(); @@ -393,7 +403,7 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, }; trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0, type); - ret = luring_do_submit(fd, &luringcb, s, offset, type); + ret = luring_do_submit(fd, &luringcb, s, offset, type, flags); if (ret < 0) { return ret; @@ -448,3 +458,12 @@ void luring_cleanup(LuringState *s) trace_luring_cleanup_state(s); g_free(s); } + +bool luring_has_fua(void) +{ +#ifdef HAVE_IO_URING_PREP_WRITEV2 + return true; +#else + return false; +#endif +} diff --git a/block/linux-aio.c b/block/linux-aio.c index 194c8f434f..407369f5c9 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -368,7 +368,8 @@ static void laio_deferred_fn(void *opaque) } static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, - int type, uint64_t dev_max_batch) + int type, BdrvRequestFlags flags, + uint64_t dev_max_batch) { LinuxAioState *s = laiocb->ctx; struct iocb *iocbs = &laiocb->iocb; @@ -376,7 +377,15 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, switch (type) { case QEMU_AIO_WRITE: +#ifdef HAVE_IO_PREP_PWRITEV2 + { + int laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0; + io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags); + } +#else + assert(flags == 0); io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset); +#endif break; case QEMU_AIO_ZONE_APPEND: io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset); @@ -409,7 +418,8 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, } int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, - int type, uint64_t dev_max_batch) + int type, BdrvRequestFlags flags, + uint64_t dev_max_batch) { int ret; AioContext *ctx = qemu_get_current_aio_context(); @@ -422,7 +432,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, .qiov = qiov, }; - ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch); + ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch); if (ret < 0) { return ret; } @@ -505,3 +515,12 @@ bool laio_has_fdsync(int fd) io_destroy(ctx); return (ret == -EINVAL) ? false : true; } + +bool laio_has_fua(void) +{ +#ifdef HAVE_IO_PREP_PWRITEV2 + return true; +#else + return false; +#endif +} diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 626706827f..6570244496 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -17,6 +17,7 @@ #define QEMU_RAW_AIO_H #include "block/aio.h" +#include "block/block-common.h" #include "qemu/iov.h" /* AIO request types */ @@ -58,11 +59,18 @@ void laio_cleanup(LinuxAioState *s); /* laio_co_submit: submit I/O requests in the thread's current AioContext. */ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, - int type, uint64_t dev_max_batch); + int type, BdrvRequestFlags flags, + uint64_t dev_max_batch); bool laio_has_fdsync(int); +bool laio_has_fua(void); void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); +#else +static inline bool laio_has_fua(void) +{ + return false; +} #endif /* io_uring.c - Linux io_uring implementation */ #ifdef CONFIG_LINUX_IO_URING @@ -71,9 +79,16 @@ void luring_cleanup(LuringState *s); /* luring_co_submit: submit I/O requests in the thread's current AioContext. */ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, - QEMUIOVector *qiov, int type); + QEMUIOVector *qiov, int type, + BdrvRequestFlags flags); void luring_detach_aio_context(LuringState *s, AioContext *old_context); void luring_attach_aio_context(LuringState *s, AioContext *new_context); +bool luring_has_fua(void); +#else +static inline bool luring_has_fua(void) +{ + return false; +} #endif #ifdef _WIN32 diff --git a/meson.build b/meson.build index 9d9c11731f..2f43fd81bf 100644 --- a/meson.build +++ b/meson.build @@ -2727,6 +2727,14 @@ config_host_data.set('HAVE_OPTRESET', cc.has_header_symbol('getopt.h', 'optreset')) config_host_data.set('HAVE_IPPROTO_MPTCP', cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP')) +if libaio.found() + config_host_data.set('HAVE_IO_PREP_PWRITEV2', + cc.has_header_symbol('libaio.h', 'io_prep_pwritev2')) +endif +if linux_io_uring.found() + config_host_data.set('HAVE_IO_URING_PREP_WRITEV2', + cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2')) +endif # has_member config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',