io_uring/io-wq: only create a new worker if it can make progress

Hashed work is serialized by io-wq. It is intended for cases like buffered
writes to a regular file, where the file system will serialize the workers
anyway with a mutex or similar lock. Since those workers would be forcibly
serialized and blocked, it's more efficient for io-wq to handle the items
one at a time rather than issue them in parallel.
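
For context on how the keying works: the issuer derives a bucket from a key
pointer and stores it in the work item's flags. A minimal sketch, modeled on
io_wq_hash_work() in io_uring/io-wq.c and its caller in the async prep path
(the constant names match upstream; treat the exact bit layout as illustrative):

/* Derive a hash bucket from a key pointer and tag the work item with it */
void io_wq_hash_work(struct io_wq_work *work, void *val)
{
	unsigned int bit;

	/* fold the pointer down to one of 2^IO_WQ_HASH_ORDER buckets */
	bit = hash_ptr(val, IO_WQ_HASH_ORDER);
	/* record the bucket in the flags alongside the HASHED marker */
	atomic_or(IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT), &work->flags);
}

Buffered writes key on the file's inode, so every write to the same file lands
in the same chain:

	io_wq_hash_work(&req->work, file_inode(req->file));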

If a worker is currently handling a hashed work item and gets blocked,
don't create a new worker if the next work item is also hashed and
mapped to the same bucket. That new worker would not be able to make any
progress anyway.
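
The reported workload has exactly this shape: a burst of buffered writes
against a single file, where each blocking worker used to spawn a successor
that would immediately block on the same hash chain. A hypothetical liburing
reproducer (file name, queue depth, and sizes are arbitrary; IOSQE_ASYNC just
forces the requests through io-wq):

#include <fcntl.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	static char buf[4096];
	int fd, i;

	fd = open("testfile", O_WRONLY | O_CREAT, 0644);
	io_uring_queue_init(64, &ring, 0);

	/* 32 buffered writes to one file: all hash to the same io-wq
	 * bucket, so a blocked worker gains nothing from siblings */
	for (i = 0; i < 32; i++) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
		io_uring_prep_write(sqe, fd, buf, sizeof(buf),
				    (__u64)i * sizeof(buf));
		io_uring_sqe_set_flags(sqe, IOSQE_ASYNC);
	}
	io_uring_submit(&ring);

	for (i = 0; i < 32; i++) {
		io_uring_wait_cqe(&ring, &cqe);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}

With the change below, io_wq_dec_running() peeks at the head of the pending
list before creating a replacement worker and skips the creation when the next
item hashes to the same bucket as the one currently blocking.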

Reported-by: Fengnan Chang <changfengnan@bytedance.com>
Reported-by: Diangang Li <lidiangang@bytedance.com>
Link: https://lore.kernel.org/io-uring/20250522090909.73212-1-changfengnan@bytedance.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
io_uring/io-wq.c:

@@ -419,6 +419,30 @@ static bool io_queue_worker_create(struct io_worker *worker,
 	return false;
 }
 
+/* Defer if current and next work are both hashed to the same chain */
+static bool io_wq_hash_defer(struct io_wq_work *work, struct io_wq_acct *acct)
+{
+	unsigned int hash, work_flags;
+	struct io_wq_work *next;
+
+	lockdep_assert_held(&acct->lock);
+
+	work_flags = atomic_read(&work->flags);
+	if (!__io_wq_is_hashed(work_flags))
+		return false;
+
+	/* should not happen, io_acct_run_queue() said we had work */
+	if (wq_list_empty(&acct->work_list))
+		return true;
+
+	hash = __io_get_work_hash(work_flags);
+	next = container_of(acct->work_list.first, struct io_wq_work, list);
+	work_flags = atomic_read(&next->flags);
+	if (!__io_wq_is_hashed(work_flags))
+		return false;
+	return hash == __io_get_work_hash(work_flags);
+}
+
 static void io_wq_dec_running(struct io_worker *worker)
 {
 	struct io_wq_acct *acct = io_wq_get_acct(worker);
@@ -433,6 +457,10 @@ static void io_wq_dec_running(struct io_worker *worker)
 		return;
 	if (!io_acct_run_queue(acct))
 		return;
+	if (io_wq_hash_defer(worker->cur_work, acct)) {
+		raw_spin_unlock(&acct->lock);
+		return;
+	}
 	raw_spin_unlock(&acct->lock);
 	atomic_inc(&acct->nr_running);