From 4afeffc8572f40d8844b946a30c00b10da4442b1 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Fri, 17 Nov 2017 22:26:16 -0500 Subject: [PATCH 1/4] blockjob: do not allow coroutine double entry or entry-after-completion When block_job_sleep_ns() is called, the co-routine is scheduled for future execution. If we allow the job to be re-entered prior to the scheduled time, we present a race condition in which a coroutine can be entered recursively, or even entered after the coroutine is deleted. The job->busy flag is used by blockjobs when a coroutine is busy executing. The function 'block_job_enter()' obeys the busy flag, and will not enter a coroutine if set. If we sleep a job, we need to leave the busy flag set, so that subsequent calls to block_job_enter() are prevented. This changes the prior behavior of block_job_cancel() being able to immediately wake up and cancel a job; in practice, this should not be an issue, as the coroutine sleep times are generally very small, and the cancel will occur the next time the coroutine wakes up. This fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1508708 Signed-off-by: Jeff Cody Reviewed-by: Stefan Hajnoczi --- blockjob.c | 7 +++++-- include/block/blockjob_int.h | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/blockjob.c b/blockjob.c index 3a0c49137e..ff9a614531 100644 --- a/blockjob.c +++ b/blockjob.c @@ -797,11 +797,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) return; } - job->busy = false; + /* We need to leave job->busy set here, because when we have + * put a coroutine to 'sleep', we have scheduled it to run in + * the future. We cannot enter that same coroutine again before + * it wakes and runs, otherwise we risk double-entry or entry after + * completion. */ if (!block_job_should_pause(job)) { co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); } - job->busy = true; block_job_pause_point(job); } diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index f13ad05c0d..43f3be2965 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -143,7 +143,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, * @ns: How many nanoseconds to stop for. * * Put the job to sleep (assuming that it wasn't canceled) for @ns - * nanoseconds. Canceling the job will interrupt the wait immediately. + * nanoseconds. Canceling the job will not interrupt the wait, so the + * cancel will not process until the coroutine wakes up. */ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); From 6133b39f3c36623425a6ede9e89d93175fde15cd Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Fri, 17 Nov 2017 22:27:09 -0500 Subject: [PATCH 2/4] coroutine: abort if we try to schedule or enter a pending coroutine The previous patch fixed a race condition, in which there were coroutines being executing doubly, or after coroutine deletion. We can detect common scenarios when this happens, and print an error message and abort before we corrupt memory / data, or segfault. This patch will abort if an attempt to enter a coroutine is made while it is currently pending execution, either in a specific AioContext bh, or pending execution via a timer. It will also abort if a coroutine is scheduled, before a prior scheduled run has occurred. We cannot rely on the existing co->caller check for recursive re-entry to catch this, as the coroutine may run and exit with COROUTINE_TERMINATE before the scheduled coroutine executes. (This is the scenario that was occurring and fixed in the previous patch). This patch also re-orders the Coroutine struct elements in an attempt to optimize caching. Signed-off-by: Jeff Cody Reviewed-by: Stefan Hajnoczi --- include/qemu/coroutine_int.h | 13 ++++++++++--- util/async.c | 13 +++++++++++++ util/qemu-coroutine-sleep.c | 12 ++++++++++++ util/qemu-coroutine.c | 14 ++++++++++++++ 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h index cb98892bba..59e8406398 100644 --- a/include/qemu/coroutine_int.h +++ b/include/qemu/coroutine_int.h @@ -46,14 +46,21 @@ struct Coroutine { size_t locks_held; + /* Only used when the coroutine has yielded. */ + AioContext *ctx; + + /* Used to catch and abort on illegal co-routine entry. + * Will contain the name of the function that had first + * scheduled the coroutine. */ + const char *scheduled; + + QSIMPLEQ_ENTRY(Coroutine) co_queue_next; + /* Coroutines that should be woken up when we yield or terminate. * Only used when the coroutine is running. */ QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup; - /* Only used when the coroutine has yielded. */ - AioContext *ctx; - QSIMPLEQ_ENTRY(Coroutine) co_queue_next; QSLIST_ENTRY(Coroutine) co_scheduled_next; }; diff --git a/util/async.c b/util/async.c index 0e1bd8780a..4dd9d95a9e 100644 --- a/util/async.c +++ b/util/async.c @@ -388,6 +388,9 @@ static void co_schedule_bh_cb(void *opaque) QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); trace_aio_co_schedule_bh_cb(ctx, co); aio_context_acquire(ctx); + + /* Protected by write barrier in qemu_aio_coroutine_enter */ + atomic_set(&co->scheduled, NULL); qemu_coroutine_enter(co); aio_context_release(ctx); } @@ -438,6 +441,16 @@ fail: void aio_co_schedule(AioContext *ctx, Coroutine *co) { trace_aio_co_schedule(ctx, co); + const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL, + __func__); + + if (scheduled) { + fprintf(stderr, + "%s: Co-routine was already scheduled in '%s'\n", + __func__, scheduled); + abort(); + } + QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, co, co_scheduled_next); qemu_bh_schedule(ctx->co_schedule_bh); diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c index 9c5655041b..254349cdbb 100644 --- a/util/qemu-coroutine-sleep.c +++ b/util/qemu-coroutine-sleep.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/coroutine.h" +#include "qemu/coroutine_int.h" #include "qemu/timer.h" #include "block/aio.h" @@ -25,6 +26,8 @@ static void co_sleep_cb(void *opaque) { CoSleepCB *sleep_cb = opaque; + /* Write of schedule protected by barrier write in aio_co_schedule */ + atomic_set(&sleep_cb->co->scheduled, NULL); aio_co_wake(sleep_cb->co); } @@ -34,6 +37,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, CoSleepCB sleep_cb = { .co = qemu_coroutine_self(), }; + + const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL, + __func__); + if (scheduled) { + fprintf(stderr, + "%s: Co-routine was already scheduled in '%s'\n", + __func__, scheduled); + abort(); + } sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); qemu_coroutine_yield(); diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index d6095c1d5a..9eff7fd450 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -107,8 +107,22 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) Coroutine *self = qemu_coroutine_self(); CoroutineAction ret; + /* Cannot rely on the read barrier for co in aio_co_wake(), as there are + * callers outside of aio_co_wake() */ + const char *scheduled = atomic_mb_read(&co->scheduled); + trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg); + /* if the Coroutine has already been scheduled, entering it again will + * cause us to enter it twice, potentially even after the coroutine has + * been deleted */ + if (scheduled) { + fprintf(stderr, + "%s: Co-routine was already scheduled in '%s'\n", + __func__, scheduled); + abort(); + } + if (co->caller) { fprintf(stderr, "Co-routine re-entered recursively\n"); abort(); From a2339699c3d35f19253b3b9b51f8a9b8e24f90eb Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Sun, 19 Nov 2017 18:25:50 -0500 Subject: [PATCH 3/4] qemu-iotests: add option in common.qemu for mismatch only Add option to echo response to QMP / HMP command only on mismatch. Useful for ignore all normal responses, but catching things like segfaults. Signed-off-by: Jeff Cody Reviewed-by: Stefan Hajnoczi --- tests/qemu-iotests/common.qemu | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index 7b3052dc79..85f66b852c 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -50,6 +50,8 @@ _in_fd=4 # # If $silent is set to anything but an empty string, then # response is not echoed out. +# If $mismatch_only is set, only non-matching responses will +# be echoed. function _timed_wait_for() { local h=${1} @@ -58,14 +60,18 @@ function _timed_wait_for() QEMU_STATUS[$h]=0 while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]} do - if [ -z "${silent}" ]; then + if [ -z "${silent}" ] && [ -z "${mismatch_only}" ]; then echo "${resp}" | _filter_testdir | _filter_qemu \ | _filter_qemu_io | _filter_qmp | _filter_hmp fi grep -q "${*}" < <(echo "${resp}") if [ $? -eq 0 ]; then return + elif [ -z "${silent}" ] && [ -n "${mismatch_only}" ]; then + echo "${resp}" | _filter_testdir | _filter_qemu \ + | _filter_qemu_io | _filter_qmp | _filter_hmp fi + done QEMU_STATUS[$h]=-1 if [ -z "${qemu_error_no_exit}" ]; then From d975301dc8ae56fb3154348878e47a6211843c0b Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Sat, 18 Nov 2017 02:00:18 -0500 Subject: [PATCH 4/4] qemu-iotest: add test for blockjob coroutine race condition Signed-off-by: Jeff Cody Reviewed-by: Stefan Hajnoczi --- tests/qemu-iotests/200 | 99 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/200.out | 14 ++++++ tests/qemu-iotests/group | 1 + 3 files changed, 114 insertions(+) create mode 100755 tests/qemu-iotests/200 create mode 100644 tests/qemu-iotests/200.out diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200 new file mode 100755 index 0000000000..d8787ddb46 --- /dev/null +++ b/tests/qemu-iotests/200 @@ -0,0 +1,99 @@ +#!/bin/bash +# +# Block job co-routine race condition test. +# +# See: https://bugzilla.redhat.com/show_bug.cgi?id=1508708 +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=jcody@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_qemu + rm -f "${TEST_IMG}" "${BACKING_IMG}" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 qed +_supported_proto file +_supported_os Linux + +BACKING_IMG="${TEST_DIR}/backing.img" +TEST_IMG="${TEST_DIR}/test.img" + +${QEMU_IMG} create -f $IMGFMT "${BACKING_IMG}" 512M | _filter_img_create +${QEMU_IMG} create -f $IMGFMT -F $IMGFMT "${TEST_IMG}" -b "${BACKING_IMG}" 512M | _filter_img_create + +${QEMU_IO} -c "write -P 0xa5 512 300M" "${BACKING_IMG}" | _filter_qemu_io + +echo +echo === Starting QEMU VM === +echo +qemu_comm_method="qmp" +_launch_qemu -device pci-bridge,id=bridge1,chassis_nr=1,bus=pci.0 \ + -object iothread,id=iothread0 \ + -device virtio-scsi-pci,bus=bridge1,addr=0x1f,id=scsi0,iothread=iothread0 \ + -drive file="${TEST_IMG}",media=disk,if=none,cache=none,id=drive_sysdisk,aio=native,format=$IMGFMT \ + -device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0 +h1=$QEMU_HANDLE + +_send_qemu_cmd $h1 "{ 'execute': 'qmp_capabilities' }" 'return' + +echo +echo === Sending stream/cancel, checking for SIGSEGV only === +echo +for (( i=1;i<500;i++ )) +do + mismatch_only='y' qemu_error_no_exit='n' _send_qemu_cmd $h1 \ + "{ + 'execute': 'block-stream', + 'arguments': { + 'device': 'drive_sysdisk', + 'speed': 10000000, + 'on-error': 'report', + 'job-id': 'job-$i' + } + } + { + 'execute': 'block-job-cancel', + 'arguments': { + 'device': 'job-$i' + } + }" \ + "{.*{.*}.*}" # should match all well-formed QMP responses +done + +silent='y' _send_qemu_cmd $h1 "{ 'execute': 'quit' }" 'return' + +echo "$i iterations performed" + +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/200.out b/tests/qemu-iotests/200.out new file mode 100644 index 0000000000..af6a809e30 --- /dev/null +++ b/tests/qemu-iotests/200.out @@ -0,0 +1,14 @@ +QA output created by 200 +Formatting 'TEST_DIR/backing.img', fmt=IMGFMT size=536870912 +Formatting 'TEST_DIR/test.img', fmt=IMGFMT size=536870912 backing_file=TEST_DIR/backing.img backing_fmt=IMGFMT +wrote 314572800/314572800 bytes at offset 512 +300 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Starting QEMU VM === + +{"return": {}} + +=== Sending stream/cancel, checking for SIGSEGV only === + +500 iterations performed +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 1fad602152..3e688678dd 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -196,3 +196,4 @@ 196 rw auto quick 197 rw auto quick 198 rw auto +200 rw auto