From 4b156248776f734d63fe37629d56c40234fda9c0 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 10:42:33 +0800
Subject: [PATCH 01/10] nbd/server.c: fix invalid read after client was already freed

During NBD device stress testing, I/O submission in qemu-nbd can fail
and take the "out" path of nbd_trip(). If two or more I/Os reach that
path, the NBD client is released in nbd_request_put(), and the later
read of client->closing in client_close() becomes a use-after-free.

Save the value of client->closing before calling nbd_request_put() on
that path, and skip client_close() if the client was already closing,
to avoid the use-after-free.

Signed-off-by: wangjian161
---
 nbd/server.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/nbd/server.c b/nbd/server.c
index 4630dd73225..37515ed5209 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2606,6 +2606,7 @@ static coroutine_fn void nbd_trip(void *opaque)
     NBDRequestData *req;
     NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
     int ret;
+    bool client_closing;
     Error *local_err = NULL;
 
     trace_nbd_trip();
@@ -2681,8 +2682,11 @@ disconnect:
     if (local_err) {
         error_reportf_err(local_err, "Disconnect client, due to: ");
     }
+    client_closing = client->closing;
     nbd_request_put(req);
-    client_close(client, true);
+    if (!client_closing) {
+        client_close(client, true);
+    }
     nbd_client_put(client);
 }
 
-- 
Gitee

From de6f3fb0cf92e04c0989a9065910158eecbe4304 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 10:48:58 +0800
Subject: [PATCH 02/10] qemu-nbd: make native the default aio mode

When several threads write to the same file concurrently, file-system
locking degrades performance. qemu-nbd currently defaults to
aio=threads, so a large request, which is split into smaller pieces
across multiple queues, turns into multi-threaded concurrent writes to
the same file and suffers from that lock contention. aio=native does
not have this problem.

Default to native AIO when no aio mode was given explicitly and the
image is opened with O_DIRECT (cache=none).

Signed-off-by: wangjian161
---
 qemu-nbd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index c6c20df68a4..15a4bc40189 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -800,6 +800,10 @@ int main(int argc, char **argv)
     trace_init_file();
     qemu_set_log(LOG_TRACE);
 
+    if (!seen_aio && (flags & BDRV_O_NOCACHE)) {
+        flags |= BDRV_O_NATIVE_AIO;
+    }
+
     socket_activation = check_socket_activation();
     if (socket_activation == 0) {
         setup_address_and_port(&bindto, &port);
-- 
Gitee

From f665f7836a019cc8bb8d46d076508afc761923f0 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 10:55:08 +0800
Subject: [PATCH 03/10] qemu-nbd: set a timeout on the qemu-nbd socket

If memory is exhausted or qemu-nbd is killed with SIGKILL (kill -9),
requests on the NBD socket can no longer be processed and the device
stays stuck forever. Set a timeout on the kernel NBD socket so that it
does not hang indefinitely.

Signed-off-by: wangjian161
---
 nbd/client.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/nbd/client.c b/nbd/client.c
index 30d5383cb19..8ed50140f2d 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -24,6 +24,8 @@
 #include "nbd-internal.h"
 #include "qemu/cutils.h"
 
+#define NBD_TIMEOUT_SECONDS 30
+
 /* Definitions for opaque data types */
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -1301,6 +1303,12 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
         }
     }
 
+    if (ioctl(fd, NBD_SET_TIMEOUT, NBD_TIMEOUT_SECONDS) < 0) {
+        int serrno = errno;
+        error_setg(errp, "Failed setting timeout");
+        return -serrno;
+    }
+
     trace_nbd_init_finish();
 
     return 0;
-- 
Gitee
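As a reference point for patch 03: the same NBD_SET_TIMEOUT ioctl can be issued
directly against an already-configured NBD device node. The standalone sketch
below is illustrative only and not part of the series; the device path
/dev/nbd0 and the 30-second value are assumptions chosen to mirror
NBD_TIMEOUT_SECONDS above.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nbd.h>

int main(void)
{
    const char *dev = "/dev/nbd0";       /* example device node */
    unsigned long timeout_seconds = 30;  /* same value as NBD_TIMEOUT_SECONDS */

    int fd = open(dev, O_RDWR);
    if (fd < 0) {
        fprintf(stderr, "open %s: %s\n", dev, strerror(errno));
        return 1;
    }
    /* The ioctl argument is the timeout in seconds, passed by value. */
    if (ioctl(fd, NBD_SET_TIMEOUT, timeout_seconds) < 0) {
        fprintf(stderr, "NBD_SET_TIMEOUT: %s\n", strerror(errno));
        return 1;
    }
    printf("timeout on %s set to %lu seconds\n", dev, timeout_seconds);
    return 0;
}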
From 56f59125707c0222bbb5d7f820792aba17c3db08 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 11:10:42 +0800
Subject: [PATCH 04/10] qemu-pr: fix ioctl failure for multipath disk

We use ioctl() to detect multipath devices, but we only set the flags
field of struct dm_ioctl (the ioctl argument) and leave the other
fields uninitialized, which can make the ioctl call fail. Zero the
structure first to avoid the failure.

Signed-off-by: wangjian161
---
 scsi/qemu-pr-helper.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index f281daeced8..bbb9b577418 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -288,9 +288,12 @@ static void multipath_pr_init(void)
 
 static int is_mpath(int fd)
 {
-    struct dm_ioctl dm = { .flags = DM_NOFLUSH_FLAG };
+    struct dm_ioctl dm;
     struct dm_target_spec *tgt;
 
+    memset(&dm, 0, sizeof(struct dm_ioctl));
+    dm.flags = DM_NOFLUSH_FLAG;
+
     tgt = dm_dev_ioctl(fd, DM_TABLE_STATUS, &dm);
     if (!tgt) {
         if (errno == ENXIO) {
-- 
Gitee

From 21b172a3ce13c3b499e4265628f7d7c7e1189749 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 11:18:21 +0800
Subject: [PATCH 05/10] block: enable cache mode of empty cdrom

Apply the requested cache mode even when the cdrom drive is empty
(no image file attached).

Signed-off-by: wangjian161
---
 blockdev.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index 10a73fa423d..37e3ee6f264 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -492,6 +492,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     QDict *interval_dict = NULL;
     QList *interval_list = NULL;
     const char *id;
+    const char *cache;
     BlockdevDetectZeroesOptions detect_zeroes =
         BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
     const char *throttling_group = NULL;
@@ -583,6 +584,21 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
 
     read_only = qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false);
 
+    if (!file || !*file) {
+        cache = qdict_get_try_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH);
+        if (cache && !strcmp(cache, "on")) {
+            bdrv_flags |= BDRV_O_NO_FLUSH;
+        }
+
+        cache = qdict_get_try_str(bs_opts, BDRV_OPT_CACHE_DIRECT);
+        if (cache && !strcmp(cache, "on")) {
+            bdrv_flags |= BDRV_O_NOCACHE;
+        }
+
+        qdict_del(bs_opts, BDRV_OPT_CACHE_NO_FLUSH);
+        qdict_del(bs_opts, BDRV_OPT_CACHE_DIRECT);
+    }
+
     /* init */
     if ((!file || !*file) && !qdict_size(bs_opts)) {
         BlockBackendRootState *blk_rs;
-- 
Gitee

From 0a2c96ee5a3463e82397afb9cb36f340a93264c2 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 11:29:15 +0800
Subject: [PATCH 06/10] block: disallow block jobs when there is a BDRV_O_INACTIVE flag

Migration sets the BDRV_O_INACTIVE flag in a BDS's open_flags and keeps
it set until the block devices are activated again on resume. In that
state, any I/O from the VM or from a block job trips the assertion
'assert(!(bs->open_flags & BDRV_O_INACTIVE))' in bdrv_co_pwritev() and
crashes QEMU. Disallow block jobs in this state by faking a blocker.

Signed-off-by: wangjian161
---
 block.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/block.c b/block.c
index 0ac5b163d2a..26c39825676 100644
--- a/block.c
+++ b/block.c
@@ -6692,6 +6692,22 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
                                 bdrv_get_device_or_node_name(bs));
         return true;
     }
+
+    /*
+     * When migration puts a BDRV_O_INACTIVE flag on driver's open_flags,
+     * we fake a blocker that doesn't exist. From now on, block jobs
+     * will not be permitted.
+     */
+    if ((op == BLOCK_OP_TYPE_RESIZE || op == BLOCK_OP_TYPE_COMMIT_SOURCE ||
+         op == BLOCK_OP_TYPE_MIRROR_SOURCE || op == BLOCK_OP_TYPE_MIRROR_TARGET) &&
+        (bs->open_flags & BDRV_O_INACTIVE)) {
+        if (errp) {
+            error_setg(errp, "block device is in use by migration with"
+                       " a driver BDRV_O_INACTIVE flag setted");
+        }
+        return true;
+    }
+
     return false;
 }
 
-- 
Gitee
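For patch 04, the underlying requirement is that the struct dm_ioctl header
passed to device-mapper ioctls be fully initialized before use. The sketch
below shows how such a DM_TABLE_STATUS query might be issued directly against
/dev/mapper/control; it is not the qemu-pr-helper code path, and the device
name "dm-example" and the buffer size are placeholders.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

int main(void)
{
    /* The ioctl argument doubles as the output buffer, so reserve headroom. */
    union {
        struct dm_ioctl dm;
        char buf[16384];
    } u;

    memset(&u, 0, sizeof(u));            /* nothing left uninitialized */
    u.dm.version[0] = DM_VERSION_MAJOR;
    u.dm.version[1] = DM_VERSION_MINOR;
    u.dm.version[2] = DM_VERSION_PATCHLEVEL;
    u.dm.data_size = sizeof(u);
    u.dm.data_start = sizeof(struct dm_ioctl);
    u.dm.flags = DM_NOFLUSH_FLAG;
    snprintf(u.dm.name, DM_NAME_LEN, "%s", "dm-example");  /* placeholder name */

    int ctl = open("/dev/mapper/control", O_RDWR);
    if (ctl < 0) {
        perror("open /dev/mapper/control");
        return 1;
    }
    if (ioctl(ctl, DM_TABLE_STATUS, &u.dm) < 0) {
        perror("DM_TABLE_STATUS");
        return 1;
    }
    printf("%u target(s) in the active table\n", u.dm.target_count);
    return 0;
}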
From 77496578b22e127eb50a5a8c463e92fb3245a7e0 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 11:42:47 +0800
Subject: [PATCH 07/10] scsi: cdrom: Fix crash after remote cdrom detached

There is a small window between the two blk_is_available() calls in
scsi_disk_emulate_command(). If the remote cdrom is detached inside
that window, the later assertion fails and QEMU crashes. Replace the
assertion with a check that returns a NO_MEDIUM sense instead, so the
guest sees a check condition rather than a crashed QEMU.

Signed-off-by: wangjian161
---
 hw/scsi/scsi-disk.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index d4914178ea0..a1053f40363 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -1930,7 +1930,10 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
     memset(outbuf, 0, r->buflen);
     switch (req->cmd.buf[0]) {
     case TEST_UNIT_READY:
-        assert(blk_is_available(s->qdev.conf.blk));
+        if (!blk_is_available(s->qdev.conf.blk)) {
+            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
+            return 0;
+        }
         break;
     case INQUIRY:
         buflen = scsi_disk_emulate_inquiry(req, outbuf);
-- 
Gitee

From 87d8b7dcd880e0cef0c043dfef5ae649652cfe21 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 11:51:43 +0800
Subject: [PATCH 08/10] block: bugfix: disable processing AIO when attaching a scsi disk

When a virtio-scsi disk is initialized, hd_geometry_guess() is called
and may process AIO. At that point the scsi disk is not yet fully
initialized and some fields of struct SCSIDiskState, such as vendor
and version, are still NULL, so handling AIO here can crash QEMU.
Call aio_disable_external() before hd_geometry_guess() so that
external AIO is not processed during this window.

Signed-off-by: wangjian161
---
 hw/block/block.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hw/block/block.c b/hw/block/block.c
index 26c0767552f..2cfc93a68e5 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -224,9 +224,16 @@ bool blkconf_geometry(BlockConf *conf, int *ptrans,
                       Error **errp)
 {
     if (!conf->cyls && !conf->heads && !conf->secs) {
+        AioContext *ctx = blk_get_aio_context(conf->blk);
+
+        /* Callers may not expect this function to dispatch aio handlers, so
+         * disable external aio such as guest device emulation.
+         */
+        aio_disable_external(ctx);
         hd_geometry_guess(conf->blk,
                           &conf->cyls, &conf->heads, &conf->secs,
                           ptrans);
+        aio_enable_external(ctx);
     } else if (ptrans && *ptrans == BIOS_ATA_TRANSLATION_AUTO) {
         *ptrans = hd_bios_chs_auto_trans(conf->cyls, conf->heads, conf->secs);
     }
-- 
Gitee
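For patch 07, a guest probing the drive after the fix should see a CHECK
CONDITION with NOT READY / MEDIUM NOT PRESENT sense data instead of a crashed
QEMU. The standalone sketch below sends a TEST UNIT READY through Linux SG_IO
and decodes that sense; the device path /dev/sr0 is an example only.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(void)
{
    unsigned char cdb[6] = { 0x00, 0, 0, 0, 0, 0 };    /* TEST UNIT READY */
    unsigned char sense[32];
    struct sg_io_hdr io;

    int fd = open("/dev/sr0", O_RDONLY | O_NONBLOCK);  /* example cdrom node */
    if (fd < 0) {
        perror("open");
        return 1;
    }

    memset(&io, 0, sizeof(io));
    memset(sense, 0, sizeof(sense));
    io.interface_id = 'S';
    io.cmd_len = sizeof(cdb);
    io.cmdp = cdb;
    io.mx_sb_len = sizeof(sense);
    io.sbp = sense;
    io.dxfer_direction = SG_DXFER_NONE;   /* no data transfer for TUR */
    io.timeout = 5000;                    /* milliseconds */

    if (ioctl(fd, SG_IO, &io) < 0) {
        perror("SG_IO");
        return 1;
    }
    if (io.sb_len_wr >= 14) {
        /* fixed-format sense: key in byte 2, ASC in byte 12, ASCQ in byte 13 */
        printf("sense key 0x%x, asc 0x%02x, ascq 0x%02x\n",
               sense[2] & 0x0f, sense[12], sense[13]);
    } else {
        printf("unit is ready\n");
    }
    return 0;
}

With no medium present this typically reports sense key 0x2 (NOT READY) and
asc 0x3a (MEDIUM NOT PRESENT), which is what QEMU's SENSE_CODE(NO_MEDIUM)
corresponds to.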
From d0586db311e8b78732923ce46f149fdf8251a59c Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 16:10:22 +0800
Subject: [PATCH 09/10] block: bugfix: Don't pause the vm on a no-space I/O error

When the backend disk is full and the disk uses dataplane I/O, QEMU
pauses the VM, and this can send the QEMU main thread into an endless
loop if a snapshot merge is running at that moment. When the backend
disk is full, only report the error instead of pausing the virtual
machine, i.e. change the default write-error policy from 'enospc' to
'report'.

Signed-off-by: wangjian161
---
 blockdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 37e3ee6f264..3ce294ec4a7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -556,7 +556,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
         qdict_put_str(bs_opts, "driver", buf);
     }
 
-    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
+    on_write_error = BLOCKDEV_ON_ERROR_REPORT;
     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
         on_write_error = parse_block_error_action(buf, 0, &error);
         if (error) {
-- 
Gitee

From ba8fd8a3d11655da0b51148e69c01b78794a3f69 Mon Sep 17 00:00:00 2001
From: WangJian
Date: Wed, 9 Feb 2022 16:34:05 +0800
Subject: [PATCH 10/10] scsi: bugfix: fix division by zero

An error on a PRDM disk may lead to a division by zero in
scsi_read_complete(), so log blocksize changes and abort if the device
reports a blocksize of 0.

Signed-off-by: wangjian161
---
 hw/scsi/scsi-generic.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 0306ccc7b1e..1f515860480 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -179,6 +179,10 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len)
         (r->req.cmd.buf[1] & 0x01)) {
         page = r->req.cmd.buf[2];
         if (page == 0xb0) {
+            if (s->blocksize == 0) {
+                qemu_log("device blocksize is 0!\n");
+                abort();
+            }
             uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk);
             uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk);
 
@@ -314,11 +318,23 @@ static void scsi_read_complete(void * opaque, int ret)
     /* Snoop READ CAPACITY output to set the blocksize.  */
     if (r->req.cmd.buf[0] == READ_CAPACITY_10 &&
         (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) {
-        s->blocksize = ldl_be_p(&r->buf[4]);
+        int new_blocksize = ldl_be_p(&r->buf[4]);
+        if (s->blocksize != new_blocksize) {
+            qemu_log("device id=%s type=%d: blocksize %d change to %d\n",
+                     s->qdev.id ? s->qdev.id : "null", s->type,
+                     s->blocksize, new_blocksize);
+        }
+        s->blocksize = new_blocksize;
         s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL;
     } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
                (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
-        s->blocksize = ldl_be_p(&r->buf[8]);
+        int new_blocksize = ldl_be_p(&r->buf[8]);
+        if (s->blocksize != new_blocksize) {
+            qemu_log("device id=%s type=%d: blocksize %d change to %d\n",
+                     s->qdev.id ? s->qdev.id : "null", s->type,
+                     s->blocksize, new_blocksize);
+        }
+        s->blocksize = new_blocksize;
         s->max_lba = ldq_be_p(&r->buf[0]);
     }
     blk_set_guest_block_size(s->conf.blk, s->blocksize);
-- 
Gitee
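Patch 10 guards the READ CAPACITY parsing in scsi_read_complete() against a
zero block size before it can ever be used as a divisor. The sketch below
reproduces that parsing outside QEMU, with a hand-rolled big-endian load
standing in for ldl_be_p(); the reply bytes are made up for illustration.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* big-endian 32-bit load, equivalent to QEMU's ldl_be_p() */
static uint32_t ldl_be(const uint8_t *p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

int main(void)
{
    /* READ CAPACITY(10) reply: last LBA = 0x003fffff, block size = 512 */
    const uint8_t reply[8] = { 0x00, 0x3f, 0xff, 0xff, 0x00, 0x00, 0x02, 0x00 };

    uint32_t max_lba = ldl_be(&reply[0]);
    uint32_t blocksize = ldl_be(&reply[4]);

    if (blocksize == 0) {
        /* the case the patch turns into a logged abort() */
        fprintf(stderr, "device reported a blocksize of 0\n");
        return EXIT_FAILURE;
    }

    uint64_t capacity = ((uint64_t)max_lba + 1) * blocksize;
    printf("capacity: %llu bytes (%llu blocks of %u bytes)\n",
           (unsigned long long)capacity,
           (unsigned long long)(capacity / blocksize), blocksize);
    return 0;
}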