From 98cbb6d13484e79b6f9da064a40a281f2983be1d Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Wed, 9 Feb 2022 19:58:21 +0800 Subject: [PATCH 01/21] virtio-scsi: bugfix: fix qemu crash for hotplug scsi disk with dataplane The vm will trigger a disk sweep operation after plugging a controller whose io type is iothread. If a scsi disk is attached immediately, the sg_inquiry request in the vm will trigger the assert in virtio_scsi_ctx_check(), which is called by virtio_scsi_handle_cmd_req_prepare(). Add a check in virtio_scsi_handle_cmd_req_prepare() and return an IO error directly if the device has not been initialized. Signed-off-by: Jinhua Cao --- hw/scsi/virtio-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 51fd09522a..781a37fe89 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -638,7 +638,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) req->req.cmd.tag, req->req.cmd.cdb[0]); d = virtio_scsi_device_get(s, req->req.cmd.lun); - if (!d) { + if (!d || !d->qdev.realized) { req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; virtio_scsi_complete_cmd_req(req); return -ENOENT; -- Gitee From cee545754b44b6283408ec6a43eb0e317c98ebb1 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Wed, 9 Feb 2022 20:27:41 +0800 Subject: [PATCH 02/21] virtio: net-tap: bugfix: del net client if net_init_tap_one failed In net_init_tap_one(), if the net-tap initializes successfully but other actions fail during vhost-net hot-plugging, the net-tap will remain in the net clients, causing the next hot-plug to fail again. 
Signed-off-by: Jinhua Cao --- net/tap.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/tap.c b/net/tap.c index c5cbeaa7a2..3f79cd06c2 100644 --- a/net/tap.c +++ b/net/tap.c @@ -684,7 +684,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, tap_set_sndbuf(s->fd, tap, &err); if (err) { error_propagate(errp, err); - return; + goto fail; } if (tap->has_fd || tap->has_fds) { @@ -726,13 +726,13 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } else { warn_report_err(err); } - return; + goto fail; } ret = qemu_try_set_nonblock(vhostfd); if (ret < 0) { error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", name, fd); - return; + goto fail; } } else { vhostfd = open("/dev/vhost-net", O_RDWR); @@ -744,7 +744,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, warn_report("tap: open vhost char device failed: %s", strerror(errno)); } - return; + goto fail; } qemu_set_nonblock(vhostfd); } @@ -758,11 +758,17 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } else { warn_report(VHOST_NET_INIT_FAILED); } - return; + goto fail; } } else if (vhostfdname) { error_setg(errp, "vhostfd(s)= is not valid without vhost"); + goto fail; } + + return; + +fail: + qemu_del_net_client(&s->nc); } static int get_fds(char *str, char *fds[], int max) -- Gitee From 95d334a905e8ddaac4a8cec908dcdb03b2e5993f Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 10:17:20 +0800 Subject: [PATCH 03/21] virtio: bugfix: clean up callback when del virtqueue We will access NULL pointer as follow: 1. Start a vm with multiqueue vhost-net 2. then we write VIRTIO_PCI_GUEST_FEATURES in PCI configuration to trigger multiqueue disable in this vm which will delete the virtqueue. In this step, the tx_bh is deleted but the callback virtio_net_handle_tx_bh still exist. 3. 
Finally, we write VIRTIO_PCI_QUEUE_NOTIFY in PCI configuration to notify the deleted virtqueue. In this way, virtio_net_handle_tx_bh will be called and qemu will crash. Signed-off-by: Jinhua Cao --- hw/net/virtio-net.c | 5 ++++- hw/virtio/virtio.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index f2014d5ea0..b3a5d0b19e 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2644,7 +2644,10 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) return; } virtio_queue_set_notification(vq, 0); - qemu_bh_schedule(q->tx_bh); + + if (q->tx_bh) { + qemu_bh_schedule(q->tx_bh); + } } static void virtio_net_tx_timer(void *opaque) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 9b4ac58a16..ec3e96af3b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2417,6 +2417,7 @@ void virtio_delete_queue(VirtQueue *vq) { vq->vring.num = 0; vq->vring.num_default = 0; + vq->vring.align = 0; vq->handle_output = NULL; vq->handle_aio_output = NULL; g_free(vq->used_elems); -- Gitee From 532566ba64b60f2dd2f8ff41d670712ccafe1e98 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 10:31:38 +0800 Subject: [PATCH 04/21] virtio-net: bugfix: do not delete netdev before virtio net For the vhost-user net-card, it is allowed to delete its network backend while the virtio-net device still exists. However, when the status of the device changes in the guest, QEMU will check whether the network backend exists, otherwise it will crash. So do not allow deleting the network backend directly without first deleting the virtio-net device. 
Signed-off-by: Jinhua Cao --- net/net.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/net.c b/net/net.c index f0d14dbfc1..ed4b1c1740 100644 --- a/net/net.c +++ b/net/net.c @@ -1202,6 +1202,12 @@ void qmp_netdev_del(const char *id, Error **errp) return; } + if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER && nc->peer) { + error_setg(errp, "Device '%s' is a netdev for vhostuser," + "please delete the peer front-end device (virtio-net) first.", id); + return; + } + qemu_del_net_client(nc); /* -- Gitee From 318f0eda68554af0c779e5374f16bf8cdb895fe7 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 10:48:27 +0800 Subject: [PATCH 05/21] virtio-net: fix max vring buf size when set ring num Signed-off-by: Jinhua Cao --- hw/virtio/virtio.c | 9 +++++++-- include/hw/virtio/virtio.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index ec3e96af3b..03afa36e99 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2241,12 +2241,17 @@ void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) { + int vq_max_size = VIRTQUEUE_MAX_SIZE; + + if (!strcmp(vdev->name, "virtio-net")) { + vq_max_size = VIRTIO_NET_VQ_MAX_SIZE; + } + /* Don't allow guest to flip queue between existent and * nonexistent states, or to set it to an invalid size. 
*/ if (!!num != !!vdev->vq[n].vring.num || - num > VIRTQUEUE_MAX_SIZE || - num < 0) { + num > vq_max_size || num < 0) { return; } vdev->vq[n].vring.num = num; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 8bab9cfb75..b3749ce34b 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -49,6 +49,7 @@ size_t virtio_feature_get_config_size(const VirtIOFeature *features, typedef struct VirtQueue VirtQueue; #define VIRTQUEUE_MAX_SIZE 1024 +#define VIRTIO_NET_VQ_MAX_SIZE (4096) typedef struct VirtQueueElement { -- Gitee From 9e04e1c6a7a12e3e1d0a8a7cf07f441597a1dbb7 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 11:09:36 +0800 Subject: [PATCH 06/21] virtio: check descriptor numbers Check if the vring num is normal in virtio_save(), and add LOG the vm push the wrong viring num down through writing IO Port. Signed-off-by: Jinhua Cao --- hw/virtio/virtio.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 03afa36e99..007f4c9e26 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2860,6 +2860,22 @@ static const VMStateDescription vmstate_virtio = { } }; +static void check_vring_avail_num(VirtIODevice *vdev, int index) +{ + uint16_t nheads; + + /* Check it isn't doing strange things with descriptor numbers. 
*/ + nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; + if (nheads > vdev->vq[index].vring.num) { + qemu_log("VQ %d size 0x%x Guest index 0x%x " + "inconsistent with Host index 0x%x: " + "delta 0x%x\n", + index, vdev->vq[index].vring.num, + vring_avail_idx(&vdev->vq[index]), + vdev->vq[index].last_avail_idx, nheads); + } +} + int virtio_save(VirtIODevice *vdev, QEMUFile *f) { BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); @@ -2890,6 +2906,8 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f) if (vdev->vq[i].vring.num == 0) break; + check_vring_avail_num(vdev, i); + qemu_put_be32(f, vdev->vq[i].vring.num); if (k->has_variable_vring_alignment) { qemu_put_be32(f, vdev->vq[i].vring.align); -- Gitee From 41aa66e37d04246d48b5417c57967425ecc466a0 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 11:16:26 +0800 Subject: [PATCH 07/21] virtio: bugfix: add rcu_read_lock when vring_avail_idx is called vring_avail_idx should be called within rcu_read_lock(), or it may get NULL caches in vring_get_region_caches() and trigger assert(). Signed-off-by: Jinhua Cao --- hw/virtio/virtio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 007f4c9e26..0af9684881 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2864,6 +2864,7 @@ static void check_vring_avail_num(VirtIODevice *vdev, int index) { uint16_t nheads; + rcu_read_lock(); /* Check it isn't doing strange things with descriptor numbers. 
*/ nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; if (nheads > vdev->vq[index].vring.num) { @@ -2874,6 +2875,7 @@ static void check_vring_avail_num(VirtIODevice *vdev, int index) vring_avail_idx(&vdev->vq[index]), vdev->vq[index].last_avail_idx, nheads); } + rcu_read_unlock(); } int virtio_save(VirtIODevice *vdev, QEMUFile *f) -- Gitee From 8a08b3b41400e152cc1786ae5a8a53507f8e925c Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 14:16:17 +0800 Subject: [PATCH 08/21] virtio: print the guest virtio_net features that host does not support Signed-off-by: Jinhua Cao --- hw/net/virtio-net.c | 41 ++++++++++++++++++++++++++++++++++++++ hw/virtio/virtio.c | 7 +++++++ include/hw/virtio/virtio.h | 1 + 3 files changed, 49 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index b3a5d0b19e..6874c88bc0 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3692,6 +3692,46 @@ static Property virtio_net_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static void virtio_net_print_features(uint64_t features) +{ + Property *props = virtio_net_properties; + int feature_cnt = 0; + + if (!features) { + return; + } + printf("virtio_net_feature: "); + + for (; features && props->name; props++) { + /* The bitnr of property may be default(0) besides 'csum' property. */ + if (props->bitnr == 0 && strcmp(props->name, "csum")) { + continue; + } + + /* Features only support 64bit. */ + if (props->bitnr > 63) { + continue; + } + + if (virtio_has_feature(features, props->bitnr)) { + virtio_clear_feature(&features, props->bitnr); + if (feature_cnt != 0) { + printf(", "); + } + printf("%s", props->name); + feature_cnt++; + } + } + + if (features) { + if (feature_cnt != 0) { + printf(", "); + } + printf("unkown bits 0x%." 
PRIx64, features); + } + printf("\n"); +} + static void virtio_net_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -3706,6 +3746,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) vdc->set_config = virtio_net_set_config; vdc->get_features = virtio_net_get_features; vdc->set_features = virtio_net_set_features; + vdc->print_features = virtio_net_print_features; vdc->bad_features = virtio_net_bad_features; vdc->reset = virtio_net_reset; vdc->set_status = virtio_net_set_status; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 0af9684881..9a2a83d507 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2967,6 +2967,13 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); bool bad = (val & ~(vdev->host_features)) != 0; + uint64_t feat = val & ~(vdev->host_features); + + if (bad && k->print_features) { + qemu_log("error: Please check host config, "\ + "because host does not support required feature bits 0x%" PRIx64 "\n", feat); + k->print_features(feat); + } val &= vdev->host_features; if (k->set_features) { diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index b3749ce34b..7472145821 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -127,6 +127,7 @@ struct VirtioDeviceClass { int (*validate_features)(VirtIODevice *vdev); void (*get_config)(VirtIODevice *vdev, uint8_t *config); void (*set_config)(VirtIODevice *vdev, const uint8_t *config); + void (*print_features)(uint64_t features); void (*reset)(VirtIODevice *vdev); void (*set_status)(VirtIODevice *vdev, uint8_t val); /* For transitional devices, this is a bitmap of features -- Gitee From 74ab61b4317f12b231fb2cbcd54a333a07efd678 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 14:37:52 +0800 Subject: [PATCH 09/21] virtio: bugfix: check the value of caches before accessing it Vring caches may be NULL in 
check_vring_avail_num() if virtio_reset() is called at the same time, such as when the virtual machine starts. So check it before accessing it in vring_avail_idx(). Signed-off-by: Jinhua Cao --- hw/virtio/virtio.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 9a2a83d507..b08fff9419 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2863,8 +2863,19 @@ static const VMStateDescription vmstate_virtio = { static void check_vring_avail_num(VirtIODevice *vdev, int index) { uint16_t nheads; + VRingMemoryRegionCaches *caches; rcu_read_lock(); + caches = qatomic_rcu_read(&vdev->vq[index].vring.caches); + if (caches == NULL) { + /* + * caches may be NULL if virtio_reset is called at the same time, + * such as when the virtual machine starts. + */ + rcu_read_unlock(); + return; + } + /* Check it isn't doing strange things with descriptor numbers. */ nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; if (nheads > vdev->vq[index].vring.num) { -- Gitee From 7beaecc21a8a573d43c7ad7604ac77cdf5bbf405 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Sat, 12 Feb 2022 17:22:38 +0800 Subject: [PATCH 10/21] virtio-net: set the max of queue size to 4096 Signed-off-by: Jinhua Cao --- hw/net/virtio-net.c | 10 +++++----- hw/virtio/virtio.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 6874c88bc0..009dc9f3d1 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -637,7 +637,7 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n) return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; } - return VIRTQUEUE_MAX_SIZE; + return VIRTIO_NET_VQ_MAX_SIZE; } static int peer_attach(VirtIONet *n, int index) @@ -3394,23 +3394,23 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) * help from us (using virtio 1 and up). 
*/ if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || - n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || + n->net_conf.rx_queue_size > VIRTIO_NET_VQ_MAX_SIZE || !is_power_of_2(n->net_conf.rx_queue_size)) { error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " "must be a power of 2 between %d and %d.", n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, - VIRTQUEUE_MAX_SIZE); + VIRTIO_NET_VQ_MAX_SIZE); virtio_cleanup(vdev); return; } if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || - n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || + n->net_conf.tx_queue_size > VIRTIO_NET_VQ_MAX_SIZE || !is_power_of_2(n->net_conf.tx_queue_size)) { error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " "must be a power of 2 between %d and %d", n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, - VIRTQUEUE_MAX_SIZE); + VIRTIO_NET_VQ_MAX_SIZE); virtio_cleanup(vdev); return; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index b08fff9419..120672672e 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2401,7 +2401,7 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, break; } - if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) { + if (i == VIRTIO_QUEUE_MAX) { qemu_log("unacceptable queue_size (%d) or num (%d)\n", queue_size, i); abort(); -- Gitee From 88dfb4236c735c608f8ca91cfbfb5ac424d654aa Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 17:28:49 +0800 Subject: [PATCH 11/21] virtio-net: update the default and max of rx/tx_queue_size Set the max of tx_queue_size to 4096 even if the backends are not vhost-user. Set the default of rx/tx_queue_size to 2048 if the backends are vhost-user, otherwise to 4096. 
Signed-off-by: Jinhua Cao --- hw/net/virtio-net.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 009dc9f3d1..e887589a30 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -51,12 +51,11 @@ #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ /* previously fixed value */ -#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 -#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 +#define VIRTIO_NET_VHOST_USER_DEFAULT_SIZE 2048 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ -#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE -#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE +#define VIRTIO_NET_RX_QUEUE_MIN_SIZE 256 +#define VIRTIO_NET_TX_QUEUE_MIN_SIZE 256 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ @@ -622,6 +621,28 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, } } +static void virtio_net_set_default_queue_size(VirtIONet *n) +{ + NetClientState *peer = n->nic_conf.peers.ncs[0]; + + /* Default value is 0 if not set */ + if (n->net_conf.rx_queue_size == 0) { + if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { + n->net_conf.rx_queue_size = VIRTIO_NET_VHOST_USER_DEFAULT_SIZE; + } else { + n->net_conf.rx_queue_size = VIRTIO_NET_VQ_MAX_SIZE; + } + } + + if (n->net_conf.tx_queue_size == 0) { + if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { + n->net_conf.tx_queue_size = VIRTIO_NET_VHOST_USER_DEFAULT_SIZE; + } else { + n->net_conf.tx_queue_size = VIRTIO_NET_VQ_MAX_SIZE; + } + } +} + static int virtio_net_max_tx_queue_size(VirtIONet *n) { NetClientState *peer = n->nic_conf.peers.ncs[0]; @@ -630,11 +651,11 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n) * Backends other than vhost-user don't support max queue size. 
*/ if (!peer) { - return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; + return VIRTIO_NET_VQ_MAX_SIZE; } if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) { - return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; + return VIRTIO_NET_VQ_MAX_SIZE; } return VIRTIO_NET_VQ_MAX_SIZE; @@ -3388,6 +3409,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) virtio_net_set_config_size(n, n->host_features); virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); + virtio_net_set_default_queue_size(n); + /* * We set a lower limit on RX queue size to what it always was. * Guests that want a smaller ring can always resize it without @@ -3679,10 +3702,8 @@ static Property virtio_net_properties[] = { TX_TIMER_INTERVAL), DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), - DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, - VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), - DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, - VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), + DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 0), + DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 0), DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, true), -- Gitee From a9459c849c5484a022f67a317b72de764c84c845 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Thu, 10 Feb 2022 20:21:33 +0800 Subject: [PATCH 12/21] vhost-user: add unregister_savevm when vhost-user cleanup Signed-off-by: Jinhua Cao --- hw/virtio/vhost-user.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index bf6e50223c..c265e9e92c 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -24,6 +24,7 @@ #include "sysemu/cryptodev.h" #include "migration/migration.h" #include "migration/postcopy-ram.h" +#include "migration/register.h" #include "trace.h" #include @@ 
-2068,6 +2069,7 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) u->region_rb_len = 0; g_free(u); dev->opaque = 0; + unregister_savevm(NULL, "vhost-user", dev); return 0; } -- Gitee From 724134432ef21f1fb2b18bbe55b891d31181ccca Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Fri, 11 Feb 2022 14:25:39 +0800 Subject: [PATCH 13/21] qemu-img: block: dont blk_make_zero if discard_zeroes false Signed-off-by: Jinhua Cao --- block/file-posix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/file-posix.c b/block/file-posix.c index b283093e5b..aed7529f44 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -804,7 +804,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } #endif - bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; + bs->supported_zero_flags = s->discard_zeroes ? (BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) : 0; if (S_ISREG(st.st_mode)) { /* When extending regular files, we get zeros from the OS */ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; -- Gitee From 3a223111d71307eb4fdc18f5ee46ce3d6cb57660 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Fri, 11 Feb 2022 18:05:47 +0800 Subject: [PATCH 14/21] vhost-user: Add support reconnect vhost-user socket Add support reconnect vhost-user socket, the reconnect time is set to be 3 seconds. 
Signed-off-by: Jinhua Cao --- chardev/char-socket.c | 19 ++++++++++++++++++- hw/net/vhost_net.c | 4 +++- hw/virtio/vhost-user.c | 6 ++++++ include/chardev/char.h | 16 ++++++++++++++++ net/vhost-user.c | 3 +++ 5 files changed, 46 insertions(+), 2 deletions(-) diff --git a/chardev/char-socket.c b/chardev/char-socket.c index 836cfa0bc2..b1e9f43ec6 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -393,6 +393,22 @@ static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond) return qio_channel_create_watch(s->ioc, cond); } +static void tcp_chr_set_reconnect_time(Chardev *chr, + int64_t reconnect_time) +{ + SocketChardev *s = SOCKET_CHARDEV(chr); + s->reconnect_time = reconnect_time; +} + +void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time) +{ + ChardevClass *cc = CHARDEV_GET_CLASS(chr); + + if (cc->chr_set_reconnect_time) { + cc->chr_set_reconnect_time(chr, reconnect_time); + } +} + static void remove_hup_source(SocketChardev *s) { if (s->hup_source != NULL) { @@ -591,7 +607,7 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len) if (s->state != TCP_CHARDEV_STATE_DISCONNECTED) { qio_channel_set_blocking(s->ioc, false, NULL); } - if (size == 0) { + if (size == 0 && chr->chr_for_flag != CHR_FOR_VHOST_USER) { /* connection closed */ tcp_chr_disconnect(chr); } @@ -1585,6 +1601,7 @@ static void char_socket_class_init(ObjectClass *oc, void *data) cc->set_msgfds = tcp_set_msgfds; cc->chr_add_client = tcp_chr_add_client; cc->chr_add_watch = tcp_chr_add_watch; + cc->chr_set_reconnect_time = tcp_chr_set_reconnect_time; cc->chr_update_read_handler = tcp_chr_update_read_handler; object_class_property_add(oc, "addr", "SocketAddress", diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 30379d2ca4..a60f7cef9a 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -376,7 +376,9 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, goto err_start; } - if (peer->vring_enable) { + /* ovs needs to restore 
all states of vring */ + if (peer->vring_enable || + ncs[i].peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { /* restore vring enable state */ r = vhost_set_vring_enable(peer, peer->vring_enable); diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index c265e9e92c..fc2b1b81c9 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1926,9 +1926,15 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, uint64_t features, protocol_features, ram_slots; struct vhost_user *u; int err; + Chardev *chr; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + chr = qemu_chr_fe_get_driver(((VhostUserState *)opaque)->chr); + if (chr) { + chr->chr_for_flag = CHR_FOR_VHOST_USER; + } + u = g_new0(struct vhost_user, 1); u->user = opaque; u->dev = dev; diff --git a/include/chardev/char.h b/include/chardev/char.h index a319b5fdff..f388d4b109 100644 --- a/include/chardev/char.h +++ b/include/chardev/char.h @@ -14,6 +14,8 @@ #define IAC_SB 250 #define IAC 255 +#define CHR_FOR_VHOST_USER 0x32a1 + /* character device */ typedef struct CharBackend CharBackend; @@ -70,6 +72,7 @@ struct Chardev { GSource *gsource; GMainContext *gcontext; DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST); + int chr_for_flag; }; /** @@ -227,6 +230,16 @@ int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all); #define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true) int qemu_chr_wait_connected(Chardev *chr, Error **errp); +/** + * @qemu_chr_set_reconnect_time: + * + * Set reconnect time for char disconnect. + * Currently, only vhost user will call it. 
+ * + * @reconnect_time the reconnect_time to be set + */ +void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time); + #define TYPE_CHARDEV "chardev" OBJECT_DECLARE_TYPE(Chardev, ChardevClass, CHARDEV) @@ -306,6 +319,9 @@ struct ChardevClass { /* handle various events */ void (*chr_be_event)(Chardev *s, QEMUChrEvent event); + + /* set reconnect time */ + void (*chr_set_reconnect_time)(Chardev *chr, int64_t reconnect_time); }; Chardev *qemu_chardev_new(const char *id, const char *typename, diff --git a/net/vhost-user.c b/net/vhost-user.c index b1a0247b59..d1aefcb9aa 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -21,6 +21,8 @@ #include "qemu/option.h" #include "trace.h" +#define VHOST_USER_RECONNECT_TIME (3) + typedef struct NetVhostUserState { NetClientState nc; CharBackend chr; /* only queue index 0 */ @@ -287,6 +289,7 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) trace_vhost_user_event(chr->label, event); switch (event) { case CHR_EVENT_OPENED: + qemu_chr_set_reconnect_time(chr, VHOST_USER_RECONNECT_TIME); if (vhost_user_start(queues, ncs, s->vhost_user) < 0) { qemu_chr_fe_disconnect(&s->chr); return; -- Gitee From 12af29806ba8ede96567e4df9223f0c02669727c Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Fri, 11 Feb 2022 18:49:21 +0800 Subject: [PATCH 15/21] vhost-user: Set the acked_features to vm's feature Fix the problem that when the vm restarts and ovs restarts, the network becomes unreachable. The solution is to set the acked_features to the vm's feature, just the same as when the guest virtio-net mod loads. 
Signed-off-by: Jinhua Cao --- hw/net/vhost_net.c | 58 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index a60f7cef9a..e8a79db94d 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -152,9 +152,26 @@ static int vhost_net_get_fd(NetClientState *backend) } } +static uint64_t vhost_get_mask_features(const int *feature_bits, uint64_t features) +{ + const int *bit = feature_bits; + uint64_t out_features = 0; + + while (*bit != VHOST_INVALID_FEATURE_BIT) { + uint64_t bit_mask = (1ULL << *bit); + if (features & bit_mask) { + out_features |= bit_mask; + } + bit++; + } + return out_features; +} + struct vhost_net *vhost_net_init(VhostNetOptions *options) { int r; + VirtIONet *n; + VirtIODevice *vdev; bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; struct vhost_net *net = g_new0(struct vhost_net, 1); uint64_t features = 0; @@ -180,7 +197,46 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) net->backend = r; net->dev.protocol_features = 0; } else { - net->dev.backend_features = 0; + /* for ovs restart when vm start. + * Normal situation: + * 1.vm start. + * 2.vhost_net_init init ok, then dev.acked_features is 0x40000000. + * 3.guest virtio-net mod load. qemu will call virtio_net_set_features set + * dev.acked_features to 0x40408000. + * 4.feature set to ovs's vhostuser(0x40408000). + * 5.ovs restart. + * 6.vhost_user_stop will save net->dev.acked_features(0x40408000) to + * VhostUserState's acked_features(0x40408000). + * 7.restart ok. + * 8.vhost_net_init fun call vhost_user_get_acked_features get the save + * features, and set to net->dev.acked_features. + * Abnormal situation: + * 1.vm start. + * 2.vhost_net_init init ok, then dev.acked_features is 0x40000000. + * 3.ovs restart. + * 4.vhost_user_stop will save net->dev.acked_features(0x40000000) to + * VhostUserState's acked_features(0x40000000). + * 5.guest virtio-net mod load. 
qemu will call virtio_net_set_features set + * dev.acked_features to 0x40408000. + * 6.restart ok. + * 7.vhost_net_init fun call vhost_user_get_acked_features get the save + * features(0x40000000), and set to net->dev.acked_features(0x40000000). + * 8.feature set to ovs's vhostuser(0x40000000). + * + * in abnormal situation, qemu set the wrong features to ovs's vhostuser, + * then the vm's network will be down. + * in abnormal situation, we found it just lost the guest feartures in + * acked_features, so hear we set the acked_features to vm's featrue + * just the same as guest virtio-net mod load. + */ + if (options->net_backend->peer) { + n = qemu_get_nic_opaque(options->net_backend->peer); + vdev = VIRTIO_DEVICE(n); + net->dev.backend_features = vhost_get_mask_features(vhost_net_get_feature_bits(net), + vdev->guest_features); + } else { + net->dev.backend_features = 0; + } net->dev.protocol_features = 0; net->backend = -1; -- Gitee From 5c753d539a968f2127ff6e5b916cd4b38a08b40c Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Fri, 11 Feb 2022 18:59:34 +0800 Subject: [PATCH 16/21] vhost-user: add vhost_set_mem_table when vm load_setup at destination When migrate huge vm, packages lost are 90+. During the load_setup of the destination vm, pass the vm mem structure to ovs, the netcard could be enabled when the migration finish state shifting. 
Signed-off-by: Jinhua Cao --- hw/virtio/vhost-user.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index fc2b1b81c9..a8feea489b 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1920,6 +1920,28 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, return 0; } +static int vhost_user_load_setup(QEMUFile *f, void *opaque) +{ + struct vhost_dev *hdev = opaque; + int r; + + if (hdev->vhost_ops && hdev->vhost_ops->vhost_set_mem_table) { + r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); + if (r < 0) { + qemu_log("error: vhost_set_mem_table failed: %s(%d)\n", + strerror(errno), errno); + return r; + } else { + qemu_log("info: vhost_set_mem_table OK\n"); + } + } + return 0; +} + +SaveVMHandlers savevm_vhost_user_handlers = { + .load_setup = vhost_user_load_setup, +}; + static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, Error **errp) { @@ -2044,6 +2066,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, u->postcopy_notifier.notify = vhost_user_postcopy_notifier; postcopy_add_notifier(&u->postcopy_notifier); + register_savevm_live("vhost-user", -1, 1, &savevm_vhost_user_handlers, dev); return 0; } -- Gitee From 185d7efe768229b43911504f64fccd33ad3650ef Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Fri, 11 Feb 2022 19:17:59 +0800 Subject: [PATCH 17/21] vhost-user: add separate memslot counter for vhost-user Used_memslots is equal to dev->mem->nregions now, it is true for vhost kernel, but not for vhost user, which uses the memory regions that have file descriptor. In fact, not all of the memory regions have file descriptor. It is usefully in some scenarios, e.g. used_memslots is 8, and only 5 memory slots can be used by vhost user, it is failed to hot plug a new memory RAM because vhost_has_free_slot just returned false, but we can hot plug it safely in fact. 
Signed-off-by: Jinhua Cao --- hw/virtio/vhost-backend.c | 14 ++++++++++ hw/virtio/vhost-user.c | 27 ++++++++++++++++++ hw/virtio/vhost.c | 46 +++++++++++++++++++++++++------ include/hw/virtio/vhost-backend.h | 4 +++ 4 files changed, 82 insertions(+), 9 deletions(-) diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index b65f8f7e97..2acfb750fd 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -20,6 +20,8 @@ #include #include +static unsigned int vhost_kernel_used_memslots; + static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request, void *arg) { @@ -293,6 +295,16 @@ static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev, qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL); } +static void vhost_kernel_set_used_memslots(struct vhost_dev *dev) +{ + vhost_kernel_used_memslots = dev->mem->nregions; +} + +static unsigned int vhost_kernel_get_used_memslots(void) +{ + return vhost_kernel_used_memslots; +} + const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_backend_init = vhost_kernel_init, @@ -325,6 +337,8 @@ const VhostOps kernel_ops = { #endif /* CONFIG_VHOST_VSOCK */ .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback, .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg, + .vhost_set_used_memslots = vhost_kernel_set_used_memslots, + .vhost_get_used_memslots = vhost_kernel_get_used_memslots, }; #endif diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index a8feea489b..176cae9244 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -234,6 +234,7 @@ static VhostUserMsg m __attribute__ ((unused)); /* The version of the protocol we support */ #define VHOST_USER_VERSION (0x1) +static unsigned int vhost_user_used_memslots; struct vhost_user { struct vhost_dev *dev; @@ -2524,6 +2525,30 @@ void vhost_user_cleanup(VhostUserState *user) user->chr = NULL; } +static void vhost_user_set_used_memslots(struct vhost_dev *dev) +{ + 
unsigned int counter = 0; + int i; + + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + ram_addr_t offset; + MemoryRegion *mr; + + mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, + &offset); + if (mr && memory_region_get_fd(mr) > 0) { + counter++; + } + } + vhost_user_used_memslots = counter; +} + +static unsigned int vhost_user_get_used_memslots(void) +{ + return vhost_user_used_memslots; +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_backend_init = vhost_user_backend_init, @@ -2557,4 +2582,6 @@ const VhostOps user_ops = { .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, .vhost_get_inflight_fd = vhost_user_get_inflight_fd, .vhost_set_inflight_fd = vhost_user_set_inflight_fd, + .vhost_set_used_memslots = vhost_user_set_used_memslots, + .vhost_get_used_memslots = vhost_user_get_used_memslots, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index dafb23c481..e4809777bc 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -45,20 +45,20 @@ static struct vhost_log *vhost_log; static struct vhost_log *vhost_log_shm; -static unsigned int used_memslots; static QLIST_HEAD(, vhost_dev) vhost_devices = QLIST_HEAD_INITIALIZER(vhost_devices); bool vhost_has_free_slot(void) { - unsigned int slots_limit = ~0U; struct vhost_dev *hdev; QLIST_FOREACH(hdev, &vhost_devices, entry) { - unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); - slots_limit = MIN(slots_limit, r); + if (hdev->vhost_ops->vhost_get_used_memslots() >= + hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { + return false; + } } - return slots_limit > used_memslots; + return true; } static void vhost_dev_sync_region(struct vhost_dev *dev, @@ -521,7 +521,6 @@ static void vhost_commit(MemoryListener *listener) dev->n_mem_sections * sizeof dev->mem->regions[0]; dev->mem = g_realloc(dev->mem, regions_size); dev->mem->nregions = dev->n_mem_sections; - 
used_memslots = dev->mem->nregions; for (i = 0; i < dev->n_mem_sections; i++) { struct vhost_memory_region *cur_vmr = dev->mem->regions + i; struct MemoryRegionSection *mrs = dev->mem_sections + i; @@ -697,6 +696,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL; memory_region_ref(section->mr); } + dev->vhost_ops->vhost_set_used_memslots(dev); } /* Used for both add and nop callbacks */ @@ -712,6 +712,17 @@ static void vhost_region_addnop(MemoryListener *listener, vhost_region_add_section(dev, section); } +static void vhost_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + struct vhost_dev *dev = container_of(listener, struct vhost_dev, + memory_listener); + if (!vhost_section(dev, section)) { + return; + } + dev->vhost_ops->vhost_set_used_memslots(dev); +} + static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) { struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n); @@ -1319,6 +1330,18 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) event_notifier_cleanup(&vq->masked_notifier); } +static bool vhost_dev_used_memslots_is_exceeded(struct vhost_dev *hdev) +{ + if (hdev->vhost_ops->vhost_get_used_memslots() > + hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { + error_report("vhost backend memory slots limit is less" + " than current number of present memory slots"); + return true; + } + + return false; +} + int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout, Error **errp) @@ -1374,6 +1397,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .name = "vhost", .begin = vhost_begin, .commit = vhost_commit, + .region_del = vhost_region_del, .region_add = vhost_region_addnop, .region_nop = vhost_region_addnop, .log_start = vhost_log_start, @@ -1420,9 +1444,13 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, 
memory_listener_register(&hdev->memory_listener, &address_space_memory); QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); - if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { - error_setg(errp, "vhost backend memory slots limit is less" - " than current number of present memory slots"); + /* + * If we started a VM without any vhost device, + * vhost_dev_used_memslots_is_exceeded will always return false for the + * first time vhost device hot-plug(vhost_get_used_memslots is always 0), + * so it needs to double check here + */ + if (vhost_dev_used_memslots_is_exceeded(hdev)) { r = -EINVAL; goto fail_busyloop; } diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 81bf3109f8..a64708f456 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -125,6 +125,8 @@ typedef int (*vhost_vq_get_addr_op)(struct vhost_dev *dev, typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); +typedef void (*vhost_set_used_memslots_op)(struct vhost_dev *dev); +typedef unsigned int (*vhost_get_used_memslots_op)(void); typedef struct VhostOps { VhostBackendType backend_type; @@ -171,6 +173,8 @@ typedef struct VhostOps { vhost_vq_get_addr_op vhost_vq_get_addr; vhost_get_device_id_op vhost_get_device_id; vhost_force_iommu_op vhost_force_iommu; + vhost_set_used_memslots_op vhost_set_used_memslots; + vhost_get_used_memslots_op vhost_get_used_memslots; } VhostOps; int vhost_backend_update_device_iotlb(struct vhost_dev *dev, -- Gitee From f46191f24706a6200cfe607a902b3da45f57c9ad Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Fri, 11 Feb 2022 19:24:30 +0800 Subject: [PATCH 18/21] vhost-user: quit infinite loop while used memslots is more than the backend limit When the number of used memslots exceeds the backend limit, attaching the vhost-user netcard fails; quit the infinite loop in that case instead of retrying forever. 
Signed-off-by: Jinhua Cao --- hw/virtio/vhost.c | 9 +++++++++ include/hw/virtio/vhost.h | 1 + net/vhost-user.c | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index e4809777bc..4c4072951c 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -48,6 +48,8 @@ static struct vhost_log *vhost_log_shm; static QLIST_HEAD(, vhost_dev) vhost_devices = QLIST_HEAD_INITIALIZER(vhost_devices); +bool used_memslots_exceeded; + bool vhost_has_free_slot(void) { struct vhost_dev *hdev; @@ -1336,9 +1338,11 @@ static bool vhost_dev_used_memslots_is_exceeded(struct vhost_dev *hdev) hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { error_report("vhost backend memory slots limit is less" " than current number of present memory slots"); + used_memslots_exceeded = true; return true; } + used_memslots_exceeded = false; return false; } @@ -1895,3 +1899,8 @@ int vhost_net_set_backend(struct vhost_dev *hdev, return -1; } + +bool used_memslots_is_exceeded(void) +{ + return used_memslots_exceeded; +} diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 58a73e7b7a..86f36f0106 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -154,4 +154,5 @@ int vhost_dev_set_inflight(struct vhost_dev *dev, struct vhost_inflight *inflight); int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, struct vhost_inflight *inflight); +bool used_memslots_is_exceeded(void); #endif diff --git a/net/vhost-user.c b/net/vhost-user.c index d1aefcb9aa..f910a286e4 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -20,6 +20,7 @@ #include "qemu/error-report.h" #include "qemu/option.h" #include "trace.h" +#include "include/hw/virtio/vhost.h" #define VHOST_USER_RECONNECT_TIME (3) @@ -369,6 +370,11 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, NULL, nc0->name, NULL, true); + if (used_memslots_is_exceeded()) { + 
error_report("used memslots exceeded the backend limit, quit " + "loop"); + goto err; + } } while (!s->started); assert(s->vhost_net); -- Gitee From 1545a60a8b78490c7dc8909b7012bca63dba63cd Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Sat, 12 Feb 2022 15:41:08 +0800 Subject: [PATCH 19/21] qmp: add command to query used memslots of vhost-net and vhost-user Signed-off-by: Jinhua Cao --- hw/virtio/vhost-backend.c | 2 +- hw/virtio/vhost-user.c | 2 +- include/hw/virtio/vhost-backend.h | 2 ++ monitor/qmp-cmds.c | 12 ++++++++++++ qapi/net.json | 18 ++++++++++++++++++ qapi/pragma.json | 4 +++- 6 files changed, 37 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 2acfb750fd..d8e1710758 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -300,7 +300,7 @@ static void vhost_kernel_set_used_memslots(struct vhost_dev *dev) vhost_kernel_used_memslots = dev->mem->nregions; } -static unsigned int vhost_kernel_get_used_memslots(void) +unsigned int vhost_kernel_get_used_memslots(void) { return vhost_kernel_used_memslots; } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 176cae9244..8f69a3b850 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2544,7 +2544,7 @@ static void vhost_user_set_used_memslots(struct vhost_dev *dev) vhost_user_used_memslots = counter; } -static unsigned int vhost_user_get_used_memslots(void) +unsigned int vhost_user_get_used_memslots(void) { return vhost_user_used_memslots; } diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index a64708f456..7bbc658161 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -190,4 +190,6 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd); +unsigned int vhost_kernel_get_used_memslots(void); +unsigned int vhost_user_get_used_memslots(void); #endif /* VHOST_BACKEND_H */ diff --git 
a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c index 98868cee03..f3e80ec8a7 100644 --- a/monitor/qmp-cmds.c +++ b/monitor/qmp-cmds.c @@ -36,6 +36,7 @@ #include "qapi/qapi-commands-machine.h" #include "qapi/qapi-commands-misc.h" #include "qapi/qapi-commands-ui.h" +#include "qapi/qapi-commands-net.h" #include "qapi/type-helpers.h" #include "qapi/qmp/qerror.h" #include "exec/ramlist.h" @@ -43,6 +44,7 @@ #include "hw/acpi/acpi_dev_interface.h" #include "hw/intc/intc.h" #include "hw/rdma/rdma.h" +#include "hw/virtio/vhost-backend.h" NameInfo *qmp_query_name(Error **errp) { @@ -471,3 +473,13 @@ int64_t qmp_query_rtc_date_diff(Error **errp) { return get_rtc_date_diff(); } + +uint32_t qmp_query_vhost_kernel_used_memslots(Error **errp) +{ + return vhost_kernel_get_used_memslots(); +} + +uint32_t qmp_query_vhost_user_used_memslots(Error **errp) +{ + return vhost_user_get_used_memslots(); +} diff --git a/qapi/net.json b/qapi/net.json index 7fab2e7cd8..c9ff849eed 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -696,3 +696,21 @@ ## { 'event': 'FAILOVER_NEGOTIATED', 'data': {'device-id': 'str'} } + +## +# @query-vhost-kernel-used-memslots: +# +# Get vhost-kernel nic used memslots +# +# Since: 4.1 +## +{ 'command': 'query-vhost-kernel-used-memslots', 'returns': 'uint32' } + +## +# @query-vhost-user-used-memslots: +# +# Get vhost-user nic used memslots +# +# Since: 4.1 +## +{ 'command': 'query-vhost-user-used-memslots', 'returns': 'uint32' } diff --git a/qapi/pragma.json b/qapi/pragma.json index b37f6de445..d35c897acb 100644 --- a/qapi/pragma.json +++ b/qapi/pragma.json @@ -27,7 +27,9 @@ 'query-tpm-models', 'query-tpm-types', 'ringbuf-read', - 'query-rtc-date-diff' ], + 'query-rtc-date-diff', + 'query-vhost-user-used-memslots', + 'query-vhost-kernel-used-memslots' ], # Externally visible types whose member names may use uppercase 'member-name-exceptions': [ # visible in: 'ACPISlotType', # query-acpi-ospm-status -- Gitee From 8c52233c08fe66b2e5c79fd514d4f804aa6fe427 Mon Sep 17 
00:00:00 2001 From: Jinhua Cao Date: Fri, 11 Feb 2022 20:13:50 +0800 Subject: [PATCH 20/21] vhost-user-scsi: add support for SPDK hot upgrade In the hot upgrade scenario, add a reconnection mechanism between qemu and SPDK so that they reconnect after the upgrade. Signed-off-by: Jinhua Cao --- hw/scsi/vhost-user-scsi.c | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 1b2f7eed98..052740a76e 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -29,6 +29,9 @@ #include "hw/virtio/virtio-access.h" #include "chardev/char-fe.h" #include "sysemu/sysemu.h" +#include "qemu/log.h" + +#define VHOST_USER_SCSI_RECONNECT_TIME 3 /* Features supported by the host application */ static const int user_feature_bits[] = { @@ -59,7 +62,7 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) ret = vhost_scsi_common_start(vsc); if (ret < 0) { error_report("unable to start vhost-user-scsi: %s", strerror(-ret)); - exit(1); + return; } } else { vhost_scsi_common_stop(vsc); @@ -89,11 +92,43 @@ static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) { } +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event) +{ + int ret; + VHostUserSCSI *s = (VHostUserSCSI *)opaque; + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + qemu_log("event:%d, vdev status:%d\n", event, vdev->status); + + /* if CHR_EVENT_CLOSED, do nothing */ + if (event != CHR_EVENT_OPENED) { + return; + }; + + /* if status of vdev is not DRIVER_OK, just waiting. 
+ * vsc should start when status change to DRIVER_OK */ + if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return; + } + + /* vsc may not fully start because of vhost app stopping */ + if (vsc->dev.started) { + vhost_scsi_common_stop(vsc); + } + + ret = vhost_scsi_common_start(vsc); + if (ret < 0) { + qemu_log("unable to start vhost-user-scsi: %s\n", strerror(-ret)); + } +} + static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); VHostUserSCSI *s = VHOST_USER_SCSI(dev); VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + Chardev *chr; struct vhost_virtqueue *vqs = NULL; Error *err = NULL; int ret; @@ -132,6 +167,11 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) vsc->lun = 0; vsc->target = vs->conf.boot_tpgt; + chr = qemu_chr_fe_get_driver(&vs->conf.chardev); + qemu_chr_set_reconnect_time(chr, VHOST_USER_SCSI_RECONNECT_TIME); + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, + vhost_user_scsi_event, NULL, s, NULL, true); + return; free_vhost: -- Gitee From 3c283ea7ca1902b9d221897fd65c5edb1d16e004 Mon Sep 17 00:00:00 2001 From: Jinhua Cao Date: Fri, 11 Feb 2022 20:33:47 +0800 Subject: [PATCH 21/21] i6300esb watchdog: bugfix: Add a runstate transition QEMU currently abort()s with the following error: invalid runstate transition: 'prelaunch' -> 'postmigrate' Aborted This happens when: |<- the watchdog timeout fires and sets reset_requested to | SHUTDOWN_CAUSE_GUEST_RESET; |<- hot-migration thread sets vm state to RUN_STATE_FINISH_MIGRATE | before the last round of migration; |<- main thread gets the change of reset_requested and triggers | reset, then sets vm state to RUN_STATE_PRELAUNCH; |<- hot-migration thread sets vm state to RUN_STATE_POSTMIGRATE. Then the 'prelaunch' -> 'postmigrate' runstate transition will happen. This transition is legal, so add it to runstate_transitions_def. 
Signed-off-by: Jinhua Cao --- softmmu/runstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/softmmu/runstate.c b/softmmu/runstate.c index 5736d908db..680994cdf8 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -115,6 +115,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, + { RUN_STATE_PRELAUNCH, RUN_STATE_POSTMIGRATE }, { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING }, { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED }, -- Gitee