From d9fef6139e17976db194d73848baff543c4a2590 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 9 Feb 2022 08:49:41 +0800 Subject: [PATCH 1/6] migration: skip cache_drop for bios bootloader and nvram template Qemu enabled page cache dropping for raw devices on the destination host during shared storage migration. However, fsync may take 300ms to multiple seconds to return in a multiple-migration scenario, because all domains on a host share the bios bootloader file. Skip cache_drop for the bios bootloader and nvram template to avoid a downtime increase. --- block.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 0ac5b163d2..91f123a354 100644 --- a/block.c +++ b/block.c @@ -67,6 +67,9 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ +#define DEFAULT_BIOS_BOOT_LOADER_DIR "/usr/share/edk2" +#define DEFAULT_NVRAM_TEMPLATE_DIR "/var/lib/libvirt/qemu/nvram" + static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); @@ -6432,7 +6435,13 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) return ret; } - if (bs->drv->bdrv_co_invalidate_cache) { + /* + * It's not necessary for bios bootloader and nvram template to drop cache + * during migration; skip this step for them to avoid downtime increase. 
+ */ + if (bs->drv->bdrv_co_invalidate_cache && + !strstr(bs->filename, DEFAULT_BIOS_BOOT_LOADER_DIR) && + !strstr(bs->filename, DEFAULT_NVRAM_TEMPLATE_DIR)) { bs->drv->bdrv_co_invalidate_cache(bs, &local_err); if (local_err) { bs->open_flags |= BDRV_O_INACTIVE; -- Gitee From 19523565181bb6efb1b9f819d45a7ca8ea6eca19 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 9 Feb 2022 11:21:28 +0800 Subject: [PATCH 2/6] ps2: fix oob in ps2 kbd fix oob in ps2 kbd --- hw/input/ps2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/input/ps2.c b/hw/input/ps2.c index 9376a8f4ce..5d82ee3cdf 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -205,7 +205,7 @@ void ps2_queue_noirq(PS2State *s, int b) } q->data[q->wptr] = b; - if (++q->wptr == PS2_BUFFER_SIZE) { + if (++q->wptr >= PS2_BUFFER_SIZE) { q->wptr = 0; } q->count++; @@ -578,7 +578,7 @@ uint32_t ps2_read_data(PS2State *s) val = q->data[index]; } else { val = q->data[q->rptr]; - if (++q->rptr == PS2_BUFFER_SIZE) { + if (++q->rptr >= PS2_BUFFER_SIZE) { q->rptr = 0; } q->count--; -- Gitee From 9e2158495059b485f2c28bb649c8b4a87fb3105c Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 9 Feb 2022 11:24:32 +0800 Subject: [PATCH 3/6] Currently, when kvm and qemu cannot handle some kvm exit, qemu will do vm_stop, which puts the vm in the paused state. This action makes the vm unrecoverable, so send guest panic to libvirt instead. 
--- accel/kvm/kvm-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index eecd8031cf..b128d311c2 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2970,7 +2970,7 @@ int kvm_cpu_exec(CPUState *cpu) if (ret < 0) { cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); - vm_stop(RUN_STATE_INTERNAL_ERROR); + qemu_system_guest_panicked(cpu_get_crash_info(cpu)); } qatomic_set(&cpu->exit_request, 0); -- Gitee From 12706113392018fd7aa6471a3cbada62f0180539 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 9 Feb 2022 12:51:19 +0800 Subject: [PATCH 4/6] cpu/features: fix bug for memory leakage The strList is not freed after use. Fix it. --- target/i386/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index aa9e636800..b9690e3250 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -4752,6 +4752,7 @@ static void x86_cpu_get_unavailable_features(Object *obj, Visitor *v, x86_cpu_list_feature_names(xc->filtered_features, &result); visit_type_strList(v, "unavailable-features", &result, errp); + qapi_free_strList(result); } /* Check for missing features that may prevent the CPU class from -- Gitee From 7eb28408efe75192a0f976a197f8f1906d9073e8 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 9 Feb 2022 14:13:05 +0800 Subject: [PATCH 5/6] monitor/qmp: drop inflight rsp if qmp client broken If libvirt restarts while qemu is handling a qmp message, libvirt will reconnect to the qemu monitor socket and query the status of qemu via qmp. But qemu may return the previous qmp response to the newly connected socket, so libvirt receives an unexpected response, concludes that qemu is abnormal, and kills qemu. This patch adds a qmp connection id, which changes on reconnect. When responding to libvirt, check whether the id is still the same; if not, drop the response. 
--- monitor/monitor-internal.h | 1 + monitor/qmp.c | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h index 3da3f86c6a..5435864add 100644 --- a/monitor/monitor-internal.h +++ b/monitor/monitor-internal.h @@ -144,6 +144,7 @@ typedef struct { const QmpCommandList *commands; bool capab_offered[QMP_CAPABILITY__MAX]; /* capabilities offered */ bool capab[QMP_CAPABILITY__MAX]; /* offered and accepted */ + uint64_t qmp_client_id; /*qmp client id, update if peer disconnect */ /* * Protects qmp request/response queue. * Take monitor_lock first when you need both. diff --git a/monitor/qmp.c b/monitor/qmp.c index 092c527b6f..4d1ac66785 100644 --- a/monitor/qmp.c +++ b/monitor/qmp.c @@ -125,18 +125,19 @@ void qmp_send_response(MonitorQMP *mon, const QDict *rsp) * Null @rsp can only happen for commands with QCO_NO_SUCCESS_RESP. * Nothing is emitted then. */ -static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp) +static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp, uint64_t req_client_id) { - if (rsp) { - qmp_send_response(mon, rsp); + if (!rsp || (mon->qmp_client_id != req_client_id)) { + return; } + qmp_send_response(mon, rsp); } /* * Runs outside of coroutine context for OOB commands, but in * coroutine context for everything else. 
*/ -static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) +static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req, uint64_t req_client_id) { QDict *rsp; QDict *error; @@ -156,7 +157,7 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) } } - monitor_qmp_respond(mon, rsp); + monitor_qmp_respond(mon, rsp, req_client_id); qobject_unref(rsp); } @@ -315,13 +316,13 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) trace_monitor_qmp_cmd_in_band(id_json->str); g_string_free(id_json, true); } - monitor_qmp_dispatch(mon, req_obj->req); + monitor_qmp_dispatch(mon, req_obj->req, mon->qmp_client_id); } else { assert(req_obj->err); trace_monitor_qmp_err_in_band(error_get_pretty(req_obj->err)); rsp = qmp_error_response(req_obj->err); req_obj->err = NULL; - monitor_qmp_respond(mon, rsp); + monitor_qmp_respond(mon, rsp, mon->qmp_client_id); qobject_unref(rsp); } @@ -366,7 +367,7 @@ static void handle_qmp_command(void *opaque, QObject *req, Error *err) trace_monitor_qmp_cmd_out_of_band(id_json->str); g_string_free(id_json, true); } - monitor_qmp_dispatch(mon, req); + monitor_qmp_dispatch(mon, req, mon->qmp_client_id); qobject_unref(req); return; } @@ -452,6 +453,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event) mon_refcount++; break; case CHR_EVENT_CLOSED: + mon->qmp_client_id++; /* * Note: this is only useful when the output of the chardev * backend is still open. 
For example, when the backend is @@ -505,6 +507,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp) } qemu_chr_fe_set_echo(&mon->common.chr, true); + mon->qmp_client_id = 1; /* Note: we run QMP monitor in I/O thread when @chr supports that */ monitor_data_init(&mon->common, true, false, qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_GCONTEXT)); -- Gitee From e5fc3a755c9ba9f18bc0416c60f745912cb5b104 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 9 Feb 2022 14:21:39 +0800 Subject: [PATCH 6/6] oslib-posix: optimise vm startup time for 1G hugepage It takes quite a long time to clear a 1G hugepage, which makes glibc pthread_create quite slow. Create the touch_pages threads in advance, and then handle the touch_pages callback. Only the read lock is held here. --- util/oslib-posix.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/util/oslib-posix.c b/util/oslib-posix.c index e8bdb02e1d..18a38b9464 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -84,6 +84,7 @@ typedef struct MemsetThread MemsetThread; static MemsetThread *memset_thread; static int memset_num_threads; +static int started_num_threads; static bool memset_thread_failed; static QemuMutex page_mutex; @@ -464,6 +465,10 @@ static void *do_touch_pages(void *arg) } qemu_mutex_unlock(&page_mutex); + while (started_num_threads != memset_num_threads) { + smp_mb(); + } + /* unblock SIGBUS */ sigemptyset(&set); sigaddset(&set, SIGBUS); @@ -529,7 +534,7 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, memset_thread = g_new0(MemsetThread, memset_num_threads); numpages_per_thread = numpages / memset_num_threads; leftover = numpages % memset_num_threads; - for (i = 0; i < memset_num_threads; i++) { + for (i = 0, started_num_threads = 0; i < memset_num_threads; i++) { memset_thread[i].addr = addr; memset_thread[i].numpages = numpages_per_thread + (i < leftover); memset_thread[i].hpagesize = hpagesize; @@ -537,6 +542,7 @@ static bool touch_all_pages(char 
*area, size_t hpagesize, size_t numpages, do_touch_pages, &memset_thread[i], QEMU_THREAD_JOINABLE); addr += memset_thread[i].numpages * hpagesize; + started_num_threads++; } qemu_mutex_lock(&page_mutex); -- Gitee