diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 8fb3f2eee75fd9f08553314bfb4c962c32d64aa4..cae6cb969fae66bf2985c39c4a96cf2b348efc6e 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -53,6 +53,9 @@
 #include "sysemu/stats.h"
 
 #include "sysemu/kvm.h"
+#include "qemu/units.h"
+#include "migration/migration.h"
+#include "migration/options.h"
 
 /* This check must be after config-host.h is included */
 #ifdef CONFIG_EVENTFD
@@ -897,6 +900,9 @@ static void kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
             /* We don't have a slot if we want to trap every access. */
             return;
         }
+        if (migrate_use_ldst() && mem->start_addr >= GiB) {
+            return;
+        }
         if (kvm_slot_get_dirty_log(s, mem)) {
             kvm_slot_sync_dirty_pages(mem);
         }
diff --git a/migration/ham-protocol.c b/migration/ham-protocol.c
new file mode 100644
index 0000000000000000000000000000000000000000..78c553e3bd8b8af6392de1be8ae18731305c84ee
--- /dev/null
+++ b/migration/ham-protocol.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2025. All rights reserved.
+ *
+ * Description: Supports VM migration using the HAM protocol. Pre-embed related codes.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <dlfcn.h>
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "migration.h"
+#include "multifd.h"
+#include "migration-stats.h"
+#include "qemu-file.h"
+#include "ram.h"
+#include "rdma.h"
+#include "qemu/sockets.h"
+#include "trace.h"
+#include "options.h"
+#include "socket.h"
+
+#include "ham-protocol.h"
+
+/* Handle returned by dlopen() for the HAM library; NULL when closed. */
+static void *handle_ham = NULL;
+
+/* One dynamically resolved entry point: symbol name and where to store it. */
+typedef struct dl_funcs {
+    const char *func_name;
+    void **func;
+} dl_funcs;
+
+/* Symbols to resolve from the HAM library; the list is currently empty. */
+static dl_funcs ham_dlfunc_list[] = {
+};
+
+/*
+ * Start an outgoing migration over the HAM transport.
+ *
+ * Loads the HAM library, flags @s as a HAM migration and hands the
+ * connection setup to the socket transport.  If the library cannot be
+ * loaded, the loader's error is returned through @errp with added context.
+ */
+void ham_start_outgoing_migration(MigrationState *s,
+                                  SocketAddress *saddr,
+                                  Error **errp)
+{
+    ERRP_GUARD();
+
+    if (ham_protocol_dlfunc_init(errp) < 0) {
+        /* @errp is already set; setting it a second time would assert. */
+        error_prepend(errp, "migration: ham start outgoing migration failed: ");
+        return;
+    }
+
+    s->ham_migration = true;
+    socket_start_outgoing_migration(s, saddr, errp);
+    qemu_log("migration: start ham_start_outgoing_migration\n");
+}
+
+/*
+ * Start listening for an incoming migration over the HAM transport.
+ *
+ * Mirrors ham_start_outgoing_migration(): load the HAM library, flag the
+ * current migration state as HAM and delegate to the socket transport.
+ */
+void ham_start_incoming_migration(SocketAddress *saddr,
+                                  Error **errp)
+{
+    ERRP_GUARD();
+    MigrationState *s = migrate_get_current();
+
+    if (ham_protocol_dlfunc_init(errp) < 0) {
+        /* @errp is already set; setting it a second time would assert. */
+        error_prepend(errp, "migration: ham start incoming migration failed: ");
+        return;
+    }
+
+    s->ham_migration = true;
+    socket_start_incoming_migration(saddr, errp);
+    qemu_log("migration: start ham_start_incoming_migration\n");
+}
+
+/* Placeholder hook: the HAM transport does not take over page saving. */
+static int qemu_ham_save_page(QEMUFile *f, ram_addr_t block_offset,
+                              ram_addr_t offset, size_t size)
+{
+    return RAM_SAVE_CONTROL_NOT_SUPP;
+}
+
+/*
+ * Page-save hook used by the RAM code when a HAM migration is active.
+ *
+ * Returns RAM_SAVE_CONTROL_NOT_SUPP when HAM is not in use or the migration
+ * is in postcopy; otherwise returns the transport hook's result, recording
+ * any other negative result as an error on @f.
+ */
+int ham_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                          ram_addr_t offset, size_t size)
+{
+    int ret;
+
+    if (!migrate_ham() || migration_in_postcopy()) {
+        return RAM_SAVE_CONTROL_NOT_SUPP;
+    }
+
+    ret = qemu_ham_save_page(f, block_offset, offset, size);
+    if (ret < 0 && ret != RAM_SAVE_CONTROL_DELAYED &&
+        ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+        qemu_file_set_error(f, ret);
+    }
+    return ret;
+}
+
+/* Reset every resolved entry point so stale pointers are never called. */
+static void ham_dlfunc_list_set_null(void)
+{
+    for (int i = 0; i < ARRAY_SIZE(ham_dlfunc_list); i++) {
+        *ham_dlfunc_list[i].func = NULL;
+    }
+}
+
+/*
+ * Load the HAM library and resolve every symbol in ham_dlfunc_list.
+ *
+ * Returns 0 on success, -1 with @errp set on failure.  A handle left over
+ * from an earlier call is closed first so repeated calls do not leak it.
+ */
+static int ham_dlfunc_open(Error **errp)
+{
+    char *error = NULL;
+
+    ham_protocol_dlfunc_close();
+    handle_ham = dlopen(HAM_SO_PATH, RTLD_LAZY);
+    if (!handle_ham) {
+        error_setg(errp, "dlopen error: %s", dlerror());
+        return -1;
+    }
+
+    for (int i = 0; i < ARRAY_SIZE(ham_dlfunc_list); i++) {
+        /* Clear any stale error so dlerror() reflects this dlsym() only. */
+        dlerror();
+        *ham_dlfunc_list[i].func = dlsym(handle_ham, ham_dlfunc_list[i].func_name);
+        error = dlerror();
+        if (error != NULL) {
+            error_setg(errp, "dlsym error: %s while getting %s", error, ham_dlfunc_list[i].func_name);
+            ham_protocol_dlfunc_close();
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* Load the HAM library, logging a message when that fails. */
+int ham_protocol_dlfunc_init(Error **errp)
+{
+    int ret;
+
+    ret = ham_dlfunc_open(errp);
+    if (ret < 0) {
+        qemu_log("dlsym error, open ham dlfunc failed.\n");
+        return ret;
+    }
+
+    return 0;
+}
+
+/* Close the HAM library (if open) and clear all resolved entry points. */
+void ham_protocol_dlfunc_close(void)
+{
+    if (handle_ham) {
+        (void) dlclose(handle_ham);
+        handle_ham = NULL;
+    }
+    ham_dlfunc_list_set_null();
+}
+
+/* Unload the HAM library if the current migration is a HAM migration. */
+void ham_protocol_migration_cleanup(void)
+{
+    if (!migrate_ham()) {
+        return;
+    }
+    ham_protocol_dlfunc_close();
+}
\ No newline at end of file
diff --git a/migration/ham-protocol.h b/migration/ham-protocol.h
new file mode 100644
index 0000000000000000000000000000000000000000..6fb1255f9d9e7655cb85488083410f672b521d1a
--- /dev/null
+++ b/migration/ham-protocol.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
+ *
+ * Description: Supports VM migration using the HAM protocol. Pre-embed related codes.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef HAM_PROTOCOL_H
+#define HAM_PROTOCOL_H
+
+#include "qemu/sockets.h"
+#include "exec/memory.h"
+
+#define HAM_SO_PATH "libham.so"
+
+void ham_start_outgoing_migration(MigrationState *s,
+                                  SocketAddress *saddr, Error **errp);
+void ham_start_incoming_migration(SocketAddress *saddr, Error **errp);
+int ham_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                          ram_addr_t offset, size_t size);
+int ham_protocol_dlfunc_init(Error **errp);
+void ham_protocol_dlfunc_close(void);
+void ham_protocol_migration_cleanup(void);
+
+#endif
\ No newline at end of file
diff --git a/migration/ham.c b/migration/ham.c
new file mode 100644
index 0000000000000000000000000000000000000000..5fe38d098d83f35608e96a209e2ad694ec5d27fc
--- /dev/null
+++ b/migration/ham.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * Description: HAM: Migrate Operations
+ */
+
+#include "qemu/osdep.h"
+#include <dlfcn.h>
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "migration.h"
+#include "ram.h"
+#include "qapi/qapi-commands-migration.h"
+#include "exec/ramblock.h"
+#include "options.h"
+#include "qemu-file.h"
+#include "ham.h"
+
+#define HAM_LIB_PATH "/usr/lib64/libham.so"
+
+#ifndef DATA_INIT
+#define DATA_INIT __attribute__((constructor)) static void
+#endif
+
+/* True between a successful ham_prepare() and the matching ham_cleanup(). */
+static bool g_started = false;
+
+/* Handle returned by dlopen() for the HAM library; NULL when closed. */
+static void *handle_ham = NULL;
+
+/* Entry points resolved from the HAM library; NULL while it is not loaded. */
+void (*ham_external_log_set_ptr)(ExternalLog logFunc);
+int32_t (*ham_register_ptr)(HamRamInfo *src, HamNumaInfo *dst);
+int32_t (*ham_migrate_ptr)(HamRamPages *ramList, size_t ram_num, int32_t step);
+void (*ham_unregister_ptr)(void);
+int32_t (*ham_rollback_ptr)(pid_t pid);
+int32_t (*ham_pgtable_modify_ptr)(bool cacheable);
+
+/* One dynamically resolved entry point: symbol name and where to store it. */
+typedef struct dl_funcs {
+    const char *func_name;
+    void **func;
+} dl_funcs;
+
+static dl_funcs ham_dlfunc_list[] = {
+    {.func_name = "ubturbo_ham_external_log_set", .func = (void**)&ham_external_log_set_ptr},
+    {.func_name = "ubturbo_ham_register", .func = (void**)&ham_register_ptr},
+    {.func_name = "ubturbo_ham_migrate", .func = (void**)&ham_migrate_ptr},
+    {.func_name = "ubturbo_ham_pgtable_modify", .func = (void**)&ham_pgtable_modify_ptr},
+    {.func_name = "ubturbo_ham_unregister", .func = (void**)&ham_unregister_ptr},
+    {.func_name = "ubturbo_ham_rollback", .func = (void**)&ham_rollback_ptr},
+};
+
+/* Printable names indexed by HamLogLevel (HAM_LOG_DEBUG..HAM_LOG_ERROR). */
+static const char *const log_level_str[] = {
+    "DEBUG",
+    "INFO",
+    "WARN",
+    "ERROR"
+};
+
+/* Step value passed to the library; advanced after each successful commit. */
+static int32_t g_commit_pages = 0;
+/* Destination NUMA layout supplied through qmp_recv_rmtnuma(). */
+static HamNumaInfo g_dst = { .num = 0 };
+
+/*
+ * Tell whether the RAM block called @name holds guest memory handled by HAM.
+ *
+ * Matches the machine's main RAM block plus the "memdimm", "ram-node" and
+ * "/objects/ram-node" name prefixes.  A NULL @name never matches.
+ */
+bool ham_is_vm_ram(const char *name)
+{
+    if (!name) {
+        return false;
+#ifdef __aarch64__
+    } else if (!strcmp(name, "mach-virt.ram")) {
+#else
+    } else if (!strcmp(name, "pc.ram")) {
+#endif
+        return true;
+    } else if (strstart(name, "memdimm", NULL)) {
+        return true;
+    } else if (strstart(name, "ram-node", NULL)) {
+        return true;
+    } else if (strstart(name, "/objects/ram-node", NULL)) {
+        /*
+         * Boards older than 'pc-i440fx-3.0' use the canonical path as the
+         * ramblock id, so their ram-node blocks are named
+         * "/objects/ram-node..." instead of "ram-node...".
+         */
+        return true;
+    }
+    return false;
+}
+
+/* Reset every resolved entry point so stale pointers are never called. */
+static void ham_dlfunc_list_set_null(void)
+{
+    for (size_t i = 0; i < ARRAY_SIZE(ham_dlfunc_list); i++) {
+        *ham_dlfunc_list[i].func = NULL;
+    }
+}
+
+/* Close the HAM library (if open) and clear all resolved entry points. */
+static void ham_dlfunc_close(void)
+{
+    if (handle_ham) {
+        (void)dlclose(handle_ham);
+        handle_ham = NULL;
+    }
+    ham_dlfunc_list_set_null();
+}
+
+/*
+ * Load the HAM library and resolve every symbol in ham_dlfunc_list.
+ *
+ * Returns 0 on success and -1 on failure, in which case the library is
+ * closed again and all entry points are NULL.
+ */
+static int ham_dlfunc_open(void)
+{
+    const char *error;
+
+    /* Drop a handle left open by an earlier call instead of leaking it. */
+    ham_dlfunc_close();
+    handle_ham = dlopen(HAM_LIB_PATH, RTLD_LAZY);
+    if (!handle_ham) {
+        qemu_log("HAM: dlopen error: %s\n", dlerror());
+        return -1;
+    }
+
+    for (size_t i = 0; i < ARRAY_SIZE(ham_dlfunc_list); i++) {
+        /* Clear any stale error so dlerror() reflects this dlsym() only. */
+        dlerror();
+        *ham_dlfunc_list[i].func = dlsym(handle_ham, ham_dlfunc_list[i].func_name);
+        error = dlerror();
+        if (error != NULL) {
+            qemu_log("HAM: dlsym error: %s while getting %s\n", error, ham_dlfunc_list[i].func_name);
+            ham_dlfunc_close();
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* Load the HAM library, logging a message when that fails. */
+static int ham_dlfunc_init(void)
+{
+    int ret = ham_dlfunc_open();
+
+    if (ret < 0) {
+        qemu_log("HAM: open ham dlfunc failed\n");
+    }
+    return ret;
+}
+
+/* Log callback handed to the HAM library; forwards messages to qemu_log(). */
+static void ham_external_log(int level, const char *funcname, int linenr, const char *logBuf)
+{
+    if (level >= HAM_LOG_DEBUG && level <= HAM_LOG_ERROR) {
+        qemu_log("[%s][%s:%d]:%s", log_level_str[level], funcname, linenr, logBuf);
+    } else {
+        qemu_log("[UNKNOWN][%s:%d]:%s", funcname, linenr, logBuf);
+    }
+}
+
+/*
+ * Load the HAM library and register the source RAM layout @src with it.
+ *
+ * Returns 0 on success (or when already prepared) and a non-zero error
+ * otherwise; on failure the library is closed again.
+ */
+static int ham_prepare(HamRamInfo *src)
+{
+    int ret;
+
+    if (g_started) {
+        qemu_log("HAM: repeat prepare, no need\n");
+        return 0;
+    }
+
+    ret = ham_dlfunc_init();
+    if (ret) {
+        qemu_log("HAM: dlfunc init fail, ret:%d\n", ret);
+        return ret;
+    }
+
+    g_commit_pages = 0;
+
+    ham_external_log_set_ptr(ham_external_log);
+
+    ret = ham_register_ptr(src, &g_dst);
+    if (ret) {
+        qemu_log("HAM: start migration fail, ret:%d\n", ret);
+        /* Registration failed, so only the library handle needs releasing. */
+        ham_dlfunc_close();
+        return ret;
+    }
+
+    g_started = true;
+    return 0;
+}
+
+/*
+ * Ask the HAM library to migrate the registered RAM for the current step.
+ *
+ * Returns 0 on success and a negative value on failure, so callers that
+ * test for "< 0" never mistake a failure code for a page count.
+ */
+int ham_pages_commit(void)
+{
+    int ret;
+
+    if (!ham_migrate_ptr) {
+        qemu_log("HAM: page migration failed, library not loaded\n");
+        return -EINVAL;
+    }
+
+    ret = ham_migrate_ptr(NULL, 0, g_commit_pages);
+    if (ret != 0) {
+        qemu_log("HAM: page migration failed, ret:%d\n", ret);
+        return ret < 0 ? ret : -EIO;
+    }
+
+    g_commit_pages++;
+    return 0;
+}
+
+/* Undo ham_prepare(): unregister from the library and unload it. */
+static void ham_cleanup(void)
+{
+    if (!g_started) {
+        return;
+    }
+
+    g_started = false;
+    if (ham_unregister_ptr) {
+        ham_unregister_ptr();
+    }
+    ham_dlfunc_close();
+}
+
+/*
+ * Ask the HAM library to roll back the pages migrated for this process.
+ *
+ * NOTE(review): this reloads and then unloads the library, which clears the
+ * entry points a still-running migration would use; presumably it is only
+ * meant to be called once migration has stopped - confirm.
+ */
+static int ham_pages_rollback(void)
+{
+    pid_t pid = getpid();
+    int ret = ham_dlfunc_init();
+    if (ret) {
+        return ret;
+    }
+
+    ret = ham_rollback_ptr(pid);
+    ham_dlfunc_close();
+    return ret;
+}
+
+/* Source RAM layout handed to the library by ham_migrate_prepare(). */
+static HamRamInfo g_src_ram = { .num = 0 };
+
+/*
+ * Fill @ram_info with this process' pid and one entry per guest RAM block.
+ *
+ * Returns 0 on success or -E2BIG when more than BATCH_NUM blocks qualify.
+ */
+static int ham_init_ram_blocks(HamRamInfo *ram_info)
+{
+    RAMBlock *ram_block = NULL;
+    uint32_t uuid = 0;
+
+    ram_info->pid = getpid();
+    ram_info->num = 0;
+    WITH_RCU_READ_LOCK_GUARD() {
+        RAMBLOCK_FOREACH_MIGRATABLE(ram_block) {
+            if (!ham_is_vm_ram(ram_block->idstr)) {
+                continue;
+            }
+            if (uuid >= BATCH_NUM) {
+                qemu_log("HAM: ram block num exceeds, limit:%u\n", BATCH_NUM);
+                return -E2BIG;
+            }
+            ram_info->blockList[ram_info->num].uuid = uuid++;
+            ram_info->blockList[ram_info->num].hva = (uintptr_t)ram_block->host;
+            ram_info->blockList[ram_info->num].size = ram_block->used_length;
+            ram_info->num++;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Prepare the HAM library for an outgoing migration when the ldst
+ * capability is enabled; on failure the error is recorded on @s and on its
+ * outgoing file.
+ */
+void ham_migrate_prepare(MigrationState *s)
+{
+    Error *err = NULL;
+    int ret;
+
+    if (!migrate_use_ldst()) {
+        return;
+    }
+
+    ret = ham_init_ram_blocks(&g_src_ram);
+    if (ret) {
+        error_setg(&err, "init ram block fail, ret:%d", ret);
+        goto fail;
+    }
+
+    ret = ham_prepare(&g_src_ram);
+    if (ret) {
+        error_setg(&err, "migrate ham prepare fail, ret:%d", ret);
+        goto fail;
+    }
+    return;
+
+fail:
+    migrate_set_error(s, err);
+    error_report_err(err);
+    qemu_file_set_error(s->to_dst_file, ret);
+}
+
+/* Release the HAM library after a migration that used the ldst capability. */
+void ham_migrate_cleanup(void)
+{
+    if (!migrate_use_ldst()) {
+        return;
+    }
+
+    ham_cleanup();
+}
+
+/*
+ * Call madvise(MADV_POPULATE_WRITE) on every guest RAM block and log how
+ * long that took.  A failing madvise() is logged and otherwise ignored.
+ */
+void ham_madvise_page(void)
+{
+    RAMBlock *ram_block;
+    int64_t start, end;
+
+    start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    WITH_RCU_READ_LOCK_GUARD() {
+        RAMBLOCK_FOREACH_MIGRATABLE(ram_block) {
+            if (!ham_is_vm_ram(ram_block->idstr)) {
+                continue;
+            }
+            if (madvise(ram_block->host, ram_block->used_length,
+                        MADV_POPULATE_WRITE) != 0) {
+                qemu_log("HAM: madvise %s failed: %s\n", ram_block->idstr,
+                         strerror(errno));
+            }
+        }
+    }
+    end = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    qemu_log("HAM: madvise cost time:%" PRId64 " ms\n", end - start);
+}
+
+/*
+ * Register this VM's RAM with the HAM library and switch its page tables
+ * to cacheable.  Returns 0 on success and a non-zero error otherwise.
+ *
+ * NOTE(review): like ham_pages_rollback() this reloads and unloads the
+ * library, so it presumably must not run while a migration is active.
+ */
+static int ham_modify_pgtable(void)
+{
+    HamRamInfo ramInfo = { .num = 0 };
+
+    int ret = ham_dlfunc_init();
+    if (ret) {
+        return ret;
+    }
+    ret = ham_init_ram_blocks(&ramInfo);
+    if (ret) {
+        goto close_dlfunc;
+    }
+    ret = ham_register_ptr(&ramInfo, NULL);
+    if (ret) {
+        qemu_log("HAM: start migration fail, ret:%d\n", ret);
+        /* Nothing was registered, so there is nothing to unregister. */
+        goto close_dlfunc;
+    }
+    ret = ham_pgtable_modify_ptr(true);
+    if (ret) {
+        qemu_log("HAM: modify pgtable fail, ret:%d\n", ret);
+    }
+
+    ham_unregister_ptr();
+close_dlfunc:
+    ham_dlfunc_close();
+    return ret;
+}
+
+/*
+ * QMP query-ramblock: report this process' pid and the host address and
+ * size of every guest RAM block.
+ *
+ * Returns a newly allocated VmInfo, or NULL with @errp set when more than
+ * BATCH_NUM blocks qualify.
+ */
+struct VmInfo *qmp_query_ramblock(Error **errp)
+{
+    RAMBlock *ram_block = NULL;
+    VmInfo *vm_info;
+    RamInfo *info = NULL;
+    RamInfoList *head = NULL, **tail = &head;
+    uint32_t uuid = 0;
+
+    WITH_RCU_READ_LOCK_GUARD() {
+        RAMBLOCK_FOREACH_MIGRATABLE(ram_block) {
+            if (!ham_is_vm_ram(ram_block->idstr)) {
+                continue;
+            }
+            if (uuid >= BATCH_NUM) {
+                error_setg(errp, "Ram block num exceeds, limit:%u", BATCH_NUM);
+                /* Free the entries collected so far instead of leaking them. */
+                qapi_free_RamInfoList(head);
+                return NULL;
+            }
+            info = g_malloc0(sizeof(*info));
+            info->uuid = uuid++;
+            info->hva = (uintptr_t)ram_block->host;
+            info->size = ram_block->used_length;
+            qemu_log("HAM: uuid:%u hva:%" PRIu64 " size:%" PRIu64 "\n",
+                     info->uuid, info->hva, info->size);
+            QAPI_LIST_APPEND(tail, info);
+        }
+    }
+
+    /* Allocate the result only once the block list is known to be valid. */
+    vm_info = g_new0(VmInfo, 1);
+    vm_info->pid = getpid();
+    vm_info->block = head;
+    qemu_log("HAM: pid = %" PRId64 "\n", vm_info->pid);
+    return vm_info;
+}
+
+/*
+ * QMP recv-rmtnuma: store the destination pid, the scna value and the
+ * remote NUMA layout for the next migration.
+ *
+ * Sets @errp and leaves the stored state untouched when @numa_info has more
+ * than BATCH_NUM entries, which would overflow the fixed-size table.
+ */
+void qmp_recv_rmtnuma(int64_t pid, uint16_t scna, NumaInfoList *numa_info, Error **errp)
+{
+    uint32_t num = 0;
+
+    for (NumaInfoList *it = numa_info; it != NULL; it = it->next) {
+        if (++num > BATCH_NUM) {
+            error_setg(errp, "Numa info num exceeds, limit:%d", BATCH_NUM);
+            return;
+        }
+    }
+
+    g_dst.pid = pid;
+    g_src_ram.scna = scna;
+    num = 0;
+    for (; numa_info != NULL; numa_info = numa_info->next) {
+        g_dst.numaList[num].numaId = numa_info->value->numa_id;
+        g_dst.numaList[num].size = numa_info->value->size;
+        num++;
+    }
+    g_dst.num = num;
+}
+
+/* QMP rollback-pages: roll back migrated pages, setting @errp on failure. */
+void qmp_rollback_pages(Error **errp)
+{
+    if (ham_pages_rollback()) {
+        error_setg(errp, "rollback pages failed");
+        return;
+    }
+    qemu_log("HAM: completed rollback pages\n");
+}
+
+/* QMP modify-pgtable: make the VM's page tables cacheable via the library. */
+void qmp_modify_pgtable(Error **errp)
+{
+    if (ham_modify_pgtable()) {
+        error_setg(errp, "modify pgtable failed");
+        return;
+    }
+    qemu_log("HAM: completed modify pgtable\n");
+}
\ No newline at end of file
diff --git a/migration/ham.h b/migration/ham.h
new file mode 100644
index 0000000000000000000000000000000000000000..fe38f5a0b9413f784d072c2702e3ce349181a5c5
--- /dev/null
+++ b/migration/ham.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * Description: HAM: Migrate Operations
+ */
+
+#ifndef HAM_H
+#define HAM_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include "qapi/qapi-types-migration.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PAGE_SIZE_2M_SHIFT 21
+#define PAGE_SHIFT_2MTO4K 9
+#define BATCH_NUM 4
+
+typedef enum {
+    HAM_LOG_DEBUG = 0,
+    HAM_LOG_INFO,
+    HAM_LOG_WARNING,
+    HAM_LOG_ERROR,
+    HAM_LOG_BUTT
+} HamLogLevel;
+
+typedef struct {
+    uint32_t uuid;
+    uintptr_t hva;
+    size_t size;
+} HamRamBlock;
+
+typedef struct {
+    pid_t pid;
+    uint16_t scna;
+    uint32_t num;
+    HamRamBlock blockList[BATCH_NUM];
+} HamRamInfo;
+
+typedef struct {
+    uint32_t numaId;
+    size_t size;
+} HamNuma;
+
+typedef struct {
+    pid_t pid;
+    uint32_t num;
+    HamNuma numaList[BATCH_NUM];
+} HamNumaInfo;
+
+typedef struct {
+    int32_t uuid;
+    size_t hvaNum;
+    uintptr_t *hvaList;
+} HamRamPages;
+
+typedef enum {
+    HAM_MIGRATE_PRECOPY = 0,
+    HAM_MIGRATE_COMPLETION = 1,
+    HAM_MIGRATE_BUTT
+} HamMigrateStep;
+
+typedef void (*ExternalLog)(int level, const char *funcname, int linenr, const char *logBuf);
+
+int ham_pages_commit(void);
+
+void ham_migrate_prepare(MigrationState *s);
+
+void ham_migrate_cleanup(void);
+
+bool ham_is_vm_ram(const char *name);
+
+void ham_madvise_page(void);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/migration/meson.build b/migration/meson.build
index aba2581705cb53de18765bbe854926a58a2c21cc..af3b86d4fcee2fe11b64352d69ca35bdfe8cbe0a 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -29,6 +29,8 @@ system_ss.add(files(
   'socket.c',
   'tls.c',
   'threadinfo.c',
+  'ham.c',
+  'ham-protocol.c',
 ), gnutls)
 
 if get_option('replication').allowed()
diff --git a/migration/migration.c b/migration/migration.c
index 91b2267c3f86cc5e6892f1a1d368710cf12b21e0..fdd9d03172f9fbb1c9f8f138f756f9921bdcf34c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -71,6 +71,8 @@
 #include "qemu/log-for-trace.h"
 #include "sysemu/kvm.h"
 #endif
+#include "ham.h"
+#include "ham-protocol.h"
 
 #define DEFAULT_FD_MAX 4096
 
@@ -516,6 +518,16 @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
                               errp)) {
             return false;
         }
+    } else if (strstart(uri, "ham:", NULL)) {
+        addr->transport = MIGRATION_ADDRESS_TYPE_HAM;
+        SocketAddress *saddr = socket_parse(uri, errp);
+        if (!saddr) {
+            return false;
+        }
+        addr->u.socket.type = saddr->type;
+        addr->u.socket.u = saddr->u;
+        /* Don't free the objects inside; their ownership moved to "addr" */
+        g_free(saddr);
     } else {
         error_setg(errp, "unknown migration protocol: %s", uri);
         return false;
@@ -599,6 +611,8 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
         exec_start_incoming_migration(addr->u.exec.args, errp);
     } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
         file_start_incoming_migration(&addr->u.file, errp);
+    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_HAM) {
+        ham_start_incoming_migration(&addr->u.socket, errp);
     } else {
         error_setg(errp, "unknown migration protocol: %s", uri);
     }
@@ -1621,6 +1635,8 @@ int migrate_init(MigrationState *s, Error **errp)
     s->threshold_size = 0;
     s->switchover_acked = false;
     s->rdma_migration = false;
+    s->ham_migration = false;
+    s->iteration_num = 0;
     /*
      * set mig_stats memory to zero for a new migration
      */
@@ -2046,6 +2062,8 @@ void qmp_migrate(const char *uri, bool has_channels,
         exec_start_outgoing_migration(s, addr->u.exec.args, &local_err);
     } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
         file_start_outgoing_migration(s, &addr->u.file, &local_err);
+    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_HAM) {
+        ham_start_outgoing_migration(s, &addr->u.socket, &local_err);
     } else {
         error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri",
                    "a valid migration protocol");
@@ -3144,7 +3162,8 @@ static MigIterateState migration_iteration_run(MigrationState *s)
         trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
     }
 
-    if ((!pending_size || pending_size < s->threshold_size) && can_switchover) {
+    if (((!pending_size || pending_size < s->threshold_size) && can_switchover) ||
+        (migrate_use_ldst() && s->iteration_num >= 1)) {
         trace_migration_thread_low_pending(pending_size);
         migration_completion(s);
         return MIG_ITERATE_BREAK;
@@ -3160,6 +3179,7 @@ static MigIterateState migration_iteration_run(MigrationState *s)
         return MIG_ITERATE_SKIP;
     }
 
+    s->iteration_num++;
     /* Just another iteration step */
     qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
     return MIG_ITERATE_RESUME;
@@ -3365,6 +3385,8 @@ static void *migration_thread(void *opaque)
     qemu_savevm_state_header(s->to_dst_file);
     qemu_mutex_unlock_iothread();
 
+    ham_migrate_prepare(s);
+
     /*
      * If we opened the return path, we need to make sure dst has it
      * opened as well.
@@ -3438,6 +3460,7 @@ out: object_unref(OBJECT(s)); rcu_unregister_thread(); migration_threads_remove(thread); + ham_migrate_cleanup(); return NULL; } diff --git a/migration/migration.h b/migration/migration.h index 66fe4dd799f9d1c10c7ea17737eeac990ebae283..e6f56c18b3ecf2e0946b08c314b94f8f246bca7c 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -470,6 +470,10 @@ struct MigrationState { bool switchover_acked; /* Is this a rdma migration */ bool rdma_migration; + /* Is this a ham migration */ + bool ham_migration; + /* Number of migration iterations */ + uint64_t iteration_num; }; void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/options.c b/migration/options.c index 136a8575df12899ed0e7120ae63a19fd81cb3d17..28f2166eb88c38fc72d71cc2363abf0f6de11d05 100644 --- a/migration/options.c +++ b/migration/options.c @@ -227,6 +227,7 @@ Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-switchover-ack", MIGRATION_CAPABILITY_SWITCHOVER_ACK), DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), + DEFINE_PROP_MIG_CAP("x-ldst", MIGRATION_CAPABILITY_LDST), DEFINE_PROP_END_OF_LIST(), }; @@ -335,6 +336,20 @@ bool migrate_postcopy_ram(void) return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; } +bool migrate_use_ldst(void) +{ + MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_LDST]; +} + +bool migrate_ham(void) +{ + MigrationState *s = migrate_get_current(); + + return s->ham_migration; +} + bool migrate_rdma_pin_all(void) { MigrationState *s = migrate_get_current(); @@ -353,6 +368,9 @@ bool migrate_return_path(void) { MigrationState *s = migrate_get_current(); + if (migrate_use_ldst()) { + return false; + } return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; } @@ -475,7 +493,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, MIGRATION_CAPABILITY_XBZRLE, MIGRATION_CAPABILITY_X_COLO, MIGRATION_CAPABILITY_VALIDATE_UUID, - 
MIGRATION_CAPABILITY_ZERO_COPY_SEND); + MIGRATION_CAPABILITY_ZERO_COPY_SEND, + MIGRATION_CAPABILITY_LDST); static bool migrate_incoming_started(void) { diff --git a/migration/options.h b/migration/options.h index 6b2a893217f0458018d70a17d7655c86f396a03e..4bfdb790f23673d7573ec90088f961cd0c159bf6 100644 --- a/migration/options.h +++ b/migration/options.h @@ -46,6 +46,8 @@ bool migrate_validate_uuid(void); bool migrate_xbzrle(void); bool migrate_zero_blocks(void); bool migrate_zero_copy_send(void); +bool migrate_use_ldst(void); +bool migrate_ham(void); /* * pseudo capabilities diff --git a/migration/ram.c b/migration/ram.c index b46de7cd6d3596a04bd49be67053e96d871db04c..79f84ec8c917e1ab90435f30d86cbbecdb7272e3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -65,6 +65,8 @@ #include "sysemu/dirtylimit.h" #include "sysemu/kvm.h" #include "exec/confidential-guest-support.h" +#include "ham.h" +#include "ham-protocol.h" /* Defines RAM_SAVE_ENCRYPTED_PAGE and RAM_SAVE_SHARED_REGION_LIST */ #include "target/i386/sev.h" @@ -1075,6 +1077,12 @@ static void migration_bitmap_sync(RAMState *rs, bool last_stage) qemu_mutex_lock(&rs->bitmap_mutex); WITH_RCU_READ_LOCK_GUARD() { RAMBLOCK_FOREACH_NOT_IGNORED(block) { + if (migrate_use_ldst() && ham_is_vm_ram(block->idstr)) { + unsigned long *bmap = block->bmap; + memset(bmap, 0, sizeof(unsigned long)); + bmap[0] = ~0UL; + continue; + } ramblock_sync_dirty_bitmap(rs, block); } stat64_set(&mig_stats.dirty_bytes_last_sync, ram_bytes_remaining()); @@ -1191,8 +1199,13 @@ static bool control_save_page(PageSearchStatus *pss, { int ret; - ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset, - TARGET_PAGE_SIZE); + if (migrate_ham()) { + ret = ham_control_save_page(pss->pss_channel, pss->block->offset, offset, + TARGET_PAGE_SIZE); + } else { + ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset, + TARGET_PAGE_SIZE); + } if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { return false; } @@ -1467,6 +1480,9 @@ 
static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) /* Update pss->page for the next dirty bit in ramblock */ pss_find_next_dirty(pss); + if (migrate_use_ldst() && pss->complete_round) { + return PAGE_ALL_CLEAN; + } if (pss->complete_round && pss->block == rs->last_seen_block && pss->page >= rs->last_page) { /* @@ -2591,6 +2607,21 @@ static int ram_save_csv3_pages(RAMState *rs, PageSearchStatus *pss) return pages; } +static int ram_ham_migrate_page(PageSearchStatus *pss) +{ + int pages = 0; + int ret; + + pss->page += (pss->block->used_length >> TARGET_PAGE_BITS); + pages += (pss->block->used_length >> TARGET_PAGE_BITS); + ret = ham_pages_commit(); + if (ret) { + return ret; + } + + return pages; +} + /** * ram_save_host_page: save a whole host page * @@ -2644,6 +2675,13 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) /* Update host page boundary information */ pss_host_page_prepare(pss); + if (migrate_use_ldst() && ham_is_vm_ram(pss->block->idstr)) { + pages = ram_ham_migrate_page(pss); + if (pages < 0) { + return pages; + } + goto completed; + } do { page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page); @@ -2683,6 +2721,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) pss_find_next_dirty(pss); } while (pss_within_range(pss)); +completed: pss_host_page_finish(pss); res = ram_save_release_protection(rs, pss, start_page); @@ -3549,6 +3588,9 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) compress_flush_data(); } + if (migrate_use_ldst()) { + continue; + } /* * we want to check in the 1st loop, just in case it was the 1st * time and we had to sync the dirty bitmap. 
diff --git a/migration/savevm.c b/migration/savevm.c index bde05eebe4754feb537e7b23de0bee0e48f89cd7..fd944ca3ef1f3819ff49b61cfab4499680daeef3 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -70,6 +70,7 @@ #include "yank_functions.h" #include "sysemu/qtest.h" #include "options.h" +#include "ham.h" const unsigned int postcopy_ram_discard_version; @@ -2766,6 +2767,10 @@ static int qemu_loadvm_state_setup(QEMUFile *f) return ret; } } + + if (migrate_use_ldst()) { + ham_madvise_page(); + } return 0; } diff --git a/qapi/migration.json b/qapi/migration.json index 37e1d4857e5cef6b44d9eeea54db56daf97e9015..b3829c46cf208ff8470ecafeb311d1463f4a5ae2 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -461,6 +461,10 @@ # and target or migration will not even start. NOTE: If the # migration fails during postcopy the VM will fail. (since 2.6) # +# @ldst: If enabled, the memory of virtual machine will be migrated +# to the borrowed memory via migrate_pages, completing the live +# migration of the virtual machine. 
(Since 8.2) +# # @x-colo: If enabled, migration will never end, and the state of the # VM on the primary side will be migrated continuously to the VM # on secondary side, this process is called COarse-Grain LOck @@ -546,7 +550,7 @@ { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', { 'name': 'compress', 'features': [ 'deprecated' ] }, - 'events', 'postcopy-ram', + 'events', 'postcopy-ram', 'ldst', { 'name': 'x-colo', 'features': [ 'unstable' ] }, 'release-ram', { 'name': 'block', 'features': [ 'deprecated' ] }, @@ -609,7 +613,8 @@ # {"state": false, "capability": "compress"}, # {"state": true, "capability": "events"}, # {"state": false, "capability": "postcopy-ram"}, -# {"state": false, "capability": "x-colo"} +# {"state": false, "capability": "x-colo"}, +# {"state": false, "capability": "ldst"} # ]} ## { 'command': 'query-migrate-capabilities', 'returns': ['MigrationCapabilityStatus']} @@ -1763,10 +1768,12 @@ # # @file: Direct the migration stream to a file. # +# @ham: Migration via ham. +# # Since 8.2 ## { 'enum': 'MigrationAddressType', - 'data': [ 'socket', 'exec', 'rdma', 'file' ] } + 'data': [ 'socket', 'exec', 'rdma', 'file', 'ham' ] } ## # @FileMigrationArgs: @@ -1805,7 +1812,8 @@ 'socket': 'SocketAddress', 'exec': 'MigrationExecCommand', 'rdma': 'InetSocketAddress', - 'file': 'FileMigrationArgs' } } + 'file': 'FileMigrationArgs', + 'ham': 'SocketAddress' } } ## # @MigrationChannelType: @@ -2749,3 +2757,115 @@ 'data': { 'job-id': 'str', 'tag': 'str', 'devices': ['str'] } } + +## +# @RamInfo: +# +# Specifies the memory address segment of the VM. +# +# @uuid: indicates the NUMA index of the VM. +# +# @hva: indicates the start address of each NUMA segment of a VM. +# +# @size: size of each NUMA segment on a VM. +# +# Since: 8.2 +## +{ 'struct': 'RamInfo', +'data': { 'uuid': 'uint32', +'hva': 'uint64', +'size': 'size' } } + +## +# @VmInfo: +# +# Basic VM information. +# +# @pid: pid of the VM. 
+# +# @block: specifies the memory address segment of the VM. +# +# Since: 8.2 +## +{ 'struct': 'VmInfo', +'data': { 'pid': 'int', +'block': ['RamInfo'] } } + +## +# @query-ramblock: +# +# Returns info of RAM blocks in the VM. +# +# Example: +# -> {"execute": "query-ramblock"} +# <- {"return": {"block": [{"hva": 281470478319616, "uuid": 0, "size": 1073741824}, +# {"hva": 281469402480640, "uuid": 1, "size": 1073741824}], +# "pid": 25643}} +# +# Since: 8.2 +## +{ 'command': 'query-ramblock', 'returns': 'VmInfo' } + +## +# @NumaInfo: +# +# NUMA information reported by the remote memory. +# +# @numa-id: NUMA id reported by the remote memory. +# +# @size: NUMA size reported by the remote memory. +# +# Since: 8.2 +## +{ 'struct': 'NumaInfo', +'data': { 'numa-id': 'uint32', +'size': 'size' } } + +## +# @recv-rmtnuma: +# +# Receives the NUMA information reported by the remote memory. +# +# Example: +# -> { "execute": "recv-rmtnuma", +# "arguments": { +# "pid": 25643, +# "scna": 1, +# "block":[{"numa-id": 5, "size": 1073741824}, +# {"numa-id": 6, "size": 1073741824}] +# } +# } +# <- { "return": { } } +# +# Since: 8.2 +## +{ 'command': 'recv-rmtnuma', +'data': { 'pid': 'int', +'scna': 'uint16', +'block': ['NumaInfo'] } } + +## +# @rollback-pages: +# +# Roll back migrated pages when the migration fails. +# +# Example: +# -> { "execute": "rollback-pages" } +# <- { "return": { } } +# +# Since: 8.2 +## +{ 'command': 'rollback-pages' } + +## +# @modify-pgtable: +# +# The attribute of the VM memory page table is changed from invalid to cacheable. 
+# +# Example: +# -> { "execute": "modify-pgtable" } +# <- { "return": { } } +# +# Since: 8.2 +## +{ 'command': 'modify-pgtable' } \ No newline at end of file diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 83e84b118694fab100895df49557d38243323e22..531962aa9e1babb28c7df942bc682733b2d15637 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -1117,7 +1117,7 @@ SocketAddress *socket_parse(const char *str, Error **errp) if (vsock_parse(&addr->u.vsock, str + strlen("vsock:"), errp)) { goto fail; } - } else if (strstart(str, "tcp:", NULL)) { + } else if (strstart(str, "tcp:", NULL) || strstart(str, "ham:", NULL)) { addr->type = SOCKET_ADDRESS_TYPE_INET; if (inet_parse(&addr->u.inet, str + strlen("tcp:"), errp)) { goto fail;