diff --git a/include/migration/cpr.h b/include/migration/cpr.h index c669b8b8a3..3a6deb7933 100644 --- a/include/migration/cpr.h +++ b/include/migration/cpr.h @@ -10,6 +10,8 @@ #include "qapi/qapi-types-migration.h" +#define MIG_MODE_NONE -1 + #define QEMU_CPR_FILE_MAGIC 0x51435052 #define QEMU_CPR_FILE_VERSION 0x00000001 @@ -17,6 +19,9 @@ void cpr_save_fd(const char *name, int id, int fd); void cpr_delete_fd(const char *name, int id); int cpr_find_fd(const char *name, int id); +MigMode cpr_get_incoming_mode(void); +void cpr_set_incoming_mode(MigMode mode); + int cpr_state_save(MigrationChannel *channel, Error **errp); int cpr_state_load(MigrationChannel *channel, Error **errp); void cpr_state_close(void); diff --git a/migration/cpr.c b/migration/cpr.c index 87bcfdb5ff..584b0b98f7 100644 --- a/migration/cpr.c +++ b/migration/cpr.c @@ -45,7 +45,7 @@ static const VMStateDescription vmstate_cpr_fd = { VMSTATE_UINT32(namelen, CprFd), VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen), VMSTATE_INT32(id, CprFd), - VMSTATE_INT32(fd, CprFd), + VMSTATE_FD(fd, CprFd), VMSTATE_END_OF_LIST() } }; @@ -116,6 +116,18 @@ QIOChannel *cpr_state_ioc(void) return qemu_file_get_ioc(cpr_state_file); } +static MigMode incoming_mode = MIG_MODE_NONE; + +MigMode cpr_get_incoming_mode(void) +{ + return incoming_mode; +} + +void cpr_set_incoming_mode(MigMode mode) +{ + incoming_mode = mode; +} + int cpr_state_save(MigrationChannel *channel, Error **errp) { int ret; @@ -124,8 +136,14 @@ int cpr_state_save(MigrationChannel *channel, Error **errp) trace_cpr_state_save(MigMode_str(mode)); - /* set f based on mode in a later patch in this series */ - return 0; + if (mode == MIG_MODE_CPR_TRANSFER) { + f = cpr_transfer_output(channel, errp); + } else { + return 0; + } + if (!f) { + return -1; + } qemu_put_be32(f, QEMU_CPR_FILE_MAGIC); qemu_put_be32(f, QEMU_CPR_FILE_VERSION); @@ -155,8 +173,16 @@ int cpr_state_load(MigrationChannel *channel, Error **errp) QEMUFile *f; MigMode mode = 0; - /* set f and mode based on other parameters later in this patch series */ - return 0; + if (channel) { + mode = MIG_MODE_CPR_TRANSFER; + cpr_set_incoming_mode(mode); + f = cpr_transfer_input(channel, errp); + } else { + return 0; + } + if (!f) { + return -1; + } trace_cpr_state_load(MigMode_str(mode)); diff --git a/migration/migration.c b/migration/migration.c index 5f2540fac3..88b09914ec 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -77,6 +77,7 @@ static NotifierWithReturnList migration_state_notifiers[] = { NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL), NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT), + NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER), }; /* Messages sent on the return path from destination to source */ @@ -110,6 +111,7 @@ static int migration_maybe_pause(MigrationState *s, static void migrate_fd_cancel(MigrationState *s); static bool close_return_path_on_source(MigrationState *s); static void migration_completion_end(MigrationState *s); +static void migrate_hup_delete(MigrationState *s); static void migration_downtime_start(MigrationState *s) { @@ -220,6 +222,12 @@ migration_channels_and_transport_compatible(MigrationAddress *addr, return false; } + if (migrate_mode() == MIG_MODE_CPR_TRANSFER && + addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + error_setg(errp, "Migration requires streamable transport (eg unix)"); + return false; + } + return true; } @@ -435,6 +443,7 @@ void migration_incoming_state_destroy(void) mis->postcopy_qemufile_dst = NULL; } + cpr_set_incoming_mode(MIG_MODE_NONE); yank_unregister_instance(MIGRATION_YANK_INSTANCE); } @@ -747,6 +756,9 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, } else { error_setg(errp, "unknown migration protocol: %s", uri); } + + /* Close cpr socket to tell source that we are listening */ + cpr_state_close(); } static void process_incoming_migration_bh(void *opaque) @@ -1423,6 +1435,8 @@ static void migrate_fd_cleanup(MigrationState *s) s->vmdesc = NULL; qemu_savevm_state_cleanup(); + cpr_state_close(); + migrate_hup_delete(s); close_return_path_on_source(s); @@ -1534,6 +1548,7 @@ static void migrate_fd_error(MigrationState *s, const Error *error) static void migrate_fd_cancel(MigrationState *s) { int old_state ; + bool setup = (s->state == MIGRATION_STATUS_SETUP); trace_migrate_fd_cancel(); @@ -1568,6 +1583,17 @@ static void migrate_fd_cancel(MigrationState *s) } } } + + /* + * If qmp_migrate_finish has not been called, then there is no path that + * will complete the cancellation. Do it now. + */ + if (setup && !s->to_dst_file) { + migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, + MIGRATION_STATUS_CANCELLED); + cpr_state_close(); + migrate_hup_delete(s); + } } void migration_add_notifier_mode(NotifierWithReturn *notify, @@ -1665,7 +1691,9 @@ bool migration_thread_is_self(void) bool migrate_mode_is_cpr(MigrationState *s) { - return s->parameters.mode == MIG_MODE_CPR_REBOOT; + MigMode mode = s->parameters.mode; + return mode == MIG_MODE_CPR_REBOOT || + mode == MIG_MODE_CPR_TRANSFER; } int migrate_init(MigrationState *s, Error **errp) @@ -2046,6 +2074,40 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp) return true; } +static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, + Error **errp); + +static void migrate_hup_add(MigrationState *s, QIOChannel *ioc, GSourceFunc cb, + void *opaque) +{ + s->hup_source = qio_channel_create_watch(ioc, G_IO_HUP); + g_source_set_callback(s->hup_source, cb, opaque, NULL); + g_source_attach(s->hup_source, NULL); +} + +static void migrate_hup_delete(MigrationState *s) +{ + if (s->hup_source) { + g_source_destroy(s->hup_source); + g_source_unref(s->hup_source); + s->hup_source = NULL; + } +} + +static gboolean qmp_migrate_finish_cb(QIOChannel *channel, + GIOCondition cond, + void *opaque) +{ + MigrationAddress *addr = opaque; + + qmp_migrate_finish(addr, false, NULL); + + cpr_state_close(); + migrate_hup_delete(migrate_get_current()); + qapi_free_MigrationAddress(addr); + return G_SOURCE_REMOVE; +} + void qmp_migrate(const char *uri, bool has_channels, MigrationChannelList *channels, bool has_detach, bool detach, bool has_resume, bool resume, Error **errp) @@ -2056,6 +2118,7 @@ void qmp_migrate(const char *uri, bool has_channels, g_autoptr(MigrationChannel) channel = NULL; MigrationAddress *addr = NULL; MigrationChannel *channelv[MIGRATION_CHANNEL_TYPE__MAX] = { NULL }; + MigrationChannel *cpr_channel = NULL; /* * Having preliminary checks for uri and channel @@ -2076,6 +2139,7 @@ void qmp_migrate(const char *uri, bool has_channels, } channelv[type] = channels->value; } + cpr_channel = channelv[MIGRATION_CHANNEL_TYPE_CPR]; addr = channelv[MIGRATION_CHANNEL_TYPE_MAIN]->addr; if (!addr) { error_setg(errp, "Channel list has no main entry"); @@ -2096,12 +2160,52 @@ void qmp_migrate(const char *uri, bool has_channels, return; } + if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) { + error_setg(errp, "missing 'cpr' migration channel"); + return; + } + resume_requested = has_resume && resume; if (!migrate_prepare(s, resume_requested, errp)) { /* Error detected, put into errp */ return; } + if (cpr_state_save(cpr_channel, &local_err)) { + goto out; + } + + /* + * For cpr-transfer, the target may not be listening yet on the migration + * channel, because first it must finish cpr_load_state. The target tells + * us it is listening by closing the cpr-state socket. Wait for that HUP + * event before connecting in qmp_migrate_finish. + * + * The HUP could occur because the target fails while reading CPR state, + * in which case the target will not listen for the incoming migration + * connection, so qmp_migrate_finish will fail to connect, and then recover. + */ + if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) { + migrate_hup_add(s, cpr_state_ioc(), (GSourceFunc)qmp_migrate_finish_cb, + QAPI_CLONE(MigrationAddress, addr)); + + } else { + qmp_migrate_finish(addr, resume_requested, errp); + } + +out: + if (local_err) { + migrate_fd_error(s, local_err); + error_propagate(errp, local_err); + } +} + +static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, + Error **errp) +{ + MigrationState *s = migrate_get_current(); + Error *local_err = NULL; + if (!resume_requested) { if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { return; diff --git a/migration/migration.h b/migration/migration.h index 1d4d4e910d..fb1b8f99d3 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -468,6 +468,8 @@ struct MigrationState { bool switchover_acked; /* Is this a rdma migration */ bool rdma_migration; + + GSource *hup_source; }; void migrate_set_state(MigrationStatus *state, MigrationStatus old_state, diff --git a/migration/options.c b/migration/options.c index b8d5300326..1ad950e397 100644 --- a/migration/options.c +++ b/migration/options.c @@ -22,6 +22,7 @@ #include "qapi/qmp/qnull.h" #include "system/runstate.h" #include "migration/colo.h" +#include "migration/cpr.h" #include "migration/misc.h" #include "migration.h" #include "migration-stats.h" @@ -745,8 +746,11 @@ uint64_t migrate_max_postcopy_bandwidth(void) MigMode migrate_mode(void) { - MigrationState *s = migrate_get_current(); - MigMode mode = s->parameters.mode; + MigMode mode = cpr_get_incoming_mode(); + + if (mode == MIG_MODE_NONE) { + mode = migrate_get_current()->parameters.mode; + } assert(mode >= 0 && mode < MIG_MODE__MAX); return mode; diff --git a/migration/ram.c b/migration/ram.c index ce28328141..5aace00bf1 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -195,7 +195,9 @@ static bool postcopy_preempt_active(void) bool migrate_ram_is_ignored(RAMBlock *block) { + MigMode mode = migrate_mode(); return !qemu_ram_is_migratable(block) || + mode == MIG_MODE_CPR_TRANSFER || (migrate_ignore_shared() && qemu_ram_is_shared(block) && qemu_ram_is_named_file(block)); } diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index 0319c3568b..741a588b7e 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -15,6 +15,7 @@ #include "qemu-file.h" #include "migration.h" #include "migration/vmstate.h" +#include "migration/client-options.h" #include "qemu/error-report.h" #include "qemu/queue.h" #include "trace.h" diff --git a/qapi/migration.json b/qapi/migration.json index a605dc26db..4679ce9f2a 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -614,9 +614,48 @@ # or COLO. # # (since 8.2) +# +# @cpr-transfer: This mode allows the user to transfer a guest to a +# new QEMU instance on the same host with minimal guest pause +# time by preserving guest RAM in place. Devices and their pinned +# pages will also be preserved in a future QEMU release. +# +# The user starts new QEMU on the same host as old QEMU, with +# command-line arguments to create the same machine, plus the +# -incoming option for the main migration channel, like normal +# live migration. In addition, the user adds a second -incoming +# option with channel type "cpr". This CPR channel must support +# file descriptor transfer with SCM_RIGHTS, i.e. it must be a +# UNIX domain socket. +# +# To initiate CPR, the user issues a migrate command to old QEMU, +# adding a second migration channel of type "cpr" in the channels +# argument. Old QEMU stops the VM, saves state to the migration +# channels, and enters the postmigrate state. Execution resumes +# in new QEMU. +# +# New QEMU reads the CPR channel before opening a monitor, hence +# the CPR channel cannot be specified in the list of channels for +# a migrate-incoming command. It may only be specified on the +# command line. +# +# The main channel address cannot be a file type, and for an +# inet socket, the port cannot be 0 (meaning dynamically choose +# a port). +# +# Memory-backend objects must have the share=on attribute, but +# memory-backend-epc is not supported. The VM must be started +# with the '-machine aux-ram-share=on' option. +# +# When using -incoming defer, you must issue the migrate command +# to old QEMU before issuing any monitor commands to new QEMU. +# However, new QEMU does not open and read the migration stream +# until you issue the migrate incoming command. +# +# (since 10.0) ## { 'enum': 'MigMode', - 'data': [ 'normal', 'cpr-reboot' ] } + 'data': [ 'normal', 'cpr-reboot', 'cpr-transfer' ] } ## # @ZeroPageDetection: @@ -1578,11 +1617,12 @@ # The migration channel-type request options. # # @main: Main outbound migration channel. +# @cpr: Checkpoint and restart state channel. # # Since: 8.1 ## { 'enum': 'MigrationChannelType', - 'data': [ 'main' ] } + 'data': [ 'main', 'cpr' ] } ## # @MigrationChannel: diff --git a/qemu-options.hx b/qemu-options.hx index 3d1af7325b..d19bf533d6 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -112,6 +112,8 @@ SRST specified on the command line, or implicitly created by the -m command line option. The default is off. + To use the cpr-transfer migration mode, you must set aux-ram-share=on. + ``memory-backend='id'`` An alternative to legacy ``-mem-path`` and ``mem-prealloc`` options. Allows to use a memory backend as main RAM. diff --git a/stubs/vmstate.c b/stubs/vmstate.c index 8513d9204e..c190762d7c 100644 --- a/stubs/vmstate.c +++ b/stubs/vmstate.c @@ -1,5 +1,7 @@ #include "qemu/osdep.h" #include "migration/vmstate.h" +#include "qapi/qapi-types-migration.h" +#include "migration/client-options.h" int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id, @@ -21,3 +23,8 @@ bool vmstate_check_only_migratable(const VMStateDescription *vmsd) { return true; } + +MigMode migrate_mode(void) +{ + return MIG_MODE_NORMAL; +} diff --git a/system/vl.c b/system/vl.c index 504f05b954..db8e604eba 100644 --- a/system/vl.c +++ b/system/vl.c @@ -77,6 +77,7 @@ #include "hw/block/block.h" #include "hw/i386/x86.h" #include "hw/i386/pc.h" +#include "migration/cpr.h" #include "migration/misc.h" #include "migration/snapshot.h" #include "system/tpm.h" @@ -3706,6 +3707,12 @@ void qemu_init(int argc, char **argv) qemu_create_machine(machine_opts_dict); + /* + * Load incoming CPR state before any devices are created, because it + * contains file descriptors that are needed in device initialization code. + */ + cpr_state_load(incoming_channels[MIGRATION_CHANNEL_TYPE_CPR], &error_fatal); + suspend_mux_open(); qemu_disable_default_devices();