All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Yang Hongyang <yanghy@cn.fujitsu.com>
To: xen-devel@lists.xen.org
Cc: wei.liu2@citrix.com, ian.campbell@citrix.com,
	wency@cn.fujitsu.com, andrew.cooper3@citrix.com,
	yunhong.jiang@intel.com, eddie.dong@intel.com,
	guijianfeng@cn.fujitsu.com, rshriram@cs.ubc.ca,
	ian.jackson@eu.citrix.com
Subject: [PATCH v7 COLO 05/18] primary vm suspend/resume/checkpoint code
Date: Thu, 25 Jun 2015 14:30:59 +0800	[thread overview]
Message-ID: <1435213872-10698-6-git-send-email-yanghy@cn.fujitsu.com> (raw)
In-Reply-To: <1435213872-10698-1-git-send-email-yanghy@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

We will do the following things again and again:
1. Suspend primary vm
   a. Suspend primary vm
   b. do postsuspend
   c. Read LIBXL_COLO_SVM_SUSPENDED sent by secondary
2. Resume primary vm
   a. Read LIBXL_COLO_SVM_READY from secondary
   b. Do preresume
   c. Resume primary vm
   d. Read LIBXL_COLO_SVM_RESUMED from secondary
3. Wait for a new checkpoint
   a. Wait for a new checkpoint (not implemented)
   b. Send LIBXL_COLO_NEW_CHECKPOINT to secondary

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxl/Makefile          |   2 +-
 tools/libxl/libxl.c           |   6 +-
 tools/libxl/libxl_colo.h      |  21 +-
 tools/libxl/libxl_colo_save.c | 565 ++++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_dom_save.c  |  13 +-
 tools/libxl/libxl_internal.h  | 121 +++++----
 tools/libxl/libxl_types.idl   |   1 +
 7 files changed, 662 insertions(+), 67 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_save.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 66ae63d..252c4e9 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,7 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
-LIBXL_OBJS-y += libxl_colo_restore.o
+LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index f851957..8b866f4 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -17,6 +17,7 @@
 #include "libxl_osdeps.h"
 
 #include "libxl_internal.h"
+#include "libxl_colo.h"
 
 #define PAGE_TO_MEMKB(pages) ((pages) * 4)
 #define BACKEND_STRING_SIZE 5
@@ -842,7 +843,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
     assert(info);
 
     /* Point of no return */
-    libxl__remus_setup(egc, &dss->rs);
+    if (libxl_defbool_val(info->colo))
+        libxl__colo_save_setup(egc, &dss->css);
+    else
+        libxl__remus_setup(egc, &dss->rs);
     return AO_INPROGRESS;
 
  out:
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 91df275..49a430b 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -16,17 +16,6 @@
 #ifndef LIBXL_COLO_H
 #define LIBXL_COLO_H
 
-/*
- * values to control suspend/resume primary vm and secondary vm
- * at the same time
- */
-enum {
-    LIBXL_COLO_NEW_CHECKPOINT = 1,
-    LIBXL_COLO_SVM_SUSPENDED,
-    LIBXL_COLO_SVM_READY,
-    LIBXL_COLO_SVM_RESUMED,
-};
-
 extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
                                      int ret, int retval, int errnoval);
 extern void libxl__colo_restore_setup(libxl__egc *egc,
@@ -35,4 +24,14 @@ extern void libxl__colo_restore_teardown(libxl__egc *egc,
                                          libxl__colo_restore_state *crs,
                                          int rc);
 
+extern void libxl__colo_save_domain_suspend_callback(void *data);
+extern void libxl__colo_save_domain_checkpoint_callback(void *data);
+extern void libxl__colo_save_domain_resume_callback(void *data);
+extern void libxl__colo_save_domain_should_checkpoint_callback(void *data);
+extern void libxl__colo_save_setup(libxl__egc *egc,
+                                   libxl__colo_save_state *css);
+extern void libxl__colo_save_teardown(libxl__egc *egc,
+                                      libxl__colo_save_state *css,
+                                      int rc);
+
 #endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
new file mode 100644
index 0000000..4e059cc
--- /dev/null
+++ b/tools/libxl/libxl_colo_save.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ *         Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+
+static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+    NULL,
+};
+
+/* ================= helper functions ================= */
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    /* init device subkind-specific state in the libxl ctx */
+    int rc;
+    STATE_AO_GC(cds->ao);
+
+    rc = 0;
+    return rc;
+}
+
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    /* cleanup device subkind-specific state in the libxl ctx */
+    STATE_AO_GC(cds->ao);
+}
+
+/* ================= colo: setup save environment ================= */
+static void colo_save_setup_done(libxl__egc *egc,
+                                 libxl__checkpoint_devices_state *cds,
+                                 int rc);
+static void colo_save_setup_failed(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc);
+
+/* Start COLO save setup; completes the ao with ERROR_FAIL on early failure. */
+void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &css->cds;
+
+    STATE_AO_GC(dss->ao);
+
+    if (dss->type != LIBXL_DOMAIN_TYPE_HVM) {
+        LOG(ERROR, "COLO only supports hvm now");
+        goto out;
+    }
+
+    css->send_fd = dss->fd;
+    css->recv_fd = dss->recv_fd;
+    css->svm_running = false;
+
+    /* TODO: disk/nic support */
+    cds->device_kind_flags = 0;
+    cds->ops = colo_ops;
+    cds->callback = colo_save_setup_done;
+    cds->ao = ao;
+    cds->domid = dss->domid;
+
+    /* Do the fallible init first: "out" does not abort a started reader. */
+    if (init_device_subkind(cds))
+        goto out;
+
+    css->srs.ao = ao;
+    css->srs.fd = css->recv_fd;
+    css->srs.back_channel = true;
+    libxl__stream_read_start(egc, &css->srs);
+    libxl__checkpoint_devices_setup(egc, &css->cds);
+    return;
+
+out:
+    libxl__ao_complete(egc, ao, ERROR_FAIL);
+}
+
+static void colo_save_setup_done(libxl__egc *egc,
+                                 libxl__checkpoint_devices_state *cds,
+                                 int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    STATE_AO_GC(cds->ao);
+
+    if (!rc) {
+        libxl__domain_save(egc, dss);
+        return;
+    }
+
+    LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
+        dss->domid);
+    css->cds.callback = colo_save_setup_failed;
+    libxl__checkpoint_devices_teardown(egc, &css->cds);
+}
+
+static void colo_save_setup_failed(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc)
+{
+    STATE_AO_GC(cds->ao);
+
+    if (rc)
+        LOG(ERROR, "COLO: failed to teardown device after setup failed"
+            " for guest with domid %u, rc %d", cds->domid, rc);
+
+    cleanup_device_subkind(cds);
+    libxl__ao_complete(egc, ao, rc);
+}
+
+
+/* ================= colo: teardown save environment ================= */
+static void colo_teardown_done(libxl__egc *egc,
+                               libxl__checkpoint_devices_state *cds,
+                               int rc);
+
+void libxl__colo_save_teardown(libxl__egc *egc,
+                               libxl__colo_save_state *css,
+                               int rc)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    LOG(WARN, "COLO: Domain suspend terminated with rc %d,"
+        " teardown COLO devices...", rc);
+    dss->css.cds.callback = colo_teardown_done;
+    libxl__checkpoint_devices_teardown(egc, &dss->css.cds);
+    return;
+}
+
+static void colo_teardown_done(libxl__egc *egc,
+                               libxl__checkpoint_devices_state *cds,
+                               int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    cleanup_device_subkind(cds);
+    dss->callback(egc, dss, rc);
+}
+
+/*
+ * checkpoint callbacks are called in the following order:
+ * 1. suspend
+ * 2. resume
+ * 3. checkpoint
+ */
+static void colo_common_write_stream_done(libxl__egc *egc,
+                                          libxl__stream_write_state *stream,
+                                          int rc);
+static void colo_common_read_stream_done(libxl__egc *egc,
+                                         libxl__stream_read_state *stream,
+                                         int rc);
+/* ===================== colo: suspend primary vm ===================== */
+
+static void colo_read_svm_suspended_done(libxl__egc *egc,
+                                         libxl__colo_save_state *css,
+                                         int id);
+/*
+ * Do the following things when suspending primary vm:
+ * 1. suspend primary vm
+ * 2. do postsuspend
+ * 3. read COLO_SVM_SUSPENDED
+ * 4. read secondary vm's dirty pages
+ */
+static void colo_suspend_primary_vm_done(libxl__egc *egc,
+                                         libxl__domain_suspend_state *dsps,
+                                         int ok);
+static void colo_postsuspend_cb(libxl__egc *egc,
+                                libxl__checkpoint_devices_state *cds,
+                                int rc);
+
+void libxl__colo_save_domain_suspend_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__egc *egc = shs->egc;
+    libxl__domain_save_state *dss = CONTAINER_OF(shs, *dss, shs);
+
+    /* Convenience aliases */
+    libxl__domain_suspend_state *dsps = &dss->dsps;
+
+    dsps->callback_common_done = colo_suspend_primary_vm_done;
+    libxl__domain_suspend(egc, dsps);
+}
+
+static void colo_suspend_primary_vm_done(libxl__egc *egc,
+                                         libxl__domain_suspend_state *dsps,
+                                         int ok)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(dsps, *dss, dsps);
+
+    STATE_AO_GC(dsps->ao);
+
+    if (!ok) {
+        LOG(ERROR, "cannot suspend primary vm");
+        goto out;
+    }
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &dss->css.cds;
+
+    cds->callback = colo_postsuspend_cb;
+    libxl__checkpoint_devices_postsuspend(egc, cds);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+static void colo_postsuspend_cb(libxl__egc *egc,
+                                libxl__checkpoint_devices_state *cds,
+                                int rc)
+{
+    int ok = 0;
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "postsuspend fails");
+        goto out;
+    }
+
+    if (!css->svm_running) {
+        ok = 1;
+        goto out;
+    }
+
+    /*
+     * read COLO_SVM_SUSPENDED
+     */
+    css->callback = colo_read_svm_suspended_done;
+    css->srs.read_records_callback = colo_common_read_stream_done;
+    libxl__stream_read_colo_context(egc, &css->srs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+static void colo_read_svm_suspended_done(libxl__egc *egc,
+                                         libxl__colo_save_state *css,
+                                         int id)
+{
+    int ok = 0;
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (id != COLO_SVM_SUSPENDED) {
+        LOG(ERROR, "invalid section: %d, expected: %d", id, COLO_SVM_SUSPENDED);
+        goto out;
+    }
+
+    ok = 1;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+
+/* ===================== colo: send tailbuf ========================== */
+void libxl__colo_save_domain_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_save_state *dss = CONTAINER_OF(shs, *dss, shs);
+
+    /* Convenience aliases */
+    libxl__colo_save_state *const css = &dss->css;
+
+    /* write toolstack and emulator context, checkpoint end */
+    css->callback = NULL;
+    dss->sws.write_records_callback = colo_common_write_stream_done;
+    libxl__stream_write_start_checkpoint(shs->egc, &dss->sws);
+}
+
+/* ===================== colo: resume primary vm ===================== */
+/*
+ * Do the following things when resuming primary vm:
+ *  1. read COLO_SVM_READY
+ *  2. do preresume
+ *  3. resume primary vm
+ *  4. read COLO_SVM_RESUMED
+ */
+static void colo_preresume_dm_saved(libxl__egc *egc,
+                                    libxl__domain_save_state *dss, int rc);
+static void colo_read_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_save_state *css,
+                                     int id);
+static void colo_preresume_cb(libxl__egc *egc,
+                              libxl__checkpoint_devices_state *cds,
+                              int rc);
+static void colo_read_svm_resumed_done(libxl__egc *egc,
+                                       libxl__colo_save_state *css,
+                                       int id);
+
+void libxl__colo_save_domain_resume_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__egc *egc = shs->egc;
+    libxl__domain_save_state *dss = CONTAINER_OF(shs, *dss, shs);
+
+    /* This would go into tailbuf. */
+    if (dss->hvm) {
+        libxl__domain_save_device_model(egc, dss, colo_preresume_dm_saved);
+    } else {
+        colo_preresume_dm_saved(egc, dss, 0);
+    }
+
+    return;
+}
+
+static void colo_preresume_dm_saved(libxl__egc *egc,
+                                    libxl__domain_save_state *dss, int rc)
+{
+    /* Convenience aliases */
+    libxl__colo_save_state *const css = &dss->css;
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (rc) {
+        LOG(ERROR, "Failed to save device model. Terminating COLO..");
+        goto out;
+    }
+
+    /* read COLO_SVM_READY */
+    css->callback = colo_read_svm_ready_done;
+    css->srs.read_records_callback = colo_common_read_stream_done;
+    libxl__stream_read_colo_context(egc, &css->srs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_read_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_save_state *css,
+                                     int id)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (id != COLO_SVM_READY) {
+        LOG(ERROR, "invalid section: %d, expected: %d", id, COLO_SVM_READY);
+        goto out;
+    }
+
+    css->svm_running = true;
+    css->cds.callback = colo_preresume_cb;
+    libxl__checkpoint_devices_preresume(egc, &css->cds);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_preresume_cb(libxl__egc *egc,
+                              libxl__checkpoint_devices_state *cds,
+                              int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "preresume fails");
+        goto out;
+    }
+
+    /* Resumes the domain and the device model */
+    if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) {
+        LOG(ERROR, "cannot resume primary vm");
+        goto out;
+    }
+
+    /* read COLO_SVM_RESUMED */
+    css->callback = colo_read_svm_resumed_done;
+    css->srs.read_records_callback = colo_common_read_stream_done;
+    libxl__stream_read_colo_context(egc, &css->srs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_read_svm_resumed_done(libxl__egc *egc,
+                                       libxl__colo_save_state *css,
+                                       int id)
+{
+    int ok = 0;
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (id != COLO_SVM_RESUMED) {
+        LOG(ERROR, "invalid section: %d, expected: %d", id, COLO_SVM_RESUMED);
+        goto out;
+    }
+
+    ok = 1;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+
+/* ===================== colo: wait new checkpoint ===================== */
+/*
+ * Do the following things:
+ * 1. do commit
+ * 2. wait for a new checkpoint
+ * 3. write COLO_NEW_CHECKPOINT
+ */
+static void colo_device_commit_cb(libxl__egc *egc,
+                                  libxl__checkpoint_devices_state *cds,
+                                  int rc);
+static void colo_start_new_checkpoint(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc);
+
+void libxl__colo_save_domain_should_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_save_state *dss = CONTAINER_OF(shs, *dss, shs);
+    libxl__egc *egc = dss->shs.egc;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &dss->css.cds;
+
+    cds->callback = colo_device_commit_cb;
+    libxl__checkpoint_devices_commit(egc, cds);
+}
+
+static void colo_device_commit_cb(libxl__egc *egc,
+                                  libxl__checkpoint_devices_state *cds,
+                                  int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "commit fails");
+        goto out;
+    }
+
+    /* TODO: wait a new checkpoint */
+    colo_start_new_checkpoint(egc, cds, 0);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+static void colo_start_new_checkpoint(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    libxl_sr_colo_context colo_context = { .id = COLO_NEW_CHECKPOINT };
+
+    if (rc)
+        goto out;
+
+    /* write COLO_NEW_CHECKPOINT */
+    css->callback = NULL;
+    dss->sws.write_records_callback = colo_common_write_stream_done;
+    libxl__stream_write_colo_context(egc, &dss->sws, &colo_context);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+
+/* ===================== colo: common callback ===================== */
+static void colo_common_write_stream_done(libxl__egc *egc,
+                                          libxl__stream_write_state *stream,
+                                          int rc)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(stream, *dss, sws);
+    int ok;
+
+    /* Convenience aliases */
+    libxl__colo_save_state *const css = &dss->css;
+
+    STATE_AO_GC(stream->ao);
+
+    if (rc < 0) {
+        /* TODO: it may be an internal error, but we don't know */
+        LOG(ERROR, "sending data fails");
+        ok = 2;
+        goto out;
+    }
+
+    if (!css->callback) {
+        /* No follow-up step was registered: everything is OK */
+        ok = 1;
+        goto out;
+    }
+    /* Otherwise hand over to the registered follow-up step */
+    css->callback(egc, css, 0);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+static void colo_common_read_stream_done(libxl__egc *egc,
+                                         libxl__stream_read_state *stream,
+                                         int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(stream, *css, srs);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    int ok;
+
+    STATE_AO_GC(stream->ao);
+
+    if (rc < 0) {
+        /* TODO: it may be a internal error, but we don't know */
+        LOG(ERROR, "sending data fails");
+        ok = 2;
+        goto out;
+    }
+
+    if (!css->callback) {
+        /* Everythins is OK */
+        ok = 1;
+        goto out;
+    }
+
+    /* rc contains the id */
+    css->callback(egc, css, rc);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
diff --git a/tools/libxl/libxl_dom_save.c b/tools/libxl/libxl_dom_save.c
index 9a3d009..26839cb 100644
--- a/tools/libxl/libxl_dom_save.c
+++ b/tools/libxl/libxl_dom_save.c
@@ -16,6 +16,7 @@
 #include "libxl_osdeps.h" /* must come before any other headers */
 
 #include "libxl_internal.h"
+#include "libxl_colo.h"
 
 struct libxl__physmap_info {
     uint64_t phys_offset;
@@ -437,6 +438,11 @@ void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss)
         callbacks->suspend = libxl__remus_domain_suspend_callback;
         callbacks->postcopy = libxl__remus_domain_resume_callback;
         callbacks->checkpoint = libxl__remus_domain_save_checkpoint_callback;
+    } else if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+        callbacks->suspend = libxl__colo_save_domain_suspend_callback;
+        callbacks->postcopy = libxl__colo_save_domain_resume_callback;
+        callbacks->checkpoint = libxl__colo_save_domain_checkpoint_callback;
+        callbacks->should_checkpoint = libxl__colo_save_domain_should_checkpoint_callback;
     } else
         callbacks->suspend = libxl__domain_suspend_callback;
 
@@ -575,12 +581,15 @@ static void domain_save_done(libxl__egc *egc,
     }
 
     /*
-     * With Remus, if we reach this point, it means either
+     * With Remus/COLO, if we reach this point, it means either
      * backup died or some network error occurred preventing us
      * from sending checkpoints. Teardown the network buffers and
      * release netlink resources.  This is an async op.
      */
-    libxl__remus_teardown(egc, &dss->rs, rc);
+    if (libxl_defbool_val(dss->remus->colo))
+        libxl__colo_save_teardown(egc, &dss->css, rc);
+    else
+        libxl__remus_teardown(egc, &dss->rs, rc);
 }
 
 /*========================= Domain restore ============================*/
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 0aafd59..bb5e298 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2655,7 +2655,7 @@ typedef struct libxl__save_helper_state {
 /*
  * The abstract checkpoint device layer exposes a common
  * set of API to [external] libxl for manipulating devices attached to
- * a guest protected by Remus. The device layer also exposes a set of
+ * a guest protected by Remus/COLO. The device layer also exposes a set of
  * [internal] interfaces that every device type must implement.
  *
  * The following API are exposed to libxl:
@@ -2673,7 +2673,7 @@ typedef struct libxl__save_helper_state {
  *  +libxl__checkpoint_devices_commit
  *
  * Each device type needs to implement the interfaces specified in
- * the libxl__checkpoint_device_instance_ops if it wishes to support Remus.
+ * the libxl__checkpoint_device_instance_ops if it wishes to support Remus/COLO.
  *
  * The high-level control flow through the checkpoint device layer is shown
  * below:
@@ -2693,7 +2693,7 @@ typedef struct libxl__checkpoint_device_instance_ops libxl__checkpoint_device_in
 
 /*
  * Interfaces to be implemented by every device subkind that wishes to
- * support Remus. Functions must be implemented unless otherwise
+ * support Remus/COLO. Functions must be implemented unless otherwise
  * stated. Many of these functions are asynchronous. They call
  * dev->aodev.callback when done.  The actual implementations may be
  * synchronous and call dev->aodev.callback directly (as the last
@@ -2873,6 +2873,66 @@ static inline bool libxl__convert_legacy_stream_inuse(
     return libxl__ev_child_inuse(&chs->child);
 }
 
+/* State for manipulating a libxl migration v2 stream */
+typedef struct libxl__stream_read_state libxl__stream_read_state;
+
+struct libxl__stream_read_state {
+    /* filled by the user */
+    libxl__ao *ao;
+    int fd;
+    bool legacy;
+    bool back_channel;
+    void (*completion_callback)(libxl__egc *egc,
+                                libxl__stream_read_state *stream,
+                                int rc);
+    void (*read_records_callback)(libxl__egc *egc,
+                                  libxl__stream_read_state *stream,
+                                  int rc);
+    /* Private */
+    libxl__carefd *v2_carefd;
+    int rc;
+    int joined_rc;
+    bool running;
+    bool in_checkpoint;
+    bool in_colo_context;
+    libxl__datacopier_state dc;
+    size_t expected_len;
+    libxl_sr_hdr hdr;
+    libxl_sr_rec_hdr rec_hdr;
+    void *rec_body;
+};
+
+_hidden void libxl__stream_read_start(libxl__egc *egc,
+                                      libxl__stream_read_state *stream);
+
+_hidden void libxl__stream_read_continue(libxl__egc *egc,
+                                         libxl__stream_read_state *stream);
+_hidden void libxl__stream_read_start_checkpoint(
+    libxl__egc *egc, libxl__stream_read_state *stream);
+_hidden void libxl__stream_read_colo_context(
+    libxl__egc *egc, libxl__stream_read_state *stream);
+
+_hidden void libxl__stream_read_abort(libxl__egc *egc,
+                                      libxl__stream_read_state *stream, int rc);
+
+static inline bool libxl__stream_read_inuse(
+    const libxl__stream_read_state *stream)
+{
+    return stream->running;
+}
+
+/*----- colo related state structure -----*/
+typedef struct libxl__colo_save_state libxl__colo_save_state;
+struct libxl__colo_save_state {
+    libxl__checkpoint_devices_state cds;
+    int send_fd;
+    int recv_fd;
+
+    /* private */
+    libxl__stream_read_state srs;
+    void (*callback)(libxl__egc *, libxl__colo_save_state *, int);
+    bool svm_running;
+};
 
 /*----- Domain suspend (save) state structure -----*/
 
@@ -2978,7 +3038,12 @@ struct libxl__domain_save_state {
     libxl__domain_suspend_state dsps;
     int hvm;
     int xcflags;
-    libxl__remus_state rs;
+    union {
+        /* for Remus */
+        libxl__remus_state rs;
+        /* for COLO */
+        libxl__colo_save_state css;
+    };
     libxl__save_helper_state shs;
     libxl__logdirty_switch logdirty;
     /* private for libxl__domain_save_device_model */
@@ -3232,54 +3297,6 @@ typedef void libxl__domain_create_cb(libxl__egc *egc,
                                      libxl__domain_create_state*,
                                      int rc, uint32_t domid);
 
-/* State for manipulating a libxl migration v2 stream */
-typedef struct libxl__stream_read_state libxl__stream_read_state;
-
-struct libxl__stream_read_state {
-    /* filled by the user */
-    libxl__ao *ao;
-    int fd;
-    bool legacy;
-    bool back_channel;
-    void (*completion_callback)(libxl__egc *egc,
-                                libxl__stream_read_state *stream,
-                                int rc);
-    void (*read_records_callback)(libxl__egc *egc,
-                                  libxl__stream_read_state *stream,
-                                  int rc);
-    /* Private */
-    libxl__carefd *v2_carefd;
-    int rc;
-    int joined_rc;
-    bool running;
-    bool in_checkpoint;
-    bool in_colo_context;
-    libxl__datacopier_state dc;
-    size_t expected_len;
-    libxl_sr_hdr hdr;
-    libxl_sr_rec_hdr rec_hdr;
-    void *rec_body;
-};
-
-_hidden void libxl__stream_read_start(libxl__egc *egc,
-                                      libxl__stream_read_state *stream);
-
-_hidden void libxl__stream_read_continue(libxl__egc *egc,
-                                         libxl__stream_read_state *stream);
-_hidden void libxl__stream_read_start_checkpoint(
-    libxl__egc *egc, libxl__stream_read_state *stream);
-_hidden void libxl__stream_read_colo_context(
-    libxl__egc *egc, libxl__stream_read_state *stream);
-
-_hidden void libxl__stream_read_abort(libxl__egc *egc,
-                                      libxl__stream_read_state *stream, int rc);
-
-static inline bool libxl__stream_read_inuse(
-    const libxl__stream_read_state *stream)
-{
-    return stream->running;
-}
-
 /* colo related structure */
 typedef struct libxl__colo_restore_state libxl__colo_restore_state;
 typedef void libxl__colo_callback(libxl__egc *,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index e05d12b..cf1eeb2 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -696,6 +696,7 @@ libxl_domain_remus_info = Struct("domain_remus_info",[
     ("netbuf",       libxl_defbool),
     ("netbufscript", string),
     ("diskbuf",      libxl_defbool),
+    ("colo",         libxl_defbool)
     ])
 
 libxl_event_type = Enumeration("event_type", [
-- 
1.9.1

  parent reply	other threads:[~2015-06-25  6:30 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-25  6:30 [PATCH v7 COLO 00/18] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Yang Hongyang
2015-06-25  6:30 ` [PATCH v7 COLO 01/18] docs: add colo readme Yang Hongyang
2015-07-14 15:15   ` Ian Campbell
2015-06-25  6:30 ` [PATCH v7 COLO 02/18] tools/libxl: handle colo_context records in a libxl migration v2 stream Yang Hongyang
2015-07-14 15:19   ` Ian Campbell
2015-07-15  0:34     ` Yang Hongyang
2015-06-25  6:30 ` [PATCH v7 COLO 03/18] tools/libxl: write colo_context records into the stream Yang Hongyang
2015-06-25  6:30 ` [PATCH v7 COLO 04/18] secondary vm suspend/resume/checkpoint code Yang Hongyang
2015-06-25  6:30 ` Yang Hongyang [this message]
2015-06-25  6:31 ` [PATCH v7 COLO 06/18] libxc/restore: support COLO restore Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 07/18] libxc/restore: send dirty bitmap to primary when checkpoint under colo Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 08/18] send store mfn and console mfn to xl before resuming secondary vm Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 09/18] libxc/save: support COLO save Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 10/18] implement the cmdline for COLO Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 11/18] Support colo mode for qemu disk Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 12/18] COLO: use qemu block replication Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 13/18] COLO proxy: implement setup/teardown of COLO proxy module Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 14/18] COLO proxy: preresume, postresume and checkpoint Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 15/18] COLO nic: implement COLO nic subkind Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 16/18] setup and control colo proxy on primary side Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 17/18] setup and control colo proxy on secondary side Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 18/18] cmdline switches and config vars to control colo-proxy Yang Hongyang
2015-07-14 15:55 ` [PATCH v7 COLO 00/18] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Ian Campbell
2015-07-15  0:41   ` Yang Hongyang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1435213872-10698-6-git-send-email-yanghy@cn.fujitsu.com \
    --to=yanghy@cn.fujitsu.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=wei.liu2@citrix.com \
    --cc=wency@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.