All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Yang Hongyang <yanghy@cn.fujitsu.com>
To: xen-devel@lists.xen.org
Cc: wei.liu2@citrix.com, ian.campbell@citrix.com,
	wency@cn.fujitsu.com, andrew.cooper3@citrix.com,
	yunhong.jiang@intel.com, eddie.dong@intel.com,
	guijianfeng@cn.fujitsu.com, rshriram@cs.ubc.ca,
	ian.jackson@eu.citrix.com
Subject: [PATCH v7 COLO 12/18] COLO: use qemu block replication
Date: Thu, 25 Jun 2015 14:31:06 +0800	[thread overview]
Message-ID: <1435213872-10698-13-git-send-email-yanghy@cn.fujitsu.com> (raw)
In-Reply-To: <1435213872-10698-1-git-send-email-yanghy@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

Use qemu block replication as our block replication solution.
Note that guest must be paused before starting COLO, otherwise,
the disk won't be consistent between primary and secondary.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
for commit message,
Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
---
 tools/libxl/Makefile             |   1 +
 tools/libxl/libxl_colo_qdisk.c   | 209 +++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_colo_restore.c |  20 +++-
 tools/libxl/libxl_colo_save.c    |  36 ++++++-
 tools/libxl/libxl_internal.h     |  18 ++++
 tools/libxl/libxl_qmp.c          |  31 ++++++
 6 files changed, 311 insertions(+), 4 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_qdisk.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 252c4e9..2e62b88 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -58,6 +58,7 @@ endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
+LIBXL_OBJS-y += libxl_colo_qdisk.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c
new file mode 100644
index 0000000..d73572e
--- /dev/null
+++ b/tools/libxl/libxl_colo_qdisk.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+typedef struct libxl__colo_qdisk {
+    libxl__checkpoint_device *dev;
+} libxl__colo_qdisk;
+
+/* ========== init() and cleanup() ========== */
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+    /*
+     * We don't know if we use qemu block replication, so
+     * we cannot start block replication here.
+     */
+    return 0;
+}
+
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* ========== setup() and teardown() ========== */
+static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev,
+                             bool primary)
+{
+    const libxl_device_disk *disk = dev->backend_dev;
+    const char *addr = NULL;
+    const char *export_name;
+    int ret, rc = 0;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const char *colo_params = disk->colo_params;
+    const int domid = cds->domid;
+
+    EGC_GC;
+
+    if (disk->backend != LIBXL_DISK_BACKEND_QDISK ||
+        !libxl_defbool_val(disk->colo_enable)) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    export_name = strstr(colo_params, ":exportname=");
+    if (!export_name) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+    export_name += strlen(":exportname=");
+    if (export_name[0] == 0) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    dev->matched = 1;
+
+    if (primary) {
+        /* NBD server is not ready, so we cannot start block replication now */
+        goto out;
+    } else {
+        libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+        int len;
+
+        if (crs->qdisk_setuped)
+            goto out;
+
+        crs->qdisk_setuped = true;
+
+        len = export_name - strlen(":exportname=") - colo_params;
+        addr = libxl__strndup(gc, colo_params, len);
+    }
+
+    ret = libxl__qmp_block_start_replication(gc, domid, primary, addr);
+    if (ret)
+        rc = ERROR_FAIL;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev,
+                                bool primary)
+{
+    int ret, rc = 0;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const int domid = cds->domid;
+
+    EGC_GC;
+
+    if (primary) {
+        libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+
+        if (!css->qdisk_setuped)
+            goto out;
+
+        css->qdisk_setuped = false;
+    } else {
+        libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+
+        if (!crs->qdisk_setuped)
+            goto out;
+
+        crs->qdisk_setuped = false;
+    }
+
+    ret = libxl__qmp_block_stop_replication(gc, domid, primary);
+    if (ret)
+        rc = ERROR_FAIL;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ========== checkpointing APIs ========== */
+/* should be called after libxl__checkpoint_device_instance_ops.preresume */
+int colo_qdisk_preresume(libxl_ctx *ctx, domid_t domid)
+{
+    GC_INIT(ctx);
+    int ret;
+
+    ret = libxl__qmp_block_do_checkpoint(gc, domid);
+
+    GC_FREE;
+    return ret;
+}
+
+static void colo_qdisk_save_preresume(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(dev->cds, *css, cds);
+    int ret, rc = 0;
+
+    /* Convenience aliases */
+    const int domid = dev->cds->domid;
+
+    EGC_GC;
+
+    if (css->qdisk_setuped)
+        goto out;
+
+    css->qdisk_setuped = true;
+
+    ret = libxl__qmp_block_start_replication(gc, domid, true, NULL);
+    if (ret)
+        rc = ERROR_FAIL;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ======== primary ======== */
+static void colo_qdisk_save_setup(libxl__egc *egc,
+                                  libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, true);
+}
+
+static void colo_qdisk_save_teardown(libxl__egc *egc,
+                                   libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, true);
+}
+
+const libxl__checkpoint_device_instance_ops colo_save_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_save_setup,
+    .teardown = colo_qdisk_save_teardown,
+    .preresume = colo_qdisk_save_preresume,
+};
+
+/* ======== secondary ======== */
+static void colo_qdisk_restore_setup(libxl__egc *egc,
+                                     libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, false);
+}
+
+static void colo_qdisk_restore_teardown(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, false);
+}
+
+const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_restore_setup,
+    .teardown = colo_qdisk_restore_teardown,
+};
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index ada9a35..0a58b86 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -49,7 +49,10 @@ static void libxl__colo_restore_domain_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_should_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_suspend_callback(void *data);
 
+extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk;
+
 static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+    &colo_restore_device_qdisk,
     NULL,
 };
 
@@ -148,7 +151,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
+    rc = init_subkind_qdisk(cds);
+    if (rc)  goto out;
+
     rc = 0;
+out:
     return rc;
 }
 
@@ -156,6 +163,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 {
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_qdisk(cds);
 }
 
 
@@ -215,6 +224,7 @@ void libxl__colo_restore_setup(libxl__egc *egc,
     GCNEW(crcs);
     crs->crcs = crcs;
     crcs->crs = crs;
+    crs->qdisk_setuped = false;
 
     /* setup dsps */
     crcs->dsps.ao = ao;
@@ -518,6 +528,12 @@ static void colo_restore_preresume_cb(libxl__egc *egc,
         goto out;
     }
 
+    rc = colo_qdisk_preresume(CTX, crs->domid);
+    if (rc) {
+        LOG(ERROR, "colo_qdisk_preresume() fails");
+        goto out;
+    }
+
     colo_restore_resume_vm(egc, crcs);
 
     return;
@@ -673,8 +689,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc,
 
     STATE_AO_GC(crs->ao);
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
     cds->callback = colo_restore_setup_cds_done;
     cds->ao = ao;
     cds->domid = crs->domid;
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index 4e059cc..633887b 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -19,7 +19,10 @@
 #include "libxl_internal.h"
 #include "libxl_colo.h"
 
+extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk;
+
 static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+    &colo_save_device_qdisk,
     NULL,
 };
 
@@ -30,7 +33,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
+    rc = init_subkind_qdisk(cds);
+    if (rc) goto out;
+
     rc = 0;
+out:
     return rc;
 }
 
@@ -38,6 +45,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 {
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_qdisk(cds);
 }
 
 /* ================= colo: setup save environment ================= */
@@ -65,9 +74,11 @@ void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
     css->send_fd = dss->fd;
     css->recv_fd = dss->recv_fd;
     css->svm_running = false;
+    css->paused = true;
+    css->qdisk_setuped = false;
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
     cds->ops = colo_ops;
     cds->callback = colo_save_setup_done;
     cds->ao = ao;
@@ -388,12 +399,33 @@ static void colo_preresume_cb(libxl__egc *egc,
         goto out;
     }
 
+    if (!css->paused) {
+        rc = colo_qdisk_preresume(CTX, dss->domid);
+        if (rc) {
+            LOG(ERROR, "colo_qdisk_preresume() fails");
+            goto out;
+        }
+    }
+
     /* Resumes the domain and the device model */
     if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) {
         LOG(ERROR, "cannot resume primary vm");
         goto out;
     }
 
+    /*
+     * The guest should be paused before doing colo because there is
+     * no disk migration.
+     */
+    if (css->paused) {
+        rc = libxl_domain_unpause(CTX, dss->domid);
+        if (rc) {
+            LOG(ERROR, "cannot unpause primary vm");
+            goto out;
+        }
+        css->paused = false;
+    }
+
     /* read COLO_SVM_RESUMED */
     css->callback = colo_read_svm_resumed_done;
     css->srs.read_records_callback = colo_common_read_stream_done;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index bb5e298..18adf66 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1662,6 +1662,14 @@ _hidden int libxl__qmp_set_global_dirty_log(libxl__gc *gc, int domid, bool enabl
 _hidden int libxl__qmp_insert_cdrom(libxl__gc *gc, int domid, const libxl_device_disk *disk);
 /* Add a virtual CPU */
 _hidden int libxl__qmp_cpu_add(libxl__gc *gc, int domid, int index);
+/* Start block replication */
+_hidden int libxl__qmp_block_start_replication(libxl__gc *gc, int domid,
+                                               bool primary, const char *addr);
+/* Do block checkpoint */
+_hidden int libxl__qmp_block_do_checkpoint(libxl__gc *gc, int domid);
+/* Stop block replication */
+_hidden int libxl__qmp_block_stop_replication(libxl__gc *gc, int domid,
+                                              bool primary);
 /* close and free the QMP handler */
 _hidden void libxl__qmp_close(libxl__qmp_handler *qmp);
 /* remove the socket file, if the file has already been removed,
@@ -2735,6 +2743,9 @@ int init_subkind_nic(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds);
 int init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds);
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds);
+int colo_qdisk_preresume(libxl_ctx *ctx, domid_t domid);
 
 typedef void libxl__checkpoint_callback(libxl__egc *,
                                         libxl__checkpoint_devices_state *,
@@ -2932,6 +2943,10 @@ struct libxl__colo_save_state {
     libxl__stream_read_state srs;
     void (*callback)(libxl__egc *, libxl__colo_save_state *, int);
     bool svm_running;
+    bool paused;
+
+    /* private, used by qdisk block replication */
+    bool qdisk_setuped;
 };
 
 /*----- Domain suspend (save) state structure -----*/
@@ -3314,6 +3329,9 @@ struct libxl__colo_restore_state {
     libxl__domain_create_cb *saved_cb;
     void *crcs;
     libxl__checkpoint_devices_state cds;
+
+    /* private, used by qdisk block replication */
+    bool qdisk_setuped;
 };
 
 struct libxl__domain_create_state {
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index a6f1a21..9714bdf 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -965,6 +965,37 @@ int libxl__qmp_cpu_add(libxl__gc *gc, int domid, int idx)
     return qmp_run_command(gc, domid, "cpu-add", args, NULL, NULL);
 }
 
+int libxl__qmp_block_start_replication(libxl__gc *gc, int domid,
+                                       bool primary, const char *addr)
+{
+    libxl__json_object *args = NULL;
+
+    qmp_parameters_add_bool(gc, &args, "enable", true);
+    qmp_parameters_add_bool(gc, &args, "primary", primary);
+    if (!primary)
+        qmp_parameters_add_string(gc, &args, "addr", addr);
+
+    return qmp_run_command(gc, domid, "xen-set-block-replication", args,
+                           NULL, NULL);
+}
+
+int libxl__qmp_block_do_checkpoint(libxl__gc *gc, int domid)
+{
+    return qmp_run_command(gc, domid, "xen-do-block-checkpoint", NULL,
+                           NULL, NULL);
+}
+
+int libxl__qmp_block_stop_replication(libxl__gc *gc, int domid, bool primary)
+{
+    libxl__json_object *args = NULL;
+
+    qmp_parameters_add_bool(gc, &args, "enable", false);
+    qmp_parameters_add_bool(gc, &args, "primary", primary);
+
+    return qmp_run_command(gc, domid, "xen-set-block-replication", args,
+                           NULL, NULL);
+}
+
 int libxl__qmp_initializations(libxl__gc *gc, uint32_t domid,
                                const libxl_domain_config *guest_config)
 {
-- 
1.9.1

  parent reply	other threads:[~2015-06-25  6:31 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-25  6:30 [PATCH v7 COLO 00/18] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Yang Hongyang
2015-06-25  6:30 ` [PATCH v7 COLO 01/18] docs: add colo readme Yang Hongyang
2015-07-14 15:15   ` Ian Campbell
2015-06-25  6:30 ` [PATCH v7 COLO 02/18] tools/libxl: handle colo_context records in a libxl migration v2 stream Yang Hongyang
2015-07-14 15:19   ` Ian Campbell
2015-07-15  0:34     ` Yang Hongyang
2015-06-25  6:30 ` [PATCH v7 COLO 03/18] tools/libxl: write colo_context records into the stream Yang Hongyang
2015-06-25  6:30 ` [PATCH v7 COLO 04/18] secondary vm suspend/resume/checkpoint code Yang Hongyang
2015-06-25  6:30 ` [PATCH v7 COLO 05/18] primary " Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 06/18] libxc/restore: support COLO restore Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 07/18] libxc/restore: send dirty bitmap to primary when checkpoint under colo Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 08/18] send store mfn and console mfn to xl before resuming secondary vm Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 09/18] libxc/save: support COLO save Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 10/18] implement the cmdline for COLO Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 11/18] Support colo mode for qemu disk Yang Hongyang
2015-06-25  6:31 ` Yang Hongyang [this message]
2015-06-25  6:31 ` [PATCH v7 COLO 13/18] COLO proxy: implement setup/teardown of COLO proxy module Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 14/18] COLO proxy: preresume, postresume and checkpoint Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 15/18] COLO nic: implement COLO nic subkind Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 16/18] setup and control colo proxy on primary side Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 17/18] setup and control colo proxy on secondary side Yang Hongyang
2015-06-25  6:31 ` [PATCH v7 COLO 18/18] cmdline switches and config vars to control colo-proxy Yang Hongyang
2015-07-14 15:55 ` [PATCH v7 COLO 00/18] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Ian Campbell
2015-07-15  0:41   ` Yang Hongyang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1435213872-10698-13-git-send-email-yanghy@cn.fujitsu.com \
    --to=yanghy@cn.fujitsu.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=wei.liu2@citrix.com \
    --cc=wency@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.