From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: [PATCH v4 14/29] tools/python: Other migration infrastructure Date: Tue, 14 Jul 2015 11:59:29 +0100 Message-ID: <1436871584-6522-15-git-send-email-andrew.cooper3@citrix.com> References: <1436871584-6522-1-git-send-email-andrew.cooper3@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1436871584-6522-1-git-send-email-andrew.cooper3@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Xen-devel Cc: Andrew Cooper , Ian Jackson , Wei Liu List-Id: xen-devel@lists.xenproject.org Contains: * Reverse-engineered notes of the legacy format from xg_save_restore.h * Python implementation of the legacy format * Public HVM Params used in the legacy stream * XL header format Signed-off-by: Andrew Cooper Acked-by: Ian Campbell CC: Ian Jackson CC: Wei Liu --- New in v2 - removes various many magic numbers from subsequent scripts --- tools/python/xen/migration/legacy.py | 279 ++++++++++++++++++++++++++++++++++ tools/python/xen/migration/public.py | 21 +++ tools/python/xen/migration/xl.py | 12 ++ 3 files changed, 312 insertions(+) create mode 100644 tools/python/xen/migration/legacy.py create mode 100644 tools/python/xen/migration/public.py create mode 100644 tools/python/xen/migration/xl.py diff --git a/tools/python/xen/migration/legacy.py b/tools/python/xen/migration/legacy.py new file mode 100644 index 0000000..2f2240a --- /dev/null +++ b/tools/python/xen/migration/legacy.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Libxc legacy migration streams + +Documentation and record structures for legacy migration +""" + +""" +SAVE/RESTORE/MIGRATE PROTOCOL +============================= + +The general form of a stream of chunks is a header followed by a +body consisting of a variable number of chunks 
(terminated by a +chunk with type 0) followed by a trailer. + +For a rolling/checkpoint (e.g. remus) migration then the body and +trailer phases can be repeated until an external event +(e.g. failure) causes the process to terminate and commit to the +most recent complete checkpoint. + +HEADER +------ + +unsigned long : p2m_size + +extended-info (PV-only, optional): + + If first unsigned long == ~0UL then extended info is present, + otherwise unsigned long is part of p2m. Note that p2m_size above + does not include the length of the extended info. + + extended-info: + + unsigned long : signature == ~0UL + uint32_t : number of bytes remaining in extended-info + + 1 or more extended-info blocks of form: + char[4] : block identifier + uint32_t : block data size + bytes : block data + + defined extended-info blocks: + "vcpu" : VCPU context info containing vcpu_guest_context_t. + The precise variant of the context structure + (e.g. 32 vs 64 bit) is distinguished by + the block size. + "extv" : Presence indicates use of extended VCPU context in + tail, data size is 0. + +p2m (PV-only): + + consists of p2m_size bytes comprising an array of xen_pfn_t sized entries. + +BODY PHASE - Format A (for live migration or Remus without compression) +---------- + +A series of chunks with a common header: + int : chunk type + +If the chunk type is +ve then chunk contains guest memory data, and the +type contains the number of pages in the batch: + + unsigned long[] : PFN array, length == number of pages in batch + Each entry consists of XEN_DOMCTL_PFINFO_* + in bits 31-28 and the PFN number in bits 27-0. + page data : PAGE_SIZE bytes for each page marked present in PFN + array + +If the chunk type is -ve then chunk consists of one of a number of +metadata types. See definitions of XC_SAVE_ID_* below. + +If chunk type is 0 then body phase is complete. 
+ + +BODY PHASE - Format B (for Remus with compression) +---------- + +A series of chunks with a common header: + int : chunk type + +If the chunk type is +ve then chunk contains array of PFNs corresponding +to guest memory and type contains the number of PFNs in the batch: + + unsigned long[] : PFN array, length == number of pages in batch + Each entry consists of XEN_DOMCTL_PFINFO_* + in bits 31-28 and the PFN number in bits 27-0. + +If the chunk type is -ve then chunk consists of one of a number of +metadata types. See definitions of XC_SAVE_ID_* below. + +If the chunk type is -ve and equals XC_SAVE_ID_COMPRESSED_DATA, then the +chunk consists of compressed page data, in the following format: + + unsigned long : Size of the compressed chunk to follow + compressed data : variable length data of size indicated above. + This chunk consists of compressed page data. + The number of pages in one chunk depends on + the amount of space available in the sender's + output buffer. + +Format of compressed data: + compressed_data = <deltas>* + delta = <marker, run*> + marker = (RUNFLAG|SKIPFLAG) bitwise-or RUNLEN [1 byte marker] + RUNFLAG = 0 + SKIPFLAG = 1 << 7 + RUNLEN = 7-bit unsigned value indicating number of WORDS in the run + run = string of bytes of length sizeof(WORD) * RUNLEN + + If marker contains RUNFLAG, then RUNLEN * sizeof(WORD) bytes of data following + the marker is copied into the target page at the appropriate offset indicated by + the offset_ptr + If marker contains SKIPFLAG, then the offset_ptr is advanced + by RUNLEN * sizeof(WORD). + +If chunk type is 0 then body phase is complete. + +There can be one or more chunks with type XC_SAVE_ID_COMPRESSED_DATA, +containing compressed pages. The compressed chunks are collated to form +one single compressed chunk for the entire iteration. The number of pages +present in this final compressed chunk will be equal to the total number +of valid PFNs specified by the +ve chunks. 
+ +At the sender side, compressed pages are inserted into the output stream +in the same order as they would have been if compression logic was absent. + +Until last iteration, the BODY is sent in Format A, to maintain live +migration compatibility with receivers of older Xen versions. +At the last iteration, if Remus compression was enabled, the sender sends +a trigger, XC_SAVE_ID_ENABLE_COMPRESSION to tell the receiver to parse the +BODY in Format B from the next iteration onwards. + +An example sequence of chunks received in Format B: + +16 +ve chunk + unsigned long[16] PFN array + +100 +ve chunk + unsigned long[100] PFN array + +50 +ve chunk + unsigned long[50] PFN array + + XC_SAVE_ID_COMPRESSED_DATA TAG + N Length of compressed data + N bytes of DATA Decompresses to 166 pages + + XC_SAVE_ID_* other xc save chunks + 0 END BODY TAG + +Corner case with checkpoint compression: + At sender side, after pausing the domain, dirty pages are usually + copied out to a temporary buffer. After the domain is resumed, + compression is done and the compressed chunk(s) are sent, followed by + other XC_SAVE_ID_* chunks. + If the temporary buffer gets full while scanning for dirty pages, + the sender stops buffering of dirty pages, compresses the temporary + buffer and sends the compressed data with XC_SAVE_ID_COMPRESSED_DATA. + The sender then resumes the buffering of dirty pages and continues + scanning for the dirty pages. + For example, assume that the temporary buffer can hold 4096 pages and + there are 4100 dirty pages. 
The following is the sequence of chunks + that the receiver will see: + + +1024 +ve chunk + unsigned long[1024] PFN array + +1024 +ve chunk + unsigned long[1024] PFN array + +1024 +ve chunk + unsigned long[1024] PFN array + +1024 +ve chunk + unsigned long[1024] PFN array + + XC_SAVE_ID_COMPRESSED_DATA TAG + N Length of compressed data + N bytes of DATA Decompresses to 4096 pages + + +4 +ve chunk + unsigned long[4] PFN array + + XC_SAVE_ID_COMPRESSED_DATA TAG + M Length of compressed data + M bytes of DATA Decompresses to 4 pages + + XC_SAVE_ID_* other xc save chunks + 0 END BODY TAG + + In other words, XC_SAVE_ID_COMPRESSED_DATA can be interleaved with + +ve chunks arbitrarily. But at the receiver end, the following condition + always holds true until the end of BODY PHASE: + num(PFN entries +ve chunks) >= num(pages received in compressed form) + +TAIL PHASE +---------- + +Content differs for PV and HVM guests. + +HVM TAIL: + + "Magic" pages: + uint64_t : I/O req PFN + uint64_t : Buffered I/O req PFN + uint64_t : Store PFN + Xen HVM Context: + uint32_t : Length of context in bytes + bytes : Context data + Qemu context: + char[21] : Signature: + "QemuDeviceModelRecord" : Read Qemu save data until EOF + "DeviceModelRecord0002" : uint32_t length field followed by that many + bytes of Qemu save data + "RemusDeviceModelState" : Currently the same as "DeviceModelRecord0002". + +PV TAIL: + + Unmapped PFN list : list of all the PFNs that were not in map at the close + unsigned int : Number of unmapped pages + unsigned long[] : PFNs of unmapped pages + + VCPU context data : A series of VCPU records, one per present VCPU + Maximum and present map supplied in XC_SAVE_ID_VCPUINFO + bytes: : VCPU context structure. 
Size is determined by size + provided in extended-info header + bytes[128] : Extended VCPU context (present IFF "extv" block + present in extended-info header) + + Shared Info Page : 4096 bytes of shared info page +""" + +CHUNK_end = 0 +CHUNK_enable_verify_mode = -1 +CHUNK_vcpu_info = -2 +CHUNK_hvm_ident_pt = -3 +CHUNK_hvm_vm86_tss = -4 +CHUNK_tmem = -5 +CHUNK_tmem_extra = -6 +CHUNK_tsc_info = -7 +CHUNK_hvm_console_pfn = -8 +CHUNK_last_checkpoint = -9 +CHUNK_hvm_acpi_ioports_location = -10 +CHUNK_hvm_viridian = -11 +CHUNK_compressed_data = -12 +CHUNK_enable_compression = -13 +CHUNK_hvm_generation_id_addr = -14 +CHUNK_hvm_paging_ring_pfn = -15 +CHUNK_hvm_monitor_ring_pfn = -16 +CHUNK_hvm_sharing_ring_pfn = -17 +CHUNK_toolstack = -18 +CHUNK_hvm_ioreq_server_pfn = -19 +CHUNK_hvm_nr_ioreq_server_pages = -20 + +chunk_type_to_str = { + CHUNK_end : "end", + CHUNK_enable_verify_mode : "enable_verify_mode", + CHUNK_vcpu_info : "vcpu_info", + CHUNK_hvm_ident_pt : "hvm_ident_pt", + CHUNK_hvm_vm86_tss : "hvm_vm86_tss", + CHUNK_tmem : "tmem", + CHUNK_tmem_extra : "tmem_extra", + CHUNK_tsc_info : "tsc_info", + CHUNK_hvm_console_pfn : "hvm_console_pfn", + CHUNK_last_checkpoint : "last_checkpoint", + CHUNK_hvm_acpi_ioports_location : "hvm_acpi_ioports_location", + CHUNK_hvm_viridian : "hvm_viridian", + CHUNK_compressed_data : "compressed_data", + CHUNK_enable_compression : "enable_compression", + CHUNK_hvm_generation_id_addr : "hvm_generation_id_addr", + CHUNK_hvm_paging_ring_pfn : "hvm_paging_ring_pfn", + CHUNK_hvm_monitor_ring_pfn : "hvm_monitor_ring_pfn", + CHUNK_hvm_sharing_ring_pfn : "hvm_sharing_ring_pfn", + CHUNK_toolstack : "toolstack", + CHUNK_hvm_ioreq_server_pfn : "hvm_ioreq_server_pfn", + CHUNK_hvm_nr_ioreq_server_pages : "hvm_nr_ioreq_server_pages", +} + +# Up to 1024 pages (4MB) at a time +MAX_BATCH = 1024 + +# Maximum #VCPUs currently supported for save/restore +MAX_VCPU_ID = 4095 diff --git a/tools/python/xen/migration/public.py 
b/tools/python/xen/migration/public.py new file mode 100644 index 0000000..fab2f84 --- /dev/null +++ b/tools/python/xen/migration/public.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Xen public ABI constants, used in migration +""" + +HVM_PARAM_STORE_PFN = 1 +HVM_PARAM_IOREQ_PFN = 5 +HVM_PARAM_BUFIOREQ_PFN = 6 +HVM_PARAM_VIRIDIAN = 9 +HVM_PARAM_IDENT_PT = 12 +HVM_PARAM_VM86_TSS = 15 +HVM_PARAM_CONSOLE_PFN = 17 +HVM_PARAM_ACPI_IOPORTS_LOCATION = 19 +HVM_PARAM_PAGING_RING_PFN = 27 +HVM_PARAM_MONITOR_RING_PFN = 28 +HVM_PARAM_SHARING_RING_PFN = 29 +HVM_PARAM_IOREQ_SERVER_PFN = 32 +HVM_PARAM_NR_IOREQ_SERVER_PAGES = 33 +HVM_PARAM_VM_GENERATION_ID_ADDR = 34 diff --git a/tools/python/xen/migration/xl.py b/tools/python/xen/migration/xl.py new file mode 100644 index 0000000..978e744 --- /dev/null +++ b/tools/python/xen/migration/xl.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +XL migration stream format +""" + +MAGIC = "Xen saved domain, xl format\n \0 \r" + +HEADER_FORMAT = "=IIII" + +MANDATORY_FLAG_STREAMV2 = 2 -- 1.7.10.4