From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:41619)
	by lists.gnu.org with esmtp (Exim 4.71) (envelope-from )
	id 1ZDAIj-00055U-2P for qemu-devel@nongnu.org;
	Thu, 09 Jul 2015 07:47:38 -0400
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from ) id 1ZDAId-0005jb-Po for qemu-devel@nongnu.org;
	Thu, 09 Jul 2015 07:47:37 -0400
Received: from mx1.redhat.com ([209.132.183.28]:53269)
	by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from )
	id 1ZDAId-0005jJ-FR for qemu-devel@nongnu.org;
	Thu, 09 Jul 2015 07:47:31 -0400
Received: from int-mx13.intmail.prod.int.phx2.redhat.com
	(int-mx13.intmail.prod.int.phx2.redhat.com [10.5.11.26])
	by mx1.redhat.com (Postfix) with ESMTPS id 17953C99E6
	for ; Thu,  9 Jul 2015 11:47:31 +0000 (UTC)
From: Igor Mammedov
Date: Thu,  9 Jul 2015 13:47:19 +0200
Message-Id: <1436442444-132020-3-git-send-email-imammedo@redhat.com>
In-Reply-To: <1436442444-132020-1-git-send-email-imammedo@redhat.com>
References: <1436442444-132020-1-git-send-email-imammedo@redhat.com>
Subject: [Qemu-devel] [PATCH v4 2/7] memory: introduce MemoryRegion container with reserved HVA range
List-Id:
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
To: qemu-devel@nongnu.org
Cc: pbonzini@redhat.com, mst@redhat.com

This patch adds an API to allocate, map into, and look up a reserved
HVA MemoryRegion:
 - memory_region_init_hva_range()
 - memory_region_add_subregion_to_hva()
 - memory_region_find_hva_range()

A MemoryRegion with a reserved HVA range provides a linear 1:1
HVA->GVA mapping for the RAM MemoryRegions that are added as
subregions inside it.

It will be used for memory hotplug and vhost integration, collapsing
all hotplugged MemoryRegions into a single memory range descriptor,
which makes it possible to overcome vhost's limit on the number of
allowed memory ranges.

Signed-off-by: Igor Mammedov
---
v1->v4:
  - fix offset calculation in memory_region_find_hva_range()
  - add memory_region_add_subregion_to_hva()
RFC->v1:
  - rename:
      memory_region_init_rsvd_hva -> memory_region_init_hva_range
      memory_region_find_rsvd_hva -> memory_region_find_hva_range
  - replace use of ram_addr with "void *rsvd_hva"
  - guard Linux-specific calls with an ifdef
  - split memory reservation into qemu_ram_reserve_hva()
---
 exec.c                    | 30 ++++++++++++++++++++++
 include/exec/cpu-common.h |  2 ++
 include/exec/memory.h     | 63 +++++++++++++++++++++++++++++++++++++++++++++--
 memory.c                  | 50 +++++++++++++++++++++++++++++++++++++
 4 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/exec.c b/exec.c
index ca53537..562dae5 100644
--- a/exec.c
+++ b/exec.c
@@ -1339,6 +1339,36 @@ static int memory_try_enable_merging(void *addr, size_t len)
     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
 }
 
+#ifdef __linux__
+void *qemu_ram_reserve_hva(ram_addr_t length)
+{
+    return mmap(0, length, PROT_NONE,
+                MAP_NORESERVE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+}
+
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva)
+{
+    RAMBlock *block = find_ram_block(addr);
+
+    assert(block);
+    block->host = mremap(block->host, block->used_length,
+                         block->used_length,
+                         MREMAP_MAYMOVE | MREMAP_FIXED, new_hva);
+    memory_try_enable_merging(block->host, block->used_length);
+    qemu_ram_setup_dump(block->host, block->used_length);
+}
+#else
+void *qemu_ram_reserve_hva(ram_addr_t length)
+{
+    return NULL;
+}
+
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva)
+{
+    assert(0);
+}
+#endif
+
 /* Only legal before guest might have detected the memory size: e.g. on
  * incoming migration, or right after reset.
  *
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 9fb1d54..301f50b 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -62,6 +62,8 @@ typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
 
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void *qemu_ram_reserve_hva(ram_addr_t length);
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva);
 /* This should not be used by devices.  */
 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1394715..1f2cbd1 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -173,6 +173,7 @@ struct MemoryRegion {
     bool terminates;
     bool romd_mode;
     bool ram;
+    void *rsvd_hva;
     bool skip_dump;
     bool readonly; /* For RAM regions */
     bool enabled;
@@ -285,6 +286,26 @@ void memory_region_init(MemoryRegion *mr,
                         uint64_t size);
 
 /**
+ * memory_region_init_hva_range: Initialize a reserved HVA memory region
+ *
+ * A container for RAM memory regions.  When a subregion is added with
+ * memory_region_add_subregion(), the subregion's backing host memory is
+ * remapped into the HVA range reserved by this region.
+ * Supported only on Linux.  If memory reservation and remapping are not
+ * implemented for the platform, this call degrades to a regular
+ * memory_region_init().
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count
+ * @name: used for debugging; not visible to the user or ABI
+ * @size: size of the region; any subregions beyond this size will be clipped
+ */
+void memory_region_init_hva_range(MemoryRegion *mr,
+                                  struct Object *owner,
+                                  const char *name,
+                                  uint64_t size);
+
+/**
  * memory_region_ref: Add 1 to a memory region's reference count
  *
  * Whenever memory regions are accessed outside the BQL, they need to be
@@ -634,8 +655,8 @@ int memory_region_get_fd(MemoryRegion *mr);
  * memory_region_get_ram_ptr: Get a pointer into a RAM memory region.
  *
  * Returns a host pointer to a RAM memory region (created with
- * memory_region_init_ram() or memory_region_init_ram_ptr()).  Use with
- * care.
+ * memory_region_init_ram(), memory_region_init_ram_ptr() or
+ * memory_region_init_hva_range()).  Use with care.
  *
  * @mr: the memory region being queried.
  */
@@ -909,6 +930,24 @@ void memory_region_del_eventfd(MemoryRegion *mr,
 void memory_region_add_subregion(MemoryRegion *mr,
                                  hwaddr offset,
                                  MemoryRegion *subregion);
+
+/**
+ * memory_region_add_subregion_to_hva: Add a subregion to a HVA container.
+ *
+ * The same as memory_region_add_subregion(), with the only difference that
+ * it remaps a RAM subregion's backing memory into the HVA range of @mr.
+ * If HVA reservation is not supported by the host, the call degrades to and
+ * behaves as memory_region_add_subregion().
+ *
+ * @mr: the region to contain the new subregion; must be a container
+ *      initialized with memory_region_init().
+ * @offset: the offset relative to @mr where @subregion is added.
+ * @subregion: the subregion to be added.
+ */
+void memory_region_add_subregion_to_hva(MemoryRegion *mr,
+                                        hwaddr offset,
+                                        MemoryRegion *subregion);
+
 /**
  * memory_region_add_subregion_overlap: Add a subregion to a container
  * with overlap.
@@ -1052,6 +1091,26 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size);
 
 /**
+ * memory_region_find_hva_range: finds the parent MemoryRegion with a
+ *     reserved HVA range and translates it into a #MemoryRegionSection.
+ *
+ * Locates the first parent #MemoryRegion of @mr that has
+ * a reserved HVA range.
+ *
+ * Returns a #MemoryRegionSection that describes the reserved HVA
+ * memory region.
+ *     .@offset_within_address_space is the offset of the found
+ *      memory region (in the .@mr field) relative to the address
+ *      space that contains it.
+ *     .@offset_within_region is the offset of @mr relative
+ *      to the returned region (in the .@mr field).
+ *     .@size is the size of the found memory region.
+ *
+ * @mr: a MemoryRegion whose HVA parent is looked up
+ */
+MemoryRegionSection memory_region_find_hva_range(MemoryRegion *mr);
+
+/**
  * address_space_sync_dirty_bitmap: synchronize the dirty log for all memory
  *
  * Synchronizes the dirty page log for an entire address space.
diff --git a/memory.c b/memory.c
index ec07ae8..bf6aa4e 100644
--- a/memory.c
+++ b/memory.c
@@ -929,6 +929,15 @@ void memory_region_init(MemoryRegion *mr,
     }
 }
 
+void memory_region_init_hva_range(MemoryRegion *mr,
+                                  Object *owner,
+                                  const char *name,
+                                  uint64_t size)
+{
+    memory_region_init(mr, owner, name, size);
+    mr->rsvd_hva = qemu_ram_reserve_hva(memory_region_size(mr));
+}
+
 static void memory_region_get_addr(Object *obj, Visitor *v, void *opaque,
                                    const char *name, Error **errp)
 {
@@ -1517,6 +1526,10 @@ int memory_region_get_fd(MemoryRegion *mr)
 
 void *memory_region_get_ram_ptr(MemoryRegion *mr)
 {
+    if (mr->rsvd_hva) {
+        return mr->rsvd_hva;
+    }
+
     if (mr->alias) {
         return memory_region_get_ram_ptr(mr->alias) + mr->alias_offset;
     }
@@ -1777,6 +1790,17 @@ void memory_region_add_subregion_overlap(MemoryRegion *mr,
     memory_region_add_subregion_common(mr, offset, subregion);
 }
 
+void memory_region_add_subregion_to_hva(MemoryRegion *mr,
+                                        hwaddr offset,
+                                        MemoryRegion *subregion)
+{
+    if (mr->rsvd_hva && subregion->ram) {
+        qemu_ram_remap_hva(subregion->ram_addr,
+                           memory_region_get_ram_ptr(mr) + offset);
+    }
+    memory_region_add_subregion(mr, offset, subregion);
+}
+
 void memory_region_del_subregion(MemoryRegion *mr,
                                  MemoryRegion *subregion)
 {
@@ -1897,6 +1921,32 @@ bool memory_region_is_mapped(MemoryRegion *mr)
     return mr->container ? true : false;
 }
 
+MemoryRegionSection memory_region_find_hva_range(MemoryRegion *mr)
+{
+    MemoryRegionSection ret = { .mr = NULL };
+    MemoryRegion *hva_container = NULL;
+    hwaddr addr = 0;
+    MemoryRegion *root;
+
+    for (root = mr; root->container; root = root->container) {
+        if (!hva_container && root->rsvd_hva) {
+            hva_container = root;
+            ret.offset_within_region = addr;
+        }
+        addr += root->addr;
+    }
+
+    ret.address_space = memory_region_to_address_space(root);
+    if (!ret.address_space || !hva_container) {
+        return ret;
+    }
+
+    ret.mr = hva_container;
+    ret.offset_within_address_space = addr;
+    ret.size = int128_make64(memory_region_size(ret.mr));
+    return ret;
+}
+
 MemoryRegionSection memory_region_find(MemoryRegion *mr, hwaddr addr,
                                        uint64_t size)
 {
-- 
1.8.3.1
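
A minimal usage sketch of how the three new calls are meant to be combined.
It is not part of the patch: the function, the container, the region names
("hotplug-container", "dimm0"), the window and DIMM sizes, and the caller's
arguments (owner, system_memory, hotplug_base) are all made up for
illustration, and it assumes the 2.4-era memory_region_init_ram() prototype
with an Error ** argument:

#include "exec/memory.h"
#include "qapi/error.h"

static MemoryRegion hva_container;
static MemoryRegion dimm_mr;

/* Reserve one contiguous HVA window for hotpluggable RAM, map a RAM
 * region into it, then look the container up the way a vhost-style
 * consumer would when it wants a single range describing everything. */
static void hva_container_example(Object *owner,
                                  MemoryRegion *system_memory,
                                  hwaddr hotplug_base)
{
    MemoryRegionSection sec;

    /* PROT_NONE reservation on Linux; on other hosts this degrades to a
     * plain memory_region_init().  The 1 GiB window size is hypothetical. */
    memory_region_init_hva_range(&hva_container, owner,
                                 "hotplug-container", 1ULL << 30);
    memory_region_add_subregion(system_memory, hotplug_base,
                                &hva_container);

    /* The RAM block backing "dimm0" gets mremap()ed into the container's
     * reserved HVA range at offset 0. */
    memory_region_init_ram(&dimm_mr, owner, "dimm0", 128 * 1024 * 1024,
                           &error_abort);
    memory_region_add_subregion_to_hva(&hva_container, 0, &dimm_mr);

    /* The returned section describes the reserved-HVA container itself:
     * sec.mr is the container, memory_region_get_ram_ptr(sec.mr) is its
     * reserved HVA and sec.size its length, i.e. a single descriptor that
     * covers every RAM subregion mapped into it. */
    sec = memory_region_find_hva_range(&dimm_mr);
    if (sec.mr) {
        /* register the single range with vhost etc. */
    }
}

The point of the final lookup is the one the commit message makes: vhost can
register the container's one range instead of one range per hotplugged
region, staying under its limit on the number of memory ranges.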