From: "Michael S. Tsirkin" <mst@redhat.com>
To: Igor Mammedov <imammedo@redhat.com>
Cc: pbonzini@redhat.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v4 4/7] pc: fix QEMU crashing when more than ~50 memory hotplugged
Date: Thu, 9 Jul 2015 16:06:14 +0300
Message-ID: <20150709155919-mutt-send-email-mst@redhat.com>
In-Reply-To: <1436442444-132020-5-git-send-email-imammedo@redhat.com>

On Thu, Jul 09, 2015 at 01:47:21PM +0200, Igor Mammedov wrote:
> QEMU asserts in vhost due to hitting the vhost backend limit
> on the number of supported memory regions.
> 
> Describe all hotplugged memory as one contiguous range to vhost,
> with a linear 1:1 HVA->GPA mapping in the backend.
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>

Hmm - a bunch of work here to recombine MRs that the memory listener
interface breaks up.  In particular, KVM could benefit from this too (on
workloads that change the table a lot).  Can't we teach the memory core
to pass the HVA range as a single contiguous range to memory listeners?
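To illustrate the kind of coalescing I mean, here is a rough standalone
sketch (not actual QEMU code; the struct below is only a stand-in for
struct vhost_memory_region): merge neighbouring entries whose GPA and
HVA ranges are both contiguous, so consumers such as vhost and KVM see
fewer slots.

#include <stdint.h>

/* Stand-in for struct vhost_memory_region; real code would use it directly. */
struct region {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
};

/*
 * Merge neighbouring entries whose GPA and HVA ranges are both
 * contiguous.  Assumes the array is sorted by guest_phys_addr.
 * Returns the new number of entries.
 */
static unsigned coalesce_regions(struct region *r, unsigned n)
{
    unsigned out = 0, i;

    for (i = 0; i < n; i++) {
        if (out > 0 &&
            r[out - 1].guest_phys_addr + r[out - 1].memory_size ==
                r[i].guest_phys_addr &&
            r[out - 1].userspace_addr + r[out - 1].memory_size ==
                r[i].userspace_addr) {
            r[out - 1].memory_size += r[i].memory_size; /* extend previous */
        } else {
            r[out++] = r[i];                            /* keep as new slot */
        }
    }
    return out;
}

Something along these lines, done once in the core before notifying
listeners, would avoid every listener having to reinvent it.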

> ---
>  hw/virtio/vhost.c         | 47 ++++++++++++++++++++++++++++++++++++++++++++---
>  include/hw/virtio/vhost.h |  1 +
>  2 files changed, 45 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 2712c6f..7bc27f0 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -432,6 +432,10 @@ static void vhost_set_memory(MemoryListener *listener,
>  
>      assert(size);
>  
> +    if (!dev->rsvd_hva.mr) {
> +        dev->rsvd_hva = memory_region_find_hva_range(section->mr);
> +    }
> +
>      /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
>      ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
>      if (add) {
> @@ -472,6 +476,42 @@ static void vhost_begin(MemoryListener *listener)
>      dev->mem_changed_start_addr = -1;
>  }
>  
> +static int vhost_set_mem_table(struct vhost_dev *dev)
> +{
> +    hwaddr start_addr = 0;
> +    ram_addr_t size = 0;
> +    struct vhost_memory *mem;
> +    int r, i;
> +
> +    /* drop memory ranges that belong to the contiguous HVA area */
> +    mem = g_memdup(dev->mem, offsetof(struct vhost_memory, regions) +
> +                       dev->mem->nregions * sizeof dev->mem->regions[0]);
> +    start_addr = dev->rsvd_hva.offset_within_address_space;
> +    size = int128_get64(dev->rsvd_hva.size);
> +    for (i = 0; i < mem->nregions; i++) {
> +        if (mem->regions[i].guest_phys_addr >= start_addr &&
> +            mem->regions[i].guest_phys_addr < start_addr + size) {
> +            mem->nregions--;
> +            memmove(&mem->regions[i], &mem->regions[i + 1],
> +                    (mem->nregions - i) * sizeof mem->regions[0]);
> +        }
> +    }
> +    /* add one contiguous HVA entry if any memory ranges from it were present */
> +    if (dev->mem->nregions > mem->nregions) {
> +        struct vhost_memory_region *reg = &mem->regions[mem->nregions];
> +
> +        reg->guest_phys_addr = start_addr;
> +        reg->memory_size = size;
> +        reg->userspace_addr =
> +            (__u64)memory_region_get_ram_ptr(dev->rsvd_hva.mr);
> +        mem->nregions++;
> +    }
> +
> +    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, mem);
> +    g_free(mem);
> +    return r;
> +}
> +
>  static void vhost_commit(MemoryListener *listener)
>  {
>      struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> @@ -500,7 +540,7 @@ static void vhost_commit(MemoryListener *listener)
>      }
>  
>      if (!dev->log_enabled) {
> -        r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
> +        r = vhost_set_mem_table(dev);
>          assert(r >= 0);
>          dev->memory_changed = false;
>          return;
> @@ -513,7 +553,7 @@ static void vhost_commit(MemoryListener *listener)
>      if (dev->log_size < log_size) {
>          vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
>      }
> -    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
> +    r = vhost_set_mem_table(dev);
>      assert(r >= 0);
>      /* To log less, can only decrease log size after table update. */
>      if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
> @@ -956,6 +996,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>          migrate_add_blocker(hdev->migration_blocker);
>      }
>      hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
> +    memset(&hdev->rsvd_hva, 0, sizeof hdev->rsvd_hva);
>      hdev->n_mem_sections = 0;
>      hdev->mem_sections = NULL;
>      hdev->log = NULL;
> @@ -1119,7 +1160,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
>      if (r < 0) {
>          goto fail_features;
>      }
> -    r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
> +    r = vhost_set_mem_table(hdev);
>      if (r < 0) {
>          r = -errno;
>          goto fail_mem;
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index dd51050..d41bf2f 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -40,6 +40,7 @@ struct vhost_dev {
>      struct vhost_memory *mem;
>      int n_mem_sections;
>      MemoryRegionSection *mem_sections;
> +    MemoryRegionSection rsvd_hva;
>      struct vhost_virtqueue *vqs;
>      int nvqs;
>      /* the first virtqueue which would be used by this vhost dev */
> -- 
> 1.8.3.1
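
For reference, the effect of vhost_set_mem_table() above boils down to
the following standalone sketch (illustration only, with made-up window
values; the struct is a stand-in for struct vhost_memory_region): drop
every slot that lies inside the reserved hotplug window and replace
them with a single linear 1:1 entry covering the whole window.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct region {                /* stand-in for struct vhost_memory_region */
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
};

int main(void)
{
    /* Made-up example: a 2G reserved hotplug window at GPA 4G, backed by
     * one contiguous HVA mapping. */
    const uint64_t win_gpa  = 4ULL << 30;
    const uint64_t win_size = 2ULL << 30;
    const uint64_t win_hva  = 0x7f0000000000ULL;

    struct region regs[8] = {
        { 0, 1ULL << 30, 0x7fe000000000ULL },             /* boot RAM */
        { win_gpa, 128ULL << 20, win_hva },               /* DIMM 1   */
        { win_gpa + (128ULL << 20), 128ULL << 20,
          win_hva + (128ULL << 20) },                     /* DIMM 2   */
    };
    unsigned n = 3, i = 0;

    /* Drop every slot that lies inside the reserved window... */
    while (i < n) {
        if (regs[i].guest_phys_addr >= win_gpa &&
            regs[i].guest_phys_addr < win_gpa + win_size) {
            n--;
            memmove(&regs[i], &regs[i + 1], (n - i) * sizeof regs[0]);
            /* do not advance i: re-check the entry shifted into this slot */
        } else {
            i++;
        }
    }

    /* ...and describe the whole window as one linear 1:1 entry. */
    regs[n].guest_phys_addr = win_gpa;
    regs[n].memory_size     = win_size;
    regs[n].userspace_addr  = win_hva;
    n++;

    for (i = 0; i < n; i++) {
        printf("gpa=0x%012" PRIx64 " size=0x%09" PRIx64 " hva=0x%012" PRIx64 "\n",
               regs[i].guest_phys_addr, regs[i].memory_size,
               regs[i].userspace_addr);
    }
    return 0;
}

With that, the slot count handed to the backend stays constant no matter
how many DIMMs are plugged into the reserved window.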

Thread overview: 24+ messages
2015-07-09 11:47 [Qemu-devel] [PATCH v4 0/7] Fix QEMU crash during memory hotplug with vhost=on Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 1/7] memory: get rid of memory_region_destructor_ram_from_ptr() Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 2/7] memory: introduce MemoryRegion container with reserved HVA range Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 3/7] pc: reserve hotpluggable memory range with memory_region_init_hva_range() Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 4/7] pc: fix QEMU crashing when more than ~50 memory hotplugged Igor Mammedov
2015-07-09 13:06   ` Michael S. Tsirkin [this message]
2015-07-09 13:43     ` Paolo Bonzini
2015-07-09 13:46       ` Michael S. Tsirkin
2015-07-10 10:12         ` Igor Mammedov
2015-07-13  6:55           ` Michael S. Tsirkin
2015-07-13 18:55             ` Igor Mammedov
2015-07-13 20:14               ` Michael S. Tsirkin
2015-07-14 13:02                 ` Igor Mammedov
2015-07-14 13:14                   ` Michael S. Tsirkin
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 5/7] exec: make sure that RAMBlock descriptor won't be leaked Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 6/7] exec: add qemu_ram_unmap_hva() API for unmapping memory from HVA area Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 7/7] memory: add support for deleting HVA mapped MemoryRegion Igor Mammedov
2015-07-15 15:12 ` [Qemu-devel] [PATCH v4 0/7] Fix QEMU crash during memory hotplug with vhost=on Igor Mammedov
2015-07-15 16:32   ` Michael S. Tsirkin
2015-07-16  7:26     ` Igor Mammedov
2015-07-16  7:35       ` Michael S. Tsirkin
2015-07-16  9:42         ` Igor Mammedov
2015-07-16 10:24           ` Michael S. Tsirkin
2015-07-16 11:11             ` Igor Mammedov
