From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
To: Jan Beulich <JBeulich@suse.com>
Cc: Wei Liu <wei.liu2@citrix.com>,
	Stefano Stabellini <stefano.stabellini@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>, Tim Deegan <tim@xen.org>,
	Ian Campbell <Ian.Campbell@eu.citrix.com>,
	xen-devel <xen-devel@lists.xenproject.org>,
	Keir Fraser <keir@xen.org>
Subject: Re: [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
Date: Tue, 21 Jul 2015 17:18:24 +0100
Message-ID: <alpine.DEB.2.02.1507211511010.17378@kaball.uk.xensource.com>
In-Reply-To: <5582E0660200007800086A27@mail.emea.novell.com>

On Thu, 18 Jun 2015, Jan Beulich wrote:
> The number of slots per page being 511 (i.e. not a power of two) means
> that the (32-bit) read and write indexes going beyond 2^32 will likely
> disturb operation. Extend I/O req server creation so the caller can
> indicate that it is using suitable atomic accesses where needed (not
> all accesses to the two pointers really need to be atomic), allowing
> the hypervisor to atomically canonicalize both pointers when both have
> gone through at least one cycle.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Ian Campbell <ian.campbell@citrix.com>
> ---
> v2: Limit canonicalization loop to IOREQ_BUFFER_SLOT_NUM iterations.
>     Adjust xc_hvm_create_ioreq_server() documentation.
> 
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -1933,7 +1933,8 @@ int xc_get_hvm_param(xc_interface *handl
>   *
>   * @parm xch a handle to an open hypervisor interface.
>   * @parm domid the domain id to be serviced
> - * @parm handle_bufioreq should the IOREQ Server handle buffered requests?
> + * @parm handle_bufioreq how should the IOREQ Server handle buffered requests
> + *                       (HVM_IOREQSRV_BUFIOREQ_*)?
>   * @parm id pointer to an ioservid_t to receive the IOREQ Server id.
>   * @return 0 on success, -1 on failure.
>   */
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf
>      hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
>  
>      arg->domid = domid;
> -    arg->handle_bufioreq = !!handle_bufioreq;
> +    arg->handle_bufioreq = handle_bufioreq;
>  
>      rc = do_xen_hypercall(xch, &hypercall);
>  
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
>  
>  static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
>                                   domid_t domid, bool_t is_default,
> -                                 bool_t handle_bufioreq, ioservid_t id)
> +                                 int bufioreq_handling, ioservid_t id)

uint8_t?


>  {
>      struct vcpu *v;
>      int rc;
> @@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct 
>      if ( rc )
>          return rc;
>  
> -    rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq);
> +    if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> +        s->bufioreq_atomic = 1;
> +
> +    rc = hvm_ioreq_server_setup_pages(
> +             s, is_default, bufioreq_handling != HVM_IOREQSRV_BUFIOREQ_OFF);
>      if ( rc )
>          goto fail_map;
>  
> @@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d
>  }
>  
>  static int hvm_create_ioreq_server(struct domain *d, domid_t domid,
> -                                   bool_t is_default, bool_t handle_bufioreq,
> +                                   bool_t is_default, int bufioreq_handling,

uint8_t?


>                                     ioservid_t *id)
>  {
>      struct hvm_ioreq_server *s;
>      int rc;
>  
> +    if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> +        return -EINVAL;
> +
>      rc = -ENOMEM;
>      s = xzalloc(struct hvm_ioreq_server);
>      if ( !s )
> @@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc
>      if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
>          goto fail2;
>  
> -    rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq,
> +    rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling,
>                                 next_ioservid(d));
>      if ( rc )
>          goto fail3;
> @@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p)
>      spin_lock(&s->bufioreq_lock);
>  
> -    if ( (pg->write_pointer - pg->read_pointer) >=
> +    if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
>           (IOREQ_BUFFER_SLOT_NUM - qw) )
>      {
>          /* The queue is full: send the iopacket through the normal path. */
> @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
>          return 0;
>      }
>  
> -    pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> +    pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
>  
>      if ( qw )
>      {
>          bp.data = p->data >> 32;
> -        pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> +        pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
>      }
>  
>      /* Make the ioreq_t visible /before/ write_pointer. */
>      wmb();
> -    pg->write_pointer += qw ? 2 : 1;
> +    pg->ptrs.write_pointer += qw ? 2 : 1;
> +
> +    /* Canonicalize read/write pointers to prevent their overflow. */
> +    while ( s->bufioreq_atomic && qw++ < IOREQ_BUFFER_SLOT_NUM &&
> +            pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
> +    {
> +        union bufioreq_pointers old = pg->ptrs, new;
> +        unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
> +
> +        new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> +        cmpxchg(&pg->ptrs.full, old.full, new.full);
> +    }

This is not safe: if the reader increments read_pointer (atomically)
after you read old.read_pointer and before cmpxchg, the increment is
lost.

I think you need to remove the cmpxchg and instead subtract a multiple of
IOREQ_BUFFER_SLOT_NUM from read_pointer atomically here. Unfortunately,
if we do that, we can no longer update write_pointer and read_pointer
together. But if we decrement write_pointer atomically before
read_pointer, I think we should be fine, provided we also fix the
reader side appropriately:

QEMU:
  atomic_read(&read_pointer);
  read write_pointer;
  xen_rmb();
  while (read_pointer < write_pointer) {

      [work]

      atomic_add(&read_pointer, stuff);
      read write_pointer;
      xen_rmb();
  }
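
For concreteness, here is a rough C rendering of that reader loop --
illustration only, not the actual QEMU code. It assumes the patched
buffered_iopage layout (pg->ptrs.read_pointer / pg->ptrs.write_pointer);
handle_one_req() and drain_buffered_ioreqs() are made-up names, and GCC's
__atomic builtins stand in for whatever atomic/barrier macros QEMU would
really use:

  /*
   * Illustration only.  handle_one_req() is a hypothetical helper that
   * consumes one slot (two for a 64-bit access) and returns the number
   * consumed.
   */
  static void drain_buffered_ioreqs(buffered_iopage_t *pg)
  {
      for ( ;; )
      {
          /* Acquire loads play the role of xen_rmb(): slot contents read
           * below cannot be older than the pointers read here. */
          uint32_t rdptr = __atomic_load_n(&pg->ptrs.read_pointer,
                                           __ATOMIC_ACQUIRE);
          uint32_t wrptr = __atomic_load_n(&pg->ptrs.write_pointer,
                                           __ATOMIC_ACQUIRE);
          unsigned int slots;

          if ( rdptr >= wrptr )
              break;

          /* The index stays valid even if Xen rebases the pointers in
           * between, because only multiples of IOREQ_BUFFER_SLOT_NUM are
           * ever subtracted. */
          slots = handle_one_req(&pg->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM]);

          /* Consume the slot(s) with an atomic RMW so a concurrent rebase
           * by Xen is not overwritten. */
          __atomic_fetch_add(&pg->ptrs.read_pointer, slots, __ATOMIC_RELEASE);
      }
  }
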


Xen:
  [work]
  increment write_pointer;
  xen_wmb();

  if (read_pointer >= IOREQ_BUFFER_SLOT_NUM) {
      write_pointer -= IOREQ_BUFFER_SLOT_NUM;
      xen_wmb();
      atomic_sub(read_pointer, IOREQ_BUFFER_SLOT_NUM);
  }
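
And, again purely as an illustration of the ordering I mean (not a drop-in
replacement for the patch; the function name is made up and the __atomic
builtins stand in for Xen's own primitives):

  /*
   * Illustration only.  The slot(s) are assumed to have been filled in
   * already, exactly as hvm_buffered_io_send() does today, and only Xen
   * ever writes write_pointer (under bufioreq_lock).
   */
  static void bufioreq_publish_and_rebase(buffered_iopage_t *pg,
                                          unsigned int slots)
  {
      /* Release store = the existing wmb(): make the buf_ioreq contents
       * visible before the new write_pointer. */
      __atomic_store_n(&pg->ptrs.write_pointer,
                       pg->ptrs.write_pointer + slots, __ATOMIC_RELEASE);

      if ( __atomic_load_n(&pg->ptrs.read_pointer, __ATOMIC_RELAXED) >=
           IOREQ_BUFFER_SLOT_NUM )
      {
          /* Rebase write_pointer first, so any transient state the reader
           * can observe only makes the ring look emptier, never fuller. */
          __atomic_store_n(&pg->ptrs.write_pointer,
                           pg->ptrs.write_pointer - IOREQ_BUFFER_SLOT_NUM,
                           __ATOMIC_RELEASE);
          /* ... then read_pointer, with an atomic RMW so a concurrent
           * atomic increment by the emulator is preserved. */
          __atomic_fetch_sub(&pg->ptrs.read_pointer, IOREQ_BUFFER_SLOT_NUM,
                             __ATOMIC_RELEASE);
      }
  }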
