* [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Jan Beulich @ 2015-06-18 13:18 UTC
  To: qemu-devel; +Cc: xen-devel, Stefano Stabellini


The number of slots per page being 511 (i.e. not a power of two) means
that the (32-bit) read and write indexes going beyond 2^32 will likely
disturb operation. The hypervisor side gets I/O req server creation
extended so we can indicate that we're using suitable atomic accesses
where needed (not all accesses to the two pointers really need to be
atomic), allowing it to atomically canonicalize both pointers when both
have gone through at least one cycle.

The Xen side counterpart (which is not a functional prereq to this
change, albeit a build one) can be found at e.g.
http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg02996.html 

Signed-off-by: Jan Beulich <jbeulich@suse.com>
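
(A minimal illustrative sketch, not part of the patch: the ring indexes
are used modulo 511, and since 2^32 mod 511 is 32 rather than 0, a
32-bit index that wraps past 2^32 lands on a different slot than an
unwrapped count would.)

    #include <stdint.h>
    #include <stdio.h>

    #define IOREQ_BUFFER_SLOT_NUM 511

    int main(void)
    {
        uint32_t ptr = UINT32_MAX;  /* last index before the 2^32 wrap */
        uint32_t slot_before = ptr % IOREQ_BUFFER_SLOT_NUM;
        uint32_t slot_after = (uint32_t)(ptr + 1) % IOREQ_BUFFER_SLOT_NUM;

        /* Prints "slot 31 -> slot 0"; an unwrapped count would continue
         * with slot 32, so the slot sequence jumps at the wrap. */
        printf("slot %u -> slot %u\n", slot_before, slot_after);
        return 0;
    }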

--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
 
 static int handle_buffered_iopage(XenIOState *state)
 {
+    buffered_iopage_t *buf_page = state->buffered_io_page;
     buf_ioreq_t *buf_req = NULL;
     ioreq_t req;
     int qw;
 
-    if (!state->buffered_io_page) {
+    if (!buf_page) {
         return 0;
     }
 
     memset(&req, 0x00, sizeof(req));
 
-    while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
-        buf_req = &state->buffered_io_page->buf_ioreq[
-            state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
+    for (;;) {
+        uint32_t rdptr = buf_page->read_pointer, wrptr;
+
+        xen_rmb();
+        wrptr = buf_page->write_pointer;
+        xen_rmb();
+        if (rdptr != buf_page->read_pointer) {
+            continue;
+        }
+        if (rdptr == wrptr) {
+            break;
+        }
+        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
         req.size = 1UL << buf_req->size;
         req.count = 1;
         req.addr = buf_req->addr;
@@ -1005,15 +1016,14 @@ static int handle_buffered_iopage(XenIOS
         req.data_is_ptr = 0;
         qw = (req.size == 8);
         if (qw) {
-            buf_req = &state->buffered_io_page->buf_ioreq[
-                (state->buffered_io_page->read_pointer + 1) % IOREQ_BUFFER_SLOT_NUM];
+            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
+                                           IOREQ_BUFFER_SLOT_NUM];
             req.data |= ((uint64_t)buf_req->data) << 32;
         }
 
         handle_ioreq(state, &req);
 
-        xen_mb();
-        state->buffered_io_page->read_pointer += qw ? 2 : 1;
+        atomic_add(&buf_page->read_pointer, qw + 1);
     }
 
     return req.count;
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -370,7 +370,8 @@ static inline void xen_unmap_pcidev(XenX
 static inline int xen_create_ioreq_server(XenXC xc, domid_t dom,
                                           ioservid_t *ioservid)
 {
-    int rc = xc_hvm_create_ioreq_server(xc, dom, 1, ioservid);
+    int rc = xc_hvm_create_ioreq_server(xc, dom, HVM_IOREQSRV_BUFIOREQ_ATOMIC,
+                                        ioservid);
 
     if (rc == 0) {
         trace_xen_ioreq_server_create(*ioservid);





* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Stefano Stabellini @ 2015-07-21 13:54 UTC
  To: Jan Beulich; +Cc: xen-devel, qemu-devel, Stefano Stabellini

On Thu, 18 Jun 2015, Jan Beulich wrote:
> The number of slots per page being 511 (i.e. not a power of two) means
> that the (32-bit) read and write indexes going beyond 2^32 will likely
> disturb operation. The hypervisor side gets I/O req server creation
> extended so we can indicate that we're using suitable atomic accesses
> where needed (not all accesses to the two pointers really need to be
> atomic), allowing it to atomically canonicalize both pointers when both
> have gone through at least one cycle.

The description is a bit terse: which accesses don't really need to be
atomic?


> The Xen side counterpart (which is not a functional prereq to this
> change, albeit a build one) can be found at e.g.
> http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg02996.html 
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen-hvm.c
> +++ b/xen-hvm.c
> @@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
>  
>  static int handle_buffered_iopage(XenIOState *state)
>  {
> +    buffered_iopage_t *buf_page = state->buffered_io_page;
>      buf_ioreq_t *buf_req = NULL;
>      ioreq_t req;
>      int qw;
>  
> -    if (!state->buffered_io_page) {
> +    if (!buf_page) {
>          return 0;
>      }
>  
>      memset(&req, 0x00, sizeof(req));
>  
> -    while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
> -        buf_req = &state->buffered_io_page->buf_ioreq[
> -            state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
> +    for (;;) {
> +        uint32_t rdptr = buf_page->read_pointer, wrptr;
> +
> +        xen_rmb();

We don't need this barrier.


> +        wrptr = buf_page->write_pointer;
> +        xen_rmb();
> +        if (rdptr != buf_page->read_pointer) {

I think you have to use atomic_read to be sure that the second read of
buf_page->read_pointer is up to date and not optimized away.

But I think it would be best to simply use atomic_read to read
both pointers at once using uint64_t as the type, so you are sure to get a
consistent view and there is no need for this check.


> +            continue;
> +        }
> +        if (rdptr == wrptr) {
> +            break;
> +        }
> +        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
>          req.size = 1UL << buf_req->size;
>          req.count = 1;
>          req.addr = buf_req->addr;
> @@ -1005,15 +1016,14 @@ static int handle_buffered_iopage(XenIOS
>          req.data_is_ptr = 0;
>          qw = (req.size == 8);
>          if (qw) {
> -            buf_req = &state->buffered_io_page->buf_ioreq[
> -                (state->buffered_io_page->read_pointer + 1) % IOREQ_BUFFER_SLOT_NUM];
> +            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
> +                                           IOREQ_BUFFER_SLOT_NUM];
>              req.data |= ((uint64_t)buf_req->data) << 32;
>          }
>  
>          handle_ioreq(state, &req);
>  
> -        xen_mb();
> -        state->buffered_io_page->read_pointer += qw ? 2 : 1;
> +        atomic_add(&buf_page->read_pointer, qw + 1);

I couldn't get specific info on the type of barrier implemented by
__sync_fetch_and_add, so I cannot tell for sure whether removing
xen_mb() is appropriate. Do you have a link? I suspect that given the
strong guarantees of the x86 architecture we'll be fine. I would be less
confident if this code was used on other archs.


>      }
>  
>      return req.count;
> --- a/include/hw/xen/xen_common.h
> +++ b/include/hw/xen/xen_common.h
> @@ -370,7 +370,8 @@ static inline void xen_unmap_pcidev(XenX
>  static inline int xen_create_ioreq_server(XenXC xc, domid_t dom,
>                                            ioservid_t *ioservid)
>  {
> -    int rc = xc_hvm_create_ioreq_server(xc, dom, 1, ioservid);
> +    int rc = xc_hvm_create_ioreq_server(xc, dom, HVM_IOREQSRV_BUFIOREQ_ATOMIC,
> +                                        ioservid);

I am concerned that passing 2 instead of 1 could break older
hypervisors. However, handle_bufioreq was never defined as a true
boolean, so maybe it is OK?

The alternative would be to create a xen_xc_hvm_create_ioreq_server
versioned wrapper in include/hw/xen/xen_common.h.
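
(For illustration, a hypothetical shape such a wrapper could take; the
CONFIG_XEN_CTRL_INTERFACE_VERSION guard and the fallback behaviour are
assumptions, not code from this thread:)

    static inline int xen_xc_hvm_create_ioreq_server(XenXC xc, domid_t dom,
                                                     int handle_bufioreq,
                                                     ioservid_t *ioservid)
    {
    #if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 460
        /* newer libxc understands HVM_IOREQSRV_BUFIOREQ_ATOMIC (2) */
        return xc_hvm_create_ioreq_server(xc, dom, handle_bufioreq, ioservid);
    #else
        /* older libxc expects a plain boolean; collapse the flag to 0/1 */
        return xc_hvm_create_ioreq_server(xc, dom, !!handle_bufioreq,
                                          ioservid);
    #endif
    }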



>      if (rc == 0) {
>          trace_xen_ioreq_server_create(*ioservid);


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Jan Beulich @ 2015-07-22 14:03 UTC
  To: Stefano Stabellini; +Cc: xen-devel, qemu-devel

>>> On 21.07.15 at 15:54, <stefano.stabellini@eu.citrix.com> wrote:
> On Thu, 18 Jun 2015, Jan Beulich wrote:
>> The number of slots per page being 511 (i.e. not a power of two) means
>> that the (32-bit) read and write indexes going beyond 2^32 will likely
>> disturb operation. The hypervisor side gets I/O req server creation
>> extended so we can indicate that we're using suitable atomic accesses
>> where needed (not all accesses to the two pointers really need to be
>> atomic), allowing it to atomically canonicalize both pointers when both
>> have gone through at least one cycle.
> 
> The description is a bit terse: which accesses don't really need to be
> atomic?

Perhaps I should drop this part - I more or less copied the hypervisor
side's commit message, and the above really applies to e.g.

    if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
         (IOREQ_BUFFER_SLOT_NUM - qw) )

in hypervisor code.

>> --- a/xen-hvm.c
>> +++ b/xen-hvm.c
>> @@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
>>  
>>  static int handle_buffered_iopage(XenIOState *state)
>>  {
>> +    buffered_iopage_t *buf_page = state->buffered_io_page;
>>      buf_ioreq_t *buf_req = NULL;
>>      ioreq_t req;
>>      int qw;
>>  
>> -    if (!state->buffered_io_page) {
>> +    if (!buf_page) {
>>          return 0;
>>      }
>>  
>>      memset(&req, 0x00, sizeof(req));
>>  
>> -    while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
>> -        buf_req = &state->buffered_io_page->buf_ioreq[
>> -            state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
>> +    for (;;) {
>> +        uint32_t rdptr = buf_page->read_pointer, wrptr;
>> +
>> +        xen_rmb();
> 
> We don't need this barrier.

How would we not? We need to make sure we read, in this order:
read_pointer, write_pointer, and read_pointer again (in the
comparison). Only that way can we be certain to hold a matching
pair in hand at the end.
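
(Spelled out with comments, and assuming xen_rmb() is at least a read
plus compiler barrier, the snapshot pattern in the patch is:)

    uint32_t rdptr = buf_page->read_pointer;    /* 1st read */
    xen_rmb();                                  /* no reordering past here */
    uint32_t wrptr = buf_page->write_pointer;   /* 2nd read */
    xen_rmb();
    if (rdptr != buf_page->read_pointer) {      /* 3rd read: re-check */
        continue;  /* read_pointer moved; rdptr/wrptr may be mismatched */
    }
    /* here rdptr and wrptr formed a consistent pair at the moment
     * write_pointer was sampled */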

>> +        wrptr = buf_page->write_pointer;
>> +        xen_rmb();
>> +        if (rdptr != buf_page->read_pointer) {
> 
> I think you have to use atomic_read to be sure that the second read of
> buf_page->read_pointer is up to date and not optimized away.

No, suppressing such an optimization is an intended (side) effect
of the barriers used.

> But I think it would be best to simply use atomic_read to read
> both pointers at once using uint64_t as the type, so you are sure to get a
> consistent view and there is no need for this check.

But I'm specifically trying to avoid e.g. a locked cmpxchg8b here on
ix86.

>>          handle_ioreq(state, &req);
>>  
>> -        xen_mb();
>> -        state->buffered_io_page->read_pointer += qw ? 2 : 1;
>> +        atomic_add(&buf_page->read_pointer, qw + 1);
> 
> I couldn't get specific info on the type of barrier implemented by
> __sync_fetch_and_add, so I cannot tell for sure whether removing
> xen_mb() is appropriate. Do you have a link? I suspect that given the
> strong guarantees of the x86 architecture we'll be fine. I would be less
> confident if this code was used on other archs.

gcc.pdf, in the section covering them, says "In most cases, these
built-in functions are considered a full barrier. [...] Further,
instructions are issued as necessary to prevent the processor from
speculating loads across the operation and from queuing stores
after the operation." Details on individual builtins subsequently
list the exceptions to this general rule, but the one used here is
not among them.
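
(For reference, QEMU's atomic_add of that era appears to have been a
thin wrapper over exactly this builtin; treat the precise definition
below as an assumption rather than a quotation:)

    /* full-barrier read-modify-write; the fetched old value is discarded */
    #define atomic_add(ptr, n)  ((void) __sync_fetch_and_add(ptr, n))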

>> --- a/include/hw/xen/xen_common.h
>> +++ b/include/hw/xen/xen_common.h
>> @@ -370,7 +370,8 @@ static inline void xen_unmap_pcidev(XenX
>>  static inline int xen_create_ioreq_server(XenXC xc, domid_t dom,
>>                                            ioservid_t *ioservid)
>>  {
>> -    int rc = xc_hvm_create_ioreq_server(xc, dom, 1, ioservid);
>> +    int rc = xc_hvm_create_ioreq_server(xc, dom, HVM_IOREQSRV_BUFIOREQ_ATOMIC,
>> +                                        ioservid);
> 
> I am concerned that passing 2 instead of 1 could break older
> hypervisors. However, handle_bufioreq was never defined as a true
> boolean, so maybe it is OK?

Indeed I'm relying on it only ever having been checked as == 0 or != 0.

> The alternative would be to create a xen_xc_hvm_create_ioreq_server
> versioned wrapper in include/hw/xen/xen_common.h.

Which is what I aimed at avoiding.

Jan


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Stefano Stabellini @ 2015-07-22 14:50 UTC
  To: Jan Beulich; +Cc: xen-devel, qemu-devel, Stefano Stabellini

On Wed, 22 Jul 2015, Jan Beulich wrote:
> >> The number of slots per page being 511 (i.e. not a power of two) means
> >> that the (32-bit) read and write indexes going beyond 2^32 will likely
> >> disturb operation. The hypervisor side gets I/O req server creation
> >> extended so we can indicate that we're using suitable atomic accesses
> >> where needed (not all accesses to the two pointers really need to be
> >> atomic), allowing it to atomically canonicalize both pointers when both
> >> have gone through at least one cycle.
> > 
> > The description is a bit terse: which accesses don't really need to be
> > atomic?
> 
> Perhaps I should drop this part - I more or less copied the hypervisor
> side's commit message, and the above really applies to e.g.
> 
>     if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
>          (IOREQ_BUFFER_SLOT_NUM - qw) )
> 
> in hypervisor code.

Yes, please remove it as it is confusing.


> >> --- a/xen-hvm.c
> >> +++ b/xen-hvm.c
> >> @@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
> >>  
> >>  static int handle_buffered_iopage(XenIOState *state)
> >>  {
> >> +    buffered_iopage_t *buf_page = state->buffered_io_page;
> >>      buf_ioreq_t *buf_req = NULL;
> >>      ioreq_t req;
> >>      int qw;
> >>  
> >> -    if (!state->buffered_io_page) {
> >> +    if (!buf_page) {
> >>          return 0;
> >>      }
> >>  
> >>      memset(&req, 0x00, sizeof(req));
> >>  
> >> -    while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
> >> -        buf_req = &state->buffered_io_page->buf_ioreq[
> >> -            state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
> >> +    for (;;) {
> >> +        uint32_t rdptr = buf_page->read_pointer, wrptr;
> >> +
> >> +        xen_rmb();
> > 
> > We don't need this barrier.
> 
> How would we not? We need to make sure we read, in this order:
> read_pointer, write_pointer, and read_pointer again (in the
> comparison). Only that way can we be certain to hold a matching
> pair in hand at the end.

See below


> >> +        wrptr = buf_page->write_pointer;
> >> +        xen_rmb();
> >> +        if (rdptr != buf_page->read_pointer) {
> > 
> > I think you have to use atomic_read to be sure that the second read of
> > buf_page->read_pointer is up to date and not optimized away.
> 
> No, suppressing such an optimization is an intended (side) effect
> of the barriers used.

I understand what you are saying but I am not sure if your assumption
is correct. Can the compiler optimize the second read anyway?
In any case, if you follow my suggestion below, it won't matter.


> > But I think it would be best to simply use atomic_read to read
> > both pointers at once using uint64_t as the type, so you are sure to get a
> > consistent view and there is no need for this check.
> 
> But I'm specifically trying to avoid e.g. a locked cmpxchg8b here on
> ix86.

OK, but we don't need cmpxchg8b, just:

#define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))

something like:

 for (;;) {
     uint64_t ptrs;
     uint32_t rdptr, wrptr;
 
     ptrs = atomic_read((uint64_t*)&state->buffered_io_page->read_pointer);
     rdptr = (uint32_t)ptrs;
     wrptr = *(((uint32_t*)&ptrs) + 1);
 
     if (rdptr == wrptr) {
         break;
     }
 
     [work]
 
     atomic_add(&buf_page->read_pointer, qw + 1);
 }

it would work, wouldn't it?


> >>          handle_ioreq(state, &req);
> >>  
> >> -        xen_mb();
> >> -        state->buffered_io_page->read_pointer += qw ? 2 : 1;
> >> +        atomic_add(&buf_page->read_pointer, qw + 1);
> > 
> > I couldn't get specific info on the type of barrier implemented by
> > __sync_fetch_and_add, so I cannot tell for sure whether removing
> > xen_mb() is appropriate. Do you have a link? I suspect that given the
> > strong guarantees of the x86 architecture we'll be fine. I would be less
> > confident if this code was used on other archs.
> 
> gcc.pdf, in the section covering them, says "In most cases, these
> built-in functions are considered a full barrier. [...] Further,
> instructions are issued as necessary to prevent the processor from
> speculating loads across the operation and from queuing stores
> after the operation." Details on individual builtins subsequently
> list the exceptions to this general rule, but the one used here is
> not among them.

Good, thanks for looking it up


> >> --- a/include/hw/xen/xen_common.h
> >> +++ b/include/hw/xen/xen_common.h
> >> @@ -370,7 +370,8 @@ static inline void xen_unmap_pcidev(XenX
> >>  static inline int xen_create_ioreq_server(XenXC xc, domid_t dom,
> >>                                            ioservid_t *ioservid)
> >>  {
> >> -    int rc = xc_hvm_create_ioreq_server(xc, dom, 1, ioservid);
> >> +    int rc = xc_hvm_create_ioreq_server(xc, dom, HVM_IOREQSRV_BUFIOREQ_ATOMIC,
> >> +                                        ioservid);
> > 
> > I am concerned that passing 2 instead of 1 could break older
> > hypervisors. However, handle_bufioreq was never defined as a true
> > boolean, so maybe it is OK?
> 
> Indeed I'm relying on it only ever having been checked as == 0 or != 0.
> 
> > The alternative would be to create a xen_xc_hvm_create_ioreq_server
> > versioned wrapper in include/hw/xen/xen_common.h.
> 
> Which is what I aimed at avoiding.

OK


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Jan Beulich @ 2015-07-22 15:34 UTC
  To: Stefano Stabellini; +Cc: xen-devel, qemu-devel

>>> On 22.07.15 at 16:50, <stefano.stabellini@eu.citrix.com> wrote:
> On Wed, 22 Jul 2015, Jan Beulich wrote:
>> >> --- a/xen-hvm.c
>> >> +++ b/xen-hvm.c
>> >> @@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
>> >>  
>> >>  static int handle_buffered_iopage(XenIOState *state)
>> >>  {
>> >> +    buffered_iopage_t *buf_page = state->buffered_io_page;
>> >>      buf_ioreq_t *buf_req = NULL;
>> >>      ioreq_t req;
>> >>      int qw;
>> >>  
>> >> -    if (!state->buffered_io_page) {
>> >> +    if (!buf_page) {
>> >>          return 0;
>> >>      }
>> >>  
>> >>      memset(&req, 0x00, sizeof(req));
>> >>  
>> >> -    while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
>> >> -        buf_req = &state->buffered_io_page->buf_ioreq[
>> >> -            state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
>> >> +    for (;;) {
>> >> +        uint32_t rdptr = buf_page->read_pointer, wrptr;
>> >> +
>> >> +        xen_rmb();
>> > 
>> > We don't need this barrier.
>> 
>> How would we not? We need to make sure we read, in this order:
>> read_pointer, write_pointer, and read_pointer again (in the
>> comparison). Only that way can we be certain to hold a matching
>> pair in hand at the end.
> 
> See below
> 
> 
>> >> +        wrptr = buf_page->write_pointer;
>> >> +        xen_rmb();
>> >> +        if (rdptr != buf_page->read_pointer) {
>> > 
>> > I think you have to use atomic_read to be sure that the second read of
>> > buf_page->read_pointer is up to date and not optimized away.
>> 
>> No, suppressing such an optimization is an intended (side) effect
>> of the barriers used.
> 
> I understand what you are saying but I am not sure if your assumption
> is correct. Can the compiler optimize the second read anyway?

No, it can't, due to the barrier.

>> > But I think it would be best to simply use atomic_read to read
>> > both pointers at once using uint64_t as the type, so you are sure to get a
>> > consistent view and there is no need for this check.
>> 
>> But I'm specifically trying to avoid e.g. a locked cmpxchg8b here on
>> ix86.
> 
> OK, but we don't need cmpxchg8b, just:
> 
> #define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))

This only gives the impression of being atomic when the type is wider
than a machine word. There's no ix86 (i.e. 32-bit) instruction other
than LOCK CMPXCHG8B (and possibly MMX/SSE/AVX ones) allowing
to atomically read a 64-bit quantity.
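
(Concretely, an actually-atomic 64-bit read on 32-bit x86 would have to
look something like the sketch below; gcc lowers the builtin to LOCK
CMPXCHG8B there, which is precisely the locked access being avoided:)

    #include <stdint.h>

    /* Atomic 64-bit load: compare-and-swap with a dummy expected/new
     * value of 0; the old contents are returned atomically either way. */
    static inline uint64_t atomic_read_u64(uint64_t *p)
    {
        return __sync_val_compare_and_swap(p, 0, 0);
    }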

> something like:
> 
>  for (;;) {
>      uint64_t ptrs;
>      uint32_t rdptr, wrptr;
>  
>      ptrs = atomic_read((uint64_t*)&state->buffered_io_page->read_pointer);
>      rdptr = (uint32_t)ptrs;
>      wrptr = *(((uint32_t*)&ptrs) + 1);
>  
>      if (rdptr == wrptr) {
>          break;
>      }
>  
>      [work]
>  
>      atomic_add(&buf_page->read_pointer, qw + 1);
>  }
> 
> it would work, wouldn't it?

Looks like it, but the amount of casts alone makes me wish that
no-one would consider this (but I agree that the casts could be
taken care of). Still I think (as btw done elsewhere) the lock-free
access is preferable.

Jan


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Stefano Stabellini @ 2015-07-22 17:24 UTC
  To: Jan Beulich; +Cc: xen-devel, qemu-devel, Stefano Stabellini

On Wed, 22 Jul 2015, Jan Beulich wrote:
> >>> On 22.07.15 at 16:50, <stefano.stabellini@eu.citrix.com> wrote:
> > On Wed, 22 Jul 2015, Jan Beulich wrote:
> >> >> --- a/xen-hvm.c
> >> >> +++ b/xen-hvm.c
> >> >> @@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
> >> >>  
> >> >>  static int handle_buffered_iopage(XenIOState *state)
> >> >>  {
> >> >> +    buffered_iopage_t *buf_page = state->buffered_io_page;
> >> >>      buf_ioreq_t *buf_req = NULL;
> >> >>      ioreq_t req;
> >> >>      int qw;
> >> >>  
> >> >> -    if (!state->buffered_io_page) {
> >> >> +    if (!buf_page) {
> >> >>          return 0;
> >> >>      }
> >> >>  
> >> >>      memset(&req, 0x00, sizeof(req));
> >> >>  
> >> >> -    while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
> >> >> -        buf_req = &state->buffered_io_page->buf_ioreq[
> >> >> -            state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
> >> >> +    for (;;) {
> >> >> +        uint32_t rdptr = buf_page->read_pointer, wrptr;
> >> >> +
> >> >> +        xen_rmb();
> >> > 
> >> > We don't need this barrier.
> >> 
> >> How would we not? We need to make sure we read, in this order:
> >> read_pointer, write_pointer, and read_pointer again (in the
> >> comparison). Only that way can we be certain to hold a matching
> >> pair in hand at the end.
> > 
> > See below
> > 
> > 
> >> >> +        wrptr = buf_page->write_pointer;
> >> >> +        xen_rmb();
> >> >> +        if (rdptr != buf_page->read_pointer) {
> >> > 
> >> > I think you have to use atomic_read to be sure that the second read of
> >> > buf_page->read_pointer is up to date and not optimized away.
> >> 
> >> No, suppressing such an optimization is an intended (side) effect
> >> of the barriers used.
> > 
> > I understand what you are saying but I am not sure if your assumption
> > is correct. Can the compiler optimize the second read anyway?
> 
> No, it can't, due to the barrier.

OK


> >> > But I think it would be best to simply use atomic_read to read
> >> > both pointers at once using uint64_t as the type, so you are sure to get a
> >> > consistent view and there is no need for this check.
> >> 
> >> But I'm specifically trying to avoid e.g. a locked cmpxchg8b here on
> >> ix86.
> > 
> > OK, but we don't need cmpxchg8b, just:
> > 
> > #define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
> 
> This only gives the impression of being atomic when the type is wider
> than a machine word. There's no ix86 (i.e. 32-bit) instruction other
> than LOCK CMPXCHG8B (and possibly MMX/SSE/AVX ones) allowing
> to atomically read a 64-bit quantity.

Damn!


> > something like:
> > 
> >  for (;;) {
> >      uint64_t ptrs;
> >      uint32_t rdptr, wrptr;
> >  
> >      ptrs = atomic_read((uint64_t*)&state->buffered_io_page->read_pointer);
> >      rdptr = (uint32_t)ptrs;
> >      wrptr = *(((uint32_t*)&ptrs) + 1);
> >  
> >      if (rdptr == wrptr) {
> >          break;
> >      }
> >  
> >      [work]
> >  
> >      atomic_add(&buf_page->read_pointer, qw + 1);
> >  }
> > 
> > it would work, wouldn't it?
> 
> Looks like it, but the amount of casts alone makes me wish that
> no-one would consider this (but I agree that the casts could be
> taken care of). Still I think (as btw done elsewhere) the lock-free
> access is preferable.

Actually I think it is conceptually easier to understand, but the
current implementation of atomic_read not working with uint64_t on
x86_32 is a real bummer. In that case I am OK with the lock free loop
too. Thanks for the explanation.

I'll queue this change up for the next QEMU release cycle.


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Stefano Stabellini @ 2015-07-22 17:26 UTC
  To: Stefano Stabellini; +Cc: xen-devel, qemu-devel, Jan Beulich

On Wed, 22 Jul 2015, Stefano Stabellini wrote:
> On Wed, 22 Jul 2015, Jan Beulich wrote:
> > >>> On 22.07.15 at 16:50, <stefano.stabellini@eu.citrix.com> wrote:
> > > On Wed, 22 Jul 2015, Jan Beulich wrote:
> > >> >> --- a/xen-hvm.c
> > >> >> +++ b/xen-hvm.c
> > >> >> @@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
> > >> >>  
> > >> >>  static int handle_buffered_iopage(XenIOState *state)
> > >> >>  {
> > >> >> +    buffered_iopage_t *buf_page = state->buffered_io_page;
> > >> >>      buf_ioreq_t *buf_req = NULL;
> > >> >>      ioreq_t req;
> > >> >>      int qw;
> > >> >>  
> > >> >> -    if (!state->buffered_io_page) {
> > >> >> +    if (!buf_page) {
> > >> >>          return 0;
> > >> >>      }
> > >> >>  
> > >> >>      memset(&req, 0x00, sizeof(req));
> > >> >>  
> > >> >> -    while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
> > >> >> -        buf_req = &state->buffered_io_page->buf_ioreq[
> > >> >> -            state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
> > >> >> +    for (;;) {
> > >> >> +        uint32_t rdptr = buf_page->read_pointer, wrptr;
> > >> >> +
> > >> >> +        xen_rmb();
> > >> > 
> > >> > We don't need this barrier.
> > >> 
> > >> How would we not? We need to make sure we read, in this order:
> > >> read_pointer, write_pointer, and read_pointer again (in the
> > >> comparison). Only that way can we be certain to hold a matching
> > >> pair in hand at the end.
> > > 
> > > See below
> > > 
> > > 
> > >> >> +        wrptr = buf_page->write_pointer;
> > >> >> +        xen_rmb();
> > >> >> +        if (rdptr != buf_page->read_pointer) {
> > >> > 
> > >> > I think you have to use atomic_read to be sure that the second read of
> > >> > buf_page->read_pointer is up to date and not optimized away.
> > >> 
> > >> No, suppressing such an optimization is an intended (side) effect
> > >> of the barriers used.
> > > 
> > > I understand what you are saying but I am not sure if your assumption
> > > is correct. Can the compiler optimize the second read anyway?
> > 
> > No, it can't, due to the barrier.
> 
> OK
> 
> 
> > >> > But I think it would be best to simply use atomic_read to read
> > >> > both pointers at once using uint64_t as the type, so you are sure to get a
> > >> > consistent view and there is no need for this check.
> > >> 
> > >> But I'm specifically trying to avoid e.g. a locked cmpxchg8b here on
> > >> ix86.
> > > 
> > > OK, but we don't need cmpxchg8b, just:
> > > 
> > > #define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
> > 
> > This only gives the impression of being atomic when the type is wider
> > than a machine word. There's no ix86 (i.e. 32-bit) instruction other
> > than LOCK CMPXCHG8B (and possibly MMX/SSE/AVX ones) allowing
> > to atomically read a 64-bit quantity.
> 
> Damn!
> 
> 
> > > something like:
> > > 
> > >  for (;;) {
> > >      uint64_t ptrs;
> > >      uint32_t rdptr, wrptr;
> > >  
> > >      ptrs = atomic_read((uint64_t*)&state->buffered_io_page->read_pointer);
> > >      rdptr = (uint32_t)ptrs;
> > >      wrptr = *(((uint32_t*)&ptrs) + 1);
> > >  
> > >      if (rdptr == wrptr) {
> > >          break;
> > >      }
> > >  
> > >      [work]
> > >  
> > >      atomic_add(&buf_page->read_pointer, qw + 1);
> > >  }
> > > 
> > > it would work, wouldn't it?
> > 
> > Looks like it, but the amount of casts alone makes me wish that
> > no-one would consider this (but I agree that the casts could be
> > taken care of). Still I think (as btw done elsewhere) the lock-free
> > access is preferable.
> 
> Actually I think it is conceptually easier to understand, but the
> current implementation of atomic_read not working with uint64_t on
> x86_32 is a real bummer. In that case I am OK with the lock-free loop
> too. Thanks for the explanation.
> 
> I'll queue this change up for the next QEMU release cycle.

I forgot about the commit message change. Please resend; then, provided
that everything is OK, I'll queue it up.


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Jan Beulich @ 2015-07-23  7:02 UTC
  To: Stefano Stabellini; +Cc: xen-devel, qemu-devel

>>> On 22.07.15 at 19:24, <stefano.stabellini@eu.citrix.com> wrote:
> I'll queue this change up for the next QEMU release cycle.

Thanks - v2 (with the adjusted description) just sent.

It would however be nice for our variant in 4.6 to also gain this,
perhaps independently of upstream's schedule.

Jan


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Stefano Stabellini @ 2015-07-23 10:04 UTC
  To: Jan Beulich; +Cc: xen-devel, qemu-devel, Stefano Stabellini

On Thu, 23 Jul 2015, Jan Beulich wrote:
> >>> On 22.07.15 at 19:24, <stefano.stabellini@eu.citrix.com> wrote:
> > I'll queue this change up for the next QEMU release cycle.
> 
> Thanks - v2 (with the adjusted description) just sent.
> 
> It would however be nice for our variant in 4.6 to also gain this,
> perhaps independently of upstream's schedule.

Is the hypervisor side going to go in 4.6? Do we need a freeze
exception or do we consider this a bug fix?


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Stefano Stabellini @ 2015-07-23 10:09 UTC
  To: Stefano Stabellini; +Cc: xen-devel, qemu-devel, Jan Beulich

On Thu, 23 Jul 2015, Stefano Stabellini wrote:
> On Thu, 23 Jul 2015, Jan Beulich wrote:
> > >>> On 22.07.15 at 19:24, <stefano.stabellini@eu.citrix.com> wrote:
> > > I'll queue this change up for the next QEMU release cycle.
> > 
> > Thanks - v2 (with the adjusted description) just sent.
> > 
> > It would however be nice for our variant in 4.6 to also gain this,
> > perhaps independently of upstream's schedule.
> 
> Is the hypervisor side going to go in 4.6? Do we need a freeze
> exception or do we consider this a bug fix?

Never mind, I see that it is already in.


* Re: [PATCH] xen/HVM: atomically access pointers in bufioreq handling
From: Jan Beulich @ 2015-07-23 11:20 UTC
  To: Stefano Stabellini; +Cc: xen-devel, qemu-devel

>>> On 23.07.15 at 12:04, <stefano.stabellini@eu.citrix.com> wrote:
> On Thu, 23 Jul 2015, Jan Beulich wrote:
>> >>> On 22.07.15 at 19:24, <stefano.stabellini@eu.citrix.com> wrote:
>> > I'll queue this change up for the next QEMU release cycle.
>> 
>> Thanks - v2 (with the adjusted description) just sent.
>> 
>> It would however be nice for our variant in 4.6 to also gain this,
>> perhaps independent of upstream's schedule.
> 
> Is the hypervisor side going to go in 4.6? Do we need a freeze
> exception or do we consider this a bug fix?

As said in the updated description in v2, the hypervisor side already
went in some time ago.

Jan
