All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/1] coroutine: avoid inserting duplicate coroutine to co_queue_wakeup
@ 2024-04-01 12:33 Zhu Yangyang via
  2024-04-01 12:41 ` [PATCH v2 1/1] nbd/server: do not poll within a coroutine context Zhu Yangyang via
  0 siblings, 1 reply; 4+ messages in thread
From: Zhu Yangyang via @ 2024-04-01 12:33 UTC (permalink / raw
  To: Eric Blake, Stefan Hajnoczi
  Cc: qemu-block, qemu-devel, Vladimir Sementsov-Ogievskiy, kwolf,
	luolongmin, suxiaodong1, chenxiaoyu48, wangyan122, yebiaoxiang,
	zhuyangyang14

The problem that inserting duplicate coroutine to co_queue_wakeu has been
resolved by 7c1f51bf38 ("nbd/server: Fix drained_poll to wake coroutine
in right AioContext") that avoids repeatedly waking up the same coroutine.

The key modifications are as follows:

static void qio_channel_restart_read(void *opaque)
{
    QIOChannel *ioc = opaque;
-   Coroutine *co = ioc->read_coroutine;
+   Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL);
+
+   if (!co) {
+       return;
+   }

    /* Assert that aio_co_wake() reenters the coroutine directly */
    assert(qemu_get_current_aio_context() ==
           qemu_coroutine_get_aio_context(co));
    aio_co_wake(co);
}

The root cause is that poll() is invoked in coroutine context, so fix it.

Changes in v2:
Drop the changes to aio_co_enter and instead fix the poll() call in the nbd/server.

Zhu Yangyang (1):
  nbd/server: do not poll within a coroutine context

 nbd/client.c       |  7 ++++---
 nbd/common.c       | 19 ++++++++++++++++---
 nbd/nbd-internal.h |  6 +++---
 nbd/server.c       | 10 +++++-----
 4 files changed, 28 insertions(+), 14 deletions(-)

-- 
2.33.0



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 1/1] nbd/server: do not poll within a coroutine context
  2024-04-01 12:33 [PATCH v2 0/1] coroutine: avoid inserting duplicate coroutine to co_queue_wakeup Zhu Yangyang via
@ 2024-04-01 12:41 ` Zhu Yangyang via
  2024-04-01 16:33   ` Eric Blake
  0 siblings, 1 reply; 4+ messages in thread
From: Zhu Yangyang via @ 2024-04-01 12:41 UTC (permalink / raw
  To: zhuyangyang14
  Cc: eblake, stefanha, qemu-block, qemu-devel, vsementsov, kwolf,
	luolongmin, suxiaodong1, chenxiaoyu48, wangyan122, yebiaoxiang

Coroutines are not supposed to block. Instead, they should yield.

Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
---
 nbd/client.c       |  7 ++++---
 nbd/common.c       | 19 ++++++++++++++++---
 nbd/nbd-internal.h |  6 +++---
 nbd/server.c       | 10 +++++-----
 4 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index 29ffc609a4..1ab91ed205 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -619,18 +619,19 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
         return NULL;
     }
     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
-    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
     trace_nbd_receive_starttls_tls_handshake();
     qio_channel_tls_handshake(tioc,
-                              nbd_tls_handshake,
+                              nbd_client_tls_handshake,
                               &data,
                               NULL,
                               NULL);
 
     if (!data.complete) {
+        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
         g_main_loop_run(data.loop);
+        g_main_loop_unref(data.loop);
     }
-    g_main_loop_unref(data.loop);
+
     if (data.error) {
         error_propagate(errp, data.error);
         object_unref(OBJECT(tioc));
diff --git a/nbd/common.c b/nbd/common.c
index 3247c1d618..01ca30a5c4 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -47,14 +47,27 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
 }
 
 
-void nbd_tls_handshake(QIOTask *task,
-                       void *opaque)
+void nbd_client_tls_handshake(QIOTask *task, void *opaque)
 {
     struct NBDTLSHandshakeData *data = opaque;
 
     qio_task_propagate_error(task, &data->error);
     data->complete = true;
-    g_main_loop_quit(data->loop);
+    if (data->loop) {
+        g_main_loop_quit(data->loop);
+    }
+}
+
+
+void nbd_server_tls_handshake(QIOTask *task, void *opaque)
+{
+    struct NBDTLSHandshakeData *data = opaque;
+
+    qio_task_propagate_error(task, &data->error);
+    data->complete = true;
+    if (!qemu_coroutine_entered(data->co)) {
+        aio_co_wake(data->co);
+    }
 }
 
 
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index dfa02f77ee..99cca9382c 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -74,13 +74,13 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
 
 struct NBDTLSHandshakeData {
     GMainLoop *loop;
+    Coroutine *co;
     bool complete;
     Error *error;
 };
 
-
-void nbd_tls_handshake(QIOTask *task,
-                       void *opaque);
+void nbd_server_tls_handshake(QIOTask *task, void *opaque);
+void nbd_client_tls_handshake(QIOTask *task, void *opaque);
 
 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
 
diff --git a/nbd/server.c b/nbd/server.c
index c3484cc1eb..b218512ced 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -777,17 +777,17 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
 
     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
     trace_nbd_negotiate_handle_starttls_handshake();
-    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
+    data.co = qemu_coroutine_self();
     qio_channel_tls_handshake(tioc,
-                              nbd_tls_handshake,
+                              nbd_server_tls_handshake,
                               &data,
                               NULL,
                               NULL);
 
-    if (!data.complete) {
-        g_main_loop_run(data.loop);
+    while (!data.complete) {
+        qemu_coroutine_yield();
     }
-    g_main_loop_unref(data.loop);
+
     if (data.error) {
         object_unref(OBJECT(tioc));
         error_propagate(errp, data.error);
-- 
2.33.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 1/1] nbd/server: do not poll within a coroutine context
  2024-04-01 12:41 ` [PATCH v2 1/1] nbd/server: do not poll within a coroutine context Zhu Yangyang via
@ 2024-04-01 16:33   ` Eric Blake
  2024-04-02  8:53     ` Zhu Yangyang via
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Blake @ 2024-04-01 16:33 UTC (permalink / raw
  To: Zhu Yangyang
  Cc: stefanha, qemu-block, qemu-devel, vsementsov, kwolf, luolongmin,
	suxiaodong1, chenxiaoyu48, wangyan122, yebiaoxiang

On Mon, Apr 01, 2024 at 08:41:20PM +0800, Zhu Yangyang wrote:
> Coroutines are not supposed to block. Instead, they should yield.
> 
> Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
> Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
> ---
>  nbd/client.c       |  7 ++++---
>  nbd/common.c       | 19 ++++++++++++++++---
>  nbd/nbd-internal.h |  6 +++---
>  nbd/server.c       | 10 +++++-----
>  4 files changed, 28 insertions(+), 14 deletions(-)
> 
> diff --git a/nbd/client.c b/nbd/client.c
> index 29ffc609a4..1ab91ed205 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -619,18 +619,19 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
>          return NULL;
>      }
>      qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
> -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
>      trace_nbd_receive_starttls_tls_handshake();
>      qio_channel_tls_handshake(tioc,
> -                              nbd_tls_handshake,
> +                              nbd_client_tls_handshake,
>                                &data,
>                                NULL,
>                                NULL);
>  
>      if (!data.complete) {
> +        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
>          g_main_loop_run(data.loop);
> +        g_main_loop_unref(data.loop);
>      }
> -    g_main_loop_unref(data.loop);
> +

Aha - you figured out an elegant way around the client code not being
in coroutine context that had been stumping me, at least for the sake
of a minimal patch.

>      if (data.error) {
>          error_propagate(errp, data.error);
>          object_unref(OBJECT(tioc));
> diff --git a/nbd/common.c b/nbd/common.c
> index 3247c1d618..01ca30a5c4 100644
> --- a/nbd/common.c
> +++ b/nbd/common.c
> @@ -47,14 +47,27 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
>  }
>  
>  
> -void nbd_tls_handshake(QIOTask *task,
> -                       void *opaque)
> +void nbd_client_tls_handshake(QIOTask *task, void *opaque)
>  {
>      struct NBDTLSHandshakeData *data = opaque;
>  
>      qio_task_propagate_error(task, &data->error);
>      data->complete = true;
> -    g_main_loop_quit(data->loop);
> +    if (data->loop) {
> +        g_main_loop_quit(data->loop);
> +    }
> +}
> +
> +
> +void nbd_server_tls_handshake(QIOTask *task, void *opaque)
> +{
> +    struct NBDTLSHandshakeData *data = opaque;
> +
> +    qio_task_propagate_error(task, &data->error);
> +    data->complete = true;
> +    if (!qemu_coroutine_entered(data->co)) {
> +        aio_co_wake(data->co);
> +    }
>  }

This matches up with what I was experimenting with last week, in that
server is in coroutine context while client is not.  However, it means
we no longer have a common implementation between the two, so keeping
the code isolated in nbd/common.c makes less sense than just placing
the two specific callbacks in the two specific files right where their
only use case exists (and even making them static at that point).

Or, can we still merge it into one helper?  It looks like we now have
3 viable possibilities:

data->loop data->co
non-NULL   non-NULL    impossible
NULL       NULL        client, qio_task completed right away
non-NULL   NULL        client, qio_task did not complete right away
NULL       non-NULL    server, waking the coroutine depends on if we are in one

With that, we can still get by with one function, but need good
documentation.  I'll post a v3 along those lines, to see what you
think.

>  
>  
> diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
> index dfa02f77ee..99cca9382c 100644
> --- a/nbd/nbd-internal.h
> +++ b/nbd/nbd-internal.h
> @@ -74,13 +74,13 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
>  
>  struct NBDTLSHandshakeData {
>      GMainLoop *loop;
> +    Coroutine *co;
>      bool complete;
>      Error *error;
>  };

I had tried to get rid of the GMainLoop *loop member altogether, but
your change has the benefit of a smaller diff than what I was facing
(I got lost in the weeds trying to see if I could convert all of the
client into running in coroutine context).

>  
> -
> -void nbd_tls_handshake(QIOTask *task,
> -                       void *opaque);
> +void nbd_server_tls_handshake(QIOTask *task, void *opaque);
> +void nbd_client_tls_handshake(QIOTask *task, void *opaque);
>  
>  int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
>  
> diff --git a/nbd/server.c b/nbd/server.c
> index c3484cc1eb..b218512ced 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -777,17 +777,17 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
>  
>      qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
>      trace_nbd_negotiate_handle_starttls_handshake();
> -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> +    data.co = qemu_coroutine_self();
>      qio_channel_tls_handshake(tioc,
> -                              nbd_tls_handshake,
> +                              nbd_server_tls_handshake,
>                                &data,
>                                NULL,
>                                NULL);
>  
> -    if (!data.complete) {
> -        g_main_loop_run(data.loop);
> +    while (!data.complete) {
> +        qemu_coroutine_yield();
>      }
> -    g_main_loop_unref(data.loop);
> +
>      if (data.error) {
>          object_unref(OBJECT(tioc));
>          error_propagate(errp, data.error);
> -- 
> 2.33.0
>

Thanks for the updated patch - it looks like we are heading in a good direction.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 1/1] nbd/server: do not poll within a coroutine context
  2024-04-01 16:33   ` Eric Blake
@ 2024-04-02  8:53     ` Zhu Yangyang via
  0 siblings, 0 replies; 4+ messages in thread
From: Zhu Yangyang via @ 2024-04-02  8:53 UTC (permalink / raw
  To: eblake
  Cc: chenxiaoyu48, kwolf, luolongmin, qemu-block, qemu-devel, stefanha,
	suxiaodong1, vsementsov, wangyan122, yebiaoxiang, zhuyangyang14

On Mon, 1 Apr 2024 11:33:09AM -0500, Eric Blake wrote:
> On Mon, Apr 01, 2024 at 08:41:20PM +0800, Zhu Yangyang wrote:
> > Coroutines are not supposed to block. Instead, they should yield.
> > 
> > Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation")
> > Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
> > ---
> >  nbd/client.c       |  7 ++++---
> >  nbd/common.c       | 19 ++++++++++++++++---
> >  nbd/nbd-internal.h |  6 +++---
> >  nbd/server.c       | 10 +++++-----
> >  4 files changed, 28 insertions(+), 14 deletions(-)
> > 
> > diff --git a/nbd/client.c b/nbd/client.c
> > index 29ffc609a4..1ab91ed205 100644
> > --- a/nbd/client.c
> > +++ b/nbd/client.c
> > @@ -619,18 +619,19 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
> >          return NULL;
> >      }
> >      qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
> > -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> >      trace_nbd_receive_starttls_tls_handshake();
> >      qio_channel_tls_handshake(tioc,
> > -                              nbd_tls_handshake,
> > +                              nbd_client_tls_handshake,
> >                                &data,
> >                                NULL,
> >                                NULL);
> >  
> >      if (!data.complete) {
> > +        data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> >          g_main_loop_run(data.loop);
> > +        g_main_loop_unref(data.loop);
> >      }
> > -    g_main_loop_unref(data.loop);
> > +
> 
> Aha - you figured out an elegant way around the client code not being
> in coroutine context that had been stumping me, at least for the sake
> of a minimal patch.
> 
> >      if (data.error) {
> >          error_propagate(errp, data.error);
> >          object_unref(OBJECT(tioc));
> > diff --git a/nbd/common.c b/nbd/common.c
> > index 3247c1d618..01ca30a5c4 100644
> > --- a/nbd/common.c
> > +++ b/nbd/common.c
> > @@ -47,14 +47,27 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
> >  }
> >  
> >  
> > -void nbd_tls_handshake(QIOTask *task,
> > -                       void *opaque)
> > +void nbd_client_tls_handshake(QIOTask *task, void *opaque)
> >  {
> >      struct NBDTLSHandshakeData *data = opaque;
> >  
> >      qio_task_propagate_error(task, &data->error);
> >      data->complete = true;
> > -    g_main_loop_quit(data->loop);
> > +    if (data->loop) {
> > +        g_main_loop_quit(data->loop);
> > +    }
> > +}
> > +
> > +
> > +void nbd_server_tls_handshake(QIOTask *task, void *opaque)
> > +{
> > +    struct NBDTLSHandshakeData *data = opaque;
> > +
> > +    qio_task_propagate_error(task, &data->error);
> > +    data->complete = true;
> > +    if (!qemu_coroutine_entered(data->co)) {
> > +        aio_co_wake(data->co);
> > +    }
> >  }
> 
> This matches up with what I was experimenting with last week, in that
> server is in coroutine context while client is not.  However, it means
> we no longer have a common implementation between the two, so keeping
> the code isolated in nbd/common.c makes less sense than just placing
> the two specific callbacks in the two specific files right where their
> only use case exists (and even making them static at that point).

Yes, we can implement nbd_tls_handshake() on both client and server side.
It looks a lot clearer.

We can even extract the common code to an nbd_tls_handshake_complete().

nbd/common.c
void nbd_tls_handshake_complete(QIOTask *task, void *opaque) {
    struct NBDTLSHandshakeData *data = opaque;

    qio_task_propagate_error(task, &data->error);
    data->complete = true;
}

server.c / client.c
static void nbd_tls_handshake(QIOTask *task, void *opaque)
{
    struct NBDTLSHandshakeData *data = opaque;

    nbd_tls_handshake_complete(task, opaque);
    ...
}

> 
> Or, can we still merge it into one helper?  It looks like we now have
> 3 viable possibilities:
> 
> data->loop data->co
> non-NULL   non-NULL    impossible
> NULL       NULL        client, qio_task completed right away
> non-NULL   NULL        client, qio_task did not complete right away
> NULL       non-NULL    server, waking the coroutine depends on if we are in one

This seems a little complicated.

> 
> With that, we can still get by with one function, but need good
> documentation.  I'll post a v3 along those lines, to see what you
> think.
> 
> >  
> >  
> > diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
> > index dfa02f77ee..99cca9382c 100644
> > --- a/nbd/nbd-internal.h
> > +++ b/nbd/nbd-internal.h
> > @@ -74,13 +74,13 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
> >  
> >  struct NBDTLSHandshakeData {
> >      GMainLoop *loop;
> > +    Coroutine *co;
> >      bool complete;
> >      Error *error;
> >  };
> 
> I had tried to get rid of the GMainLoop *loop member altogether, but
> your change has the benefit of a smaller diff than what I was facing
> (I got lost in the weeds trying to see if I could convert all of the
> client into running in coroutine context).

I saw your reply and also tried to put the client in the coroutine context,
And then I found that the event listener is registered to the default main_context,
This means that we can't use aio_poll(ctx) and AIO_WAIT_WHILE() to wait for
the coroutine to complete.

GMainLoop *loop may not be circumvented.

g_source_attach(source, NULL)
qio_channel_add_watch_full()
qio_channel_tls_handshake_task()
qio_channel_tls_handshake()
nbd_receive_starttls()
nbd_start_negotiate()

> >  
> > -
> > -void nbd_tls_handshake(QIOTask *task,
> > -                       void *opaque);
> > +void nbd_server_tls_handshake(QIOTask *task, void *opaque);
> > +void nbd_client_tls_handshake(QIOTask *task, void *opaque);
> >  
> >  int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
> >  
> > diff --git a/nbd/server.c b/nbd/server.c
> > index c3484cc1eb..b218512ced 100644
> > --- a/nbd/server.c
> > +++ b/nbd/server.c
> > @@ -777,17 +777,17 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
> >  
> >      qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
> >      trace_nbd_negotiate_handle_starttls_handshake();
> > -    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
> > +    data.co = qemu_coroutine_self();
> >      qio_channel_tls_handshake(tioc,
> > -                              nbd_tls_handshake,
> > +                              nbd_server_tls_handshake,
> >                                &data,
> >                                NULL,
> >                                NULL);
> >  
> > -    if (!data.complete) {
> > -        g_main_loop_run(data.loop);
> > +    while (!data.complete) {
> > +        qemu_coroutine_yield();
> >      }
> > -    g_main_loop_unref(data.loop);
> > +
> >      if (data.error) {
> >          object_unref(OBJECT(tioc));
> >          error_propagate(errp, data.error);
> > -- 
> > 2.33.0
> >
> 
> Thanks for the updated patch - it looks like we are heading in a good direction.
> 
> -- 
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.
> Virtualization:  qemu.org | libguestfs.o

--
Best Regards,
Zhu Yangyang


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-04-02  8:48 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-04-01 12:33 [PATCH v2 0/1] coroutine: avoid inserting duplicate coroutine to co_queue_wakeup Zhu Yangyang via
2024-04-01 12:41 ` [PATCH v2 1/1] nbd/server: do not poll within a coroutine context Zhu Yangyang via
2024-04-01 16:33   ` Eric Blake
2024-04-02  8:53     ` Zhu Yangyang via

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.