unicorn Ruby/Rack server user+dev discussion/patches/pulls/bugs/help
* [PATCH] epollexclusive: avoid Ruby object allocation for buffer
@ 2023-02-15  7:58 Eric Wong
  2023-03-01 23:12 ` Eric Wong
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Wong @ 2023-02-15  7:58 UTC (permalink / raw)
  To: unicorn-public

Leaving a dangling Ruby object around is suboptimal since it
adds to GC pressure.

`__attribute__((__cleanup__(foo)))' is an old gcc extension to
provide automatic cleanup functionality by running a pre-declared
function at the end of scope.

gcc introduced this two decades ago and clang's had it for over
a decade.  Even tinycc has supported it since 2019, and I expect it
to be easy to support in any C compiler since they're already
required to do cleanup for on-stack allocations.

So no, it's not standardized in C, but it makes life significantly
easier for C hackers.
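
For anyone unfamiliar with the extension, a minimal standalone
illustration (not part of this patch; names are made up):

#include <stdio.h>
#include <stdlib.h>

static void cleanup_free(void *any)
{
	void **p = any; /* called with &buf at end of scope */
	free(*p);
}

int main(void)
{
	/* freed automatically when `buf' goes out of scope */
	__attribute__((__cleanup__(cleanup_free))) void *buf = malloc(128);

	snprintf(buf, 128, "%s", "no explicit free needed");
	puts(buf);
	return 0; /* the compiler emits cleanup_free(&buf) here */
}
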
---
 ext/unicorn_http/epollexclusive.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/ext/unicorn_http/epollexclusive.h b/ext/unicorn_http/epollexclusive.h
index 677e1fe..67a9ba6 100644
--- a/ext/unicorn_http/epollexclusive.h
+++ b/ext/unicorn_http/epollexclusive.h
@@ -78,6 +78,14 @@ static void *do_wait(void *ptr) /* runs w/o GVL */
 				epw->maxevents, epw->timeout_msec);
 }
 
+/* cleanup is supported since 2003 (gcc), 2009 (clang), tinycc: 2019 */
+#define AUTO_XFREE __attribute__((__cleanup__(cleanup_xfree)))
+static void cleanup_xfree(void *any)
+{
+	void **p = any;
+	xfree(*p);
+}
+
 /* :nodoc: */
 /* readers must not change between prepare_readers and get_readers */
 static VALUE
@@ -85,22 +93,18 @@ get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec)
 {
 	struct ep_wait epw;
 	long i, n;
-	VALUE buf;
+	AUTO_XFREE void *buf = NULL;
 
 	Check_Type(ready, T_ARRAY);
 	Check_Type(readers, T_ARRAY);
 	epw.maxevents = RARRAY_LENINT(readers);
-	buf = rb_str_buf_new(sizeof(struct epoll_event) * epw.maxevents);
-	epw.events = (struct epoll_event *)RSTRING_PTR(buf);
+	epw.events = buf = ALLOC_N(struct epoll_event, epw.maxevents);
 	epio = rb_io_get_io(epio);
 	GetOpenFile(epio, epw.fptr);
 
 	epw.timeout_msec = NUM2INT(timeout_msec);
 	n = (long)rb_thread_call_without_gvl(do_wait, &epw, RUBY_UBF_IO, NULL);
-	if (n < 0) {
-		if (errno != EINTR) rb_sys_fail("epoll_wait");
-		n = 0;
-	}
+	if (n < 0 && errno != EINTR) rb_sys_fail("epoll_wait");
 	/* Linux delivers events in order received */
 	for (i = 0; i < n; i++) {
 		struct epoll_event *ev = &epw.events[i];
@@ -109,7 +113,6 @@ get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec)
 		if (RTEST(obj))
 			rb_ary_push(ready, obj);
 	}
-	rb_str_resize(buf, 0);
 	return Qfalse;
 }
 #endif /* USE_EPOLL */


* Re: [PATCH] epollexclusive: avoid Ruby object allocation for buffer
  2023-02-15  7:58 [PATCH] epollexclusive: avoid Ruby object allocation for buffer Eric Wong
@ 2023-03-01 23:12 ` Eric Wong
  2023-03-22 12:53   ` Jean Boussier
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Wong @ 2023-03-01 23:12 UTC (permalink / raw)
  To: unicorn-public

Eric Wong <bofh@yhbt.net> wrote:
> Leaving a dangling Ruby object around is suboptimal since it
> adds to GC pressure.
> 
> `__attribute__((__cleanup__(foo)))' is an old gcc extension to
> provide automatic cleanup functionality by running a pre-declared
> function at the end of scope.

Unfortunately, this isn't usable due to exceptions.
Even if we avoid raising exceptions ourselves, Thread#raise can
be called by a Rack app which spawns a background thread.

>  	n = (long)rb_thread_call_without_gvl(do_wait, &epw, RUBY_UBF_IO, NULL);

Thread#raise can hit rb_thread_call_without_gvl.

> -	if (n < 0) {
> -		if (errno != EINTR) rb_sys_fail("epoll_wait");
> -		n = 0;
> -	}
> +	if (n < 0 && errno != EINTR) rb_sys_fail("epoll_wait");
>  	/* Linux delivers events in order received */
>  	for (i = 0; i < n; i++) {
>  		struct epoll_event *ev = &epw.events[i];
> @@ -109,7 +113,6 @@ get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec)
>  		if (RTEST(obj))
>  			rb_ary_push(ready, obj);
>  	}

While the `for' loop can be moved ahead of the rb_sys_fail call
and limited in scope, Thread#raise can't be predicted.

I suppose rb_ensure works, but that's a PITA.
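
To illustrate the PITA, an untested sketch of the rb_ensure route
(gr_args, get_readers_body, get_readers_ensure are made-up names):

struct gr_args {
	VALUE epio, ready, readers, timeout_msec;
	struct epoll_event *events; /* heap buffer the ensure proc frees */
};

static VALUE get_readers_body(VALUE ptr)
{
	struct gr_args *a = (struct gr_args *)ptr;

	/* ... the existing get_readers logic, using a->events ... */
	return Qfalse;
}

static VALUE get_readers_ensure(VALUE ptr)
{
	struct gr_args *a = (struct gr_args *)ptr;

	xfree(a->events); /* runs even if Thread#raise interrupts the body */
	return Qfalse;
}

static VALUE
get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec)
{
	struct gr_args a = { epio, ready, readers, timeout_msec, NULL };

	Check_Type(readers, T_ARRAY);
	a.events = ALLOC_N(struct epoll_event, RARRAY_LENINT(readers));
	return rb_ensure(get_readers_body, (VALUE)&a,
			get_readers_ensure, (VALUE)&a);
}

Two extra functions and a struct just to free one buffer.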


* Re: [PATCH] epollexclusive: avoid Ruby object allocation for buffer
  2023-03-01 23:12 ` Eric Wong
@ 2023-03-22 12:53   ` Jean Boussier
  2023-03-28 12:24     ` [PATCH] epollexclusive: use maxevents=1 for epoll_wait Eric Wong
  0 siblings, 1 reply; 4+ messages in thread
From: Jean Boussier @ 2023-03-22 12:53 UTC (permalink / raw)
  To: bofh; +Cc: unicorn-public

Interesting patch.  I wonder if you have considered RB_ALLOCV_N, e.g.:

https://patch-diff.githubusercontent.com/raw/Shopify/pitchfork/pull/37.patch
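
For reference, a rough, untested sketch of get_readers using it
(RB_ALLOCV_N uses alloca for small sizes and falls back to a hidden,
GC-managed buffer for larger ones, so nothing leaks on exceptions):

static VALUE
get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec)
{
	struct ep_wait epw;
	long i, n;
	VALUE buf;

	Check_Type(ready, T_ARRAY);
	Check_Type(readers, T_ARRAY);
	epw.maxevents = RARRAY_LENINT(readers);
	epw.events = RB_ALLOCV_N(struct epoll_event, buf, epw.maxevents);
	epio = rb_io_get_io(epio);
	GetOpenFile(epio, epw.fptr);

	epw.timeout_msec = NUM2INT(timeout_msec);
	n = (long)rb_thread_call_without_gvl(do_wait, &epw, RUBY_UBF_IO, NULL);
	if (n < 0) {
		if (errno != EINTR) rb_sys_fail("epoll_wait");
		n = 0;
	}
	/* Linux delivers events in order received */
	for (i = 0; i < n; i++) {
		struct epoll_event *ev = &epw.events[i];
		VALUE obj = rb_ary_entry(readers, ev->data.u64);

		if (RTEST(obj))
			rb_ary_push(ready, obj);
	}
	RB_ALLOCV_END(buf); /* no-op when the alloca path was taken */
	return Qfalse;
}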




* [PATCH] epollexclusive: use maxevents=1 for epoll_wait
  2023-03-22 12:53   ` Jean Boussier
@ 2023-03-28 12:24     ` Eric Wong
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-03-28 12:24 UTC (permalink / raw)
  To: Jean Boussier; +Cc: unicorn-public

Jean Boussier <jean.boussier@shopify.com> wrote:
> Interesting patch.  I wonder if you have considered RB_ALLOCV_N, e.g.:
> 
> https://patch-diff.githubusercontent.com/raw/Shopify/pitchfork/pull/37.patch

Were you able to benchmark an improvement using alloca?
I would think it gets lost in the noise.

I don't like alloca in general since it can make the compiler
over-allocate stack (because default cflags favor speed over
size).  It also makes static analysis more difficult.

But C Ruby is already infested with alloca, so one more or less
probably isn't meaningful.  That said, maybe I have a better
solution than either alloca or malloc below.

Been busy dealing with other stuff, but something new just
occurred to me based on something I've abused for over a decade.

Background:

  Outside of unicorn (and COMPLETELY different designs), I've
  abused epoll_wait with maxevents=1 for over a decade because
  it gives the most fairness in multi-threaded cases.  epoll is
  a queue, too; kqueue just got the more appropriate name.

It never occurred to me until right now, but maxevents=1
combined w/ EPOLLEXCLUSIVE should ALSO improve fairness
(compared to maxevents >1 + EPOLLEXCLUSIVE) in a multi-process,
single-threaded environment like unicorn if configured with
multiple listeners.

Explanation is in the commit message and code comments below;
hopefully it makes sense.

Anybody willing to benchmark the following on a large SMP
machine?  (I hate consumerism and refuse to have powerful HW)

---------8<--------
Subject: [PATCH] epollexclusive: use maxevents=1 for epoll_wait

This allows us to avoid both malloc (slow) and alloca
(unpredictable stack usage) at the cost of needing to make more
epoll_wait syscalls in rare cases.

In unicorn (and most servers), I expect the most frequent setup
is to have one active listener serving the majority of the
connections, so the most frequent epoll_wait return value would
be 1.

Even with >1 events, any syscall overhead saved by having
epoll_wait retrieve multiple events is dwarfed by Rack app
processing overhead.

Worse yet, if a worker retrieves an event sooner than it can
process it, the kernel (with or without EPOLLEXCLUSIVE) is able
to enqueue another new event for that worker.  In this example,
`a' and `b' are both listeners:

  U=userspace, K=kernel
  K: client hits `a' and `b', enqueues them both (events #1 and #2)
  U: epoll_wait(maxevents: 2) => [ a, b ]
  K: enqueues another event for `b' (event #3)
  U: process_client(a.accept) # this takes a long time

While process_client(a.accept) is happening, `b' can have two
clients pending on a given worker.  It's actually better to
leave the first `b' event unretrieved so the second `b'
event can go to the ep->rdllist of another worker.

The kernel only enqueues an item if it isn't already enqueued,
meaning it's impossible for epoll_wait to ever retrieve
`[ b, b ]' in one call.
---
 ext/unicorn_http/epollexclusive.h | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/ext/unicorn_http/epollexclusive.h b/ext/unicorn_http/epollexclusive.h
index 677e1fe..8f4ea9a 100644
--- a/ext/unicorn_http/epollexclusive.h
+++ b/ext/unicorn_http/epollexclusive.h
@@ -64,18 +64,22 @@ static VALUE prep_readers(VALUE cls, VALUE readers)
 
 #if USE_EPOLL
 struct ep_wait {
-	struct epoll_event *events;
+	struct epoll_event event;
 	rb_io_t *fptr;
-	int maxevents;
 	int timeout_msec;
 };
 
 static void *do_wait(void *ptr) /* runs w/o GVL */
 {
 	struct ep_wait *epw = ptr;
-
-	return (void *)(long)epoll_wait(epw->fptr->fd, epw->events,
-				epw->maxevents, epw->timeout_msec);
+	/*
+	 * Linux delivers epoll events in the order received, and using
+	 * maxevents=1 ensures we pluck one item off ep->rdllist
+	 * at-a-time (c.f. fs/eventpoll.c in linux.git, it's quite
+	 * easy-to-understand for anybody familiar with Ruby C).
+	 */
+	return (void *)(long)epoll_wait(epw->fptr->fd, &epw->event, 1,
+					epw->timeout_msec);
 }
 
 /* :nodoc: */
@@ -84,14 +88,10 @@ static VALUE
 get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec)
 {
 	struct ep_wait epw;
-	long i, n;
-	VALUE buf;
+	long n;
 
 	Check_Type(ready, T_ARRAY);
 	Check_Type(readers, T_ARRAY);
-	epw.maxevents = RARRAY_LENINT(readers);
-	buf = rb_str_buf_new(sizeof(struct epoll_event) * epw.maxevents);
-	epw.events = (struct epoll_event *)RSTRING_PTR(buf);
 	epio = rb_io_get_io(epio);
 	GetOpenFile(epio, epw.fptr);
 
@@ -99,17 +99,12 @@ get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec)
 	n = (long)rb_thread_call_without_gvl(do_wait, &epw, RUBY_UBF_IO, NULL);
 	if (n < 0) {
 		if (errno != EINTR) rb_sys_fail("epoll_wait");
-		n = 0;
-	}
-	/* Linux delivers events in order received */
-	for (i = 0; i < n; i++) {
-		struct epoll_event *ev = &epw.events[i];
-		VALUE obj = rb_ary_entry(readers, ev->data.u64);
+	} else if (n > 0) { /* maxevents is hardcoded to 1 */
+		VALUE obj = rb_ary_entry(readers, epw.event.data.u64);
 
 		if (RTEST(obj))
 			rb_ary_push(ready, obj);
-	}
-	rb_str_resize(buf, 0);
+	} /* n == 0 : timeout */
 	return Qfalse;
 }
 #endif /* USE_EPOLL */



Code repositories for project(s) associated with this public inbox

	https://yhbt.net/unicorn.git/
