raindrops RubyGem user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
* [PATCH] Allow Raindrops objects to be backed by a memfd file
@ 2021-11-22  1:03 KJ Tsanaktsidis
  2021-11-22  8:42 ` Eric Wong
  0 siblings, 1 reply; 4+ messages in thread
From: KJ Tsanaktsidis @ 2021-11-22  1:03 UTC (permalink / raw)
  To: raindrops-public; +Cc: KJ Tsanaktsidis

Currently, all memory used by Raindrops is mapped as MAP_ANONYMOUS. This
means that although Raindrops counters can be shared between processes
that have forked from each other, it is not possible to share the
counter values with another, unrelated process.

This patch adds support for backing the Raindrops mapping with a file
descriptor created from memfd_create. The API for doing this is simply:

    Raindrops.new(size, name: "name_of_raindrop")

This will cause Raindrops to call memfd_create("name_of_raindrop") and
use that file descriptor to back the mapping. An unrelated process can
then obtain a copy of this file descriptor (via a Unix domain socket, or
even just by looking for the name in /proc/$pid/fd) and read out the
counter values.

My use case for this feature is collecting memory statistics from a
Unicorn master process in a way that does not itself cause any
allocations. This is partly because allocating would bias the
measurement, but more importantly because we very tightly control when
the GC runs in our Unicorn masters, so any garbage created by
high-frequency polling of GC.stat could live for a long time.

With this solution, we can simply store values from rb_objspace into a
Raindrops counter directly, and read the values out-of-process for
submission to our metrics collection system.
---
 ext/raindrops/extconf.rb  |  1 +
 ext/raindrops/raindrops.c | 81 ++++++++++++++++++++++++++++++++++++---
 test/test_linux.rb        | 15 ++++++++
 3 files changed, 91 insertions(+), 6 deletions(-)

diff --git a/ext/raindrops/extconf.rb b/ext/raindrops/extconf.rb
index 792e509..1ddcdf2 100644
--- a/ext/raindrops/extconf.rb
+++ b/ext/raindrops/extconf.rb
@@ -7,6 +7,7 @@
 
 $CPPFLAGS += " -D_GNU_SOURCE "
 have_func('mremap', 'sys/mman.h')
+have_func('memfd_create', 'sys/mman.h')
 headers = %w(sys/types.h netdb.h string.h sys/socket.h netinet/in.h)
 if have_header('linux/tcp.h')
   headers << 'linux/tcp.h'
diff --git a/ext/raindrops/raindrops.c b/ext/raindrops/raindrops.c
index 837084c..c9bf8c8 100644
--- a/ext/raindrops/raindrops.c
+++ b/ext/raindrops/raindrops.c
@@ -34,6 +34,7 @@ struct raindrops {
 	size_t size;
 	size_t capa;
 	pid_t pid;
+	int fd;
 	struct raindrop *drops;
 };
 
@@ -47,6 +48,11 @@ static void rd_free(void *ptr)
 		if (rv != 0)
 			rb_bug("munmap failed in gc: %s", strerror(errno));
 	}
+	if (r->fd != -1) {
+	  int rv = close(r->fd);
+	  if (rv != 0)
+	    rb_bug("close failed in gc: %s", strerror(errno));
+	}
 
 	xfree(ptr);
 }
@@ -88,34 +94,74 @@ static struct raindrops *get(VALUE self)
 
 /*
  * call-seq:
- *	Raindrops.new(size)	-> raindrops object
+ *	Raindrops.new(size, name: nil)	-> raindrops object
  *
  * Initializes a Raindrops object to hold +size+ counters.  +size+ is
  * only a hint and the actual number of counters the object has is
  * dependent on the CPU model, number of cores, and page size of
  * the machine.  The actual size of the object will always be equal
  * or greater than the specified +size+.
+ * If +name+ is provided, and the platform is supported, the raindrop
+ * memory region will be backed by a memfd object with the provided
+ * name, so that it can be shared with other, non-child processes.
  */
-static VALUE init(VALUE self, VALUE size)
+static VALUE init(int argc, VALUE *argv, VALUE self)
 {
 	struct raindrops *r = DATA_PTR(self);
 	int tries = 1;
 	size_t tmp;
+  VALUE size;
+  VALUE kwargs_hash;
+  ID kwargs_ids[1] = { rb_intern_const("name") };
+  VALUE kwargs[1];
+  VALUE name;
 
 	if (r->drops != MAP_FAILED)
 		rb_raise(rb_eRuntimeError, "already initialized");
 
+  rb_scan_args(argc, argv, "1:", &size, &kwargs_hash);
+  rb_get_kwargs(kwargs_hash, kwargs_ids, 0, 1, kwargs);
+  name = kwargs[0];
+
 	r->size = NUM2SIZET(size);
 	if (r->size < 1)
 		rb_raise(rb_eArgError, "size must be >= 1");
 
+  if (name != Qundef && name != Qnil) {
+#ifdef HAVE_MEMFD_CREATE
+    r->fd = memfd_create(StringValueCStr(name), MFD_CLOEXEC);
+    if (r->fd == -1) {
+      int err = errno;
+      if (err == ENOSYS) {
+        rb_raise(rb_eRuntimeError, "system does not support memfd_create");
+      } else {
+        rb_raise(rb_eRuntimeError, "error calling memfd_create: %s", strerror(err));
+      }
+    }
+#else
+    rb_raise(rb_eRuntimeError, "extension not compiled with memfd_create");
+#endif
+  } else {
+    r->fd = -1;
+  }
+
 	tmp = PAGE_ALIGN(raindrop_size * r->size);
 	r->capa = tmp / raindrop_size;
 	assert(PAGE_ALIGN(raindrop_size * r->capa) == tmp && "not aligned");
 
 retry:
-	r->drops = mmap(NULL, tmp,
-	                PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0);
+  if (r->fd != -1) {
+    if (ftruncate(r->fd, tmp) == -1) {
+      r->drops = MAP_FAILED;
+    } else {
+    r->drops = mmap(NULL, tmp,
+                    PROT_READ|PROT_WRITE, MAP_SHARED, r->fd, 0);
+    }
+  } else {
+  	r->drops = mmap(NULL, tmp,
+  	                PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0);
+  }
+
 	if (r->drops == MAP_FAILED) {
 		int err = errno;
 
@@ -153,6 +199,10 @@ static void resize(struct raindrops *r, size_t new_rd_size)
 	if (r->pid != getpid())
 		rb_raise(rb_eRuntimeError, "cannot mremap() from child");
 
+	if (r->fd != -1) {
+	  rb_raise(rb_eRuntimeError, "resize not implemented with named raindrops");
+	}
+
 	rv = mremap(old_address, old_size, new_size, MREMAP_MAYMOVE);
 	if (rv == MAP_FAILED) {
 		int err = errno;
@@ -213,6 +263,18 @@ static VALUE capa(VALUE self)
 	return SIZET2NUM(get(self)->capa);
 }
 
+/*
+ * call-seq:
+ *   rd.fd    -> Integer
+ *
+ * Returns the file descriptor number associated with this Raindrop, if
+ * it was created with a name.
+ */
+static VALUE fd(VALUE self)
+{
+  return INT2NUM(get(self)->fd);
+}
+
 /*
  * call-seq:
  *	rd.dup		-> rd_copy
@@ -223,8 +285,9 @@ static VALUE init_copy(VALUE dest, VALUE source)
 {
 	struct raindrops *dst = DATA_PTR(dest);
 	struct raindrops *src = get(source);
+	VALUE init_argv[1] = { SIZET2NUM(src->size) };
 
-	init(dest, SIZET2NUM(src->size));
+	init(1, init_argv, dest);
 	memcpy(dst->drops, src->drops, raindrop_size * src->size);
 
 	return dest;
@@ -372,6 +435,11 @@ static VALUE evaporate_bang(VALUE self)
 	r->drops = MAP_FAILED;
 	if (munmap(addr, raindrop_size * r->capa) != 0)
 		rb_sys_fail("munmap");
+	if (r->fd != -1) {
+	  if (close(r->fd) != 0)
+	    rb_sys_fail("close");
+	  r->fd = -1;
+	}
 	return Qnil;
 }
 
@@ -433,7 +501,7 @@ void Init_raindrops_ext(void)
 
 	rb_define_alloc_func(cRaindrops, alloc);
 
-	rb_define_method(cRaindrops, "initialize", init, 1);
+	rb_define_method(cRaindrops, "initialize", init, -1);
 	rb_define_method(cRaindrops, "incr", incr, -1);
 	rb_define_method(cRaindrops, "decr", decr, -1);
 	rb_define_method(cRaindrops, "to_ary", to_ary, 0);
@@ -444,6 +512,7 @@ void Init_raindrops_ext(void)
 	rb_define_method(cRaindrops, "capa", capa, 0);
 	rb_define_method(cRaindrops, "initialize_copy", init_copy, 1);
 	rb_define_method(cRaindrops, "evaporate!", evaporate_bang, 0);
+	rb_define_method(cRaindrops, "fd", fd, 0);
 
 #ifdef __linux__
 	Init_raindrops_linux_inet_diag();
diff --git a/test/test_linux.rb b/test/test_linux.rb
index 7808469..b9dc757 100644
--- a/test/test_linux.rb
+++ b/test/test_linux.rb
@@ -278,4 +278,19 @@ def test_tcp_stress_test
     statuses = Process.waitall
     statuses.each { |(_,status)| assert status.success?, status.inspect }
   end if ENV["STRESS"].to_i != 0
+
+  def test_memfd
+    rd = Raindrops.new(1, name: "test_memfd_raindrop")
+    assert rd.fd != -1
+
+    rd.incr(0, 5)
+    assert_equal 5, rd[0]
+
+    raw_data = File.read "/proc/self/fd/#{rd.fd}"
+    assert raw_data.size > 8
+    counter_value = raw_data.unpack("Q")[0]
+    assert_equal 5, counter_value
+
+    rd.evaporate!
+  end
 end if RUBY_PLATFORM =~ /linux/
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-11-22 17:56 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-22  1:03 [PATCH] Allow Raindrops objects to be backed by a memfd file KJ Tsanaktsidis
2021-11-22  8:42 ` Eric Wong
2021-11-22 10:13   ` KJ Tsanaktsidis
2021-11-22 17:56     ` Eric Wong

Code repositories for project(s) associated with this public inbox

	https://yhbt.net/raindrops.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).