From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS30031 170.10.132.0/23 X-Spam-Status: No, score=-4.1 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,RCVD_IN_DNSWL_HI, RCVD_IN_MSPIKE_H3,RCVD_IN_MSPIKE_WL,SPF_HELO_NONE,SPF_PASS shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from us-smtp-delivery-110.mimecast.com (us-smtp-delivery-110.mimecast.com [170.10.133.110]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dcvr.yhbt.net (Postfix) with ESMTPS id 08CD71F9F4 for ; Mon, 22 Nov 2021 01:03:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=zendesk.com; s=mimecast20150210; t=1637543037; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=WcrLMynuVuLRE9LYPqSiLa0Vw8K3SBjTPNnCbVkJvQ8=; b=RvNlo0Qq1EZXBR38aZDOUOisjDfemO/2IzxO/uuC87zpSNrXEEMoVwcM9rbqV1v6nqaaNA kgaqU39AaF41HL+iwXGyXTrjTl5X6NWQzVF4crpNca/Zs8Gvc9C6zERpTJ98chux2G91Km Aj7C/0SOa+/F5u4ricD/b1I+GJSGpW0= Received: from mail-pf1-f199.google.com (mail-pf1-f199.google.com [209.85.210.199]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-126-j9tmr1vgNzOZD80VZwgo_Q-1; Sun, 21 Nov 2021 20:03:46 -0500 X-MC-Unique: j9tmr1vgNzOZD80VZwgo_Q-1 Received: by mail-pf1-f199.google.com with SMTP id l8-20020a056a0016c800b0049ffee8cebfso9195340pfc.20 for ; Sun, 21 Nov 2021 17:03:46 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:mime-version :content-transfer-encoding; bh=3GmH+s2J3LvGHxWnGWlnjqJJPkAD1ahzKIEaHN/CoBs=; 
b=DbakJa7OYJGZyJbfMGh1arT6cYB1Fokcibl5fJMIbY5MSwwwbsrdbXkWn8U+yBpIdf VFLONYBjrzMwcIQkV4E+/sr2OaerT3omcLsN0BcwLQlGtnxNdJG3AtBdN1z5wZE4hVLO MUfJrSc0fACJuoT4YTxZHxwB/sfDfJOrUKwxeQvpCUx80Bnzf5lawwFSPL28uJWU6l7F IMJBYQ7YiGzmTXvquZbmENcyr5tY/uVRxSfAFftYPqcpNpY0n0f7hX4h/ccc3raD8OXY CN137fHpuvtYjXBZJFCxI2D3QyFYd2sHN038ZUQG8giwam0zZLph1aejfsmEL0EdvSbQ VRxw== X-Gm-Message-State: AOAM531w0zW393//iRERjcRekr+7g0h6gcEh2GHEGml6twcxmi9jW2nA OSW2M2lo+9UE067xalkqUCbWlKJpj1FaY5o0l9Wv6wT4oJPEHm63o2SJzqVtw1u3HWyHZDX+acn gJAvO1v48mUM/q+o+ToNIcz4kcXHezEqdjexzK0WCLZJ7JoCz7Pa2eUMVIPtlwNRBT+v4CNSKGH TXzx0= X-Received: by 2002:a17:90b:4c4f:: with SMTP id np15mr25617429pjb.65.1637543025315; Sun, 21 Nov 2021 17:03:45 -0800 (PST) X-Google-Smtp-Source: ABdhPJwl78tVtiarDSBVrwrShpByaxM+XMwCvz678LZ6c5qMxxYVTlhHwVMR3V97rt8UHzk8PmSNFQ== X-Received: by 2002:a17:90b:4c4f:: with SMTP id np15mr25617365pjb.65.1637543024837; Sun, 21 Nov 2021 17:03:44 -0800 (PST) Received: from localhost.localdomain (119-17-137-142.771189.mel.static.aussiebb.net. [119.17.137.142]) by smtp.googlemail.com with ESMTPSA id v7sm4503435pgv.86.2021.11.21.17.03.43 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Sun, 21 Nov 2021 17:03:44 -0800 (PST) From: KJ Tsanaktsidis To: raindrops-public@yhbt.net Cc: KJ Tsanaktsidis Subject: [PATCH] Allow Raindrops objects to be backed by a memfd file Date: Mon, 22 Nov 2021 12:03:36 +1100 Message-Id: <20211122010336.43463-1-ktsanaktsidis@zendesk.com> X-Mailer: git-send-email 2.33.1 MIME-Version: 1.0 Authentication-Results: relay.mimecast.com; auth=pass smtp.auth=CUSA10A63 smtp.mailfrom=ktsanaktsidis@zendesk.com X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: zendesk.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="US-ASCII" List-Id: Currently, all memory used by Raindrops is mapped as MAP_ANONYMOUS. 
This means that although Raindrops counters can be shared between processes that have forked from each other, it is not possible to share the counter values with another, unrelated process. This patch adds support for backing the Raindrops mapping with a file descriptor created from memfd_create. The API for doing this is simply: Raindrops.new(size, name: "name_of_raindrop") This will cause Raindrops to call memfd_create("name_of_raindrop") and use that file descriptor to back the mapping. An unrelated process can then obtain a copy of this file descriptor (via a Unix domain socket, or even just by looking for the name in /proc/$pid/fd) and read out the counter values. My use-case for this feature is that I want to collect memory statistics of a Unicorn master process in a way that does not itself cause any allocations. This is partly because such allocations would bias the measurement, but more importantly because we very tightly control when the GC runs in our Unicorn masters, and any garbage created by high-frequency polling of GC.stat would potentially live for a long time. With this solution, we can simply store values from rb_objspace into a Raindrops counter directly, and read the values out-of-process for submission to our metrics collection system. 
--- ext/raindrops/extconf.rb | 1 + ext/raindrops/raindrops.c | 81 ++++++++++++++++++++++++++++++++++++--- test/test_linux.rb | 15 ++++++++ 3 files changed, 91 insertions(+), 6 deletions(-) diff --git a/ext/raindrops/extconf.rb b/ext/raindrops/extconf.rb index 792e509..1ddcdf2 100644 --- a/ext/raindrops/extconf.rb +++ b/ext/raindrops/extconf.rb @@ -7,6 +7,7 @@ =20 $CPPFLAGS +=3D " -D_GNU_SOURCE " have_func('mremap', 'sys/mman.h') +have_func('memfd_create', 'sys/mman.h') headers =3D %w(sys/types.h netdb.h string.h sys/socket.h netinet/in.h) if have_header('linux/tcp.h') headers << 'linux/tcp.h' diff --git a/ext/raindrops/raindrops.c b/ext/raindrops/raindrops.c index 837084c..c9bf8c8 100644 --- a/ext/raindrops/raindrops.c +++ b/ext/raindrops/raindrops.c @@ -34,6 +34,7 @@ struct raindrops { =09size_t size; =09size_t capa; =09pid_t pid; +=09int fd; =09struct raindrop *drops; }; =20 @@ -47,6 +48,11 @@ static void rd_free(void *ptr) =09=09if (rv !=3D 0) =09=09=09rb_bug("munmap failed in gc: %s", strerror(errno)); =09} +=09if (r->fd !=3D -1) { +=09 int rv =3D close(r->fd); +=09 if (rv !=3D 0) +=09 rb_bug("close failed in gc: %s", strerror(errno)); +=09} =20 =09xfree(ptr); } @@ -88,34 +94,74 @@ static struct raindrops *get(VALUE self) =20 /* * call-seq: - *=09Raindrops.new(size)=09-> raindrops object + *=09Raindrops.new(size, name: nil)=09-> raindrops object * * Initializes a Raindrops object to hold +size+ counters. +size+ is * only a hint and the actual number of counters the object has is * dependent on the CPU model, number of cores, and page size of * the machine. The actual size of the object will always be equal * or greater than the specified +size+. + * If +name+ is provided, and the platform is supported, the raindrop + * memory region will be backed by a memfd object with the provided + * name, so that it can be shared with other, non-child processes. 
*/ -static VALUE init(VALUE self, VALUE size) +static VALUE init(int argc, VALUE *argv, VALUE self) { =09struct raindrops *r =3D DATA_PTR(self); =09int tries =3D 1; =09size_t tmp; + VALUE size; + VALUE kwargs_hash; + ID kwargs_ids[1] =3D { rb_intern_const("name") }; + VALUE kwargs[1]; + VALUE name; =20 =09if (r->drops !=3D MAP_FAILED) =09=09rb_raise(rb_eRuntimeError, "already initialized"); =20 + rb_scan_args(argc, argv, "1:", &size, &kwargs_hash); + rb_get_kwargs(kwargs_hash, kwargs_ids, 0, 1, kwargs); + name =3D kwargs[0]; + =09r->size =3D NUM2SIZET(size); =09if (r->size < 1) =09=09rb_raise(rb_eArgError, "size must be >=3D 1"); =20 + if (name !=3D Qundef && name !=3D Qnil) { +#ifdef HAVE_MEMFD_CREATE + r->fd =3D memfd_create(StringValueCStr(name), MFD_CLOEXEC); + if (r->fd =3D=3D -1) { + int err =3D errno; + if (err =3D=3D ENOSYS) { + rb_raise(rb_eRuntimeError, "system does not support memfd_create")= ; + } else { + rb_raise(rb_eRuntimeError, "error calling memfd_create: %s", strer= ror(err)); + } + } +#else + rb_raise(rb_eRuntimeError, "extension not compiled with memfd_create")= ; +#endif + } else { + r->fd =3D -1; + } + =09tmp =3D PAGE_ALIGN(raindrop_size * r->size); =09r->capa =3D tmp / raindrop_size; =09assert(PAGE_ALIGN(raindrop_size * r->capa) =3D=3D tmp && "not aligned")= ; =20 retry: -=09r->drops =3D mmap(NULL, tmp, -=09 PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0); + if (r->fd !=3D -1) { + if (ftruncate(r->fd, tmp) =3D=3D -1) { + r->drops =3D MAP_FAILED; + } else { + r->drops =3D mmap(NULL, tmp, + PROT_READ|PROT_WRITE, MAP_SHARED, r->fd, 0); + } + } else { + =09r->drops =3D mmap(NULL, tmp, + =09 PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0); + } + =09if (r->drops =3D=3D MAP_FAILED) { =09=09int err =3D errno; =20 @@ -153,6 +199,10 @@ static void resize(struct raindrops *r, size_t new_rd_= size) =09if (r->pid !=3D getpid()) =09=09rb_raise(rb_eRuntimeError, "cannot mremap() from child"); =20 +=09if (r->fd !=3D -1) { +=09 rb_raise(rb_eRuntimeError, 
"resize not implemented with named raindro= ps"); +=09} + =09rv =3D mremap(old_address, old_size, new_size, MREMAP_MAYMOVE); =09if (rv =3D=3D MAP_FAILED) { =09=09int err =3D errno; @@ -213,6 +263,18 @@ static VALUE capa(VALUE self) =09return SIZET2NUM(get(self)->capa); } =20 +/* + * call-seq: + * rd.fd -> Integer + * + * Returns the file descriptor number associated with this Raindrop, if + * it was created with a name. + */ +static VALUE fd(VALUE self) +{ + return INT2NUM(get(self)->fd); +} + /* * call-seq: *=09rd.dup=09=09-> rd_copy @@ -223,8 +285,9 @@ static VALUE init_copy(VALUE dest, VALUE source) { =09struct raindrops *dst =3D DATA_PTR(dest); =09struct raindrops *src =3D get(source); +=09VALUE init_argv[1] =3D { SIZET2NUM(src->size) }; =20 -=09init(dest, SIZET2NUM(src->size)); +=09init(1, init_argv, dest); =09memcpy(dst->drops, src->drops, raindrop_size * src->size); =20 =09return dest; @@ -372,6 +435,11 @@ static VALUE evaporate_bang(VALUE self) =09r->drops =3D MAP_FAILED; =09if (munmap(addr, raindrop_size * r->capa) !=3D 0) =09=09rb_sys_fail("munmap"); +=09if (r->fd !=3D -1) { +=09 if (close(r->fd) !=3D 0) +=09 rb_sys_fail("close"); +=09 r->fd =3D -1; +=09} =09return Qnil; } =20 @@ -433,7 +501,7 @@ void Init_raindrops_ext(void) =20 =09rb_define_alloc_func(cRaindrops, alloc); =20 -=09rb_define_method(cRaindrops, "initialize", init, 1); +=09rb_define_method(cRaindrops, "initialize", init, -1); =09rb_define_method(cRaindrops, "incr", incr, -1); =09rb_define_method(cRaindrops, "decr", decr, -1); =09rb_define_method(cRaindrops, "to_ary", to_ary, 0); @@ -444,6 +512,7 @@ void Init_raindrops_ext(void) =09rb_define_method(cRaindrops, "capa", capa, 0); =09rb_define_method(cRaindrops, "initialize_copy", init_copy, 1); =09rb_define_method(cRaindrops, "evaporate!", evaporate_bang, 0); +=09rb_define_method(cRaindrops, "fd", fd, 0); =20 #ifdef __linux__ =09Init_raindrops_linux_inet_diag(); diff --git a/test/test_linux.rb b/test/test_linux.rb index 7808469..b9dc757 100644 
--- a/test/test_linux.rb +++ b/test/test_linux.rb @@ -278,4 +278,19 @@ def test_tcp_stress_test statuses =3D Process.waitall statuses.each { |(_,status)| assert status.success?, status.inspect } end if ENV["STRESS"].to_i !=3D 0 + + def test_memfd + rd =3D Raindrops.new(1, name: "test_memfd_raindrop") + assert rd.fd !=3D -1 + + rd.incr(0, 5) + assert_equal 5, rd[0] + + raw_data =3D File.read "/proc/self/fd/#{rd.fd}" + assert raw_data.size > 8 + counter_value =3D raw_data.unpack("Q")[0] + assert_equal 5, counter_value + + rd.evaporate! + end end if RUBY_PLATFORM =3D~ /linux/ --=20 2.33.1