about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2013-01-03 03:52:31 +0000
committerEric Wong <normalperson@yhbt.net>2013-01-03 04:16:17 +0000
commit02e5a91b24983d96b342c007661966495ccdb619 (patch)
tree3cdd1e90d72a2d5b23c21e45cae6c70142ab17cc
parentdb1eb67855149050e448c63df12ade22135916cd (diff)
downloadsleepy_penguin-02e5a91b24983d96b342c007661966495ccdb619.tar.gz
Until everybody updates to a version of the Linux kernel with
this fix, we need to enable a workaround for older kernels using
EPOLL_CTL_DEL + EPOLL_CTL_ADD to emulate EPOLL_CTL_MOD behavior.

This race condition is fixed in Linux upstream
commit 128dd1759d96ad36c379240f8b9463e8acfd37a1 and included in
Linux v3.8-rc2 and later.

This fix will likely be backported to stable and vendor kernels,
so we'll whitelist them as we learn of them.
-rw-r--r--ext/sleepy_penguin/epoll.c90
-rw-r--r--test/test_epoll.rb5
-rw-r--r--test/test_epoll_optimizations.rb37
3 files changed, 118 insertions, 14 deletions
diff --git a/ext/sleepy_penguin/epoll.c b/ext/sleepy_penguin/epoll.c
index a507b0c..60a34be 100644
--- a/ext/sleepy_penguin/epoll.c
+++ b/ext/sleepy_penguin/epoll.c
@@ -2,6 +2,9 @@
 #include <sys/epoll.h>
 #include <pthread.h>
 #include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/utsname.h>
 #include "missing_epoll.h"
 #ifdef HAVE_RUBY_ST_H
 #  include <ruby/st.h>
@@ -195,6 +198,75 @@ static VALUE init(int argc, VALUE *argv, VALUE self)
         return self;
 }
 
+static int epoll_ctl_mod_racy;
+static void epoll_ctl_mod_racy_detect(void)
+{
+        struct utsname buf;
+        int rc;
+        unsigned version, patchlevel, sublevel;
+
+        /*
+         * Online/current processors for this process is not enough,
+         * we need all processors since events may be triggered
+         * by interrupt handlers on any CPU in the system
+         */
+        long nproc = sysconf(_SC_NPROCESSORS_CONF);
+
+        /* Eric Wong's personal machines are ancient and weak: */
+        if (nproc == 1)
+                return;
+
+        if (uname(&buf) != 0)
+                rb_sys_fail("uname");
+
+        /* who knows, maybe there'll be an epoll on other OSes one day */
+        if (strcmp(buf.sysname, "Linux"))
+                return;
+
+        rc = sscanf(buf.release, "%u.%u.%u", &version, &patchlevel, &sublevel);
+        if (rc != 3) {
+                rb_warn("sscanf failed to parse kernel version: %s (rc=%d), "
+                        "assuming EPOLL_CTL_MOD is racy on SMP",
+                        buf.release, rc);
+                epoll_ctl_mod_racy = 1;
+                return;
+        }
+
+        /*
+         * TODO: whitelist vendor kernels as fixes are backported
+         * TODO: It is likely 3.7.2 (or 3.7.3) will have this
+         *       fix backported.
+         */
+        if (version <= 2) {
+                epoll_ctl_mod_racy = 1;
+        } else if (version == 3) {
+                if (patchlevel <= 7)
+                        epoll_ctl_mod_racy = 1;
+        }
+}
+
+/*
+ * EPOLL_CTL_MOD is racy under Linux <= 3.7.1 on SMP systems,
+ * so we emulate it with EPOLL_CTL_ADD + EPOLL_CTL_DELETE
+ * This is fixed in Linux commit 128dd1759d96ad36c379240f8b9463e8acfd37a1
+ */
+static int
+fake_epoll_ctl_mod(int epfd, int fd, struct epoll_event *event)
+{
+        int rc = epoll_ctl(epfd, EPOLL_CTL_DEL, fd, event);
+        if (rc == 0)
+                rc = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, event);
+        return rc;
+}
+
+static int my_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
+{
+        if (epoll_ctl_mod_racy && op == EPOLL_CTL_MOD)
+                return fake_epoll_ctl_mod(epfd, fd, event);
+        else
+                return epoll_ctl(epfd, op, fd, event);
+}
+
 static VALUE ctl(VALUE self, VALUE io, VALUE flags, int op)
 {
         struct epoll_event event;
@@ -206,11 +278,11 @@ static VALUE ctl(VALUE self, VALUE io, VALUE flags, int op)
         event.events = rb_sp_get_uflags(self, flags);
         pack_event_data(&event, io);
 
-        rv = epoll_ctl(ep->fd, op, fd, &event);
+        rv = my_epoll_ctl(ep->fd, op, fd, &event);
         if (rv == -1) {
                 if (errno == ENOMEM) {
                         rb_gc();
-                        rv = epoll_ctl(ep->fd, op, fd, &event);
+                        rv = my_epoll_ctl(ep->fd, op, fd, &event);
                 }
                 if (rv == -1)
                         rb_sys_fail("epoll_ctl");
@@ -269,7 +341,7 @@ static VALUE set(VALUE self, VALUE io, VALUE flags)
                         return Qnil;
 
 fallback_mod:
-                rv = epoll_ctl(ep->fd, EPOLL_CTL_MOD, fd, &event);
+                rv = my_epoll_ctl(ep->fd, EPOLL_CTL_MOD, fd, &event);
                 if (rv == -1) {
                         if (errno != ENOENT)
                                 rb_sys_fail("epoll_ctl - mod");
@@ -786,4 +858,16 @@ void sleepy_penguin_init_epoll(void)
         rb_define_const(cEpoll, "ONESHOT", UINT2NUM(EPOLLONESHOT));
 
         id_for_fd = rb_intern("for_fd");
+
+        epoll_ctl_mod_racy_detect();
+
+
+        /*
+         * true if EPOLL_CTL_MOD is racy on your version of Linux
+         * (anything before 3.8) under SMP.
+         * Please report patched vendor kernels to
+         * mailto:sleepy.penguin@librelist.org
+         */
+        rb_define_const(cEpoll, "EPOLL_CTL_MOD_RACY",
+                        epoll_ctl_mod_racy ? Qtrue : Qfalse);
 }
diff --git a/test/test_epoll.rb b/test/test_epoll.rb
index fd22654..1eb987e 100644
--- a/test/test_epoll.rb
+++ b/test/test_epoll.rb
@@ -18,7 +18,10 @@ class TestEpoll < Test::Unit::TestCase
 
   def test_constants
     Epoll.constants.each do |const|
-      next if const.to_sym == :IO
+      case const.to_sym
+      when :IO, :EPOLL_CTL_MOD_RACY
+        next
+      end
       nr = Epoll.const_get(const)
       assert nr <= 0xffffffff, "#{const}=#{nr}"
     end
diff --git a/test/test_epoll_optimizations.rb b/test/test_epoll_optimizations.rb
index bd77397..cdb94c8 100644
--- a/test/test_epoll_optimizations.rb
+++ b/test/test_epoll_optimizations.rb
@@ -31,17 +31,34 @@ class TestEpollOptimizations < Test::Unit::TestCase
     assert_equal 1, lines.grep(/^epoll_ctl/).size
     assert_match(/EPOLL_CTL_ADD/, lines.grep(/^epoll_ctl/)[0])
 
-    io, err = Strace.me { @ep.set(@wr, Epoll::OUT | Epoll::ONESHOT) }
-    assert_nil err
-    lines = io.readlines; io.close
-    assert_equal 1, lines.grep(/^epoll_ctl/).size
-    assert_match(/EPOLL_CTL_MOD/, lines.grep(/^epoll_ctl/)[0])
+    if Epoll::EPOLL_CTL_MOD_RACY
+      io, err = Strace.me { @ep.set(@wr, Epoll::OUT | Epoll::ONESHOT) }
+      assert_nil err
+      lines = io.readlines; io.close
+      assert_equal 2, lines.grep(/^epoll_ctl/).size
+      assert_match(/EPOLL_CTL_DEL/, lines.grep(/^epoll_ctl/)[0])
+      assert_match(/EPOLL_CTL_ADD/, lines.grep(/^epoll_ctl/)[1])
+
+      io, err = Strace.me { @ep.set(@wr, Epoll::OUT) }
+      assert_nil err
+      lines = io.readlines; io.close
+      assert_equal 2, lines.grep(/^epoll_ctl/).size
+      assert_match(/EPOLL_CTL_DEL/, lines.grep(/^epoll_ctl/)[0])
+      assert_match(/EPOLL_CTL_ADD/, lines.grep(/^epoll_ctl/)[1])
+    else
+      io, err = Strace.me { @ep.set(@wr, Epoll::OUT | Epoll::ONESHOT) }
+      assert_nil err
+      lines = io.readlines; io.close
+      assert_equal 1, lines.grep(/^epoll_ctl/).size
+      assert_match(/EPOLL_CTL_MOD/, lines.grep(/^epoll_ctl/)[0])
+
+      io, err = Strace.me { @ep.set(@wr, Epoll::OUT) }
+      assert_nil err
+      lines = io.readlines; io.close
+      assert_equal 1, lines.grep(/^epoll_ctl/).size
+      assert_match(/EPOLL_CTL_MOD/, lines.grep(/^epoll_ctl/)[0])
+    end
 
-    io, err = Strace.me { @ep.set(@wr, Epoll::OUT) }
-    assert_nil err
-    lines = io.readlines; io.close
-    assert_equal 1, lines.grep(/^epoll_ctl/).size
-    assert_match(/EPOLL_CTL_MOD/, lines.grep(/^epoll_ctl/)[0])
     @wr.close
     @rd.close