about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-10-09 02:24:45 +0000
committer Eric Wong <e@yhbt.net> 2021-10-09 03:35:45 +0000
commit 8d44aa7c394d8b41af891a84e9b4fcca9358da41 (patch)
tree 8912d302ced74146b66040d0c51064c05eb8bfe5
parent 88025df70e228fe9a9cf8676772eaa13aba68eb6 (diff)
download yahns-8d44aa7c394d8b41af891a84e9b4fcca9358da41.tar.gz
epoll_wait() wakeups from QueueQuitter got lost during graceful
shutdown since there are multiple worker threads operating off the
same FD.  Work around the problem by re-arming the eventfd for
every worker thread reaped.

Link: https://yhbt.net/lore/lkml/20210405231025.33829-1-dave@stgolabs.net/
-rw-r--r-- lib/yahns/queue_epoll.rb 4
-rw-r--r-- lib/yahns/server.rb 17
2 files changed, 14 insertions, 7 deletions
diff --git a/lib/yahns/queue_epoll.rb b/lib/yahns/queue_epoll.rb
index 9e4271a..a198fbf 100644
--- a/lib/yahns/queue_epoll.rb
+++ b/lib/yahns/queue_epoll.rb
@@ -32,6 +32,10 @@ class Yahns::Queue < SleepyPenguin::Epoll::IO # :nodoc:
     epoll_ctl(Epoll::CTL_MOD, io, flags)
   end
 
+  def queue_del(io)
+    epoll_ctl(Epoll::CTL_DEL, io, 0)
+  end
+
   def thr_init
     Thread.current[:yahns_rbuf] = ''.dup
     Thread.current[:yahns_fdmap] = @fdmap
diff --git a/lib/yahns/server.rb b/lib/yahns/server.rb
index 208b5ee..74eeb7e 100644
--- a/lib/yahns/server.rb
+++ b/lib/yahns/server.rb
@@ -438,25 +438,28 @@ class Yahns::Server # :nodoc:
   # This just injects the QueueQuitter object which acts like a
   # monkey wrench thrown into a perfectly good engine :)
   def quit_finish
-    quitter = Yahns::QueueQuitter.new
+    # we must not let quitters get GC-ed if we have any worker threads leftover
+    @quitter = Yahns::QueueQuitter.new
 
     # throw the monkey wrench into the worker threads
-    @queues.each { |q| q.queue_add(quitter, Yahns::Queue::QEV_QUIT) }
+    @queues.each { |q| q.queue_add(@quitter, Yahns::Queue::QEV_QUIT) }
 
     # watch the monkey wrench destroy all the threads!
     # Ugh, this may fail if we have dedicated threads trickling
     # response bodies out (e.g. "tail -F")  Oh well, have a timeout
     begin
       @wthr.delete_if { |t| t.join(0.01) }
+      # Workaround Linux 5.5+ bug (fixed in 5.13+)
+      # https://yhbt.net/lore/lkml/20210405231025.33829-1-dave@stgolabs.net/
+      @wthr[0] && @queues[0].respond_to?(:queue_del) and @queues.each do |q|
+        q.queue_del(@quitter)
+        q.queue_add(@quitter, Yahns::Queue::QEV_QUIT)
+      end
     end while @wthr[0] && Yahns.now <= @shutdown_expire
 
     # cleanup, our job is done
     @queues.each(&:close).clear
-
-    # we must not let quitter get GC-ed if we have any worker threads leftover
-    @quitter = quitter
-
-    quitter.close
+    @quitter.close # keep object around in case @wthr isn't empty
   rescue => e
     Yahns::Log.exception(@logger, "quit finish", e)
   ensure