unicorn Ruby/Rack server user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
* [PATCH] Send SIGTERM before SIGKILL on timeout
@ 2018-02-24  8:48 Fumiaki MATSUSHIMA
  2018-02-24 15:15 ` Eric Wong
  0 siblings, 1 reply; 3+ messages in thread
From: Fumiaki MATSUSHIMA @ 2018-02-24  8:48 UTC (permalink / raw)
  To: unicorn-public; +Cc: mtsmfm

To output log / send error to error tracking service,
we need to receive a signal other than SIGKILL first.
---
Hi Unicorn team,

I'm not sure this change is accetable though,
I can find some articles and patches to prevent SIGKILL
on timeout.
I think it's great if this feature is supported by unicorn itself.

Could you give me your opinion?

 lib/unicorn/configurator.rb | 16 +++++++++++++++-
 lib/unicorn/http_server.rb  | 27 +++++++++++++++++----------
 test/unit/test_signals.rb   | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/lib/unicorn/configurator.rb b/lib/unicorn/configurator.rb
index f34d38b..f854032 100644
--- a/lib/unicorn/configurator.rb
+++ b/lib/unicorn/configurator.rb
@@ -30,6 +30,7 @@ class Unicorn::Configurator
   # Default settings for Unicorn
   DEFAULTS = {
     :timeout => 60,
+    :sigterm_timeout => 60,
     :logger => Logger.new($stderr),
     :worker_processes => 1,
     :after_fork => lambda { |server, worker|
@@ -237,11 +238,24 @@ def before_exec(*args, &block)
   #
   # See http://nginx.org/en/docs/http/ngx_http_upstream_module.html
   # for more details on nginx upstream configuration.
-  def timeout(seconds)
+  #
+  # The following options may be specified:
+  #
+  # [:sigterm => seconds]
+  #
+  #   Send SIGTERM when worker process is timed out.
+  #   Workers can't output backtrace if it's received SIGKILL
+  #   so it's useful to send SIGTERM before SIGKILL to find slow codes.
+  #   If you specify sigterm greater than or equal to timeout, workers will be always killed by SIGKILL.
+  #
+  #   Default: same seconds as the timeout
+  def timeout(seconds, options = {})
     set_int(:timeout, seconds, 3)
     # POSIX says 31 days is the smallest allowed maximum timeout for select()
     max = 30 * 60 * 60 * 24
     set[:timeout] = seconds > max ? max : seconds
+
+    set_int(:sigterm_timeout, options[:sigterm] || set[:timeout], 3)
   end

   # Whether to exec in each worker process after forking.  This changes the
diff --git a/lib/unicorn/http_server.rb b/lib/unicorn/http_server.rb
index 8674729..da7c420 100644
--- a/lib/unicorn/http_server.rb
+++ b/lib/unicorn/http_server.rb
@@ -11,7 +11,7 @@
 # See Unicorn::Configurator for information on how to configure unicorn.
 class Unicorn::HttpServer
   # :stopdoc:
-  attr_accessor :app, :timeout, :worker_processes,
+  attr_accessor :app, :timeout, :sigterm_timeout, :worker_processes,
                 :before_fork, :after_fork, :before_exec,
                 :listener_opts, :preload_app,
                 :orig_app, :config, :ready_pipe, :user
@@ -284,10 +284,10 @@ def join
       when nil
         # avoid murdering workers after our master process (or the
         # machine) comes out of suspend/hibernation
-        if (last_check + @timeout) >= (last_check = time_now)
+        if (last_check + @sigterm_timeout) >= (last_check = time_now)
           sleep_time = murder_lazy_workers
         else
-          sleep_time = @timeout/2.0 + 1
+          sleep_time = @sigterm_timeout/2.0 + 1
           @logger.debug("waiting #{sleep_time}s after suspend/hibernation")
         end
         maintain_worker_count if respawn
@@ -495,21 +495,29 @@ def close_sockets_on_exec(sockets)

   # forcibly terminate all workers that haven't checked in in timeout seconds.  The timeout is implemented using an unlinked File
   def murder_lazy_workers
-    next_sleep = @timeout - 1
+    next_sleep = @sigterm_timeout - 1
     now = time_now.to_i
     @workers.dup.each_pair do |wpid, worker|
       tick = worker.tick
       0 == tick and next # skip workers that haven't processed any clients
       diff = now - tick
-      tmp = @timeout - diff
+      tmp = @sigterm_timeout - diff
+
       if tmp >= 0
         next_sleep > tmp and next_sleep = tmp
         next
       end
       next_sleep = 0
-      logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \
-                   "(#{diff}s > #{@timeout}s), killing"
-      kill_worker(:KILL, wpid) # take no prisoners for timeout violations
+
+      if diff > @timeout
+        logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \
+        "(#{diff}s > #{@timeout}s), killing with SIGKILL"
+        kill_worker(:KILL, wpid) # take no prisoners for timeout violations
+      elsif diff > @sigterm_timeout
+        logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \
+        "(#{diff}s > #{@sigterm_timeout}s), killing with SIGTERM"
+        kill_worker(:TERM, wpid) # take no prisoners for timeout violations
+      end
     end
     next_sleep <= 0 ? 1 : next_sleep
   end
@@ -655,7 +663,6 @@ def init_worker_process(worker)
     LISTENERS.each { |sock| sock.close_on_exec = true }

     worker.user(*user) if user.kind_of?(Array) && ! worker.switched
-    self.timeout /= 2.0 # halve it for select()
     @config = nil
     build_app! unless preload_app
     @after_fork = @listener_opts = @orig_app = nil
@@ -718,7 +725,7 @@ def worker_loop(worker)

       # timeout used so we can detect parent death:
       worker.tick = time_now.to_i
-      ret = IO.select(readers, nil, nil, @timeout) and ready = ret[0]
+      ret = IO.select(readers, nil, nil, @sigterm_timeout / 2.0) and ready = ret[0]
     rescue => e
       redo if nr < 0 && readers[0]
       Unicorn.log_error(@logger, "listen loop error", e) if readers[0]
diff --git a/test/unit/test_signals.rb b/test/unit/test_signals.rb
index 4592819..dc74a9b 100644
--- a/test/unit/test_signals.rb
+++ b/test/unit/test_signals.rb
@@ -118,6 +118,49 @@ def test_timeout_slow_response
       Process.kill(:TERM, pid) rescue nil
   end

+  def test_timeout_slow_response_with_sigterm
+    pid = fork {
+      app = lambda { |env|
+        Signal.trap(:TERM) {
+          puts 'Killed by SIGTERM'
+
+          exit
+        }
+
+        sleep
+      }
+      redirect_test_io {
+        Tempfile.open(['config', '.rb']) { |file|
+          file.write(<<-EOS)
+            timeout 100, sigterm: 3
+          EOS
+
+          file.flush
+
+          HttpServer.new(app, @server_opts.merge(config_file: file.path)).start.join
+        }
+      }
+    }
+    t0 = Time.now
+    wait_workers_ready("test_stderr.#{pid}.log", 1)
+    sock = TCPSocket.new('127.0.0.1', @port)
+    sock.syswrite("GET / HTTP/1.0\r\n\r\n")
+
+    buf = nil
+    assert_raises(EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,
+                  Errno::EBADF) do
+      buf = sock.sysread(4096)
+    end
+    diff = Time.now - t0
+    assert_nil buf
+    assert diff > 1.0, "diff was #{diff.inspect}"
+    assert diff < 60.0
+    assert_equal "Killed by SIGTERM\n", File.read("test_stdout.#{pid}.log")
+    wait_workers_ready("test_stderr.#{pid}.log", 1)
+    ensure
+      Process.kill(:TERM, pid) rescue nil
+  end
+
   def test_response_write
     app = lambda { |env|
       [ 200, { 'Content-Type' => 'text/plain', 'X-Pid' => Process.pid.to_s },
--
2.14.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-02-27  4:04 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-24  8:48 [PATCH] Send SIGTERM before SIGKILL on timeout Fumiaki MATSUSHIMA
2018-02-24 15:15 ` Eric Wong
2018-02-27  4:03   ` Fumiaki Matsushima

Code repositories for project(s) associated with this public inbox

	https://yhbt.net/unicorn.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).