* [PATCH] Kill lazy workers with TERM before KILL
2014-04-16 3:35 [PATCH] Print stack for lazy workers Camilo Lopez
@ 2014-04-16 3:35 ` Camilo Lopez
2014-04-16 8:44 ` Eric Wong
0 siblings, 1 reply; 3+ messages in thread
From: Camilo Lopez @ 2014-04-16 3:35 UTC (permalink / raw)
To: mongrel-unicorn; +Cc: Camilo Lopez, admin
We want to know what was going on with a lazy worker, KILL is un-trappable so we
are sending TERM instead, then to make sure the process really dies we are
KILLing a second later.
---
lib/unicorn/http_server.rb | 31 +++++++++++++++++++------------
lib/unicorn/worker.rb | 7 ++++++-
2 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/lib/unicorn/http_server.rb b/lib/unicorn/http_server.rb
index 21cb9a1..1b2a33b 100644
--- a/lib/unicorn/http_server.rb
+++ b/lib/unicorn/http_server.rb
@@ -474,22 +474,29 @@ class Unicorn::HttpServer
end
# forcibly terminate all workers that haven't checked in in timeout seconds. The timeout is implemented using an unlinked File
+ # Workers will be killed with TERM first, so the backtrace can be printed, on
+ # the next call to murder_lazy_workers the worker will receive KILL.
def murder_lazy_workers
next_sleep = @timeout - 1
now = Time.now.to_i
WORKERS.dup.each_pair do |wpid, worker|
- tick = worker.tick
- 0 == tick and next # skip workers that haven't processed any clients
- diff = now - tick
- tmp = @timeout - diff
- if tmp >= 0
- next_sleep > tmp and next_sleep = tmp
- next
+ if worker.needs_sig_kill?
+ kill_worker(:KILL, wpid)
+ else
+ tick = worker.tick
+ 0 == tick and next # skip workers that haven't processed any clients
+ diff = now - tick
+ tmp = @timeout - diff
+ if tmp >= 0
+ next_sleep > tmp and next_sleep = tmp
+ next
+ end
+ next_sleep = 0
+ logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \
+ "(#{diff}s > #{@timeout}s), killing"
+ kill_worker(:TERM, wpid)
+ worker.needs_sig_kill = true
end
- next_sleep = 0
- logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \
- "(#{diff}s > #{@timeout}s), killing"
- kill_worker(:KILL, wpid) # take no prisoners for timeout violations
end
next_sleep <= 0 ? 1 : next_sleep
end
@@ -606,7 +613,7 @@ class Unicorn::HttpServer
def init_worker_process(worker)
worker.atfork_child
# we'll re-trap :QUIT later for graceful shutdown iff we accept clients
- EXIT_SIGS.each { |sig| trap(sig) { exit!(0) } }
+ EXIT_SIGS.each { |sig| trap(sig) { STDERR.puts(caller); exit!(0) } }
exit!(0) if (SIG_QUEUE & EXIT_SIGS)[0]
WORKER_QUEUE_SIGS.each { |sig| trap(sig, nil) }
trap(:CHLD, 'DEFAULT')
diff --git a/lib/unicorn/worker.rb b/lib/unicorn/worker.rb
index e74a1c9..4214500 100644
--- a/lib/unicorn/worker.rb
+++ b/lib/unicorn/worker.rb
@@ -11,7 +11,7 @@ require "raindrops"
class Unicorn::Worker
# :stopdoc:
attr_accessor :nr, :switched
- attr_writer :tmp
+ attr_writer :tmp, :needs_sig_kill
attr_reader :to_io # IO.select-compatible
PER_DROP = Raindrops::PAGE_SIZE / Raindrops::SIZE
@@ -25,6 +25,7 @@ class Unicorn::Worker
@nr = nr
@tmp = @switched = false
@to_io, @master = Unicorn.pipe
+ @needs_sig_kill = false
end
def atfork_child # :nodoc:
@@ -149,4 +150,8 @@ class Unicorn::Worker
Process.euid != uid and Process::UID.change_privilege(uid)
@switched = true
end
+
+ def needs_sig_kill?
+ @needs_sig_kill
+ end
end
--
1.9.2
_______________________________________________
Unicorn mailing list - mongrel-unicorn@rubyforge.org
http://rubyforge.org/mailman/listinfo/mongrel-unicorn
Do not quote signatures (like this one) or top post when replying
^ permalink raw reply related [flat|nested] 3+ messages in thread