From c7750815d56c98911c1dc4db26a8778657b45a1b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 29 Oct 2009 13:15:01 -0700 Subject: fix reliability of timeout kills The method introduced in commit 6c8a3d3c55997978bacaecc5dbbb7d03c2fee345 to avoid killing workers after suspend/hibernate interacted badly with the change for OpenBSD fchmod(2) compatibility introduced with the 0.93.3 release. This interaction led to workers with files stuck in the zero state never being murdered off for timeout violations. Additionally, the method to avoid killing processes off was never completely reliable and has been reworked to work even if we entered suspend/hibernate/STOP during client processing. This regression was discovered during continued development of the Rainbows! test suite (which we will bring over as it becomes ready). --- lib/unicorn.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/unicorn.rb b/lib/unicorn.rb index 681b7d9..7081164 100644 --- a/lib/unicorn.rb +++ b/lib/unicorn.rb @@ -277,6 +277,7 @@ module Unicorn # are trapped. See trap_deferred init_self_pipe! respawn = true + last_check = Time.now QUEUE_SIGS.each { |sig| trap_deferred(sig) } trap(:CHLD) { |sig_nr| awaken_master } @@ -287,7 +288,11 @@ module Unicorn reap_all_workers case SIG_QUEUE.shift when nil - murder_lazy_workers + # avoid murdering workers after our master process (or the + # machine) comes out of suspend/hibernation + if (last_check + timeout) >= (last_check = Time.now) + murder_lazy_workers + end maintain_worker_count if respawn master_sleep when :QUIT # graceful shutdown @@ -475,7 +480,6 @@ module Unicorn kill_worker(:QUIT, wpid) next end - stat.mode == 0100000 and next (diff = (Time.now - stat.ctime)) <= timeout and next logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \ "(#{diff}s > #{timeout}s), killing" -- cgit v1.2.3-24-ge0c7