From 8f98c7d125e817d1175ba359375baddf28db4b7b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 21 Feb 2009 06:36:04 -0800 Subject: Fix+test reexec error handling on bad inputs People can screw config files up, it's not my fault if they do, but they do... Don't let the original process get wedged if we can help it.. --- lib/unicorn.rb | 35 +++++++++----- test/exec/test_exec.rb | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 11 deletions(-) diff --git a/lib/unicorn.rb b/lib/unicorn.rb index 838ab11..5b12fc8 100644 --- a/lib/unicorn.rb +++ b/lib/unicorn.rb @@ -155,7 +155,7 @@ module Unicorn @rd_sig, @wr_sig = IO.pipe unless (@rd_sig && @wr_sig) @rd_sig.nonblock = @wr_sig.nonblock = true - %w(QUIT INT TERM USR1 USR2 HUP).each { |sig| trap_deferred(sig) } + reset_master $0 = "unicorn master" logger.info "master process ready" # test relies on this message begin @@ -172,26 +172,23 @@ module Unicorn break when 'USR1' # user-defined (probably something like log reopening) kill_each_worker('USR1') - @mode = :idle - trap_deferred('USR1') + reset_master when 'USR2' # exec binary, stay alive in case something went wrong reexec - @mode = :idle - trap_deferred('USR2') + reset_master when 'HUP' if @config.config_file load_config! - @mode = :idle - trap_deferred('HUP') + reset_master redo # immediate reaping since we may have QUIT workers else # exec binary and exit if there's no config file - logger.info "config_file not present, reexecutingn binary" + logger.info "config_file not present, reexecuting binary" reexec break end else logger.error "master process in unknown mode: #{@mode}, resetting" - @mode = :idle + reset_master end reap_all_workers @@ -210,6 +207,7 @@ module Unicorn rescue Object => e logger.error "Unhandled master loop exception #{e.inspect}." logger.error e.backtrace.join("\n") + reset_master retry end stop # gracefully shutdown all workers on our way out @@ -234,10 +232,16 @@ module Unicorn private + # list of signals we care about and trap in master. + TRAP_SIGS = %w(QUIT INT TERM USR1 USR2 HUP).map { |x| x.freeze }.freeze + # defer a signal for later processing in #join (master process) def trap_deferred(signal) trap(signal) do |sig_nr| - trap(signal, 'IGNORE') # prevent double signalling + # we only handle/defer one signal at a time and ignore all others + # until we're ready again. Queueing signals can lead to more bugs, + # and simplicity is the most important thing + TRAP_SIGS.each { |sig| trap(sig, 'IGNORE') } if Symbol === @mode @mode = signal begin @@ -250,6 +254,12 @@ module Unicorn end end + + def reset_master + @mode = :idle + TRAP_SIGS.each { |sig| trap_deferred(sig) } + end + # reaps all unreaped workers def reap_all_workers begin @@ -292,6 +302,9 @@ module Unicorn logger.error "old PID:#{valid_pid?(old_pid)} running with " \ "existing pid=#{old_pid}, refusing rexec" return + rescue Object => e + logger.error "error writing pid=#{old_pid} #{e.class} #{e.message}" + return end end @@ -366,7 +379,7 @@ module Unicorn # traps for USR1, USR2, and HUP may be set in the @after_fork Proc # by the user. def init_worker_process(worker) - %w(TERM INT QUIT USR1 USR2 HUP).each { |sig| trap(sig, 'IGNORE') } + TRAP_SIGS.each { |sig| trap(sig, 'IGNORE') } trap('CHLD', 'DEFAULT') $0 = "unicorn worker[#{worker.nr}]" @rd_sig.close if @rd_sig diff --git a/test/exec/test_exec.rb b/test/exec/test_exec.rb index 7a4e59b..8974faf 100644 --- a/test/exec/test_exec.rb +++ b/test/exec/test_exec.rb @@ -109,6 +109,133 @@ end # after_fork assert_shutdown(pid) end + def test_broken_reexec_config + File.open("config.ru", "wb") { |fp| fp.syswrite(HI) } + pid_file = "#{@tmpdir}/test.pid" + old_file = "#{pid_file}.oldbin" + ucfg = Tempfile.new('unicorn_test_config') + ucfg.syswrite("listeners %w(#{@addr}:#{@port})\n") + ucfg.syswrite("pid %(#{pid_file})\n") + ucfg.syswrite("logger Logger.new(%(#{@tmpdir}/log))\n") + pid = fork do + redirect_test_io do + exec($unicorn_bin, "-D", "-l#{@addr}:#{@port}", "-c#{ucfg.path}") + end + end + results = retry_hit(["http://#{@addr}:#{@port}/"]) + assert_equal String, results[0].class + + wait_for_file(pid_file) + Process.waitpid(pid) + Process.kill('USR2', File.read(pid_file).to_i) + wait_for_file(old_file) + wait_for_file(pid_file) + Process.kill('QUIT', File.read(old_file).to_i) + + ucfg.syswrite("timeout %(#{pid_file})\n") # introduce a bug + current_pid = File.read(pid_file).to_i + Process.kill('USR2', current_pid) + + # wait for pid_file to restore itself + tries = 100 + begin + while current_pid != File.read(pid_file).to_i + sleep(0.1) and (tries -= 1) > 0 + end + rescue Errno::ENOENT + (sleep(0.1) and (tries -= 1) > 0) and retry + end + assert_equal current_pid, File.read(pid_file).to_i + + tries = 100 + while File.exist?(old_file) + (sleep(0.1) and (tries -= 1) > 0) or break + end + assert ! File.exist?(old_file), "oldbin=#{old_file} gone" + port2 = unused_port(@addr) + + # fix the bug + ucfg.sysseek(0) + ucfg.truncate(0) + ucfg.syswrite("listeners %w(#{@addr}:#{@port} #{@addr}:#{port2})\n") + ucfg.syswrite("pid %(#{pid_file})\n") + Process.kill('USR2', current_pid) + wait_for_file(old_file) + wait_for_file(pid_file) + new_pid = File.read(pid_file).to_i + assert_not_equal current_pid, new_pid + assert_equal current_pid, File.read(old_file).to_i + results = retry_hit(["http://#{@addr}:#{@port}/", + "http://#{@addr}:#{port2}/"]) + assert_equal String, results[0].class + assert_equal String, results[1].class + + assert_nothing_raised do + Process.kill('QUIT', current_pid) + Process.kill('QUIT', new_pid) + end + end + + def test_broken_reexec_ru + File.open("config.ru", "wb") { |fp| fp.syswrite(HI) } + pid_file = "#{@tmpdir}/test.pid" + old_file = "#{pid_file}.oldbin" + ucfg = Tempfile.new('unicorn_test_config') + ucfg.syswrite("pid %(#{pid_file})\n") + ucfg.syswrite("logger Logger.new(%(#{@tmpdir}/log))\n") + pid = fork do + redirect_test_io do + exec($unicorn_bin, "-D", "-l#{@addr}:#{@port}", "-c#{ucfg.path}") + end + end + results = retry_hit(["http://#{@addr}:#{@port}/"]) + assert_equal String, results[0].class + + wait_for_file(pid_file) + Process.waitpid(pid) + Process.kill('USR2', File.read(pid_file).to_i) + wait_for_file(old_file) + wait_for_file(pid_file) + Process.kill('QUIT', File.read(old_file).to_i) + + File.unlink("config.ru") # break reloading + current_pid = File.read(pid_file).to_i + Process.kill('USR2', current_pid) + + # wait for pid_file to restore itself + tries = 100 + begin + while current_pid != File.read(pid_file).to_i + sleep(0.1) and (tries -= 1) > 0 + end + rescue Errno::ENOENT + (sleep(0.1) and (tries -= 1) > 0) and retry + end + assert_equal current_pid, File.read(pid_file).to_i + + tries = 100 + while File.exist?(old_file) + (sleep(0.1) and (tries -= 1) > 0) or break + end + assert ! File.exist?(old_file), "oldbin=#{old_file} gone" + + # fix the bug + File.open("config.ru", "wb") { |fp| fp.syswrite(HI) } + Process.kill('USR2', current_pid) + wait_for_file(old_file) + wait_for_file(pid_file) + new_pid = File.read(pid_file).to_i + assert_not_equal current_pid, new_pid + assert_equal current_pid, File.read(old_file).to_i + results = retry_hit(["http://#{@addr}:#{@port}/"]) + assert_equal String, results[0].class + + assert_nothing_raised do + Process.kill('QUIT', current_pid) + Process.kill('QUIT', new_pid) + end + end + def test_unicorn_config_listeners_overrides_cli port2 = unused_port(@addr) File.open("config.ru", "wb") { |fp| fp.syswrite(HI) } -- cgit v1.2.3-24-ge0c7