From fe0dd93cd9cb97b46f6cfb4b1e370e38717a93f0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 29 Apr 2011 15:48:35 -0700 Subject: oob_gc: reimplement to fix breakage and add tests This was broken since v3.3.1[1] and v1.1.6[2] since nginx relies on a closed socket (and not Content-Length/Transfer-Encoding) to detect a response completion. We have to close the client socket before invoking GC to ensure the client sees the response in a timely manner. [1] - commit b72a86f66c722d56a6d77ed1d2779ace6ad103ed [2] - commit b7a0074284d33352bb9e732c660b29162f34bf0e (cherry picked from commit faeb3223636c39ea8df4017dc9a9d39ac649b26d) Conflicts: examples/big_app_gc.rb lib/unicorn/oob_gc.rb --- examples/big_app_gc.rb | 35 +--------------- lib/unicorn/oob_gc.rb | 111 +++++++++++++++++++++++++++---------------------- t/oob_gc.ru | 21 ++++++++++ t/oob_gc_path.ru | 21 ++++++++++ t/t9001-oob_gc.sh | 47 +++++++++++++++++++++ t/t9002-oob_gc-path.sh | 75 +++++++++++++++++++++++++++++++++ 6 files changed, 227 insertions(+), 83 deletions(-) create mode 100644 t/oob_gc.ru create mode 100644 t/oob_gc_path.ru create mode 100755 t/t9001-oob_gc.sh create mode 100644 t/t9002-oob_gc-path.sh diff --git a/examples/big_app_gc.rb b/examples/big_app_gc.rb index 779c3ee..c4c8b04 100644 --- a/examples/big_app_gc.rb +++ b/examples/big_app_gc.rb @@ -1,33 +1,2 @@ -# Run GC after every request, before attempting to accept more connections. -# -# You could customize this patch to read REQ["PATH_INFO"] and only -# call GC.start after expensive requests. -# -# We could have this wrap the response body.close as middleware, but the -# scannable stack is would still be bigger than it would be here. -# -# This shouldn't hurt overall performance as long as the server cluster -# is at <=50% CPU capacity, and improves the performance of most memory -# intensive requests. This serves to improve _client-visible_ -# performance (possibly at the cost of overall performance). -# -# We'll call GC after each request is been written out to the socket, so -# the client never sees the extra GC hit it. It's ideal to call the GC -# inside the HTTP server (vs middleware or hooks) since the stack is -# smaller at this point, so the GC will both be faster and more -# effective at releasing unused memory. -# -# This monkey patch is _only_ effective for applications that use a lot -# of memory, and will hurt simpler apps/endpoints that can process -# multiple requests before incurring GC. - -class Unicorn::HttpServer - REQ = Unicorn::HttpRequest::REQ - alias _process_client process_client - undef_method :process_client - def process_client(client) - _process_client(client) - REQ.clear - GC.start - end -end if defined?(Unicorn) +# see {Unicorn::OobGC}[http://unicorn.bogomips.org/Unicorn/OobGC.html] +# Unicorn::OobGC was broken in Unicorn v3.3.1 - v3.6.1 and fixed in v3.6.2 diff --git a/lib/unicorn/oob_gc.rb b/lib/unicorn/oob_gc.rb index 8dc4dcf..a0e8f1d 100644 --- a/lib/unicorn/oob_gc.rb +++ b/lib/unicorn/oob_gc.rb @@ -1,58 +1,69 @@ # -*- encoding: binary -*- -module Unicorn - # Run GC after every request, after closing the client socket and - # before attempting to accept more connections. - # - # This shouldn't hurt overall performance as long as the server cluster - # is at <50% CPU capacity, and improves the performance of most memory - # intensive requests. This serves to improve _client-visible_ - # performance (possibly at the cost of overall performance). - # - # We'll call GC after each request is been written out to the socket, so - # the client never sees the extra GC hit it. - # - # This middleware is _only_ effective for applications that use a lot - # of memory, and will hurt simpler apps/endpoints that can process - # multiple requests before incurring GC. - # - # This middleware is only designed to work with Unicorn, as it harms - # keepalive performance. - # - # Example (in config.ru): - # - # require 'unicorn/oob_gc' - # - # # GC ever two requests that hit /expensive/foo or /more_expensive/foo - # # in your app. By default, this will GC once every 5 requests - # # for all endpoints in your app - # use Unicorn::OobGC, 2, %r{\A/(?:expensive/foo|more_expensive/foo)} - class OobGC < Struct.new(:app, :interval, :path, :nr, :env, :body) - - def initialize(app, interval = 5, path = %r{\A/}) - super(app, interval, path, interval) - end - - def call(env) - status, headers, self.body = app.call(self.env = env) - [ status, headers, self ] - end +# Runs GC after requests, after closing the client socket and +# before attempting to accept more connections. +# +# This shouldn't hurt overall performance as long as the server cluster +# is at <50% CPU capacity, and improves the performance of most memory +# intensive requests. This serves to improve _client-visible_ +# performance (possibly at the cost of overall performance). +# +# Increasing the number of +worker_processes+ may be necessary to +# improve average client response times because some of your workers +# will be busy doing GC and unable to service clients. Think of +# using more workers with this module as a poor man's concurrent GC. +# +# We'll call GC after each request is been written out to the socket, so +# the client never sees the extra GC hit it. +# +# This middleware is _only_ effective for applications that use a lot +# of memory, and will hurt simpler apps/endpoints that can process +# multiple requests before incurring GC. +# +# This middleware is only designed to work with unicorn, as it harms +# performance with keepalive-enabled servers. +# +# Example (in config.ru): +# +# require 'unicorn/oob_gc' +# +# # GC ever two requests that hit /expensive/foo or /more_expensive/foo +# # in your app. By default, this will GC once every 5 requests +# # for all endpoints in your app +# use Unicorn::OobGC, 2, %r{\A/(?:expensive/foo|more_expensive/foo)} +# +# Feedback from users of early implementations of this module: +# * http://comments.gmane.org/gmane.comp.lang.ruby.unicorn.general/486 +# * http://article.gmane.org/gmane.comp.lang.ruby.unicorn.general/596 +module Unicorn::OobGC - def each(&block) - body.each(&block) + # this pretends to be Rack middleware because it used to be + # But we need to hook into unicorn internals so we need to close + # the socket before clearing the request env. + # + # +interval+ is the number of requests matching the +path+ regular + # expression before invoking GC. + def self.new(app, interval = 5, path = %r{\A/}) + @@nr = interval + self.const_set :OOBGC_PATH, path + self.const_set :OOBGC_INTERVAL, interval + self.const_set :OOBGC_ENV, Unicorn::HttpRequest::REQ + ObjectSpace.each_object(Unicorn::HttpServer) do |s| + s.extend(self) end + app # pretend to be Rack middleware since it was in the past + end - # in Unicorn, this is closed _after_ the client socket - def close - body.close if body.respond_to?(:close) - - if path =~ env['PATH_INFO'] && ((self.nr -= 1) <= 0) - self.nr = interval - self.body = nil - env.clear - GC.start - end + #:stopdoc: + PATH_INFO = "PATH_INFO" + def process_client(client) + super(client) # Unicorn::HttpServer#process_client + if OOBGC_PATH =~ OOBGC_ENV[PATH_INFO] && ((@@nr -= 1) <= 0) + @@nr = OOBGC_INTERVAL + OOBGC_ENV.clear + GC.start end - end + + # :startdoc: end diff --git a/t/oob_gc.ru b/t/oob_gc.ru new file mode 100644 index 0000000..c6035b6 --- /dev/null +++ b/t/oob_gc.ru @@ -0,0 +1,21 @@ +#\-E none +require 'unicorn/oob_gc' +use Rack::ContentLength +use Rack::ContentType, "text/plain" +use Unicorn::OobGC +$gc_started = false + +# Mock GC.start +def GC.start + ObjectSpace.each_object(BasicSocket) do |x| + next if Unicorn::HttpServer::LISTENERS.include?(x) + x.closed? or abort "not closed #{x}" + end + $gc_started = true +end +run lambda { |env| + if "/gc_reset" == env["PATH_INFO"] && "POST" == env["REQUEST_METHOD"] + $gc_started = false + end + [ 200, {}, [ "#$gc_started\n" ] ] +} diff --git a/t/oob_gc_path.ru b/t/oob_gc_path.ru new file mode 100644 index 0000000..e936a85 --- /dev/null +++ b/t/oob_gc_path.ru @@ -0,0 +1,21 @@ +#\-E none +require 'unicorn/oob_gc' +use Rack::ContentLength +use Rack::ContentType, "text/plain" +use Unicorn::OobGC, 5, /BAD/ +$gc_started = false + +# Mock GC.start +def GC.start + ObjectSpace.each_object(BasicSocket) do |x| + next if Unicorn::HttpServer::LISTENERS.include?(x) + x.closed? or abort "not closed #{x}" + end + $gc_started = true +end +run lambda { |env| + if "/gc_reset" == env["PATH_INFO"] && "POST" == env["REQUEST_METHOD"] + $gc_started = false + end + [ 200, {}, [ "#$gc_started\n" ] ] +} diff --git a/t/t9001-oob_gc.sh b/t/t9001-oob_gc.sh new file mode 100755 index 0000000..dcd8100 --- /dev/null +++ b/t/t9001-oob_gc.sh @@ -0,0 +1,47 @@ +#!/bin/sh +. ./test-lib.sh +t_plan 9 "OobGC test" + +t_begin "setup and start" && { + unicorn_setup + unicorn -D -c $unicorn_config oob_gc.ru + unicorn_wait_start +} + +t_begin "test default interval (4 requests)" && { + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "GC starting-request returns immediately" && { + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "GC is started after 5 requests" && { + test xtrue = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "reset GC" && { + test xfalse = x$(curl -vsSf -X POST http://$listen/gc_reset 2>> $tmp) +} + +t_begin "test default interval again (3 requests)" && { + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "GC is started after 5 requests" && { + test xtrue = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "killing succeeds" && { + kill -QUIT $unicorn_pid +} + +t_begin "check_stderr" && check_stderr +dbgcat r_err + +t_done diff --git a/t/t9002-oob_gc-path.sh b/t/t9002-oob_gc-path.sh new file mode 100644 index 0000000..d4e795b --- /dev/null +++ b/t/t9002-oob_gc-path.sh @@ -0,0 +1,75 @@ +#!/bin/sh +. ./test-lib.sh +t_plan 12 "OobGC test with limited path" + +t_begin "setup and start" && { + unicorn_setup + unicorn -D -c $unicorn_config oob_gc_path.ru + unicorn_wait_start +} + +t_begin "test default is noop" && { + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "4 bad requests to bump counter" && { + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) +} + +t_begin "GC-starting request returns immediately" && { + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) +} + +t_begin "GC was started after 5 requests" && { + test xtrue = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "reset GC" && { + test xfalse = x$(curl -vsSf -X POST http://$listen/gc_reset 2>> $tmp) +} + +t_begin "test default is noop" && { + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "4 bad requests to bump counter" && { + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) +} + +t_begin "GC-starting request returns immediately" && { + test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp) +} + +t_begin "GC was started after 5 requests" && { + test xtrue = x$(curl -vsSf http://$listen/ 2>> $tmp) +} + +t_begin "killing succeeds" && { + kill -QUIT $unicorn_pid +} + +t_begin "check_stderr" && check_stderr + +t_done -- cgit v1.2.3-24-ge0c7