From fe0dd93cd9cb97b46f6cfb4b1e370e38717a93f0 Mon Sep 17 00:00:00 2001
From: Eric Wong <normalperson@yhbt.net>
Date: Fri, 29 Apr 2011 15:48:35 -0700
Subject: oob_gc: reimplement to fix breakage and add tests

This was broken since v3.3.1[1] and v1.1.6[2] since nginx relies on
a closed socket (and not Content-Length/Transfer-Encoding) to
detect a response completion.  We have to close the client
socket before invoking GC to ensure the client sees the response
in a timely manner.

[1] - commit b72a86f66c722d56a6d77ed1d2779ace6ad103ed
[2] - commit b7a0074284d33352bb9e732c660b29162f34bf0e

(cherry picked from commit faeb3223636c39ea8df4017dc9a9d39ac649b26d)

Conflicts:

	examples/big_app_gc.rb
	lib/unicorn/oob_gc.rb
---
 examples/big_app_gc.rb |  35 +---------------
 lib/unicorn/oob_gc.rb  | 111 +++++++++++++++++++++++++++----------------------
 t/oob_gc.ru            |  21 ++++++++++
 t/oob_gc_path.ru       |  21 ++++++++++
 t/t9001-oob_gc.sh      |  47 +++++++++++++++++++++
 t/t9002-oob_gc-path.sh |  75 +++++++++++++++++++++++++++++++++
 6 files changed, 227 insertions(+), 83 deletions(-)
 create mode 100644 t/oob_gc.ru
 create mode 100644 t/oob_gc_path.ru
 create mode 100755 t/t9001-oob_gc.sh
 create mode 100644 t/t9002-oob_gc-path.sh

diff --git a/examples/big_app_gc.rb b/examples/big_app_gc.rb
index 779c3ee..c4c8b04 100644
--- a/examples/big_app_gc.rb
+++ b/examples/big_app_gc.rb
@@ -1,33 +1,2 @@
-# Run GC after every request, before attempting to accept more connections.
-#
-# You could customize this patch to read REQ["PATH_INFO"] and only
-# call GC.start after expensive requests.
-#
-# We could have this wrap the response body.close as middleware, but the
-# scannable stack is would still be bigger than it would be here.
-#
-# This shouldn't hurt overall performance as long as the server cluster
-# is at <=50% CPU capacity, and improves the performance of most memory
-# intensive requests.  This serves to improve _client-visible_
-# performance (possibly at the cost of overall performance).
-#
-# We'll call GC after each request is been written out to the socket, so
-# the client never sees the extra GC hit it. It's ideal to call the GC
-# inside the HTTP server (vs middleware or hooks) since the stack is
-# smaller at this point, so the GC will both be faster and more
-# effective at releasing unused memory.
-#
-# This monkey patch is _only_ effective for applications that use a lot
-# of memory, and will hurt simpler apps/endpoints that can process
-# multiple requests before incurring GC.
-
-class Unicorn::HttpServer
-  REQ = Unicorn::HttpRequest::REQ
-  alias _process_client process_client
-  undef_method :process_client
-  def process_client(client)
-    _process_client(client)
-    REQ.clear
-    GC.start
-  end
-end if defined?(Unicorn)
+# see {Unicorn::OobGC}[http://unicorn.bogomips.org/Unicorn/OobGC.html]
+# Unicorn::OobGC was broken in Unicorn v3.3.1 - v3.6.1 and fixed in v3.6.2
diff --git a/lib/unicorn/oob_gc.rb b/lib/unicorn/oob_gc.rb
index 8dc4dcf..a0e8f1d 100644
--- a/lib/unicorn/oob_gc.rb
+++ b/lib/unicorn/oob_gc.rb
@@ -1,58 +1,69 @@
 # -*- encoding: binary -*-
-module Unicorn
 
-  # Run GC after every request, after closing the client socket and
-  # before attempting to accept more connections.
-  #
-  # This shouldn't hurt overall performance as long as the server cluster
-  # is at <50% CPU capacity, and improves the performance of most memory
-  # intensive requests.  This serves to improve _client-visible_
-  # performance (possibly at the cost of overall performance).
-  #
-  # We'll call GC after each request is been written out to the socket, so
-  # the client never sees the extra GC hit it.
-  #
-  # This middleware is _only_ effective for applications that use a lot
-  # of memory, and will hurt simpler apps/endpoints that can process
-  # multiple requests before incurring GC.
-  #
-  # This middleware is only designed to work with Unicorn, as it harms
-  # keepalive performance.
-  #
-  # Example (in config.ru):
-  #
-  #     require 'unicorn/oob_gc'
-  #
-  #     # GC ever two requests that hit /expensive/foo or /more_expensive/foo
-  #     # in your app.  By default, this will GC once every 5 requests
-  #     # for all endpoints in your app
-  #     use Unicorn::OobGC, 2, %r{\A/(?:expensive/foo|more_expensive/foo)}
-  class OobGC < Struct.new(:app, :interval, :path, :nr, :env, :body)
-
-    def initialize(app, interval = 5, path = %r{\A/})
-      super(app, interval, path, interval)
-    end
-
-    def call(env)
-      status, headers, self.body = app.call(self.env = env)
-      [ status, headers, self ]
-    end
+# Runs GC after requests, after closing the client socket and
+# before attempting to accept more connections.
+#
+# This shouldn't hurt overall performance as long as the server cluster
+# is at <50% CPU capacity, and improves the performance of most memory
+# intensive requests.  This serves to improve _client-visible_
+# performance (possibly at the cost of overall performance).
+#
+# Increasing the number of +worker_processes+ may be necessary to
+# improve average client response times because some of your workers
+# will be busy doing GC and unable to service clients.  Think of
+# using more workers with this module as a poor man's concurrent GC.
+#
+# We'll call GC after each request is been written out to the socket, so
+# the client never sees the extra GC hit it.
+#
+# This middleware is _only_ effective for applications that use a lot
+# of memory, and will hurt simpler apps/endpoints that can process
+# multiple requests before incurring GC.
+#
+# This middleware is only designed to work with unicorn, as it harms
+# performance with keepalive-enabled servers.
+#
+# Example (in config.ru):
+#
+#     require 'unicorn/oob_gc'
+#
+#     # GC ever two requests that hit /expensive/foo or /more_expensive/foo
+#     # in your app.  By default, this will GC once every 5 requests
+#     # for all endpoints in your app
+#     use Unicorn::OobGC, 2, %r{\A/(?:expensive/foo|more_expensive/foo)}
+#
+# Feedback from users of early implementations of this module:
+# * http://comments.gmane.org/gmane.comp.lang.ruby.unicorn.general/486
+# * http://article.gmane.org/gmane.comp.lang.ruby.unicorn.general/596
+module Unicorn::OobGC
 
-    def each(&block)
-      body.each(&block)
+  # this pretends to be Rack middleware because it used to be
+  # But we need to hook into unicorn internals so we need to close
+  # the socket before clearing the request env.
+  #
+  # +interval+ is the number of requests matching the +path+ regular
+  # expression before invoking GC.
+  def self.new(app, interval = 5, path = %r{\A/})
+    @@nr = interval
+    self.const_set :OOBGC_PATH, path
+    self.const_set :OOBGC_INTERVAL, interval
+    self.const_set :OOBGC_ENV, Unicorn::HttpRequest::REQ
+    ObjectSpace.each_object(Unicorn::HttpServer) do |s|
+      s.extend(self)
     end
+    app # pretend to be Rack middleware since it was in the past
+  end
 
-    # in Unicorn, this is closed _after_ the client socket
-    def close
-      body.close if body.respond_to?(:close)
-
-      if path =~ env['PATH_INFO'] && ((self.nr -= 1) <= 0)
-        self.nr = interval
-        self.body = nil
-        env.clear
-        GC.start
-      end
+  #:stopdoc:
+  PATH_INFO = "PATH_INFO"
+  def process_client(client)
+    super(client) # Unicorn::HttpServer#process_client
+    if OOBGC_PATH =~ OOBGC_ENV[PATH_INFO] && ((@@nr -= 1) <= 0)
+      @@nr = OOBGC_INTERVAL
+      OOBGC_ENV.clear
+      GC.start
     end
-
   end
+
+  # :startdoc:
 end
diff --git a/t/oob_gc.ru b/t/oob_gc.ru
new file mode 100644
index 0000000..c6035b6
--- /dev/null
+++ b/t/oob_gc.ru
@@ -0,0 +1,21 @@
+#\-E none
+require 'unicorn/oob_gc'
+use Rack::ContentLength
+use Rack::ContentType, "text/plain"
+use Unicorn::OobGC
+$gc_started = false
+
+# Mock GC.start
+def GC.start
+  ObjectSpace.each_object(BasicSocket) do |x|
+    next if Unicorn::HttpServer::LISTENERS.include?(x)
+    x.closed? or abort "not closed #{x}"
+  end
+  $gc_started = true
+end
+run lambda { |env|
+  if "/gc_reset" == env["PATH_INFO"] && "POST" == env["REQUEST_METHOD"]
+    $gc_started = false
+  end
+  [ 200, {}, [ "#$gc_started\n" ] ]
+}
diff --git a/t/oob_gc_path.ru b/t/oob_gc_path.ru
new file mode 100644
index 0000000..e936a85
--- /dev/null
+++ b/t/oob_gc_path.ru
@@ -0,0 +1,21 @@
+#\-E none
+require 'unicorn/oob_gc'
+use Rack::ContentLength
+use Rack::ContentType, "text/plain"
+use Unicorn::OobGC, 5, /BAD/
+$gc_started = false
+
+# Mock GC.start
+def GC.start
+  ObjectSpace.each_object(BasicSocket) do |x|
+    next if Unicorn::HttpServer::LISTENERS.include?(x)
+    x.closed? or abort "not closed #{x}"
+  end
+  $gc_started = true
+end
+run lambda { |env|
+  if "/gc_reset" == env["PATH_INFO"] && "POST" == env["REQUEST_METHOD"]
+    $gc_started = false
+  end
+  [ 200, {}, [ "#$gc_started\n" ] ]
+}
diff --git a/t/t9001-oob_gc.sh b/t/t9001-oob_gc.sh
new file mode 100755
index 0000000..dcd8100
--- /dev/null
+++ b/t/t9001-oob_gc.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+. ./test-lib.sh
+t_plan 9 "OobGC test"
+
+t_begin "setup and start" && {
+	unicorn_setup
+	unicorn -D -c $unicorn_config oob_gc.ru
+	unicorn_wait_start
+}
+
+t_begin "test default interval (4 requests)" && {
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "GC starting-request returns immediately" && {
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "GC is started after 5 requests" && {
+	test xtrue = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "reset GC" && {
+	test xfalse = x$(curl -vsSf -X POST http://$listen/gc_reset 2>> $tmp)
+}
+
+t_begin "test default interval again (3 requests)" && {
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "GC is started after 5 requests" && {
+	test xtrue = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "killing succeeds" && {
+	kill -QUIT $unicorn_pid
+}
+
+t_begin "check_stderr" && check_stderr
+dbgcat r_err
+
+t_done
diff --git a/t/t9002-oob_gc-path.sh b/t/t9002-oob_gc-path.sh
new file mode 100644
index 0000000..d4e795b
--- /dev/null
+++ b/t/t9002-oob_gc-path.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+. ./test-lib.sh
+t_plan 12 "OobGC test with limited path"
+
+t_begin "setup and start" && {
+	unicorn_setup
+	unicorn -D -c $unicorn_config oob_gc_path.ru
+	unicorn_wait_start
+}
+
+t_begin "test default is noop" && {
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "4 bad requests to bump counter" && {
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+}
+
+t_begin "GC-starting request returns immediately" && {
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+}
+
+t_begin "GC was started after 5 requests" && {
+	test xtrue = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "reset GC" && {
+	test xfalse = x$(curl -vsSf -X POST http://$listen/gc_reset 2>> $tmp)
+}
+
+t_begin "test default is noop" && {
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "4 bad requests to bump counter" && {
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+}
+
+t_begin "GC-starting request returns immediately" && {
+	test xfalse = x$(curl -vsSf http://$listen/BAD 2>> $tmp)
+}
+
+t_begin "GC was started after 5 requests" && {
+	test xtrue = x$(curl -vsSf http://$listen/ 2>> $tmp)
+}
+
+t_begin "killing succeeds" && {
+	kill -QUIT $unicorn_pid
+}
+
+t_begin "check_stderr" && check_stderr
+
+t_done
-- 
cgit v1.2.3-24-ge0c7