From 8ebc0c9fcd941b8e8b31db9237c5d50b082aff0d Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 6 Feb 2009 21:31:04 -0800
Subject: Refactor HTTP Request processing into HttpRequest

Keeping I/O out of unicorn.rb
---
 lib/unicorn.rb              | 86 +++++++---------------------------------
 lib/unicorn/http_request.rb | 95 ++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 95 insertions(+), 86 deletions(-)

(limited to 'lib')

diff --git a/lib/unicorn.rb b/lib/unicorn.rb
index aec69fb..dc0b339 100644
--- a/lib/unicorn.rb
+++ b/lib/unicorn.rb
@@ -63,8 +63,6 @@ module Unicorn
     def initialize(app, options = {})
       @app = app
       @workers = WorkerTable.new
-      @parser = HttpParser.new
-      @params = Hash.new
 
       (DEFAULTS.to_a + options.to_a).each do |key, value|
         instance_variable_set("@#{key.to_s.downcase}", value)
@@ -73,81 +71,25 @@ module Unicorn
       @listeners.map! { |address| Socket.unicorn_server_new(address, 1024) }
     end
 
-    # Does the majority of the IO processing. It has been written in Ruby using
-    # about 7 different IO processing strategies and no matter how it's done
-    # the performance just does not improve. It is currently carefully constructed
-    # to make sure that it gets the best possible performance, but anyone who
-    # thinks they can make it faster is more than welcome to take a crack at it.
     def process_client(client)
+      env = @request.read(client) or return
+      app_response = @app.call(env)
+      HttpResponse.write(client, app_response)
+    rescue EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF
+      client.close rescue nil
+    rescue Object => e
+      logger.error "Read error: #{e.inspect}"
+      logger.error e.backtrace.join("\n")
+    ensure
       begin
-        parser, params = @parser, @params
-        parser.reset
-        params.clear
-        buffer = @request.buffer
-        data = String.new(client.sysread(Const::CHUNK_SIZE, buffer))
-        nparsed = 0
-
-        # Assumption: nparsed will always be less since data will get filled with more
-        # after each parsing. If it doesn't get more then there was a problem
-        # with the read operation on the client socket. Effect is to stop processing when the
-        # socket can't fill the buffer for further parsing.
-        while nparsed < data.length
-          nparsed = parser.execute(params, data, nparsed)
-
-          if parser.finished?
-            if !params[Const::REQUEST_PATH]
-              # It might be a dumbass full host request header
-              uri = URI.parse(params[Const::REQUEST_URI])
-              params[Const::REQUEST_PATH] = uri.path
-            end
-
-            raise "No REQUEST PATH" if !params[Const::REQUEST_PATH]
-
-            params[Const::PATH_INFO] = params[Const::REQUEST_PATH]
-            params[Const::SCRIPT_NAME] = ""
-
-            # From http://www.ietf.org/rfc/rfc3875 :
-            # "Script authors should be aware that the REMOTE_ADDR and REMOTE_HOST
-            #  meta-variables (see sections 4.1.8 and 4.1.9) may not identify the
-            #  ultimate source of the request. They identify the client for the
-            #  immediate request to the server; that client may be a proxy, gateway,
-            #  or other intermediary acting on behalf of the actual source client."
-            params[Const::REMOTE_ADDR] = client.unicorn_peeraddr.last
-
-            env = @request.consume(params, client) or break
-            app_response = @app.call(env)
-            HttpResponse.write(client, app_response)
-            break #done
-          else
-            # Parser is not done, queue up more data to read and continue
-            # parsing
-            data << client.sysread(Const::CHUNK_SIZE, buffer)
-            if data.length >= Const::MAX_HEADER
-              raise HttpParserError.new("HEADER is longer than allowed, aborting client early.")
-            end
-          end
-        end
-      rescue EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF
-        client.close rescue nil
-      rescue HttpParserError => e
-        logger.error "HTTP parse error, malformed request (#{params[Const::HTTP_X_FORWARDED_FOR] || client.unicorn_peeraddr.last}): #{e.inspect}"
-        logger.error "REQUEST DATA: #{data.inspect}\n---\nPARAMS: #{params.inspect}\n---\n"
-      rescue Errno::EMFILE
-        logger.error "too many files"
+        client.close
+      rescue IOError
+        # Already closed
       rescue Object => e
-        logger.error "Read error: #{e.inspect}"
+        logger.error "Client error: #{e.inspect}"
         logger.error e.backtrace.join("\n")
-      ensure
-        begin
-          client.close
-        rescue IOError
-          # Already closed
-        rescue Object => e
-          logger.error "Client error: #{e.inspect}"
-          logger.error e.backtrace.join("\n")
-        end
-        @request.reset
       end
+      @request.reset
     end
 
     # Runs the thing. Returns a hash keyed by pid with worker number values
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index 6bbf26e..1f95abf 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -6,25 +6,86 @@ module Unicorn
   # a StringIO object. To be safe, you should assume it works like a file.
   #
   class HttpRequest
-    attr_reader :logger, :buffer
 
     def initialize(logger)
       @logger = logger
       @tempfile = @body = nil
       @buffer = ' ' * Const::CHUNK_SIZE # initial size, may grow
+      @parser = HttpParser.new
+      @params = Hash.new
     end
 
     def reset
+      @parser.reset
+      @params.clear
       @body.truncate(0) rescue nil
       @body.close rescue nil
       @body = nil
     end
 
+    #
+    # Does the majority of the IO processing. It has been written in
+    # Ruby using about 7 different IO processing strategies and no
+    # matter how it's done the performance just does not improve. It is
+    # currently carefully constructed to make sure that it gets the best
+    # possible performance, but anyone who thinks they can make it
+    # faster is more than welcome to take a crack at it.
+    #
     # returns an environment hash suitable for Rack if successful
-    # returns nil if the socket closed prematurely (e.g. user aborted upload)
-    def consume(params, socket)
-      http_body = params[Const::HTTP_BODY]
-      content_length = params[Const::CONTENT_LENGTH].to_i
+    # This does minimal exception trapping and it is up to the caller
+    # to handle any socket errors (e.g. user aborted upload).
+    def read(socket)
+      data = String.new(socket.sysread(Const::CHUNK_SIZE, @buffer))
+      nparsed = 0
+
+      # Assumption: nparsed will always be less since data will get
+      # filled with more after each parsing. If it doesn't get more
+      # then there was a problem with the read operation on the client
+      # socket. Effect is to stop processing when the socket can't
+      # fill the buffer for further parsing.
+      while nparsed < data.length
+        nparsed = @parser.execute(@params, data, nparsed)
+
+        if @parser.finished?
+          # From http://www.ietf.org/rfc/rfc3875:
+          # "Script authors should be aware that the REMOTE_ADDR and
+          #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
+          #  may not identify the ultimate source of the request. They
+          #  identify the client for the immediate request to the server;
+          #  that client may be a proxy, gateway, or other intermediary
+          #  acting on behalf of the actual source client."
+          @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr.last
+
+          handle_body(socket) and return rack_env # success!
+          return nil # fail
+        else
+          # Parser is not done, queue up more data to read and continue
+          # parsing
+          data << socket.sysread(Const::CHUNK_SIZE, @buffer)
+          if data.length >= Const::MAX_HEADER
+            raise HttpParserError.new("HEADER is longer than allowed, " \
+                                      "aborting client early.")
+          end
+        end
+      end
+      nil # XXX bug?
+    rescue HttpParserError => e
+      @logger.error "HTTP parse error, malformed request " \
+                    "(#{@params[Const::HTTP_X_FORWARDED_FOR] ||
+                        socket.unicorn_peeraddr.last}): #{e.inspect}"
+      @logger.error "REQUEST DATA: #{data.inspect}\n---\n" \
+                    "PARAMS: #{@params.inspect}\n---\n"
+      socket.close rescue nil
+      nil
+    end
+
+    private
+
+    # Handles dealing with the rest of the request
+    # returns true if successful, false if not
+    def handle_body(socket)
+      http_body = @params[Const::HTTP_BODY]
+      content_length = @params[Const::CONTENT_LENGTH].to_i
       remain = content_length - http_body.length
 
       # must read more data to complete body
@@ -36,34 +97,40 @@ module Unicorn
         @body = File.open(@tempfile.path, "wb+")
         @body.sync = true
         @body.syswrite(http_body)
-        @body
       end
 
       # Some clients (like FF1.0) report 0 for body and then send a body.
       # This will probably truncate them but at least the request goes through
       # usually.
       if remain > 0
-        read_body(socket, remain) or return nil # fail!
+        read_body(socket, remain) or return false # fail!
      end
       @body.rewind
       @body.sysseek(0) if @body.respond_to?(:sysseek)
-      rack_env(params)
+      true
     end
 
     # Returns an environment which is rackable:
     # http://rack.rubyforge.org/doc/files/SPEC.html
-    # Copied directly from Rack's old Mongrel handler.
-    def rack_env(params)
-      params["QUERY_STRING"] ||= ''
-      params.delete "HTTP_CONTENT_TYPE"
-      params.delete "HTTP_CONTENT_LENGTH"
-      params.update({ "rack.version" => [0,1],
+    # Based on Rack's old Mongrel handler.
+    def rack_env
+      # It might be a dumbass full host request header
+      @params[Const::REQUEST_PATH] ||=
+                     URI.parse(@params[Const::REQUEST_URI]).path
+      raise "No REQUEST PATH" unless @params[Const::REQUEST_PATH]
+
+      @params["QUERY_STRING"] ||= ''
+      @params.delete "HTTP_CONTENT_TYPE"
+      @params.delete "HTTP_CONTENT_LENGTH"
+      @params.update({ "rack.version" => [0,1],
                       "rack.input" => @body,
                       "rack.errors" => STDERR,
                       "rack.multithread" => false,
                       "rack.multiprocess" => true,
                       "rack.run_once" => false,
                       "rack.url_scheme" => "http",
+                      Const::PATH_INFO => @params[Const::REQUEST_PATH],
+                      Const::SCRIPT_NAME => "",
                      })
     end
 
--
cgit v1.2.3-24-ge0c7