about summary refs log tree commit homepage
path: root/lib/unicorn/http_request.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/unicorn/http_request.rb')
-rw-r--r-- lib/unicorn/http_request.rb 118
1 files changed, 60 insertions, 58 deletions
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index ee407ab..7106f62 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -13,6 +13,20 @@ module Unicorn
   #
   class HttpRequest
 
+     # default parameters we merge into the request env for Rack handlers
+     DEF_PARAMS = {
+       "rack.errors" => $stderr,
+       "rack.multiprocess" => true,
+       "rack.multithread" => false,
+       "rack.run_once" => false,
+       "rack.url_scheme" => "http",
+       "rack.version" => [0, 1],
+       "SCRIPT_NAME" => "",
+
+       # this is not in the Rack spec, but some apps may rely on it
+       "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}"
+     }.freeze
+
     def initialize(logger)
       @logger = logger
       @body = nil
@@ -29,59 +43,39 @@ module Unicorn
       @body = nil
     end
 
-    #
     # Does the majority of the IO processing.  It has been written in
-    # Ruby using about 7 different IO processing strategies and no
-    # matter how it's done the performance just does not improve.  It is
-    # currently carefully constructed to make sure that it gets the best
-    # possible performance, but anyone who thinks they can make it
-    # faster is more than welcome to take a crack at it.
+    # Ruby using about 8 different IO processing strategies.
+    #
+    # It is currently carefully constructed to make sure that it gets
+    # the best possible performance for the common case: GET requests
+    # that are fully complete after a single read(2).
+    #
+    # Anyone who thinks they can make it faster is more than welcome to
+    # take a crack at it.
     #
     # returns an environment hash suitable for Rack if successful
     # This does minimal exception trapping and it is up to the caller
     # to handle any socket errors (e.g. user aborted upload).
     def read(socket)
-      data = String.new(read_socket(socket))
-      nparsed = 0
-
-      # Assumption: nparsed will always be less since data will get
-      # filled with more after each parsing.  If it doesn't get more
-      # then there was a problem with the read operation on the client
-      # socket.  Effect is to stop processing when the socket can't
-      # fill the buffer for further parsing.
-      while nparsed < data.length
-        nparsed = @parser.execute(@params, data, nparsed)
-
-        if @parser.finished?
-          # From http://www.ietf.org/rfc/rfc3875:
-          # "Script authors should be aware that the REMOTE_ADDR and
-          #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
-          #  may not identify the ultimate source of the request.  They
-          #  identify the client for the immediate request to the server;
-          #  that client may be a proxy, gateway, or other intermediary
-          #  acting on behalf of the actual source client."
-          @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr
-
-          handle_body(socket) and return rack_env # success!
-          return nil # fail
-        else
-          # Parser is not done, queue up more data to read and continue
-          # parsing
-          data << read_socket(socket)
-          if data.length >= Const::MAX_HEADER
-            raise HttpParserError.new("HEADER is longer than allowed, " \
-                                      "aborting client early.")
-          end
-        end
+      # short circuit the common case with small GET requests first
+      @parser.execute(@params, read_socket(socket)) and
+          return handle_body(socket)
+
+      data = @buffer.dup # read_socket will clobber @buffer
+
+      # Parser is not done; queue up more data to read and continue parsing.
+      # An exception raised by @parser will throw us out of the loop.
+      loop do
+        data << read_socket(socket)
+        @parser.execute(@params, data) and
+            return handle_body(socket)
       end
-      nil # XXX bug?
       rescue HttpParserError => e
         @logger.error "HTTP parse error, malformed request " \
                       "(#{@params[Const::HTTP_X_FORWARDED_FOR] ||
                           socket.unicorn_peeraddr}): #{e.inspect}"
         @logger.error "REQUEST DATA: #{data.inspect}\n---\n" \
                       "PARAMS: #{@params.inspect}\n---\n"
-        socket.closed? or socket.close rescue nil
         nil
     end
 
@@ -109,7 +103,7 @@ module Unicorn
       # This will probably truncate them but at least the request goes through
       # usually.
       if remain > 0
-        read_body(socket, remain) or return false # fail!
+        read_body(socket, remain) or return nil # fail!
       end
       @body.rewind
       @body.sysseek(0) if @body.respond_to?(:sysseek)
@@ -118,29 +112,37 @@ module Unicorn
       # another request, we'll truncate it.  Again, we don't do pipelining
       # or keepalive
       @body.truncate(content_length)
-      true
+      rack_env(socket)
     end
 
     # Returns an environment which is rackable:
     # http://rack.rubyforge.org/doc/files/SPEC.html
     # Based on Rack's old Mongrel handler.
-    def rack_env
+    def rack_env(socket)
+      # I'm considering enabling "unicorn.client".  It gives
+      # applications some rope to do some "interesting" things like
+      # replacing a worker with another process that has full control
+      # over the HTTP response.
+      # @params["unicorn.client"] = socket
+
+      # From http://www.ietf.org/rfc/rfc3875:
+      # "Script authors should be aware that the REMOTE_ADDR and
+      #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
+      #  may not identify the ultimate source of the request.  They
+      #  identify the client for the immediate request to the server;
+      #  that client may be a proxy, gateway, or other intermediary
+      #  acting on behalf of the actual source client."
+      @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr
+
       # It might be a dumbass full host request header
-      @params[Const::REQUEST_PATH] ||=
-                           URI.parse(@params[Const::REQUEST_URI]).path
-      raise "No REQUEST PATH" unless @params[Const::REQUEST_PATH]
-
-      @params["QUERY_STRING"] ||= ''
-      @params.update({ "rack.version" => [0,1],
-                      "rack.input" => @body,
-                      "rack.errors" => $stderr,
-                      "rack.multithread" => false,
-                      "rack.multiprocess" => true,
-                      "rack.run_once" => false,
-                      "rack.url_scheme" => "http",
-                      Const::PATH_INFO => @params[Const::REQUEST_PATH],
-                      Const::SCRIPT_NAME => "",
-                    })
+      @params[Const::PATH_INFO] = (
+          @params[Const::REQUEST_PATH] ||=
+              URI.parse(@params[Const::REQUEST_URI]).path) or
+         raise "No REQUEST_PATH"
+
+      @params[Const::QUERY_STRING] ||= ''
+      @params[Const::RACK_INPUT] = @body
+      @params.update(DEF_PARAMS)
     end
 
     # Does the heavy lifting of properly reading the larger body requests in