path: root/lib/unicorn/http_request.rb
Diffstat (limited to 'lib/unicorn/http_request.rb')
-rw-r--r--  lib/unicorn/http_request.rb  220
1 file changed, 97 insertions(+), 123 deletions(-)
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index ce0e408..368305f 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -1,5 +1,4 @@
 require 'tempfile'
-require 'uri'
 require 'stringio'
 
 # compiled extension
@@ -13,165 +12,140 @@ module Unicorn
   #
   class HttpRequest
 
+    # default parameters we merge into the request env for Rack handlers
+    DEFAULTS = {
+      "rack.errors" => $stderr,
+      "rack.multiprocess" => true,
+      "rack.multithread" => false,
+      "rack.run_once" => false,
+      "rack.version" => [1, 0].freeze,
+      "SCRIPT_NAME" => "".freeze,
+
+      # this is not in the Rack spec, but some apps may rely on it
+      "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}".freeze
+    }
+
+    # Optimize for the common case where there's no request body
+    # (GET/HEAD requests).
+    NULL_IO = StringIO.new
+    LOCALHOST = '127.0.0.1'.freeze
+
+    # Being explicitly single-threaded, we have certain advantages in
+    # not having to worry about variables being clobbered :)
+    BUFFER = ' ' * Const::CHUNK_SIZE # initial size, may grow
+    PARSER = HttpParser.new
+    PARAMS = Hash.new
+
     def initialize(logger)
       @logger = logger
-      @body = nil
-      @buffer = ' ' * Const::CHUNK_SIZE # initial size, may grow
-      @parser = HttpParser.new
-      @params = Hash.new
-    end
-
-    def reset
-      @parser.reset
-      @params.clear
-      @body.close rescue nil
-      @body = nil
     end
 
-    #
     # Does the majority of the IO processing.  It has been written in
-    # Ruby using about 7 different IO processing strategies and no
-    # matter how it's done the performance just does not improve.  It is
-    # currently carefully constructed to make sure that it gets the best
-    # possible performance, but anyone who thinks they can make it
-    # faster is more than welcome to take a crack at it.
+    # Ruby using about 8 different IO processing strategies.
+    #
+    # It is currently carefully constructed to make sure that it gets
+    # the best possible performance for the common case: GET requests
+    # that are fully complete after a single read(2).
+    #
+    # Anyone who thinks they can make it faster is more than welcome to
+    # take a crack at it.
     #
     # returns an environment hash suitable for Rack if successful
     # This does minimal exception trapping and it is up to the caller
     # to handle any socket errors (e.g. user aborted upload).
     def read(socket)
-      data = String.new(read_socket(socket))
-      nparsed = 0
-
-      # Assumption: nparsed will always be less since data will get
-      # filled with more after each parsing.  If it doesn't get more
-      # then there was a problem with the read operation on the client
-      # socket.  Effect is to stop processing when the socket can't
-      # fill the buffer for further parsing.
-      while nparsed < data.length
-        nparsed = @parser.execute(@params, data, nparsed)
-
-        if @parser.finished?
-          # From http://www.ietf.org/rfc/rfc3875:
-          # "Script authors should be aware that the REMOTE_ADDR and
-          #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
-          #  may not identify the ultimate source of the request.  They
-          #  identify the client for the immediate request to the server;
-          #  that client may be a proxy, gateway, or other intermediary
-          #  acting on behalf of the actual source client."
-          @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr
-
-          handle_body(socket) and return rack_env # success!
-          return nil # fail
-        else
-          # Parser is not done, queue up more data to read and continue
-          # parsing
-          data << read_socket(socket)
-          if data.length >= Const::MAX_HEADER
-            raise HttpParserError.new("HEADER is longer than allowed, " \
-                                      "aborting client early.")
-          end
-        end
+      # reset the parser
+      unless NULL_IO == (input = PARAMS[Const::RACK_INPUT]) # unlikely
+        input.close rescue nil
+        input.close! rescue nil
       end
-      nil # XXX bug?
+      PARAMS.clear
+      PARSER.reset
+
+      # From http://www.ietf.org/rfc/rfc3875:
+      # "Script authors should be aware that the REMOTE_ADDR and
+      #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
+      #  may not identify the ultimate source of the request.  They
+      #  identify the client for the immediate request to the server;
+      #  that client may be a proxy, gateway, or other intermediary
+      #  acting on behalf of the actual source client."
+      PARAMS[Const::REMOTE_ADDR] =
+                    TCPSocket === socket ? socket.peeraddr.last : LOCALHOST
+
+      # short circuit the common case with small GET requests first
+      PARSER.execute(PARAMS, socket.readpartial(Const::CHUNK_SIZE, BUFFER)) and
+          return handle_body(socket)
+
+      data = BUFFER.dup # socket.readpartial will clobber BUFFER
+
+      # Parser is not done, queue up more data to read and continue parsing
+      # an Exception thrown from the PARSER will throw us out of the loop
+      begin
+        data << socket.readpartial(Const::CHUNK_SIZE, BUFFER)
+        PARSER.execute(PARAMS, data) and return handle_body(socket)
+      end while true
       rescue HttpParserError => e
         @logger.error "HTTP parse error, malformed request " \
-                      "(#{@params[Const::HTTP_X_FORWARDED_FOR] ||
-                          socket.unicorn_peeraddr}): #{e.inspect}"
+                      "(#{PARAMS[Const::HTTP_X_FORWARDED_FOR] ||
+                          PARAMS[Const::REMOTE_ADDR]}): #{e.inspect}"
         @logger.error "REQUEST DATA: #{data.inspect}\n---\n" \
-                      "PARAMS: #{@params.inspect}\n---\n"
-        socket.closed? or socket.close rescue nil
-        nil
+                      "PARAMS: #{PARAMS.inspect}\n---\n"
+        raise e
     end
 
     private
 
     # Handles dealing with the rest of the request
-    # returns true if successful, false if not
+    # returns a Rack environment if successful, raises an exception if not
     def handle_body(socket)
-      http_body = @params[Const::HTTP_BODY]
-      content_length = @params[Const::CONTENT_LENGTH].to_i
-      remain = content_length - http_body.length
+      http_body = PARAMS.delete(:http_body)
+      content_length = PARAMS[Const::CONTENT_LENGTH].to_i
 
-      # must read more data to complete body
-      if remain < Const::MAX_BODY
-        # small body, just use that
-        @body = StringIO.new(http_body)
-      else # huge body, put it in a tempfile
-        @body = Tempfile.new(Const::UNICORN_TMP_BASE)
-        @body.binmode
-        @body.sync = true
-        @body.syswrite(http_body)
+      if content_length == 0 # short circuit the common case
+        PARAMS[Const::RACK_INPUT] = NULL_IO.closed? ? NULL_IO.reopen : NULL_IO
+        return PARAMS.update(DEFAULTS)
       end
 
+      # must read more data to complete body
+      remain = content_length - http_body.length
+
+      body = PARAMS[Const::RACK_INPUT] = (remain < Const::MAX_BODY) ?
+          StringIO.new : Tempfile.new('unicorn')
+
+      body.binmode
+      body.write(http_body)
+
       # Some clients (like FF1.0) report 0 for body and then send a body.
       # This will probably truncate them but at least the request goes through
       # usually.
-      if remain > 0
-        read_body(socket, remain) or return false # fail!
-      end
-      @body.rewind
-      @body.sysseek(0) if @body.respond_to?(:sysseek)
+      read_body(socket, remain, body) if remain > 0
+      body.rewind
 
       # in case read_body overread because the client tried to pipeline
       # another request, we'll truncate it.  Again, we don't do pipelining
       # or keepalive
-      @body.truncate(content_length)
-      true
+      body.truncate(content_length)
+      PARAMS.update(DEFAULTS)
     end
 
-    # Returns an environment which is rackable:
-    # http://rack.rubyforge.org/doc/files/SPEC.html
-    # Based on Rack's old Mongrel handler.
-    def rack_env
-      # It might be a dumbass full host request header
-      @params[Const::REQUEST_PATH] ||=
-                           URI.parse(@params[Const::REQUEST_URI]).path
-      raise "No REQUEST PATH" unless @params[Const::REQUEST_PATH]
-
-      @params["QUERY_STRING"] ||= ''
-      @params.delete "HTTP_CONTENT_TYPE"
-      @params.delete "HTTP_CONTENT_LENGTH"
-      @params.update({ "rack.version" => [0,1],
-                      "rack.input" => @body,
-                      "rack.errors" => $stderr,
-                      "rack.multithread" => false,
-                      "rack.multiprocess" => true,
-                      "rack.run_once" => false,
-                      "rack.url_scheme" => "http",
-                      Const::PATH_INFO => @params[Const::REQUEST_PATH],
-                      Const::SCRIPT_NAME => "",
-                    })
-    end
-
-    # Does the heavy lifting of properly reading the larger body requests in
-    # small chunks.  It expects @body to be an IO object, socket to be valid,
-    # It also expects any initial part of the body that has been read to be in
-    # the @body already.  It will return true if successful and false if not.
-    def read_body(socket, remain)
-      while remain > 0
-        # writes always write the requested amount on a POSIX filesystem
-        remain -= @body.syswrite(read_socket(socket))
-      end
-      true # success!
+    # Does the heavy lifting of properly reading the larger body
+    # requests in small chunks.  It expects PARAMS['rack.input'] to be
+    # an IO object and socket to be valid.  It also expects any initial
+    # part of the body already read to be in PARAMS['rack.input'].
+    # It raises an exception on error instead of returning false.
+    def read_body(socket, remain, body)
+      begin
+        # write always writes the requested amount on a POSIX filesystem
+        remain -= body.write(socket.readpartial(Const::CHUNK_SIZE, BUFFER))
+      end while remain > 0
     rescue Object => e
-      logger.error "Error reading HTTP body: #{e.inspect}"
-      socket.closed? or socket.close rescue nil
+      @logger.error "Error reading HTTP body: #{e.inspect}"
 
       # Any errors means we should delete the file, including if the file
       # is dumped.  Truncate it ASAP to help avoid page flushes to disk.
-      @body.truncate(0) rescue nil
+      body.truncate(0) rescue nil
       reset
-      false
-    end
-
-    # read(2) on "slow" devices like sockets can be interrupted by signals
-    def read_socket(socket)
-      begin
-        socket.sysread(Const::CHUNK_SIZE, @buffer)
-      rescue Errno::EINTR
-        retry
-      end
+      raise e
     end
 
   end
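
For illustration, here is a minimal, standalone sketch (not part of this
commit) of the readpartial-into-a-reused-buffer pattern the new read loop
relies on.  It uses only the standard library over a socketpair; CHUNK_SIZE
and the socket setup are made up for the example.

require 'socket'

CHUNK_SIZE = 16 * 1024   # stand-in for Const::CHUNK_SIZE
buf = ' ' * CHUNK_SIZE   # one reusable read buffer; safe only because
                         # the reader is single-threaded

reader, writer = UNIXSocket.pair
writer.write("GET / HTTP/1.0\r\nHost: example.com\r\n\r\n")
writer.close

data = ''
begin
  while true
    # readpartial(maxlen, outbuf) fills outbuf in place and returns it;
    # << copies the bytes out, so the next call may clobber outbuf freely
    data << reader.readpartial(CHUNK_SIZE, buf)
  end
rescue EOFError
  # peer closed the connection; everything it sent is now in data
end

puts data.bytesize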
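
Similarly, a standalone sketch of the body-buffering strategy the new
handle_body uses: bodies below a size threshold stay in a StringIO, larger
ones spill to a Tempfile.  The threshold value and method name below are
illustrative; unicorn compares the remaining byte count against
Const::MAX_BODY.

require 'stringio'
require 'tempfile'

MAX_IN_MEMORY_BODY = 112 * 1024  # illustrative threshold, not unicorn's constant

# Returns an IO-like object to buffer a request body of the given size:
# small bodies stay in memory, large ones go to disk so a big upload
# never has to be held in RAM all at once.
def body_buffer(bytes_expected)
  if bytes_expected < MAX_IN_MEMORY_BODY
    StringIO.new
  else
    tmp = Tempfile.new('unicorn')
    tmp.binmode
    tmp
  end
end

small = body_buffer(512)
large = body_buffer(10 * 1024 * 1024)
puts small.class  # => StringIO
puts large.class  # => Tempfile
large.close!      # remove the backing tempfile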