diff options
Diffstat (limited to 'lib/unicorn/http_request.rb')
-rw-r--r-- | lib/unicorn/http_request.rb | 220 |
1 file changed, 97 insertions, 123 deletions
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb index ce0e408..368305f 100644 --- a/lib/unicorn/http_request.rb +++ b/lib/unicorn/http_request.rb @@ -1,5 +1,4 @@ require 'tempfile' -require 'uri' require 'stringio' # compiled extension @@ -13,165 +12,140 @@ module Unicorn # class HttpRequest + # default parameters we merge into the request env for Rack handlers + DEFAULTS = { + "rack.errors" => $stderr, + "rack.multiprocess" => true, + "rack.multithread" => false, + "rack.run_once" => false, + "rack.version" => [1, 0].freeze, + "SCRIPT_NAME" => "".freeze, + + # this is not in the Rack spec, but some apps may rely on it + "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}".freeze + } + + # Optimize for the common case where there's no request body + # (GET/HEAD) requests. + NULL_IO = StringIO.new + LOCALHOST = '127.0.0.1'.freeze + + # Being explicitly single-threaded, we have certain advantages in + # not having to worry about variables being clobbered :) + BUFFER = ' ' * Const::CHUNK_SIZE # initial size, may grow + PARSER = HttpParser.new + PARAMS = Hash.new + def initialize(logger) @logger = logger - @body = nil - @buffer = ' ' * Const::CHUNK_SIZE # initial size, may grow - @parser = HttpParser.new - @params = Hash.new - end - - def reset - @parser.reset - @params.clear - @body.close rescue nil - @body = nil end - # # Does the majority of the IO processing. It has been written in - # Ruby using about 7 different IO processing strategies and no - # matter how it's done the performance just does not improve. It is - # currently carefully constructed to make sure that it gets the best - # possible performance, but anyone who thinks they can make it - # faster is more than welcome to take a crack at it. + # Ruby using about 8 different IO processing strategies. 
+ # + # It is currently carefully constructed to make sure that it gets + # the best possible performance for the common case: GET requests + # that are fully complete after a single read(2) + # + # Anyone who thinks they can make it faster is more than welcome to + # take a crack at it. # # returns an environment hash suitable for Rack if successful # This does minimal exception trapping and it is up to the caller # to handle any socket errors (e.g. user aborted upload). def read(socket) - data = String.new(read_socket(socket)) - nparsed = 0 - - # Assumption: nparsed will always be less since data will get - # filled with more after each parsing. If it doesn't get more - # then there was a problem with the read operation on the client - # socket. Effect is to stop processing when the socket can't - # fill the buffer for further parsing. - while nparsed < data.length - nparsed = @parser.execute(@params, data, nparsed) - - if @parser.finished? - # From http://www.ietf.org/rfc/rfc3875: - # "Script authors should be aware that the REMOTE_ADDR and - # REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9) - # may not identify the ultimate source of the request. They - # identify the client for the immediate request to the server; - # that client may be a proxy, gateway, or other intermediary - # acting on behalf of the actual source client." - @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr - - handle_body(socket) and return rack_env # success! - return nil # fail - else - # Parser is not done, queue up more data to read and continue - # parsing - data << read_socket(socket) - if data.length >= Const::MAX_HEADER - raise HttpParserError.new("HEADER is longer than allowed, " \ - "aborting client early.") - end - end + # reset the parser + unless NULL_IO == (input = PARAMS[Const::RACK_INPUT]) # unlikely + input.close rescue nil + input.close! rescue nil end - nil # XXX bug? 
+ PARAMS.clear + PARSER.reset + + # From http://www.ietf.org/rfc/rfc3875: + # "Script authors should be aware that the REMOTE_ADDR and + # REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9) + # may not identify the ultimate source of the request. They + # identify the client for the immediate request to the server; + # that client may be a proxy, gateway, or other intermediary + # acting on behalf of the actual source client." + PARAMS[Const::REMOTE_ADDR] = + TCPSocket === socket ? socket.peeraddr.last : LOCALHOST + + # short circuit the common case with small GET requests first + PARSER.execute(PARAMS, socket.readpartial(Const::CHUNK_SIZE, BUFFER)) and + return handle_body(socket) + + data = BUFFER.dup # socket.readpartial will clobber BUFFER + + # Parser is not done, queue up more data to read and continue parsing + # an Exception thrown from the PARSER will throw us out of the loop + begin + data << socket.readpartial(Const::CHUNK_SIZE, BUFFER) + PARSER.execute(PARAMS, data) and return handle_body(socket) + end while true rescue HttpParserError => e @logger.error "HTTP parse error, malformed request " \ - "(#{@params[Const::HTTP_X_FORWARDED_FOR] || - socket.unicorn_peeraddr}): #{e.inspect}" + "(#{PARAMS[Const::HTTP_X_FORWARDED_FOR] || + PARAMS[Const::REMOTE_ADDR]}): #{e.inspect}" @logger.error "REQUEST DATA: #{data.inspect}\n---\n" \ - "PARAMS: #{@params.inspect}\n---\n" - socket.closed? 
or socket.close rescue nil - nil + "PARAMS: #{PARAMS.inspect}\n---\n" + raise e end private # Handles dealing with the rest of the request - # returns true if successful, false if not + # returns a Rack environment if successful, raises an exception if not def handle_body(socket) - http_body = @params[Const::HTTP_BODY] - content_length = @params[Const::CONTENT_LENGTH].to_i - remain = content_length - http_body.length + http_body = PARAMS.delete(:http_body) + content_length = PARAMS[Const::CONTENT_LENGTH].to_i - # must read more data to complete body - if remain < Const::MAX_BODY - # small body, just use that - @body = StringIO.new(http_body) - else # huge body, put it in a tempfile - @body = Tempfile.new(Const::UNICORN_TMP_BASE) - @body.binmode - @body.sync = true - @body.syswrite(http_body) + if content_length == 0 # short circuit the common case + PARAMS[Const::RACK_INPUT] = NULL_IO.closed? ? NULL_IO.reopen : NULL_IO + return PARAMS.update(DEFAULTS) end + # must read more data to complete body + remain = content_length - http_body.length + + body = PARAMS[Const::RACK_INPUT] = (remain < Const::MAX_BODY) ? + StringIO.new : Tempfile.new('unicorn') + + body.binmode + body.write(http_body) + # Some clients (like FF1.0) report 0 for body and then send a body. # This will probably truncate them but at least the request goes through # usually. - if remain > 0 - read_body(socket, remain) or return false # fail! - end - @body.rewind - @body.sysseek(0) if @body.respond_to?(:sysseek) + read_body(socket, remain, body) if remain > 0 + body.rewind # in case read_body overread because the client tried to pipeline # another request, we'll truncate it. Again, we don't do pipelining # or keepalive - @body.truncate(content_length) - true + body.truncate(content_length) + PARAMS.update(DEFAULTS) end - # Returns an environment which is rackable: - # http://rack.rubyforge.org/doc/files/SPEC.html - # Based on Rack's old Mongrel handler. 
- def rack_env - # It might be a dumbass full host request header - @params[Const::REQUEST_PATH] ||= - URI.parse(@params[Const::REQUEST_URI]).path - raise "No REQUEST PATH" unless @params[Const::REQUEST_PATH] - - @params["QUERY_STRING"] ||= '' - @params.delete "HTTP_CONTENT_TYPE" - @params.delete "HTTP_CONTENT_LENGTH" - @params.update({ "rack.version" => [0,1], - "rack.input" => @body, - "rack.errors" => $stderr, - "rack.multithread" => false, - "rack.multiprocess" => true, - "rack.run_once" => false, - "rack.url_scheme" => "http", - Const::PATH_INFO => @params[Const::REQUEST_PATH], - Const::SCRIPT_NAME => "", - }) - end - - # Does the heavy lifting of properly reading the larger body requests in - # small chunks. It expects @body to be an IO object, socket to be valid, - # It also expects any initial part of the body that has been read to be in - # the @body already. It will return true if successful and false if not. - def read_body(socket, remain) - while remain > 0 - # writes always write the requested amount on a POSIX filesystem - remain -= @body.syswrite(read_socket(socket)) - end - true # success! + # Does the heavy lifting of properly reading the larger body + # requests in small chunks. It expects PARAMS['rack.input'] to be + # an IO object, socket to be valid, It also expects any initial part + # of the body that has been read to be in the PARAMS['rack.input'] + # already. It will return true if successful and false if not. + def read_body(socket, remain, body) + begin + # write always writes the requested amount on a POSIX filesystem + remain -= body.write(socket.readpartial(Const::CHUNK_SIZE, BUFFER)) + end while remain > 0 rescue Object => e - logger.error "Error reading HTTP body: #{e.inspect}" - socket.closed? or socket.close rescue nil + @logger.error "Error reading HTTP body: #{e.inspect}" # Any errors means we should delete the file, including if the file # is dumped. Truncate it ASAP to help avoid page flushes to disk. 
- @body.truncate(0) rescue nil + body.truncate(0) rescue nil reset - false - end - - # read(2) on "slow" devices like sockets can be interrupted by signals - def read_socket(socket) - begin - socket.sysread(Const::CHUNK_SIZE, @buffer) - rescue Errno::EINTR - retry - end + raise e end end |