unicorn.git  about / heads / tags
Rack HTTP server for Unix and fast clients
blob 424a54f1680ea103cdc83c18bf036570ab8d7346 5779 bytes (raw)
$ git show v0.7.1:lib/unicorn/http_request.rb	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
 
require 'tempfile'
require 'stringio'

# compiled extension
require 'unicorn/http11'

module Unicorn
  #
  # HttpRequest reads one HTTP request from a client socket and turns it
  # into a Rack environment hash.  Large request bodies (see
  # Const::MAX_BODY) are spooled to a Tempfile by handle_body; smaller
  # ones are kept in a StringIO object.  To be safe, you should assume the
  # body works like a file.
  #
  class HttpRequest

     # Rack environment entries that are the same for every request;
     # handle_body merges them into the per-request PARAMS hash
     DEF_PARAMS = {
       'rack.errors' => $stderr,
       'rack.multiprocess' => true,
       'rack.multithread' => false,
       'rack.run_once' => false,
       'rack.version' => [1, 0].freeze,
       'SCRIPT_NAME' => ''.freeze,

       # not part of the Rack spec, but some apps may rely on it
       'SERVER_SOFTWARE' => "Unicorn #{Const::UNICORN_VERSION}".freeze
     }.freeze

    # REMOTE_ADDR value used when the client socket is not a TCPSocket
    LOCALHOST = '127.0.0.1'.freeze

    # Being explicitly single-threaded, we can share one read buffer, one
    # parser and one params hash between all requests without having to
    # worry about them being clobbered :)
    BUFFER = ' ' * Const::CHUNK_SIZE # initial size, may grow
    PARSER = HttpParser.new
    PARAMS = {}

    # Creates a request reader.
    #
    # logger:: object used to report parse and body-read errors; only its
    #          #error method is called from this class
    def initialize(logger)
      @logger = logger
      reset # start with a clean PARSER and an empty PARAMS hash
    end

    # Releases whatever the previous request left behind: the request body
    # stored under Const::RACK_INPUT is closed, the parser is reset and the
    # shared PARAMS hash is emptied.  Returns the (now empty) PARAMS hash.
    def reset
      # Closing is strictly best-effort: there may be no body at all, and
      # close! is a Tempfile method, so for any other kind of body the
      # resulting error is simply ignored.
      begin
        PARAMS[Const::RACK_INPUT].close
      rescue StandardError
        nil
      end
      begin
        PARAMS[Const::RACK_INPUT].close!
      rescue StandardError
        nil
      end

      PARSER.reset
      PARAMS.clear
    end

    # Does the majority of the IO processing.  It has been written in
    # Ruby using about 8 different IO processing strategies.
    #
    # It is currently carefully constructed to make sure that it gets
    # the best possible performance for the common case: GET requests
    # that are fully complete after a single read(2)
    #
    # Anyone who thinks they can make it faster is more than welcome to
    # take a crack at it.
    #
    # returns an environment hash suitable for Rack if successful
    # This does minimal exception trapping and it is up to the caller
    # to handle any socket errors (e.g. user aborted upload).
    #
    # socket:: client socket to read the request from; anything that is
    #          not a TCPSocket is reported as coming from LOCALHOST
    #
    # Raises HttpParserError, after logging the offending request data,
    # when the request is malformed.
    def read(socket)
      # From http://www.ietf.org/rfc/rfc3875:
      # "Script authors should be aware that the REMOTE_ADDR and
      #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
      #  may not identify the ultimate source of the request.  They
      #  identify the client for the immediate request to the server;
      #  that client may be a proxy, gateway, or other intermediary
      #  acting on behalf of the actual source client."
      PARAMS[Const::REMOTE_ADDR] =
        TCPSocket === socket ? socket.peeraddr.last : LOCALHOST

      # short circuit the common case with small GET requests first
      if PARSER.execute(PARAMS, read_socket(socket))
        return handle_body(socket)
      end

      data = BUFFER.dup # read_socket will clobber BUFFER

      # Parser is not done: keep appending freshly read data and re-run
      # the parser until it reports completion.  An exception raised by
      # PARSER or by the socket is the only other way out of this loop.
      loop do
        data << read_socket(socket)
        return handle_body(socket) if PARSER.execute(PARAMS, data)
      end
    rescue HttpParserError => e
      # +data+ is still nil when the very first parse attempt failed; the
      # bytes handed to the parser are in BUFFER in that case.
      request_data = data || BUFFER
      @logger.error "HTTP parse error, malformed request " \
                    "(#{PARAMS[Const::HTTP_X_FORWARDED_FOR] ||
                        PARAMS[Const::REMOTE_ADDR]}): #{e.inspect}"
      @logger.error "REQUEST DATA: #{request_data.inspect}\n---\n" \
                    "PARAMS: #{PARAMS.inspect}\n---\n"
      raise e
    end

    private

    # Handles dealing with the rest of the request
    # returns a Rack environment if successful, raises an exception if not
    #
    # socket:: client socket any still-missing part of the body is read from
    #
    # The body is stored under PARAMS[Const::RACK_INPUT].  It is a StringIO
    # when the declared Content-Length is below Const::MAX_BODY and a
    # Tempfile otherwise, so the amount buffered in memory stays bounded by
    # Const::MAX_BODY regardless of how much of the body arrived together
    # with the headers.
    def handle_body(socket)
      http_body = PARAMS.delete(:http_body)
      content_length = PARAMS[Const::CONTENT_LENGTH].to_i

      if content_length == 0 # short circuit the common case
        PARAMS[Const::RACK_INPUT] = StringIO.new
        return PARAMS.update(DEF_PARAMS)
      end

      # number of body bytes that still have to be read from the socket;
      # negative if the client sent more than it announced
      remain = content_length - http_body.length

      # decide on the full declared size, not just on the unread remainder
      body = PARAMS[Const::RACK_INPUT] =
        content_length < Const::MAX_BODY ? StringIO.new : Tempfile.new('unicorn')

      body.binmode
      body.sync = true
      body.syswrite(http_body)

      # Some clients (like FF1.0) report 0 for body and then send a body.
      # This will probably truncate them but at least the request goes through
      # usually.
      read_body(socket, remain, body) if remain > 0
      body.rewind
      body.sysseek(0) if body.respond_to?(:sysseek)

      # in case read_body overread because the client tried to pipeline
      # another request, we'll truncate it.  Again, we don't do pipelining
      # or keepalive
      body.truncate(content_length)
      PARAMS.update(DEF_PARAMS)
    end

    # Does the heavy lifting of properly reading the larger body
    # requests in small chunks.  It expects +body+ (the object stored in
    # PARAMS['rack.input']) to be an IO-like object that already contains
    # any initial part of the body that has been read, and +socket+ to be
    # valid.  It returns nil on success; on any error it logs the error,
    # truncates the body, resets the request state and re-raises.
    def read_body(socket, remain, body)
      until remain <= 0
        chunk = read_socket(socket)
        # writes always write the requested amount on a POSIX filesystem
        remain -= body.syswrite(chunk)
      end
    rescue Exception => err # deliberately broad: clean up, then re-raise
      @logger.error "Error reading HTTP body: #{err.inspect}"

      # Any error means the partial body is useless.  Truncate it ASAP to
      # help avoid page flushes to disk; a failing truncate is ignored.
      begin
        body.truncate(0)
      rescue StandardError
        nil
      end

      reset
      raise err
    end

    # read(2) on "slow" devices like sockets can be interrupted by signals
    #
    # socket:: object responding to sysread(length, buffer)
    #
    # Reads up to Const::CHUNK_SIZE bytes into the shared BUFFER and
    # returns whatever sysread returns.  Errors other than Errno::EINTR
    # are passed on to the caller.
    def read_socket(socket)
      socket.sysread(Const::CHUNK_SIZE, BUFFER)
    rescue Errno::EINTR
      retry # interrupted by a signal; simply try the read again
    end

  end
end

git clone https://yhbt.net/unicorn.git