unicorn.git  about / heads / tags
Rack HTTP server for Unix and fast clients
blob a3a1d4de5bb1de4b4b927c3d80cdfe1132f43e94 6240 bytes (raw)
$ git show v0.5.2:lib/unicorn/http_request.rb	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
 
require 'tempfile'
require 'uri'
require 'stringio'

# compiled extension
require 'unicorn/http11'

module Unicorn
  #
  # Reads a single HTTP request from a client socket and turns it into a
  # Rack environment hash (see HttpRequest#read).
  #
  # The request body is exposed to the application as "rack.input".  When
  # Const::MAX_BODY or more bytes of the body are still unread once the
  # headers have been parsed, the body is spooled into a Tempfile; otherwise
  # it is kept in a StringIO.  To be safe, you should assume it works like a
  # file.
  #
  # The hash returned by #read is the internal params hash of this object; it
  # is emptied again by #reset.
  #
  class HttpRequest

    # default parameters we merge into the request env for Rack handlers
    DEF_PARAMS = {
      "rack.errors" => $stderr,
      "rack.multiprocess" => true,
      "rack.multithread" => false,
      "rack.run_once" => false,
      "rack.version" => [0, 1],
      "SCRIPT_NAME" => "",

      # this is not in the Rack spec, but some apps may rely on it
      "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}"
    }.freeze

    # logger:: object that receives +error+ calls when a request cannot be
    #          parsed or its body cannot be read
    def initialize(logger)
      @logger = logger
      @body = nil
      @buffer = ' ' * Const::CHUNK_SIZE # initial size, may grow
      @parser = HttpParser.new
      @params = {}
    end

    # Prepares this object for the next request: resets the parser, empties
    # the params hash and releases the current body, if there is one.
    def reset
      @parser.reset
      @params.clear
      # Deliberately best-effort: @body may be nil, may already be closed, or
      # may be a StringIO (which has no close!), so failures are ignored.
      @body.close rescue nil
      @body.close! rescue nil
      @body = nil
    end

    # Does the majority of the IO processing.  It has been written in
    # Ruby using about 8 different IO processing strategies.
    #
    # It is currently carefully constructed to make sure that it gets
    # the best possible performance for the common case: GET requests
    # that are fully complete after a single read(2)
    #
    # Anyone who thinks they can make it faster is more than welcome to
    # take a crack at it.
    #
    # Returns an environment hash suitable for Rack if successful.
    # A HttpParserError is logged (together with the offending request data)
    # and then re-raised.  Apart from that this does minimal exception
    # trapping and it is up to the caller to handle any socket errors
    # (e.g. user aborted upload).
    def read(socket)
      data = nil # only set once a second read turns out to be necessary

      # short circuit the common case with small GET requests first
      if @parser.execute(@params, read_socket(socket))
        return handle_body(socket)
      end

      data = @buffer.dup # read_socket will clobber @buffer

      # Parser is not done, queue up more data to read and continue parsing
      # an Exception thrown from the @parser will throw us out of the loop
      loop do
        data << read_socket(socket)
        return handle_body(socket) if @parser.execute(@params, data)
      end
    rescue HttpParserError => e
      @logger.error "HTTP parse error, malformed request " \
                    "(#{@params[Const::HTTP_X_FORWARDED_FOR] ||
                        socket.unicorn_peeraddr}): #{e.inspect}"
      # When the very first chunk fails to parse, data has not been assigned
      # yet and the offending bytes are still sitting in @buffer.
      @logger.error "REQUEST DATA: #{(data || @buffer).inspect}\n---\n" \
                    "PARAMS: #{@params.inspect}\n---\n"
      raise e
    end

    private

    # Handles dealing with the rest of the request once the headers are
    # parsed: stores the body in @body and builds the Rack environment.
    # Returns a Rack environment if successful, raises an exception if not.
    def handle_body(socket)
      http_body = @params.delete(:http_body) # body bytes read with the headers
      content_length = @params[Const::CONTENT_LENGTH].to_i
      remain = content_length - http_body.length

      @body = if remain < Const::MAX_BODY
                StringIO.new # small body, keep it in memory
              else
                Tempfile.new(Const::UNICORN_TMP_BASE) # huge body
              end
      @body.binmode
      @body.sync = true
      # Write the already-read part through the IO (instead of handing it to
      # StringIO.new) so the write position ends up *after* it.  Otherwise
      # read_body would overwrite the start of the body instead of appending.
      @body.syswrite(http_body)

      # Some clients (like FF1.0) report 0 for body and then send a body.
      # This will probably truncate them but at least the request goes through
      # usually.
      read_body(socket, remain) if remain > 0
      @body.rewind
      @body.sysseek(0) if @body.respond_to?(:sysseek)

      # in case read_body overread because the client tried to pipeline
      # another request, we'll truncate it.  Again, we don't do pipelining
      # or keepalive
      @body.truncate(content_length)
      rack_env(socket)
    end

    # Returns an environment which is rackable:
    # http://rack.rubyforge.org/doc/files/SPEC.html
    # Based on Rack's old Mongrel handler.
    #
    # The returned hash is @params itself, completed with REMOTE_ADDR,
    # PATH_INFO, QUERY_STRING, "rack.input" and everything in DEF_PARAMS.
    # Raises RuntimeError ("No REQUEST_PATH") if no request path can be found.
    def rack_env(socket)
      # I'm considering enabling "unicorn.client".  It gives
      # applications some rope to do some "interesting" things like
      # replacing a worker with another process that has full control
      # over the HTTP response.
      # @params["unicorn.client"] = socket

      # From http://www.ietf.org/rfc/rfc3875:
      # "Script authors should be aware that the REMOTE_ADDR and
      #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
      #  may not identify the ultimate source of the request.  They
      #  identify the client for the immediate request to the server;
      #  that client may be a proxy, gateway, or other intermediary
      #  acting on behalf of the actual source client."
      @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr

      # REQUEST_PATH may be missing, e.g. when the request line carried a
      # full URI including the host; fall back to the path of REQUEST_URI.
      path = (@params[Const::REQUEST_PATH] ||=
                URI.parse(@params[Const::REQUEST_URI]).path)
      @params[Const::PATH_INFO] = path
      raise "No REQUEST_PATH" unless path

      @params[Const::QUERY_STRING] ||= ''
      @params[Const::RACK_INPUT] = @body
      @params.update(DEF_PARAMS)
    end

    # Does the heavy lifting of properly reading the larger body requests in
    # small chunks.  It expects @body to be an IO object, socket to be valid,
    # It also expects any initial part of the body that has been read to be in
    # the @body already.
    #
    # socket:: client socket to read from
    # remain:: number of body bytes that still have to be read
    #
    # The return value is not meaningful.  On any error the body is
    # truncated, this request is reset and the error is re-raised.
    def read_body(socket, remain)
      while remain > 0
        # writes always write the requested amount on a POSIX filesystem
        remain -= @body.syswrite(read_socket(socket))
      end
    rescue Object => e # everything is caught so cleanup always runs
      @logger.error "Error reading HTTP body: #{e.inspect}"

      # Any errors means we should delete the file, including if the file
      # is dumped.  Truncate it ASAP to help avoid page flushes to disk.
      @body.truncate(0) rescue nil
      reset
      raise e
    end

    # Reads up to Const::CHUNK_SIZE bytes from socket into @buffer, replacing
    # its previous contents, and returns what sysread returns.
    # read(2) on "slow" devices like sockets can be interrupted by signals,
    # so the read is simply retried on EINTR.
    def read_socket(socket)
      socket.sysread(Const::CHUNK_SIZE, @buffer)
    rescue Errno::EINTR
      retry
    end

  end
end

git clone https://yhbt.net/unicorn.git