diff options
Diffstat (limited to 'lib/mongrel')
-rw-r--r-- | lib/mongrel/const.rb | 110 | ||||
-rw-r--r-- | lib/mongrel/header_out.rb | 28 | ||||
-rw-r--r-- | lib/mongrel/http_request.rb | 155 | ||||
-rw-r--r-- | lib/mongrel/http_response.rb | 163 | ||||
-rw-r--r-- | lib/mongrel/uri_classifier.rb | 76 |
5 files changed, 532 insertions, 0 deletions
# lib/mongrel/const.rb

module Mongrel

  # Every standard HTTP/1.1 status code mapped to its reason phrase. These are
  # used so frequently that they are placed directly in Mongrel for easy
  # access rather than in Mongrel::Const itself.
  #
  # The table is intentionally left unfrozen so that code outside this file
  # which adds or overrides entries keeps working.
  HTTP_STATUS_CODES = {
    100 => 'Continue',
    101 => 'Switching Protocols',
    200 => 'OK',
    201 => 'Created',
    202 => 'Accepted',
    203 => 'Non-Authoritative Information',
    204 => 'No Content',
    205 => 'Reset Content',
    206 => 'Partial Content',
    300 => 'Multiple Choices',
    301 => 'Moved Permanently',
    302 => 'Moved Temporarily',
    303 => 'See Other',
    304 => 'Not Modified',
    305 => 'Use Proxy',
    307 => 'Temporary Redirect',
    400 => 'Bad Request',
    401 => 'Unauthorized',
    402 => 'Payment Required',
    403 => 'Forbidden',
    404 => 'Not Found',
    405 => 'Method Not Allowed',
    406 => 'Not Acceptable',
    407 => 'Proxy Authentication Required',
    408 => 'Request Time-out',
    409 => 'Conflict',
    410 => 'Gone',
    411 => 'Length Required',
    412 => 'Precondition Failed',
    413 => 'Request Entity Too Large',
    414 => 'Request-URI Too Large',
    415 => 'Unsupported Media Type',
    416 => 'Requested range not satisfiable',
    417 => 'Expectation Failed',
    500 => 'Internal Server Error',
    501 => 'Not Implemented',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Time-out',
    505 => 'HTTP Version not supported'
  }

  # Frequently used constants when constructing requests or responses. Many times
  # the constant just refers to a string with the same contents. Using these constants
  # gave about a 3% to 10% performance improvement over using the strings directly.
  # Symbols did not really improve things much compared to constants.
  #
  # While Mongrel does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
  # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
  # too taxing on performance.
  module Const
    DATE = "Date".freeze

    # This is the part of the path after the SCRIPT_NAME. URIClassifier will determine this.
    PATH_INFO = "PATH_INFO".freeze

    # This is the initial part that your handler is identified as by URIClassifier.
    SCRIPT_NAME = "SCRIPT_NAME".freeze

    # The original URI requested by the client. Passed to URIClassifier to build
    # PATH_INFO and SCRIPT_NAME.
    REQUEST_URI = 'REQUEST_URI'.freeze
    REQUEST_PATH = 'REQUEST_PATH'.freeze

    MONGREL_VERSION = "1.0.2".freeze

    # Base name handed to Tempfile when a large request body is spooled to disk.
    MONGREL_TMP_BASE = "mongrel".freeze

    # The standard empty 404 response for bad requests. Use Error404Handler for custom stuff.
    ERROR_404_RESPONSE = "HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Mongrel #{MONGREL_VERSION}\r\n\r\nNOT FOUND".freeze

    CONTENT_LENGTH = "CONTENT_LENGTH".freeze

    # A common header for indicating the server is too busy. Not used yet.
    ERROR_503_RESPONSE = "HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze

    # The basic max request size we'll try to read.
    CHUNK_SIZE = (16 * 1024)

    # This is the maximum header that is allowed before a client is booted. The parser detects
    # this, but we'd also like to do this as well.
    MAX_HEADER = 1024 * (80 + 32)

    # Maximum request body size before it is moved out of memory and into a tempfile for reading.
    MAX_BODY = MAX_HEADER

    # A frozen format for this is about 15% faster
    STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze
    CONTENT_TYPE = "Content-Type".freeze
    LAST_MODIFIED = "Last-Modified".freeze
    ETAG = "ETag".freeze
    SLASH = "/".freeze
    REQUEST_METHOD = "REQUEST_METHOD".freeze
    GET = "GET".freeze
    HEAD = "HEAD".freeze
    # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
    ETAG_FORMAT = "\"%x-%x-%x\"".freeze
    HEADER_FORMAT = "%s: %s\r\n".freeze
    LINE_END = "\r\n".freeze
    REMOTE_ADDR = "REMOTE_ADDR".freeze
    HTTP_X_FORWARDED_FOR = "HTTP_X_FORWARDED_FOR".freeze
    HTTP_IF_MODIFIED_SINCE = "HTTP_IF_MODIFIED_SINCE".freeze
    HTTP_IF_NONE_MATCH = "HTTP_IF_NONE_MATCH".freeze
    REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
    HOST = "HOST".freeze
  end
end
# lib/mongrel/header_out.rb

module Mongrel
  # This class implements a simple way of constructing the HTTP headers dynamically
  # via a Hash syntax. Think of it as a write-only Hash. Refer to HttpResponse for
  # information on how this is used.
  #
  # One consequence of this write-only nature is that you can write multiple headers
  # by just doing them twice (which is sometimes needed in HTTP), but that the normal
  # semantics for Hash (where doing an insert replaces) is not there: for any header
  # that is not listed in +allowed_duplicates+ the FIRST value written wins and later
  # writes are silently ignored.
  class HeaderOut
    # The IO-like object (anything responding to +write+) the header lines go to.
    attr_reader :out

    # Hash whose keys are the header names that may be written more than once.
    attr_accessor :allowed_duplicates

    # out:: IO-like object that receives the formatted header lines.
    def initialize(out)
      @sent = {}
      @allowed_duplicates = {"Set-Cookie" => true, "Set-Cookie2" => true,
        "Warning" => true, "WWW-Authenticate" => true}
      @out = out
    end

    # Simply writes "#{key}: #{value}" to the output buffer, unless a header with
    # the same name was already written and the name is not an allowed duplicate.
    #
    # HTTP field names are case-insensitive, so "Content-Length" and
    # "content-length" are treated as the same header when checking for
    # duplicates. The name is still written exactly as the caller spelled it.
    def []=(key, value)
      name = key.to_s.downcase
      return if @sent.has_key?(name) && !duplicate_allowed?(key, name)

      @sent[name] = true
      @out.write(Const::HEADER_FORMAT % [key, value])
    end

    private

    # True when +key+ may be emitted more than once. An exact hit in
    # +allowed_duplicates+ is tried first (the historical behaviour), then a
    # case-insensitive comparison against its keys.
    def duplicate_allowed?(key, name)
      return true if @allowed_duplicates.has_key?(key)
      @allowed_duplicates.keys.any? { |allowed| allowed.to_s.downcase == name }
    end
  end
end
# lib/mongrel/http_request.rb

module Mongrel
  #
  # When a handler is found for a registered URI then this class is constructed
  # and passed to your HttpHandler::process method. You should assume that
  # *one* handler processes all requests. Included in the HttpRequest is a
  # HttpRequest.params Hash that matches common CGI params, and a HttpRequest.body
  # which is an IO-like object containing the request body (raw for now).
  #
  # The HttpRequest.initialize method will convert any request that is larger than
  # Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses
  # a StringIO object. To be safe, you should assume it works like a file.
  #
  # The HttpHandler.request_notify system is implemented by having HttpRequest call
  # HttpHandler.request_begins, HttpHandler.request_progress, HttpHandler.process during
  # the IO processing. This adds a small amount of overhead but lets you implement
  # finer controlled handlers and filters.
  #
  class HttpRequest
    attr_reader :body, :params

    # You don't really call this. It's made for you.
    # Main thing it does is hook up the params, and store any remaining
    # body data into the HttpRequest.body attribute.
    #
    # params::      Hash-like request parameters that also respond to +http_body+ /
    #               +http_body=+ (the part of the body already read with the headers).
    # socket::      the client socket the rest of the body is read from.
    # dispatchers:: nil, or a collection of objects that receive +request_begins+
    #               and +request_progress+ notifications.
    #
    # After construction +body+ is rewound, or nil if reading the body failed.
    def initialize(params, socket, dispatchers)
      @params = params
      @socket = socket
      @dispatchers = dispatchers
      content_length = @params[Const::CONTENT_LENGTH].to_i
      remain = content_length - @params.http_body.length

      # tell all dispatchers the request has begun
      @dispatchers.each do |dispatcher|
        dispatcher.request_begins(@params)
      end unless @dispatchers.nil? || @dispatchers.empty?

      # Some clients (like FF1.0) report 0 for body and then send a body. This will
      # probably truncate them but at least the request goes through usually.
      if remain <= 0
        # we've got everything, pack it up
        @body = StringIO.new
        @body.write @params.http_body
        update_request_progress(0, content_length)
      else
        # must read more data to complete body
        if remain > Const::MAX_BODY
          # huge body, put it in a tempfile
          @body = Tempfile.new(Const::MONGREL_TMP_BASE)
          @body.binmode
        else
          # small body, just use that
          @body = StringIO.new
        end

        @body.write @params.http_body
        read_body(remain, content_length)
      end

      # read_body sets @body to nil when it fails
      @body.rewind if @body
    end

    # updates all dispatchers about our progress
    def update_request_progress(clen, total)
      return if @dispatchers.nil? || @dispatchers.empty?
      @dispatchers.each do |dispatcher|
        dispatcher.request_progress(@params, clen, total)
      end
    end
    private :update_request_progress

    # Does the heavy lifting of properly reading the larger body requests in
    # small chunks. It expects @body to be an IO object, @socket to be valid,
    # and will set @body = nil if the request fails. It also expects any initial
    # part of the body that has been read to be in the @body already.
    #
    # remain:: number of body bytes still to be read from the socket.
    # total::  the declared content length, only passed on to the dispatchers.
    def read_body(remain, total)
      begin
        # write the odd sized chunk first
        @params.http_body = read_socket(remain % Const::CHUNK_SIZE)

        remain -= @body.write(@params.http_body)

        update_request_progress(remain, total)

        # then stream out nothing but perfectly sized chunks
        until remain <= 0 or @socket.closed?
          # ASSUME: we are writing to a disk and these writes always write the requested amount
          @params.http_body = read_socket(Const::CHUNK_SIZE)
          remain -= @body.write(@params.http_body)

          update_request_progress(remain, total)
        end
      rescue Object => e
        # NOTE(review): this deliberately broad rescue is kept from the original; it
        # also swallows non-StandardError exceptions. Narrow it only after checking
        # how callers rely on @body == nil as the sole failure signal.
        STDERR.puts "#{Time.now}: Error reading HTTP body: #{e.inspect}"
        STDERR.puts e.backtrace.join("\n")
        # any errors means we should delete the file, including if the file is dumped
        @socket.close rescue nil
        # close! both closes the descriptor and unlinks the file, so a failed
        # upload does not leave an open file handle behind until GC.
        @body.close! if @body.is_a?(Tempfile)
        @body = nil # signals that there was a problem
      end
    end

    # Reads exactly +len+ bytes from the socket and returns them.
    # Raises a RuntimeError if the socket is closed, returns nil, or returns
    # fewer bytes than requested.
    def read_socket(len)
      raise "Socket already closed when reading." if @socket.closed?

      data = @socket.read(len)
      if !data
        raise "Socket read return nil"
      elsif data.length != len
        raise "Socket read returned insufficient data: #{data.length}"
      end
      data
    end

    # Performs URI escaping so that you can construct proper
    # query strings faster.  Use this rather than the cgi.rb
    # version since it's faster.  (Stolen from Camping).
    #
    # Every byte outside [a-zA-Z0-9_.-] is percent-encoded and spaces become '+'.
    def self.escape(s)
      s.to_s.gsub(/([^ a-zA-Z0-9_.-]+)/n) { |run|
        # One 'H2' per BYTE, not per character: a multibyte character must
        # produce one %XX group for each of its bytes.
        '%' + run.unpack('H2' * run.bytesize).join('%').upcase
      }.tr(' ', '+')
    end


    # Unescapes a URI escaped string. (Stolen from Camping).
    def self.unescape(s)
      s.tr('+', ' ').gsub(/((?:%[0-9a-fA-F]{2})+)/n){
        [$1.delete('%')].pack('H*')
      }
    end

    # Parses a query string by breaking it up at the '&'
    # and ';' characters.  You can also use this to parse
    # cookies by changing the characters used in the second
    # parameter (which defaults to '&;').
    #
    # Returns a Hash of name => value. A name that occurs more than once maps to
    # an Array of its values; a name without '=' maps to nil. Empty segments
    # (as in "a=1&&b=2") are ignored.
    def self.query_parse(qs, d = '&;')
      params = {}
      (qs || '').split(/[#{d}] */n).each do |pair|
        next if pair.empty?

        # Split on the literal '=' BEFORE unescaping, so that an encoded '='
        # (%3D) inside a name or value does not move the split point.
        k, v = pair.split('=', 2).map { |part| unescape(part) }

        if cur = params[k]
          if cur.is_a?(Array)
            cur << v
          else
            params[k] = [cur, v]
          end
        else
          params[k] = v
        end
      end

      return params
    end
  end
end
# lib/mongrel/http_response.rb

module Mongrel
  # Writes and controls your response to the client using the HTTP/1.1 specification.
  # You use it by simply doing:
  #
  #  response.start(200) do |head,out|
  #    head['Content-Type'] = 'text/plain'
  #    out.write("hello\n")
  #  end
  #
  # The parameter to start is the response code--which Mongrel will translate for you
  # based on HTTP_STATUS_CODES.  The head parameter is how you write custom headers.
  # The out parameter is where you write your body.  The default status code for
  # HttpResponse.start is 200 so the above example is redundant.
  #
  # As you can see, it's just like using a Hash and as you do this it writes the proper
  # header to the output on the fly.  You can even intermix specifying headers and
  # writing content.  The HttpResponse class will write the things in the proper order
  # once the HttpResponse.block is ended.
  #
  # You may also work the HttpResponse object directly using the various attributes available
  # for the raw socket, body, header, and status codes.  If you do this you're on your own.
  # A design decision was made to force the client to not pipeline requests.  HTTP/1.1
  # pipelining really kills the performance due to how it has to be handled and how
  # unclear the standard is.  To fix this the HttpResponse gives a "Connection: close"
  # header which forces the client to close right away.  The bonus for this is that it
  # gives a pretty nice speed boost to most clients since they can close their connection
  # immediately.
  #
  # One additional caveat is that you don't have to specify the Content-length header
  # as the HttpResponse will write this for you based on the out length.
  class HttpResponse
    attr_reader :socket
    attr_accessor :body
    attr_reader :header
    attr_accessor :status
    attr_reader :body_sent
    attr_reader :header_sent
    attr_reader :status_sent

    # socket:: the client connection everything is eventually written to.
    #
    # The response starts out as a 404 with an empty body and a Date header.
    def initialize(socket)
      @socket = socket
      @body = StringIO.new
      @status = 404
      @reason = HTTP_STATUS_CODES[@status]
      @header = HeaderOut.new(StringIO.new)
      @header[Const::DATE] = Time.now.httpdate
      @body_sent = false
      @header_sent = false
      @status_sent = false
    end

    # Receives a block passing it the header and body for you to work with.
    # When the block is finished it writes everything you've done to
    # the socket in the proper order.  This lets you intermix header and
    # body content as needed.  Handlers are able to modify pretty much
    # any part of the request in the chain, and can stop further processing
    # by simply passing "finalize=true" to the start method.  By default
    # all handlers run and then mongrel finalizes the request when they're
    # all done.
    def start(status=200, finalize=false, reason=HTTP_STATUS_CODES[status])
      @status = status.to_i
      @reason = reason
      yield @header, @body
      finished if finalize
    end

    # Primarily used in exception handling to reset the response output in order to write
    # an alternative response.  It will abort with an exception if you have already
    # sent the header or the body.  This is pretty catastrophic actually.
    #
    # All buffered headers (including the Date header written by +initialize+) and
    # the buffered body are discarded. Any custom +allowed_duplicates+ setting of
    # the old header object is carried over.
    def reset
      if @body_sent
        raise "You have already sent the request body."
      elsif @header_sent
        raise "You have already sent the request headers."
      else
        # A fresh HeaderOut is used instead of truncating the old buffer:
        # StringIO#truncate leaves the write position where it was (so the next
        # write would be NUL-padded), and the old HeaderOut would still remember
        # the discarded headers as "already sent" and refuse to write them again.
        allowed = @header.allowed_duplicates
        @header.out.close
        @header = HeaderOut.new(StringIO.new)
        @header.allowed_duplicates = allowed

        @body.close
        @body = StringIO.new
      end
    end

    # Writes the status line (once). Unless +content_length+ is nil/false or the
    # status is 304, a Content-Length header is buffered first.
    def send_status(content_length=@body.length)
      if not @status_sent
        @header['Content-Length'] = content_length if content_length and @status != 304
        write(Const::STATUS_FORMAT % [@status, @reason])
        @status_sent = true
      end
    end

    # Writes the buffered headers followed by the blank line that ends them (once).
    def send_header
      if not @header_sent
        @header.out.rewind
        write(@header.out.read + Const::LINE_END)
        @header_sent = true
      end
    end

    # Writes the buffered body (once).
    def send_body
      if not @body_sent
        @body.rewind
        write(@body.read)
        @body_sent = true
      end
    end

    # Appends the contents of +path+ to the response stream.  The file is opened for binary
    # reading and written in chunks to the socket.
    #
    # When +small_file+ is true the whole file is read and pushed to the socket in one go.
    #
    # Sendfile API support has been removed in 0.3.13.4 due to stability problems.
    def send_file(path, small_file = false)
      if small_file
        File.open(path, "rb") {|f| @socket << f.read }
      else
        File.open(path, "rb") do |f|
          while chunk = f.read(Const::CHUNK_SIZE) and chunk.length > 0
            begin
              write(chunk)
            rescue Object
              # best effort: a failed write means the client went away, stop sending
              break
            end
          end
        end
      end
      @body_sent = true
    end

    # Closes the socket, marks the response as done and re-raises +details+.
    def socket_error(details)
      # ignore these since it means the client closed off early
      @socket.close rescue nil
      # An explicit receiver is required: a bare "done = true" only creates a
      # local variable and never calls the done= writer.
      self.done = true
      raise details
    end

    # Writes +data+ to the socket; on a StandardError the socket is closed, the
    # response is marked done and the error is re-raised.
    def write(data)
      @socket.write(data)
    rescue => details
      socket_error(details)
    end

    # This takes whatever has been done to header and body and then writes it in the
    # proper format to make an HTTP/1.1 response.
    def finished
      send_status
      send_header
      send_body
    end

    # Used during error conditions to mark the response as "done" so there isn't any more processing
    # sent to the client. The assigned value is ignored: any assignment marks the response done.
    def done=(val)
      @status_sent = true
      @header_sent = true
      @body_sent = true
    end

    # True once status, header and body have all been sent (or marked as sent).
    def done
      (@status_sent and @header_sent and @body_sent)
    end

  end
end
# lib/mongrel/uri_classifier.rb

module Mongrel
  # Maps registered URI prefixes to handler objects and resolves a request URI
  # to the longest registered prefix that it starts with.
  class URIClassifier

    class RegistrationError < RuntimeError
    end
    class UsageError < RuntimeError
    end

    attr_reader :handler_map

    # Returns the URIs that have been registered with this classifier so far.
    def uris
      @handler_map.keys
    end

    def initialize
      @handler_map = {}
      # nil means "nothing can match"; rebuild installs a real Regexp
      @matcher = nil
      @root_handler = nil
    end

    # Register a handler object at a particular URI. The handler can be whatever
    # you want, including an array. It's up to you what to do with it.
    #
    # Raises RegistrationError if the URI is already registered, is nil or empty,
    # or does not begin with a slash.
    #
    # Registering a handler is not necessarily threadsafe, so be careful if you go
    # mucking around once the server is running.
    def register(uri, handler)
      # key? rather than a truthiness test, so a nil/false handler still counts as registered
      raise RegistrationError, "#{uri.inspect} is already registered" if @handler_map.key?(uri)
      raise RegistrationError, "URI is empty" if !uri or uri.empty?
      raise RegistrationError, "URI must begin with a \"#{Const::SLASH}\"" unless uri[0..0] == Const::SLASH
      @handler_map[uri.dup] = handler
      rebuild
    end

    # Unregister a particular URI and return its handler.
    # Raises RegistrationError if the URI was never registered.
    def unregister(uri)
      # check before deleting so that a failed call never leaves the map and the
      # matcher out of sync
      raise RegistrationError, "#{uri.inspect} was not registered" unless @handler_map.key?(uri)
      handler = @handler_map.delete(uri)
      rebuild
      handler
    end

    # Resolve a request URI by finding the best partial match in the registered
    # handler URIs.
    #
    # Returns [script_name, path_info, handler], or [nil, nil, nil] when no
    # registered URI is a prefix of +request_uri+.
    def resolve(request_uri)
      if @root_handler
        # Optimization for the pathological case of only one handler on "/"; e.g. Rails
        [Const::SLASH, request_uri, @root_handler]
      elsif @matcher and match = @matcher.match(request_uri)
        uri = match.to_s
        # A root mounted ("/") handler must resolve such that path info matches the original URI.
        [uri, (uri == Const::SLASH ? request_uri : match.post_match), @handler_map[uri]]
      else
        [nil, nil, nil]
      end
    end

    private

    # Recomputes the lookup state after the handler map changed.
    def rebuild
      if @handler_map.size == 1 and @handler_map[Const::SLASH]
        @root_handler = @handler_map.values.first
      else
        @root_handler = nil
        # Longest URIs first so the alternation prefers the most specific prefix;
        # the URI itself is the tie-breaker to keep the order deterministic.
        routes = @handler_map.keys.sort_by { |uri| [-uri.length, uri] }
        if routes.empty?
          # an empty alternation would match every string
          @matcher = nil
        else
          # \A anchors at the start of the string; ^ would also match after a newline
          @matcher = Regexp.new(routes.map { |uri| '\A' + Regexp.escape(uri) }.join('|'))
        end
      end
    end

  end
end
\ No newline at end of file |