diff options
Diffstat (limited to 'lib/unicorn')
-rw-r--r-- | lib/unicorn/const.rb | 113 | ||||
-rw-r--r-- | lib/unicorn/header_out.rb | 34 | ||||
-rw-r--r-- | lib/unicorn/http_request.rb | 106 | ||||
-rw-r--r-- | lib/unicorn/http_response.rb | 167 | ||||
-rw-r--r-- | lib/unicorn/semaphore.rb | 46 | ||||
-rw-r--r-- | lib/unicorn/tcphack.rb | 18 |
6 files changed, 484 insertions, 0 deletions
# ---------------------------------------------------------------------------
# lib/unicorn/const.rb
# ---------------------------------------------------------------------------

module Unicorn

  # Standard HTTP status codes mapped to their reason phrase. They are used so
  # frequently that they live directly in Unicorn rather than in Unicorn::Const.
  # 307, 416 and 417 (RFC 2616) are included so those statuses do not end up
  # with an empty reason phrase on the status line.
  HTTP_STATUS_CODES = {
    100 => 'Continue',
    101 => 'Switching Protocols',
    200 => 'OK',
    201 => 'Created',
    202 => 'Accepted',
    203 => 'Non-Authoritative Information',
    204 => 'No Content',
    205 => 'Reset Content',
    206 => 'Partial Content',
    300 => 'Multiple Choices',
    301 => 'Moved Permanently',
    302 => 'Moved Temporarily',
    303 => 'See Other',
    304 => 'Not Modified',
    305 => 'Use Proxy',
    307 => 'Temporary Redirect',
    400 => 'Bad Request',
    401 => 'Unauthorized',
    402 => 'Payment Required',
    403 => 'Forbidden',
    404 => 'Not Found',
    405 => 'Method Not Allowed',
    406 => 'Not Acceptable',
    407 => 'Proxy Authentication Required',
    408 => 'Request Time-out',
    409 => 'Conflict',
    410 => 'Gone',
    411 => 'Length Required',
    412 => 'Precondition Failed',
    413 => 'Request Entity Too Large',
    414 => 'Request-URI Too Large',
    415 => 'Unsupported Media Type',
    416 => 'Requested range not satisfiable',
    417 => 'Expectation Failed',
    500 => 'Internal Server Error',
    501 => 'Not Implemented',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Time-out',
    505 => 'HTTP Version not supported'
  }

  # Frequently used constants when constructing requests or responses. Many
  # times the constant just refers to a string with the same contents. Using
  # these constants gave about a 3% to 10% performance improvement over using
  # the strings directly. Symbols did not really improve things much compared
  # to constants.
  #
  # While Unicorn does try to emulate the CGI/1.2 protocol, it does not use the
  # REMOTE_IDENT, REMOTE_USER, or REMOTE_HOST parameters since those are either
  # a security problem or too taxing on performance.
  module Const
    DATE="Date".freeze

    # This is the part of the path after the SCRIPT_NAME.
    PATH_INFO="PATH_INFO".freeze

    # Request body
    HTTP_BODY="HTTP_BODY".freeze

    # This is the initial part that your handler is identified as by URIClassifier.
    SCRIPT_NAME="SCRIPT_NAME".freeze

    # The original URI requested by the client. Passed to URIClassifier to
    # build PATH_INFO and SCRIPT_NAME.
    REQUEST_URI='REQUEST_URI'.freeze
    REQUEST_PATH='REQUEST_PATH'.freeze

    UNICORN_VERSION="0.2.0".freeze

    UNICORN_TMP_BASE="unicorn".freeze

    # The standard empty 404 response for bad requests. Use a dedicated 404
    # handler for anything custom.
    ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Unicorn #{UNICORN_VERSION}\r\n\r\nNOT FOUND".freeze

    CONTENT_LENGTH="CONTENT_LENGTH".freeze

    # A common response for indicating the server is too busy. Not used yet.
    ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze

    # The basic max request size we'll try to read.
    CHUNK_SIZE=(16 * 1024)

    # This is the maximum header that is allowed before a client is booted.
    # The parser detects this, but we'd also like to do this as well.
    MAX_HEADER=1024 * (80 + 32)

    # Maximum request body size before it is moved out of memory and into a
    # tempfile for reading.
    MAX_BODY=MAX_HEADER

    # A frozen format for this is about 15% faster
    STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze
    CONTENT_TYPE = "Content-Type".freeze
    LAST_MODIFIED = "Last-Modified".freeze
    ETAG = "ETag".freeze
    SLASH = "/".freeze
    REQUEST_METHOD="REQUEST_METHOD".freeze
    GET="GET".freeze
    HEAD="HEAD".freeze
    # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
    ETAG_FORMAT="\"%x-%x-%x\"".freeze
    HEADER_FORMAT="%s: %s\r\n".freeze
    LINE_END="\r\n".freeze
    REMOTE_ADDR="REMOTE_ADDR".freeze
    HTTP_X_FORWARDED_FOR="HTTP_X_FORWARDED_FOR".freeze
    HTTP_IF_MODIFIED_SINCE="HTTP_IF_MODIFIED_SINCE".freeze
    HTTP_IF_NONE_MATCH="HTTP_IF_NONE_MATCH".freeze
    REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
    HOST = "HOST".freeze
  end
end

# ---------------------------------------------------------------------------
# lib/unicorn/header_out.rb
# ---------------------------------------------------------------------------
require 'stringio'

module Unicorn
  # This class implements a simple way of constructing the HTTP headers
  # dynamically via a Hash syntax. Think of it as a write-only Hash. Refer to
  # HttpResponse for information on how this is used.
  #
  # Each header name is written at most once; a later write of the same name
  # is silently dropped unless the name is listed in +allowed_duplicates+
  # (needed for headers that may legitimately repeat in HTTP). The usual Hash
  # semantics, where an insert replaces the previous value, do not apply.
  class HeaderOut
    attr_reader :out
    attr_accessor :allowed_duplicates

    # +out+ is the IO-like buffer that formatted header lines are written to.
    def initialize(out = StringIO.new)
      @sent = {}
      @allowed_duplicates = {"Set-Cookie" => true, "Set-Cookie2" => true,
                             "Warning" => true, "WWW-Authenticate" => true}
      @out = out
    end

    # Writes every key/value pair of +hash+ through #[]=.
    def merge!(hash)
      hash.each do |key, value|
        self[key] = value
      end
    end

    # Simply writes "#{key}: #{value}" to the output buffer, unless +key+ was
    # already written and is not an allowed duplicate.
    def []=(key, value)
      if !@sent.has_key?(key) || @allowed_duplicates.has_key?(key)
        @sent[key] = true
        @out.write(Const::HEADER_FORMAT % [key, value])
      end
    end
  end
end

# ---------------------------------------------------------------------------
# lib/unicorn/http_request.rb
# ---------------------------------------------------------------------------
require 'stringio'
require 'tempfile'

module Unicorn
  #
  # The HttpRequest.initialize method will convert any request whose remaining
  # body is larger than Const::MAX_BODY into a Tempfile and use that as the
  # body. Otherwise it uses a StringIO object. To be safe, you should assume
  # it works like a file.
  #
  class HttpRequest
    attr_reader :body, :params, :logger

    # You don't really call this. It's made for you.
    # Main thing it does is hook up the params, and store any remaining
    # body data into the HttpRequest.body attribute.
    #
    # +params+ is the parsed request hash; the part of the body that was
    # already read is taken from params[Const::HTTP_BODY] (a missing entry is
    # treated as an empty string). +socket+ is read for the rest of the body.
    # After construction +body+ is rewound, or nil if reading the body failed.
    def initialize(params, socket, logger)
      @params = params
      @socket = socket
      @logger = logger

      initial = @params[Const::HTTP_BODY] || ""
      content_length = @params[Const::CONTENT_LENGTH].to_i
      remain = content_length - initial.length

      # Some clients (like FF1.0) report 0 for body and then send a body.
      # This will probably truncate them but at least the request goes
      # through usually.
      if remain <= 0
        # we've got everything, pack it up
        @body = StringIO.new
        @body.write initial
      else
        # must read more data to complete body
        if remain > Const::MAX_BODY
          # huge body, put it in a tempfile
          @body = Tempfile.new(Const::UNICORN_TMP_BASE)
          @body.binmode
        else
          # small body, just use that
          @body = StringIO.new
        end

        @body.write initial
        read_body(remain, content_length)
      end

      @body.rewind if @body
    end

    # Returns an environment which is rackable:
    # http://rack.rubyforge.org/doc/files/SPEC.html
    # Works on a clone of +params+, so the original hash is left untouched.
    def env
      env = params.clone
      env["QUERY_STRING"] ||= ''
      env.delete "HTTP_CONTENT_TYPE"
      env.delete "HTTP_CONTENT_LENGTH"
      env["SCRIPT_NAME"] = "" if env["SCRIPT_NAME"] == "/"
      env.update({"rack.version" => [0,1],
                  "rack.input" => @body,
                  "rack.errors" => STDERR,

                  "rack.multithread" => true,
                  "rack.multiprocess" => false, # ???
                  "rack.run_once" => false,

                  "rack.url_scheme" => "http",
                })
    end

    # Does the heavy lifting of properly reading the larger body requests in
    # small chunks. It expects @body to be an IO object, @socket to be valid,
    # and will set @body = nil if the request fails. It also expects any
    # initial part of the body that has been read to be in the @body already.
    #
    # Only StandardError is rescued, so Interrupt and SystemExit are no longer
    # swallowed here.
    def read_body(remain, total)
      begin
        # Write the odd sized chunk first
        @params[Const::HTTP_BODY] = read_socket(remain % Const::CHUNK_SIZE)

        remain -= @body.write(@params[Const::HTTP_BODY])

        # Then stream out nothing but perfectly sized chunks
        until remain <= 0 or @socket.closed?
          # ASSUME: we are writing to a disk and these writes always write the requested amount
          @params[Const::HTTP_BODY] = read_socket(Const::CHUNK_SIZE)
          remain -= @body.write(@params[Const::HTTP_BODY])
        end
      rescue StandardError => e
        logger.error "Error reading HTTP body: #{e.inspect}"
        # Any errors means we should delete the file, including if the file is dumped
        @socket.close rescue nil
        @body.close! if @body.is_a?(Tempfile)
        @body = nil # signals that there was a problem
      end
    end

    # Reads exactly +len+ bytes from the socket and returns them. Raises a
    # RuntimeError if the socket is closed, returns nil, or returns fewer
    # bytes than requested.
    def read_socket(len)
      raise "Socket already closed when reading." if @socket.closed?

      data = @socket.read(len)
      if !data
        raise "Socket read return nil"
      elsif data.length != len
        raise "Socket read returned insufficient data: #{data.length}"
      end
      data
    end
  end
end

# ---------------------------------------------------------------------------
# lib/unicorn/http_response.rb
# ---------------------------------------------------------------------------
require 'stringio'
require 'time' # Time#httpdate

module Unicorn
  # Writes the application's response to the client using HTTP/1.1.
  #
  # The object is built from a socket and an application response triple
  # [status, headers, body]: the body parts are buffered in a StringIO, the
  # headers are collected in a HeaderOut (with a Date header added first), and
  # #start / #finished write status line, headers and body in that order.
  #
  # You may also work the HttpResponse object directly using the various
  # attributes available for the raw socket, body, header, and status codes.
  # If you do this you're on your own.
  #
  # A design decision was made to force the client to not pipeline requests.
  # HTTP/1.1 pipelining really kills the performance due to how it has to be
  # handled and how unclear the standard is. To fix this the HttpResponse
  # gives a "Connection: close" header which forces the client to close right
  # away. The bonus for this is that it gives a pretty nice speed boost to
  # most clients since they can close their connection immediately.
  #
  # One additional caveat is that you don't have to specify the Content-Length
  # header as the HttpResponse will write this for you based on the body
  # length.
  class HttpResponse
    attr_reader :socket
    attr_accessor :body
    attr_reader :header
    attr_accessor :status
    attr_reader :body_sent
    attr_reader :header_sent
    attr_reader :status_sent

    # +socket+ is the IO the response is written to; +app_response+ is the
    # [status, headers, body] triple, where body responds to #each.
    def initialize(socket, app_response)
      @socket = socket
      @app_response = app_response
      @body = StringIO.new
      app_response[2].each {|x| @body << x}
      @status = app_response[0]
      @reason = nil
      @header = HeaderOut.new
      @header[Const::DATE] = Time.now.httpdate
      @header.merge!(app_response[1])
      @body_sent = false
      @header_sent = false
      @status_sent = false
    end

    # Writes the complete response (status line, headers, body) to the socket
    # by delegating to #finished.
    # TODO: docs
    def start #(status=200, finalize=false, reason=nil)
      finished
    end

    # Primarily used in exception handling to reset the response output in
    # order to write an alternative response. It will abort with an exception
    # if you have already sent the header or the body. This is pretty
    # catastrophic actually.
    def reset
      if @body_sent
        raise "You have already sent the request body."
      elsif @header_sent
        raise "You have already sent the request headers."
      else
        # XXX Dubious ( http://mongrel.rubyforge.org/ticket/19 )
        @header.out.close
        @header = HeaderOut.new(StringIO.new)

        @body.close
        @body = StringIO.new
      end
    end

    # Writes the status line once. Also records the Content-Length header
    # (unless the status is 304 or +content_length+ is nil/false). The status
    # is converted with to_i so integer-like values such as "200" still get
    # their reason phrase.
    def send_status(content_length=@body.length)
      if not @status_sent
        code = @status.to_i
        @header['Content-Length'] = content_length if content_length and code != 304
        write(Const::STATUS_FORMAT % [code, HTTP_STATUS_CODES[code]])
        @status_sent = true
      end
    end

    # Writes the collected headers followed by the blank line, once.
    def send_header
      if not @header_sent
        @header.out.rewind
        write(@header.out.read + Const::LINE_END)
        @header_sent = true
      end
    end

    # Writes the buffered body, once.
    def send_body
      if not @body_sent
        @body.rewind
        write(@body.read)
        @body_sent = true
      end
    end

    # Appends the contents of +path+ to the response stream. The file is
    # opened for binary reading. With +small_file+ it is written in one go,
    # otherwise in Const::CHUNK_SIZE pieces; in the chunked case a failed
    # write stops the transfer quietly. Both paths go through #write so a
    # socket failure is handled the same way everywhere.
    #
    # Sendfile API support has been removed in 0.3.13.4 due to stability problems.
    def send_file(path, small_file = false)
      if small_file
        File.open(path, "rb") {|f| write(f.read) }
      else
        File.open(path, "rb") do |f|
          while chunk = f.read(Const::CHUNK_SIZE) and chunk.length > 0
            begin
              write(chunk)
            rescue StandardError
              # #write already closed the socket; nothing more can be sent
              break
            end
          end
        end
      end
      @body_sent = true
    end

    # Called when a socket write fails: closes the socket (best effort), marks
    # the response as done and re-raises +details+.
    def socket_error(details)
      # ignore close errors since it means the client closed off early
      @socket.close rescue nil
      # must go through the writer; a bare `done = true` only sets a local
      self.done = true
      raise details
    end

    # Writes +data+ to the socket, routing any StandardError to #socket_error.
    def write(data)
      @socket.write(data)
    rescue => details
      socket_error(details)
    end

    # This takes whatever has been done to header and body and then writes it
    # in the proper format to make an HTTP/1.1 response.
    def finished
      send_status
      send_header
      send_body
    end

    # Used during error conditions to mark the response as "done" so there
    # isn't any more processing sent to the client. The assigned value is
    # ignored; the response is always marked fully sent.
    def done=(val)
      @status_sent = true
      @header_sent = true
      @body_sent = true
    end

    # True once status, headers and body have all been sent (or marked sent).
    def done
      (@status_sent and @header_sent and @body_sent)
    end

  end
end

# ---------------------------------------------------------------------------
# lib/unicorn/semaphore.rb
# ---------------------------------------------------------------------------
require 'thread'

# Counting semaphore. #wait takes a resource and blocks while none is
# available; #signal returns one and releases a blocked waiter if there is
# one. The counter goes negative while threads are blocked.
#
# Waiting is done on a ConditionVariable under the same mutex that guards the
# counter, so a #signal that arrives between the counter update and the actual
# sleep cannot be lost.
class Semaphore
  def initialize(resource_count = 0)
    @available_resource_count = resource_count
    @mutex = Mutex.new
    @condition = ConditionVariable.new
    @pending_wakeups = 0 # wakeups granted by #signal but not yet consumed
  end

  # Takes one resource, blocking until #signal grants one if none is left.
  def wait
    @mutex.synchronize do
      @available_resource_count -= 1
      if @available_resource_count < 0
        # loop guards against spurious wakeups
        @condition.wait(@mutex) until @pending_wakeups > 0
        @pending_wakeups -= 1
      end
    end
  end

  # Returns one resource and wakes a blocked waiter, if any.
  def signal
    @mutex.synchronize do
      @available_resource_count += 1
      if @available_resource_count <= 0
        @pending_wakeups += 1
        @condition.signal
      end
    end
  end

  # Runs the block while holding one resource. The resource is released even
  # if the block raises, but only if it was actually acquired.
  def synchronize
    wait
    begin
      yield
    ensure
      signal
    end
  end
end

# ---------------------------------------------------------------------------
# lib/unicorn/tcphack.rb
# ---------------------------------------------------------------------------
# Copyright (c) 2005 Zed A. Shaw
# You can redistribute it and/or modify it under the same terms as Ruby.
#
# Additional work donated by contributors. See http://mongrel.rubyforge.org/attributions.html
# for more information.
require 'socket'

# A modification proposed by Sean Treadway that increases the default accept
# queue of TCPServer to 1024 so that it handles more concurrent requests.
class TCPServer
  # Guarded so that loading this file a second time does not alias the
  # already-wrapped initialize onto itself (which would recurse forever).
  unless private_method_defined?(:initialize_without_backlog) ||
         method_defined?(:initialize_without_backlog)
    def initialize_with_backlog(*args)
      initialize_without_backlog(*args)
      listen(1024)
    end

    alias_method :initialize_without_backlog, :initialize
    alias_method :initialize, :initialize_with_backlog
  end
end