about summary refs log tree commit homepage
path: root/lib/unicorn
diff options
context:
space:
mode:
Diffstat (limited to 'lib/unicorn')
-rw-r--r-- lib/unicorn/const.rb 113
-rw-r--r-- lib/unicorn/header_out.rb 34
-rw-r--r-- lib/unicorn/http_request.rb 106
-rw-r--r-- lib/unicorn/http_response.rb 167
-rw-r--r-- lib/unicorn/semaphore.rb 46
-rw-r--r-- lib/unicorn/tcphack.rb 18
6 files changed, 484 insertions, 0 deletions
diff --git a/lib/unicorn/const.rb b/lib/unicorn/const.rb
new file mode 100644
index 0000000..56c3bb4
--- /dev/null
+++ b/lib/unicorn/const.rb
@@ -0,0 +1,113 @@
+
+module Unicorn
+
+  # Every standard HTTP code mapped to the appropriate message.  These are
+  # used so frequently that they are placed directly in Unicorn for easy
+  # access rather than Unicorn::Const itself.
+  HTTP_STATUS_CODES = {  
+    100  => 'Continue',
+    101  => 'Switching Protocols',
+    200  => 'OK',
+    201  => 'Created',
+    202  => 'Accepted',
+    203  => 'Non-Authoritative Information',
+    204  => 'No Content',
+    205  => 'Reset Content',
+    206  => 'Partial Content',
+    300  => 'Multiple Choices',
+    301  => 'Moved Permanently',
+    302  => 'Moved Temporarily',
+    303  => 'See Other',
+    304  => 'Not Modified',
+    305  => 'Use Proxy',
+    400  => 'Bad Request',
+    401  => 'Unauthorized',
+    402  => 'Payment Required',
+    403  => 'Forbidden',
+    404  => 'Not Found',
+    405  => 'Method Not Allowed',
+    406  => 'Not Acceptable',
+    407  => 'Proxy Authentication Required',
+    408  => 'Request Time-out',
+    409  => 'Conflict',
+    410  => 'Gone',
+    411  => 'Length Required',
+    412  => 'Precondition Failed',
+    413  => 'Request Entity Too Large',
+    414  => 'Request-URI Too Large',
+    415  => 'Unsupported Media Type',
+    500  => 'Internal Server Error',
+    501  => 'Not Implemented',
+    502  => 'Bad Gateway',
+    503  => 'Service Unavailable',
+    504  => 'Gateway Time-out',
+    505  => 'HTTP Version not supported'
+  }
+
+  # Frequently used constants when constructing requests or responses.  Many times
+  # the constant just refers to a string with the same contents.  Using these constants
+  # gave about a 3% to 10% performance improvement over using the strings directly.
+  # Symbols did not really improve things much compared to constants.
+  #
+  # While Unicorn does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
+  # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
+  # too taxing on performance.
+  module Const
+    DATE="Date".freeze
+
+    # This is the part of the path after the SCRIPT_NAME.
+    PATH_INFO="PATH_INFO".freeze
+    
+    # Request body
+    HTTP_BODY="HTTP_BODY".freeze
+
+    # This is the initial part that your handler is identified as by URIClassifier.
+    SCRIPT_NAME="SCRIPT_NAME".freeze
+
+    # The original URI requested by the client.  Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME.
+    REQUEST_URI='REQUEST_URI'.freeze
+    REQUEST_PATH='REQUEST_PATH'.freeze
+    
+    UNICORN_VERSION="0.2.0".freeze
+
+    UNICORN_TMP_BASE="unicorn".freeze
+
+    # The standard empty 404 response for bad requests.  Use Error404Handler for custom stuff.
+    ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Unicorn #{UNICORN_VERSION}\r\n\r\nNOT FOUND".freeze
+
+    CONTENT_LENGTH="CONTENT_LENGTH".freeze
+
+    # A canned response for indicating the server is too busy.  Not used yet.
+    ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze
+
+    # The basic max request size we'll try to read.
+    CHUNK_SIZE=(16 * 1024)
+
+    # This is the maximum header that is allowed before a client is booted.  The parser detects
+    # this, but we'd like to do this as well.
+    MAX_HEADER=1024 * (80 + 32)
+
+    # Maximum request body size before it is moved out of memory and into a tempfile for reading.
+    MAX_BODY=MAX_HEADER
+
+    # A frozen format for this is about 15% faster
+    STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze
+    CONTENT_TYPE = "Content-Type".freeze
+    LAST_MODIFIED = "Last-Modified".freeze
+    ETAG = "ETag".freeze
+    SLASH = "/".freeze
+    REQUEST_METHOD="REQUEST_METHOD".freeze
+    GET="GET".freeze
+    HEAD="HEAD".freeze
+    # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
+    ETAG_FORMAT="\"%x-%x-%x\"".freeze
+    HEADER_FORMAT="%s: %s\r\n".freeze
+    LINE_END="\r\n".freeze
+    REMOTE_ADDR="REMOTE_ADDR".freeze
+    HTTP_X_FORWARDED_FOR="HTTP_X_FORWARDED_FOR".freeze
+    HTTP_IF_MODIFIED_SINCE="HTTP_IF_MODIFIED_SINCE".freeze
+    HTTP_IF_NONE_MATCH="HTTP_IF_NONE_MATCH".freeze
+    REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
+    HOST = "HOST".freeze
+  end
+end
diff --git a/lib/unicorn/header_out.rb b/lib/unicorn/header_out.rb
new file mode 100644
index 0000000..a4d987c
--- /dev/null
+++ b/lib/unicorn/header_out.rb
@@ -0,0 +1,34 @@
+module Unicorn
+  # This class implements a simple way of constructing the HTTP headers dynamically
+  # via a Hash syntax.  Think of it as a write-only Hash.  Refer to HttpResponse for
+  # information on how this is used.
+  #
+  # One consequence of this write-only nature is that a header listed in
+  # allowed_duplicates can be written several times (sometimes needed in HTTP), while any
+  # other header is written only once: later assignments are ignored, not replaced.
+  class HeaderOut
+    attr_reader :out
+    attr_accessor :allowed_duplicates
+
+    def initialize(out = StringIO.new)
+      @sent = {}
+      @allowed_duplicates = {"Set-Cookie" => true, "Set-Cookie2" => true,
+        "Warning" => true, "WWW-Authenticate" => true}
+      @out = out
+    end
+
+    def merge!(hash)
+      hash.each do |key, value|
+        self[key] = value
+      end
+    end
+
+    # Writes "#{key}: #{value}" to the output buffer, skipping keys already sent unless duplicates are allowed.
+    def[]=(key,value)
+      if not @sent.has_key?(key) or @allowed_duplicates.has_key?(key)
+        @sent[key] = true
+        @out.write(Const::HEADER_FORMAT % [key, value])
+      end
+    end
+  end
+end
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
new file mode 100644
index 0000000..a76d4e0
--- /dev/null
+++ b/lib/unicorn/http_request.rb
@@ -0,0 +1,106 @@
+
+module Unicorn
+  #
+  # The HttpRequest.initialize method will convert any request that is larger than
+  # Const::MAX_BODY into a Tempfile and use that as the body.  Otherwise it uses
+  # a StringIO object.  To be safe, you should assume it works like a file.
+  #
+  class HttpRequest
+    attr_reader :body, :params, :logger
+
+    # You don't really call this.  It's made for you.
+    # Main thing it does is hook up the params, and store any remaining
+    # body data into the HttpRequest.body attribute.
+    def initialize(params, socket, logger)
+      @params = params
+      @socket = socket
+      @logger = logger
+      
+      content_length = @params[Const::CONTENT_LENGTH].to_i
+      remain = content_length - @params[Const::HTTP_BODY].length
+
+      # Some clients (like FF1.0) report 0 for body and then send a body.  This will probably truncate them but at least the request goes through usually.
+      if remain <= 0
+        # we've got everything, pack it up
+        @body = StringIO.new
+        @body.write @params[Const::HTTP_BODY]
+      elsif remain > 0
+        # must read more data to complete body
+        if remain > Const::MAX_BODY
+          # huge body, put it in a tempfile
+          @body = Tempfile.new(Const::UNICORN_TMP_BASE)
+          @body.binmode
+        else
+          # small body, just use that
+          @body = StringIO.new
+        end
+
+        @body.write @params[Const::HTTP_BODY]
+        read_body(remain, content_length)
+      end
+
+      @body.rewind if @body
+    end
+
+    # Returns an environment which is rackable: http://rack.rubyforge.org/doc/files/SPEC.html
+    # Copied directly from Rack's old Unicorn handler.
+    def env
+      env = params.clone
+      env["QUERY_STRING"] ||= ''
+      env.delete "HTTP_CONTENT_TYPE"
+      env.delete "HTTP_CONTENT_LENGTH"
+      env["SCRIPT_NAME"] = "" if env["SCRIPT_NAME"] == "/"
+      env.update({"rack.version" => [0,1],
+              "rack.input" => @body,
+              "rack.errors" => STDERR,
+
+              "rack.multithread" => true,
+              "rack.multiprocess" => false, # ???
+              "rack.run_once" => false,
+
+              "rack.url_scheme" => "http",
+            })
+    end
+
+    # Does the heavy lifting of properly reading the larger body requests in
+    # small chunks.  It expects @body to be an IO object, @socket to be valid,
+    # and will set @body = nil if the request fails.  It also expects any initial
+    # part of the body that has been read to be in the @body already.
+    def read_body(remain, total)
+      begin
+        # Write the odd sized chunk first
+        @params[Const::HTTP_BODY] = read_socket(remain % Const::CHUNK_SIZE)
+
+        remain -= @body.write(@params[Const::HTTP_BODY])
+
+        # Then stream out nothing but perfectly sized chunks
+        until remain <= 0 or @socket.closed?
+          # ASSUME: we are writing to a disk and these writes always write the requested amount
+          @params[Const::HTTP_BODY] = read_socket(Const::CHUNK_SIZE)
+          remain -= @body.write(@params[Const::HTTP_BODY])
+        end
+      rescue Object => e
+        logger.error "Error reading HTTP body: #{e.inspect}"
+        # Any error means we should delete the file, including if the file is dumped
+        @socket.close rescue nil
+        @body.close! if @body.class == Tempfile
+        @body = nil # signals that there was a problem
+      end
+    end
+
+    def read_socket(len)
+      if !@socket.closed?
+        data = @socket.read(len)
+        if !data
+          raise "Socket read return nil"
+        elsif data.length != len
+          raise "Socket read returned insufficient data: #{data.length}"
+        else
+          data
+        end
+      else
+        raise "Socket already closed when reading."
+      end
+    end
+  end
+end
diff --git a/lib/unicorn/http_response.rb b/lib/unicorn/http_response.rb
new file mode 100644
index 0000000..5fbc990
--- /dev/null
+++ b/lib/unicorn/http_response.rb
@@ -0,0 +1,167 @@
+module Unicorn
+  # Writes and controls your response to the client using the HTTP/1.1 specification.
+  # You use it by simply doing:
+  #
+  #  response.start(200) do |head,out|
+  #    head['Content-Type'] = 'text/plain'
+  #    out.write("hello\n")
+  #  end
+  #
+  # The parameter to start is the response code--which Unicorn will translate for you
+  # based on HTTP_STATUS_CODES.  The head parameter is how you write custom headers.
+  # The out parameter is where you write your body.  The default status code for
+  # HttpResponse.start is 200 so the above example is redundant.
+  #
+  # As you can see, it's just like using a Hash and as you do this it writes the proper
+  # header to the output on the fly.  You can even intermix specifying headers and
+  # writing content.  The HttpResponse class will write the things in the proper order
+  # once the HttpResponse.block is ended.
+  #
+  # You may also work the HttpResponse object directly using the various attributes available
+  # for the raw socket, body, header, and status codes.  If you do this you're on your own.
+  # A design decision was made to force the client to not pipeline requests.  HTTP/1.1
+  # pipelining really kills the performance due to how it has to be handled and how
+  # unclear the standard is.  To fix this the HttpResponse gives a "Connection: close"
+  # header which forces the client to close right away.  The bonus for this is that it
+  # gives a pretty nice speed boost to most clients since they can close their connection
+  # immediately.
+  #
+  # One additional caveat is that you don't have to specify the Content-Length header
+  # as the HttpResponse will write this for you based on the out length.
+  class HttpResponse
+    attr_reader :socket
+    attr_reader :body
+    attr_writer :body
+    attr_reader :header
+    attr_reader :status
+    attr_writer :status
+    attr_reader :body_sent
+    attr_reader :header_sent
+    attr_reader :status_sent
+
+    def initialize(socket, app_response)
+      @socket = socket
+      @app_response = app_response
+      @body = StringIO.new
+      app_response[2].each {|x| @body << x}
+      @status = app_response[0]
+      @reason = nil
+      @header = HeaderOut.new
+      @header[Const::DATE] = Time.now.httpdate
+      @header.merge!(app_response[1])
+      @body_sent = false
+      @header_sent = false
+      @status_sent = false
+    end
+
+    # Receives a block passing it the header and body for you to work with.
+    # When the block is finished it writes everything you've done to
+    # the socket in the proper order.  This lets you intermix header and
+    # body content as needed.  Handlers are able to modify pretty much
+    # any part of the request in the chain, and can stop further processing
+    # by simply passing "finalize=true" to the start method.  By default
+    # all handlers run and then Unicorn finalizes the request when they're
+    # all done.
+    # TODO: docs
+    def start #(status=200, finalize=false, reason=nil)
+      finished
+    end
+
+    # Primarily used in exception handling to reset the response output in order to write
+    # an alternative response.  It will abort with an exception if you have already
+    # sent the header or the body.  This is pretty catastrophic actually.
+    def reset
+      if @body_sent
+        raise "You have already sent the request body."
+      elsif @header_sent
+        raise "You have already sent the request headers."
+      else
+        # XXX Dubious ( http://mongrel.rubyforge.org/ticket/19 )
+        @header.out.close
+        @header = HeaderOut.new(StringIO.new)
+
+        @body.close
+        @body = StringIO.new
+      end
+    end
+
+    def send_status(content_length=@body.length)
+      if not @status_sent
+        @header['Content-Length'] = content_length if content_length and @status != 304
+        write(Const::STATUS_FORMAT % [@status, HTTP_STATUS_CODES[@status]])
+        @status_sent = true
+      end
+    end
+
+    def send_header
+      if not @header_sent
+        @header.out.rewind
+        write(@header.out.read + Const::LINE_END)
+        @header_sent = true
+      end
+    end
+
+    def send_body
+      if not @body_sent
+        @body.rewind
+        write(@body.read)
+        @body_sent = true
+      end
+    end
+
+    # Appends the contents of +path+ to the response stream.  The file is opened for binary
+    # reading and written in chunks to the socket.
+    #
+    # Sendfile API support has been removed in 0.3.13.4 due to stability problems.
+    def send_file(path, small_file = false)
+      if small_file
+        File.open(path, "rb") {|f| @socket << f.read }
+      else
+        File.open(path, "rb") do |f|
+          while chunk = f.read(Const::CHUNK_SIZE) and chunk.length > 0
+            begin
+              write(chunk)
+            rescue Object => exc
+              break
+            end
+          end
+        end
+      end
+      @body_sent = true
+    end
+
+    def socket_error(details)
+      # ignore these since it means the client closed off early
+      @socket.close rescue nil
+      done = true
+      raise details
+    end
+
+    def write(data)
+      @socket.write(data)
+    rescue => details
+      socket_error(details)
+    end
+
+    # This takes whatever has been done to header and body and then writes it in the
+    # proper format to make an HTTP/1.1 response.
+    def finished
+      send_status
+      send_header
+      send_body
+    end
+
+    # Used during error conditions to mark the response as "done" so there isn't any more processing
+    # sent to the client.
+    def done=(val)
+      @status_sent = true
+      @header_sent = true
+      @body_sent = true
+    end
+
+    def done
+      (@status_sent and @header_sent and @body_sent)
+    end
+
+  end
+end
diff --git a/lib/unicorn/semaphore.rb b/lib/unicorn/semaphore.rb
new file mode 100644
index 0000000..1c0b87c
--- /dev/null
+++ b/lib/unicorn/semaphore.rb
@@ -0,0 +1,46 @@
+class Semaphore
+  def initialize(resource_count = 0)
+    @available_resource_count = resource_count
+    @mutex = Mutex.new
+    @waiting_threads = []
+  end
+  
+  def wait
+    make_thread_wait unless resource_is_available
+  end
+  
+  def signal
+    schedule_waiting_thread if thread_is_waiting
+  end
+  
+  def synchronize
+    self.wait
+    yield
+  ensure
+    self.signal
+  end
+  
+  private
+  
+  def resource_is_available
+    @mutex.synchronize do
+      return (@available_resource_count -= 1) >= 0
+    end
+  end
+  
+  def make_thread_wait
+    @waiting_threads << Thread.current
+    Thread.stop  
+  end
+  
+  def thread_is_waiting
+    @mutex.synchronize do
+      return (@available_resource_count += 1) <= 0
+    end
+  end
+  
+  def schedule_waiting_thread
+    thread = @waiting_threads.shift
+    thread.wakeup if thread
+  end
+end
diff --git a/lib/unicorn/tcphack.rb b/lib/unicorn/tcphack.rb
new file mode 100644
index 0000000..634f9dd
--- /dev/null
+++ b/lib/unicorn/tcphack.rb
@@ -0,0 +1,18 @@
+# Copyright (c) 2005 Zed A. Shaw
+# You can redistribute it and/or modify it under the same terms as Ruby.
+#
+# Additional work donated by contributors.  See http://mongrel.rubyforge.org/attributions.html
+# for more information.
+
+
+# A modification proposed by Sean Treadway that increases the default accept
+# queue of TCPServer to 1024 so that it handles more concurrent requests.
+class TCPServer
+   def initialize_with_backlog(*args)
+     initialize_without_backlog(*args)
+     listen(1024)
+   end
+
+   alias_method :initialize_without_backlog, :initialize
+   alias_method :initialize, :initialize_with_backlog
+end