about summary refs log tree commit homepage
path: root/lib/mongrel.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mongrel.rb')
-rw-r--r--lib/mongrel.rb290
1 files changed, 242 insertions, 48 deletions
diff --git a/lib/mongrel.rb b/lib/mongrel.rb
index 4912b8a..4b59549 100644
--- a/lib/mongrel.rb
+++ b/lib/mongrel.rb
@@ -8,6 +8,9 @@ require 'stringio'
 # functionality to service web application requests fast as possible.
 module Mongrel
 
+  # Every standard HTTP code mapped to the appropriate message.  These are
+  # used so frequently that they are placed directly in Mongrel for easy
+  # access rather than Mongrel::Const.
   HTTP_STATUS_CODES = {  
     100  => 'Continue',
     101  => 'Switching Protocols',
@@ -48,19 +51,89 @@ module Mongrel
     505  => 'HTTP Version not supported'
   }
 
+  # Frequently used constants when constructing requests or responses.  Many times
+  # the constant just refers to a string with the same contents.  Using these constants
+  # gave about a 3% to 10% performance improvement over using the strings directly.
+  # Symbols did not really improve things much compared to constants.
+  #
+  # While Mongrel does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
+  # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
+  # too taxing on performance.
+  module Const
+    # This is the part of the path after the SCRIPT_NAME.  URIClassifier will determine this.
+    PATH_INFO="PATH_INFO"
+    # This is the initial part that your handler is identified as by URIClassifier.
+    SCRIPT_NAME="SCRIPT_NAME"
+    # The original URI requested by the client.  Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME.
+    REQUEST_URI='REQUEST_URI'
+
+    # Content length (also available as HTTP_CONTENT_LENGTH).
+    CONTENT_LENGTH='CONTENT_LENGTH'
+
+    # Content length (also available as CONTENT_LENGTH).
+    HTTP_CONTENT_LENGTH='HTTP_CONTENT_LENGTH'
+
+    # Content type (also available as HTTP_CONTENT_TYPE).
+    CONTENT_TYPE='CONTENT_TYPE'
+
+    # Content type (also available as CONTENT_TYPE).
+    HTTP_CONTENT_TYPE='HTTP_CONTENT_TYPE'
+
+    # Gateway interface key in the HttpRequest parameters.
+    GATEWAY_INTERFACE='GATEWAY_INTERFACE'
+    # We claim to support CGI/1.2.
+    GATEWAY_INTERFACE_VALUE='CGI/1.2'
+
+    # The remote host's IP address.  Mongrel does not do DNS resolves since that slows
+    # processing down considerably.
+    REMOTE_ADDR='REMOTE_ADDR'
+
+    # This is not given since Mongrel does not do DNS resolves.  It is only here for
+    # completeness for the CGI standard.
+    REMOTE_HOST='REMOTE_HOST'
+
+    # The name/host of our server as given by the HttpServer.new(host,port) call.
+    SERVER_NAME='SERVER_NAME'
+
+    # The port of our server as given by the HttpServer.new(host,port) call.
+    SERVER_PORT='SERVER_PORT'
+
+    # Official server protocol key in the HttpRequest parameters.
+    SERVER_PROTOCOL='SERVER_PROTOCOL'
+    # Mongrel claims to support HTTP/1.1.
+    SERVER_PROTOCOL_VALUE='HTTP/1.1'
+
+    # The actual server software being used (it's Mongrel man).
+    SERVER_SOFTWARE='SERVER_SOFTWARE'
+    
+    # Current Mongrel version (used for SERVER_SOFTWARE and other response headers).
+    MONGREL_VERSION='Mongrel 0.2.2'
+
+    # The standard empty 404 response for bad requests.  Use Error404Handler for custom stuff.
+    ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: #{MONGREL_VERSION}\r\n\r\nNOT FOUND"
+
+    # A canned complete response for indicating the server is too busy.  Not used yet.
+    ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY"
+
+    # The basic max request size (16 KB) we'll try to read in one go.
+    CHUNK_SIZE=(16 * 1024)
+
+  end
+
+
   # When a handler is found for a registered URI then this class is constructed
   # and passed to your HttpHandler::process method.  You should assume that
   # *one* handler processes all requests.  Included in the HttpReqeust is a
   # HttpRequest.params Hash that matches common CGI params, and a HttpRequest.body
   # which is a string containing the request body (raw for now).
   #
-  # Mongrel really only support small-ish request bodies right now since really
+  # Mongrel really only supports small-ish request bodies right now since really
   # huge ones have to be completely read off the wire and put into a string.
   # Later there will be several options for efficiently handling large file
   # uploads.
   class HttpRequest
     attr_reader :body, :params
-    
+
     # You don't really call this.  It's made for you.
     # Main thing it does is hook up the params, and store any remaining
     # body data into the HttpRequest.body attribute.
@@ -68,13 +141,14 @@ module Mongrel
       @body = initial_body || ""
       @params = params
       @socket = socket
-
+      
       # fix up the CGI requirements
-      params['CONTENT_LENGTH'] = params['HTTP_CONTENT_LENGTH'] || 0
+      params[Const::CONTENT_LENGTH] = params[Const::HTTP_CONTENT_LENGTH] || 0
+      params[Const::CONTENT_TYPE] ||= params[Const::HTTP_CONTENT_TYPE]
 
       # now, if the initial_body isn't long enough for the content length we have to fill it
       # TODO: adapt for big ass stuff by writing to a temp file
-      clen = params['HTTP_CONTENT_LENGTH'].to_i
+      clen = params[Const::HTTP_CONTENT_LENGTH].to_i
       if @body.length < clen
         @body << @socket.read(clen - @body.length)
       end
@@ -82,6 +156,13 @@ module Mongrel
   end
 
 
+  # This class implements a simple way of constructing the HTTP headers dynamically
+  # via a Hash syntax.  Think of it as a write-only Hash.  Refer to HttpResponse for
+  # information on how this is used.
+  #
+  # One consequence of this write-only nature is that you can write multiple headers
+  # by just doing them twice (which is sometimes needed in HTTP), but that the normal
+  # semantics for Hash (where doing an insert replaces) is not there.
   class HeaderOut
     attr_reader :out
 
@@ -89,6 +170,7 @@ module Mongrel
       @out = out
     end
 
+    # Simply writes "#{key}: #{value}" to an output buffer.
     def[]=(key,value)
       @out.write(key)
       @out.write(": ")
@@ -97,7 +179,35 @@ module Mongrel
     end
   end
 
-
+  # Writes and controls your response to the client using the HTTP/1.1 specification.
+  # You use it by simply doing:
+  #
+  #  response.start(200) do |head,out|
+  #    head['Content-Type'] = 'text/plain'
+  #    out.write("hello\n")
+  #  end
+  #
+  # The parameter to start is the response code--which Mongrel will translate for you
+  # based on HTTP_STATUS_CODES.  The head parameter is how you write custom headers.
+  # The out parameter is where you write your body.  The default status code for
+  # HttpResponse.start is 200 so the above example is redundant.
+  #
+  # As you can see, it's just like using a Hash and as you do this it writes the proper
+  # header to the output on the fly.  You can even intermix specifying headers and
+  # writing content.  The HttpResponse class will write the things in the proper order
+  # once the HttpResponse.start block has ended.
+  #
+  # You may also work the HttpResponse object directly using the various attributes available
+  # for the raw socket, body, header, and status codes.  If you do this you're on your own.
+  # A design decision was made to force the client to not pipeline requests.  HTTP/1.1
+  # pipelining really kills the performance due to how it has to be handled and how
+  # unclear the standard is.  To fix this the HttpResponse gives a "Connection: close"
+  # header which forces the client to close right away.  The bonus for this is that it
+  # gives a pretty nice speed boost to most clients since they can close their connection
+  # immediately.
+  #
+  # One additional caveat is that you don't have to specify the Content-length header
+  # as the HttpResponse will write this for you based on the out length.
   class HttpResponse
     attr_reader :socket
     attr_reader :body
@@ -112,12 +222,25 @@ module Mongrel
       @header = HeaderOut.new(StringIO.new)
     end
 
+    # Receives a block passing it the header and body for you to work with.
+    # When the block is finished it writes everything you've done to
+    # the socket in the proper order.  This lets you intermix header and
+    # body content as needed.
     def start(status=200)
       @status = status
       yield @header, @body
       finished
     end
-    
+
+    # Primarily used in exception handling to reset the response output in order to write
+    # an alternative response.
+    def reset
+      @header.out.truncate(0); @header.out.rewind   # drop old content too; rewind alone leaves the old tail behind a shorter rewrite
+      @body.truncate(0); @body.rewind               # same for the body, so the alternative response starts from an empty buffer
+    end
+
+    # This takes whatever has been done to header and body and then writes it in the
+    # proper format to make an HTTP/1.1 response.
     def finished
       @header.out.rewind
       @body.rewind
@@ -136,29 +259,8 @@ module Mongrel
   # a response.  Look at the HttpRequest and HttpResponse objects for how
   # to use them.
   class HttpHandler
-    attr_accessor :script_name
-
-    def process(request, response)
-    end
-  end
-
-
-  # The server normally returns a 404 response if a URI is requested, but it
-  # also returns a lame empty message.  This lets you do a 404 response
-  # with a custom message for special URIs.
-  class Error404Handler < HttpHandler
-
-    # Sets the message to return.  This is constructed once for the handler
-    # so it's pretty efficient.
-    def initialize(msg)
-      @response = HttpServer::ERROR_404_RESPONSE + msg
-    end
-    
-    # Just kicks back the standard 404 response with your special message.
     def process(request, response)
-      response.socket.write(@response)
     end
-
   end
 
 
@@ -183,16 +285,6 @@ module Mongrel
   class HttpServer
     attr_reader :acceptor
 
-    # The standard empty 404 response for bad requests.  Use Error4040Handler for custom stuff.
-    ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Mongrel/0.2\r\n\r\nNOT FOUND"
-    ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY"
-
-    # The basic max request size we'll try to read.
-    CHUNK_SIZE=(16 * 1024)
-
-    PATH_INFO="PATH_INFO"
-    SCRIPT_NAME="SCRIPT_NAME"
-    
     # Creates a working server on host:port (strange things happen if port isn't a Number).
     # Use HttpServer::run to start the server.
     #
@@ -210,8 +302,13 @@ module Mongrel
     # Future versions of Mongrel will make this more dynamic (hopefully).
     def initialize(host, port, num_processors=20)
       @socket = TCPServer.new(host, port)
+
       @classifier = URIClassifier.new
       @req_queue = Queue.new
+      @host = host
+      @port = port
+      @num_procesors = num_processors
+
       num_processors.times {|i| Thread.new do
           while client = @req_queue.deq
             process_client(client)
@@ -223,30 +320,35 @@ module Mongrel
 
     # Does the majority of the IO processing.  It has been written in Ruby using
     # about 7 different IO processing strategies and no matter how it's done
-    # the performance just does not improve.  Ruby's use of select to implement
-    # threads means that it will most likely never improve, so the only remaining
-    # approach is to write all or some of this function in C.  That will be the
-    # focus of future releases.
+    # the performance just does not improve.  It is currently carefully constructed
+    # to make sure that it gets the best possible performance, but anyone who
+    # thinks they can make it faster is more than welcome to take a crack at it.
     def process_client(client)
       begin
         parser = HttpParser.new
         params = {}
-        data = client.readpartial(CHUNK_SIZE)
+        data = client.readpartial(Const::CHUNK_SIZE)
 
         while true
           nread = parser.execute(params, data)
           if parser.finished?
-            script_name, path_info, handler = @classifier.resolve(params[PATH_INFO])
+            script_name, path_info, handler = @classifier.resolve(params[Const::REQUEST_URI])
 
             if handler
-              params[PATH_INFO] = path_info
-              params[SCRIPT_NAME] = script_name
+              params[Const::PATH_INFO] = path_info
+              params[Const::SCRIPT_NAME] = script_name
+              params[Const::GATEWAY_INTERFACE]=Const::GATEWAY_INTERFACE_VALUE
+              params[Const::REMOTE_ADDR]=client.peeraddr
+              params[Const::SERVER_NAME]=@host
+              params[Const::SERVER_PORT]=@port
+              params[Const::SERVER_PROTOCOL]=Const::SERVER_PROTOCOL_VALUE
+              params[Const::SERVER_SOFTWARE]=Const::MONGREL_VERSION
 
               request = HttpRequest.new(params, data[nread ... data.length], client)
               response = HttpResponse.new(client)
               handler.process(request, response)
             else
-              client.write(ERROR_404_RESPONSE)
+              client.write(Const::ERROR_404_RESPONSE)
             end
 
             break
@@ -254,7 +356,7 @@ module Mongrel
             # gotta stream and read again until we can get the parser to be character safe
             # TODO: make this more efficient since this means we're parsing a lot repeatedly
             parser.reset
-            data << client.readpartial(CHUNK_SIZE)
+            data << client.readpartial(Const::CHUNK_SIZE)
           end
         end
       rescue EOFError
@@ -274,6 +376,7 @@ module Mongrel
     # Runs the thing.  It returns the thread used so you can "join" it.  You can also
     # access the HttpServer::acceptor attribute to get the thread later.
     def run
+      BasicSocket.do_not_reverse_lookup=true
       @acceptor = Thread.new do
         while true
           @req_queue << @socket.accept
@@ -295,4 +398,95 @@ module Mongrel
       @classifier.unregister(uri)
     end
   end
+
+
+  # The server normally returns a 404 response if a URI is requested, but it
+  # also returns a lame empty message.  This lets you do a 404 response
+  # with a custom message for special URIs.
+  class Error404Handler < HttpHandler
+
+    # Builds the full response once per handler: the stock 404 response from
+    # Const (its body text is "NOT FOUND") with msg appended after it.
+    def initialize(msg)
+      @response = Const::ERROR_404_RESPONSE + msg
+    end
+    
+    # Writes the prebuilt 404 response straight to the client socket; request is not consulted.
+    def process(request, response)
+      response.socket.write(@response)
+    end
+
+  end
+
+
+  # Serves the contents of a directory.  You give it the path to the root
+  # where the files are located, and it tries to find the files based on
+  # the PATH_INFO inside the directory.  If the requested path is a
+  # directory then it returns a simple directory listing.
+  #
+  # It does a simple protection against going outside its root path by
+  # converting all paths to an absolute expanded path, and then making sure
+  # that the final expanded path includes the root path.  If it doesn't
+  # then it simply gives a 404.
+  class DirHandler < HttpHandler
+    # path is the directory to serve; pass listing_allowed=false to answer directory requests with a 403.
+    def initialize(path, listing_allowed=true)
+      @path = File.expand_path(path)
+      @listing_allowed=listing_allowed
+      puts "DIR: #@path"
+    end
+    # Writes an HTML page linking every entry of dir under base (the request URI), or a 403 if listings are off.
+    def send_dir_listing(base, dir, response)
+      if @listing_allowed
+        response.start(200) do |head,out|
+          head['Content-Type'] = "text/html"
+          out << "<html><head><title>Directory Listing</title></head><body>"
+          Dir.entries(dir).each do |child|
+            out << "<a href=\"#{base}/#{child}\">#{child}</a><br/>"
+          end
+          out << "</body></html>"
+        end
+      else
+        response.start(403) do |head,out|
+          out.write("Directory listings not allowed")
+        end
+      end
+    end
+
+    # Sends the whole file at req as a 200 body.  Opened via File.open in binary mode so bytes go out unmodified.
+    def send_file(req, response)
+      response.start(200) do |head,out|
+        File.open(req, "rb") do |f|
+          out.write(f.read)
+        end
+      end
+    end
+
+    # Resolves PATH_INFO under the root and serves it: 404 if outside the root or missing, 403 on access errors.
+    def process(request, response)
+      req = File.expand_path("." + request.params[Const::PATH_INFO], @path)
+      puts "FIND: #{req}"
+      if (req != @path and req.index(File.join(@path, "")) != 0) or !File.exist? req
+        # outside the root (a bare prefix match would let /root2 pass for /root) or missing: 404
+        response.start(404) do |head,out|
+          out << "File not found"
+        end
+      else
+        begin
+          if File.directory? req
+            send_dir_listing(request.params[Const::REQUEST_URI],req, response)
+          else
+            send_file(req, response)
+          end
+        rescue => details
+          response.reset
+          response.start(403) do |head,out|
+            out << "Error accessing file"
+          end
+          STDERR.puts "ERROR: #{details}"
+        end
+      end
+    end
+  end
+
 end