about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/unicorn.rb1
-rw-r--r--lib/unicorn/configurator.rb32
-rw-r--r--lib/unicorn/const.rb4
-rw-r--r--lib/unicorn/http_request.rb13
-rw-r--r--lib/unicorn/http_server.rb9
-rw-r--r--lib/unicorn/preread_input.rb2
-rw-r--r--lib/unicorn/stream_input.rb156
-rw-r--r--lib/unicorn/tee_input.rb147
8 files changed, 225 insertions, 139 deletions
diff --git a/lib/unicorn.rb b/lib/unicorn.rb
index 622dc6c..7891d67 100644
--- a/lib/unicorn.rb
+++ b/lib/unicorn.rb
@@ -73,6 +73,7 @@ class Unicorn::ClientShutdown < EOFError; end
 
 require 'unicorn/const'
 require 'unicorn/socket_helper'
+require 'unicorn/stream_input'
 require 'unicorn/tee_input'
 require 'unicorn/http_request'
 require 'unicorn/configurator'
diff --git a/lib/unicorn/configurator.rb b/lib/unicorn/configurator.rb
index dd515a7..2a83dea 100644
--- a/lib/unicorn/configurator.rb
+++ b/lib/unicorn/configurator.rb
@@ -39,6 +39,7 @@ class Unicorn::Configurator
       },
     :pid => nil,
     :preload_app => false,
+    :rewindable_input => true, # for Rack 2.x: (Rack::VERSION[0] <= 1),
   }
   #:startdoc:
 
@@ -373,12 +374,22 @@ class Unicorn::Configurator
   # cause the master process to exit with an error.
 
   def preload_app(bool)
-    case bool
-    when TrueClass, FalseClass
-      set[:preload_app] = bool
-    else
-      raise ArgumentError, "preload_app=#{bool.inspect} not a boolean"
-    end
+    set_bool(:preload_app, bool)
+  end
+
+  # Toggles making <code>env["rack.input"]</code> rewindable.
+  # Disabling rewindability can improve performance by lowering
+  # I/O and memory usage for applications that accept uploads.
+  # Keep in mind that the Rack 1.x spec requires
+  # <code>env["rack.input"]</code> to be rewindable, so this allows
+  # intentionally violating the current Rack 1.x spec.
+  #
+  # +rewindable_input+ defaults to +true+ when used with Rack 1.x for
+  # Rack conformance.  When Rack 2.x is finalized, this will most
+  # likely default to +false+ while still conforming to the newer
+  # (less demanding) spec.
+  def rewindable_input(bool)
+    set_bool(:rewindable_input, bool)
   end
 
   # Allow redirecting $stderr to a given path.  Unlike doing this from
@@ -469,6 +480,15 @@ private
     end
   end
 
+  def set_bool(var, bool) #:nodoc:
+    case bool
+    when true, false
+      set[var] = bool
+    else
+      raise ArgumentError, "#{var}=#{bool.inspect} not a boolean"
+    end
+  end
+
   def set_hook(var, my_proc, req_arity = 2) #:nodoc:
     case my_proc
     when Proc
diff --git a/lib/unicorn/const.rb b/lib/unicorn/const.rb
index dc75914..375f72f 100644
--- a/lib/unicorn/const.rb
+++ b/lib/unicorn/const.rb
@@ -7,8 +7,8 @@
 # improve things much compared to constants.
 module Unicorn::Const
 
-  # The current version of Unicorn, currently 2.0.1
-  UNICORN_VERSION = "2.0.1"
+  # The current version of Unicorn, currently 3.0.0pre1
+  UNICORN_VERSION = "3.0.0pre1"
 
   # default TCP listen host address (0.0.0.0, all interfaces)
   DEFAULT_HOST = "0.0.0.0"
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index 2dcd839..1e3ac26 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -25,7 +25,15 @@ class Unicorn::HttpParser
   # A frozen format for this is about 15% faster
   REMOTE_ADDR = 'REMOTE_ADDR'.freeze
   RACK_INPUT = 'rack.input'.freeze
-  TeeInput = Unicorn::TeeInput
+  @@input_class = Unicorn::TeeInput
+
+  def self.input_class
+    @@input_class
+  end
+
+  def self.input_class=(klass)
+    @@input_class = klass
+  end
   # :startdoc:
 
   # Does the majority of the IO processing.  It has been written in
@@ -63,7 +71,8 @@ class Unicorn::HttpParser
         buf << socket.kgio_read!(16384)
       end while parse.nil?
     end
-    e[RACK_INPUT] = 0 == content_length ? NULL_IO : TeeInput.new(socket, self)
+    e[RACK_INPUT] = 0 == content_length ?
+                    NULL_IO : @@input_class.new(socket, self)
     e.merge!(DEFAULTS)
   end
 end
diff --git a/lib/unicorn/http_server.rb b/lib/unicorn/http_server.rb
index 74b2b24..0bb4359 100644
--- a/lib/unicorn/http_server.rb
+++ b/lib/unicorn/http_server.rb
@@ -355,6 +355,15 @@ class Unicorn::HttpServer
     kill_each_worker(:KILL)
   end
 
+  def rewindable_input
+    Unicorn::HttpRequest.input_class.method_defined?(:rewind)
+  end
+
+  def rewindable_input=(bool)
+    Unicorn::HttpRequest.input_class = bool ?
+                                Unicorn::TeeInput : Unicorn::StreamInput
+  end
+
   private
 
   # wait for a signal hander to wake us up and then consume the pipe
diff --git a/lib/unicorn/preread_input.rb b/lib/unicorn/preread_input.rb
index ec83cb2..7a315b7 100644
--- a/lib/unicorn/preread_input.rb
+++ b/lib/unicorn/preread_input.rb
@@ -20,7 +20,7 @@ class PrereadInput
   def call(env)
     buf = ""
     input = env["rack.input"]
-    if buf = input.read(16384)
+    if input.respond_to?(:rewind)
       true while input.read(16384, buf)
       input.rewind
     end
diff --git a/lib/unicorn/stream_input.rb b/lib/unicorn/stream_input.rb
new file mode 100644
index 0000000..ef8997e
--- /dev/null
+++ b/lib/unicorn/stream_input.rb
@@ -0,0 +1,156 @@
+# -*- encoding: binary -*-
+
+# When processing uploads, Unicorn may expose a StreamInput object under
+# "rack.input" of the Rack (2.x) environment.
+class Unicorn::StreamInput
+  # The I/O chunk size (in +bytes+) for I/O operations where
+  # the size cannot be user-specified when a method is called.
+  # The default is 16 kilobytes.
+  @@io_chunk_size = Unicorn::Const::CHUNK_SIZE
+
+  # Initializes a new StreamInput object.  You normally do not have to call
+  # this unless you are writing an HTTP server.
+  def initialize(socket, request)
+    @chunked = request.content_length.nil?
+    @socket = socket
+    @parser = request
+    @buf = request.buf
+    @rbuf = ''
+    @bytes_read = 0
+    filter_body(@rbuf, @buf) unless @buf.empty?
+  end
+
+  # :call-seq:
+  #   ios.read([length [, buffer ]]) => string, buffer, or nil
+  #
+  # Reads at most length bytes from the I/O stream, or to the end of
+  # file if length is omitted or is nil. length must be a non-negative
+  # integer or nil. If the optional buffer argument is present, it
+  # must reference a String, which will receive the data.
+  #
+  # At end of file, it returns nil or '' depend on length.
+  # ios.read() and ios.read(nil) returns ''.
+  # ios.read(length [, buffer]) returns nil.
+  #
+  # If the Content-Length of the HTTP request is known (as is the common
+  # case for POST requests), then ios.read(length [, buffer]) will block
+  # until the specified length is read (or it is the last chunk).
+  # Otherwise, for uncommon "Transfer-Encoding: chunked" requests,
+  # ios.read(length [, buffer]) will return immediately if there is
+  # any data and only block when nothing is available (providing
+  # IO#readpartial semantics).
+  def read(*args)
+    length = args.shift
+    rv = args.shift || ''
+    if length.nil?
+      read_all(rv)
+    else
+      if length <= @rbuf.size
+        rv.replace(@rbuf.slice(0, length))
+        @rbuf.replace(@rbuf.slice(length, @rbuf.size) || '')
+      else
+        rv.replace(@rbuf)
+        length -= @rbuf.size
+        @rbuf.replace('')
+        until length == 0 || eof? || (rv.size > 0 && @chunked)
+          @socket.kgio_read(length, @buf) or eof!
+          filter_body(@rbuf, @buf)
+          rv << @rbuf
+          length -= @rbuf.size
+          @rbuf.replace('')
+        end
+      end
+      rv = nil if rv.empty? && length != 0
+    end
+    rv
+  end
+
+  # :call-seq:
+  #   ios.gets   => string or nil
+  #
+  # Reads the next ``line'' from the I/O stream; lines are separated
+  # by the global record separator ($/, typically "\n"). A global
+  # record separator of nil reads the entire unread contents of ios.
+  # Returns nil if called at the end of file.
+  # This takes zero arguments for strict Rack::Lint compatibility,
+  # unlike IO#gets.
+  def gets
+    sep = $/
+    if sep.nil?
+      read_all(rv = '')
+      return rv.empty? ? nil : rv
+    end
+    re = /\A(.*?#{Regexp.escape(sep)})/
+
+    begin
+      @rbuf.gsub!(re, '') and return $1
+      if eof?
+        if @rbuf.empty?
+          return nil
+        else
+          rv = @rbuf.dup
+          @rbuf.replace('')
+          return rv
+        end
+      end
+      @socket.kgio_read(@@io_chunk_size, @buf) or eof!
+      filter_body(once = '', @buf)
+      @rbuf << once
+    end while true
+  end
+
+  # :call-seq:
+  #   ios.each { |line| block }  => ios
+  #
+  # Executes the block for every ``line'' in *ios*, where lines are
+  # separated by the global record separator ($/, typically "\n").
+  def each(&block)
+    while line = gets
+      yield line
+    end
+
+    self # Rack does not specify what the return value is here
+  end
+
+private
+
+  def eof?
+    if @parser.body_eof?
+      until @parser.parse
+        once = @socket.kgio_read(@@io_chunk_size) or eof!
+        @buf << once
+      end
+      @socket = nil
+      true
+    else
+      false
+    end
+  end
+
+  def filter_body(dst, src)
+    rv = @parser.filter_body(dst, src)
+    @bytes_read += dst.size
+    rv
+  end
+
+  def read_all(dst)
+    dst.replace(@rbuf)
+    @socket or return
+    until eof?
+      @socket.kgio_read(@@io_chunk_size, @buf) or eof!
+      filter_body(@rbuf, @buf)
+      dst << @rbuf
+    end
+    ensure
+      @rbuf.replace('')
+  end
+
+  def eof!
+    # in case client only did a premature shutdown(SHUT_WR)
+    # we do support clients that shutdown(SHUT_WR) after the
+    # _entire_ request has been sent, and those will not have
+    # raised EOFError on us.
+    @socket.close if @socket
+    raise Unicorn::ClientShutdown, "bytes_read=#{@bytes_read}", []
+  end
+end
diff --git a/lib/unicorn/tee_input.rb b/lib/unicorn/tee_input.rb
index a3e01d2..ee3effd 100644
--- a/lib/unicorn/tee_input.rb
+++ b/lib/unicorn/tee_input.rb
@@ -11,34 +11,18 @@
 #
 # When processing uploads, Unicorn exposes a TeeInput object under
 # "rack.input" of the Rack environment.
-class Unicorn::TeeInput
-  attr_accessor :tmp, :socket, :parser, :env, :buf, :len, :buf2
-
+class Unicorn::TeeInput < Unicorn::StreamInput
   # The maximum size (in +bytes+) to buffer in memory before
   # resorting to a temporary file.  Default is 112 kilobytes.
   @@client_body_buffer_size = Unicorn::Const::MAX_BODY
 
-  # The I/O chunk size (in +bytes+) for I/O operations where
-  # the size cannot be user-specified when a method is called.
-  # The default is 16 kilobytes.
-  @@io_chunk_size = Unicorn::Const::CHUNK_SIZE
-
   # Initializes a new TeeInput object.  You normally do not have to call
   # this unless you are writing an HTTP server.
   def initialize(socket, request)
-    @socket = socket
-    @parser = request
-    @buf = request.buf
-    @env = request.env
     @len = request.content_length
+    super
     @tmp = @len && @len < @@client_body_buffer_size ?
            StringIO.new("") : Unicorn::TmpIO.new
-    @buf2 = ""
-    if @buf.size > 0
-      @parser.filter_body(@buf2, @buf) and finalize_input
-      @tmp.write(@buf2)
-      @tmp.rewind
-    end
   end
 
   # :call-seq:
@@ -59,15 +43,10 @@ class Unicorn::TeeInput
   # specified +length+ in a loop until it returns +nil+.
   def size
     @len and return @len
-
-    if socket
-      pos = @tmp.pos
-      while tee(@@io_chunk_size, @buf2)
-      end
-      @tmp.seek(pos)
-    end
-
-    @len = @tmp.size
+    pos = @bytes_read
+    consume!
+    @tmp.pos = pos
+    @len = @bytes_read
   end
 
   # :call-seq:
@@ -90,24 +69,7 @@ class Unicorn::TeeInput
   # any data and only block when nothing is available (providing
   # IO#readpartial semantics).
   def read(*args)
-    @socket or return @tmp.read(*args)
-
-    length = args.shift
-    if nil == length
-      rv = @tmp.read || ""
-      while tee(@@io_chunk_size, @buf2)
-        rv << @buf2
-      end
-      rv
-    else
-      rv = args.shift || ""
-      diff = @tmp.size - @tmp.pos
-      if 0 == diff
-        ensure_length(tee(length, rv), length)
-      else
-        ensure_length(@tmp.read(diff > length ? length : diff, rv), length)
-      end
-    end
+    @socket ? tee(super) : @tmp.read(*args)
   end
 
   # :call-seq:
@@ -120,43 +82,7 @@ class Unicorn::TeeInput
   # This takes zero arguments for strict Rack::Lint compatibility,
   # unlike IO#gets.
   def gets
-    @socket or return @tmp.gets
-    sep = $/ or return read
-
-    orig_size = @tmp.size
-    if @tmp.pos == orig_size
-      tee(@@io_chunk_size, @buf2) or return nil
-      @tmp.seek(orig_size)
-    end
-
-    sep_size = Rack::Utils.bytesize(sep)
-    line = @tmp.gets # cannot be nil here since size > pos
-    sep == line[-sep_size, sep_size] and return line
-
-    # unlikely, if we got here, then @tmp is at EOF
-    begin
-      orig_size = @tmp.pos
-      tee(@@io_chunk_size, @buf2) or break
-      @tmp.seek(orig_size)
-      line << @tmp.gets
-      sep == line[-sep_size, sep_size] and return line
-      # @tmp is at EOF again here, retry the loop
-    end while true
-
-    line
-  end
-
-  # :call-seq:
-  #   ios.each { |line| block }  => ios
-  #
-  # Executes the block for every ``line'' in *ios*, where lines are
-  # separated by the global record separator ($/, typically "\n").
-  def each(&block)
-    while line = gets
-      yield line
-    end
-
-    self # Rack does not specify what the return value is here
+    @socket ? tee(super) : @tmp.gets
   end
 
   # :call-seq:
@@ -166,59 +92,24 @@ class Unicorn::TeeInput
   # the offset (zero) of the +ios+ pointer.  Subsequent reads will
   # start from the beginning of the previously-buffered input.
   def rewind
+    return 0 if @bytes_read == 0
+    consume! if @socket
     @tmp.rewind # Rack does not specify what the return value is here
   end
 
 private
 
-  # tees off a +length+ chunk of data from the input into the IO
-  # backing store as well as returning it.  +dst+ must be specified.
-  # returns nil if reading from the input returns nil
-  def tee(length, dst)
-    unless @parser.body_eof?
-      r = @socket.kgio_read(length, @buf) or eof!
-      unless @parser.filter_body(dst, @buf)
-        @tmp.write(dst)
-        @tmp.seek(0, IO::SEEK_END) # workaround FreeBSD/OSX + MRI 1.8.x bug
-        return dst
-      end
-    end
-    finalize_input
+  # consumes the stream of the socket
+  def consume!
+    junk = ""
+    nil while read(@@io_chunk_size, junk)
   end
 
-  def finalize_input
-    while @parser.trailers(@env, @buf).nil?
-      r = @socket.kgio_read(@@io_chunk_size) or eof!
-      @buf << r
+  def tee(buffer)
+    if buffer && (n = buffer.size) > 0
+      @tmp.write(buffer)
+      @tmp.seek(0, IO::SEEK_END) # workaround FreeBSD/OSX + MRI 1.8.x bug
     end
-    @socket = nil
-  end
-
-  # tee()s into +dst+ until it is of +length+ bytes (or until
-  # we've reached the Content-Length of the request body).
-  # Returns +dst+ (the exact object, not a duplicate)
-  # To continue supporting applications that need near-real-time
-  # streaming input bodies, this is a no-op for
-  # "Transfer-Encoding: chunked" requests.
-  def ensure_length(dst, length)
-    # len is nil for chunked bodies, so we can't ensure length for those
-    # since they could be streaming bidirectionally and we don't want to
-    # block the caller in that case.
-    return dst if dst.nil? || @len.nil?
-
-    while dst.size < length && tee(length - dst.size, @buf2)
-      dst << @buf2
-    end
-
-    dst
-  end
-
-  def eof!
-    # in case client only did a premature shutdown(SHUT_WR)
-    # we do support clients that shutdown(SHUT_WR) after the
-    # _entire_ request has been sent, and those will not have
-    # raised EOFError on us.
-    @socket.close if @socket
-    raise Unicorn::ClientShutdown, "bytes_read=#{@tmp.size}", []
+    buffer
   end
 end