about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--lib/unicorn.rb1
-rw-r--r--lib/unicorn/stream_input.rb152
-rw-r--r--lib/unicorn/tee_input.rb147
-rw-r--r--test/unit/test_stream_input.rb143
-rw-r--r--test/unit/test_tee_input.rb30
5 files changed, 333 insertions, 140 deletions
diff --git a/lib/unicorn.rb b/lib/unicorn.rb
index 622dc6c..7891d67 100644
--- a/lib/unicorn.rb
+++ b/lib/unicorn.rb
@@ -73,6 +73,7 @@ class Unicorn::ClientShutdown < EOFError; end
 
 require 'unicorn/const'
 require 'unicorn/socket_helper'
+require 'unicorn/stream_input'
 require 'unicorn/tee_input'
 require 'unicorn/http_request'
 require 'unicorn/configurator'
diff --git a/lib/unicorn/stream_input.rb b/lib/unicorn/stream_input.rb
new file mode 100644
index 0000000..2a05337
--- /dev/null
+++ b/lib/unicorn/stream_input.rb
@@ -0,0 +1,152 @@
+# -*- encoding: binary -*-
+
+# When processing uploads, Unicorn may expose a StreamInput object under
+# "rack.input" of the Rack (2.x) environment.
+class Unicorn::StreamInput
+  # The I/O chunk size (in +bytes+) for I/O operations where
+  # the size cannot be user-specified when a method is called.
+  # The default is 16 kilobytes.
+  @@io_chunk_size = Unicorn::Const::CHUNK_SIZE
+
+  # Initializes a new StreamInput object.  You normally do not have to call
+  # this unless you are writing an HTTP server.
+  def initialize(socket, request)
+    @chunked = request.content_length.nil?
+    @socket = socket
+    @parser = request
+    @buf = request.buf
+    @rbuf = ''
+    @bytes_read = 0
+    filter_body(@rbuf, @buf) unless @buf.empty?
+  end
+
+  # :call-seq:
+  #   ios.read([length [, buffer ]]) => string, buffer, or nil
+  #
+  # Reads at most length bytes from the I/O stream, or to the end of
+  # file if length is omitted or is nil. length must be a non-negative
+  # integer or nil. If the optional buffer argument is present, it
+  # must reference a String, which will receive the data.
+  #
+  # At end of file, it returns nil or '' depend on length.
+  # ios.read() and ios.read(nil) returns ''.
+  # ios.read(length [, buffer]) returns nil.
+  #
+  # If the Content-Length of the HTTP request is known (as is the common
+  # case for POST requests), then ios.read(length [, buffer]) will block
+  # until the specified length is read (or it is the last chunk).
+  # Otherwise, for uncommon "Transfer-Encoding: chunked" requests,
+  # ios.read(length [, buffer]) will return immediately if there is
+  # any data and only block when nothing is available (providing
+  # IO#readpartial semantics).
+  def read(*args)
+    length = args.shift
+    rv = args.shift || ''
+    if length.nil?
+      rv.replace(@rbuf)
+      @rbuf.replace('')
+      @socket or return rv
+      until eof?
+        @socket.kgio_read(@@io_chunk_size, @buf) or eof!
+        filter_body(@rbuf, @buf)
+        rv << @rbuf
+      end
+      @rbuf.replace('')
+    else
+      if length <= @rbuf.size
+        rv.replace(@rbuf.slice(0, length))
+        @rbuf.replace(@rbuf.slice(length, @rbuf.size) || '')
+      else
+        rv.replace(@rbuf)
+        length -= @rbuf.size
+        @rbuf.replace('')
+        until length == 0 || eof? || (rv.size > 0 && @chunked)
+          @socket.kgio_read(length, @buf) or eof!
+          filter_body(@rbuf, @buf)
+          rv << @rbuf
+          length -= @rbuf.size
+          @rbuf.replace('')
+        end
+      end
+      rv = nil if rv.empty?
+    end
+    rv
+  end
+
+  # :call-seq:
+  #   ios.gets   => string or nil
+  #
+  # Reads the next ``line'' from the I/O stream; lines are separated
+  # by the global record separator ($/, typically "\n"). A global
+  # record separator of nil reads the entire unread contents of ios.
+  # Returns nil if called at the end of file.
+  # This takes zero arguments for strict Rack::Lint compatibility,
+  # unlike IO#gets.
+  def gets
+    sep = $/
+    if sep.nil?
+      rv = read
+      return rv.empty? ? nil : rv
+    end
+    re = /\A(.*?#{Regexp.escape(sep)})/
+
+    begin
+      @rbuf.gsub!(re, '') and return $1
+      if eof?
+        if @rbuf.empty?
+          return nil
+        else
+          rv = @rbuf.dup
+          @rbuf.replace('')
+          return rv
+        end
+      end
+      @socket.kgio_read(@@io_chunk_size, @buf) or eof!
+      filter_body(once = '', @buf)
+      @rbuf << once
+    end while true
+  end
+
+  # :call-seq:
+  #   ios.each { |line| block }  => ios
+  #
+  # Executes the block for every ``line'' in *ios*, where lines are
+  # separated by the global record separator ($/, typically "\n").
+  def each(&block)
+    while line = gets
+      yield line
+    end
+
+    self # Rack does not specify what the return value is here
+  end
+
+private
+
+  def eof?
+    if @parser.body_eof?
+      until @parser.parse
+        once = @socket.kgio_read(@@io_chunk_size) or eof!
+        @buf << once
+      end
+      @socket = nil
+      true
+    else
+      false
+    end
+  end
+
+  def filter_body(dst, src)
+    rv = @parser.filter_body(dst, src)
+    @bytes_read += dst.size
+    rv
+  end
+
+  def eof!
+    # in case client only did a premature shutdown(SHUT_WR)
+    # we do support clients that shutdown(SHUT_WR) after the
+    # _entire_ request has been sent, and those will not have
+    # raised EOFError on us.
+    @socket.close if @socket
+    raise Unicorn::ClientShutdown, "bytes_read=#{@bytes_read}", []
+  end
+end
diff --git a/lib/unicorn/tee_input.rb b/lib/unicorn/tee_input.rb
index 351d79f..74d9df6 100644
--- a/lib/unicorn/tee_input.rb
+++ b/lib/unicorn/tee_input.rb
@@ -11,34 +11,18 @@
 #
 # When processing uploads, Unicorn exposes a TeeInput object under
 # "rack.input" of the Rack environment.
-class Unicorn::TeeInput
-  attr_accessor :tmp, :socket, :parser, :env, :buf, :len, :buf2
-
+class Unicorn::TeeInput < Unicorn::StreamInput
   # The maximum size (in +bytes+) to buffer in memory before
   # resorting to a temporary file.  Default is 112 kilobytes.
   @@client_body_buffer_size = Unicorn::Const::MAX_BODY
 
-  # The I/O chunk size (in +bytes+) for I/O operations where
-  # the size cannot be user-specified when a method is called.
-  # The default is 16 kilobytes.
-  @@io_chunk_size = Unicorn::Const::CHUNK_SIZE
-
   # Initializes a new TeeInput object.  You normally do not have to call
   # this unless you are writing an HTTP server.
   def initialize(socket, request)
-    @socket = socket
-    @parser = request
-    @buf = request.buf
-    @env = request.env
     @len = request.content_length
+    super
     @tmp = @len && @len < @@client_body_buffer_size ?
            StringIO.new("") : Unicorn::TmpIO.new
-    @buf2 = ""
-    if @buf.size > 0
-      @parser.filter_body(@buf2, @buf) and finalize_input
-      @tmp.write(@buf2)
-      @tmp.rewind
-    end
   end
 
   # :call-seq:
@@ -59,15 +43,9 @@ class Unicorn::TeeInput
   # specified +length+ in a loop until it returns +nil+.
   def size
     @len and return @len
-
-    if socket
-      pos = @tmp.pos
-      while tee(@@io_chunk_size, @buf2)
-      end
-      @tmp.seek(pos)
-    end
-
-    @len = @tmp.size
+    consume!
+    @tmp.rewind
+    @len = @bytes_read
   end
 
   # :call-seq:
@@ -90,24 +68,7 @@ class Unicorn::TeeInput
   # any data and only block when nothing is available (providing
   # IO#readpartial semantics).
   def read(*args)
-    @socket or return @tmp.read(*args)
-
-    length = args.shift
-    if nil == length
-      rv = @tmp.read || ""
-      while tee(@@io_chunk_size, @buf2)
-        rv << @buf2
-      end
-      rv
-    else
-      rv = args.shift || ""
-      diff = @tmp.size - @tmp.pos
-      if 0 == diff
-        ensure_length(tee(length, rv), length)
-      else
-        ensure_length(@tmp.read(diff > length ? length : diff, rv), length)
-      end
-    end
+    @socket ? tee(super) : @tmp.read(*args)
   end
 
   # :call-seq:
@@ -121,42 +82,9 @@ class Unicorn::TeeInput
   # unlike IO#gets.
   def gets
     @socket or return @tmp.gets
-    sep = $/ or return read
-
-    orig_size = @tmp.size
-    if @tmp.pos == orig_size
-      tee(@@io_chunk_size, @buf2) or return nil
-      @tmp.seek(orig_size)
-    end
-
-    sep_size = Rack::Utils.bytesize(sep)
-    line = @tmp.gets # cannot be nil here since size > pos
-    sep == line[-sep_size, sep_size] and return line
-
-    # unlikely, if we got here, then @tmp is at EOF
-    begin
-      orig_size = @tmp.pos
-      tee(@@io_chunk_size, @buf2) or break
-      @tmp.seek(orig_size)
-      line << @tmp.gets
-      sep == line[-sep_size, sep_size] and return line
-      # @tmp is at EOF again here, retry the loop
-    end while true
-
-    line
-  end
-
-  # :call-seq:
-  #   ios.each { |line| block }  => ios
-  #
-  # Executes the block for every ``line'' in *ios*, where lines are
-  # separated by the global record separator ($/, typically "\n").
-  def each(&block)
-    while line = gets
-      yield line
-    end
-
-    self # Rack does not specify what the return value is here
+    rv = super
+    # the $/.nil? case is implemented using read, so don't tee() again
+    $/.nil? ? rv : tee(rv)
   end
 
   # :call-seq:
@@ -166,59 +94,24 @@ class Unicorn::TeeInput
   # the offset (zero) of the +ios+ pointer.  Subsequent reads will
   # start from the beginning of the previously-buffered input.
   def rewind
+    return 0 if @bytes_read == 0
+    consume! if @socket
     @tmp.rewind # Rack does not specify what the return value is here
   end
 
 private
 
-  # tees off a +length+ chunk of data from the input into the IO
-  # backing store as well as returning it.  +dst+ must be specified.
-  # returns nil if reading from the input returns nil
-  def tee(length, dst)
-    unless @parser.body_eof?
-      r = @socket.kgio_read(length, @buf) or eof!
-      unless @parser.filter_body(dst, @buf)
-        @tmp.write(dst)
-        @tmp.seek(0, IO::SEEK_END) # workaround FreeBSD/OSX + MRI 1.8.x bug
-        return dst
-      end
-    end
-    finalize_input
+  # consumes the stream of the socket
+  def consume!
+    junk = ""
+    nil while read(@@io_chunk_size, junk)
   end
 
-  def finalize_input
-    until @parser.parse
-      r = @socket.kgio_read(@@io_chunk_size) or eof!
-      @buf << r
+  def tee(buffer)
+    if buffer && (n = buffer.size) > 0
+      @tmp.write(buffer)
+      @tmp.seek(0, IO::SEEK_END) # workaround FreeBSD/OSX + MRI 1.8.x bug
     end
-    @socket = nil
-  end
-
-  # tee()s into +dst+ until it is of +length+ bytes (or until
-  # we've reached the Content-Length of the request body).
-  # Returns +dst+ (the exact object, not a duplicate)
-  # To continue supporting applications that need near-real-time
-  # streaming input bodies, this is a no-op for
-  # "Transfer-Encoding: chunked" requests.
-  def ensure_length(dst, length)
-    # len is nil for chunked bodies, so we can't ensure length for those
-    # since they could be streaming bidirectionally and we don't want to
-    # block the caller in that case.
-    return dst if dst.nil? || @len.nil?
-
-    while dst.size < length && tee(length - dst.size, @buf2)
-      dst << @buf2
-    end
-
-    dst
-  end
-
-  def eof!
-    # in case client only did a premature shutdown(SHUT_WR)
-    # we do support clients that shutdown(SHUT_WR) after the
-    # _entire_ request has been sent, and those will not have
-    # raised EOFError on us.
-    @socket.close if @socket
-    raise Unicorn::ClientShutdown, "bytes_read=#{@tmp.size}", []
+    buffer
   end
 end
diff --git a/test/unit/test_stream_input.rb b/test/unit/test_stream_input.rb
new file mode 100644
index 0000000..20e048d
--- /dev/null
+++ b/test/unit/test_stream_input.rb
@@ -0,0 +1,143 @@
+# -*- encoding: binary -*-
+
+require 'test/unit'
+require 'digest/sha1'
+require 'unicorn'
+
+class TestStreamInput < Test::Unit::TestCase
+  def setup
+    @rs = $/
+    @env = {}
+    @rd, @wr = Kgio::UNIXSocket.pair
+    @rd.sync = @wr.sync = true
+    @start_pid = $$
+  end
+
+  def teardown
+    return if $$ != @start_pid
+    $/ = @rs
+    @rd.close rescue nil
+    @wr.close rescue nil
+    Process.waitall
+  end
+
+  def test_read_small
+    r = init_request('hello')
+    si = Unicorn::StreamInput.new(@rd, r)
+    assert_equal 'hello', si.read
+    assert_equal '', si.read
+    assert_nil si.read(5)
+    assert_nil si.gets
+  end
+
+  def test_gets_oneliner
+    r = init_request('hello')
+    si = Unicorn::StreamInput.new(@rd, r)
+    assert_equal 'hello', si.gets
+    assert_nil si.gets
+  end
+
+  def test_gets_multiline
+    r = init_request("a\nb\n\n")
+    si = Unicorn::StreamInput.new(@rd, r)
+    assert_equal "a\n", si.gets
+    assert_equal "b\n", si.gets
+    assert_equal "\n", si.gets
+    assert_nil si.gets
+  end
+
+  def test_gets_empty_rs
+    $/ = nil
+    r = init_request("a\nb\n\n")
+    si = Unicorn::StreamInput.new(@rd, r)
+    assert_equal "a\nb\n\n", si.gets
+    assert_nil si.gets
+  end
+
+  def test_read_with_equal_len
+    r = init_request("abcde")
+    si = Unicorn::StreamInput.new(@rd, r)
+    assert_equal "abcde", si.read(5)
+    assert_nil si.read(5)
+  end
+
+  def test_big_body_multi
+    r = init_request('.', Unicorn::Const::MAX_BODY + 1)
+    si = Unicorn::StreamInput.new(@rd, r)
+    assert_equal Unicorn::Const::MAX_BODY, @parser.content_length
+    assert ! @parser.body_eof?
+    nr = Unicorn::Const::MAX_BODY / 4
+    pid = fork {
+      @rd.close
+      nr.times { @wr.write('....') }
+      @wr.close
+    }
+    @wr.close
+    assert_equal '.', si.read(1)
+    nr.times { |x|
+      assert_equal '....', si.read(4), "nr=#{x}"
+    }
+    assert_nil si.read(1)
+    status = nil
+    assert_nothing_raised { pid, status = Process.waitpid2(pid) }
+    assert status.success?
+  end
+
+  def test_gets_long
+    r = init_request("hello", 5 + (4096 * 4 * 3) + "#$/foo#$/".size)
+    si = Unicorn::StreamInput.new(@rd, r)
+    status = line = nil
+    pid = fork {
+      @rd.close
+      3.times { @wr.write("ffff" * 4096) }
+      @wr.write "#$/foo#$/"
+      @wr.close
+    }
+    @wr.close
+    assert_nothing_raised { line = si.gets }
+    assert_equal(4096 * 4 * 3 + 5 + $/.size, line.size)
+    assert_equal("hello" << ("ffff" * 4096 * 3) << "#$/", line)
+    assert_nothing_raised { line = si.gets }
+    assert_equal "foo#$/", line
+    assert_nil si.gets
+    assert_nothing_raised { pid, status = Process.waitpid2(pid) }
+    assert status.success?
+  end
+
+  def test_read_with_buffer
+    r = init_request('hello')
+    si = Unicorn::StreamInput.new(@rd, r)
+    buf = ''
+    rv = si.read(4, buf)
+    assert_equal 'hell', rv
+    assert_equal 'hell', buf
+    assert_equal rv.object_id, buf.object_id
+    assert_equal 'o', si.read
+    assert_equal nil, si.read(5, buf)
+  end
+
+  def test_read_with_buffer_clobbers
+    r = init_request('hello')
+    si = Unicorn::StreamInput.new(@rd, r)
+    buf = 'foo'
+    assert_equal 'hello', si.read(nil, buf)
+    assert_equal 'hello', buf
+    assert_equal '', si.read(nil, buf)
+    assert_equal '', buf
+    buf = 'asdf'
+    assert_nil si.read(5, buf)
+    assert_equal '', buf
+  end
+
+  def init_request(body, size = nil)
+    @parser = Unicorn::HttpParser.new
+    body = body.to_s.freeze
+    @buf = "POST / HTTP/1.1\r\n" \
+           "Host: localhost\r\n" \
+           "Content-Length: #{size || body.size}\r\n" \
+           "\r\n#{body}"
+    assert_equal @env, @parser.headers(@env, @buf)
+    assert_equal body, @buf
+    @parser
+  end
+end
diff --git a/test/unit/test_tee_input.rb b/test/unit/test_tee_input.rb
index a10ca34..b44f609 100644
--- a/test/unit/test_tee_input.rb
+++ b/test/unit/test_tee_input.rb
@@ -4,6 +4,10 @@ require 'test/unit'
 require 'digest/sha1'
 require 'unicorn'
 
+class TeeInput < Unicorn::TeeInput
+  attr_accessor :tmp, :len
+end
+
 class TestTeeInput < Test::Unit::TestCase
 
   def setup
@@ -28,7 +32,7 @@ class TestTeeInput < Test::Unit::TestCase
 
   def test_gets_long
     r = init_request("hello", 5 + (4096 * 4 * 3) + "#$/foo#$/".size)
-    ti = Unicorn::TeeInput.new(@rd, r)
+    ti = TeeInput.new(@rd, r)
     status = line = nil
     pid = fork {
       @rd.close
@@ -49,7 +53,7 @@ class TestTeeInput < Test::Unit::TestCase
 
   def test_gets_short
     r = init_request("hello", 5 + "#$/foo".size)
-    ti = Unicorn::TeeInput.new(@rd, r)
+    ti = TeeInput.new(@rd, r)
     status = line = nil
     pid = fork {
       @rd.close
@@ -68,7 +72,7 @@ class TestTeeInput < Test::Unit::TestCase
 
   def test_small_body
     r = init_request('hello')
-    ti = Unicorn::TeeInput.new(@rd, r)
+    ti = TeeInput.new(@rd, r)
     assert_equal 0, @parser.content_length
     assert @parser.body_eof?
     assert_equal StringIO, ti.tmp.class
@@ -77,11 +81,12 @@ class TestTeeInput < Test::Unit::TestCase
     assert_equal 'hello', ti.read
     assert_equal '', ti.read
     assert_nil ti.read(4096)
+    assert_equal 5, ti.size
   end
 
   def test_read_with_buffer
     r = init_request('hello')
-    ti = Unicorn::TeeInput.new(@rd, r)
+    ti = TeeInput.new(@rd, r)
     buf = ''
     rv = ti.read(4, buf)
     assert_equal 'hell', rv
@@ -96,7 +101,7 @@ class TestTeeInput < Test::Unit::TestCase
 
   def test_big_body
     r = init_request('.' * Unicorn::Const::MAX_BODY << 'a')
-    ti = Unicorn::TeeInput.new(@rd, r)
+    ti = TeeInput.new(@rd, r)
     assert_equal 0, @parser.content_length
     assert @parser.body_eof?
     assert_kind_of File, ti.tmp
@@ -108,7 +113,7 @@ class TestTeeInput < Test::Unit::TestCase
     a, b = 300, 3
     r = init_request('.' * b, 300)
     assert_equal 300, @parser.content_length
-    ti = Unicorn::TeeInput.new(@rd, r)
+    ti = TeeInput.new(@rd, r)
     pid = fork {
       @wr.write('.' * 197)
       sleep 1 # still a *potential* race here that would make the test moot...
@@ -122,12 +127,11 @@ class TestTeeInput < Test::Unit::TestCase
 
   def test_big_body_multi
     r = init_request('.', Unicorn::Const::MAX_BODY + 1)
-    ti = Unicorn::TeeInput.new(@rd, r)
+    ti = TeeInput.new(@rd, r)
     assert_equal Unicorn::Const::MAX_BODY, @parser.content_length
     assert ! @parser.body_eof?
     assert_kind_of File, ti.tmp
     assert_equal 0, ti.tmp.pos
-    assert_equal 1, ti.tmp.size
     assert_equal Unicorn::Const::MAX_BODY + 1, ti.size
     nr = Unicorn::Const::MAX_BODY / 4
     pid = fork {
@@ -138,8 +142,8 @@ class TestTeeInput < Test::Unit::TestCase
     @wr.close
     assert_equal '.', ti.read(1)
     assert_equal Unicorn::Const::MAX_BODY + 1, ti.size
-    nr.times {
-      assert_equal '....', ti.read(4)
+    nr.times { |x|
+      assert_equal '....', ti.read(4), "nr=#{x}"
       assert_equal Unicorn::Const::MAX_BODY + 1, ti.size
     }
     assert_nil ti.read(1)
@@ -163,7 +167,7 @@ class TestTeeInput < Test::Unit::TestCase
       @wr.write("0\r\n\r\n")
     }
     @wr.close
-    ti = Unicorn::TeeInput.new(@rd, @parser)
+    ti = TeeInput.new(@rd, @parser)
     assert_nil @parser.content_length
     assert_nil ti.len
     assert ! @parser.body_eof?
@@ -201,7 +205,7 @@ class TestTeeInput < Test::Unit::TestCase
       end
       @wr.write("0\r\n\r\n")
     }
-    ti = Unicorn::TeeInput.new(@rd, @parser)
+    ti = TeeInput.new(@rd, @parser)
     assert_nil @parser.content_length
     assert_nil ti.len
     assert ! @parser.body_eof?
@@ -230,7 +234,7 @@ class TestTeeInput < Test::Unit::TestCase
       @wr.write("Hello: World\r\n\r\n")
     }
     @wr.close
-    ti = Unicorn::TeeInput.new(@rd, @parser)
+    ti = TeeInput.new(@rd, @parser)
     assert_nil @parser.content_length
     assert_nil ti.len
     assert ! @parser.body_eof?