about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/unicorn.rb3
-rw-r--r--lib/unicorn/chunked_reader.rb96
-rw-r--r--lib/unicorn/const.rb2
-rw-r--r--lib/unicorn/http_request.rb74
-rw-r--r--lib/unicorn/tee_input.rb145
5 files changed, 263 insertions, 57 deletions
diff --git a/lib/unicorn.rb b/lib/unicorn.rb
index f43bb0f..f45c613 100644
--- a/lib/unicorn.rb
+++ b/lib/unicorn.rb
@@ -10,6 +10,8 @@ module Unicorn
   autoload :HttpRequest, 'unicorn/http_request'
   autoload :HttpResponse, 'unicorn/http_response'
   autoload :Configurator, 'unicorn/configurator'
+  autoload :TeeInput, 'unicorn/tee_input'
+  autoload :ChunkedReader, 'unicorn/chunked_reader'
   autoload :Util, 'unicorn/util'
 
   class << self
@@ -465,6 +467,7 @@ module Unicorn
       worker.tempfile.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
       @after_fork.call(self, worker) # can drop perms
       @timeout /= 2.0 # halve it for select()
+      HttpRequest::TEE.setup
       build_app! unless @preload_app
     end
 
diff --git a/lib/unicorn/chunked_reader.rb b/lib/unicorn/chunked_reader.rb
new file mode 100644
index 0000000..b9178a9
--- /dev/null
+++ b/lib/unicorn/chunked_reader.rb
@@ -0,0 +1,96 @@
+module Unicorn; end
+
+module Unicorn
+  class ChunkedReader
+
+    Z = ''
+    Z.force_encoding(Encoding::BINARY) if Z.respond_to?(:force_encoding)
+
+    def initialize
+      @input = @buf = nil
+      @chunk_left = 0
+    end
+
+    def reopen(input, buf)
+      buf ||= Z.dup
+      buf.force_encoding(Encoding::BINARY) if buf.respond_to?(:force_encoding)
+      @input, @buf = input, buf
+      parse_chunk_header
+      self
+    end
+
+    def readpartial(max, buf = Z.dup)
+      buf.force_encoding(Encoding::BINARY) if buf.respond_to?(:force_encoding)
+
+      while @input && @chunk_left <= 0 && ! parse_chunk_header
+        @buf << @input.readpartial(Const::CHUNK_SIZE, buf)
+      end
+
+      if @input
+        begin
+          @buf << @input.read_nonblock(Const::CHUNK_SIZE, buf)
+        rescue Errno::EAGAIN, Errno::EINTR
+        end
+      end
+
+      max = @chunk_left if max > @chunk_left
+      buf.replace(last_block(max) || Z)
+      @chunk_left -= buf.size
+      (0 == buf.size && @input.nil?) and raise EOFError
+      buf
+    end
+
+    def gets
+      line = nil
+      begin
+        line = readpartial(Const::CHUNK_SIZE)
+        begin
+          if line.sub!(%r{\A(.*?#{$/})}, Z)
+            @chunk_left += line.size
+            @buf = @buf ? (line << @buf) : line
+            return $1.dup
+          end
+          line << readpartial(Const::CHUNK_SIZE)
+        end while true
+      rescue EOFError
+        return line
+      end
+    end
+
+  private
+
+    def last_block(max = nil)
+      rv = @buf
+      if max && rv && max < rv.size
+        @buf = rv[max - rv.size, rv.size - max]
+        return rv[0, max]
+      end
+      @buf = Z.dup
+      rv
+    end
+
+    def parse_chunk_header
+      buf = @buf
+      # ignoring chunk-extension info for now, I haven't seen any use for it
+      # (or any users, and TE:chunked sent by clients is rare already)
+      # if there was not enough data in buffer to parse length of the chunk
+      # then just return
+      if buf.sub!(/\A(?:\r\n)?([a-fA-F0-9]{1,8})[^\r]*?\r\n/, Z)
+        @chunk_left = $1.to_i(16)
+        if 0 == @chunk_left # EOF
+          buf.sub!(/\A\r\n(?:\r\n)?/, Z) # cleanup for future requests
+          @input = nil
+        end
+        return @chunk_left
+      end
+
+      buf.size > 256 and
+          raise HttpParserError,
+                "malformed chunk, chunk-length not found in buffer: " \
+                "#{buf.inspect}"
+      nil
+    end
+
+  end
+
+end
diff --git a/lib/unicorn/const.rb b/lib/unicorn/const.rb
index 250868d..b81dce0 100644
--- a/lib/unicorn/const.rb
+++ b/lib/unicorn/const.rb
@@ -26,10 +26,12 @@ module Unicorn
     ERROR_500_RESPONSE = "HTTP/1.1 500 Internal Server Error\r\n\r\n".freeze
 
     # A frozen format for this is about 15% faster
+    HTTP_TRANSFER_ENCODING = 'HTTP_TRANSFER_ENCODING'.freeze
     CONTENT_LENGTH="CONTENT_LENGTH".freeze
     REMOTE_ADDR="REMOTE_ADDR".freeze
     HTTP_X_FORWARDED_FOR="HTTP_X_FORWARDED_FOR".freeze
     RACK_INPUT="rack.input".freeze
+    STREAM_INPUT="unicorn.stream_input".freeze
   end
 
 end
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index 061a6ab..025b125 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -1,15 +1,9 @@
-require 'tempfile'
 require 'stringio'
 
 # compiled extension
 require 'unicorn/http11'
 
 module Unicorn
-  #
-  # The HttpRequest.initialize method will convert any request that is larger than
-  # Const::MAX_BODY into a Tempfile and use that as the body.  Otherwise it uses
-  # a StringIO object.  To be safe, you should assume it works like a file.
-  #
   class HttpRequest
 
     attr_accessor :logger
@@ -23,15 +17,20 @@ module Unicorn
       "rack.version" => [1, 0].freeze,
       "SCRIPT_NAME" => "".freeze,
 
+      # some applications (like Echo) may want to change this to true
+      # We disable streaming by default since some (arguably broken)
+      # applications may not ever read the entire body and be confused
+      # when it receives a response after nothing has been sent to it.
+      Const::STREAM_INPUT => false,
       # this is not in the Rack spec, but some apps may rely on it
       "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}".freeze
     }
 
-    # Optimize for the common case where there's no request body
-    # (GET/HEAD) requests.
     Z = ''
     Z.force_encoding(Encoding::BINARY) if Z.respond_to?(:force_encoding)
     NULL_IO = StringIO.new(Z)
+    TEE = TeeInput.new
+    DECHUNKER = ChunkedReader.new
     LOCALHOST = '127.0.0.1'.freeze
 
     # Being explicitly single-threaded, we have certain advantages in
@@ -58,11 +57,6 @@ module Unicorn
     # This does minimal exception trapping and it is up to the caller
     # to handle any socket errors (e.g. user aborted upload).
     def read(socket)
-      # reset the parser
-      unless NULL_IO == (input = PARAMS[Const::RACK_INPUT]) # unlikely
-        input.close rescue nil
-        input.close! rescue nil
-      end
       PARAMS.clear
       PARSER.reset
 
@@ -100,57 +94,23 @@ module Unicorn
     private
 
     # Handles dealing with the rest of the request
-    # returns a Rack environment if successful, raises an exception if not
+    # returns a Rack environment if successful
     def handle_body(socket)
       http_body = PARAMS.delete(:http_body)
-      content_length = PARAMS[Const::CONTENT_LENGTH].to_i
 
-      if content_length == 0 # short circuit the common case
-        PARAMS[Const::RACK_INPUT] =
-            NULL_IO.closed? ? NULL_IO.reopen(Z) : NULL_IO
-        return PARAMS.update(DEFAULTS)
+      length = PARAMS[Const::CONTENT_LENGTH].to_i
+      if te = PARAMS[Const::HTTP_TRANSFER_ENCODING]
+        if /chunked/i =~ te
+          socket = DECHUNKER.reopen(socket, http_body)
+          length = http_body = nil
+        end
       end
 
-      # must read more data to complete body
-      remain = content_length - http_body.length
-
-      body = PARAMS[Const::RACK_INPUT] = (remain < Const::MAX_BODY) ?
-          StringIO.new : Tempfile.new('unicorn')
-
-      body.binmode
-      body.write(http_body)
-
-      # Some clients (like FF1.0) report 0 for body and then send a body.
-      # This will probably truncate them but at least the request goes through
-      # usually.
-      read_body(socket, remain, body) if remain > 0
-      body.rewind
-
-      # in case read_body overread because the client tried to pipeline
-      # another request, we'll truncate it.  Again, we don't do pipelining
-      # or keepalive
-      body.truncate(content_length)
+      inp = TEE.reopen(socket, length, http_body)
+      PARAMS[Const::RACK_INPUT] =
+                          DEFAULTS[Const::STREAM_INPUT] ? inp : inp.consume
       PARAMS.update(DEFAULTS)
     end
 
-    # Does the heavy lifting of properly reading the larger body
-    # requests in small chunks.  It expects PARAMS['rack.input'] to be
-    # an IO object, socket to be valid, It also expects any initial part
-    # of the body that has been read to be in the PARAMS['rack.input']
-    # already.  It will return true if successful and false if not.
-    def read_body(socket, remain, body)
-      begin
-        # write always writes the requested amount on a POSIX filesystem
-        remain -= body.write(socket.readpartial(Const::CHUNK_SIZE, BUFFER))
-      end while remain > 0
-    rescue Object => e
-      @logger.error "Error reading HTTP body: #{e.inspect}"
-
-      # Any errors means we should delete the file, including if the file
-      # is dumped.  Truncate it ASAP to help avoid page flushes to disk.
-      body.truncate(0) rescue nil
-      raise e
-    end
-
   end
 end
diff --git a/lib/unicorn/tee_input.rb b/lib/unicorn/tee_input.rb
new file mode 100644
index 0000000..1e281b4
--- /dev/null
+++ b/lib/unicorn/tee_input.rb
@@ -0,0 +1,145 @@
+require 'tempfile'
+
+# acts like tee(1) on an input input to provide a input-like stream
+# while providing rewindable semantics through a Tempfile/StringIO
+# backing store.  On the first pass, the input is only read on demand
+# so your Rack application can use input notification (upload progress
+# and like).  This should fully conform to the Rack::InputWrapper
+# specification on the public API.  This class is intended to be a
+# strict interpretation of Rack::InputWrapper functionality and will
+# not support any deviations from it.
+
+module Unicorn
+  class TeeInput
+    Z = ''
+    Z.force_encoding(Encoding::BINARY) if Z.respond_to?(:force_encoding)
+
+    def initialize
+      @rd = @wr = @size = @input = nil
+      setup
+    end
+
+    def setup
+      @tmp = tmp = Tempfile.new(nil)
+      @rd.close if @rd
+      @rd = File.open(tmp.path, 'wb+')
+      @wr.close if @wr
+      @wr = File.open(tmp.path, 'wb')
+      @rd.sync = @wr.sync = true
+
+      def @rd.size
+        stat.size
+      end
+      tmp.close!
+    end
+
+    def reopen(input, size = nil, buffer = nil)
+      @rd.seek(0)
+      @wr.seek(0)
+      @rd.truncate(0) # truncate read to flush luserspace read buffers
+      @wr.write(buffer) if buffer
+      @input = input
+      @size = size # nil if chunked
+      self
+    end
+
+    def consume
+      @input or return
+      buf = Z.dup
+      while tee(Const::CHUNK_SIZE, buf)
+      end
+      @rd
+    end
+
+    # returns the size of the input.  This is what the Content-Length
+    # header value should be, and how large our input is expected to be.
+    # For TE:chunked, this requires consuming all of the input stream
+    # before returning since there's no other way
+    def size
+      @size and return @size
+      @input and consume
+      @size = @wr.stat.size
+    end
+
+    def read(*args)
+      @input or return @rd.read(*args)
+
+      length = args.shift
+      if nil == length
+        rv = @rd.read || Z.dup
+        tmp = Z.dup
+        while tee(Const::CHUNK_SIZE, tmp)
+          rv << tmp
+        end
+        rv
+      else
+        buf = args.shift || Z.dup
+        @rd.read(length, buf) || tee(length, buf)
+      end
+    end
+
+    # takes zero arguments for strict Rack::Lint compatibility, unlike IO#gets
+    def gets
+      @input or return @rd.gets
+      nil == $/ and return read
+
+      line = nil
+      if @rd.pos < @wr.stat.size
+        line = @rd.gets # cannot be nil here
+        $/ == line[-$/.size, $/.size] and return line
+
+        # half the line was already read, and the rest of has not been read
+        if buf = @input.gets
+          @wr.write(buf)
+          line << buf
+        else
+          @input = nil
+        end
+      elsif line = @input.gets
+        @wr.write(line)
+      end
+
+      line
+    end
+
+    def each(&block)
+      while line = gets
+        yield line
+      end
+
+      self # Rack does not specify what the return value here
+    end
+
+    def rewind
+      @rd.rewind # Rack does not specify what the return value here
+    end
+
+  private
+
+    # tees off a +length+ chunk of data from the input into the IO
+    # backing store as well as returning it.  +buf+ must be specified.
+    # returns nil if reading from the input returns nil
+    def tee(length, buf)
+      begin
+        if @size
+          left = @size - @rd.stat.size
+          0 == left and return nil
+          if length >= left
+            @input.readpartial(left, buf) == left and @input = nil
+          elsif @input.nil?
+            return nil
+          else
+            @input.readpartial(length, buf)
+          end
+        else # ChunkedReader#readpartial just raises EOFError when done
+          @input.readpartial(length, buf)
+        end
+      rescue EOFError
+        return @input = nil
+      end
+      @wr.write(buf)
+      buf
+    end
+
+  end
+end