From 6945342a1f0a4caaa918f2b0b1efef88824439e0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 5 Jun 2009 18:03:46 -0700 Subject: Transfer-Encoding: chunked streaming input support This adds support for handling POST/PUT request bodies sent with chunked transfer encodings ("Transfer-Encoding: chunked"). Attention has been paid to ensure that a client cannot OOM us by sending an extremely large chunk. This implementation is pure Ruby as the Ragel-based implementation in rfuzz didn't offer a streaming interface. It should be reasonably close to RFC-compliant but please test it in an attempt to break it. The more interesting part is the ability to stream data to the hosted Rack application as it is being transferred to the server. This can be done regardless if the input is chunked or not, enabling the streaming of POST/PUT bodies can allow the hosted Rack application to process input as it receives it. See examples/echo.ru for an example echo server over HTTP. Enabling streaming also allows Rack applications to support upload progress monitoring previously supported by Mongrel handlers. Since Rack specifies that the input needs to be rewindable, this input is written to a temporary file (a la tee(1)) as it is streamed to the application the first time. Subsequent rewinded reads will read from the temporary file instead of the socket. Streaming input to the application is disabled by default since applications may not necessarily read the entire input body before returning. Since this is a completely new feature we've never seen in any Ruby HTTP application server before, we're taking the safe route by leaving it disabled by default. Enabling this can only be done globally by changing the Unicorn HttpRequest::DEFAULTS hash: Unicorn::HttpRequest::DEFAULTS["unicorn.stream_input"] = true Similarly, a Rack application can check if streaming input is enabled by checking the value of the "unicorn.stream_input" key in the environment hashed passed to it. All of this code has only been lightly tested and test coverage is lacking at the moment. [1] - http://tools.ietf.org/html/rfc2616#section-3.6.1 --- lib/unicorn/tee_input.rb | 145 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 lib/unicorn/tee_input.rb (limited to 'lib/unicorn/tee_input.rb') diff --git a/lib/unicorn/tee_input.rb b/lib/unicorn/tee_input.rb new file mode 100644 index 0000000..1e281b4 --- /dev/null +++ b/lib/unicorn/tee_input.rb @@ -0,0 +1,145 @@ +require 'tempfile' + +# acts like tee(1) on an input input to provide a input-like stream +# while providing rewindable semantics through a Tempfile/StringIO +# backing store. On the first pass, the input is only read on demand +# so your Rack application can use input notification (upload progress +# and like). This should fully conform to the Rack::InputWrapper +# specification on the public API. This class is intended to be a +# strict interpretation of Rack::InputWrapper functionality and will +# not support any deviations from it. + +module Unicorn + class TeeInput + Z = '' + Z.force_encoding(Encoding::BINARY) if Z.respond_to?(:force_encoding) + + def initialize + @rd = @wr = @size = @input = nil + setup + end + + def setup + @tmp = tmp = Tempfile.new(nil) + @rd.close if @rd + @rd = File.open(tmp.path, 'wb+') + @wr.close if @wr + @wr = File.open(tmp.path, 'wb') + @rd.sync = @wr.sync = true + + def @rd.size + stat.size + end + tmp.close! + end + + def reopen(input, size = nil, buffer = nil) + @rd.seek(0) + @wr.seek(0) + @rd.truncate(0) # truncate read to flush luserspace read buffers + @wr.write(buffer) if buffer + @input = input + @size = size # nil if chunked + self + end + + def consume + @input or return + buf = Z.dup + while tee(Const::CHUNK_SIZE, buf) + end + @rd + end + + # returns the size of the input. This is what the Content-Length + # header value should be, and how large our input is expected to be. + # For TE:chunked, this requires consuming all of the input stream + # before returning since there's no other way + def size + @size and return @size + @input and consume + @size = @wr.stat.size + end + + def read(*args) + @input or return @rd.read(*args) + + length = args.shift + if nil == length + rv = @rd.read || Z.dup + tmp = Z.dup + while tee(Const::CHUNK_SIZE, tmp) + rv << tmp + end + rv + else + buf = args.shift || Z.dup + @rd.read(length, buf) || tee(length, buf) + end + end + + # takes zero arguments for strict Rack::Lint compatibility, unlike IO#gets + def gets + @input or return @rd.gets + nil == $/ and return read + + line = nil + if @rd.pos < @wr.stat.size + line = @rd.gets # cannot be nil here + $/ == line[-$/.size, $/.size] and return line + + # half the line was already read, and the rest of has not been read + if buf = @input.gets + @wr.write(buf) + line << buf + else + @input = nil + end + elsif line = @input.gets + @wr.write(line) + end + + line + end + + def each(&block) + while line = gets + yield line + end + + self # Rack does not specify what the return value here + end + + def rewind + @rd.rewind # Rack does not specify what the return value here + end + + private + + # tees off a +length+ chunk of data from the input into the IO + # backing store as well as returning it. +buf+ must be specified. + # returns nil if reading from the input returns nil + def tee(length, buf) + begin + if @size + left = @size - @rd.stat.size + 0 == left and return nil + if length >= left + @input.readpartial(left, buf) == left and @input = nil + elsif @input.nil? + return nil + else + @input.readpartial(length, buf) + end + else # ChunkedReader#readpartial just raises EOFError when done + @input.readpartial(length, buf) + end + rescue EOFError + return @input = nil + end + @wr.write(buf) + buf + end + + end +end -- cgit v1.2.3-24-ge0c7