diff options
Diffstat (limited to 'lib/unicorn')
-rw-r--r-- | lib/unicorn/chunked_reader.rb | 96 | ||||
-rw-r--r-- | lib/unicorn/const.rb | 2 | ||||
-rw-r--r-- | lib/unicorn/http_request.rb | 74 | ||||
-rw-r--r-- | lib/unicorn/tee_input.rb | 145 |
4 files changed, 260 insertions, 57 deletions
diff --git a/lib/unicorn/chunked_reader.rb b/lib/unicorn/chunked_reader.rb new file mode 100644 index 0000000..b9178a9 --- /dev/null +++ b/lib/unicorn/chunked_reader.rb @@ -0,0 +1,96 @@ +module Unicorn; end + +module Unicorn + class ChunkedReader + + Z = '' + Z.force_encoding(Encoding::BINARY) if Z.respond_to?(:force_encoding) + + def initialize + @input = @buf = nil + @chunk_left = 0 + end + + def reopen(input, buf) + buf ||= Z.dup + buf.force_encoding(Encoding::BINARY) if buf.respond_to?(:force_encoding) + @input, @buf = input, buf + parse_chunk_header + self + end + + def readpartial(max, buf = Z.dup) + buf.force_encoding(Encoding::BINARY) if buf.respond_to?(:force_encoding) + + while @input && @chunk_left <= 0 && ! parse_chunk_header + @buf << @input.readpartial(Const::CHUNK_SIZE, buf) + end + + if @input + begin + @buf << @input.read_nonblock(Const::CHUNK_SIZE, buf) + rescue Errno::EAGAIN, Errno::EINTR + end + end + + max = @chunk_left if max > @chunk_left + buf.replace(last_block(max) || Z) + @chunk_left -= buf.size + (0 == buf.size && @input.nil?) and raise EOFError + buf + end + + def gets + line = nil + begin + line = readpartial(Const::CHUNK_SIZE) + begin + if line.sub!(%r{\A(.*?#{$/})}, Z) + @chunk_left += line.size + @buf = @buf ? (line << @buf) : line + return $1.dup + end + line << readpartial(Const::CHUNK_SIZE) + end while true + rescue EOFError + return line + end + end + + private + + def last_block(max = nil) + rv = @buf + if max && rv && max < rv.size + @buf = rv[max - rv.size, rv.size - max] + return rv[0, max] + end + @buf = Z.dup + rv + end + + def parse_chunk_header + buf = @buf + # ignoring chunk-extension info for now, I haven't seen any use for it + # (or any users, and TE:chunked sent by clients is rare already) + # if there was not enough data in buffer to parse length of the chunk + # then just return + if buf.sub!(/\A(?:\r\n)?([a-fA-F0-9]{1,8})[^\r]*?\r\n/, Z) + @chunk_left = $1.to_i(16) + if 0 == @chunk_left # EOF + buf.sub!(/\A\r\n(?:\r\n)?/, Z) # cleanup for future requests + @input = nil + end + return @chunk_left + end + + buf.size > 256 and + raise HttpParserError, + "malformed chunk, chunk-length not found in buffer: " \ + "#{buf.inspect}" + nil + end + + end + +end diff --git a/lib/unicorn/const.rb b/lib/unicorn/const.rb index 250868d..b81dce0 100644 --- a/lib/unicorn/const.rb +++ b/lib/unicorn/const.rb @@ -26,10 +26,12 @@ module Unicorn ERROR_500_RESPONSE = "HTTP/1.1 500 Internal Server Error\r\n\r\n".freeze # A frozen format for this is about 15% faster + HTTP_TRANSFER_ENCODING = 'HTTP_TRANSFER_ENCODING'.freeze CONTENT_LENGTH="CONTENT_LENGTH".freeze REMOTE_ADDR="REMOTE_ADDR".freeze HTTP_X_FORWARDED_FOR="HTTP_X_FORWARDED_FOR".freeze RACK_INPUT="rack.input".freeze + STREAM_INPUT="unicorn.stream_input".freeze end end diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb index 061a6ab..025b125 100644 --- a/lib/unicorn/http_request.rb +++ b/lib/unicorn/http_request.rb @@ -1,15 +1,9 @@ -require 'tempfile' require 'stringio' # compiled extension require 'unicorn/http11' module Unicorn - # - # The HttpRequest.initialize method will convert any request that is larger than - # Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses - # a StringIO object. To be safe, you should assume it works like a file. - # class HttpRequest attr_accessor :logger @@ -23,15 +17,20 @@ module Unicorn "rack.version" => [1, 0].freeze, "SCRIPT_NAME" => "".freeze, + # some applications (like Echo) may want to change this to true + # We disable streaming by default since some (arguably broken) + # applications may not ever read the entire body and be confused + # when it receives a response after nothing has been sent to it. + Const::STREAM_INPUT => false, # this is not in the Rack spec, but some apps may rely on it "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}".freeze } - # Optimize for the common case where there's no request body - # (GET/HEAD) requests. Z = '' Z.force_encoding(Encoding::BINARY) if Z.respond_to?(:force_encoding) NULL_IO = StringIO.new(Z) + TEE = TeeInput.new + DECHUNKER = ChunkedReader.new LOCALHOST = '127.0.0.1'.freeze # Being explicitly single-threaded, we have certain advantages in @@ -58,11 +57,6 @@ module Unicorn # This does minimal exception trapping and it is up to the caller # to handle any socket errors (e.g. user aborted upload). def read(socket) - # reset the parser - unless NULL_IO == (input = PARAMS[Const::RACK_INPUT]) # unlikely - input.close rescue nil - input.close! rescue nil - end PARAMS.clear PARSER.reset @@ -100,57 +94,23 @@ module Unicorn private # Handles dealing with the rest of the request - # returns a Rack environment if successful, raises an exception if not + # returns a Rack environment if successful def handle_body(socket) http_body = PARAMS.delete(:http_body) - content_length = PARAMS[Const::CONTENT_LENGTH].to_i - if content_length == 0 # short circuit the common case - PARAMS[Const::RACK_INPUT] = - NULL_IO.closed? ? NULL_IO.reopen(Z) : NULL_IO - return PARAMS.update(DEFAULTS) + length = PARAMS[Const::CONTENT_LENGTH].to_i + if te = PARAMS[Const::HTTP_TRANSFER_ENCODING] + if /chunked/i =~ te + socket = DECHUNKER.reopen(socket, http_body) + length = http_body = nil + end end - # must read more data to complete body - remain = content_length - http_body.length - - body = PARAMS[Const::RACK_INPUT] = (remain < Const::MAX_BODY) ? - StringIO.new : Tempfile.new('unicorn') - - body.binmode - body.write(http_body) - - # Some clients (like FF1.0) report 0 for body and then send a body. - # This will probably truncate them but at least the request goes through - # usually. - read_body(socket, remain, body) if remain > 0 - body.rewind - - # in case read_body overread because the client tried to pipeline - # another request, we'll truncate it. Again, we don't do pipelining - # or keepalive - body.truncate(content_length) + inp = TEE.reopen(socket, length, http_body) + PARAMS[Const::RACK_INPUT] = + DEFAULTS[Const::STREAM_INPUT] ? inp : inp.consume PARAMS.update(DEFAULTS) end - # Does the heavy lifting of properly reading the larger body - # requests in small chunks. It expects PARAMS['rack.input'] to be - # an IO object, socket to be valid, It also expects any initial part - # of the body that has been read to be in the PARAMS['rack.input'] - # already. It will return true if successful and false if not. - def read_body(socket, remain, body) - begin - # write always writes the requested amount on a POSIX filesystem - remain -= body.write(socket.readpartial(Const::CHUNK_SIZE, BUFFER)) - end while remain > 0 - rescue Object => e - @logger.error "Error reading HTTP body: #{e.inspect}" - - # Any errors means we should delete the file, including if the file - # is dumped. Truncate it ASAP to help avoid page flushes to disk. - body.truncate(0) rescue nil - raise e - end - end end diff --git a/lib/unicorn/tee_input.rb b/lib/unicorn/tee_input.rb new file mode 100644 index 0000000..1e281b4 --- /dev/null +++ b/lib/unicorn/tee_input.rb @@ -0,0 +1,145 @@ +require 'tempfile' + +# acts like tee(1) on an input input to provide a input-like stream +# while providing rewindable semantics through a Tempfile/StringIO +# backing store. On the first pass, the input is only read on demand +# so your Rack application can use input notification (upload progress +# and like). This should fully conform to the Rack::InputWrapper +# specification on the public API. This class is intended to be a +# strict interpretation of Rack::InputWrapper functionality and will +# not support any deviations from it. + +module Unicorn + class TeeInput + Z = '' + Z.force_encoding(Encoding::BINARY) if Z.respond_to?(:force_encoding) + + def initialize + @rd = @wr = @size = @input = nil + setup + end + + def setup + @tmp = tmp = Tempfile.new(nil) + @rd.close if @rd + @rd = File.open(tmp.path, 'wb+') + @wr.close if @wr + @wr = File.open(tmp.path, 'wb') + @rd.sync = @wr.sync = true + + def @rd.size + stat.size + end + tmp.close! + end + + def reopen(input, size = nil, buffer = nil) + @rd.seek(0) + @wr.seek(0) + @rd.truncate(0) # truncate read to flush luserspace read buffers + @wr.write(buffer) if buffer + @input = input + @size = size # nil if chunked + self + end + + def consume + @input or return + buf = Z.dup + while tee(Const::CHUNK_SIZE, buf) + end + @rd + end + + # returns the size of the input. This is what the Content-Length + # header value should be, and how large our input is expected to be. + # For TE:chunked, this requires consuming all of the input stream + # before returning since there's no other way + def size + @size and return @size + @input and consume + @size = @wr.stat.size + end + + def read(*args) + @input or return @rd.read(*args) + + length = args.shift + if nil == length + rv = @rd.read || Z.dup + tmp = Z.dup + while tee(Const::CHUNK_SIZE, tmp) + rv << tmp + end + rv + else + buf = args.shift || Z.dup + @rd.read(length, buf) || tee(length, buf) + end + end + + # takes zero arguments for strict Rack::Lint compatibility, unlike IO#gets + def gets + @input or return @rd.gets + nil == $/ and return read + + line = nil + if @rd.pos < @wr.stat.size + line = @rd.gets # cannot be nil here + $/ == line[-$/.size, $/.size] and return line + + # half the line was already read, and the rest of has not been read + if buf = @input.gets + @wr.write(buf) + line << buf + else + @input = nil + end + elsif line = @input.gets + @wr.write(line) + end + + line + end + + def each(&block) + while line = gets + yield line + end + + self # Rack does not specify what the return value here + end + + def rewind + @rd.rewind # Rack does not specify what the return value here + end + + private + + # tees off a +length+ chunk of data from the input into the IO + # backing store as well as returning it. +buf+ must be specified. + # returns nil if reading from the input returns nil + def tee(length, buf) + begin + if @size + left = @size - @rd.stat.size + 0 == left and return nil + if length >= left + @input.readpartial(left, buf) == left and @input = nil + elsif @input.nil? + return nil + else + @input.readpartial(length, buf) + end + else # ChunkedReader#readpartial just raises EOFError when done + @input.readpartial(length, buf) + end + rescue EOFError + return @input = nil + end + @wr.write(buf) + buf + end + + end +end |