From 81026ea66279695206ea53287427c05281662572 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 9 Aug 2009 03:02:54 -0700 Subject: Switch to Ragel/C-based chunk/trailer parser This should be more robust, faster and easier to deal with than the ugly proof-of-concept regexp-based ones. --- lib/unicorn.rb | 2 -- lib/unicorn/chunked_reader.rb | 77 ------------------------------------------- lib/unicorn/http_request.rb | 43 +++++++----------------- lib/unicorn/tee_input.rb | 71 ++++++++++++++++++++------------------- lib/unicorn/trailer_parser.rb | 52 ----------------------------- 5 files changed, 50 insertions(+), 195 deletions(-) delete mode 100644 lib/unicorn/chunked_reader.rb delete mode 100644 lib/unicorn/trailer_parser.rb (limited to 'lib') diff --git a/lib/unicorn.rb b/lib/unicorn.rb index 556aba8..b185b25 100644 --- a/lib/unicorn.rb +++ b/lib/unicorn.rb @@ -11,8 +11,6 @@ module Unicorn autoload :HttpResponse, 'unicorn/http_response' autoload :Configurator, 'unicorn/configurator' autoload :TeeInput, 'unicorn/tee_input' - autoload :ChunkedReader, 'unicorn/chunked_reader' - autoload :TrailerParser, 'unicorn/trailer_parser' autoload :Util, 'unicorn/util' Z = '' # the stock empty string we use everywhere... diff --git a/lib/unicorn/chunked_reader.rb b/lib/unicorn/chunked_reader.rb deleted file mode 100644 index b813da6..0000000 --- a/lib/unicorn/chunked_reader.rb +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2009 Eric Wong -# You can redistribute it and/or modify it under the same terms as Ruby. - -require 'unicorn' -require 'unicorn_http' - -module Unicorn - class ChunkedReader - - def initialize(env, input, buf) - @env, @input, @buf = env, input, buf - @chunk_left = 0 - parse_chunk_header - end - - def readpartial(max, buf = Z.dup) - while @input && @chunk_left <= 0 && ! parse_chunk_header - @buf << @input.readpartial(Const::CHUNK_SIZE, buf) - end - - if @input - begin - @buf << @input.read_nonblock(Const::CHUNK_SIZE, buf) - rescue Errno::EAGAIN, Errno::EINTR - end - end - - max = @chunk_left if max > @chunk_left - buf.replace(last_block(max) || Z) - @chunk_left -= buf.size - (0 == buf.size && @input.nil?) and raise EOFError - buf - end - - private - - def last_block(max = nil) - rv = @buf - if max && rv && max < rv.size - @buf = rv[max - rv.size, rv.size - max] - return rv[0, max] - end - @buf = Z.dup - rv - end - - def parse_chunk_header - buf = @buf - # ignoring chunk-extension info for now, I haven't seen any use for it - # (or any users, and TE:chunked sent by clients is rare already) - # if there was not enough data in buffer to parse length of the chunk - # then just return - if buf.sub!(/\A(?:\r\n)?([a-fA-F0-9]{1,8})[^\r]*?\r\n/, Z) - @chunk_left = $1.to_i(16) - if 0 == @chunk_left # EOF - buf.sub!(/\A\r\n(?:\r\n)?/, Z) # cleanup for future requests - if trailer = @env[Const::HTTP_TRAILER] - tp = TrailerParser.new(trailer) - while ! tp.execute!(@env, buf) - buf << @input.readpartial(Const::CHUNK_SIZE) - end - end - @input = nil - end - return @chunk_left - end - - buf.size > 256 and - raise HttpParserError, - "malformed chunk, chunk-length not found in buffer: " \ - "#{buf.inspect}" - nil - end - - end - -end diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb index ec215ae..26eff1f 100644 --- a/lib/unicorn/http_request.rb +++ b/lib/unicorn/http_request.rb @@ -1,6 +1,4 @@ -require 'stringio' - -# compiled extension +# coding:binary require 'unicorn_http' module Unicorn @@ -19,18 +17,13 @@ module Unicorn "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}".freeze } - NULL_IO = StringIO.new(Z) LOCALHOST = '127.0.0.1'.freeze - def initialize - end - # Being explicitly single-threaded, we have certain advantages in # not having to worry about variables being clobbered :) - BUFFER = ' ' * Const::CHUNK_SIZE # initial size, may grow - BUFFER.force_encoding(Encoding::BINARY) if Z.respond_to?(:force_encoding) + BUF = ' ' * Const::CHUNK_SIZE # initial size, may grow PARSER = HttpParser.new - PARAMS = Hash.new + REQ = {} # Does the majority of the IO processing. It has been written in # Ruby using about 8 different IO processing strategies. @@ -46,7 +39,7 @@ module Unicorn # This does minimal exception trapping and it is up to the caller # to handle any socket errors (e.g. user aborted upload). def read(socket) - PARAMS.clear + REQ.clear PARSER.reset # From http://www.ietf.org/rfc/rfc3875: @@ -56,42 +49,30 @@ module Unicorn # identify the client for the immediate request to the server; # that client may be a proxy, gateway, or other intermediary # acting on behalf of the actual source client." - PARAMS[Const::REMOTE_ADDR] = + REQ[Const::REMOTE_ADDR] = TCPSocket === socket ? socket.peeraddr.last : LOCALHOST # short circuit the common case with small GET requests first - PARSER.execute(PARAMS, socket.readpartial(Const::CHUNK_SIZE, BUFFER)) and + PARSER.headers(REQ, socket.readpartial(Const::CHUNK_SIZE, BUF)) and return handle_body(socket) - data = BUFFER.dup # socket.readpartial will clobber BUFFER + data = BUF.dup # socket.readpartial will clobber data # Parser is not done, queue up more data to read and continue parsing # an Exception thrown from the PARSER will throw us out of the loop begin - data << socket.readpartial(Const::CHUNK_SIZE, BUFFER) - PARSER.execute(PARAMS, data) and return handle_body(socket) + BUF << socket.readpartial(Const::CHUNK_SIZE, data) + PARSER.headers(REQ, BUF) and return handle_body(socket) end while true end private # Handles dealing with the rest of the request - # returns a Rack environment if successful + # returns a # Rack environment if successful def handle_body(socket) - PARAMS[Const::RACK_INPUT] = if (body = PARAMS.delete(:http_body)) - length = PARAMS[Const::CONTENT_LENGTH].to_i - - if /\Achunked\z/i =~ PARAMS[Const::HTTP_TRANSFER_ENCODING] - socket = ChunkedReader.new(PARAMS, socket, body) - length = body = nil - end - - TeeInput.new(socket, length, body) - else - NULL_IO - end - - PARAMS.update(DEFAULTS) + REQ[Const::RACK_INPUT] = Unicorn::TeeInput.new(socket) + REQ.update(DEFAULTS) end end diff --git a/lib/unicorn/tee_input.rb b/lib/unicorn/tee_input.rb index bbc496b..07676a6 100644 --- a/lib/unicorn/tee_input.rb +++ b/lib/unicorn/tee_input.rb @@ -13,15 +13,23 @@ module Unicorn class TeeInput - def initialize(input, size, body) - @tmp = Unicorn::Util.tmpio - - if body - @tmp.write(body) + # it's so awesome to not have to care for thread safety... + + RAW = HttpRequest::BUF # :nodoc: + DST = RAW.dup # :nodoc: + PARSER = HttpRequest::PARSER # :nodoc: + REQ = HttpRequest::REQ # :nodoc: + + def initialize(socket) + @tmp = Util.tmpio + @size = PARSER.content_length + return(@input = nil) if 0 == @size + @input = socket + if RAW.size > 0 + PARSER.read_body(DST, RAW) and finalize_input + @tmp.write(DST) @tmp.seek(0) end - @input = input - @size = size # nil if chunked end # returns the size of the input. This is what the Content-Length @@ -32,10 +40,10 @@ module Unicorn @size and return @size if @input - buf = Z.dup - while tee(Const::CHUNK_SIZE, buf) + pos = @tmp.tell + while tee(Const::CHUNK_SIZE, DST) end - @tmp.rewind + @tmp.seek(pos) end @size = @tmp.stat.size @@ -47,13 +55,12 @@ module Unicorn length = args.shift if nil == length rv = @tmp.read || Z.dup - tmp = Z.dup - while tee(Const::CHUNK_SIZE, tmp) - rv << tmp + while tee(Const::CHUNK_SIZE, DST) + rv << DST end rv else - buf = args.shift || Z.dup + buf = args.shift || DST.dup diff = @tmp.stat.size - @tmp.pos if 0 == diff tee(length, buf) @@ -70,7 +77,7 @@ module Unicorn orig_size = @tmp.stat.size if @tmp.pos == orig_size - tee(Const::CHUNK_SIZE, Z.dup) or return nil + tee(Const::CHUNK_SIZE, DST) or return nil @tmp.seek(orig_size) end @@ -79,8 +86,8 @@ module Unicorn # unlikely, if we got here, then @tmp is at EOF begin - orig_size = @tmp.stat.size - tee(Const::CHUNK_SIZE, Z.dup) or break + orig_size = @tmp.pos + tee(Const::CHUNK_SIZE, DST) or break @tmp.seek(orig_size) line << @tmp.gets $/ == line[-$/.size, $/.size] and return line @@ -108,25 +115,23 @@ module Unicorn # backing store as well as returning it. +buf+ must be specified. # returns nil if reading from the input returns nil def tee(length, buf) - begin - if @size - left = @size - @tmp.stat.size - 0 == left and return nil - if length >= left - @input.readpartial(left, buf) == left and @input = nil - elsif @input.nil? - return nil - else - @input.readpartial(length, buf) + unless PARSER.body_eof? + begin + if PARSER.read_body(buf, @input.readpartial(length, RAW)).nil? + @tmp.write(buf) + return buf end - else # ChunkedReader#readpartial just raises EOFError when done - @input.readpartial(length, buf) + rescue EOFError end - rescue EOFError - return @input = nil end - @tmp.write(buf) - buf + finalize_input + end + + def finalize_input + while PARSER.trailers(REQ, RAW).nil? + RAW << @input.readpartial(Const::CHUNK_SIZE, DST) + end + @input = nil end end diff --git a/lib/unicorn/trailer_parser.rb b/lib/unicorn/trailer_parser.rb deleted file mode 100644 index 22f2e1d..0000000 --- a/lib/unicorn/trailer_parser.rb +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2009 Eric Wong -# You can redistribute it and/or modify it under the same terms as Ruby. -require 'unicorn' -require 'unicorn_http' - -# Eventually I should integrate this into HttpParser... -module Unicorn - class TrailerParser - - TR_FR = 'a-z-'.freeze - TR_TO = 'A-Z_'.freeze - - # initializes HTTP trailer parser with acceptable +trailer+ - def initialize(http_trailer) - @trailers = http_trailer.split(/\s*,\s*/).inject({}) { |hash, key| - hash[key.tr(TR_FR, TR_TO)] = true - hash - } - end - - # Executes our TrailerParser on +data+ and modifies +env+ This will - # shrink +data+ as it is being consumed. Returns true if it has - # parsed all trailers, false if not. It raises HttpParserError on - # parse failure or unknown headers. It has slightly smaller limits - # than the C-based HTTP parser but should not be an issue in practice - # since Content-MD5 is probably the only legitimate use for it. - def execute!(env, data) - data.size > 0xffff and - raise HttpParserError, "trailer buffer too large: #{data.size} bytes" - - begin - data.sub!(/\A([^\r]+)\r\n/, Z) or return false # need more data - - key, val = $1.split(/:\s*/, 2) - - key.size > 256 and - raise HttpParserError, "trailer key #{key.inspect} is too long" - val.size > 8192 and - raise HttpParserError, "trailer value #{val.inspect} is too long" - - key.tr!(TR_FR, TR_TO) - - @trailers.delete(key) or - raise HttpParserError, "unknown trailer: #{key.inspect}" - env["HTTP_#{key}"] = val - - @trailers.empty? and return true - end while true - end - - end -end -- cgit v1.2.3-24-ge0c7