diff options
Diffstat (limited to 'lib/rack/multipart/parser.rb')
-rw-r--r-- | lib/rack/multipart/parser.rb | 107 |
1 files changed, 56 insertions, 51 deletions
diff --git a/lib/rack/multipart/parser.rb b/lib/rack/multipart/parser.rb index f661da10..7c38d5f3 100644 --- a/lib/rack/multipart/parser.rb +++ b/lib/rack/multipart/parser.rb @@ -1,16 +1,24 @@ +# frozen_string_literal: true + require 'rack/utils' +require 'strscan' +require 'rack/core_ext/regexp' module Rack module Multipart class MultipartPartLimitError < Errno::EMFILE; end class Parser - BUFSIZE = 16384 + using ::Rack::RegexpExtensions + + BUFSIZE = 1_048_576 TEXT_PLAIN = "text/plain" TEMPFILE_FACTORY = lambda { |filename, content_type| - Tempfile.new(["RackMultipart", ::File.extname(filename)]) + Tempfile.new(["RackMultipart", ::File.extname(filename.gsub("\0", '%00'))]) } + BOUNDARY_REGEX = /\A([^\n]*(?:\n|\Z))/ + class BoundedIO # :nodoc: def initialize(io, content_length) @io = io @@ -18,15 +26,15 @@ module Rack @cursor = 0 end - def read(size) + def read(size, outbuf = nil) return if @cursor >= @content_length left = @content_length - @cursor str = if left < size - @io.read left + @io.read left, outbuf else - @io.read size + @io.read size, outbuf end if str @@ -39,8 +47,6 @@ module Rack str end - def eof?; @content_length == @cursor; end - def rewind @io.rewind end @@ -63,13 +69,14 @@ module Rack return EMPTY unless boundary io = BoundedIO.new(io, content_length) if content_length + outbuf = String.new parser = new(boundary, tmpfile, bufsize, qp) - parser.on_read io.read(bufsize), io.eof? + parser.on_read io.read(bufsize, outbuf) loop do break if parser.state == :DONE - parser.on_read io.read(bufsize), io.eof? + parser.on_read io.read(bufsize, outbuf) end io.rewind @@ -92,14 +99,14 @@ module Rack # those which give the lone filename. fn = filename.split(/[\/\\]/).last - data = {:filename => fn, :type => content_type, - :name => name, :tempfile => body, :head => head} + data = { filename: fn, type: content_type, + name: name, tempfile: body, head: head } elsif !filename && content_type && body.is_a?(IO) body.rewind # Generic multipart cases, not coming from a form - data = {:type => content_type, - :name => name, :tempfile => body, :head => head} + data = { type: content_type, + name: name, tempfile: body, head: head } end yield data @@ -135,11 +142,12 @@ module Rack klass = TempfilePart @open_files += 1 else - body = ''.force_encoding(Encoding::ASCII_8BIT) + body = String.new klass = BufferPart end @mime_parts[mime_index] = klass.new(body, head, filename, content_type, name) + check_open_files end @@ -165,25 +173,26 @@ module Rack attr_reader :state def initialize(boundary, tempfile, bufsize, query_parser) - @buf = "".force_encoding(Encoding::ASCII_8BIT) - @query_parser = query_parser @params = query_parser.make_params @boundary = "--#{boundary}" - @boundary_size = @boundary.bytesize + EOL.size @bufsize = bufsize - @rx = /(?:#{EOL})?#{Regexp.quote(@boundary)}(#{EOL}|--)/n @full_boundary = @boundary @end_boundary = @boundary + '--' @state = :FAST_FORWARD @mime_index = 0 @collector = Collector.new tempfile + + @sbuf = StringScanner.new("".dup) + @body_regex = /(.*?)(#{EOL})?#{Regexp.quote(@boundary)}(#{EOL}|--)/m + @rx_max_size = EOL.size + @boundary.bytesize + [EOL.size, '--'.size].max + @head_regex = /(.*?#{EOL})#{EOL}/m end - def on_read content, eof - handle_empty_content!(content, eof) - @buf << content + def on_read content + handle_empty_content!(content) + @sbuf.concat content run_parser end @@ -194,7 +203,6 @@ module Rack @query_parser.normalize_params(@params, part.name, data, @query_parser.param_depth_limit) end end - MultipartInfo.new @params.to_params_hash, @collector.find_all(&:file?).map(&:body) end @@ -221,7 +229,7 @@ module Rack if consume_boundary @state = :MIME_HEAD else - raise EOFError, "bad content body" if @buf.bytesize >= @bufsize + raise EOFError, "bad content body" if @sbuf.rest_size >= @bufsize :want_read end end @@ -229,19 +237,16 @@ module Rack def handle_consume_token tok = consume_boundary # break if we're at the end of a buffer, but not if it is the end of a field - if tok == :END_BOUNDARY || (@buf.empty? && tok != :BOUNDARY) - @state = :DONE + @state = if tok == :END_BOUNDARY || (@sbuf.eos? && tok != :BOUNDARY) + :DONE else - @state = :MIME_HEAD + :MIME_HEAD end end def handle_mime_head - if @buf.index(EOL + EOL) - i = @buf.index(EOL+EOL) - head = @buf.slice!(0, i+2) # First \r\n - @buf.slice!(0, 2) # Second \r\n - + if @sbuf.scan_until(@head_regex) + head = @sbuf[1] content_type = head[MULTIPART_CONTENT_TYPE, 1] if name = head[MULTIPART_CONTENT_DISPOSITION, 1] name = Rack::Auth::Digest::Params::dequote(name) @@ -252,7 +257,7 @@ module Rack filename = get_filename(head) if name.nil? || name.empty? - name = filename || "#{content_type || TEXT_PLAIN}[]" + name = filename || "#{content_type || TEXT_PLAIN}[]".dup end @collector.on_mime_head @mime_index, head, filename, content_type, name @@ -263,31 +268,33 @@ module Rack end def handle_mime_body - if @buf =~ rx - # Save the rest. - if i = @buf.index(rx) - @collector.on_mime_body @mime_index, @buf.slice!(0, i) - @buf.slice!(0, 2) # Remove \r\n after the content - end + if @sbuf.check_until(@body_regex) # check but do not advance the pointer yet + body = @sbuf[1] + @collector.on_mime_body @mime_index, body + @sbuf.pos += body.length + 2 # skip \r\n after the content @state = :CONSUME_TOKEN @mime_index += 1 else + # Save what we have so far + if @rx_max_size < @sbuf.rest_size + delta = @sbuf.rest_size - @rx_max_size + @collector.on_mime_body @mime_index, @sbuf.peek(delta) + @sbuf.pos += delta + @sbuf.string = @sbuf.rest + end :want_read end end def full_boundary; @full_boundary; end - def rx; @rx; end - def consume_boundary - while @buf.gsub!(/\A([^\n]*(?:\n|\Z))/, '') - read_buffer = $1 + while read_buffer = @sbuf.scan_until(BOUNDARY_REGEX) case read_buffer.strip when full_boundary then return :BOUNDARY when @end_boundary then return :END_BOUNDARY end - return if @buf.empty? + return if @sbuf.eos? end end @@ -308,8 +315,8 @@ module Rack return unless filename - if filename.scan(/%.?.?/).all? { |s| s =~ /%[0-9a-fA-F]{2}/ } - filename = Utils.unescape(filename) + if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) } + filename = Utils.unescape_path(filename) end filename.scrub! @@ -325,7 +332,7 @@ module Rack filename end - CHARSET = "charset" + CHARSET = "charset" def tag_multipart_encoding(filename, content_type, name, body) name = name.to_s @@ -342,7 +349,7 @@ module Rack if TEXT_PLAIN == type_subtype rest = list.drop 1 rest.each do |param| - k,v = param.split('=', 2) + k, v = param.split('=', 2) k.strip! v.strip! v = v[1..-2] if v.start_with?('"') && v.end_with?('"') @@ -355,11 +362,9 @@ module Rack body.force_encoding(encoding) end - - def handle_empty_content!(content, eof) + def handle_empty_content!(content) if content.nil? || content.empty? - raise EOFError if eof - return true + raise EOFError end end end |