summary refs log tree commit
diff options
context:
space:
mode:
authorJeremy Evans <code@jeremyevans.net>2022-05-25 10:11:53 -0700
committerJeremy Evans <code@jeremyevans.net>2022-05-27 09:48:14 -0700
commitfa68d9df5f3e9c40f8d40674305a94a0cdf63042 (patch)
tree6aa6c0f726a4bd227f6bb4cec488e0b8a0e51571
parentb426cc224908ec6ed6eb8729325392b048215d88 (diff)
downloadrack-fa68d9df5f3e9c40f8d40674305a94a0cdf63042.tar.gz
Refactor multipart boundary parsing
Remove the use of BOUNDARY_REGEX, as well as the @boundary, @full_boundary,
and @end_boundary instance variables.  Replace them with the existing
@body_regex, which already contains the boundary. Tweak body_regex slightly
so the preceding EOF is required if we aren't at the start of the buffer,
since boundaries must be on their own lines.

When looking for the boundary, scan until using @body_regex (i.e. until you
found the boundary).  Differentiate the full_boundary from the end_boundary
by seeing if the scanned data ends with \r\n (full boundary will,
end_boundary must end with --). If no boundary is found, clear the buffer
and try again, for behavior similar to previous.

For the fast forward state, if the first boundary found is the end boundary,
ignore it and keep scanning.  We could alternatively raise an exception, not
sure if that is better.  The previous behavior of treating the end boundary
as the starting boundary was definitely wrong, though.

Add some comments describing this boundary parsing behavior (which was my
initial intent when working on this).

Speeds up test/spec_multipart.rb more than 6x in my testing.
-rw-r--r--lib/rack/multipart/parser.rb49
-rw-r--r--test/multipart/end_boundary_first8
-rw-r--r--test/multipart/preceding_boundary6
-rw-r--r--test/spec_multipart.rb13
4 files changed, 57 insertions, 19 deletions
diff --git a/lib/rack/multipart/parser.rb b/lib/rack/multipart/parser.rb
index ef9637fc..480badaf 100644
--- a/lib/rack/multipart/parser.rb
+++ b/lib/rack/multipart/parser.rb
@@ -48,8 +48,6 @@ module Rack
         Tempfile.new(["RackMultipart", ::File.extname(filename.gsub("\0", '%00'))])
       }
 
-      BOUNDARY_REGEX = /\A([^\n]*(?:\n|\Z))/
-
       class BoundedIO # :nodoc:
         def initialize(io, content_length)
           @io             = io
@@ -195,18 +193,15 @@ module Rack
       def initialize(boundary, tempfile, bufsize, query_parser)
         @query_parser   = query_parser
         @params         = query_parser.make_params
-        @boundary       = "--#{boundary}"
         @bufsize        = bufsize
 
-        @full_boundary = @boundary
-        @end_boundary = @boundary + '--'
         @state = :FAST_FORWARD
         @mime_index = 0
         @collector = Collector.new tempfile
 
         @sbuf = StringScanner.new("".dup)
-        @body_regex = /(?:#{EOL})?#{Regexp.quote(@boundary)}(?:#{EOL}|--)/m
-        @rx_max_size = EOL.size + @boundary.bytesize + [EOL.size, '--'.size].max
+        @body_regex = /(?:#{EOL}|\A)--#{Regexp.quote(boundary)}(?:#{EOL}|--)/m
+        @rx_max_size = boundary.bytesize + 6 # (\r\n-- at start, either \r\n or -- at finish)
         @head_regex = /(.*?#{EOL})#{EOL}/m
       end
 
@@ -257,11 +252,26 @@ module Rack
         @sbuf.concat(content)
       end
 
+      # This handles the initial parser state.  We read until we find the starting
+      # boundary, then we can transition to the next state. If we find the ending
+      # boundary, this is an invalid multipart upload, but keep scanning for opening
+      # boundary in that case. If no boundary found, we need to keep reading data
+      # and retry. It's highly unlikely the initial read will not consume the
+      # boundary.  The client would have to deliberately craft a response
+      # with the opening boundary beyond the buffer size for that to happen.
       def handle_fast_forward
-        if consume_boundary
-          @state = :MIME_HEAD
-        else
-          :want_read
+        while true
+          case consume_boundary
+          when :BOUNDARY
+            # found opening boundary, transition to next state
+            @state = :MIME_HEAD
+            return
+          when :END_BOUNDARY
+            # invalid multipart upload, but retry for opening boundary
+          else
+            # no boundary found, keep reading data
+            return :want_read
+          end
         end
       end
 
@@ -317,15 +327,16 @@ module Rack
         end
       end
 
-      def full_boundary; @full_boundary; end
-
+      # Scan until the we find the start or end of the boundary.
+      # If we find it, return the appropriate symbol for the start or
+      # end of the boundary.  If we don't find the start or end of the
+      # boundary, clear the buffer and return nil.
       def consume_boundary
-        while read_buffer = @sbuf.scan_until(BOUNDARY_REGEX)
-          case read_buffer.strip
-          when full_boundary then return :BOUNDARY
-          when @end_boundary then return :END_BOUNDARY
-          end
-          return if @sbuf.eos?
+        if read_buffer = @sbuf.scan_until(@body_regex)
+          read_buffer.end_with?(EOL) ? :BOUNDARY : :END_BOUNDARY
+        else
+          @sbuf.terminate
+          nil
         end
       end
 
diff --git a/test/multipart/end_boundary_first b/test/multipart/end_boundary_first
new file mode 100644
index 00000000..282c7ff7
--- /dev/null
+++ b/test/multipart/end_boundary_first
@@ -0,0 +1,8 @@
+--AaB03x--
+
+--AaB03x
+Content-Disposition: form-data; name="files"; filename="foo"
+Content-Type: application/octet-stream
+
+contents
+--AaB03x--
diff --git a/test/multipart/preceding_boundary b/test/multipart/preceding_boundary
new file mode 100644
index 00000000..b65e647b
--- /dev/null
+++ b/test/multipart/preceding_boundary
@@ -0,0 +1,6 @@
+A--AaB03x
+Content-Disposition: form-data; name="files"; filename="foo"
+Content-Type: application/octet-stream
+
+contents
+--AaB03x--
diff --git a/test/spec_multipart.rb b/test/spec_multipart.rb
index 53787cb7..a1e0a7e4 100644
--- a/test/spec_multipart.rb
+++ b/test/spec_multipart.rb
@@ -65,6 +65,19 @@ describe Rack::Multipart do
     params["text"].must_equal "contents"
   end
 
+  it "raises for invalid data preceding the boundary" do
+    env = Rack::MockRequest.env_for '/', multipart_fixture(:preceding_boundary)
+    lambda {
+      Rack::Multipart.parse_multipart(env)
+    }.must_raise Rack::Multipart::EmptyContentError
+  end
+
+  it "ignores initial end boundaries" do
+    env = Rack::MockRequest.env_for '/', multipart_fixture(:end_boundary_first)
+    params = Rack::Multipart.parse_multipart(env)
+    params["files"][:filename].must_equal "foo"
+  end
+
   it "parse multipart content with different filename and filename*" do
     env = Rack::MockRequest.env_for '/', multipart_fixture(:filename_multi)
     params = Rack::Multipart.parse_multipart(env)