unicorn Ruby/Rack server user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
From: Eric Wong <bofh@yhbt.net>
To: unicorn-public@yhbt.net
Subject: [PATCH] http: improve RFC 7230 conformance
Date: Thu, 19 Mar 2020 02:28:23 +0000	[thread overview]
Message-ID: <20200319022823.32472-1-bofh@yhbt.net> (raw)

We need to favor "Transfer-Encoding: chunked" over
"Content-Length" in the request header if they both exist.
Furthermore, we now reject redundant chunking and cases where
"chunked" is not the final encoding.

We currently do not and have no plans to decode "gzip",
"deflate", or "compress" encoding as described by RFC 7230.
That's a job more appropriate for middleware, anyways.

cf. https://tools.ietf.org/html/rfc7230
    https://www.rfc-editor.org/errata_search.php?rfc=7230
---
 ext/unicorn_http/unicorn_http.rl | 46 ++++++++++++++++--
 lib/unicorn/http_request.rb      | 11 +++++
 test/unit/test_http_parser_ng.rb | 81 ++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl
index dfe3a63..21e09d6 100644
--- a/ext/unicorn_http/unicorn_http.rl
+++ b/ext/unicorn_http/unicorn_http.rl
@@ -62,7 +62,8 @@ struct http_parser {
   } len;
 };
 
-static ID id_set_backtrace;
+static ID id_set_backtrace, id_is_chunked_p;
+static VALUE cHttpParser;
 
 #ifdef HAVE_RB_HASH_CLEAR /* Ruby >= 2.0 */
 #  define my_hash_clear(h) (void)rb_hash_clear(h)
@@ -220,6 +221,19 @@ static void write_cont_value(struct http_parser *hp,
   rb_str_buf_cat(hp->cont, vptr, end + 1);
 }
 
+static int is_chunked(VALUE v)
+{
+  /* common case first */
+  if (STR_CSTR_CASE_EQ(v, "chunked"))
+    return 1;
+
+  /*
+   * call Ruby function in unicorn/http_request.rb to deal with unlikely
+   * comma-delimited case
+   */
+  return rb_funcall(cHttpParser, id_is_chunked_p, 1, v) != Qfalse;
+}
+
 static void write_value(struct http_parser *hp,
                         const char *buffer, const char *p)
 {
@@ -246,7 +260,9 @@ static void write_value(struct http_parser *hp,
     f = uncommon_field(field, flen);
   } else if (f == g_http_connection) {
     hp_keepalive_connection(hp, v);
-  } else if (f == g_content_length) {
+  } else if (f == g_content_length && !HP_FL_TEST(hp, CHUNKED)) {
+    if (hp->len.content)
+      parser_raise(eHttpParserError, "Content-Length already set");
     hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
     if (hp->len.content < 0)
       parser_raise(eHttpParserError, "invalid Content-Length");
@@ -254,9 +270,30 @@ static void write_value(struct http_parser *hp,
       HP_FL_SET(hp, HASBODY);
     hp_invalid_if_trailer(hp);
   } else if (f == g_http_transfer_encoding) {
-    if (STR_CSTR_CASE_EQ(v, "chunked")) {
+    if (is_chunked(v)) {
+      if (HP_FL_TEST(hp, CHUNKED))
+        /*
+         * RFC 7230 3.3.1:
+         * A sender MUST NOT apply chunked more than once to a message body
+         * (i.e., chunking an already chunked message is not allowed).
+         */
+        parser_raise(eHttpParserError, "Transfer-Encoding double chunked");
+
       HP_FL_SET(hp, CHUNKED);
       HP_FL_SET(hp, HASBODY);
+
+      /* RFC 7230 3.3.3, 3: favor chunked if Content-Length exists */
+      hp->len.content = 0;
+    } else if (HP_FL_TEST(hp, CHUNKED)) {
+      /*
+       * RFC 7230 3.3.3, point 3 states:
+       * If a Transfer-Encoding header field is present in a request and
+       * the chunked transfer coding is not the final encoding, the
+       * message body length cannot be determined reliably; the server
+       * MUST respond with the 400 (Bad Request) status code and then
+       * close the connection.
+       */
+      parser_raise(eHttpParserError, "invalid Transfer-Encoding");
     }
     hp_invalid_if_trailer(hp);
   } else if (f == g_http_trailer) {
@@ -931,7 +968,7 @@ static VALUE HttpParser_rssget(VALUE self)
 
 void Init_unicorn_http(void)
 {
-  VALUE mUnicorn, cHttpParser;
+  VALUE mUnicorn;
 
   mUnicorn = rb_define_module("Unicorn");
   cHttpParser = rb_define_class_under(mUnicorn, "HttpParser", rb_cObject);
@@ -991,5 +1028,6 @@ void Init_unicorn_http(void)
 #ifndef HAVE_RB_HASH_CLEAR
   id_clear = rb_intern("clear");
 #endif
+  id_is_chunked_p = rb_intern("is_chunked?");
 }
 #undef SET_GLOBAL
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index bcc1f2d..6ca4592 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -188,4 +188,15 @@ def write_http_header(socket) # :nodoc:
       HTTP_RESPONSE_START.each { |c| socket.write(c) }
     end
   end
+
+  # called by ext/unicorn_http/unicorn_http.rl via rb_funcall
+  def self.is_chunked?(v) # :nodoc:
+    vals = v.split(/[ \t]*,[ \t]*/).map!(&:downcase)
+    if vals.pop == 'chunked'.freeze
+      return true unless vals.include?('chunked'.freeze)
+      raise Unicorn::HttpParserError, 'double chunked', []
+    end
+    return false unless vals.include?('chunked'.freeze)
+    raise Unicorn::HttpParserError, 'chunked not last', []
+  end
 end
diff --git a/test/unit/test_http_parser_ng.rb b/test/unit/test_http_parser_ng.rb
index d186f5a..425d5ad 100644
--- a/test/unit/test_http_parser_ng.rb
+++ b/test/unit/test_http_parser_ng.rb
@@ -11,6 +11,20 @@ def setup
     @parser = HttpParser.new
   end
 
+  # RFC 7230 allows gzip/deflate/compress Transfer-Encoding,
+  # but "chunked" must be last if used
+  def test_is_chunked
+    [ 'chunked,chunked', 'chunked,gzip', 'chunked,gzip,chunked' ].each do |x|
+      assert_raise(HttpParserError) { HttpParser.is_chunked?(x) }
+    end
+    [ 'gzip, chunked', 'gzip,chunked', 'gzip ,chunked' ].each do |x|
+      assert HttpParser.is_chunked?(x)
+    end
+    [ 'gzip', 'xhunked', 'xchunked' ].each do |x|
+      assert !HttpParser.is_chunked?(x)
+    end
+  end
+
   def test_parser_max_len
     assert_raises(RangeError) do
       HttpParser.max_header_len = 0xffffffff + 1
@@ -566,6 +580,73 @@ def test_invalid_content_length
     end
   end
 
+  def test_duplicate_content_length
+    str = "PUT / HTTP/1.1\r\n" \
+          "Content-Length: 1\r\n" \
+          "Content-Length: 9\r\n" \
+          "\r\n"
+    assert_raises(HttpParserError) { @parser.headers({}, str) }
+  end
+
+  def test_chunked_overrides_content_length
+    order = [ 'Transfer-Encoding: chunked', 'Content-Length: 666' ]
+    %w(a b).each do |x|
+      str = "PUT /#{x} HTTP/1.1\r\n" \
+            "#{order.join("\r\n")}" \
+            "\r\n\r\na\r\nhelloworld\r\n0\r\n\r\n"
+      order.reverse!
+      env = @parser.headers({}, str)
+      assert_nil @parser.content_length
+      assert_equal 'chunked', env['HTTP_TRANSFER_ENCODING']
+      assert_equal '666', env['CONTENT_LENGTH'],
+        'Content-Length logged so the app can log a possible client bug/attack'
+      @parser.filter_body(dst = '', str)
+      assert_equal 'helloworld', dst
+      @parser.parse # handle the non-existent trailer
+      assert @parser.next?
+    end
+  end
+
+  def test_chunked_order_good
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: gzip\r\n" \
+          "Transfer-Encoding: chunked\r\n" \
+          "\r\n"
+    env = @parser.headers({}, str)
+    assert_equal 'gzip,chunked', env['HTTP_TRANSFER_ENCODING']
+    assert_nil @parser.content_length
+
+    @parser.clear
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: gzip, chunked\r\n" \
+          "\r\n"
+    env = @parser.headers({}, str)
+    assert_equal 'gzip, chunked', env['HTTP_TRANSFER_ENCODING']
+    assert_nil @parser.content_length
+  end
+
+  def test_chunked_order_bad
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: chunked\r\n" \
+          "Transfer-Encoding: gzip\r\n" \
+          "\r\n"
+    assert_raise(HttpParserError) { @parser.headers({}, str) }
+  end
+
+  def test_double_chunked
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: chunked\r\n" \
+          "Transfer-Encoding: chunked\r\n" \
+          "\r\n"
+    assert_raise(HttpParserError) { @parser.headers({}, str) }
+
+    @parser.clear
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: chunked,chunked\r\n" \
+          "\r\n"
+    assert_raise(HttpParserError) { @parser.headers({}, str) }
+  end
+
   def test_backtrace_is_empty
     begin
       @parser.headers({}, "AAADFSFDSFD\r\n\r\n")

                 reply	other threads:[~2020-03-19  2:28 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://yhbt.net/unicorn/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200319022823.32472-1-bofh@yhbt.net \
    --to=bofh@yhbt.net \
    --cc=unicorn-public@yhbt.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhbt.net/unicorn.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).