author    Eric Wong <normalperson@yhbt.net>  2009-08-18 23:45:04 -0700
committer Eric Wong <normalperson@yhbt.net>  2009-08-18 23:45:04 -0700
commit    c9f80eebc4ec8f717f667970d4c2763f96283ebd (patch)
tree      ff498218314d8825f71b807563083422258f349d
parent    79bba4abefb57f12b54ddd338ed8f9ec828b5e89 (diff)
download  unicorn-c9f80eebc4ec8f717f667970d4c2763f96283ebd.tar.gz
While I still consider Pound to be irrelevant, I still
sometimes get hand-crafted HTTP requests that come in with
multiline headers.  Since these are part of the HTTP specs and
not difficult to support, we might as well support them for
the sake of completeness.
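
For illustration, this is roughly the behavior the patch adds: a header
value continued across lines that begin with a space or tab is folded back
into a single value, with each continuation joined to the previous text by
one space.  A minimal Ruby sketch, assuming the Unicorn::HttpParser C
extension is built and loaded, and using the headers() API exercised by the
tests in this patch; the header name and values below are made up:

  # hypothetical request with a folded X-Long-Header value
  parser = Unicorn::HttpParser.new
  buf = "GET / HTTP/1.1\r\n" \
        "X-Long-Header: first part\r\n" \
        "\tsecond part\r\n" \
        "\r\n"
  env = {}
  parser.headers(env, buf)    # => env once the header block is complete;
                              #    buf is left with whatever followed it ('')
  env['HTTP_X_LONG_HEADER']   # => "first part second part"
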
-rw-r--r--  ext/unicorn_http/unicorn_http.rl         | 41
-rw-r--r--  ext/unicorn_http/unicorn_http_common.rl  |  5
-rw-r--r--  test/unit/test_http_parser.rb            | 85
3 files changed, 119 insertions(+), 12 deletions(-)
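
The tests in this patch also cover a subtler point: the parser stays
resumable in the middle of a folded header.  headers() returns nil while
the header block is still unterminated, the partially accumulated value is
already visible in the env hash, and a later call with the accumulated
buffer picks up where parsing left off.  A small sketch of that usage
(buffer contents are illustrative; the calling pattern follows
test_resumable_continuations below):

  parser = Unicorn::HttpParser.new
  env = {}
  head = "GET / HTTP/1.1\r\nX-ASDF:      \r\n  hello\r\n"

  parser.headers(env, head.dup)   # => nil, no terminating blank line yet
  env['HTTP_X_ASDF']              # => "hello" (value accumulated so far)

  # pass the whole accumulated buffer again once the rest has arrived;
  # the parser resumes where it left off and extends the same value
  parser.headers(env, head + "  world\r\n\r\n")  # => env
  env['HTTP_X_ASDF']              # => "hello world"
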
diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl
index ed9c359..2d829e9 100644
--- a/ext/unicorn_http/unicorn_http.rl
+++ b/ext/unicorn_http/unicorn_http.rl
@@ -36,6 +36,7 @@ struct http_parser {
     size_t field_len; /* only used during header processing */
     size_t dest_offset; /* only used during body processing */
   } s;
+  VALUE cont;
   union {
     off_t content;
     off_t chunk;
@@ -87,6 +88,31 @@ static void invalid_if_trailer(int flags)
     rb_raise(eHttpParserError, "invalid Trailer");
 }
 
+static void write_cont_value(struct http_parser *hp,
+                             const char *buffer, const char *p)
+{
+  char *vptr;
+
+  if (!hp->cont)
+    rb_raise(eHttpParserError, "invalid continuation line");
+
+  assert(hp->mark > 0);
+
+  if (LEN(mark, p) == 0)
+    return;
+
+  if (RSTRING_LEN(hp->cont) > 0)
+    --hp->mark;
+
+  vptr = PTR_TO(mark);
+
+  if (RSTRING_LEN(hp->cont) > 0) {
+    assert(' ' == *vptr || '\t' == *vptr);
+    *vptr = ' ';
+  }
+  rb_str_buf_cat(hp->cont, vptr, LEN(mark, p));
+}
+
 static void write_value(VALUE req, struct http_parser *hp,
                         const char *buffer, const char *p)
 {
@@ -123,11 +149,11 @@ static void write_value(VALUE req, struct http_parser *hp,
 
   e = rb_hash_aref(req, f);
   if (e == Qnil) {
-    rb_hash_aset(req, f, v);
+    hp->cont = rb_hash_aset(req, f, v);
   } else if (f != g_http_host) {
     /* full URLs in REQUEST_URI take precedence for the Host: header */
     rb_str_buf_cat(e, ",", 1);
-    rb_str_buf_append(e, v);
+    hp->cont = rb_str_buf_append(e, v);
   }
 }
 
@@ -144,6 +170,7 @@ static void write_value(VALUE req, struct http_parser *hp,
   action write_field { hp->s.field_len = LEN(start.field, fpc); }
   action start_value { MARK(mark, fpc); }
   action write_value { write_value(req, hp, buffer, fpc); }
+  action write_cont_value { write_cont_value(hp, buffer, fpc); }
   action request_method {
     request_method(hp, req, PTR_TO(mark), LEN(mark, fpc));
   }
@@ -342,10 +369,18 @@ static void finalize_header(VALUE req)
     rb_hash_aset(req, g_query_string, rb_str_new(NULL, 0));
 }
 
+static void hp_mark(void *ptr)
+{
+  struct http_parser *hp = ptr;
+
+  if (hp->cont)
+    rb_gc_mark(hp->cont);
+}
+
 static VALUE HttpParser_alloc(VALUE klass)
 {
   struct http_parser *hp;
-  return Data_Make_Struct(klass, struct http_parser, NULL, NULL, hp);
+  return Data_Make_Struct(klass, struct http_parser, hp_mark, NULL, hp);
 }
 
 
diff --git a/ext/unicorn_http/unicorn_http_common.rl b/ext/unicorn_http/unicorn_http_common.rl
index f1ed138..9b51ba1 100644
--- a/ext/unicorn_http/unicorn_http_common.rl
+++ b/ext/unicorn_http/unicorn_http_common.rl
@@ -19,6 +19,7 @@
   uchar = (unreserved | escape | sorta_safe);
   pchar = (uchar | ":" | "@" | "&" | "=" | "+");
   tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
+  lws = (" " | "\t");
 
 # elements
   token = (ascii -- (CTL | tspecials));
@@ -49,7 +50,9 @@
 
   field_value = any* >start_value %write_value;
 
-  message_header = field_name ":" " "* field_value :> CRLF;
+  value_cont = lws+ any* >start_value %write_cont_value;
+
+  message_header = ((field_name ":" " "* field_value)|value_cont) :> CRLF;
   chunk_ext_val = token*;
   chunk_ext_name = token*;
   chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
diff --git a/test/unit/test_http_parser.rb b/test/unit/test_http_parser.rb
index 42aa1a3..5430614 100644
--- a/test/unit/test_http_parser.rb
+++ b/test/unit/test_http_parser.rb
@@ -134,15 +134,84 @@ class HttpParserTest < Test::Unit::TestCase
     assert_equal req, parser.headers(req, should_be_good)
     assert_equal '', should_be_good
     assert parser.keepalive?
+  end
+
+  # legacy test case from Mongrel that we never supported before...
+  # I still consider Pound irrelevant, unfortunately stupid clients that
+  # send extremely big headers do exist and they've managed to find Unicorn...
+  def test_nasty_pound_header
+    parser = HttpParser.new
+    nasty_pound_header = "GET / HTTP/1.1\r\nX-SSL-Bullshit:   -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n"
+    req = {}
+    buf = nasty_pound_header.dup
+
+    assert nasty_pound_header =~ /(-----BEGIN .*--END CERTIFICATE-----)/m
+    expect = $1.dup
+    expect.gsub!(/\r\n\t/, ' ')
+    assert_equal req, parser.headers(req, buf)
+    assert_equal '', buf
+    assert_equal expect, req['HTTP_X_SSL_BULLSHIT']
+  end
 
-    # ref: http://thread.gmane.org/gmane.comp.lang.ruby.mongrel.devel/37/focus=45
-    # (note we got 'pen' mixed up with 'pound' in that thread,
-    # but the gist of it is still relevant: these nasty headers are irrelevant
-    #
-    # nasty_pound_header = "GET / HTTP/1.1\r\nX-SSL-Bullshit:   -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n"
-    # parser = HttpParser.new
-    # req = {}
-    # assert parser.execute(req, nasty_pound_header, 0)
+  def test_continuation_eats_leading_spaces
+    parser = HttpParser.new
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "\t\r\n" \
+             "    \r\n" \
+             "  ASDF\r\n\r\n"
+    req = {}
+    assert_equal req, parser.headers(req, header)
+    assert_equal '', header
+    assert_equal 'ASDF', req['HTTP_X_ASDF']
+  end
+
+  def test_continuation_eats_scattered_leading_spaces
+    parser = HttpParser.new
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:   hi\r\n" \
+             "    y\r\n" \
+             "\t\r\n" \
+             "       x\r\n" \
+             "  ASDF\r\n\r\n"
+    req = {}
+    assert_equal req, parser.headers(req, header)
+    assert_equal '', header
+    assert_equal 'hi y x ASDF', req['HTTP_X_ASDF']
+  end
+
+  # this may seem to be testing more of an implementation detail, but
+  # it also helps ensure we're safe in the presence of multiple parsers
+  # in case we ever go multithreaded/evented...
+  def test_resumable_continuations
+    nr = 1000
+    req = {}
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "  hello\r\n"
+    tmp = []
+    nr.times { |i|
+      parser = HttpParser.new
+      assert parser.headers(req, "#{header} #{i}\r\n").nil?
+      asdf = req['HTTP_X_ASDF']
+      assert_equal "hello #{i}", asdf
+      tmp << [ parser, asdf ]
+      req.clear
+    }
+    tmp.each_with_index { |(parser, asdf), i|
+      assert_equal req, parser.headers(req, "#{header} #{i}\r\n .\r\n\r\n")
+      assert_equal "hello #{i} .", asdf
+    }
+  end
+
+  def test_invalid_continuation
+    parser = HttpParser.new
+    header = "GET / HTTP/1.1\r\n" \
+             "    y\r\n" \
+             "Host: hello\r\n" \
+             "\r\n"
+    req = {}
+    assert_raises(HttpParserError) { parser.headers(req, header) }
   end
 
   def test_parse_ie6_urls