-rw-r--r--  ext/unicorn_http/unicorn_http.rl          41
-rw-r--r--  ext/unicorn_http/unicorn_http_common.rl    5
-rw-r--r--  test/unit/test_http_parser.rb             85
3 files changed, 119 insertions, 12 deletions
diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl
index ed9c359..2d829e9 100644
--- a/ext/unicorn_http/unicorn_http.rl
+++ b/ext/unicorn_http/unicorn_http.rl
@@ -36,6 +36,7 @@ struct http_parser {
     size_t field_len; /* only used during header processing */
     size_t dest_offset; /* only used during body processing */
   } s;
+  VALUE cont;
   union {
     off_t content;
     off_t chunk;
@@ -87,6 +88,31 @@ static void invalid_if_trailer(int flags)
     rb_raise(eHttpParserError, "invalid Trailer");
 }
 
+static void write_cont_value(struct http_parser *hp,
+                             const char *buffer, const char *p)
+{
+  char *vptr;
+
+  if (!hp->cont)
+    rb_raise(eHttpParserError, "invalid continuation line");
+
+  assert(hp->mark > 0);
+
+  if (LEN(mark, p) == 0)
+    return;
+
+  if (RSTRING_LEN(hp->cont) > 0)
+    --hp->mark;
+
+  vptr = PTR_TO(mark);
+
+  if (RSTRING_LEN(hp->cont) > 0) {
+    assert(' ' == *vptr || '\t' == *vptr);
+    *vptr = ' ';
+  }
+  rb_str_buf_cat(hp->cont, vptr, LEN(mark, p));
+}
+
 static void write_value(VALUE req, struct http_parser *hp,
                         const char *buffer, const char *p)
 {
@@ -123,11 +149,11 @@ static void write_value(VALUE req, struct http_parser *hp,
 
   e = rb_hash_aref(req, f);
   if (e == Qnil) {
-    rb_hash_aset(req, f, v);
+    hp->cont = rb_hash_aset(req, f, v);
   } else if (f != g_http_host) {
     /* full URLs in REQUEST_URI take precedence for the Host: header */
     rb_str_buf_cat(e, ",", 1);
-    rb_str_buf_append(e, v);
+    hp->cont = rb_str_buf_append(e, v);
   }
 }
 
@@ -144,6 +170,7 @@ static void write_value(VALUE req, struct http_parser *hp,
   action write_field { hp->s.field_len = LEN(start.field, fpc); }
   action start_value { MARK(mark, fpc); }
   action write_value { write_value(req, hp, buffer, fpc); }
+  action write_cont_value { write_cont_value(hp, buffer, fpc); }
   action request_method {
     request_method(hp, req, PTR_TO(mark), LEN(mark, fpc));
   }
@@ -342,10 +369,18 @@ static void finalize_header(VALUE req)
     rb_hash_aset(req, g_query_string, rb_str_new(NULL, 0));
 }
 
+static void hp_mark(void *ptr)
+{
+  struct http_parser *hp = ptr;
+
+  if (hp->cont)
+    rb_gc_mark(hp->cont);
+}
+
 static VALUE HttpParser_alloc(VALUE klass)
 {
   struct http_parser *hp;
-  return Data_Make_Struct(klass, struct http_parser, NULL, NULL, hp);
+  return Data_Make_Struct(klass, struct http_parser, hp_mark, NULL, hp);
 }
 
 
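The write_cont_value() action above appends each folded ("continuation") line to the value stored by the preceding write_value() call, collapsing the leading LWS into a single space; hp->cont keeps a reference to that value string so folding also works when parsing resumes on a later buffer. A minimal usage sketch of the resulting behavior (not part of this patch), assuming the built unicorn_http extension can be loaded directly and exposes Unicorn::HttpParser as exercised by the tests further down:

  # hypothetical usage sketch, not part of this patch
  require 'unicorn_http'          # assumes the compiled extension is on $LOAD_PATH

  parser = Unicorn::HttpParser.new
  req = {}
  buf = "GET / HTTP/1.1\r\n" \
        "X-Folded: first\r\n" \
        "\tsecond\r\n" \
        "\r\n"

  parser.headers(req, buf)        # consumes buf in place, as in the tests below
  req['HTTP_X_FOLDED']            # => "first second" (leading LWS collapsed to one space)
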
diff --git a/ext/unicorn_http/unicorn_http_common.rl b/ext/unicorn_http/unicorn_http_common.rl
index f1ed138..9b51ba1 100644
--- a/ext/unicorn_http/unicorn_http_common.rl
+++ b/ext/unicorn_http/unicorn_http_common.rl
@@ -19,6 +19,7 @@
   uchar = (unreserved | escape | sorta_safe);
   pchar = (uchar | ":" | "@" | "&" | "=" | "+");
   tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
+  lws = (" " | "\t");
 
 # elements
   token = (ascii -- (CTL | tspecials));
@@ -49,7 +50,9 @@
 
   field_value = any* >start_value %write_value;
 
-  message_header = field_name ":" " "* field_value :> CRLF;
+  value_cont = lws+ any* >start_value %write_cont_value;
+
+  message_header = ((field_name ":" " "* field_value)|value_cont) :> CRLF;
   chunk_ext_val = token*;
   chunk_ext_name = token*;
   chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
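With the grammar change above, a header line may begin with LWS (value_cont), but write_cont_value() rejects a continuation that arrives before any field has been stored, since hp->cont is still unset. A sketch of that failure mode (not part of this patch), mirroring test_invalid_continuation below and assuming the parser error is surfaced as Unicorn::HttpParserError:

  # hypothetical usage sketch, not part of this patch
  require 'unicorn_http'          # assumes the compiled extension is on $LOAD_PATH

  parser = Unicorn::HttpParser.new
  begin
    parser.headers({}, "GET / HTTP/1.1\r\n    stray\r\nHost: example.com\r\n\r\n")
  rescue Unicorn::HttpParserError
    # the C code raises "invalid continuation line" for this input
  end
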
diff --git a/test/unit/test_http_parser.rb b/test/unit/test_http_parser.rb
index 42aa1a3..5430614 100644
--- a/test/unit/test_http_parser.rb
+++ b/test/unit/test_http_parser.rb
@@ -134,15 +134,84 @@ class HttpParserTest < Test::Unit::TestCase
     assert_equal req, parser.headers(req, should_be_good)
     assert_equal '', should_be_good
     assert parser.keepalive?
+  end
+
+  # legacy test case from Mongrel that we never supported before...
+  # I still consider Pound irrelevant, unfortunately stupid clients that
+  # send extremely big headers do exist and they've managed to find Unicorn...
+  def test_nasty_pound_header
+    parser = HttpParser.new
+    nasty_pound_header = "GET / HTTP/1.1\r\nX-SSL-Bullshit:   -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n"
+    req = {}
+    buf = nasty_pound_header.dup
+
+    assert nasty_pound_header =~ /(-----BEGIN .*--END CERTIFICATE-----)/m
+    expect = $1.dup
+    expect.gsub!(/\r\n\t/, ' ')
+    assert_equal req, parser.headers(req, buf)
+    assert_equal '', buf
+    assert_equal expect, req['HTTP_X_SSL_BULLSHIT']
+  end
 
-    # ref: http://thread.gmane.org/gmane.comp.lang.ruby.mongrel.devel/37/focus=45
-    # (note we got 'pen' mixed up with 'pound' in that thread,
-    # but the gist of it is still relevant: these nasty headers are irrelevant
-    #
-    # nasty_pound_header = "GET / HTTP/1.1\r\nX-SSL-Bullshit:   -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n"
-    # parser = HttpParser.new
-    # req = {}
-    # assert parser.execute(req, nasty_pound_header, 0)
+  def test_continuation_eats_leading_spaces
+    parser = HttpParser.new
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "\t\r\n" \
+             "    \r\n" \
+             "  ASDF\r\n\r\n"
+    req = {}
+    assert_equal req, parser.headers(req, header)
+    assert_equal '', header
+    assert_equal 'ASDF', req['HTTP_X_ASDF']
+  end
+
+  def test_continuation_eats_scattered_leading_spaces
+    parser = HttpParser.new
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:   hi\r\n" \
+             "    y\r\n" \
+             "\t\r\n" \
+             "       x\r\n" \
+             "  ASDF\r\n\r\n"
+    req = {}
+    assert_equal req, parser.headers(req, header)
+    assert_equal '', header
+    assert_equal 'hi y x ASDF', req['HTTP_X_ASDF']
+  end
+
+  # this may seem to be testing more of an implementation detail, but
+  # it also helps ensure we're safe in the presence of multiple parsers
+  # in case we ever go multithreaded/evented...
+  def test_resumable_continuations
+    nr = 1000
+    req = {}
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "  hello\r\n"
+    tmp = []
+    nr.times { |i|
+      parser = HttpParser.new
+      assert parser.headers(req, "#{header} #{i}\r\n").nil?
+      asdf = req['HTTP_X_ASDF']
+      assert_equal "hello #{i}", asdf
+      tmp << [ parser, asdf ]
+      req.clear
+    }
+    tmp.each_with_index { |(parser, asdf), i|
+      assert_equal req, parser.headers(req, "#{header} #{i}\r\n .\r\n\r\n")
+      assert_equal "hello #{i} .", asdf
+    }
+  end
+
+  def test_invalid_continuation
+    parser = HttpParser.new
+    header = "GET / HTTP/1.1\r\n" \
+             "    y\r\n" \
+             "Host: hello\r\n" \
+             "\r\n"
+    req = {}
+    assert_raises(HttpParserError) { parser.headers(req, header) }
   end
 
   def test_parse_ie6_urls