diff options
-rw-r--r-- | ext/unicorn_http/unicorn_http.rl | 41 | ||||
-rw-r--r-- | ext/unicorn_http/unicorn_http_common.rl | 5 | ||||
-rw-r--r-- | test/unit/test_http_parser.rb | 85 |
3 files changed, 119 insertions, 12 deletions
diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl index ed9c359..2d829e9 100644 --- a/ext/unicorn_http/unicorn_http.rl +++ b/ext/unicorn_http/unicorn_http.rl @@ -36,6 +36,7 @@ struct http_parser { size_t field_len; /* only used during header processing */ size_t dest_offset; /* only used during body processing */ } s; + VALUE cont; union { off_t content; off_t chunk; @@ -87,6 +88,31 @@ static void invalid_if_trailer(int flags) rb_raise(eHttpParserError, "invalid Trailer"); } +static void write_cont_value(struct http_parser *hp, + const char *buffer, const char *p) +{ + char *vptr; + + if (!hp->cont) + rb_raise(eHttpParserError, "invalid continuation line"); + + assert(hp->mark > 0); + + if (LEN(mark, p) == 0) + return; + + if (RSTRING_LEN(hp->cont) > 0) + --hp->mark; + + vptr = PTR_TO(mark); + + if (RSTRING_LEN(hp->cont) > 0) { + assert(' ' == *vptr || '\t' == *vptr); + *vptr = ' '; + } + rb_str_buf_cat(hp->cont, vptr, LEN(mark, p)); +} + static void write_value(VALUE req, struct http_parser *hp, const char *buffer, const char *p) { @@ -123,11 +149,11 @@ static void write_value(VALUE req, struct http_parser *hp, e = rb_hash_aref(req, f); if (e == Qnil) { - rb_hash_aset(req, f, v); + hp->cont = rb_hash_aset(req, f, v); } else if (f != g_http_host) { /* full URLs in REQUEST_URI take precedence for the Host: header */ rb_str_buf_cat(e, ",", 1); - rb_str_buf_append(e, v); + hp->cont = rb_str_buf_append(e, v); } } @@ -144,6 +170,7 @@ static void write_value(VALUE req, struct http_parser *hp, action write_field { hp->s.field_len = LEN(start.field, fpc); } action start_value { MARK(mark, fpc); } action write_value { write_value(req, hp, buffer, fpc); } + action write_cont_value { write_cont_value(hp, buffer, fpc); } action request_method { request_method(hp, req, PTR_TO(mark), LEN(mark, fpc)); } @@ -342,10 +369,18 @@ static void finalize_header(VALUE req) rb_hash_aset(req, g_query_string, rb_str_new(NULL, 0)); } +static void hp_mark(void *ptr) +{ + struct http_parser *hp = ptr; + + if (hp->cont) + rb_gc_mark(hp->cont); +} + static VALUE HttpParser_alloc(VALUE klass) { struct http_parser *hp; - return Data_Make_Struct(klass, struct http_parser, NULL, NULL, hp); + return Data_Make_Struct(klass, struct http_parser, hp_mark, NULL, hp); } diff --git a/ext/unicorn_http/unicorn_http_common.rl b/ext/unicorn_http/unicorn_http_common.rl index f1ed138..9b51ba1 100644 --- a/ext/unicorn_http/unicorn_http_common.rl +++ b/ext/unicorn_http/unicorn_http_common.rl @@ -19,6 +19,7 @@ uchar = (unreserved | escape | sorta_safe); pchar = (uchar | ":" | "@" | "&" | "=" | "+"); tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t"); + lws = (" " | "\t"); # elements token = (ascii -- (CTL | tspecials)); @@ -49,7 +50,9 @@ field_value = any* >start_value %write_value; - message_header = field_name ":" " "* field_value :> CRLF; + value_cont = lws+ any* >start_value %write_cont_value; + + message_header = ((field_name ":" " "* field_value)|value_cont) :> CRLF; chunk_ext_val = token*; chunk_ext_name = token*; chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*; diff --git a/test/unit/test_http_parser.rb b/test/unit/test_http_parser.rb index 42aa1a3..5430614 100644 --- a/test/unit/test_http_parser.rb +++ b/test/unit/test_http_parser.rb @@ -134,15 +134,84 @@ class HttpParserTest < Test::Unit::TestCase assert_equal req, parser.headers(req, should_be_good) assert_equal '', should_be_good assert parser.keepalive? + end + + # legacy test case from Mongrel that we never supported before... + # I still consider Pound irrelevant, unfortunately stupid clients that + # send extremely big headers do exist and they've managed to find Unicorn... + def test_nasty_pound_header + parser = HttpParser.new + nasty_pound_header = "GET / HTTP/1.1\r\nX-SSL-Bullshit: -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n" + req = {} + buf = nasty_pound_header.dup + + assert nasty_pound_header =~ /(-----BEGIN .*--END CERTIFICATE-----)/m + expect = $1.dup + expect.gsub!(/\r\n\t/, ' ') + assert_equal req, parser.headers(req, buf) + assert_equal '', buf + assert_equal expect, req['HTTP_X_SSL_BULLSHIT'] + end - # ref: http://thread.gmane.org/gmane.comp.lang.ruby.mongrel.devel/37/focus=45 - # (note we got 'pen' mixed up with 'pound' in that thread, - # but the gist of it is still relevant: these nasty headers are irrelevant - # - # nasty_pound_header = "GET / HTTP/1.1\r\nX-SSL-Bullshit: -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n" - # parser = HttpParser.new - # req = {} - # assert parser.execute(req, nasty_pound_header, 0) + def test_continuation_eats_leading_spaces + parser = HttpParser.new + header = "GET / HTTP/1.1\r\n" \ + "X-ASDF: \r\n" \ + "\t\r\n" \ + " \r\n" \ + " ASDF\r\n\r\n" + req = {} + assert_equal req, parser.headers(req, header) + assert_equal '', header + assert_equal 'ASDF', req['HTTP_X_ASDF'] + end + + def test_continuation_eats_scattered_leading_spaces + parser = HttpParser.new + header = "GET / HTTP/1.1\r\n" \ + "X-ASDF: hi\r\n" \ + " y\r\n" \ + "\t\r\n" \ + " x\r\n" \ + " ASDF\r\n\r\n" + req = {} + assert_equal req, parser.headers(req, header) + assert_equal '', header + assert_equal 'hi y x ASDF', req['HTTP_X_ASDF'] + end + + # this may seem to be testing more of an implementation detail, but + # it also helps ensure we're safe in the presence of multiple parsers + # in case we ever go multithreaded/evented... + def test_resumable_continuations + nr = 1000 + req = {} + header = "GET / HTTP/1.1\r\n" \ + "X-ASDF: \r\n" \ + " hello\r\n" + tmp = [] + nr.times { |i| + parser = HttpParser.new + assert parser.headers(req, "#{header} #{i}\r\n").nil? + asdf = req['HTTP_X_ASDF'] + assert_equal "hello #{i}", asdf + tmp << [ parser, asdf ] + req.clear + } + tmp.each_with_index { |(parser, asdf), i| + assert_equal req, parser.headers(req, "#{header} #{i}\r\n .\r\n\r\n") + assert_equal "hello #{i} .", asdf + } + end + + def test_invalid_continuation + parser = HttpParser.new + header = "GET / HTTP/1.1\r\n" \ + " y\r\n" \ + "Host: hello\r\n" \ + "\r\n" + req = {} + assert_raises(HttpParserError) { parser.headers(req, header) } end def test_parse_ie6_urls |