From b031b9d2cb1bd0434ce2891ce20a65a5b5b29796 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 21 Apr 2009 11:14:51 -0700 Subject: http11: rfc2616 handling of absolute URIs We now parse the scheme, host and port from Absolute URIs and ignore them if the equivalents are specified in the other headers. --- ext/unicorn/http11/http11.c | 35 ++++-- ext/unicorn/http11/http11_parser.h | 188 ++++++++++++++++------------- ext/unicorn/http11/http11_parser.rl | 4 + ext/unicorn/http11/http11_parser_common.rl | 4 +- 4 files changed, 135 insertions(+), 96 deletions(-) (limited to 'ext') diff --git a/ext/unicorn/http11/http11.c b/ext/unicorn/http11/http11.c index d37b557..cd7a8f7 100644 --- a/ext/unicorn/http11/http11.c +++ b/ext/unicorn/http11/http11.c @@ -173,13 +173,11 @@ static void http_field(void *data, const char *field, size_t flen, const char *value, size_t vlen) { VALUE req = (VALUE)data; - VALUE v = Qnil; VALUE f = Qnil; VALIDATE_MAX_LENGTH(flen, FIELD_NAME); VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE); - v = rb_str_new(value, vlen); f = find_common_field_value(field, flen); if (f == Qnil) { @@ -198,9 +196,11 @@ static void http_field(void *data, const char *field, memcpy(RSTRING_PTR(f) + HTTP_PREFIX_LEN, field, flen); assert(*(RSTRING_PTR(f) + RSTRING_LEN(f)) == '\0'); /* paranoia */ /* fprintf(stderr, "UNKNOWN HEADER <%s>\n", RSTRING_PTR(f)); */ + } else if (f == global_http_host && rb_hash_aref(req, f) != Qnil) { + return; } - rb_hash_aset(req, f, v); + rb_hash_aset(req, f, rb_str_new(value, vlen)); } static void request_method(void *data, const char *at, size_t length) @@ -212,6 +212,16 @@ static void request_method(void *data, const char *at, size_t length) rb_hash_aset(req, global_request_method, val); } +static void scheme(void *data, const char *at, size_t length) +{ + rb_hash_aset((VALUE)data, global_rack_url_scheme, rb_str_new(at, length)); +} + +static void host(void *data, const char *at, size_t length) +{ + rb_hash_aset((VALUE)data, global_http_host, rb_str_new(at, length)); +} + static void request_uri(void *data, const char *at, size_t length) { VALUE req = (VALUE)data; @@ -287,13 +297,17 @@ static void header_done(void *data, const char *at, size_t length) rb_hash_aset(req, global_query_string, rb_str_new(NULL, 0)); /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */ - if ((temp = rb_hash_aref(req, global_http_x_forwarded_proto)) != Qnil && - RSTRING_LEN(temp) == 5 && - !memcmp("https", RSTRING_PTR(temp), 5)) + if ((temp = rb_hash_aref(req, global_rack_url_scheme)) == Qnil) { + if ((temp = rb_hash_aref(req, global_http_x_forwarded_proto)) != Qnil && + RSTRING_LEN(temp) == 5 && + !memcmp("https", RSTRING_PTR(temp), 5)) + server_port = global_port_443; + else + temp = global_http; + rb_hash_aset(req, global_rack_url_scheme, temp); + } else if (RSTRING_LEN(temp) == 5 && !memcmp("https", RSTRING_PTR(temp), 5)) { server_port = global_port_443; - else - temp = global_http; - rb_hash_aset(req, global_rack_url_scheme, temp); + } /* parse and set the SERVER_NAME and SERVER_PORT variables */ if ((temp = rb_hash_aref(req, global_http_host)) != Qnil) { @@ -416,7 +430,6 @@ void Init_http11(void) DEF_GLOBAL(server_port, "SERVER_PORT"); DEF_GLOBAL(server_protocol, "SERVER_PROTOCOL"); DEF_GLOBAL(server_protocol_value, "HTTP/1.1"); - DEF_GLOBAL(http_host, "HTTP_HOST"); DEF_GLOBAL(http_x_forwarded_proto, "HTTP_X_FORWARDED_PROTO"); DEF_GLOBAL(port_80, "80"); DEF_GLOBAL(port_443, "443"); @@ -432,4 +445,6 @@ void Init_http11(void) rb_define_method(cHttpParser, "execute", HttpParser_execute,2); sym_http_body = ID2SYM(rb_intern("http_body")); init_common_fields(); + global_http_host = find_common_field_value("HOST", 4); + assert(global_http_host != Qnil); } diff --git a/ext/unicorn/http11/http11_parser.h b/ext/unicorn/http11/http11_parser.h index 30aff6a..67aae71 100644 --- a/ext/unicorn/http11/http11_parser.h +++ b/ext/unicorn/http11/http11_parser.h @@ -12,6 +12,8 @@ static void http_field(void *data, const char *field, size_t flen, const char *value, size_t vlen); static void request_method(void *data, const char *at, size_t length); +static void scheme(void *data, const char *at, size_t length); +static void host(void *data, const char *at, size_t length); static void request_uri(void *data, const char *at, size_t length); static void fragment(void *data, const char *at, size_t length); static void request_path(void *data, const char *at, size_t length); @@ -59,12 +61,12 @@ static void downcase_char(char *c) /** Machine **/ -#line 105 "http11_parser.rl" +#line 109 "http11_parser.rl" /** Data **/ -#line 68 "http11_parser.h" +#line 70 "http11_parser.h" static const int http_parser_start = 1; static const int http_parser_first_final = 63; static const int http_parser_error = 0; @@ -72,18 +74,18 @@ static const int http_parser_error = 0; static const int http_parser_en_main = 1; -#line 109 "http11_parser.rl" +#line 113 "http11_parser.rl" static void http_parser_init(http_parser *parser) { int cs = 0; memset(parser, 0, sizeof(*parser)); -#line 82 "http11_parser.h" +#line 84 "http11_parser.h" { cs = http_parser_start; } -#line 114 "http11_parser.rl" +#line 118 "http11_parser.rl" parser->cs = cs; } @@ -104,7 +106,7 @@ static void http_parser_execute( assert(pe - p == len - off && "pointers aren't same distance"); -#line 108 "http11_parser.h" +#line 110 "http11_parser.h" { if ( p == pe ) goto _test_eof; @@ -128,14 +130,14 @@ st0: cs = 0; goto _out; tr0: -#line 62 "http11_parser.rl" +#line 64 "http11_parser.rl" {MARK(mark, p); } goto st2; st2: if ( ++p == pe ) goto _test_eof2; case 2: -#line 139 "http11_parser.h" +#line 141 "http11_parser.h" switch( (*p) ) { case 32: goto tr2; case 36: goto st44; @@ -151,7 +153,7 @@ case 2: goto st44; goto st0; tr2: -#line 75 "http11_parser.rl" +#line 77 "http11_parser.rl" { request_method(parser->data, PTR_TO(mark), LEN(mark, p)); } @@ -160,7 +162,7 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 164 "http11_parser.h" +#line 166 "http11_parser.h" switch( (*p) ) { case 42: goto tr4; case 47: goto tr5; @@ -169,67 +171,67 @@ case 3: } goto st0; tr4: -#line 62 "http11_parser.rl" +#line 64 "http11_parser.rl" {MARK(mark, p); } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 180 "http11_parser.h" +#line 182 "http11_parser.h" switch( (*p) ) { case 32: goto tr7; case 35: goto tr8; } goto st0; tr7: -#line 78 "http11_parser.rl" +#line 82 "http11_parser.rl" { request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr30: -#line 62 "http11_parser.rl" +#line 64 "http11_parser.rl" {MARK(mark, p); } -#line 81 "http11_parser.rl" +#line 85 "http11_parser.rl" { fragment(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr33: -#line 81 "http11_parser.rl" +#line 85 "http11_parser.rl" { fragment(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr37: -#line 94 "http11_parser.rl" +#line 98 "http11_parser.rl" { request_path(parser->data, PTR_TO(mark), LEN(mark,p)); } -#line 78 "http11_parser.rl" +#line 82 "http11_parser.rl" { request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr48: -#line 85 "http11_parser.rl" +#line 89 "http11_parser.rl" {MARK(query_start, p); } -#line 86 "http11_parser.rl" +#line 90 "http11_parser.rl" { query_string(parser->data, PTR_TO(query_start), LEN(query_start, p)); } -#line 78 "http11_parser.rl" +#line 82 "http11_parser.rl" { request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr52: -#line 86 "http11_parser.rl" +#line 90 "http11_parser.rl" { query_string(parser->data, PTR_TO(query_start), LEN(query_start, p)); } -#line 78 "http11_parser.rl" +#line 82 "http11_parser.rl" { request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } @@ -238,19 +240,19 @@ st5: if ( ++p == pe ) goto _test_eof5; case 5: -#line 242 "http11_parser.h" +#line 244 "http11_parser.h" if ( (*p) == 72 ) goto tr9; goto st0; tr9: -#line 62 "http11_parser.rl" +#line 64 "http11_parser.rl" {MARK(mark, p); } goto st6; st6: if ( ++p == pe ) goto _test_eof6; case 6: -#line 254 "http11_parser.h" +#line 256 "http11_parser.h" if ( (*p) == 84 ) goto st7; goto st0; @@ -308,21 +310,21 @@ case 13: goto st13; goto st0; tr17: -#line 90 "http11_parser.rl" +#line 94 "http11_parser.rl" { http_version(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st14; tr25: -#line 71 "http11_parser.rl" +#line 73 "http11_parser.rl" { MARK(mark, p); } -#line 72 "http11_parser.rl" +#line 74 "http11_parser.rl" { http_field(parser->data, PTR_TO(field_start), parser->field_len, PTR_TO(mark), LEN(mark, p)); } goto st14; tr28: -#line 72 "http11_parser.rl" +#line 74 "http11_parser.rl" { http_field(parser->data, PTR_TO(field_start), parser->field_len, PTR_TO(mark), LEN(mark, p)); } @@ -331,7 +333,7 @@ st14: if ( ++p == pe ) goto _test_eof14; case 14: -#line 335 "http11_parser.h" +#line 337 "http11_parser.h" if ( (*p) == 10 ) goto st15; goto st0; @@ -371,7 +373,7 @@ case 16: goto tr21; goto st0; tr21: -#line 98 "http11_parser.rl" +#line 102 "http11_parser.rl" { parser->body_start = p - buffer + 1; header_done(parser->data, p + 1, pe - p - 1); @@ -382,23 +384,23 @@ st63: if ( ++p == pe ) goto _test_eof63; case 63: -#line 386 "http11_parser.h" +#line 388 "http11_parser.h" goto st0; tr20: -#line 64 "http11_parser.rl" +#line 66 "http11_parser.rl" { MARK(field_start, p); } -#line 65 "http11_parser.rl" +#line 67 "http11_parser.rl" { snake_upcase_char((char *)p); } goto st17; tr22: -#line 65 "http11_parser.rl" +#line 67 "http11_parser.rl" { snake_upcase_char((char *)p); } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 402 "http11_parser.h" +#line 404 "http11_parser.h" switch( (*p) ) { case 33: goto tr22; case 58: goto tr23; @@ -424,71 +426,71 @@ case 17: goto tr22; goto st0; tr23: -#line 67 "http11_parser.rl" +#line 69 "http11_parser.rl" { parser->field_len = LEN(field_start, p); } goto st18; tr26: -#line 71 "http11_parser.rl" +#line 73 "http11_parser.rl" { MARK(mark, p); } goto st18; st18: if ( ++p == pe ) goto _test_eof18; case 18: -#line 441 "http11_parser.h" +#line 443 "http11_parser.h" switch( (*p) ) { case 13: goto tr25; case 32: goto tr26; } goto tr24; tr24: -#line 71 "http11_parser.rl" +#line 73 "http11_parser.rl" { MARK(mark, p); } goto st19; st19: if ( ++p == pe ) goto _test_eof19; case 19: -#line 455 "http11_parser.h" +#line 457 "http11_parser.h" if ( (*p) == 13 ) goto tr28; goto st19; tr8: -#line 78 "http11_parser.rl" +#line 82 "http11_parser.rl" { request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st20; tr38: -#line 94 "http11_parser.rl" +#line 98 "http11_parser.rl" { request_path(parser->data, PTR_TO(mark), LEN(mark,p)); } -#line 78 "http11_parser.rl" +#line 82 "http11_parser.rl" { request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st20; tr49: -#line 85 "http11_parser.rl" +#line 89 "http11_parser.rl" {MARK(query_start, p); } -#line 86 "http11_parser.rl" +#line 90 "http11_parser.rl" { query_string(parser->data, PTR_TO(query_start), LEN(query_start, p)); } -#line 78 "http11_parser.rl" +#line 82 "http11_parser.rl" { request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st20; tr53: -#line 86 "http11_parser.rl" +#line 90 "http11_parser.rl" { query_string(parser->data, PTR_TO(query_start), LEN(query_start, p)); } -#line 78 "http11_parser.rl" +#line 82 "http11_parser.rl" { request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } @@ -497,7 +499,7 @@ st20: if ( ++p == pe ) goto _test_eof20; case 20: -#line 501 "http11_parser.h" +#line 503 "http11_parser.h" switch( (*p) ) { case 32: goto tr30; case 35: goto st0; @@ -508,14 +510,14 @@ case 20: goto st0; goto tr29; tr29: -#line 62 "http11_parser.rl" +#line 64 "http11_parser.rl" {MARK(mark, p); } goto st21; st21: if ( ++p == pe ) goto _test_eof21; case 21: -#line 519 "http11_parser.h" +#line 521 "http11_parser.h" switch( (*p) ) { case 32: goto tr33; case 35: goto st0; @@ -526,14 +528,14 @@ case 21: goto st0; goto st21; tr31: -#line 62 "http11_parser.rl" +#line 64 "http11_parser.rl" {MARK(mark, p); } goto st22; st22: if ( ++p == pe ) goto _test_eof22; case 22: -#line 537 "http11_parser.h" +#line 539 "http11_parser.h" if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st23; @@ -557,14 +559,20 @@ case 23: goto st21; goto st0; tr5: -#line 62 "http11_parser.rl" +#line 64 "http11_parser.rl" + {MARK(mark, p); } + goto st24; +tr65: +#line 81 "http11_parser.rl" + { host(parser->data, PTR_TO(mark), LEN(mark, p)); } +#line 64 "http11_parser.rl" {MARK(mark, p); } goto st24; st24: if ( ++p == pe ) goto _test_eof24; case 24: -#line 568 "http11_parser.h" +#line 576 "http11_parser.h" switch( (*p) ) { case 32: goto tr37; case 35: goto tr38; @@ -603,7 +611,7 @@ case 26: goto st24; goto st0; tr40: -#line 94 "http11_parser.rl" +#line 98 "http11_parser.rl" { request_path(parser->data, PTR_TO(mark), LEN(mark,p)); } @@ -612,7 +620,7 @@ st27: if ( ++p == pe ) goto _test_eof27; case 27: -#line 616 "http11_parser.h" +#line 624 "http11_parser.h" switch( (*p) ) { case 32: goto tr7; case 35: goto tr8; @@ -650,7 +658,7 @@ case 29: goto st27; goto st0; tr41: -#line 94 "http11_parser.rl" +#line 98 "http11_parser.rl" { request_path(parser->data, PTR_TO(mark), LEN(mark,p)); } @@ -659,7 +667,7 @@ st30: if ( ++p == pe ) goto _test_eof30; case 30: -#line 663 "http11_parser.h" +#line 671 "http11_parser.h" switch( (*p) ) { case 32: goto tr48; case 35: goto tr49; @@ -670,14 +678,14 @@ case 30: goto st0; goto tr47; tr47: -#line 85 "http11_parser.rl" +#line 89 "http11_parser.rl" {MARK(query_start, p); } goto st31; st31: if ( ++p == pe ) goto _test_eof31; case 31: -#line 681 "http11_parser.h" +#line 689 "http11_parser.h" switch( (*p) ) { case 32: goto tr52; case 35: goto tr53; @@ -688,14 +696,14 @@ case 31: goto st0; goto st31; tr50: -#line 85 "http11_parser.rl" +#line 89 "http11_parser.rl" {MARK(query_start, p); } goto st32; st32: if ( ++p == pe ) goto _test_eof32; case 32: -#line 699 "http11_parser.h" +#line 707 "http11_parser.h" if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st33; @@ -719,66 +727,73 @@ case 33: goto st31; goto st0; tr6: -#line 66 "http11_parser.rl" +#line 64 "http11_parser.rl" + {MARK(mark, p); } +#line 68 "http11_parser.rl" { downcase_char((char *)p); } goto st34; st34: if ( ++p == pe ) goto _test_eof34; case 34: -#line 730 "http11_parser.h" +#line 740 "http11_parser.h" switch( (*p) ) { case 84: goto tr56; case 116: goto tr56; } goto st0; tr56: -#line 66 "http11_parser.rl" +#line 68 "http11_parser.rl" { downcase_char((char *)p); } goto st35; st35: if ( ++p == pe ) goto _test_eof35; case 35: -#line 744 "http11_parser.h" +#line 754 "http11_parser.h" switch( (*p) ) { case 84: goto tr57; case 116: goto tr57; } goto st0; tr57: -#line 66 "http11_parser.rl" +#line 68 "http11_parser.rl" { downcase_char((char *)p); } goto st36; st36: if ( ++p == pe ) goto _test_eof36; case 36: -#line 758 "http11_parser.h" +#line 768 "http11_parser.h" switch( (*p) ) { case 80: goto tr58; case 112: goto tr58; } goto st0; tr58: -#line 66 "http11_parser.rl" +#line 68 "http11_parser.rl" { downcase_char((char *)p); } goto st37; st37: if ( ++p == pe ) goto _test_eof37; case 37: -#line 772 "http11_parser.h" +#line 782 "http11_parser.h" switch( (*p) ) { - case 58: goto st38; + case 58: goto tr59; case 83: goto tr60; case 115: goto tr60; } goto st0; +tr59: +#line 80 "http11_parser.rl" + { scheme(parser->data, PTR_TO(mark), LEN(mark, p)); } + goto st38; st38: if ( ++p == pe ) goto _test_eof38; case 38: +#line 797 "http11_parser.h" if ( (*p) == 47 ) goto st39; goto st0; @@ -795,22 +810,27 @@ st40: case 40: if ( (*p) < 48 ) { if ( 45 <= (*p) && (*p) <= 46 ) - goto st41; + goto tr63; } else if ( (*p) > 57 ) { if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) - goto st41; + goto tr63; } else if ( (*p) >= 65 ) - goto st41; + goto tr63; } else - goto st41; + goto tr63; goto st0; +tr63: +#line 64 "http11_parser.rl" + {MARK(mark, p); } + goto st41; st41: if ( ++p == pe ) goto _test_eof41; case 41: +#line 832 "http11_parser.h" switch( (*p) ) { - case 47: goto tr5; + case 47: goto tr65; case 58: goto st42; } if ( (*p) < 65 ) { @@ -827,21 +847,21 @@ st42: goto _test_eof42; case 42: if ( (*p) == 47 ) - goto tr5; + goto tr65; if ( 48 <= (*p) && (*p) <= 57 ) goto st42; goto st0; tr60: -#line 66 "http11_parser.rl" +#line 68 "http11_parser.rl" { downcase_char((char *)p); } goto st43; st43: if ( ++p == pe ) goto _test_eof43; case 43: -#line 843 "http11_parser.h" +#line 863 "http11_parser.h" if ( (*p) == 58 ) - goto st38; + goto tr59; goto st0; st44: if ( ++p == pe ) @@ -1242,7 +1262,7 @@ case 62: _out: {} } -#line 134 "http11_parser.rl" +#line 138 "http11_parser.rl" if (!http_parser_has_error(parser)) parser->cs = cs; diff --git a/ext/unicorn/http11/http11_parser.rl b/ext/unicorn/http11/http11_parser.rl index 991ba58..9894276 100644 --- a/ext/unicorn/http11/http11_parser.rl +++ b/ext/unicorn/http11/http11_parser.rl @@ -10,6 +10,8 @@ static void http_field(void *data, const char *field, size_t flen, const char *value, size_t vlen); static void request_method(void *data, const char *at, size_t length); +static void scheme(void *data, const char *at, size_t length); +static void host(void *data, const char *at, size_t length); static void request_uri(void *data, const char *at, size_t length); static void fragment(void *data, const char *at, size_t length); static void request_path(void *data, const char *at, size_t length); @@ -75,6 +77,8 @@ static void downcase_char(char *c) action request_method { request_method(parser->data, PTR_TO(mark), LEN(mark, fpc)); } + action scheme { scheme(parser->data, PTR_TO(mark), LEN(mark, fpc)); } + action host { host(parser->data, PTR_TO(mark), LEN(mark, fpc)); } action request_uri { request_uri(parser->data, PTR_TO(mark), LEN(mark, fpc)); } diff --git a/ext/unicorn/http11/http11_parser_common.rl b/ext/unicorn/http11/http11_parser_common.rl index 20fef92..567ed6b 100644 --- a/ext/unicorn/http11/http11_parser_common.rl +++ b/ext/unicorn/http11/http11_parser_common.rl @@ -24,9 +24,9 @@ token = (ascii -- (CTL | tspecials)); # URI schemes and absolute paths - scheme = ( "http"i ("s"i)? ) $downcase_char; + scheme = ( "http"i ("s"i)? ) $downcase_char >mark %scheme; hostname = (alnum | "-" | ".")+; - host_with_port = (hostname (":" digit*)?); + host_with_port = (hostname (":" digit*)?) >mark %host; path = ( pchar+ ( "/" pchar* )* ) ; query = ( uchar | reserved )* %query_string ; -- cgit v1.2.3-24-ge0c7