From 3dc43b27e2ab2740acda0514bb0d9562810b3df1 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 21 Apr 2009 11:14:56 -0700
Subject: http11: support underscores in URI hostnames

They aren't common, but apparently there exist URLs with them,
so we'll support them.
---
 ext/unicorn/http11/http11_parser.h         |  7 +++++--
 ext/unicorn/http11/http11_parser_common.rl |  2 +-
 test/unit/test_http_parser.rb              | 16 ++++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/ext/unicorn/http11/http11_parser.h b/ext/unicorn/http11/http11_parser.h
index 67aae71..8d95c59 100644
--- a/ext/unicorn/http11/http11_parser.h
+++ b/ext/unicorn/http11/http11_parser.h
@@ -808,6 +808,8 @@ st40:
 	if ( ++p == pe )
 		goto _test_eof40;
 case 40:
+	if ( (*p) == 95 )
+		goto tr63;
 	if ( (*p) < 48 ) {
 		if ( 45 <= (*p) && (*p) <= 46 )
 			goto tr63;
@@ -828,10 +830,11 @@ st41:
 	if ( ++p == pe )
 		goto _test_eof41;
 case 41:
-#line 832 "http11_parser.h"
+#line 834 "http11_parser.h"
 	switch( (*p) ) {
 		case 47: goto tr65;
 		case 58: goto st42;
+		case 95: goto st41;
 	}
 	if ( (*p) < 65 ) {
 		if ( 45 <= (*p) && (*p) <= 57 )
@@ -859,7 +862,7 @@ st43:
 	if ( ++p == pe )
 		goto _test_eof43;
 case 43:
-#line 863 "http11_parser.h"
+#line 866 "http11_parser.h"
 	if ( (*p) == 58 )
 		goto tr59;
 	goto st0;
diff --git a/ext/unicorn/http11/http11_parser_common.rl b/ext/unicorn/http11/http11_parser_common.rl
index 567ed6b..ae01a55 100644
--- a/ext/unicorn/http11/http11_parser_common.rl
+++ b/ext/unicorn/http11/http11_parser_common.rl
@@ -25,7 +25,7 @@
 
   # URI schemes and absolute paths
   scheme = ( "http"i ("s"i)? ) $downcase_char >mark %scheme;
-  hostname = (alnum | "-" | ".")+;
+  hostname = (alnum | "-" | "." | "_")+;
   host_with_port = (hostname (":" digit*)?) >mark %host;
 
   path = ( pchar+ ( "/" pchar* )* ) ;
diff --git a/test/unit/test_http_parser.rb b/test/unit/test_http_parser.rb
index ce1213f..45e051a 100644
--- a/test/unit/test_http_parser.rb
+++ b/test/unit/test_http_parser.rb
@@ -136,6 +136,22 @@ class HttpParserTest < Test::Unit::TestCase
     assert_equal '', req['QUERY_STRING']
   end
 
+  # not common, but underscores do appear in practice
+  def test_absolute_uri_underscores
+    parser = HttpParser.new
+    req = {}
+    http = "GET http://under_score.example.com/foo?q=bar HTTP/1.0\r\n\r\n"
+    assert parser.execute(req, http)
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+
+    assert_equal 'under_score.example.com', req['HTTP_HOST']
+    assert_equal 'under_score.example.com', req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
   def test_absolute_uri
     parser = HttpParser.new
     req = {}
-- 
cgit v1.2.3-24-ge0c7