about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <normalperson@yhbt.net> 2009-12-19 01:05:41 -0800
committer: Eric Wong <normalperson@yhbt.net> 2009-12-19 01:12:22 -0800
commit: 05fa45e54b0a6033250249ec0ed2009d4dbacf48 (patch)
tree: f33bba0dc9662991971bc539d39eaa75767c6195
parent: 4c8c891b76c2a44a0ae76eea8f07934b984caab0 (diff)
download: unicorn-05fa45e54b0a6033250249ec0ed2009d4dbacf48.tar.gz
This is not explicitly specified or listed as an example in
rfc2616.  However, rfc2616 section 3.2.1 defers to rfc2396[1]
for the definition of absolute URIs, so the userinfo component
should be allowable, even if it does not make any sense.

In the real world, previous versions of Mongrel used URI.parse()
and thus allowed userinfo, so we also have precedent to allow
userinfo to be compatible *in case* our interpretation of the
RFCs is incorrect.  This change is unfortunately needed because
*occasionally* real clients rely on them.

Reported-by: Scott Chacon

[1] rfc3986 obsoletes rfc2396, but also includes userinfo
-rw-r--r-- ext/unicorn_http/unicorn_http_common.rl | 3
-rw-r--r-- test/unit/test_http_parser.rb | 39
2 files changed, 41 insertions, 1 deletions
diff --git a/ext/unicorn_http/unicorn_http_common.rl b/ext/unicorn_http/unicorn_http_common.rl
index 041dfec..6fca604 100644
--- a/ext/unicorn_http/unicorn_http_common.rl
+++ b/ext/unicorn_http/unicorn_http_common.rl
@@ -28,6 +28,7 @@
   scheme = ( "http"i ("s"i)? ) $downcase_char >mark %scheme;
   hostname = (alnum | "-" | "." | "_")+;
   host_with_port = (hostname (":" digit*)?) >mark %host;
+  userinfo = ((unreserved | escape | ";" | ":" | "&" | "=" | "+")+ "@")*;
 
   path = ( pchar+ ( "/" pchar* )* ) ;
   query = ( uchar | reserved )* %query_string ;
@@ -36,7 +37,7 @@
   rel_path = (path? (";" params)? %request_path) ("?" %start_query query)?;
   absolute_path = ( "/"+ rel_path );
   path_uri = absolute_path > mark %request_uri;
-  Absolute_URI = (scheme "://" host_with_port path_uri);
+  Absolute_URI = (scheme "://" userinfo host_with_port path_uri);
 
   Request_URI = ((absolute_path | "*") >mark %request_uri) | Absolute_URI;
   Fragment = ( uchar | reserved )* >mark %fragment;
diff --git a/test/unit/test_http_parser.rb b/test/unit/test_http_parser.rb
index 1b3faaf..0443b46 100644
--- a/test/unit/test_http_parser.rb
+++ b/test/unit/test_http_parser.rb
@@ -298,6 +298,45 @@ class HttpParserTest < Test::Unit::TestCase
     assert ! parser.keepalive?
   end
 
+  # some dumb clients add users because they're stupid
+  def test_absolute_uri_w_user
+    parser = HttpParser.new
+    req = {}
+    http = "GET http://user%20space@example.com/foo?q=bar HTTP/1.0\r\n\r\n"
+    assert_equal req, parser.headers(req, http)
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal "", http
+    assert ! parser.keepalive?
+  end
+
+  # since Mongrel supported anything URI.parse supported, we're stuck
+  # supporting everything URI.parse supports
+  def test_absolute_uri_uri_parse
+    "#{URI::REGEXP::PATTERN::UNRESERVED};:&=+$,".split(//).each do |char|
+      parser = HttpParser.new
+      req = {}
+      http = "GET http://#{char}@example.com/ HTTP/1.0\r\n\r\n"
+      assert_equal req, parser.headers(req, http)
+      assert_equal 'http', req['rack.url_scheme']
+      assert_equal '/', req['REQUEST_URI']
+      assert_equal '/', req['REQUEST_PATH']
+      assert_equal '', req['QUERY_STRING']
+
+      assert_equal 'example.com', req['HTTP_HOST']
+      assert_equal 'example.com', req['SERVER_NAME']
+      assert_equal '80', req['SERVER_PORT']
+      assert_equal "", http
+      assert ! parser.keepalive?
+    end
+  end
+
   def test_absolute_uri
     parser = HttpParser.new
     req = {}