kcar RubyGem user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: kcar-public@bogomips.org
Subject: [PATCH 04/11] implement request parsing with tests
Date: Sat,  1 Dec 2018 13:31:18 +0000	[thread overview]
Message-ID: <20181201133125.5524-5-e@80x24.org> (raw)
In-Reply-To: <20181201133125.5524-1-e@80x24.org>

Most of these tests are ported over from a Mongrel-derived
server.  Chunked body parsing is not implemented yet.
---
 ext/kcar/kcar.rl            | 338 ++++++++++++--
 test/test_request_parser.rb | 862 ++++++++++++++++++++++++++++++++++++
 2 files changed, 1162 insertions(+), 38 deletions(-)
 create mode 100644 test/test_request_parser.rb

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index c0e8bbc..01e4c76 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -16,8 +16,13 @@
 static VALUE eParserError;
 static ID id_uminus, id_sq, id_sq_set;
 static VALUE g_rack_url_scheme,
-  g_HOST, g_PATH_INFO, g_QUERY_STRING,
-  g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI;
+  g_80, g_443, g_http, g_https,
+  g_HTTP_HOST, g_HTTP_CONNECTION, g_HTTP_TRAILER, g_HTTP_TRANSFER_ENCODING,
+  g_HTTP_VERSION,
+  g_CONTENT_LENGTH, g_CONTENT_TYPE,
+  g_PATH_INFO, g_QUERY_STRING,
+  g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI,
+  g_SERVER_NAME, g_SERVER_PORT, g_SERVER_PROTOCOL;
 static VALUE e413, e414;
 
 /** Defines common length and error messages for input length validation. */
@@ -51,6 +56,9 @@ DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
 
 struct http_parser {
   int cs; /* Ragel internal state */
+  unsigned int is_request:1;
+  unsigned int has_query:1;
+  unsigned int has_scheme:1;
   unsigned int chunked:1;
   unsigned int has_body:1;
   unsigned int in_body:1;
@@ -59,7 +67,9 @@ struct http_parser {
   unsigned int in_chunk:1;
   unsigned int persistent:1;
   unsigned int has_header:1;
-  unsigned int padding:24;
+  unsigned int body_eof_seen:1;
+  unsigned int is_https:1;
+  unsigned int padding:19;
   unsigned int mark;
   unsigned int offset;
   union { /* these 2 fields don't nest */
@@ -71,7 +81,11 @@ struct http_parser {
     unsigned int dest_offset; /* only used during body processing */
   } s;
   VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
-  VALUE status; /* String or Qnil */
+  union {
+    /* String or Qnil */
+    VALUE status; /* status string for responses */
+    VALUE host; /* Host: header for requests */
+  } v;
   union {
     off_t content;
     off_t chunk;
@@ -134,10 +148,69 @@ static VALUE stripped_str_new(const char *str, long len)
   return rb_str_new(str, end + 1);
 }
 
-static void finalize_header(struct http_parser *hp)
+static VALUE request_host_val(struct http_parser *hp)
+{
+  assert(hp->is_request == 1 && "not a request");
+  return NIL_P(hp->v.host) ? Qfalse : hp->v.host;
+}
+
+static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host)
+{
+  char *host_ptr = RSTRING_PTR(host);
+  long host_len = RSTRING_LEN(host);
+  char *colon;
+  VALUE server_name = host;
+  VALUE server_port = hp->is_https ? g_443 : g_80;
+
+  if (*host_ptr == '[') { /* ipv6 address format */
+    char *rbracket = memchr(host_ptr + 1, ']', host_len - 1);
+
+    if (rbracket)
+      colon = (rbracket[1] == ':') ? rbracket + 1 : NULL;
+    else
+      colon = memchr(host_ptr + 1, ':', host_len - 1);
+  } else {
+    colon = memchr(host_ptr, ':', host_len);
+  }
+
+  if (colon) {
+    long port_start = colon - host_ptr + 1;
+    long port_len = host_len - port_start;
+
+    server_name = rb_str_substr(host, 0, colon - host_ptr);
+    server_name = str_dd_freeze(server_name);
+    if (port_len > 0) {
+      server_port = rb_str_substr(host, port_start, port_len);
+      server_port = str_dd_freeze(server_port);
+    }
+  }
+  rb_hash_aset(env, g_SERVER_NAME, server_name);
+  rb_hash_aset(env, g_SERVER_PORT, server_port);
+}
+
+static void http_09_request(VALUE env)
+{
+  VALUE v = str_new_dd_freeze("HTTP/0.9", 8);
+
+  rb_hash_aset(env, g_SERVER_PROTOCOL, v);
+  rb_hash_aset(env, g_HTTP_VERSION, v);
+}
+
+static void finalize_header(struct http_parser *hp, VALUE hdr)
 {
   if (hp->has_trailer && !hp->chunked)
     rb_raise(eParserError, "trailer but not chunked");
+  if (hp->is_request) {
+    if (!hp->has_query)
+      rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0));
+    if (hp->has_header) {
+      VALUE host = request_host_val(hp);
+      if (host != Qfalse)
+        set_server_vars(hp, hdr, host);
+    } else {
+      http_09_request(hdr);
+    }
+  }
 }
 
 /*
@@ -173,28 +246,58 @@ request_method(VALUE env, const char *ptr, size_t len)
 }
 
 static void
-url_scheme(VALUE env, const char *ptr, size_t len)
+url_scheme(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
 {
-  rb_hash_aset(env, g_rack_url_scheme, str_new_dd_freeze(ptr, len));
+  VALUE val;
+
+  hp->has_scheme = 1;
+  /* Ragel machine downcases and enforces this as "http" or "https" */
+  if (len == 5) {
+    hp->is_https = 1;
+    assert(CONST_MEM_EQ("https", ptr, len) && "len == 5 but not 'https'");
+    val = g_https;
+  } else {
+    assert(CONST_MEM_EQ("http", ptr, len) && "len != 4 but not 'http'");
+    val = g_http;
+  }
+  rb_hash_aset(env, g_rack_url_scheme, val);
 }
 
 static void
-request_host(VALUE env, const char *ptr, size_t len)
+request_host(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
 {
-  rb_hash_aset(env, g_HOST, str_new_dd_freeze(ptr, len));
+  VALUE val = rb_str_new(ptr, len);
+
+  rb_hash_aset(env, g_HTTP_HOST, val);
+  hp->v.host = val;
 }
 
 static void
 request_uri(VALUE env, const char *ptr, size_t len)
 {
+  VALUE val;
+
   VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
-  rb_hash_aset(env, g_REQUEST_URI, rb_str_new(ptr, len));
+  val = rb_str_new(ptr, len);
+  rb_hash_aset(env, g_REQUEST_URI, val);
+
+  /*
+   * rack says PATH_INFO must start with "/" or be empty,
+   * but "OPTIONS *" is a valid request
+   */
+  if (CONST_MEM_EQ("*", ptr, len)) {
+    val = rb_str_new(NULL, 0);
+    rb_hash_aset(env, g_PATH_INFO, val);
+    rb_hash_aset(env, g_REQUEST_PATH, val);
+  }
 }
 
 static void
-query_string(VALUE env, const char *ptr, size_t len)
+query_string(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
 {
   VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
+
+  hp->has_query = 1;
   rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len));
 }
 
@@ -204,22 +307,26 @@ request_path(VALUE env, const char *ptr, size_t len)
   VALUE val;
 
   VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
-  val = rb_hash_aset(env, g_REQUEST_PATH, rb_str_new(ptr, len));
-
-  /* rack says PATH_INFO must start with "/" or be empty */
-  if (CONST_MEM_EQ("*", ptr, len))
-    val = rb_str_new(NULL, 0);
+  val = rb_str_new(ptr, len);
 
+  rb_hash_aset(env, g_REQUEST_PATH, val);
   rb_hash_aset(env, g_PATH_INFO, val);
 }
 
 static void
-http_version(struct http_parser *hp, const char *ptr, size_t len)
+http_version(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
 {
   if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
     /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
     hp->persistent = 1;
   }
+  if (hp->is_request) {
+    VALUE v = str_new_dd_freeze(ptr, len);
+    hp->has_header = 1;
+
+    rb_hash_aset(env, g_SERVER_PROTOCOL, v);
+    rb_hash_aset(env, g_HTTP_VERSION, v);
+  }
 }
 
 static void
@@ -227,13 +334,13 @@ status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
 {
   long nr;
 
-  hp->status = str_new_dd_freeze(ptr, len);
+  hp->v.status = str_new_dd_freeze(ptr, len);
 
   /* RSTRING_PTR is null terminated, ptr is not */
-  nr = strtol(RSTRING_PTR(hp->status), NULL, 10);
+  nr = strtol(RSTRING_PTR(hp->v.status), NULL, 10);
 
   if (nr < 100 || nr > 999)
-    rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status));
+    rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->v.status));
 
   if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
     hp->has_body = 1;
@@ -282,8 +389,8 @@ static void write_cont_value(struct http_parser *hp,
   rb_str_buf_cat(hp->cont, vptr, end + 1);
 }
 
-static void write_value(VALUE hdr, struct http_parser *hp,
-                        const char *buffer, const char *p)
+static void write_response_value(struct http_parser *hp, VALUE hdr,
+                          const char *buffer, const char *p)
 {
   VALUE f, v;
   VALUE hclass;
@@ -382,6 +489,111 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   }
 }
 
+/* TODO cache */
+static VALUE req_field(const char *ptr, size_t len)
+{
+  size_t pfxlen = sizeof("HTTP_") - 1;
+  VALUE str = rb_str_new(NULL, pfxlen + len);
+  char *dst = RSTRING_PTR(str);
+
+  memcpy(dst, "HTTP_", pfxlen);
+  memcpy(dst + pfxlen, ptr, len);
+  assert(*(dst + RSTRING_LEN(str)) == '\0' &&
+         "string didn't end with \\0"); /* paranoia */
+
+  return str_dd_freeze(str);
+}
+
+static void snake_upcase(char *ptr, size_t len)
+{
+  char *c;
+
+  for (c = ptr; len--; c++) {
+    if (*c >= 'a' && *c <= 'z')
+      *c &= ~0x20;
+    else if (*c == '-')
+      *c = '_';
+  }
+}
+
+static void write_request_value(struct http_parser *hp, VALUE env,
+                            char *buffer, const char *p)
+{
+  char *fptr = PTR_TO(start.field);
+  size_t flen = hp->s.field_len;
+  char *vptr = PTR_TO(mark);
+  size_t vlen = LEN(mark, p);
+  VALUE key, val;
+  VALUE existing;
+
+  VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
+  VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
+  snake_upcase(fptr, flen);
+
+  /*
+   * ignore "Version" headers since they conflict with the HTTP_VERSION
+   * rack env variable.
+   */
+  if (CONST_MEM_EQ("VERSION", fptr, flen)) {
+    hp->cont = Qnil;
+    return;
+  }
+  val = vlen == 0 ? rb_str_new(0, 0) : stripped_str_new(vptr, vlen);
+
+  if (CONST_MEM_EQ("CONNECTION", fptr, flen)) {
+    key = g_HTTP_CONNECTION;
+    hp_keepalive_connection(hp, val);
+  } else if (CONST_MEM_EQ("CONTENT_LENGTH", fptr, flen)) {
+    key = g_CONTENT_LENGTH;
+    hp->len.content = parse_length(vptr, vlen);
+    if (hp->len.content < 0)
+      rb_raise(eParserError, "invalid Content-Length");
+    if (hp->len.content != 0)
+      hp->has_body = 1;
+    invalid_if_trailer(hp);
+  } else if (CONST_MEM_EQ("CONTENT_TYPE", fptr, flen)) {
+    key = g_CONTENT_TYPE;
+  } else if (CONST_MEM_EQ("TRANSFER_ENCODING", fptr, flen)) {
+    key = g_HTTP_TRANSFER_ENCODING;
+    if (STR_CSTR_CASE_EQ(val, "chunked")) {
+      hp->chunked = 1;
+      hp->has_body = 1;
+    }
+    invalid_if_trailer(hp);
+  } else if (CONST_MEM_EQ("TRAILER", fptr, flen)) {
+    key = g_HTTP_TRAILER;
+    hp->has_trailer = 1;
+    invalid_if_trailer(hp);
+  } else if (CONST_MEM_EQ("HOST", fptr, flen)) {
+    key = g_HTTP_HOST;
+    if (NIL_P(hp->v.host))
+      hp->v.host = val;
+  } else {
+    key = req_field(fptr, flen);
+  }
+  existing = rb_hash_aref(env, key);
+  if (NIL_P(existing)) {
+    hp->cont = rb_hash_aset(env, key, val);
+  /*
+   * Ignore repeated Host headers and favor host set by absolute URIs.
+   * absoluteURI Request-URI takes precedence over
+   * the Host: header (ref: rfc 2616, section 5.2.1)
+   */
+  } else if (key == g_HTTP_HOST) {
+     hp->cont = Qnil;
+  } else {
+    rb_str_buf_cat(existing, ",", 1);
+    hp->cont = rb_str_buf_append(existing, val);
+  }
+}
+
+static void write_value(struct http_parser *hp, VALUE hdr,
+                        char *buf, const char *p)
+{
+  hp->is_request ? write_request_value(hp, hdr, buf, p) :
+                   write_response_value(hp, hdr, buf, p);
+}
+
 /** Machine **/
 
 %%{
@@ -392,21 +604,21 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
   action downcase_char { downcase_char(deconst(fpc)); }
   action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
-  action url_scheme { url_scheme(hdr, PTR_TO(mark), LEN(mark, fpc)); }
-  action host { request_host(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action url_scheme { url_scheme(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action host { request_host(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
   action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
 
   action start_query { MARK(start.query, fpc); }
   action query_string {
-    query_string(hdr, PTR_TO(start.query), LEN(start.query, fpc));
+    query_string(hp, hdr, PTR_TO(start.query), LEN(start.query, fpc));
   }
   action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
   action start_field { MARK(start.field, fpc); }
   action write_field { hp->s.field_len = LEN(start.field, fpc); }
   action start_value { MARK(mark, fpc); }
-  action write_value { write_value(hdr, hp, buffer, fpc); }
+  action write_value { write_value(hp, hdr, buffer, fpc); }
   action write_cont_value { write_cont_value(hp, buffer, fpc); }
-  action http_version { http_version(hp, PTR_TO(mark), LEN(mark, fpc)); }
+  action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
   action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
 
   action add_to_chunk_size {
@@ -415,7 +627,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
       rb_raise(eParserError, "invalid chunk size");
   }
   action header_done {
-    finalize_header(hp);
+    finalize_header(hp, hdr);
     cs = http_parser_first_final;
 
     if (hp->chunked)
@@ -469,7 +681,7 @@ static void http_parser_init(struct http_parser *hp)
   int cs = 0;
   memset(hp, 0, sizeof(struct http_parser));
   hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
-  hp->status = Qnil;
+  hp->v.status = Qnil;
   hp->len.content = -1;
   %% write init;
   hp->cs = cs;
@@ -513,7 +725,7 @@ static void kcar_mark(void *ptr)
   struct http_parser *hp = ptr;
 
   rb_gc_mark(hp->cont);
-  rb_gc_mark(hp->status);
+  rb_gc_mark(hp->v.status);
 }
 
 static size_t kcar_memsize(const void *ptr)
@@ -642,6 +854,23 @@ static void check_buffer_size(long dlen)
     rb_raise(rb_eRangeError, "headers too large to process (%ld bytes)", dlen);
 }
 
+static void parser_execute(struct http_parser *hp, VALUE hdr, VALUE buf)
+{
+  char *ptr;
+  long len;
+
+  Check_Type(buf, T_STRING);
+  rb_str_modify(buf);
+  ptr = RSTRING_PTR(buf);
+  len = RSTRING_LEN(buf);
+  check_buffer_size(len);
+
+  http_parser_execute(hp, hdr, ptr, len);
+
+  if (hp->cs == http_parser_error)
+    rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
+}
+
 /**
  * Document-method: headers
  * call-seq:
@@ -657,10 +886,11 @@ static void check_buffer_size(long dlen)
 static VALUE headers(VALUE self, VALUE hdr, VALUE data)
 {
   struct http_parser *hp = data_get(self);
-  long dlen = RSTRING_LEN(data);
 
-  check_buffer_size(dlen);
-  http_parser_execute(hp, hdr, RSTRING_PTR(data), dlen);
+  if (hp->is_request)
+    rb_raise(rb_eRuntimeError, "parser is handling a request, not response");
+
+  parser_execute(hp, hdr, data);
   VALIDATE_MAX_LENGTH(hp->offset, HEADER);
 
   if (hp->cs == http_parser_first_final ||
@@ -670,15 +900,33 @@ static VALUE headers(VALUE self, VALUE hdr, VALUE data)
     if (hp->in_trailer)
       return hdr;
     else
-      return rb_ary_new3(2, hp->status, hdr);
+      return rb_ary_new3(2, hp->v.status, hdr);
   }
 
-  if (hp->cs == http_parser_error)
-    rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
-
   return Qnil;
 }
 
+static VALUE request(VALUE self, VALUE env, VALUE buf)
+{
+  struct http_parser *hp = data_get(self);
+
+  hp->is_request = 1;
+  Check_Type(buf, T_STRING);
+  parser_execute(hp, env, buf);
+
+  if (hp->cs == http_parser_first_final ||
+      hp->cs == http_parser_en_ChunkedBody) {
+    advance_str(buf, hp->offset + 1);
+    hp->offset = 0;
+    if (hp->in_trailer)
+      hp->body_eof_seen = 1;
+
+    return env;
+  }
+  return Qnil; /* incomplete */
+}
+
+
 static int chunked_eof(struct http_parser *hp)
 {
   return ((hp->cs == http_parser_first_final) || hp->in_trailer);
@@ -811,6 +1059,7 @@ void Init_kcar_ext(void)
   rb_define_alloc_func(cParser, kcar_alloc);
   rb_define_method(cParser, "initialize", initialize, 0);
   rb_define_method(cParser, "reset", initialize, 0);
+  rb_define_method(cParser, "request", request, 2);
   rb_define_method(cParser, "headers", headers, 2);
   rb_define_method(cParser, "trailers", headers, 2);
   rb_define_method(cParser, "filter_body", filter_body, 2);
@@ -846,13 +1095,26 @@ void Init_kcar_ext(void)
 
   globals = rb_ary_new();
   rb_global_variable(&globals);
-  C(globals, g_HOST, "HOST");
+  C(globals, g_CONTENT_LENGTH, "CONTENT_LENGTH");
+  C(globals, g_CONTENT_TYPE, "CONTENT_TYPE");
+  C(globals, g_HTTP_HOST, "HTTP_HOST");
+  C(globals, g_HTTP_CONNECTION, "HTTP_CONNECTION");
+  C(globals, g_HTTP_TRAILER, "HTTP_TRAILER");
+  C(globals, g_HTTP_TRANSFER_ENCODING, "HTTP_TRANSFER_ENCODING");
+  C(globals, g_HTTP_VERSION, "HTTP_VERSION");
   C(globals, g_PATH_INFO, "PATH_INFO");
   C(globals, g_QUERY_STRING, "QUERY_STRING");
   C(globals, g_REQUEST_METHOD, "REQUEST_METHOD");
   C(globals, g_REQUEST_PATH, "REQUEST_PATH");
   C(globals, g_REQUEST_URI, "REQUEST_URI");
+  C(globals, g_SERVER_NAME, "SERVER_NAME");
+  C(globals, g_SERVER_PORT, "SERVER_PORT");
+  C(globals, g_SERVER_PROTOCOL, "SERVER_PROTOCOL");
   C(globals, g_rack_url_scheme, "rack.url_scheme");
+  C(globals, g_http, "http");
+  C(globals, g_https, "https");
+  C(globals, g_80, "80");
+  C(globals, g_443, "443");
   OBJ_FREEZE(globals);
 #undef C
 }
diff --git a/test/test_request_parser.rb b/test/test_request_parser.rb
new file mode 100644
index 0000000..5e97a0e
--- /dev/null
+++ b/test/test_request_parser.rb
@@ -0,0 +1,862 @@
+# -*- encoding: binary -*-
+## Copyright (c) 2005 Zed A. Shaw
+# You can redistribute it and/or modify it under the same terms as Ruby 1.8
+# or GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+#
+# Additional work donated by contributors.  See git history of
+# unicorn for more information: git clone https://bogomips.org/unicorn.git
+
+require 'test/unit'
+require 'digest'
+require 'kcar'
+require 'uri'
+
+class TestRequestParser < Test::Unit::TestCase
+  def setup
+    @hp = Kcar::Parser.new
+    @env = {}
+  end
+
+  def test_parse_oneshot_simple
+    buf = "GET / HTTP/1.1\r\n\r\n"
+    http = 'http'.freeze
+    @env['rack.url_scheme'] = http
+    env = @hp.request(@env, buf.dup)
+    assert_same env, @env
+    exp = {
+      'SERVER_PROTOCOL' => 'HTTP/1.1',
+      'HTTP_VERSION' => 'HTTP/1.1',
+      'REQUEST_PATH' => '/',
+      'PATH_INFO' => '/',
+      'REQUEST_URI' => '/',
+      'REQUEST_METHOD' => 'GET',
+      'QUERY_STRING' => '',
+      'rack.url_scheme' => 'http',
+    }
+    assert_equal exp, env
+    assert_same env['HTTP_VERSION'], env['SERVER_PROTOCOL']
+    assert_predicate env['HTTP_VERSION'], :frozen?
+    assert_predicate env['REQUEST_METHOD'], :frozen?
+    assert_same http, env['rack.url_scheme']
+    assert_predicate @hp, :keepalive?
+
+    @hp.reset
+
+    buf = "G"
+    assert_nil @hp.request(@env, buf)
+    # try parsing again to ensure we were reset correctly
+    buf << "ET /hello-world HTTP/1.1\r\n\r\n"
+    assert_same env, @hp.request(env, buf)
+
+    assert_equal 'HTTP/1.1', env['SERVER_PROTOCOL']
+    assert_equal '/hello-world', env['REQUEST_PATH']
+    assert_equal 'HTTP/1.1', env['HTTP_VERSION']
+    assert_equal '/hello-world', env['REQUEST_URI']
+    assert_equal 'GET', env['REQUEST_METHOD']
+    assert_equal '', env['QUERY_STRING']
+    assert @hp.keepalive?
+  end
+
+  def test_tab_lws
+    @hp.request(@env, "GET / HTTP/1.1\r\nHost:\tfoo.bar\r\n\r\n")
+    assert_equal "foo.bar", @env['HTTP_HOST']
+  end
+
+  def test_connection_close_no_ka
+    @hp.request(@env = {}, "GET / HTTP/1.1\r\nConnection: close\r\n\r\n")
+    assert_equal 'GET', @env['REQUEST_METHOD']
+    assert ! @hp.keepalive?
+  end
+
+  def test_connection_keep_alive_ka
+    @hp.request(@env, "HEAD / HTTP/1.1\r\nConnection: keep-alive\r\n\r\n")
+    assert @hp.keepalive?
+  end
+
+  def test_connection_keep_alive_no_body
+    r = @hp.request(@env, "POST / HTTP/1.1\r\nConnection: keep-alive\r\n\r\n")
+    assert_same @env, r
+    assert @hp.keepalive?
+  end
+
+  def test_connection_keep_alive_no_body_empty
+    buf = "POST / HTTP/1.1\r\n" \
+          "Content-Length: 0\r\n" \
+          "Connection: keep-alive\r\n\r\n"
+    assert_same @env, @hp.request(@env, buf)
+    assert @hp.keepalive?
+  end
+
+  def test_connection_keep_alive_ka_bad_version
+    @hp.request(@env, "GET / HTTP/1.0\r\nConnection: keep-alive\r\n\r\n")
+    assert @hp.keepalive?
+  end
+
+  def test_parse_server_host_default_port
+    buf = "GET / HTTP/1.1\r\nHost: foo\r\n\r\n"
+    assert_same @env, @hp.request(@env, buf)
+    assert_equal 'foo', @env['SERVER_NAME']
+    assert_equal '80', @env['SERVER_PORT']
+    assert_equal '', buf
+    assert @hp.keepalive?
+  end
+
+  def test_parse_server_host_alt_port
+    buf = "GET / HTTP/1.1\r\nHost: foo:999\r\n\r\n"
+    @hp.request(@env, buf)
+    assert_equal 'foo', @env['SERVER_NAME']
+    assert_equal '999', @env['SERVER_PORT']
+    assert_equal '', buf
+    assert @hp.keepalive?
+  end
+
+  def test_parse_server_host_empty_port
+    @hp.request(@env, "GET / HTTP/1.1\r\nHost: foo:\r\n\r\n")
+    assert_equal 'foo', @env['SERVER_NAME']
+    assert_equal '80', @env['SERVER_PORT']
+    assert @hp.keepalive?
+  end
+
+  def test_parse_host_cont
+    @hp.request(@env, "GET / HTTP/1.1\r\nHost:\r\n foo\r\n\r\n")
+    assert_equal 'foo', @env['SERVER_NAME']
+    assert_equal '80', @env['SERVER_PORT']
+    assert @hp.keepalive?
+  end
+
+  def test_preserve_existing_server_vars
+    @env = {
+      'SERVER_NAME' => 'example.com',
+      'SERVER_PORT' => '1234',
+      'rack.url_scheme' => 'https'
+    }
+    @hp.request(@env, "GET / HTTP/1.0\r\n\r\n")
+    assert_equal 'example.com', @env['SERVER_NAME']
+    assert_equal '1234', @env['SERVER_PORT']
+    assert_equal 'https', @env['rack.url_scheme']
+  end
+
+  def test_parse_strange_headers
+    should_be_good = "GET / HTTP/1.1\r\naaaaaaaaaaaaa:++++++++++\r\n\r\n"
+    req = @hp.request(@env, should_be_good)
+    assert_same req, @env
+    assert_equal '', should_be_good
+    assert_predicate @hp, :keepalive?
+    assert_equal '++++++++++', @env['HTTP_AAAAAAAAAAAAA']
+  end
+
+  # legacy test case from Mongrel
+  # I still consider Pound irrelevant; unfortunately, stupid clients that
+  # send extremely big headers do exist and they've managed to find us...
+  def test_nasty_pound_header
+    nasty_pound_header = "GET / HTTP/1.1\r\n" \
+"X-SSL-Bullshit:   -----BEGIN CERTIFICATE-----\r\n" \
+"\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n" \
+"\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n" \
+"\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n" \
+"\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n" \
+"\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n" \
+"\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n" \
+"\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n" \
+"\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n" \
+"\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n" \
+"\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n" \
+"\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n" \
+"\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n" \
+"\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n" \
+"\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n" \
+"\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n" \
+"\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n" \
+"\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n" \
+"\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n" \
+"\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n" \
+"\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n" \
+"\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n" \
+"\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n" \
+"\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n" \
+"\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n" \
+"\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n" \
+"\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n" \
+"\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n" \
+"\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n" \
+"\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n" \
+"\tRA==\r\n" \
+"\t-----END CERTIFICATE-----\r\n" \
+"\r\n"
+    ok = (/(-----BEGIN .*--END CERTIFICATE-----)/m =~ nasty_pound_header)
+    expect = $1.dup
+    assert ok, 'end certificate matched'
+    expect.gsub!(/\r\n\t/, ' ')
+    req = @hp.request(@env, nasty_pound_header.dup)
+    assert_equal expect, req['HTTP_X_SSL_BULLSHIT']
+  end
+
+  def test_multiline_header_0d0a
+    req = @hp.request(@env, "GET / HTTP/1.0\r\n" \
+      "X-Multiline-Header: foo bar\r\n\tcha cha\r\n\tzha zha\r\n\r\n")
+    assert_same req, @env
+    assert_equal 'foo bar cha cha zha zha', req['HTTP_X_MULTILINE_HEADER']
+  end
+
+  def test_multiline_header_0a
+    req = @hp.request(@env, "GET / HTTP/1.0\n" \
+      "X-Multiline-Header: foo bar\n\tcha cha\n\tzha zha\n\n")
+    assert_same req, @env
+    assert_equal 'foo bar cha cha zha zha', req['HTTP_X_MULTILINE_HEADER']
+  end
+
+  def test_continuation_eats_leading_spaces
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "\t\r\n" \
+             "    \r\n" \
+             "  ASDF\r\n\r\n"
+    req = @hp.request(@env, header)
+    assert_same req, @env
+    assert_equal '', header
+    assert_equal 'ASDF', req['HTTP_X_ASDF']
+  end
+
+  def test_continuation_eats_scattered_leading_spaces
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:   hi\r\n" \
+             "    y\r\n" \
+             "\t\r\n" \
+             "       x\r\n" \
+             "  ASDF\r\n\r\n"
+    req = @hp.request(@env, header)
+    assert_same req, @env
+    assert_equal '', header
+    assert_equal 'hi y x ASDF', req['HTTP_X_ASDF']
+  end
+
+  def test_continuation_eats_trailing_spaces
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "\t\r\n" \
+             "  b  \r\n" \
+             "  ASDF\r\n\r\nZ"
+    req = @hp.request(@env, header)
+    assert_same req, @env
+    assert_equal 'Z', header
+    assert_equal 'b ASDF', req['HTTP_X_ASDF']
+  end
+
+  def test_continuation_with_absolute_uri_and_ignored_host_header
+    header = "GET http://example.com/ HTTP/1.1\r\n" \
+             "Host: \r\n" \
+             "    example.org\r\n" \
+             "\r\n"
+    req = @hp.request(@env, header)
+    assert_same req, @env
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/', req['PATH_INFO']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
+  # this may seem to be testing more of an implementation detail, but
+  # it also helps ensure we're safe in the presence of multiple parsers
+  # in case we ever go multithreaded/evented...
+  def test_resumable_continuations
+    nr = 1000
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "  hello\r\n"
+    tmp = []
+    nr.times { |i|
+      hp = Kcar::Parser.new
+      env = {}
+      assert_nil hp.request(env, buf = "#{header} #{i}\r\n")
+      asdf = env['HTTP_X_ASDF']
+      assert_equal "hello #{i}", asdf
+      tmp << [ hp, asdf, env, buf ]
+    }
+    tmp.each_with_index { |(hp, asdf, env, buf), i|
+      buf << " .\r\n\r\n"
+      assert_same env, hp.request(env, buf)
+      assert_equal "hello #{i} .", asdf
+    }
+  end
+
+  def test_invalid_continuation
+    header = "GET / HTTP/1.1\r\n" \
+             "    y\r\n" \
+             "Host: hello\r\n" \
+             "\r\n"
+    buf = header.dup
+    assert_raises(Kcar::ParserError) do
+      @hp.request(@env, buf)
+    end
+    assert_equal header, buf, 'no modification on invalid'
+  end
+
+  def test_parse_ie6_urls
+    %w(/some/random/path"
+       /some/random/path>
+       /some/random/path<
+       /we/love/you/ie6?q=<"">
+       /url?<="&>="
+       /mal"formed"?
+    ).each do |path|
+      sorta_safe = %(GET #{path} HTTP/1.1\r\n\r\n)
+      assert_same @env, @hp.request(@env, sorta_safe)
+      assert_equal path, @env['REQUEST_URI']
+      assert_equal '', sorta_safe
+      assert @hp.keepalive?
+      @hp.reset
+    end
+  end
+
+  def test_parse_error
+    bad_http = "GET / SsUTF/1.1"
+    assert_raises(Kcar::ParserError) { @hp.request(@env, bad_http) }
+
+    # make sure we can recover
+    @env.clear
+    @hp.reset
+    assert_equal @env, @hp.request(@env, "GET / HTTP/1.0\r\n\r\n")
+    assert ! @hp.keepalive?
+  end
+
+  def test_piecemeal
+    http = "GET"
+    req = @env
+    assert_nil @hp.request(@env, http)
+    assert_nil @hp.request(@env, http)
+    assert_nil @hp.request(@env, http << " / HTTP/1.0")
+    assert_equal '/', req['REQUEST_PATH']
+    assert_equal '/', req['REQUEST_URI']
+    assert_equal 'GET', req['REQUEST_METHOD']
+    assert_nil @hp.request(req, http << "\r\n")
+    assert_equal 'HTTP/1.0', req['HTTP_VERSION']
+    assert_nil @hp.request(req, http << "\r")
+    assert_same req, @hp.request(req, http << "\n")
+    assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL']
+    assert_nil req['FRAGMENT']
+    assert_equal '', req['QUERY_STRING']
+    assert_equal "", http
+    assert ! @hp.keepalive?
+  end
+
+  # not common, but underscores do appear in practice
+  def test_absolute_uri_underscores
+    http = "GET https://under_score.example.com/foo?q=bar HTTP/1.0\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_same req, @env
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'under_score.example.com', req['HTTP_HOST']
+    assert_equal 'under_score.example.com', req['SERVER_NAME']
+    assert_equal '443', req['SERVER_PORT']
+    assert_equal "", http
+    assert ! @hp.keepalive?
+  end
+
+  # some dumb clients add users because they're stupid
+  def test_absolute_uri_w_user
+    http = "GET http://user%20space@example.com/foo?q=bar HTTP/1.0\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_same req, @env
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal "", http
+    assert ! @hp.keepalive?
+  end
+
+  # since Mongrel supported anything URI.parse supported, we're stuck
+  # supporting everything URI.parse supports
+  def test_absolute_uri_uri_parse
+    require 'uri'
+    "#{URI::REGEXP::PATTERN::UNRESERVED};:&=+$,".split(//).each do |char|
+      http = "GET http://#{char}@example.com/ HTTP/1.0\r\n\r\n"
+      req = @hp.request(@env, http)
+      assert_equal 'http', req['rack.url_scheme']
+      assert_equal '/', req['REQUEST_URI']
+      assert_equal '/', req['REQUEST_PATH']
+      assert_equal '', req['QUERY_STRING']
+
+      assert_equal 'example.com', req['HTTP_HOST']
+      assert_equal 'example.com', req['SERVER_NAME']
+      assert_equal '80', req['SERVER_PORT']
+      assert_equal "", http
+      assert ! @hp.keepalive?
+      @hp.reset
+    end
+  end
+
+  def test_absolute_uri
+    req = @hp.request(@env, "GET http://example.com/foo?q=bar HTTP/1.0\r\n\r\n")
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert ! @hp.keepalive?
+  end
+
+  def test_absolute_uri_https
+    http = "GET https://example.com/foo?q=bar HTTP/1.1\r\n" \
+           "X-Foo: bar\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '443', req['SERVER_PORT']
+    assert_equal "", http
+    assert @hp.keepalive?
+  end
+
+  # Host: header should be ignored for absolute URIs
+  def test_absolute_uri_with_port
+    req = @hp.request(@env,"GET http://example.com:8080/foo?q=bar HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n")
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'example.com:8080', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '8080', req['SERVER_PORT']
+    assert @hp.keepalive?
+  end
+
+  def test_absolute_uri_with_empty_port
+    req = @hp.request(@env, "GET https://example.com:/foo?q=bar HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n")
+    assert_same req, @env
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'example.com:', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '443', req['SERVER_PORT']
+    assert @hp.keepalive?
+  end
+
+  def test_absolute_ipv6_uri
+    url = "http://[::1]/foo?q=bar"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_same req, @env
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+
+    uri = URI.parse(url)
+    assert_equal "[::1]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::1]", req['HTTP_HOST']
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal "", http
+  end
+
+  def test_absolute_ipv6_uri_alpha
+    url = "http://[::a]/"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'http', req['rack.url_scheme']
+    uri = URI.parse(url)
+    assert_equal "[::a]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::a]", req['HTTP_HOST']
+    assert_equal "[::a]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
+  def test_absolute_ipv6_uri_alpha_2
+    url = "http://[::B]/"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'http', req['rack.url_scheme']
+
+    uri = URI.parse(url)
+    assert_equal "[::B]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::B]", req['HTTP_HOST']
+    assert_equal "[::B]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
+  def test_absolute_ipv6_uri_with_empty_port
+    url = "https://[::1]:/foo?q=bar"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+
+    uri = URI.parse(url)
+    assert_equal "[::1]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::1]:", req['HTTP_HOST']
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '443', req['SERVER_PORT']
+    assert_equal "", http
+  end
+
+  def test_absolute_ipv6_uri_with_port
+    url = "https://[::1]:666/foo?q=bar"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+
+    uri = URI.parse(url)
+    assert_equal "[::1]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::1]:666", req['HTTP_HOST']
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '666', req['SERVER_PORT']
+    assert_equal "", http
+  end
+
+  def test_ipv6_host_header
+    buf = "GET / HTTP/1.1\r\n" \
+          "Host: [::1]\r\n\r\n"
+    req = @hp.request(@env, buf)
+    assert_equal "[::1]", req['HTTP_HOST']
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
+  def test_ipv6_host_header_with_port
+    req = @hp.request(@env, "GET / HTTP/1.1\r\n" \
+                  "Host: [::1]:666\r\n\r\n")
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '666', req['SERVER_PORT']
+    assert_equal "[::1]:666", req['HTTP_HOST']
+  end
+
+  def test_ipv6_host_header_with_empty_port
+    req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: [::1]:\r\n\r\n")
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal "[::1]:", req['HTTP_HOST']
+  end
+
+  # XXX Highly unlikely..., just make sure we don't segfault or assert on it
+  def test_broken_ipv6_host_header
+    req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: [::1:\r\n\r\n")
+    assert_equal "[", req['SERVER_NAME']
+    assert_equal ':1:', req['SERVER_PORT']
+    assert_equal "[::1:", req['HTTP_HOST']
+  end
+
+  def test_put_body_oneshot
+    buf = "PUT / HTTP/1.0\r\nContent-Length: 5\r\n\r\nabcde"
+    req = @hp.request(@env, buf)
+    assert_equal '/', req['REQUEST_PATH']
+    assert_equal '/', req['REQUEST_URI']
+    assert_equal 'PUT', req['REQUEST_METHOD']
+    assert_equal 'HTTP/1.0', req['HTTP_VERSION']
+    assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL']
+    assert_equal "abcde", buf
+  end
+
+  def test_put_body_later
+    buf = "PUT /l HTTP/1.0\r\nContent-Length: 5\r\n\r\n"
+    req = @hp.request(@env, buf)
+    assert_equal '/l', req['REQUEST_PATH']
+    assert_equal '/l', req['REQUEST_URI']
+    assert_equal 'PUT', req['REQUEST_METHOD']
+    assert_equal 'HTTP/1.0', req['HTTP_VERSION']
+    assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL']
+    assert_equal "", buf
+  end
+
+  def test_unknown_methods
+    %w(GETT HEADR XGET XHEAD).each { |m|
+      s = "#{m} /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n"
+      req = @hp.request(@env, s)
+      assert_equal '/forums/1/topics/2375?page=1', req['REQUEST_URI']
+      assert_nil req['FRAGMENT']
+      assert_equal 'page=1', req['QUERY_STRING']
+      assert_equal "", s
+      assert_equal m, req['REQUEST_METHOD']
+      @hp.reset
+    }
+  end
+
+  def test_fragment_in_uri
+    get = "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal '/forums/1/topics/2375?page=1', req['REQUEST_URI']
+    assert_nil req['FRAGMENT']
+    assert_equal 'page=1', req['QUERY_STRING']
+    assert_equal '', get
+  end
+
+  # lame random garbage maker
+  def rand_data(min, max, readable=true)
+    count = min + ((rand(max)+1) *10).to_i
+    res = count.to_s + "/"
+
+    if readable
+      res << Digest::SHA1.hexdigest(rand(count * 100).to_s) * (count / 40)
+    else
+      res << Digest::SHA1.digest(rand(count * 100).to_s) * (count / 20)
+    end
+
+    return res
+  end
+
+  def test_horrible_queries
+    # then that large header names are caught
+    10.times do |c|
+      get = "GET /#{rand_data(10,120)} HTTP/1.1\r\nX-#{rand_data(1024, 1024+(c*1024))}: Test\r\n\r\n"
+      assert_raises(Kcar::ParserError, Kcar::RequestURITooLongError) do
+        @hp.request(@env, get)
+        @hp.clear
+      end
+    end
+
+    # then that large mangled field values are caught
+    10.times do |c|
+      get = "GET /#{rand_data(10,120)} HTTP/1.1\r\nX-Test: #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n"
+      assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do
+        @hp.request(@env, get)
+        @hp.clear
+      end
+    end
+
+    # then large headers are rejected too FIXME not supported, yet
+    if false
+      get = "GET /#{rand_data(10,120)} HTTP/1.1\r\n"
+      get << "X-Test: test\r\n" * (80 * 1024)
+      @hp.reset
+      assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do
+        @hp.request(@env, get)
+      end
+    end
+
+    # finally just that random garbage gets blocked all the time
+    10.times do |c|
+      get = "GET #{rand_data(1024, 1024+(c*1024), false)} #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n"
+      @hp.reset
+      assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do
+        @hp.request(@env, get)
+      end
+    end
+  end
+
+  def test_leading_tab
+    get = "GET / HTTP/1.1\r\nHost:\texample.com\r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal 'example.com', req['HTTP_HOST']
+  end
+
+  def test_trailing_whitespace
+    get = "GET / HTTP/1.1\r\nHost: example.com \r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal 'example.com', req['HTTP_HOST']
+  end
+
+  def test_trailing_tab
+    get = "GET / HTTP/1.1\r\nHost: example.com\t\r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal 'example.com', req['HTTP_HOST']
+  end
+
+  def test_trailing_multiple_linear_whitespace
+    get = "GET / HTTP/1.1\r\nHost: example.com\t \t \t\r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal 'example.com', req['HTTP_HOST']
+  end
+
+  def test_embedded_linear_whitespace_ok
+    get = "GET / HTTP/1.1\r\nX-Space: hello\t world\t \r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal "hello\t world", req["HTTP_X_SPACE"]
+  end
+
+  def test_null_byte_header
+    get = "GET / HTTP/1.1\r\nHost: \0\r\n\r\n"
+    assert_raises(Kcar::ParserError) { @hp.request(@env, get) }
+  end
+
+  def test_null_byte_in_middle
+    get = "GET / HTTP/1.1\r\nHost: hello\0world\r\n\r\n"
+    assert_raises(Kcar::ParserError) { @hp.request(@env, get) }
+  end
+
+  def test_null_byte_at_end
+    get = "GET / HTTP/1.1\r\nHost: hello\0\r\n\r\n"
+    assert_raises(Kcar::ParserError) { @hp.request(@env, get) }
+  end
+
+  def test_empty_header
+    get = "GET / HTTP/1.1\r\nHost:  \r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal '', req['HTTP_HOST']
+  end
+
+  def test_connection_TE
+    req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: example.com\r\n" \
+                            "Connection: TE\r\n" \
+                            "TE: trailers\r\n\r\n")
+    assert_predicate @hp, :keepalive?
+    assert_equal 'TE', req['HTTP_CONNECTION']
+  end
+
+  def test_repeat_headers
+    str = "PUT / HTTP/1.1\r\n" \
+          "Trailer: Content-MD5\r\n" \
+          "Trailer: Content-SHA1\r\n" \
+          "transfer-Encoding: chunked\r\n\r\n" \
+          "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 'Content-MD5,Content-SHA1', req['HTTP_TRAILER']
+    assert_equal "1\r\na\r\n2\r\n..\r\n0\r\n", str
+    assert @hp.keepalive?
+  end
+
+  def test_http_09
+    buf = "GET /read-rfc1945-if-you-dont-believe-me\r\n"
+    req = @hp.request(@env, buf)
+    assert_equal '', buf
+    expect = {
+      "REQUEST_PATH" => "/read-rfc1945-if-you-dont-believe-me",
+      "PATH_INFO" => "/read-rfc1945-if-you-dont-believe-me",
+      "REQUEST_URI" => "/read-rfc1945-if-you-dont-believe-me",
+      "SERVER_PROTOCOL" => "HTTP/0.9",
+      "HTTP_VERSION" => "HTTP/0.9",
+      "REQUEST_METHOD" => "GET",
+      "QUERY_STRING" => ""
+    }
+    assert_equal expect, req
+  end
+
+  def test_path_info_semicolon
+    qs = "QUERY_STRING"
+    pi = "PATH_INFO"
+    req = {}
+    str = "GET %s HTTP/1.1\r\nHost: example.com\r\n\r\n"
+    {
+      "/1;a=b?c=d&e=f" => { qs => "c=d&e=f", pi => "/1;a=b" },
+      "/1?c=d&e=f" => { qs => "c=d&e=f", pi => "/1" },
+      "/1;a=b" => { qs => "", pi => "/1;a=b" },
+      "/1;a=b?" => { qs => "", pi => "/1;a=b" },
+      "/1?a=b;c=d&e=f" => { qs => "a=b;c=d&e=f", pi => "/1" },
+      "*" => { qs => "", pi => "" },
+    }.each do |uri,expect|
+      @env.clear
+      @hp.reset
+      buf = str % [ uri ]
+      req = @hp.request(@env, buf)
+      assert_equal uri, req["REQUEST_URI"], "REQUEST_URI mismatch"
+      assert_equal expect[qs], req[qs], "#{qs} mismatch"
+      assert_equal expect[pi], req[pi], "#{pi} mismatch"
+      next if uri == "*"
+      uri = URI.parse("http://example.com#{uri}")
+      assert_equal uri.query.to_s, req[qs], "#{qs} mismatch URI.parse disagrees"
+      assert_equal uri.path, req[pi], "#{pi} mismatch URI.parse disagrees"
+    end
+  end
+
+  def test_path_info_semicolon_absolute
+    qs = "QUERY_STRING"
+    pi = "PATH_INFO"
+    str = "GET http://example.com%s HTTP/1.1\r\nHost: www.example.com\r\n\r\n"
+    {
+      "/1;a=b?c=d&e=f" => { qs => "c=d&e=f", pi => "/1;a=b" },
+      "/1?c=d&e=f" => { qs => "c=d&e=f", pi => "/1" },
+      "/1;a=b" => { qs => "", pi => "/1;a=b" },
+      "/1;a=b?" => { qs => "", pi => "/1;a=b" },
+      "/1?a=b;c=d&e=f" => { qs => "a=b;c=d&e=f", pi => "/1" },
+    }.each do |uri,expect|
+      @hp.reset
+      @env.clear
+      buf = str % [ uri ]
+      req = @hp.request(@env, buf)
+      assert_equal uri, req["REQUEST_URI"], "REQUEST_URI mismatch"
+      assert_equal "example.com", req["HTTP_HOST"], "Host: mismatch"
+      assert_equal expect[qs], req[qs], "#{qs} mismatch"
+      assert_equal expect[pi], req[pi], "#{pi} mismatch"
+    end
+  end
+
+  def test_negative_content_length
+    str = "PUT / HTTP/1.1\r\n" \
+          "Content-Length: -1\r\n" \
+          "\r\n"
+    assert_raises(Kcar::ParserError) do
+      @hp.request(@env, str)
+    end
+  end
+
+  def test_invalid_content_length
+    str = "PUT / HTTP/1.1\r\n" \
+          "Content-Length: zzzzz\r\n" \
+          "\r\n"
+    assert_raises(Kcar::ParserError) do
+      @hp.request(@env, str)
+    end
+  end
+
+  def test_ignore_version_header
+    req = @hp.request(@env, "GET / HTTP/1.1\r\nVersion: hello\r\n\r\n")
+    expect = {
+      'REQUEST_PATH' => '/',
+      'SERVER_PROTOCOL' => 'HTTP/1.1',
+      'PATH_INFO' => '/',
+      'HTTP_VERSION' => 'HTTP/1.1',
+      'REQUEST_URI' => '/',
+      'REQUEST_METHOD' => 'GET',
+      'QUERY_STRING' => ''
+    }
+    assert_equal expect, req
+  end
+
+  def test_pipelined_requests
+    expect = {
+      'HTTP_HOST' => 'example.com',
+      'SERVER_NAME' => 'example.com',
+      'SERVER_PORT' => '80',
+      'REQUEST_PATH' => '/',
+      'SERVER_PROTOCOL' => 'HTTP/1.1',
+      'PATH_INFO' => '/',
+      'HTTP_VERSION' => 'HTTP/1.1',
+      'REQUEST_URI' => '/',
+      'REQUEST_METHOD' => 'GET',
+      'QUERY_STRING' => ''
+    }
+    req1 = "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"
+    req2 = "GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n"
+    buf = req1 + req2
+    env1 = @hp.request(@env, buf)
+    assert_equal expect, env1
+    assert_equal req2, buf
+    assert_predicate @hp, :keepalive?
+    @env.clear
+    @hp.reset
+    env2 = @hp.request(@env, buf)
+    expect['HTTP_HOST'] = expect['SERVER_NAME'] = 'www.example.com'
+    assert_equal expect, env2
+    assert_equal '', buf
+  end
+end

  parent reply	other threads:[~2018-12-01 13:31 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
2018-12-01 13:31 ` [PATCH 01/11] introduce new str_new_dd_freeze internal function Eric Wong
2018-12-01 13:31 ` [PATCH 02/11] begin implementing request parsing Eric Wong
2018-12-01 13:31 ` [PATCH 03/11] favor bitfields instead flags + macros Eric Wong
2018-12-01 13:31 ` Eric Wong [this message]
2018-12-01 13:31 ` [PATCH 05/11] pkg.mk: enable warnings by default for tests Eric Wong
2018-12-01 13:31 ` [PATCH 06/11] filter_body: rename variables to be like memcpy(3) Eric Wong
2018-12-01 13:31 ` [PATCH 07/11] flesh out filter_body for request parsing Eric Wong
2018-12-01 13:31 ` [PATCH 08/11] do not assume SERVER_PORT Eric Wong
2018-12-01 13:31 ` [PATCH 09/11] do not set "HTTP/0.9" for pre-1.0 requests Eric Wong
2018-12-01 13:31 ` [PATCH 10/11] always set non-negative Content-Length for requests Eric Wong
2018-12-01 13:31 ` [PATCH 11/11] avoid String#-@ call on request parsing under Ruby 2.6 Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://yhbt.net/kcar/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181201133125.5524-5-e@80x24.org \
    --to=e@80x24.org \
    --cc=kcar-public@bogomips.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhbt.net/kcar.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).