ext/http11: optimistically optimize setting of common headers

Most HTTP traffic will send a small, common subset of headers. For these, we can avoid recreating RString objects and instead use predefined, frozen RString objects. This results in a ~22% speed improvement in header parsing for common cases where clients send the headers we have predefined, frozen objects for. Additionally, new parser tests have been added to ensure the optimizations work (for MRI). There is an optional qsort(3) and bsearch(3) dependency to improve average lookup time for the frozen strings, but it is not enabled due to portability concerns. The linear search performance is acceptable, and can be hand-optimized for the most frequently seen headers by putting those first. git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@992 19e92222-5c0b-0410-8929-a290d50e31e9
author: normalperson <normalperson@19e92222-5c0b-0410-8929-a290d50e31e9> 2008-03-06 07:41:28 +0000
committer: normalperson <normalperson@19e92222-5c0b-0410-8929-a290d50e31e9> 2008-03-06 07:41:28 +0000
commit: a9ab034e62dbe5740cc1b353aed2320646606c73 (patch)
tree: 50a70afcea03447cd436247735b1cbc21a9c3fbb
parent: 2ba9c8518f7cf1195846ca26b2d401e41f4e0cc9 (diff)
download: unicorn-a9ab034e62dbe5740cc1b353aed2320646606c73.tar.gz
3 files changed, 189 insertions, 10 deletions
diff --git a/ext/http11/ext_help.h b/ext/http11/ext_help.h
index 8b4d754..08c0e1e 100644
--- a/ext/http11/ext_help.h
+++ b/ext/http11/ext_help.h
@@ -4,6 +4,7 @@
  #define RAISE_NOT_NULL(T) if(T == NULL) rb_raise(rb_eArgError, "NULL found for " # T " when shouldn't be.");
  #define DATA_GET(from,type,name) Data_Get_Struct(from,type,name); RAISE_NOT_NULL(name);
  #define REQUIRE_TYPE(V, T) if(TYPE(V) != T) rb_raise(rb_eTypeError, "Wrong argument type for " # V " required " # T);
+/* Element count of a true array x (not a pointer); x is parenthesized so any array-valued expression is indexed as a whole. */
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
  
  #ifdef DEBUG
  #define TRACE()  fprintf(stderr, "> %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__)
diff --git a/ext/http11/http11.c b/ext/http11/http11.c
index e7a8658..5af1e8c 100644
--- a/ext/http11/http11.c
+++ b/ext/http11/http11.c
@@ -66,6 +66,114 @@ DEF_MAX_LENGTH(REQUEST_PATH, 1024);
  DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
  DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
  
+/*
+ * One entry in the table of memoized header names.
+ *   len   - length of name in bytes, excluding the terminating NUL
+ *   name  - header name without HTTP_PREFIX, e.g. "HOST"
+ *   value - Qnil in the static initializer; init_common_fields() replaces
+ *           it with a frozen string of HTTP_PREFIX followed by name
+ *
+ * len is deliberately not const-qualified: find_common_field_value()
+ * assigns to it when building a bsearch(3) key, and qsort(3) rewrites
+ * whole entries in place.  Neither is valid for a const member.
+ */
+struct common_field {
+        signed long len;
+        const char *name;
+        VALUE value;
+};
+
+/*
+ * Header names (without HTTP_PREFIX) that we expect most clients to
+ * send.  Every entry starts with a Qnil value; init_common_fields()
+ * later stores one string per entry so that the same key object can be
+ * passed to rb_hash_aset() instead of building an identical string
+ * for each request.
+ */
+static struct common_field common_http_fields[] = {
+# define COMMON_FIELD(NAME) { (sizeof(NAME) - 1), NAME, Qnil }
+        COMMON_FIELD("ACCEPT"),
+        COMMON_FIELD("ACCEPT_CHARSET"),
+        COMMON_FIELD("ACCEPT_ENCODING"),
+        COMMON_FIELD("ACCEPT_LANGUAGE"),
+        COMMON_FIELD("ALLOW"),
+        COMMON_FIELD("AUTHORIZATION"),
+        COMMON_FIELD("CACHE_CONTROL"),
+        COMMON_FIELD("CONNECTION"),
+        COMMON_FIELD("CONTENT_ENCODING"),
+        COMMON_FIELD("CONTENT_LENGTH"),
+        COMMON_FIELD("CONTENT_TYPE"),
+        COMMON_FIELD("COOKIE"),
+        COMMON_FIELD("DATE"),
+        COMMON_FIELD("EXPECT"),
+        COMMON_FIELD("FROM"),
+        COMMON_FIELD("HOST"),
+        COMMON_FIELD("IF_MATCH"),
+        COMMON_FIELD("IF_MODIFIED_SINCE"),
+        COMMON_FIELD("IF_NONE_MATCH"),
+        COMMON_FIELD("IF_RANGE"),
+        COMMON_FIELD("IF_UNMODIFIED_SINCE"),
+        COMMON_FIELD("KEEP_ALIVE"), /* Firefox sends this */
+        COMMON_FIELD("MAX_FORWARDS"),
+        COMMON_FIELD("PRAGMA"),
+        COMMON_FIELD("PROXY_AUTHORIZATION"),
+        COMMON_FIELD("RANGE"),
+        COMMON_FIELD("REFERER"),
+        COMMON_FIELD("TE"),
+        COMMON_FIELD("TRAILER"),
+        COMMON_FIELD("TRANSFER_ENCODING"),
+        COMMON_FIELD("UPGRADE"),
+        COMMON_FIELD("USER_AGENT"),
+        COMMON_FIELD("VIA"),
+        COMMON_FIELD("WARNING")
+# undef COMMON_FIELD
+};
+
+/*
+ * qsort(3) and bsearch(3) improve average performance slightly, but may
+ * not be worth it for lack of portability to certain platforms...
+ */
+#if defined(HAVE_QSORT_BSEARCH)
+/*
+ * Comparison callback shared by qsort(3) and bsearch(3).
+ * Orders entries by length first, then by the bytes of the name when
+ * the lengths are equal.  Returns a negative, zero or positive int.
+ */
+static int common_field_cmp(const void *a, const void *b)
+{
+  const struct common_field *cfa = (const struct common_field *)a;
+  const struct common_field *cfb = (const struct common_field *)b;
+
+  /* compare rather than subtract: no signed long -> int narrowing */
+  if (cfa->len != cfb->len)
+    return cfa->len < cfb->len ? -1 : 1;
+
+  /* equal lengths, so cfa->len bytes are readable in both names */
+  return memcmp(cfa->name, cfb->name, cfa->len);
+}
+#endif /* HAVE_QSORT_BSEARCH */
+
+/*
+ * Fills in the value slot of every entry in common_http_fields with a
+ * frozen string made of HTTP_PREFIX followed by the entry's name.
+ * Called once from Init_http11().  When HAVE_QSORT_BSEARCH is defined
+ * the table is also sorted with common_field_cmp() so that
+ * find_common_field_value() can use bsearch(3).
+ */
+static void init_common_fields(void)
+{
+  size_t i;
+  struct common_field *cf = common_http_fields;
+  char tmp[256]; /* MAX_FIELD_NAME_LENGTH */
+
+  memcpy(tmp, HTTP_PREFIX, HTTP_PREFIX_LEN);
+
+  for(i = 0; i < ARRAY_SIZE(common_http_fields); cf++, i++) {
+    /* fail loudly if a table entry ever outgrows the scratch buffer */
+    assert((size_t)(HTTP_PREFIX_LEN + cf->len) < sizeof(tmp));
+
+    /*
+     * Register the slot while it still holds Qnil: the registration may
+     * itself allocate, and a collection triggered by that allocation
+     * must not find a freshly created string that nothing references.
+     */
+    rb_global_variable(&cf->value);
+
+    memcpy(tmp + HTTP_PREFIX_LEN, cf->name, cf->len + 1);
+    cf->value = rb_obj_freeze(rb_str_new(tmp, HTTP_PREFIX_LEN + cf->len));
+  }
+
+#if defined(HAVE_QSORT_BSEARCH)
+  qsort(common_http_fields,
+        ARRAY_SIZE(common_http_fields),
+        sizeof(struct common_field),
+        common_field_cmp);
+#endif /* HAVE_QSORT_BSEARCH */
+}
+
+/*
+ * Looks up a header name (as handed to http_field(), i.e. without
+ * HTTP_PREFIX) in common_http_fields.
+ *
+ *   field - header name bytes; only the first flen bytes are examined
+ *   flen  - number of bytes in field
+ *
+ * Returns the entry's memoized value when both the length and the bytes
+ * match exactly, Qnil otherwise.
+ */
+static VALUE find_common_field_value(const char *field, size_t flen)
+{
+#if defined(HAVE_QSORT_BSEARCH)
+  /*
+   * The key is filled in by its initializer rather than by assignment,
+   * so this compiles even when struct members are const-qualified.
+   */
+  struct common_field key = { (signed long)flen, field, Qnil };
+  struct common_field *found;
+
+  found = (struct common_field *)bsearch(&key, common_http_fields,
+                                         ARRAY_SIZE(common_http_fields),
+                                         sizeof(struct common_field),
+                                         common_field_cmp);
+  return found ? found->value : Qnil;
+#else /* !HAVE_QSORT_BSEARCH */
+  size_t i;
+  const struct common_field *cf = common_http_fields;
+
+  for(i = 0; i < ARRAY_SIZE(common_http_fields); i++, cf++) {
+    /* table lengths come from sizeof() - 1 and are never negative */
+    if ((size_t)cf->len == flen && !memcmp(cf->name, field, flen))
+      return cf->value;
+  }
+  return Qnil;
+#endif /* !HAVE_QSORT_BSEARCH */
+}
  
  void http_field(void *data, const char *field, size_t flen, const char *value, size_t vlen)
  {
@@ -78,16 +186,25 @@ void http_field(void *data, const char *field, size_t flen, const char *value, s
  
    v = rb_str_new(value, vlen);
  
-  /*
-   * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len)
-   * in my testing, because: there's no minimum allocation length (and
-   * no check for it, either), RSTRING_LEN(f) does not need to be
-   * written twice, and and RSTRING_PTR(f) will already be
-   * null-terminated for us.
-   */
-  f = rb_str_new(NULL, HTTP_PREFIX_LEN + flen);
-  memcpy(RSTRING_PTR(f), HTTP_PREFIX, HTTP_PREFIX_LEN);
-  memcpy(RSTRING_PTR(f) + HTTP_PREFIX_LEN, field, flen);
+  f = find_common_field_value(field, flen);
+
+  if (f == Qnil) {
+    /*
+     * We got a strange header that we don't have a memoized value for.
+     * Fallback to creating a new string to use as a hash key.
+     *
+     * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len)
+     * in my testing, because: there's no minimum allocation length (and
+     * no check for it, either), RSTRING_LEN(f) does not need to be
+     * written twice, and RSTRING_PTR(f) will already be
+     * null-terminated for us.
+     */
+    f = rb_str_new(NULL, HTTP_PREFIX_LEN + flen);
+    memcpy(RSTRING_PTR(f), HTTP_PREFIX, HTTP_PREFIX_LEN);
+    memcpy(RSTRING_PTR(f) + HTTP_PREFIX_LEN, field, flen);
+    assert(*(RSTRING_PTR(f) + RSTRING_LEN(f)) == '\0'); /* paranoia */
+    /* fprintf(stderr, "UNKNOWN HEADER <%s>\n", RSTRING_PTR(f)); */
+  }
  
    rb_hash_aset(req, f, v);
  }
@@ -405,4 +522,5 @@ void Init_http11()
    rb_define_method(cHttpParser, "error?", HttpParser_has_error,0);
    rb_define_method(cHttpParser, "finished?", HttpParser_is_finished,0);
    rb_define_method(cHttpParser, "nread", HttpParser_nread,0);
+  init_common_fields();
  }
diff --git a/test/test_http11.rb b/test/test_http11.rb
index 81b1626..deaeaeb 100644
--- a/test/test_http11.rb
+++ b/test/test_http11.rb
@@ -70,6 +70,66 @@ class HttpParserTest < Test::Unit::TestCase
      assert parser.error?, "Parser SHOULD have error"
    end
  
+  # A header whose name only starts like a memoized one ("Authorizationn"
+  # vs. "Authorization") must be stored under its own key and must not
+  # show up under the memoized key.
+  def test_parse_like_optimized_header
+    http = HttpParser.new
+    env = {}
+    request = "GET / HTTP/1.1\r\nAuthorizationn: zz\r\n\r\n"
+    consumed = http.execute(env, request, 0)
+    assert_equal request.length, consumed
+    assert http.finished?
+    assert !http.error?
+    assert_equal "zz", env["HTTP_AUTHORIZATIONN"]
+    assert !env["HTTP_AUTHORIZATION"]
+  end
+
+  # "Accept-Encoding" and "Accept-Language" have the same length and a
+  # shared prefix; each value must end up under its own key.
+  def test_parse_twin_lookalike_optimized_headers
+    http = HttpParser.new
+    env = {}
+    request = "GET / HTTP/1.1\r\n" \
+              "Accept-Encoding: abcdef\r\n" \
+              "Accept-Language: zyxvut\r\n" \
+              "\r\n"
+    consumed = http.execute(env, request, 0)
+    assert_equal request.length, consumed
+    assert http.finished?
+    assert !http.error?
+    assert_equal "abcdef", env["HTTP_ACCEPT_ENCODING"]
+    assert_equal "zyxvut", env["HTTP_ACCEPT_LANGUAGE"]
+  end
+
+  unless RUBY_PLATFORM =~ /java/
+    # as of now, the Java version does not have the same global-object
+    # reuse optimization the C version does
+
+    # Parses the same request with two independent parsers and checks
+    # that both result hashes use the very same "HTTP_HOST" key object.
+    def test_parse_optimized_headers_global_objects_used
+      first_parser = HttpParser.new
+      first_env = {}
+      first_request = "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"
+      consumed = first_parser.execute(first_env, first_request, 0)
+      assert_equal first_request.length, consumed
+      assert first_parser.finished?
+      assert !first_parser.error?
+      assert_equal "example.com", first_env["HTTP_HOST"]
+
+      # fetch the key object itself, not just an equal string
+      host_key_a = nil
+      first_env.each_key { |name| host_key_a = name if name == "HTTP_HOST" }
+
+      second_parser = HttpParser.new
+      second_env = {}
+      second_request = "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"
+      consumed = second_parser.execute(second_env, second_request, 0)
+      assert_equal second_request.length, consumed
+      assert second_parser.finished?
+      assert !second_parser.error?
+
+      host_key_b = nil
+      second_env.each_key { |name| host_key_b = name if name == "HTTP_HOST" }
+
+      assert_equal "HTTP_HOST", host_key_a
+      assert_equal "HTTP_HOST", host_key_b
+      assert_equal host_key_a.object_id, host_key_b.object_id
+    end
+  end
+
    def test_fragment_in_uri
      parser = HttpParser.new
      req = {}
author	normalperson <normalperson@19e92222-5c0b-0410-8929-a290d50e31e9>	2008-03-06 07:41:28 +0000
committer	normalperson <normalperson@19e92222-5c0b-0410-8929-a290d50e31e9>	2008-03-06 07:41:28 +0000
commit	a9ab034e62dbe5740cc1b353aed2320646606c73 (patch)
tree	50a70afcea03447cd436247735b1cbc21a9c3fbb
parent	2ba9c8518f7cf1195846ca26b2d401e41f4e0cc9 (diff)
download	unicorn-a9ab034e62dbe5740cc1b353aed2320646606c73.tar.gz