about summary refs log tree commit homepage
path: root/ext
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2009-08-02 18:49:55 -0700
committerEric Wong <normalperson@yhbt.net>2009-08-09 01:30:16 -0700
commit2033101b3e9ca9fdc4efa8a658404594df67131f (patch)
treec3ede7c5f2287d1061d5c61a179bee19e8d70d68 /ext
parent60b4397f51894d5e679a6eed73a8cde957f03c4a (diff)
downloadunicorn-2033101b3e9ca9fdc4efa8a658404594df67131f.tar.gz
Explicitly track if our request will need Content-Length
or chunked body decoding.
Diffstat (limited to 'ext')
-rw-r--r--ext/unicorn_http/c_util.h56
-rw-r--r--ext/unicorn_http/ext_help.h19
-rw-r--r--ext/unicorn_http/global_variables.h3
-rw-r--r--ext/unicorn_http/unicorn_http.rl50
4 files changed, 109 insertions, 19 deletions
diff --git a/ext/unicorn_http/c_util.h b/ext/unicorn_http/c_util.h
index 78ad168..895c686 100644
--- a/ext/unicorn_http/c_util.h
+++ b/ext/unicorn_http/c_util.h
@@ -6,6 +6,9 @@
 #ifndef UH_util_h
 #define UH_util_h
 
+#include <unistd.h>
+#include <assert.h>
+
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
 #ifndef SIZEOF_OFF_T
@@ -40,4 +43,57 @@ static void downcase_char(char *c)
     *c |= 0x20;
 }
 
+static int hexchar2int(int xdigit)
+{
+  if (xdigit >= 'A' && xdigit <= 'F')
+    return xdigit - 'A' + 10;
+  if (xdigit >= 'a' && xdigit <= 'f')
+    return xdigit - 'a' + 10;
+
+  /* Ragel already does runtime range checking for us in Unicorn: */
+  assert(xdigit >= '0' && xdigit <= '9');
+
+  return xdigit - '0';
+}
+
+/*
+ * multiplies +i+ by +base+ and increments the result by the parsed
+ * integer value of +xdigit+.  +xdigit+ is a character byte
+ * representing a number in the range of 0..(base-1)
+ * returns the new value of +i+ on success
+ * returns -1 on errors (including overflow)
+ */
+static off_t step_incr(off_t i, int xdigit, const int base)
+{
+  static const off_t max = UH_OFF_T_MAX;
+  const off_t next_max = (max - (max % base)) / base;
+  off_t offset = hexchar2int(xdigit);
+
+  if (offset > (base - 1))
+    return -1;
+  if (i > next_max)
+    return -1;
+  i *= base;
+
+  if ((offset > (base - 1)) || ((max - i) < offset))
+    return -1;
+
+  return i + offset;
+}
+
+/*
+ * parses a non-negative length according to base-10 and
+ * returns it as an off_t value.  Returns -1 on errors
+ * (including overflow).
+ */
+static off_t parse_length(const char *value, size_t length)
+{
+  off_t rv;
+
+  for (rv = 0; length-- && rv >= 0; ++value)
+    rv = step_incr(rv, *value, 10);
+
+  return rv;
+}
+
 #endif /* UH_util_h */
diff --git a/ext/unicorn_http/ext_help.h b/ext/unicorn_http/ext_help.h
index 19f08c9..5f7c296 100644
--- a/ext/unicorn_http/ext_help.h
+++ b/ext/unicorn_http/ext_help.h
@@ -26,4 +26,23 @@ static inline int str_cstr_eq(VALUE val, const char *ptr, size_t len)
 #define STR_CSTR_EQ(val, const_str) \
   str_cstr_eq(val, const_str, sizeof(const_str) - 1)
 
+/* strcasecmp isn't locale independent */
+static int str_cstr_case_eq(VALUE val, const char *ptr, size_t len)
+{
+  if (RSTRING_LEN(val) == len) {
+    const char *v = RSTRING_PTR(val);
+
+    for (; len--; ++ptr, ++v) {
+      if ((*ptr == *v) || (*v >= 'A' && *v <= 'Z' && (*v | 0x20) == *ptr))
+        continue;
+      return 0;
+    }
+    return 1;
+  }
+  return 0;
+}
+
+#define STR_CSTR_CASE_EQ(val, const_str) \
+  str_cstr_case_eq(val, const_str, sizeof(const_str) - 1)
+
 #endif
diff --git a/ext/unicorn_http/global_variables.h b/ext/unicorn_http/global_variables.h
index 7feef93..3437ee2 100644
--- a/ext/unicorn_http/global_variables.h
+++ b/ext/unicorn_http/global_variables.h
@@ -19,6 +19,9 @@ static VALUE g_server_protocol;
 static VALUE g_server_protocol_value;
 static VALUE g_http_host;
 static VALUE g_http_x_forwarded_proto;
+static VALUE g_http_transfer_encoding;
+static VALUE g_content_length;
+static VALUE g_http_trailer;
 static VALUE g_port_80;
 static VALUE g_port_443;
 static VALUE g_localhost;
diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl
index 76831bb..f96ce74 100644
--- a/ext/unicorn_http/unicorn_http.rl
+++ b/ext/unicorn_http/unicorn_http.rl
@@ -36,7 +36,6 @@ struct http_parser {
   } len;
 };
 
-static void http_field(VALUE req, const char *field, size_t flen, VALUE val);
 static void header_done(VALUE req, const char *at, size_t length);
 
 #define LEN(AT, FPC) (FPC - buffer - hp->AT)
@@ -44,6 +43,33 @@ static void header_done(VALUE req, const char *at, size_t length);
 #define PTR_TO(F) (buffer + hp->F)
 #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
 
+static void write_value(VALUE req, struct http_parser *hp,
+                        const char *buffer, const char *p)
+{
+  VALUE f = find_common_field(PTR_TO(start.field), hp->s.field_len);
+  VALUE v;
+
+  VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
+  v = STR_NEW(mark, p);
+  if (f == Qnil) {
+    VALIDATE_MAX_LENGTH(hp->s.field_len, FIELD_NAME);
+    f = uncommon_field(PTR_TO(start.field), hp->s.field_len);
+  } else if (f == g_content_length) {
+    hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
+    if (hp->len.content < 0)
+      rb_raise(eHttpParserError, "invalid Content-Length");
+    hp->flags |= UH_FL_HASBODY;
+  } else if (f == g_http_transfer_encoding) {
+    if (STR_CSTR_CASE_EQ(v, "chunked"))
+      hp->flags |= UH_FL_CHUNKED | UH_FL_HASBODY;
+  } else if (f == g_http_trailer) {
+    hp->flags |= UH_FL_HASTRAILER;
+  } else if (f == g_http_host && rb_hash_aref(req, f) != Qnil) {
+    return; /* full URLs in REQUEST_URI take precedence */
+  }
+  rb_hash_aset(req, f, v);
+}
+
 /** Machine **/
 
 %%{
@@ -56,10 +82,7 @@ static void header_done(VALUE req, const char *at, size_t length);
   action downcase_char { downcase_char((char *)fpc); }
   action write_field { hp->s.field_len = LEN(start.field, fpc); }
   action start_value { MARK(mark, fpc); }
-  action write_value {
-    VALIDATE_MAX_LENGTH(LEN(mark, fpc), FIELD_VALUE);
-    http_field(req, PTR_TO(start.field), hp->s.field_len, STR_NEW(mark, fpc));
-  }
+  action write_value { write_value(req, hp, buffer, fpc); }
   action request_method {
     rb_hash_aset(req, g_request_method, STR_NEW(mark, fpc));
   }
@@ -164,20 +187,6 @@ static struct http_parser *data_get(VALUE self)
   return hp;
 }
 
-static void http_field(VALUE req, const char *field, size_t flen, VALUE val)
-{
-  VALUE f = find_common_field(field, flen);
-
-  if (f == Qnil) {
-    VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
-    f = uncommon_field(field, flen);
-  } else if (f == g_http_host && rb_hash_aref(req, f) != Qnil) {
-    return;
-  }
-
-  rb_hash_aset(req, f, val);
-}
-
 static void set_server_params(VALUE req)
 {
   VALUE temp = rb_hash_aref(req, g_rack_url_scheme);
@@ -319,5 +328,8 @@ void Init_unicorn_http(void)
   rb_define_method(cHttpParser, "execute", HttpParser_execute,2);
   init_common_fields();
   SET_GLOBAL(g_http_host, "HOST");
+  SET_GLOBAL(g_http_trailer, "TRAILER");
+  SET_GLOBAL(g_http_transfer_encoding, "TRANSFER_ENCODING");
+  SET_GLOBAL(g_content_length, "CONTENT_LENGTH");
 }
 #undef SET_GLOBAL