From 2033101b3e9ca9fdc4efa8a658404594df67131f Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 2 Aug 2009 18:49:55 -0700
Subject: http: process Content-Length and Transfer-Encoding

Explicitly track if our request will need Content-Length
or chunked body decoding.
---
 ext/unicorn_http/c_util.h           | 56 +++++++++++++++++++++++++++++++++++++
 ext/unicorn_http/ext_help.h         | 19 +++++++++++++
 ext/unicorn_http/global_variables.h |  3 ++
 ext/unicorn_http/unicorn_http.rl    | 50 ++++++++++++++++++++-------------
 4 files changed, 109 insertions(+), 19 deletions(-)

diff --git a/ext/unicorn_http/c_util.h b/ext/unicorn_http/c_util.h
index 78ad168..895c686 100644
--- a/ext/unicorn_http/c_util.h
+++ b/ext/unicorn_http/c_util.h
@@ -6,6 +6,9 @@
 #ifndef UH_util_h
 #define UH_util_h
 
+#include /* NOTE(review): header name is missing here and on the next line; TODO restore */
+#include
+
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
 #ifndef SIZEOF_OFF_T
@@ -40,4 +43,57 @@ static void downcase_char(char *c)
   *c |= 0x20;
 }
 
+static int hexchar2int(int xdigit) /* numeric value of one hex digit character */
+{
+  if (xdigit >= 'A' && xdigit <= 'F')
+    return xdigit - 'A' + 10;
+  if (xdigit >= 'a' && xdigit <= 'f')
+    return xdigit - 'a' + 10;
+
+  /* Ragel already does runtime range checking for us in Unicorn: */
+  assert(xdigit >= '0' && xdigit <= '9');
+
+  return xdigit - '0';
+}
+
+/*
+ * multiplies +i+ by +base+ and increments the result by the parsed
+ * integer value of +xdigit+. +xdigit+ is a character byte
+ * representing a number in the range of 0..(base-1)
+ * returns the new value of +i+ on success
+ * returns -1 on errors (including overflow)
+ */
+static off_t step_incr(off_t i, int xdigit, const int base)
+{
+  static const off_t max = UH_OFF_T_MAX;
+  const off_t next_max = (max - (max % base)) / base;
+  off_t offset = hexchar2int(xdigit);
+
+  if (offset > (base - 1)) /* digit is not valid in this base */
+    return -1;
+  if (i > next_max) /* i * base would exceed max */
+    return -1;
+  i *= base;
+
+  if ((offset > (base - 1)) || ((max - i) < offset)) /* 1st test repeats the check above; 2nd: i + offset would exceed max */
+    return -1;
+
+  return i + offset;
+}
+
+/*
+ * parses a non-negative length according to base-10 and
+ * returns it as an off_t value.  Returns -1 on errors
+ * (including overflow); a zero +length+ yields 0.
+ */
+static off_t parse_length(const char *value, size_t length)
+{
+  off_t rv;
+
+  for (rv = 0; length-- && rv >= 0; ++value) /* stops at the first -1 from step_incr */
+    rv = step_incr(rv, *value, 10);
+
+  return rv;
+}
+
 #endif /* UH_util_h */
diff --git a/ext/unicorn_http/ext_help.h b/ext/unicorn_http/ext_help.h
index 19f08c9..5f7c296 100644
--- a/ext/unicorn_http/ext_help.h
+++ b/ext/unicorn_http/ext_help.h
@@ -26,4 +26,23 @@ static inline int str_cstr_eq(VALUE val, const char *ptr, size_t len)
 #define STR_CSTR_EQ(val, const_str) \
   str_cstr_eq(val, const_str, sizeof(const_str) - 1)
 
+/* hand-rolled ASCII comparison because strcasecmp isn't locale independent */
+static int str_cstr_case_eq(VALUE val, const char *ptr, size_t len)
+{
+  if (RSTRING_LEN(val) == len) {
+    const char *v = RSTRING_PTR(val);
+
+    for (; len--; ++ptr, ++v) {
+      if ((*ptr == *v) || (*v >= 'A' && *v <= 'Z' && (*v | 0x20) == *ptr))
+        continue; /* only +val+ is case-folded: an uppercase byte in +ptr+ matches only itself */
+      return 0;
+    }
+    return 1;
+  }
+  return 0; /* lengths differ */
+}
+
+#define STR_CSTR_CASE_EQ(val, const_str) \
+  str_cstr_case_eq(val, const_str, sizeof(const_str) - 1)
+
 #endif
diff --git a/ext/unicorn_http/global_variables.h b/ext/unicorn_http/global_variables.h
index 7feef93..3437ee2 100644
--- a/ext/unicorn_http/global_variables.h
+++ b/ext/unicorn_http/global_variables.h
@@ -19,6 +19,9 @@ static VALUE g_server_protocol;
 static VALUE g_server_protocol_value;
 static VALUE g_http_host;
 static VALUE g_http_x_forwarded_proto;
+static VALUE g_http_transfer_encoding;
+static VALUE g_content_length;
+static VALUE g_http_trailer;
 static VALUE g_port_80;
 static VALUE g_port_443;
 static VALUE g_localhost;
diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl
index 76831bb..f96ce74 100644
--- a/ext/unicorn_http/unicorn_http.rl
+++ b/ext/unicorn_http/unicorn_http.rl
@@ -36,7 +36,6 @@ struct http_parser {
   } len;
 };
 
-static void http_field(VALUE req, const char *field, size_t flen, VALUE val);
 static void header_done(VALUE req, const char *at, size_t length);
 
 #define LEN(AT, FPC) (FPC - buffer - hp->AT)
@@ -44,6 +43,33 @@ static void header_done(VALUE req, const char *at, size_t length);
 #define PTR_TO(F) (buffer + hp->F)
 #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
 
+static void write_value(VALUE req, struct http_parser *hp,
+                        const char *buffer, const char *p)
+{
+  VALUE f = find_common_field(PTR_TO(start.field), hp->s.field_len);
+  VALUE v;
+
+  VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
+  v = STR_NEW(mark, p);
+  if (f == Qnil) {
+    VALIDATE_MAX_LENGTH(hp->s.field_len, FIELD_NAME);
+    f = uncommon_field(PTR_TO(start.field), hp->s.field_len);
+  } else if (f == g_content_length) {
+    hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
+    if (hp->len.content < 0) /* parse_length returns -1 on errors */
+      rb_raise(eHttpParserError, "invalid Content-Length");
+    hp->flags |= UH_FL_HASBODY;
+  } else if (f == g_http_transfer_encoding) {
+    if (STR_CSTR_CASE_EQ(v, "chunked")) /* other values set no flags */
+      hp->flags |= UH_FL_CHUNKED | UH_FL_HASBODY;
+  } else if (f == g_http_trailer) {
+    hp->flags |= UH_FL_HASTRAILER;
+  } else if (f == g_http_host && rb_hash_aref(req, f) != Qnil) {
+    return; /* full URLs in REQUEST_URI take precedence */
+  }
+  rb_hash_aset(req, f, v);
+}
+
 /** Machine **/
 
 %%{
@@ -56,10 +82,7 @@ static void header_done(VALUE req, const char *at, size_t length);
   action downcase_char { downcase_char((char *)fpc); }
   action write_field { hp->s.field_len = LEN(start.field, fpc); }
   action start_value { MARK(mark, fpc); }
-  action write_value {
-    VALIDATE_MAX_LENGTH(LEN(mark, fpc), FIELD_VALUE);
-    http_field(req, PTR_TO(start.field), hp->s.field_len, STR_NEW(mark, fpc));
-  }
+  action write_value { write_value(req, hp, buffer, fpc); }
   action request_method {
     rb_hash_aset(req, g_request_method, STR_NEW(mark, fpc));
   }
@@ -164,20 +187,6 @@ static struct http_parser *data_get(VALUE self)
   return hp;
 }
 
-static void http_field(VALUE req, const char *field, size_t flen, VALUE val)
-{
-  VALUE f = find_common_field(field, flen);
-
-  if (f == Qnil) {
-    VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
-    f = uncommon_field(field, flen);
-  } else if (f == g_http_host && rb_hash_aref(req, f) != Qnil) {
-    return;
-  }
-
-  rb_hash_aset(req, f, val);
-}
-
 static void set_server_params(VALUE req)
 {
   VALUE temp = rb_hash_aref(req, g_rack_url_scheme);
@@ -319,5 +328,8 @@ void Init_unicorn_http(void)
   rb_define_method(cHttpParser, "execute", HttpParser_execute,2);
   init_common_fields();
   SET_GLOBAL(g_http_host, "HOST");
+  SET_GLOBAL(g_http_trailer, "TRAILER");
+  SET_GLOBAL(g_http_transfer_encoding, "TRANSFER_ENCODING");
+  SET_GLOBAL(g_content_length, "CONTENT_LENGTH");
 }
 #undef SET_GLOBAL
-- 
cgit v1.2.3-24-ge0c7