From 32b6e838c28b7948811a6470d8c0a49d5767ec69 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 24 Mar 2009 02:35:26 -0700 Subject: simplify the HttpParser interface This cuts the HttpParser interface down to the #execute and #reset methods. HttpParser#execute will return true if it completes and false if it does not. http->nread state is kept internally so we don't have to keep track of it in Ruby; removing one parameter from #execute. HttpParser#reset is unchanged. All errors are handled through exceptions anyway, so the HttpParser#error? method stopped being useful. Also added some more unit tests to the HttpParser since I know some folks are (rightfully) uncomfortable with changing stable C code. We now have tests for incremental parsing. In summary, we have: * more test cases * less C code * simpler interfaces * small performance improvement => win \o/ --- ext/unicorn/http11/http11.c | 123 +++++++----------------------------- ext/unicorn/http11/http11_parser.c | 47 +++++++------- ext/unicorn/http11/http11_parser.h | 4 +- ext/unicorn/http11/http11_parser.rl | 3 +- 4 files changed, 49 insertions(+), 128 deletions(-) (limited to 'ext') diff --git a/ext/unicorn/http11/http11.c b/ext/unicorn/http11/http11.c index 0b96099..f62dce7 100644 --- a/ext/unicorn/http11/http11.c +++ b/ext/unicorn/http11/http11.c @@ -1,4 +1,5 @@ /** + * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault) * Copyright (c) 2005 Zed A. Shaw * You can redistribute it and/or modify it under the same terms as Ruby. */ @@ -367,113 +368,37 @@ static VALUE HttpParser_reset(VALUE self) /** * call-seq: - * parser.finish -> true/false + * parser.execute(req_hash, data) -> true/false * - * Finishes a parser early which could put in a "good" or bad state. - * You should call reset after finish it or bad things will happen. 
- */ -static VALUE HttpParser_finish(VALUE self) -{ - http_parser *http = NULL; - DATA_GET(self, http_parser, http); - http_parser_finish(http); - - return http_parser_is_finished(http) ? Qtrue : Qfalse; -} - - -/** - * call-seq: - * parser.execute(req_hash, data, start) -> Integer - * - * Takes a Hash and a String of data, parses the String of data filling in the Hash - * returning an Integer to indicate how much of the data has been read. No matter - * what the return value, you should call HttpParser#finished? and HttpParser#error? - * to figure out if it's done parsing or there was an error. + * Takes a Hash and a String of data, parses the String of data filling + * in the Hash returning a boolean to indicate whether or not parsing + * is finished. * - * This function now throws an exception when there is a parsing error. This makes - * the logic for working with the parser much easier. You can still test for an - * error, but now you need to wrap the parser with an exception handling block. - * - * The third argument allows for parsing a partial request and then continuing - * the parsing from that position. It needs all of the original data as well - * so you have to append to the data buffer as you read. + * This function now throws an exception when there is a parsing error. + * This makes the logic for working with the parser much easier. You + * will need to wrap the parser with an exception handling block. 
*/ -static VALUE HttpParser_execute(VALUE self, VALUE req_hash, - VALUE data, VALUE start) -{ - http_parser *http = NULL; - int from = 0; - char *dptr = NULL; - long dlen = 0; - - DATA_GET(self, http_parser, http); - - from = FIX2INT(start); - dptr = RSTRING_PTR(data); - dlen = RSTRING_LEN(data); - - if(from >= dlen) { - rb_raise(eHttpParserError, "Requested start is after data buffer end."); - } else { - http->data = (void *)req_hash; - http_parser_execute(http, dptr, dlen, from); - - VALIDATE_MAX_LENGTH(http_parser_nread(http), HEADER); - - if(http_parser_has_error(http)) { - rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails."); - } else { - return INT2FIX(http_parser_nread(http)); - } - } -} - - -/** - * call-seq: - * parser.error? -> true/false - * - * Tells you whether the parser is in an error state. - */ -static VALUE HttpParser_has_error(VALUE self) +static VALUE HttpParser_execute(VALUE self, VALUE req_hash, VALUE data) { - http_parser *http = NULL; - DATA_GET(self, http_parser, http); + http_parser *http; + char *dptr = RSTRING_PTR(data); + long dlen = RSTRING_LEN(data); - return http_parser_has_error(http) ? Qtrue : Qfalse; -} - - -/** - * call-seq: - * parser.finished? -> true/false - * - * Tells you whether the parser is finished or not and in a good state. - */ -static VALUE HttpParser_is_finished(VALUE self) -{ - http_parser *http = NULL; DATA_GET(self, http_parser, http); - return http_parser_is_finished(http) ? Qtrue : Qfalse; -} + if (http->nread < dlen) { + http->data = (void *)req_hash; + http_parser_execute(http, dptr, dlen); + VALIDATE_MAX_LENGTH(http->nread, HEADER); -/** - * call-seq: - * parser.nread -> Integer - * - * Returns the amount of data processed so far during this processing cycle. It is - * set to 0 on initialize or reset calls and is incremented each time execute is called. 
- */ -static VALUE HttpParser_nread(VALUE self) -{ - http_parser *http = NULL; - DATA_GET(self, http_parser, http); + if (!http_parser_has_error(http)) + return http_parser_is_finished(http) ? Qtrue : Qfalse; - return INT2FIX(http->nread); + rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails."); + } + rb_raise(eHttpParserError, "Requested start is after data buffer end."); } void Init_http11() @@ -504,10 +429,6 @@ void Init_http11() rb_define_alloc_func(cHttpParser, HttpParser_alloc); rb_define_method(cHttpParser, "initialize", HttpParser_init,0); rb_define_method(cHttpParser, "reset", HttpParser_reset,0); - rb_define_method(cHttpParser, "finish", HttpParser_finish,0); - rb_define_method(cHttpParser, "execute", HttpParser_execute,3); - rb_define_method(cHttpParser, "error?", HttpParser_has_error,0); - rb_define_method(cHttpParser, "finished?", HttpParser_is_finished,0); - rb_define_method(cHttpParser, "nread", HttpParser_nread,0); + rb_define_method(cHttpParser, "execute", HttpParser_execute,2); init_common_fields(); } diff --git a/ext/unicorn/http11/http11_parser.c b/ext/unicorn/http11/http11_parser.c index d33eed0..b6d55c8 100644 --- a/ext/unicorn/http11/http11_parser.c +++ b/ext/unicorn/http11/http11_parser.c @@ -63,9 +63,10 @@ int http_parser_init(http_parser *parser) { /** exec **/ -size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, size_t off) { +size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len) { const char *p, *pe; int cs = parser->cs; + size_t off = parser->nread; assert(off <= len && "offset past end of buffer"); @@ -76,7 +77,7 @@ size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, assert(pe - p == len - off && "pointers aren't same distance"); -#line 80 "http11_parser.c" +#line 81 "http11_parser.c" { if ( p == pe ) goto _test_eof; @@ -107,7 +108,7 @@ st2: if ( ++p == pe ) goto _test_eof2; case 2: -#line 111 "http11_parser.c" +#line 112 "http11_parser.c" 
switch( (*p) ) { case 32: goto tr2; case 36: goto st38; @@ -133,7 +134,7 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 137 "http11_parser.c" +#line 138 "http11_parser.c" switch( (*p) ) { case 42: goto tr4; case 43: goto tr5; @@ -157,7 +158,7 @@ st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 161 "http11_parser.c" +#line 162 "http11_parser.c" switch( (*p) ) { case 32: goto tr8; case 35: goto tr9; @@ -228,7 +229,7 @@ st5: if ( ++p == pe ) goto _test_eof5; case 5: -#line 232 "http11_parser.c" +#line 233 "http11_parser.c" if ( (*p) == 72 ) goto tr10; goto st0; @@ -240,7 +241,7 @@ st6: if ( ++p == pe ) goto _test_eof6; case 6: -#line 244 "http11_parser.c" +#line 245 "http11_parser.c" if ( (*p) == 84 ) goto st7; goto st0; @@ -326,7 +327,7 @@ st14: if ( ++p == pe ) goto _test_eof14; case 14: -#line 330 "http11_parser.c" +#line 331 "http11_parser.c" if ( (*p) == 10 ) goto st15; goto st0; @@ -378,7 +379,7 @@ st57: if ( ++p == pe ) goto _test_eof57; case 57: -#line 382 "http11_parser.c" +#line 383 "http11_parser.c" goto st0; tr21: #line 37 "http11_parser.rl" @@ -394,7 +395,7 @@ st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 398 "http11_parser.c" +#line 399 "http11_parser.c" switch( (*p) ) { case 33: goto tr23; case 58: goto tr24; @@ -433,7 +434,7 @@ st18: if ( ++p == pe ) goto _test_eof18; case 18: -#line 437 "http11_parser.c" +#line 438 "http11_parser.c" switch( (*p) ) { case 13: goto tr26; case 32: goto tr27; @@ -447,7 +448,7 @@ st19: if ( ++p == pe ) goto _test_eof19; case 19: -#line 451 "http11_parser.c" +#line 452 "http11_parser.c" if ( (*p) == 13 ) goto tr29; goto st19; @@ -500,7 +501,7 @@ st20: if ( ++p == pe ) goto _test_eof20; case 20: -#line 504 "http11_parser.c" +#line 505 "http11_parser.c" switch( (*p) ) { case 32: goto tr31; case 35: goto st0; @@ -518,7 +519,7 @@ st21: if ( ++p == pe ) goto _test_eof21; case 21: -#line 522 "http11_parser.c" +#line 523 "http11_parser.c" switch( (*p) ) { case 32: goto tr34; case 35: goto st0; @@ -536,7 
+537,7 @@ st22: if ( ++p == pe ) goto _test_eof22; case 22: -#line 540 "http11_parser.c" +#line 541 "http11_parser.c" if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st23; @@ -567,7 +568,7 @@ st24: if ( ++p == pe ) goto _test_eof24; case 24: -#line 571 "http11_parser.c" +#line 572 "http11_parser.c" switch( (*p) ) { case 43: goto st24; case 58: goto st25; @@ -592,7 +593,7 @@ st25: if ( ++p == pe ) goto _test_eof25; case 25: -#line 596 "http11_parser.c" +#line 597 "http11_parser.c" switch( (*p) ) { case 32: goto tr8; case 35: goto tr9; @@ -636,7 +637,7 @@ st28: if ( ++p == pe ) goto _test_eof28; case 28: -#line 640 "http11_parser.c" +#line 641 "http11_parser.c" switch( (*p) ) { case 32: goto tr42; case 35: goto tr43; @@ -685,7 +686,7 @@ st31: if ( ++p == pe ) goto _test_eof31; case 31: -#line 689 "http11_parser.c" +#line 690 "http11_parser.c" switch( (*p) ) { case 32: goto tr8; case 35: goto tr9; @@ -733,7 +734,7 @@ st34: if ( ++p == pe ) goto _test_eof34; case 34: -#line 737 "http11_parser.c" +#line 738 "http11_parser.c" switch( (*p) ) { case 32: goto tr53; case 35: goto tr54; @@ -751,7 +752,7 @@ st35: if ( ++p == pe ) goto _test_eof35; case 35: -#line 755 "http11_parser.c" +#line 756 "http11_parser.c" switch( (*p) ) { case 32: goto tr57; case 35: goto tr58; @@ -769,7 +770,7 @@ st36: if ( ++p == pe ) goto _test_eof36; case 36: -#line 773 "http11_parser.c" +#line 774 "http11_parser.c" if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st37; @@ -1184,7 +1185,7 @@ case 56: _test_eof: {} _out: {} } -#line 121 "http11_parser.rl" +#line 122 "http11_parser.rl" if (!http_parser_has_error(parser)) parser->cs = cs; diff --git a/ext/unicorn/http11/http11_parser.h b/ext/unicorn/http11/http11_parser.h index c96b3a0..6c332fe 100644 --- a/ext/unicorn/http11/http11_parser.h +++ b/ext/unicorn/http11/http11_parser.h @@ -36,10 +36,8 @@ typedef struct http_parser { int http_parser_init(http_parser *parser); int http_parser_finish(http_parser *parser); -size_t 
http_parser_execute(http_parser *parser, const char *data, size_t len, size_t off); +size_t http_parser_execute(http_parser *parser, const char *data, size_t len); int http_parser_has_error(http_parser *parser); int http_parser_is_finished(http_parser *parser); -#define http_parser_nread(parser) (parser)->nread - #endif diff --git a/ext/unicorn/http11/http11_parser.rl b/ext/unicorn/http11/http11_parser.rl index c3c4b1f..1fad2ca 100644 --- a/ext/unicorn/http11/http11_parser.rl +++ b/ext/unicorn/http11/http11_parser.rl @@ -105,9 +105,10 @@ int http_parser_init(http_parser *parser) { /** exec **/ -size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, size_t off) { +size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len) { const char *p, *pe; int cs = parser->cs; + size_t off = parser->nread; assert(off <= len && "offset past end of buffer"); -- cgit v1.2.3-24-ge0c7