From: Eric Wong <e@80x24.org>
To: kcar-public@bogomips.org
Subject: [PATCH 2/7] begin implementing request parsing
Date: Wed, 19 Apr 2017 22:30:20 +0000 [thread overview]
Message-ID: <20170419223025.8093-3-e@80x24.org> (raw)
In-Reply-To: <20170419223025.8093-1-e@80x24.org>
Not wired up yet, but for now everything compiles
and existing tests run.
---
ext/kcar/kcar.rl | 107 ++++++++++++++++++++++++++++++++++++++++++-
ext/kcar/kcar_http_common.rl | 36 +++++++++++++--
2 files changed, 138 insertions(+), 5 deletions(-)
diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index 79f65db..2774387 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -15,6 +15,10 @@
static VALUE eParserError;
static ID id_uminus, id_sq, id_sq_set;
+static VALUE g_rack_url_scheme, /* hash keys; each is assigned in Init_kcar_ext */
+ g_HOST, g_PATH_INFO, g_QUERY_STRING,
+ g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI;
+static VALUE e413, e414; /* ParserError subclasses; e414 is raised for over-long URI parts */
/** Defines common length and error messages for input length validation. */
#define DEF_MAX_LENGTH(N, length) \
@@ -31,10 +35,20 @@ static ID id_uminus, id_sq, id_sq_set;
rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
} while (0)
+/*
+ * Raises e414 with MAX_<N>_LENGTH_ERR when len exceeds MAX_<N>_LENGTH.
+ * len is parenthesized so that an expression argument is compared as a whole.
+ */
+#define VALIDATE_MAX_URI_LENGTH(len, N) do { \
+	if ((len) > MAX_##N##_LENGTH) \
+		rb_raise(e414, MAX_##N##_LENGTH_ERR); \
+} while (0)
+
/* Defines the maximum allowed lengths for various input elements.*/
DEF_MAX_LENGTH(FIELD_NAME, 256);
DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
+DEF_MAX_LENGTH(REQUEST_URI, 1024 * 15);
+DEF_MAX_LENGTH(FRAGMENT, 1024); /* just in case (stolen from Mongrel) */
+DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */
+DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
+
#define UH_FL_CHUNKED 0x1
#define UH_FL_HASBODY 0x2
@@ -90,6 +104,13 @@ static unsigned int ulong2uint(unsigned long n)
#define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
#define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
+/*
+ * Lowercases the byte at c in place when it is an ASCII capital letter;
+ * every other byte is left as-is.  No locale is consulted.
+ */
+static void downcase_char(char *c)
+{
+	const char ch = *c;
+
+	if ('A' <= ch && ch <= 'Z')
+		*c = (char)(ch | 0x20);
+}
+
static int is_lws(char c)
{
return (c == ' ' || c == '\t');
@@ -153,7 +174,54 @@ static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
}
static void
-http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
+request_method(VALUE env, const char *ptr, size_t len)
+{
+	/* env[REQUEST_METHOD] = string built from the len bytes at ptr */
+	VALUE verb = str_new_dd_freeze(ptr, len);
+
+	rb_hash_aset(env, g_REQUEST_METHOD, verb);
+}
+
+/*
+ * Stores the URI scheme under the rack.url_scheme key of env.
+ * The grammar runs downcase_char over the scheme bytes before this fires.
+ */
+static void
+url_scheme(VALUE env, const char *ptr, size_t len)
+{
+	VALUE scheme = str_new_dd_freeze(ptr, len);
+
+	rb_hash_aset(env, g_rack_url_scheme, scheme);
+}
+
+/*
+ * Stores the host portion of an absolute URI under the HOST key of env.
+ * The grammar marks the whole host_with_port rule, so any ":port" suffix
+ * is part of the stored text.
+ */
+static void
+request_host(VALUE env, const char *ptr, size_t len)
+{
+	VALUE host = str_new_dd_freeze(ptr, len);
+
+	rb_hash_aset(env, g_HOST, host);
+}
+
+/*
+ * Length-checks the request URI against MAX_REQUEST_URI_LENGTH (raising
+ * e414 when it is longer) and then stores it under the REQUEST_URI key.
+ */
+static void
+request_uri(VALUE env, const char *ptr, size_t len)
+{
+	VALUE uri;
+
+	VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
+	uri = rb_str_new(ptr, len);
+	rb_hash_aset(env, g_REQUEST_URI, uri);
+}
+
+/*
+ * Length-checks the query string against MAX_QUERY_STRING_LENGTH (raising
+ * e414 when it is longer) and then stores it under the QUERY_STRING key.
+ */
+static void
+query_string(VALUE env, const char *ptr, size_t len)
+{
+	VALUE query;
+
+	VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
+	query = rb_str_new(ptr, len);
+	rb_hash_aset(env, g_QUERY_STRING, query);
+}
+
+/*
+ * Length-checks the path (raising e414 when it exceeds
+ * MAX_REQUEST_PATH_LENGTH), stores it as REQUEST_PATH and derives PATH_INFO.
+ * PATH_INFO receives the very same string object as REQUEST_PATH unless the
+ * path is "*", in which case it receives a new empty string.
+ */
+static void
+request_path(VALUE env, const char *ptr, size_t len)
+{
+	VALUE path;
+
+	VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
+	path = rb_str_new(ptr, len);
+	rb_hash_aset(env, g_REQUEST_PATH, path);
+
+	/* rack says PATH_INFO must start with "/" or be empty */
+	rb_hash_aset(env, g_PATH_INFO,
+		     CONST_MEM_EQ("*", ptr, len) ? rb_str_new(NULL, 0) : path);
+}
+
+static void
+http_version(struct http_parser *hp, const char *ptr, size_t len)
{
if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
/* HTTP/1.1 implies keepalive unless "Connection: close" is set */
@@ -328,12 +396,24 @@ static void write_value(VALUE hdr, struct http_parser *hp,
action mark {MARK(mark, fpc); }
+ action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
+ action downcase_char { downcase_char(deconst(fpc)); }
+ action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+ action url_scheme { url_scheme(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+ action host { request_host(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+ action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+
+ action start_query { MARK(start.query, fpc); }
+ action query_string {
+ query_string(hdr, PTR_TO(start.query), LEN(start.query, fpc));
+ }
+ action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
action start_field { MARK(start.field, fpc); }
action write_field { hp->s.field_len = LEN(start.field, fpc); }
action start_value { MARK(mark, fpc); }
action write_value { write_value(hdr, hp, buffer, fpc); }
action write_cont_value { write_cont_value(hp, buffer, fpc); }
- action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
+ action http_version { http_version(hp, PTR_TO(mark), LEN(mark, fpc)); }
action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
action add_to_chunk_size {
@@ -720,6 +800,7 @@ static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
void Init_kcar_ext(void)
{
+ static VALUE globals;
VALUE mKcar = rb_define_module("Kcar");
VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);
@@ -729,6 +810,10 @@ void Init_kcar_ext(void)
* This is raised if there are parsing errors.
*/
eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);
+ e413 = rb_define_class_under(mKcar, "RequestEntityTooLargeError",
+ eParserError);
+ e414 = rb_define_class_under(mKcar, "RequestURITooLongError",
+ eParserError);
rb_define_alloc_func(cParser, kcar_alloc);
rb_define_method(cParser, "initialize", initialize, 0);
@@ -759,4 +844,22 @@ void Init_kcar_ext(void)
id_sq = rb_intern("[]");
id_sq_set = rb_intern("[]=");
id_uminus = rb_intern("-@");
+
+	/* TODO: gperf to make a perfect hash of common strings */
+
+	/*
+	 * C(ary, var, cstr): build a string from the literal cstr with
+	 * str_new_dd_freeze, assign it to var and push it onto ary.
+	 * No semicolon follows "while (0)", so each use supplies its own and
+	 * the macro expands to exactly one statement.
+	 */
+#define C(ary, var, cstr) do { \
+	var = str_new_dd_freeze((cstr), sizeof(cstr) - 1); \
+	rb_ary_push((ary), (var)); \
+} while (0)
+
+	/*
+	 * Register the address before anything is allocated: the array (and,
+	 * through it, every g_* string pushed below) is then already a GC root
+	 * should one of the following allocations trigger a collection.
+	 * Until the assignment the static holds zero, i.e. Qfalse.
+	 */
+	rb_global_variable(&globals);
+	globals = rb_ary_new();
+	C(globals, g_HOST, "HOST");
+	C(globals, g_PATH_INFO, "PATH_INFO");
+	C(globals, g_QUERY_STRING, "QUERY_STRING");
+	C(globals, g_REQUEST_METHOD, "REQUEST_METHOD");
+	C(globals, g_REQUEST_PATH, "REQUEST_PATH");
+	C(globals, g_REQUEST_URI, "REQUEST_URI");
+	C(globals, g_rack_url_scheme, "rack.url_scheme");
+	OBJ_FREEZE(globals);
+#undef C
}
diff --git a/ext/kcar/kcar_http_common.rl b/ext/kcar/kcar_http_common.rl
index cb89248..0c596bc 100644
--- a/ext/kcar/kcar_http_common.rl
+++ b/ext/kcar/kcar_http_common.rl
@@ -25,10 +25,38 @@
# elements
token = (ascii -- (CTL | tspecials));
+
+# URI schemes and absolute paths
+ scheme = ( "http"i ("s"i)? ) $downcase_char >mark %url_scheme;
+ hostname = ((alnum | "-" | "." | "_")+ | ("[" (":" | xdigit)+ "]"));
+ host_with_port = (hostname (":" digit*)?) >mark %host;
+ userinfo = ((unreserved | escape | ";" | ":" | "&" | "=" | "+")+ "@")*;
+
+ path = ( pchar+ ( "/" pchar* )* ) ;
+ query = ( uchar | reserved )* %query_string ;
+ param = ( pchar | "/" )* ;
+ params = ( param ( ";" param )* ) ;
+ rel_path = (path? (";" params)? %request_path) ("?" %start_query query)?;
+ absolute_path = ( "/"+ rel_path );
+ path_uri = absolute_path > mark %request_uri;
+ Absolute_URI = (scheme "://" userinfo host_with_port path_uri);
+
+ Request_URI = ((absolute_path | "*") >mark %request_uri) | Absolute_URI;
+
+ # let's not waste cycles setting the fragment in the request;
+ # valid clients do not send it, so we just silently ignore it.
+ Fragment = ( uchar | reserved )*;
+
+ Method = (token){1,20} >mark %request_method;
+ GetOnly = "GET" >mark %request_method;
+
+ http_number = ( digit+ "." digit+ ) ;
+ HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ;
+ Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " "
+ HTTP_Version CRLF ) ;
+
phrase = (any -- CRLF)+;
Status_Phrase = (digit+ (" "+ phrase)?) >mark %status_phrase ;
- http_number = (digit+ "." digit+) ;
- HTTP_Version = ("HTTP/" http_number) >mark %http_version ;
Status_Line = HTTP_Version " "+ Status_Phrase :> CRLF;
field_name = ( token -- ":" )+ >start_field %write_field;
@@ -51,7 +79,9 @@
Trailers := (message_header)* CRLF @end_trailers;
FullResponse = Status_Line (message_header)* CRLF @header_done;
+ FullRequest = Request_Line (message_header)* CRLF @header_done;
+ SimpleRequest = GetOnly " " Request_URI ("#"Fragment){0,1} CRLF @header_done;
-main := FullResponse;
+main := FullResponse | FullRequest | SimpleRequest;
}%%
--
EW
next prev parent reply other threads:[~2017-04-19 22:30 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-04-19 22:30 [PATCH 0/7] request parsing bits Eric Wong
2017-04-19 22:30 ` [PATCH 1/7] introduce new str_new_dd_freeze internal function Eric Wong
2017-04-19 22:30 ` Eric Wong [this message]
2017-04-27 18:00 ` [PATCH 2/7] begin implementing request parsing Eric Wong
2017-04-19 22:30 ` [PATCH 3/7] favor bitfields instead flags + macros Eric Wong
2017-04-19 22:30 ` [PATCH 4/7] implement request parsing with tests Eric Wong
2017-04-19 22:30 ` [PATCH 5/7] pkg.mk: enable warnings by default for tests Eric Wong
2017-04-19 22:30 ` [PATCH 6/7] filter_body: rename variables to be like memcpy(3) Eric Wong
2017-04-19 22:30 ` [PATCH 7/7] flesh out filter_body for request parsing Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://yhbt.net/kcar/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170419223025.8093-3-e@80x24.org \
--to=e@80x24.org \
--cc=kcar-public@bogomips.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://yhbt.net/kcar.git/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).