From d4ff84b8d3afa45a69c23eb18899194bf7219140 Mon Sep 17 00:00:00 2001 From: evanweaver Date: Fri, 26 Oct 2007 02:59:15 +0000 Subject: reverts for 1.0.2 git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@741 19e92222-5c0b-0410-8929-a290d50e31e9 --- Rakefile | 47 +-- ext/http11/http11.c | 194 +++++++++ ext/http11/tst.h | 40 ++ ext/http11/tst_cleanup.c | 23 ++ ext/http11/tst_delete.c | 146 +++++++ ext/http11/tst_grow_node_free_list.c | 38 ++ ext/http11/tst_init.c | 41 ++ ext/http11/tst_insert.c | 218 +++++++++++ ext/http11/tst_search.c | 73 ++++ ext/http11_java/org/jruby/mongrel/Http11.java | 2 + .../org/jruby/mongrel/URIClassifier.java | 129 ++++++ ext/http11_java/org/jruby/tst/Node.java | 18 + ext/http11_java/org/jruby/tst/NodeLines.java | 16 + .../org/jruby/tst/TernarySearchTree.java | 433 +++++++++++++++++++++ lib/mongrel.rb | 96 ++--- test/test_configurator.rb | 1 - test/test_uriclassifier.rb | 2 +- test/testhelp.rb | 1 - 18 files changed, 1395 insertions(+), 123 deletions(-) create mode 100644 ext/http11/tst.h create mode 100644 ext/http11/tst_cleanup.c create mode 100644 ext/http11/tst_delete.c create mode 100644 ext/http11/tst_grow_node_free_list.c create mode 100644 ext/http11/tst_init.c create mode 100644 ext/http11/tst_insert.c create mode 100644 ext/http11/tst_search.c create mode 100644 ext/http11_java/org/jruby/mongrel/URIClassifier.java create mode 100644 ext/http11_java/org/jruby/tst/Node.java create mode 100644 ext/http11_java/org/jruby/tst/NodeLines.java create mode 100644 ext/http11_java/org/jruby/tst/TernarySearchTree.java diff --git a/Rakefile b/Rakefile index 4d7e389..4df1352 100644 --- a/Rakefile +++ b/Rakefile @@ -6,7 +6,7 @@ require 'echoe' e = Echoe.new("mongrel") do |p| p.summary = "A small fast HTTP library and server that runs Rails, Camping, Nitro and Iowa apps." p.author ="Zed A. Shaw" - p.clean_pattern = ['ext/http11/*.{bundle,so,o,obj,pdb,lib,def,exp}', 'ext/http11/Makefile', 'pkg', 'lib/*.bundle', '*.gem', 'site/output', '.config', 'lib/http11.jar', 'ext/http11_java/classes'] + p.clean_pattern = ['ext/http11/*.{bundle,so,o,obj,pdb,lib,def,exp}', 'ext/http11/Makefile', 'pkg', 'lib/*.bundle', '*.gem', 'site/output', '.config'] p.rdoc_pattern = ['README', 'LICENSE', 'COPYING', 'lib/**/*.rb', 'doc/**/*.rdoc', 'ext/http11/http11.c'] p.ignore_pattern = /^(pkg|site|projects|doc|log)|CVS|\.log/ p.ruby_version = '>= 1.8.4' @@ -30,10 +30,6 @@ e = Echoe.new("mongrel") do |p| extensions.clear self.files += ['lib/http11.so'] self.platform = Gem::Platform::WIN32 - when /java/ - extensions.clear - self.files += ['lib/http11.jar'] - self.platform = 'jruby' else add_dependency('daemons', '>= 1.0.3') add_dependency('fastthread', '>= 1.0.1') @@ -52,21 +48,6 @@ task :ragel do sh "ragel http11_parser.rl | rlgen-cd -G2 -o #{target}" raise "Failed to build C source" unless File.exist? target end - Dir.chdir "ext/http11" do - target = "../../ext/http11_java/org/jruby/mongrel/Http11Parser.java" - File.unlink target if File.exist? target - sh "ragel -J http11_parser.java.rl | rlgen-java -o #{target}" - raise "Failed to build Java source" unless File.exist? target - end -end - -#### XXX Hack around JRuby test/unit interaction problems - -desc "Run each test suite in isolation on JRuby" -task :test_java do - e.test_pattern.each do |f| - sh "/opt/local/jruby/bin/jruby -w -Ilib:ext:bin:test -e 'require \"#{f}\"'" rescue nil - end end #### XXX Hack around RubyGems and Echoe for pre-compiled extensions. @@ -75,18 +56,6 @@ def move_extensions Dir["ext/**/*.#{Config::CONFIG['DLEXT']}"].each { |file| cp file, "lib/" } end -def java_classpath_arg - # A myriad of ways to discover the JRuby classpath - classpath = begin - require 'java' - # Already running in a JRuby JVM - Java::java.lang.System.getProperty('java.class.path') - rescue LoadError - ENV['JRUBY_PARENT_CLASSPATH'] || ENV['JRUBY_HOME'] && FileList["#{ENV['JRUBY_HOME']}/lib/*.jar"].join(File::PATH_SEPARATOR) - end - classpath ? "-cp #{classpath}" : "" -end - case RUBY_PLATFORM when /mswin/ filename = "lib/http11.so" @@ -98,19 +67,6 @@ when /mswin/ move_extensions end task :compile => [filename] - -when /java/ - filename = "lib/http11.jar" - file filename do - build_dir = "ext/http11_java/classes" - mkdir_p build_dir - sources = FileList['ext/http11_java/**/*.java'].join(' ') - sh "javac -target 1.4 -source 1.4 -d #{build_dir} #{java_classpath_arg} #{sources}" - sh "jar cf lib/http11.jar -C #{build_dir} ." - move_extensions - end - task :compile => [filename] - end #### Project-wide install and uninstall tasks @@ -133,7 +89,6 @@ task :package_all => [:package] do sub_project("mongrel_console", :package) sub_project("mongrel_cluster", :package) sub_project("mongrel_service", :package) if RUBY_PLATFORM =~ /mswin/ - sh("rake java package") unless RUBY_PLATFORM =~ /java/ end task :install_requirements do diff --git a/ext/http11/http11.c b/ext/http11/http11.c index 7a84db8..2c6e119 100644 --- a/ext/http11/http11.c +++ b/ext/http11/http11.c @@ -8,9 +8,11 @@ #include #include "http11_parser.h" #include +#include "tst.h" static VALUE mMongrel; static VALUE cHttpParser; +static VALUE cURIClassifier; static VALUE eHttpParserError; #define id_handler_map rb_intern("@handler_map") @@ -361,6 +363,191 @@ VALUE HttpParser_nread(VALUE self) return INT2FIX(http->nread); } + +void URIClassifier_free(void *data) +{ + TRACE(); + + if(data) { + tst_cleanup((struct tst *)data); + } +} + + + +VALUE URIClassifier_alloc(VALUE klass) +{ + VALUE obj; + struct tst *tst = tst_init(TRIE_INCREASE); + TRACE(); + assert(tst && "failed to initialize trie structure"); + + obj = Data_Wrap_Struct(klass, NULL, URIClassifier_free, tst); + + return obj; +} + +/** + * call-seq: + * URIClassifier.new -> URIClassifier + * + * Initializes a new URIClassifier object that you can use to associate URI sequences + * with objects. You can actually use it with any string sequence and any objects, + * but it's mostly used with URIs. + * + * It uses TST from http://www.octavian.org/cs/software.html to build an ternary search + * trie to hold all of the URIs. It uses this to do an initial search for the a URI + * prefix, and then to break the URI into SCRIPT_NAME and PATH_INFO portions. It actually + * will do two searches most of the time in order to find the right handler for the + * registered prefix portion. + * + */ +VALUE URIClassifier_init(VALUE self) +{ + VALUE hash; + + /* we create an internal hash to protect stuff from the GC */ + hash = rb_hash_new(); + rb_ivar_set(self, id_handler_map, hash); + + return self; +} + + +/** + * call-seq: + * uc.register("/someuri", SampleHandler.new) -> nil + * + * Registers the SampleHandler (one for all requests) with the "/someuri". + * When URIClassifier::resolve is called with "/someuri" it'll return + * SampleHandler immediately. When called with "/someuri/iwant" it'll also + * return SomeHandler immediatly, with no additional searches, but it will + * return path info with "/iwant". + * + * You actually can reuse this class to register nearly anything and + * quickly resolve it. This could be used for caching, fast mapping, etc. + * The downside is it uses much more memory than a Hash, but it can be + * a lot faster. It's main advantage is that it works on prefixes, which + * is damn hard to get right with a Hash. + */ +VALUE URIClassifier_register(VALUE self, VALUE uri, VALUE handler) +{ + int rc = 0; + void *ptr = NULL; + struct tst *tst = NULL; + DATA_GET(self, struct tst, tst); + + rc = tst_insert((unsigned char *)StringValueCStr(uri), (void *)handler , tst, 0, &ptr); + + if(rc == TST_DUPLICATE_KEY) { + rb_raise(rb_eStandardError, "Handler already registered with that name"); + } else if(rc == TST_ERROR) { + rb_raise(rb_eStandardError, "Memory error registering handler"); + } else if(rc == TST_NULL_KEY) { + rb_raise(rb_eStandardError, "URI was empty"); + } + + rb_hash_aset(rb_ivar_get(self, id_handler_map), uri, handler); + + return Qnil; +} + + +/** + * call-seq: + * uc.unregister("/someuri") + * + * Yep, just removes this uri and it's handler from the trie. + */ +VALUE URIClassifier_unregister(VALUE self, VALUE uri) +{ + void *handler = NULL; + struct tst *tst = NULL; + DATA_GET(self, struct tst, tst); + + handler = tst_delete((unsigned char *)StringValueCStr(uri), tst); + + if(handler) { + rb_hash_delete(rb_ivar_get(self, id_handler_map), uri); + + return (VALUE)handler; + } else { + return Qnil; + } +} + + +/** + * call-seq: + * uc.resolve("/someuri") -> "/someuri", "", handler + * uc.resolve("/someuri/pathinfo") -> "/someuri", "/pathinfo", handler + * uc.resolve("/notfound/orhere") -> nil, nil, nil + * uc.resolve("/") -> "/", "/", handler # if uc.register("/", handler) + * uc.resolve("/path/from/root") -> "/", "/path/from/root", handler # if uc.register("/", handler) + * + * Attempts to resolve either the whole URI or at the longest prefix, returning + * the prefix (as script_info), path (as path_info), and registered handler + * (usually an HttpHandler). If it doesn't find a handler registered at the longest + * match then it returns nil,nil,nil. + * + * Because the resolver uses a trie you are able to register a handler at *any* character + * in the URI and it will be handled as long as it's the longest prefix. So, if you + * registered handler #1 at "/something/lik", and #2 at "/something/like/that", then a + * a search for "/something/like" would give you #1. A search for "/something/like/that/too" + * would give you #2. + * + * This is very powerful since it means you can also attach handlers to parts of the ; + * (semi-colon) separated path params, any part of the path, use off chars, anything really. + * It also means that it's very efficient to do this only taking as long as the URI has + * characters. + * + * A slight modification to the CGI 1.2 standard is given for handlers registered to "/". + * CGI expects all CGI scripts to be at some script path, so it doesn't really say anything + * about a script that handles the root. To make this work, the resolver will detect that + * the requested handler is at "/", and return that for script_name, and then simply return + * the full URI back as path_info. + * + * It expects strings with no embedded '\0' characters. Don't try other string-like stuff yet. + */ +VALUE URIClassifier_resolve(VALUE self, VALUE uri) +{ + void *handler = NULL; + int pref_len = 0; + struct tst *tst = NULL; + VALUE result; + unsigned char *uri_str = NULL; + + DATA_GET(self, struct tst, tst); + uri_str = (unsigned char *)StringValueCStr(uri); + + handler = tst_search(uri_str, tst, TST_LONGEST_MATCH, &pref_len); + + /* setup for multiple return values */ + result = rb_ary_new(); + + if(handler) { + rb_ary_push(result, rb_str_substr (uri, 0, pref_len)); + /* compensate for a script_name="/" where we need to add the "/" to path_info to keep it consistent */ + if(pref_len == 1 && uri_str[0] == '/') { + /* matches the root URI so we have to use the whole URI as the path_info */ + rb_ary_push(result, uri); + } else { + /* matches a script so process like normal */ + rb_ary_push(result, rb_str_substr(uri, pref_len, RSTRING(uri)->len)); + } + + rb_ary_push(result, (VALUE)handler); + } else { + /* not found so push back nothing */ + rb_ary_push(result, Qnil); + rb_ary_push(result, Qnil); + rb_ary_push(result, Qnil); + } + + return result; +} + + void Init_http11() { @@ -399,4 +586,11 @@ void Init_http11() rb_define_method(cHttpParser, "error?", HttpParser_has_error,0); rb_define_method(cHttpParser, "finished?", HttpParser_is_finished,0); rb_define_method(cHttpParser, "nread", HttpParser_nread,0); + + cURIClassifier = rb_define_class_under(mMongrel, "URIClassifier", rb_cObject); + rb_define_alloc_func(cURIClassifier, URIClassifier_alloc); + rb_define_method(cURIClassifier, "initialize", URIClassifier_init, 0); + rb_define_method(cURIClassifier, "register", URIClassifier_register, 2); + rb_define_method(cURIClassifier, "unregister", URIClassifier_unregister, 1); + rb_define_method(cURIClassifier, "resolve", URIClassifier_resolve, 1); } diff --git a/ext/http11/tst.h b/ext/http11/tst.h new file mode 100644 index 0000000..3a58a65 --- /dev/null +++ b/ext/http11/tst.h @@ -0,0 +1,40 @@ + + +struct node +{ + unsigned char value; + struct node *left; + struct node *middle; + struct node *right; +}; + +struct tst +{ + int node_line_width; + struct node_lines *node_lines; + struct node *free_list; + struct node *head[127]; +}; + +struct node_lines +{ + struct node *node_line; + struct node_lines *next; +}; + +enum tst_constants +{ + TST_OK, TST_ERROR, TST_NULL_KEY, TST_DUPLICATE_KEY, TST_REPLACE, TST_LONGEST_MATCH +}; + +struct tst *tst_init(int node_line_width); + +int tst_insert(unsigned char *key, void *data, struct tst *tst, int option, void **exist_ptr); + +void *tst_search(const unsigned char *key, struct tst *tst, int option, unsigned int *match_len); + +void *tst_delete(unsigned char *key, struct tst *tst); + +void tst_cleanup(struct tst *tst); + + diff --git a/ext/http11/tst_cleanup.c b/ext/http11/tst_cleanup.c new file mode 100644 index 0000000..a85491d --- /dev/null +++ b/ext/http11/tst_cleanup.c @@ -0,0 +1,23 @@ + +#include "tst.h" +#include +#include + +void tst_cleanup(struct tst *tst) +{ + struct node_lines *current_line; + struct node_lines *next_line; + + next_line = tst->node_lines; + + do + { + current_line = next_line; + next_line = current_line->next; + free(current_line->node_line); + free(current_line); + } + while(next_line != NULL); + + free(tst); +} diff --git a/ext/http11/tst_delete.c b/ext/http11/tst_delete.c new file mode 100644 index 0000000..cb18f16 --- /dev/null +++ b/ext/http11/tst_delete.c @@ -0,0 +1,146 @@ + +#include "tst.h" +#include +#include + +void *tst_delete(unsigned char *key, struct tst *tst) +{ + struct node *current_node; + struct node *current_node_parent; + struct node *last_branch; + struct node *last_branch_parent; + struct node *next_node; + struct node *last_branch_replacement; + struct node *last_branch_dangling_child; + int key_index; + + + if(key[0] == 0) + return NULL; + if(tst->head[(int)key[0]] == NULL) + return NULL; + + last_branch = NULL; + last_branch_parent = NULL; + current_node = tst->head[(int)key[0]]; + current_node_parent = NULL; + key_index = 1; + while(current_node != NULL) + { + if(key[key_index] == current_node->value) + { + + if( (current_node->left != NULL) || (current_node->right != NULL) ) + { + last_branch = current_node; + last_branch_parent = current_node_parent; + } + if(key[key_index] == 0) + break; + else + { + current_node_parent = current_node; + current_node = current_node->middle; + key_index++; + continue; + } + } + else if( ((current_node->value == 0) && (key[key_index] < 64)) || + ((current_node->value != 0) && (key[key_index] < + current_node->value)) ) + { + last_branch_parent = current_node; + current_node_parent = current_node; + current_node = current_node->left; + last_branch = current_node; + continue; + } + else + { + last_branch_parent = current_node; + current_node_parent = current_node; + current_node = current_node->right; + last_branch = current_node; + continue; + } + + } + if(current_node == NULL) + return NULL; + + if(last_branch == NULL) + { + + next_node = tst->head[(int)key[0]]; + tst->head[(int)key[0]] = NULL; + } + else if( (last_branch->left == NULL) && (last_branch->right == NULL) ) + { + + if(last_branch_parent->left == last_branch) + last_branch_parent->left = NULL; + else + last_branch_parent->right = NULL; + + next_node = last_branch; + } + else + { + + if( (last_branch->left != NULL) && (last_branch->right != NULL) ) + { + last_branch_replacement = last_branch->right; + last_branch_dangling_child = last_branch->left; + } + else if(last_branch->right != NULL) + { + last_branch_replacement = last_branch->right; + last_branch_dangling_child = NULL; + } + else + { + last_branch_replacement = last_branch->left; + last_branch_dangling_child = NULL; + } + + if(last_branch_parent == NULL) + tst->head[(int)key[0]]=last_branch_replacement; + else + { + if (last_branch_parent->left == last_branch) + last_branch_parent->left = last_branch_replacement; + else if (last_branch_parent->right == last_branch) + last_branch_parent->right = last_branch_replacement; + else + last_branch_parent->middle = last_branch_replacement; + } + + if(last_branch_dangling_child != NULL) + { + current_node = last_branch_replacement; + + while (current_node->left != NULL) + current_node = current_node->left; + + current_node->left = last_branch_dangling_child; + } + + next_node = last_branch; + } + + do + { + current_node = next_node; + next_node = current_node->middle; + + current_node->left = NULL; + current_node->right = NULL; + current_node->middle = tst->free_list; + tst->free_list = current_node; + } + while(current_node->value != 0); + + return next_node; + +} + diff --git a/ext/http11/tst_grow_node_free_list.c b/ext/http11/tst_grow_node_free_list.c new file mode 100644 index 0000000..da21333 --- /dev/null +++ b/ext/http11/tst_grow_node_free_list.c @@ -0,0 +1,38 @@ + +#include "tst.h" +#include +#include + +int tst_grow_node_free_list(struct tst *tst) +{ + struct node *current_node; + struct node_lines *new_line; + int i; + + + if((new_line = (struct node_lines *) malloc(sizeof(struct node_lines))) == NULL) + return TST_ERROR; + + if((new_line->node_line = (struct node *) + calloc(tst->node_line_width, sizeof(struct node))) == NULL) + { + free(new_line); + return TST_ERROR; + } + else + { + new_line->next = tst->node_lines; + tst->node_lines = new_line; + } + + current_node = tst->node_lines->node_line; + tst->free_list = current_node; + for (i = 1; i < tst->node_line_width; i++) + { + current_node->middle = &(tst->node_lines->node_line[i]); + current_node = current_node->middle; + } + current_node->middle = NULL; + return 1; +} + diff --git a/ext/http11/tst_init.c b/ext/http11/tst_init.c new file mode 100644 index 0000000..259734a --- /dev/null +++ b/ext/http11/tst_init.c @@ -0,0 +1,41 @@ + +#include "tst.h" +#include +#include + +struct tst *tst_init(int width) +{ + struct tst *tst; + struct node *current_node; + int i; + + +if((tst = (struct tst *) calloc(1, sizeof(struct tst))) == NULL) + return NULL; + +if ((tst->node_lines = (struct node_lines *) calloc(1, sizeof(struct node_lines))) == NULL) +{ + free(tst); + return NULL; +} + +tst->node_line_width = width; +tst->node_lines->next = NULL; +if ((tst->node_lines->node_line = (struct node *) calloc(width, sizeof(struct node))) == NULL) +{ + free(tst->node_lines); + free(tst); + return NULL; +} + +current_node = tst->node_lines->node_line; +tst->free_list = current_node; +for (i = 1; i < width; i++) +{ + current_node->middle = &(tst->node_lines->node_line[i]); + current_node = current_node->middle; +} +current_node->middle = NULL; +return tst; +} + diff --git a/ext/http11/tst_insert.c b/ext/http11/tst_insert.c new file mode 100644 index 0000000..de50f4a --- /dev/null +++ b/ext/http11/tst_insert.c @@ -0,0 +1,218 @@ + +#include "tst.h" +#include +#include +#include + +int tst_grow_node_free_list(struct tst *tst); +int tst_insert(unsigned char *key, void *data, struct tst *tst, int option, void **exist_ptr) +{ + struct node *current_node; + struct node *new_node_tree_begin = NULL; + struct node *new_node; + int key_index; + int perform_loop = 1; + + if (key == NULL) + return TST_NULL_KEY; + + if(key[0] == 0) + return TST_NULL_KEY; + + if(tst->head[(int)key[0]] == NULL) + { + + if(tst->free_list == NULL) + { + if(tst_grow_node_free_list(tst) != 1) + return TST_ERROR; + } + tst->head[(int)key[0]] = tst->free_list; + + tst->free_list = tst->free_list->middle; + current_node = tst->head[(int)key[0]]; + current_node->value = key[1]; + if(key[1] == 0) + { + current_node->middle = data; + return TST_OK; + } + else + perform_loop = 0; + } + + current_node = tst->head[(int)key[0]]; + key_index = 1; + while(perform_loop == 1) + { + if(key[key_index] == current_node->value) + { + + if(key[key_index] == 0) + { + if (option == TST_REPLACE) + { + if (exist_ptr != NULL) + *exist_ptr = current_node->middle; + + current_node->middle = data; + return TST_OK; + } + else + { + if (exist_ptr != NULL) + *exist_ptr = current_node->middle; + return TST_DUPLICATE_KEY; + } + } + else + { + if(current_node->middle == NULL) + { + + if(tst->free_list == NULL) + { + if(tst_grow_node_free_list(tst) != 1) + return TST_ERROR; + } + current_node->middle = tst->free_list; + + tst->free_list = tst->free_list->middle; + new_node_tree_begin = current_node; + current_node = current_node->middle; + current_node->value = key[key_index]; + break; + } + else + { + current_node = current_node->middle; + key_index++; + continue; + } + } + } + if(key[key_index] == 0) + { + if(tst->free_list == NULL) + { + if(tst_grow_node_free_list(tst) != 1) + return TST_ERROR; + } + new_node = tst->free_list; + tst->free_list = tst->free_list->middle; + + memcpy((void*)new_node, (void*)current_node, sizeof(struct node)); + current_node->value = 0; + if(new_node->value < 64) + { + current_node->left = new_node; + current_node->right = '\0'; + } + else + { + current_node->left = '\0'; + current_node->right = new_node; + } + + current_node->middle = data; + return TST_OK; + } + + if( ((current_node->value == 0) && (key[key_index] < 64)) || + ((current_node->value != 0) && (key[key_index] < + current_node->value)) ) + { + + if (current_node->left == NULL) + { + + if(tst->free_list == NULL) + { + if(tst_grow_node_free_list(tst) != 1) + return TST_ERROR; + } + current_node->left = tst->free_list; + + tst->free_list = tst->free_list->middle; + new_node_tree_begin = current_node; + current_node = current_node->left; + current_node->value = key[key_index]; + if(key[key_index] == 0) + { + current_node->middle = data; + return TST_OK; + } + else + break; + } + else + { + current_node = current_node->left; + continue; + } + } + else + { + + if (current_node->right == NULL) + { + + if(tst->free_list == NULL) + { + if(tst_grow_node_free_list(tst) != 1) + return TST_ERROR; + } + current_node->right = tst->free_list; + + tst->free_list = tst->free_list->middle; + new_node_tree_begin = current_node; + current_node = current_node->right; + current_node->value = key[key_index]; + break; + } + else + { + current_node = current_node->right; + continue; + } + } + } + + do + { + key_index++; + + if(tst->free_list == NULL) + { + if(tst_grow_node_free_list(tst) != 1) + { + current_node = new_node_tree_begin->middle; + + while (current_node->middle != NULL) + current_node = current_node->middle; + + current_node->middle = tst->free_list; + tst->free_list = new_node_tree_begin->middle; + new_node_tree_begin->middle = NULL; + + return TST_ERROR; + } + } + + + if(tst->free_list == NULL) + { + if(tst_grow_node_free_list(tst) != 1) + return TST_ERROR; + } + current_node->middle = tst->free_list; + + tst->free_list = tst->free_list->middle; + current_node = current_node->middle; + current_node->value = key[key_index]; + } while(key[key_index] !=0); + + current_node->middle = data; + return TST_OK; +} + diff --git a/ext/http11/tst_search.c b/ext/http11/tst_search.c new file mode 100644 index 0000000..176f953 --- /dev/null +++ b/ext/http11/tst_search.c @@ -0,0 +1,73 @@ + +#include "tst.h" +#include +#include +#include + + +void *tst_search(const unsigned char *key, struct tst *tst, int option, + unsigned int *match_len) +{ + struct node *current_node; + struct node *longest_match = NULL; + unsigned int longest_match_len = 0; + int key_index; + + assert(key != NULL && "key can't be NULL"); + assert(tst != NULL && "tst can't be NULL"); + + if (key[0] == 0) + return NULL; + + if (tst->head[(int) key[0]] == NULL) + return NULL; + + if (match_len) + *match_len = 0; + + current_node = tst->head[(int) key[0]]; + key_index = 1; + + while (current_node != NULL) { + if (key[key_index] == current_node->value) { + if (current_node->value == 0) { + if (match_len) + *match_len = key_index; + return current_node->middle; + } else { + current_node = current_node->middle; + key_index++; + continue; + } + } else { + if (current_node->value == 0) { + if (option & TST_LONGEST_MATCH) { + longest_match = current_node->middle; + longest_match_len = key_index; + } + + if (key[key_index] < 64) { + current_node = current_node->left; + continue; + } else { + current_node = current_node->right; + continue; + } + } else { + if (key[key_index] < current_node->value) { + current_node = current_node->left; + continue; + } else { + current_node = current_node->right; + continue; + } + } + } + } + + if (match_len) + *match_len = longest_match_len; + + return longest_match; + +} diff --git a/ext/http11_java/org/jruby/mongrel/Http11.java b/ext/http11_java/org/jruby/mongrel/Http11.java index 4b2fd2e..a73c79b 100644 --- a/ext/http11_java/org/jruby/mongrel/Http11.java +++ b/ext/http11_java/org/jruby/mongrel/Http11.java @@ -83,6 +83,8 @@ public class Http11 extends RubyObject { cHttpParser.defineFastMethod("error?",cf.getFastMethod("has_error")); cHttpParser.defineFastMethod("finished?",cf.getFastMethod("is_finished")); cHttpParser.defineFastMethod("nread",cf.getFastMethod("nread")); + + URIClassifier.createURIClassifier(runtime, mMongrel); } private Ruby runtime; diff --git a/ext/http11_java/org/jruby/mongrel/URIClassifier.java b/ext/http11_java/org/jruby/mongrel/URIClassifier.java new file mode 100644 index 0000000..73ba1ba --- /dev/null +++ b/ext/http11_java/org/jruby/mongrel/URIClassifier.java @@ -0,0 +1,129 @@ +/***** BEGIN LICENSE BLOCK ***** + * Version: CPL 1.0/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Common Public + * License Version 1.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.eclipse.org/legal/cpl-v10.html + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * Copyright (C) 2007 Ola Bini + * + * Alternatively, the contents of this file may be used under the terms of + * either of the GNU General Public License Version 2 or later (the "GPL"), + * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the CPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the CPL, the GPL or the LGPL. + ***** END LICENSE BLOCK *****/ +package org.jruby.mongrel; + +import org.jruby.Ruby; +import org.jruby.RubyArray; +import org.jruby.RubyClass; +import org.jruby.RubyHash; +import org.jruby.RubyModule; +import org.jruby.RubyObject; +import org.jruby.RubyString; + +import org.jruby.runtime.Block; +import org.jruby.runtime.CallbackFactory; +import org.jruby.runtime.ObjectAllocator; +import org.jruby.runtime.builtin.IRubyObject; + +import org.jruby.tst.TernarySearchTree; + +/** + * @author Ola Bini + */ +public class URIClassifier extends RubyObject { + public final static int TRIE_INCREASE = 30; + + private static ObjectAllocator ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klass) { + return new URIClassifier(runtime, klass); + } + }; + + public static void createURIClassifier(Ruby runtime, RubyModule mMongrel) { + RubyClass cURIClassifier = mMongrel.defineClassUnder("URIClassifier",runtime.getObject(),ALLOCATOR); + CallbackFactory cf = runtime.callbackFactory(URIClassifier.class); + cURIClassifier.defineFastMethod("initialize",cf.getFastMethod("initialize")); + cURIClassifier.defineFastMethod("register",cf.getFastMethod("register",IRubyObject.class,IRubyObject.class)); + cURIClassifier.defineFastMethod("unregister",cf.getFastMethod("unregister",IRubyObject.class)); + cURIClassifier.defineFastMethod("resolve",cf.getFastMethod("resolve",IRubyObject.class)); + } + + private TernarySearchTree tst; + + public URIClassifier(Ruby runtime, RubyClass clazz) { + super(runtime,clazz); + tst = new TernarySearchTree(TRIE_INCREASE); + } + + public IRubyObject initialize() { + setInstanceVariable("@handler_map",RubyHash.newHash(getRuntime())); + return this; + } + + public IRubyObject register(IRubyObject uri, IRubyObject handler) { + Object[] ptr = new Object[]{null}; + int rc = 0; + rc = tst.insert(uri.toString(),handler,0,ptr); + if(rc == TernarySearchTree.TST_DUPLICATE_KEY) { + throw getRuntime().newStandardError("Handler already registered with that name"); + } else if(rc == TernarySearchTree.TST_ERROR) { + throw getRuntime().newStandardError("Memory error registering handler"); + } else if(rc == TernarySearchTree.TST_NULL_KEY) { + throw getRuntime().newStandardError("URI was empty"); + } + ((RubyHash)getInstanceVariable("@handler_map")).aset(uri,handler); + return getRuntime().getNil(); + } + + public IRubyObject unregister(IRubyObject uri) { + IRubyObject handler = (IRubyObject)tst.delete(uri.toString()); + if(null != handler) { + ((RubyHash)getInstanceVariable("@handler_map")).delete(uri,Block.NULL_BLOCK); + return handler; + } + return getRuntime().getNil(); + } + + public IRubyObject resolve(IRubyObject _ri) { + IRubyObject handler = null; + int[] pref_len = new int[]{0}; + RubyArray result; + String uri_str; + RubyString uri = _ri.convertToString(); + + uri_str = uri.toString(); + handler = (IRubyObject)tst.search(uri_str,pref_len); + + result = getRuntime().newArray(); + + if(handler != null) { + result.append(uri.substr(0, pref_len[0])); + if(pref_len[0] == 1 && uri_str.startsWith("/")) { + result.append(uri); + } else { + result.append(uri.substr(pref_len[0],uri.getByteList().length())); + } + result.append(handler); + } else { + result.append(getRuntime().getNil()); + result.append(getRuntime().getNil()); + result.append(getRuntime().getNil()); + } + return result; + } +}// URIClassifier diff --git a/ext/http11_java/org/jruby/tst/Node.java b/ext/http11_java/org/jruby/tst/Node.java new file mode 100644 index 0000000..334763d --- /dev/null +++ b/ext/http11_java/org/jruby/tst/Node.java @@ -0,0 +1,18 @@ +/* + * See LICENSE file. + */ +/** + * $Id: $ + */ +package org.jruby.tst; + +/** + * @author Ola Bini + * @version $Revision: $ + */ +public class Node { + public char value; + public Node left; + public Object middle; + public Node right; +}// Node diff --git a/ext/http11_java/org/jruby/tst/NodeLines.java b/ext/http11_java/org/jruby/tst/NodeLines.java new file mode 100644 index 0000000..87b56d7 --- /dev/null +++ b/ext/http11_java/org/jruby/tst/NodeLines.java @@ -0,0 +1,16 @@ +/* + * See LICENSE file. + */ +/** + * $Id: $ + */ +package org.jruby.tst; + +/** + * @author Ola Bini + * @version $Revision: $ + */ +public class NodeLines { + public Node[] node_line; + public NodeLines next; +}// NodeLines diff --git a/ext/http11_java/org/jruby/tst/TernarySearchTree.java b/ext/http11_java/org/jruby/tst/TernarySearchTree.java new file mode 100644 index 0000000..5637bf1 --- /dev/null +++ b/ext/http11_java/org/jruby/tst/TernarySearchTree.java @@ -0,0 +1,433 @@ +/* + * See LICENSE file. + */ +/** + * $Id: $ + */ +package org.jruby.tst; + +/** + * @author Ola Bini + * @version $Revision: $ + */ +public class TernarySearchTree { + public final static int TST_OK = 0x1; + public final static int TST_ERROR = 0x2; + public final static int TST_NULL_KEY = 0x4; + public final static int TST_DUPLICATE_KEY = 0x8; + public final static int TST_REPLACE = 0x10; + + public int node_line_width; + public NodeLines node_lines; + public Node free_list; + public Node[] head = new Node[127]; + + public TernarySearchTree(final int width) { + this.node_lines = new NodeLines(); + this.node_line_width = width; + + this.node_lines.next = null; + this.node_lines.node_line = new Node[width]; + for(int i=0;i 0) { + exist_ptr[0] = current_node.middle; + } + current_node.middle = data; + return TST_OK; + } else { + if(exist_ptr.length > 0) { + exist_ptr[0] = current_node.middle; + } + return TST_DUPLICATE_KEY; + } + + } else { + if(current_node.middle == null) { + if(this.free_list == null) { + if(!grow_node_free_list()) { + return TST_ERROR; + } + } + current_node.middle = this.free_list; + this.free_list = (Node)this.free_list.middle; + new_node_tree_begin = current_node; + current_node = (Node)current_node.middle; + current_node.value = curr; + break; + } else { + current_node = (Node)current_node.middle; + key_index++; + continue; + } + } + } + + if((current_node.value == 0 && curr < 64) || + (current_node.value != 0 && curr < current_node.value)) { + if(current_node.left == null) { + if(this.free_list == null) { + if(!grow_node_free_list()) { + return TST_ERROR; + } + } + current_node.left = this.free_list; + this.free_list = (Node)this.free_list.middle; + new_node_tree_begin = current_node; + current_node = current_node.left; + current_node.value = curr; + if(curr == 0) { + current_node.middle = data; + return TST_OK; + } else { + break; + } + } else { + current_node = current_node.left; + continue; + } + } else { + if(current_node.right == null) { + if(this.free_list == null) { + if(!grow_node_free_list()) { + return TST_ERROR; + } + } + current_node.right = this.free_list; + this.free_list = (Node)this.free_list.middle; + new_node_tree_begin = current_node; + current_node = current_node.right; + current_node.value = curr; + break; + } else { + current_node = current_node.right; + continue; + } + } + } + + do { + key_index++; + if(key_index < key.length()) { + curr = key.charAt(key_index); + } else { + curr = 0; + } + + if(this.free_list == null) { + if(!grow_node_free_list()) { + current_node = (Node)new_node_tree_begin.middle; + while(current_node.middle != null) { + current_node = (Node)current_node.middle; + } + current_node.middle = this.free_list; + this.free_list = (Node)new_node_tree_begin.middle; + new_node_tree_begin.middle = null; + return TST_ERROR; + } + } + + if(this.free_list == null) { + if(!grow_node_free_list()) { + return TST_ERROR; + } + } + current_node.middle = this.free_list; + this.free_list = (Node)this.free_list.middle; + current_node = (Node)current_node.middle; + + current_node.value = curr; + } while(curr != 0); + + current_node.middle = data; + return TST_OK; + } + + public Object search(final String key, final int[] prefix_len) { + Node current_node = null; + Object longest_match = null; + int key_index = 0; + + if(key == null) { + throw new IllegalArgumentException("key can't be null"); + } + + if(key.length() == 0 || this.head[key.charAt(0)] == null) { + return null; + } + + if(prefix_len.length > 0) { + prefix_len[0] = 0; + } + + current_node = this.head[key.charAt(0)]; + key_index = 1; + + char curr = 0; + while(null != current_node) { + if(key_index < key.length()) { + curr = key.charAt(key_index); + } else { + curr = 0; + } + + if(curr == current_node.value) { + if(current_node.value == 0) { + if(prefix_len.length>0) { + prefix_len[0] = key_index; + } + return current_node.middle; + } else { + current_node = (Node)current_node.middle; + if(current_node != null && current_node.value == 0) { + if(prefix_len.length>0) { + prefix_len[0] = key_index+1; + } + longest_match = current_node.middle; + } + key_index++; + continue; + } + } else if((current_node.value == 0 && curr < 64) || + (current_node.value != 0 && curr < current_node.value)) { + if(current_node.value == 0) { + if(prefix_len.length>0) { + prefix_len[0] = key_index; + } + longest_match = current_node.middle; + } + current_node = current_node.left; + continue; + } else { + if(current_node.value == 0) { + if(prefix_len.length>0) { + prefix_len[0] = key_index; + } + longest_match = current_node.middle; + } + current_node = current_node.right; + continue; + } + } + + return longest_match; + } + + public Object delete(final String key) { + Node current_node = null; + Node current_node_parent = null; + Node last_branch = null; + Node last_branch_parent = null; + Object next_node = null; + Node last_branch_replacement = null; + Node last_branch_dangling_child = null; + int key_index = 1; + + if(key.length() == 0 || this.head[key.charAt(0)] == null) { + return null; + } + + current_node = this.head[key.charAt(0)]; + + char curr = 0; + while(null != current_node) { + if(key_index < key.length()) { + curr = key.charAt(key_index); + } else { + curr = 0; + } + if(curr == current_node.value) { + if(current_node.left != null || current_node.right != null) { + last_branch = current_node; + last_branch_parent = current_node_parent; + } + if(curr == 0) { + break; + } + current_node_parent = current_node; + current_node = (Node)current_node.middle; + key_index++; + continue; + } else if((current_node.value == 0 && curr < 64) || + (current_node.value != 0&& curr < current_node.value)) { + last_branch_parent = current_node; + current_node_parent = current_node; + current_node = current_node.left; + last_branch = current_node; + continue; + } else { + last_branch_parent = current_node; + current_node_parent = current_node; + current_node = current_node.right; + last_branch = current_node; + continue; + } + } + + if(null == current_node) { + return null; + } + + if(null == last_branch) { + next_node = this.head[key.charAt(0)]; + this.head[key.charAt(0)] = null; + } else if(last_branch.left == null && last_branch.right == null) { + if(last_branch_parent.left == last_branch) { + last_branch_parent.left = null; + } else { + last_branch_parent.right = null; + } + next_node = last_branch; + } else { + if(last_branch.left != null && last_branch.right != null) { + last_branch_replacement = last_branch.right; + last_branch_dangling_child = last_branch.left; + } else if(last_branch.right != null) { + last_branch_replacement = last_branch.right; + last_branch_dangling_child = null; + } else { + last_branch_replacement = last_branch.left; + last_branch_dangling_child = null; + } + + if(last_branch_parent == null) { + this.head[key.charAt(0)] = last_branch_replacement; + } else { + if(last_branch_parent.left == last_branch) { + last_branch_parent.left = last_branch_replacement; + } else if(last_branch_parent.right == last_branch) { + last_branch_parent.right = last_branch_replacement; + } else { + last_branch_parent.middle = last_branch_replacement; + } + } + + if(last_branch_dangling_child != null) { + current_node = last_branch_replacement; + while(current_node.left != null) { + current_node = current_node.left; + } + current_node.left = last_branch_dangling_child; + } + + next_node = last_branch; + } + + do { + current_node = (Node)next_node; + next_node = current_node.middle; + current_node.left = null; + current_node.right = null; + current_node.middle = this.free_list; + this.free_list = current_node; + } while(current_node.value != 0); + + + return next_node; + } + + public boolean grow_node_free_list() { + Node current_node = null; + NodeLines new_line = null; + + new_line = new NodeLines(); + + new_line.node_line = new Node[this.node_line_width]; + for(int i=0;i