5 files changed, 120 insertions, 61 deletions
diff --git a/README b/README
index cd9e868..050c3cf 100644
--- a/README
+++ b/README
@@ -11,7 +11,7 @@ scream without too many portability issues.
  
  == Status
  
-The 0.2.0 release of Mongrel features an HTTP core server that is the fastest possible
+The 0.2.1 release of Mongrel features an HTTP core server that is the fastest possible
  thing I could get without using something other than Ruby.  It features a few bug fixes,
  but mostly just a change to the Mongrel::HttpResponse class to make it more feature 
  complete.  The remaining development will be spent getting Mongrel to work with 
@@ -68,7 +68,7 @@ basic way to give a more complex 404 message.
  
  == Speed
  
-The 0.2.0 release probably consists of the most effort I've ever put into 
+The 0.2.1 release probably consists of the most effort I've ever put into 
  tuning a Ruby library for speed.  It consists of nearly everything I could think
  of to make Mongrel the fastest Ruby HTTP library possible.  I've tried about 
  seven different architectures and IO processing methods and none of them 
diff --git a/Rakefile b/Rakefile
index e379779..f084c88 100644
--- a/Rakefile
+++ b/Rakefile
@@ -27,4 +27,4 @@ setup_extension("http11", "http11")
  
  summary = "An experimental fast simple web server for Ruby."
  test_file = "test/test_ws.rb"
-setup_gem("mongrel", "0.2.0",  "Zed A. Shaw", summary, [], test_file)
+setup_gem("mongrel", "0.2.1",  "Zed A. Shaw", summary, [], test_file)
diff --git a/ext/http11/http11.c b/ext/http11/http11.c
index 1cdb525..c01798a 100644
--- a/ext/http11/http11.c
+++ b/ext/http11/http11.c
@@ -11,15 +11,21 @@ static VALUE cHttpParser;
  static VALUE cURIClassifier;
  static int id_handler_map;
  
+static VALUE global_http_prefix;
+static VALUE global_request_method;
+static VALUE global_path_info;
+static VALUE global_query_string;
+static VALUE global_http_version;
+
  
  void http_field(void *data, const char *field, size_t flen, const char *value, size_t vlen)
  {
    char *ch, *end;
    VALUE req = (VALUE)data;
-  VALUE f = rb_str_new2("HTTP_");
    VALUE v = rb_str_new(value, vlen);
-  
-  rb_str_buf_cat(f, field, flen); 
+  VALUE f = rb_str_dup(global_http_prefix);
+
+  f = rb_str_buf_cat(f, field, flen); 
    
    for(ch = RSTRING(f)->ptr, end = ch + RSTRING(f)->len; ch < end; ch++) {
      if(*ch == '-') {
@@ -36,16 +42,14 @@ void request_method(void *data, const char *at, size_t length)
  {
    VALUE req = (VALUE)data;
    VALUE val = rb_str_new(at, length);
-  VALUE id = rb_str_new2("REQUEST_METHOD");
-  rb_hash_aset(req, id, val);
+  rb_hash_aset(req, global_request_method, val);
  }
  
  void path_info(void *data, const char *at, size_t length)
  {
    VALUE req = (VALUE)data;
    VALUE val = rb_str_new(at, length);
-  VALUE id = rb_str_new2("PATH_INFO");
-  rb_hash_aset(req, id, val);
+  rb_hash_aset(req, global_path_info, val);
  }
  
  
@@ -53,16 +57,14 @@ void query_string(void *data, const char *at, size_t length)
  {
    VALUE req = (VALUE)data;
    VALUE val = rb_str_new(at, length);
-  VALUE id = rb_str_new2("QUERY_STRING");
-  rb_hash_aset(req, id, val);
+  rb_hash_aset(req, global_query_string, val);
  }
  
  void http_version(void *data, const char *at, size_t length)
  {
    VALUE req = (VALUE)data;
    VALUE val = rb_str_new(at, length);
-  VALUE id = rb_str_new2("HTTP_VERSION");
-  rb_hash_aset(req, id, val);
+  rb_hash_aset(req, global_http_version, val);
  }
  
  
@@ -258,23 +260,6 @@ VALUE URIClassifier_alloc(VALUE klass)
   * will do two searches most of the time in order to find the right handler for the
   * registered prefix portion.
   *
- * Here's how it all works.  Let's say you register "/blog" with a BlogHandler.  Great.
- * Now, someone goes to "/blog/zedsucks/ass".  You want SCRIPT_NAME to be "/blog" and
- * PATH_INFO to be "/zedsucks/ass".  URIClassifier first does a TST search and comes
- * up with a failure, but knows that the failure ended at the "/blog" part.  So, that's
- * the SCRIPT_NAME.  It then tries a second search for just "/blog".  If that comes back
- * good then it sets the rest ("/zedsucks/ass") to the PATH_INFO and returns the BlogHandler.
- *
- * The optimal approach would be to not do the search twice, but the TST lib doesn't
- * really support returning prefixes.  Might not be hard to add later.
- *
- * The key though is that it will try to match the *longest* match it can.  If you 
- * also register "/blog/zed" then the above URI will give SCRIPT_NAME="/blog/zed", 
- * PATH_INFO="sucks/ass".  Probably not what you want, so your handler will need to
- * do the 404 thing.
- *
- * Take a look at the postamble of example/tepee.rb to see how this is handled for
- * Camping. 
   */
  VALUE URIClassifier_init(VALUE self)
  {
@@ -283,6 +268,8 @@ VALUE URIClassifier_init(VALUE self)
    // we create an internal hash to protect stuff from the GC
    hash = rb_hash_new();
    rb_ivar_set(self, id_handler_map, hash);
+
+  return self;
  }
  
  
@@ -356,7 +343,19 @@ VALUE URIClassifier_unregister(VALUE self, VALUE uri)
   *
   * Attempts to resolve either the whole URI or at the longest prefix, returning
   * the prefix (as script_info), path (as path_info), and registered handler
- * (usually an HttpHandler).
+ * (usually an HttpHandler).  If it doesn't find a handler registered at the longest
+ * match then it returns nil,nil,nil.
+ *
+ * Because the resolver uses a trie you are able to register a handler at *any* character
+ * in the URI and it will be handled as long as it's the longest prefix.  So, if you 
+ * registered handler #1 at "/something/lik", and #2 at "/something/like/that", then a
+ * a search for "/something/like" would give you #1.  A search for "/something/like/that/too"
+ * would give you #2.
+ * 
+ * This is very powerful since it means you can also attach handlers to parts of the ; 
+ * (semi-colon) separated path params, any part of the path, use off chars, anything really.
+ * It also means that it's very efficient to do this only taking as long as the URI has
+ * characters.
   *
   * It expects strings.  Don't try other string-line stuff yet.
   */
@@ -366,8 +365,6 @@ VALUE URIClassifier_resolve(VALUE self, VALUE uri)
    int pref_len = 0;
    struct tst *tst = NULL;
    VALUE result;
-  VALUE script_name;
-  VALUE path_info;
    unsigned char *uri_str = NULL;
    unsigned char *script_name_str = NULL;
  
@@ -379,32 +376,17 @@ VALUE URIClassifier_resolve(VALUE self, VALUE uri)
    // setup for multiple return values
    result = rb_ary_new();
  
-
-  if(handler == NULL) {
-    script_name = rb_str_substr (uri, 0, pref_len);
-    script_name_str = (unsigned char *)StringValueCStr(script_name);
-
-    handler = tst_search(script_name_str, tst, NULL);
-
-    if(handler == NULL) {
-      // didn't find the script name at all
-      rb_ary_push(result, Qnil);
-      rb_ary_push(result, Qnil);
-      rb_ary_push(result, Qnil);
-      return result;
-    } else {
-      // found a handler, setup the path info and we're good
-      path_info = rb_str_substr(uri, pref_len, RSTRING(uri)->len);
-    }
+  if(handler) {
+    rb_ary_push(result, rb_str_substr (uri, 0, pref_len));
+    rb_ary_push(result, rb_str_substr(uri, pref_len, RSTRING(uri)->len));
+    rb_ary_push(result, (VALUE)handler);
    } else {
-    // whole thing was found, so uri is the script name, path info empty
-    script_name = uri;
-    path_info = rb_str_new2("");
+    // not found so push back nothing
+    rb_ary_push(result, Qnil);
+    rb_ary_push(result, Qnil);
+    rb_ary_push(result, Qnil);
    }
  
-  rb_ary_push(result, script_name);
-  rb_ary_push(result, path_info);
-  rb_ary_push(result, (VALUE)handler);
    return result;
  }
  
@@ -414,6 +396,17 @@ void Init_http11()
  
    mMongrel = rb_define_module("Mongrel");
    id_handler_map = rb_intern("@handler_map");
+
+  global_http_prefix = rb_str_new2("HTTP_");
+  rb_global_variable(&global_http_prefix);
+  global_request_method = rb_str_new2("REQUEST_METHOD");
+  rb_global_variable(&global_request_method);
+  global_path_info = rb_str_new2("PATH_INFO");
+  rb_global_variable(&global_path_info);
+  global_query_string = rb_str_new2("QUERY_STRING");
+  rb_global_variable(&global_query_string);
+  global_http_version = rb_str_new2("HTTP_VERSION");
+  rb_global_variable(&global_http_version);
    
    cHttpParser = rb_define_class_under(mMongrel, "HttpParser", rb_cObject);
    rb_define_alloc_func(cHttpParser, HttpParser_alloc);
diff --git a/ext/http11/tst_search.c b/ext/http11/tst_search.c
index efa1cfa..4cfe6ff 100644
--- a/ext/http11/tst_search.c
+++ b/ext/http11/tst_search.c
@@ -7,18 +7,20 @@
  void *tst_search(unsigned char *key, struct tst *tst, int *prefix_len)
  {
    struct node *current_node;
+  void *longest_match = NULL;
    int key_index;
    
    assert(key != NULL && "key can't be NULL");
    assert(tst != NULL && "tst can't be NULL");
    
-  
    if(key[0] == 0)
      return NULL;
    
    if(tst->head[(int)key[0]] == NULL)
      return NULL;
+
    
+  if(prefix_len) *prefix_len = 0;
    current_node = tst->head[(int)key[0]];
    key_index = 1;
    
@@ -30,7 +32,13 @@ void *tst_search(unsigned char *key, struct tst *tst, int *prefix_len)
              if(prefix_len) *prefix_len = key_index;
              return current_node->middle;
            } else {
+
              current_node = current_node->middle;
+            if(current_node && current_node->value == 0) {
+              if(prefix_len) *prefix_len = key_index+1;
+              longest_match = current_node->middle;
+            }
+
              key_index++;
              continue;
            }
@@ -39,16 +47,23 @@ void *tst_search(unsigned char *key, struct tst *tst, int *prefix_len)
                 ((current_node->value != 0) && (key[key_index] <
                                                 current_node->value)) )
          {
+          if(current_node->left && current_node->value == 0) {
+            if(prefix_len) *prefix_len = key_index;
+            longest_match = current_node->middle;
+          }
            current_node = current_node->left;
            continue;
          }
        else
          {
+          if(current_node->right && current_node->value == 0) {
+            if(prefix_len) *prefix_len = key_index;
+            longest_match = current_node->middle;
+          }
            current_node = current_node->right;
            continue;
          }
      }
    
-  if(prefix_len) *prefix_len = key_index;
-  return NULL;
+  return longest_match;
  }
diff --git a/test/test_uriclassifier.rb b/test/test_uriclassifier.rb
index ae5df1d..05db8d2 100644
--- a/test/test_uriclassifier.rb
+++ b/test/test_uriclassifier.rb
@@ -25,10 +25,12 @@ class URIClassifierTest < Test::Unit::TestCase
      u = URIClassifier.new
      u.register(prefix,1)
  
+    sn,pi,val = u.resolve(prefix)
      sn,pi,val = u.resolve(test)
      assert val != nil, "didn't resolve"
      assert_equal prefix,sn, "wrong script name"
      assert_equal test[sn.length .. -1],pi, "wrong path info"
+    
    end
  
    def test_not_finding
@@ -99,6 +101,55 @@ class URIClassifierTest < Test::Unit::TestCase
      puts "\nRESOLVE(#{count}): #{res}"
      puts "REG_UNREG(#{count}): #{reg_unreg}"
    end
-      
+
+
+  def test_uri_branching
+    u = URIClassifier.new
+    u.register("/test", 1)
+    u.register("/test/this",2)
+  
+    sn,pi,h = u.resolve("/test")
+    sn,pi,h = u.resolve("/test/that")
+    assert_equal "/test", sn, "failed to properly find script off branch portion of uri"
+    assert_equal "/that", pi, "didn't get the right patch info"
+    assert_equal 1, h, "wrong result for branching uri"
+  end
+
+
+  def test_all_prefixing
+    tests = ["/test","/test/that","/test/this"]
+    uri = "/test/this/that"
+    u = URIClassifier.new
+    
+    cur = ""
+    uri.each_byte do |c|
+      cur << c.chr
+      u.register(cur, c)
+    end
+
+    # try to resolve everything with no asserts as a fuzzing
+    tests.each do |prefix|
+      cur = ""
+      prefix.each_byte do |c|
+        cur << c.chr
+        sn, pi, h = u.resolve(cur)
+        assert sn != nil, "didn't get a script name"
+        assert pi != nil, "didn't get path info"
+        assert h != nil, "didn't find the handler"
+      end
+    end
+
+    # assert that we find stuff
+    tests.each do |t|
+      sn, pi, h = u.resolve(t)
+      assert h != nil, "didn't find handler"
+    end
+
+    # assert we don't find stuff
+    sn, pi, h = u.resolve("chicken")
+    assert_nil h, "shoulnd't find anything"
+    assert_nil sn, "shoulnd't find anything"
+    assert_nil pi, "shoulnd't find anything"
+  end
  end