From 1a16539f4258f670c61e8aced1184216a517847b Mon Sep 17 00:00:00 2001
From: zedshaw <zedshaw@19e92222-5c0b-0410-8929-a290d50e31e9>
Date: Sat, 25 Nov 2006 02:54:08 +0000
Subject: Removed experimental mime carving (in branch now).

git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@413 19e92222-5c0b-0410-8929-a290d50e31e9
---
 ext/http11/http11.c    | 315 -------------------------------------------------
 lib/mongrel.rb         |  24 ----
 test/test_bmhsearch.rb | 150 -----------------------
 3 files changed, 489 deletions(-)
 delete mode 100644 test/test_bmhsearch.rb

diff --git a/ext/http11/http11.c b/ext/http11/http11.c
index 0d2c79a..88c0311 100644
--- a/ext/http11/http11.c
+++ b/ext/http11/http11.c
@@ -9,26 +9,11 @@
 #include "http11_parser.h"
 #include <ctype.h>
 #include "tst.h"
-#include <string.h>
-
-typedef struct BMHSearchState {
-  size_t occ[UCHAR_MAX+1];
-  size_t htotal;
-  unsigned char *needle;
-  size_t nlen;
-  size_t skip;
-  size_t max_find;
-
-  size_t *found_at;
-  size_t nfound;
-} BMHSearchState;
 
 static VALUE mMongrel;
 static VALUE cHttpParser;
-static VALUE cBMHSearch;
 static VALUE cURIClassifier;
 static VALUE eHttpParserError;
-static VALUE eBMHSearchError;
 
 #define id_handler_map rb_intern("@handler_map")
 #define id_http_body rb_intern("@http_body")
@@ -192,294 +177,6 @@ void header_done(void *data, const char *at, size_t length)
 }
 
 
-inline size_t max(size_t a, size_t b) { return a>b ? a : b; }
-
-void BMHSearch_free(void *data) {
-  BMHSearchState *S = (BMHSearchState *)data;
-
-  if(data) {
-    if(S->needle) free(S->needle);
-    if(S->found_at) free(S->found_at);
-    free(data);
-  }
-}
-
-
-VALUE BMHSearch_alloc(VALUE klass)
-{
-  VALUE obj;
-
-  BMHSearchState *S = ALLOC_N(BMHSearchState, 1);
-
-  obj = Data_Wrap_Struct(klass, NULL, BMHSearch_free, S);
-
-  return obj;
-}
-
-/**
- * call-seq:
- *    BMHSearch.new(needle, max_find) -> searcher
- *
- * Prepares the needle for searching and allocates enough space to 
- * retain max_find locations.  BMHSearch will throw an exception
- * if you go over the max_find without calling BMHSearch#pop.
- */
-VALUE BMHSearch_init(VALUE self, VALUE needle, VALUE max_find)
-{
-  BMHSearchState *S = NULL;
-  size_t a, b;
-
-  DATA_GET(self, BMHSearchState, S);
-
-  S->htotal = 0;
-  S->nfound = 0;
-  S->skip = 0;
-  S->nlen = RSTRING(needle)->len;
-  S->max_find = FIX2INT(max_find);
-  S->needle = NULL;
-  S->found_at = NULL;
-
-  if(S->nlen <= 0) rb_raise(eBMHSearchError, "Needle can't be 0 length.");
-
-  // copy it so we don't have to worry about Ruby messing it up
-  S->needle = ALLOC_N(unsigned char, S->nlen);
-  memcpy(S->needle, RSTRING(needle)->ptr, S->nlen);
-
-  // setup the number of finds they want
-  S->found_at = ALLOC_N(size_t, S->max_find);
-
-  /* Preprocess */
-  /* Initialize the table to default value */
-  for(a=0; a<UCHAR_MAX+1; a=a+1) S->occ[a] = S->nlen;
-
-  /* Then populate it with the analysis of the needle */
-  b=S->nlen;
-  for(a=0; a < S->nlen; a=a+1)
-  {
-    b=b-1;
-    S->occ[S->needle[a]] = b;
-  }
-
-  return self;
-}
-
-/**
- * call-seq:
- *    searcher.find(haystack) -> nfound
- *
- * This will look for the needle in the haystack and return the total
- * number found so far.  The key ingredient is that you can pass in 
- * as many haystacks as you want as long as they are longer than the
- * needle.  Each time you call find, it adds to the total and number
- * found.  The purpose is to allow you to read a large string in chunks
- * and find the needle.
- *
- * The main purpose of the nfound is so that you can periodically call
- * BMHSearch.pop to clear the list of found locations before you run
- * out of max_finds.
- */
-VALUE BMHSearch_find(VALUE self, VALUE hay)
-{
-  size_t hpos = 0, npos = 0, skip = 0;
-  unsigned char* haystack = NULL;
-  size_t hlen = 0, last = 0;
-  BMHSearchState *S = NULL;
-
-  DATA_GET(self, BMHSearchState, S);
-
-  haystack = RSTRING(hay)->ptr;
-  hlen = RSTRING(hay)->len;
-
-  /* Sanity checks on the parameters */
-  if(S->nlen > hlen) {
-    rb_raise(eBMHSearchError, "Haystack can't be smaller than needle.");
-  } else if(S->nlen <= 0) {
-    rb_raise(eBMHSearchError, "Needle can't be 0 length.");
-  } else if(!haystack || !S->needle) {
-    rb_raise(eBMHSearchError, "Corrupt search state. REALLY BAD!");
-  }
-
-  /* Check for a trailing remainder, which is only possible if skip > 1 */
-  if(S->skip) {
-    // only scan for what should be the rest of the string
-    size_t remainder = S->nlen - S->skip;
-    if(!memcmp(haystack, S->needle + S->skip, remainder)) {
-      // record a find
-      S->found_at[S->nfound++] = S->htotal - S->skip;
-      // move up by the amount found 
-      hpos += remainder; 
-    }
-  }
-
-
-  /* Start searching from the end of S->needle (this is not a typo) */
-  hpos = S->nlen-1;
-
-  while(hpos < hlen) {
-    /* Compare the S->needle backwards, and stop when first mismatch is found */
-    npos = S->nlen-1;
-
-    while(hpos < hlen && haystack[hpos] == S->needle[npos]) {
-      if(npos == 0) {
-        // found one, log it in found_at, but reduce by the skip if we're at the beginning
-        S->found_at[S->nfound++] = hpos + S->htotal;
-        last = hpos;  // keep track of the last one we found in this chunk
-
-        if(S->nfound > S->max_find) {
-          rb_raise(eBMHSearchError, "More than max requested needles found. Use pop.");
-        }
-
-        /* continue at the next possible spot, from end of S->needle (not typo) */
-        hpos += S->nlen + S->nlen - 1; 
-        npos = S->nlen - 1;
-        continue;
-      } else {
-        hpos--;
-        npos--;
-      }
-    }
-
-    // exhausted the string already, done
-    if(hpos > hlen) break;
-
-    /* Find out how much ahead we can skip based on the byte that was found */
-    skip = max(S->nlen - npos, S->occ[haystack[hpos]]);
-    hpos += skip;
-  }
-
-
-  skip = 0;  // skip defaults to 0
-
-  // don't bother if the string ends in the needle completely
-  if(S->nfound == 0 || last != hlen - S->nlen) {
-    // invert the occ array to figure out how much to jump back
-    size_t back = S->nlen - S->occ[haystack[hlen-1]];
-
-    // the needle could have an ending char that's also repeated in the needle
-    // in that case, we have to adjust to search for nlen-1 just in case
-    if(haystack[hlen-1] == S->needle[S->nlen - 1]) {
-      back = S->nlen - 1;
-    }
-
-    int found_it = 0;  // defaults to 0 for test at end of it
-    if(back < S->nlen && back > 0) {
-      // search for the longest possible prefix
-      for(hpos = hlen - back; hpos < hlen; hpos++) {
-        skip = hlen - hpos;
-        if(!memcmp(haystack+hpos, S->needle, skip)) {
-          found_it = 1; // make sure we actually found it
-          break;
-        }
-      }
-    }
-    // skip will be 1 if nothing was found, so we have to force it 0
-    if(!found_it) skip = 0;
-  }
-
-  // keep track of the skip, it's set to 0 by the code above
-  S->skip = skip;
-  // update the total processed so far so indexes will be in sync
-  S->htotal += hlen;
-
-  return INT2FIX(S->nfound);
-}
-
-
-/**
- * call-seq:
- *    searcher.pop -> [1, 2, 3, ..]
- *
- * Pops off the locations in the TOTAL string (over all haystacks processed)
- * clearing the internal buffer for more space and letting you use it.
- */
-VALUE BMHSearch_pop(VALUE self)
-{
-  int i = 0;
-  BMHSearchState *S = NULL;
-  VALUE result;
-
-  DATA_GET(self, BMHSearchState, S);
-
-  result = rb_ary_new();
-  for(i = 0; i < S->nfound; i++) {
-    rb_ary_push(result, INT2FIX(S->found_at[i]));
-  }
-
-  S->nfound = 0;
-  
-  return result;
-}
-
-
-/**
- * call-seq:
- *    searcher.nfound -> Fixed
- *
- * The number found so far.
- */
-VALUE BMHSearch_nfound(VALUE self)
-{
-  BMHSearchState *S = NULL;
-  DATA_GET(self, BMHSearchState, S);
-  return INT2FIX(S->nfound);
-}
-
-/**
- * call-seq:
- *    searcher.max_find -> Fixed
- *
- * The current maximum find setting (cannot be changed).
- */
-VALUE BMHSearch_max_find(VALUE self)
-{
-  BMHSearchState *S = NULL;
-  DATA_GET(self, BMHSearchState, S);
-  return INT2FIX(S->max_find);
-}
-
-/**
- * call-seq:
- *    searcher.total -> Fixed
- *
- * The total bytes processed so far.
- */
-VALUE BMHSearch_total(VALUE self)
-{
-  BMHSearchState *S = NULL;
-  DATA_GET(self, BMHSearchState, S);
-  return INT2FIX(S->htotal);
-}
-
-/**
- * call-seq:
- *    searcher.needle -> String
- *
- * A COPY of the internal needle string being used for searching.
- */
-VALUE BMHSearch_needle(VALUE self)
-{
-  BMHSearchState *S = NULL;
-  DATA_GET(self, BMHSearchState, S);
-  return rb_str_new(S->needle, S->nlen);
-}
-
-/**
- * call-seq:
- *    searcher.has_trailing? -> Boolean
- *
- * Tells you whether the last call to BMHSearch#find has possible
- * trailing matches.  This is mostly for completeness but you
- * might use this to adjust the amount of data you're reading
- * or the buffer size.
- */
-VALUE BMHSearch_has_trailing(VALUE self)
-{
-  BMHSearchState *S = NULL;
-  DATA_GET(self, BMHSearchState, S);
-  return S->skip ? Qtrue : Qfalse;
-}
-
-
 void HttpParser_free(void *data) {
   TRACE();
 
@@ -864,18 +561,6 @@ void Init_http11()
   DEF_GLOBAL(port_80, "80");
 
   eHttpParserError = rb_define_class_under(mMongrel, "HttpParserError", rb_eIOError);
-  eBMHSearchError = rb_define_class_under(mMongrel, "BMHSearchError", rb_eIOError);
-
-  cBMHSearch = rb_define_class_under(mMongrel, "BMHSearch", rb_cObject);
-  rb_define_alloc_func(cBMHSearch, BMHSearch_alloc);
-  rb_define_method(cBMHSearch, "initialize", BMHSearch_init,2);
-  rb_define_method(cBMHSearch, "find", BMHSearch_find,1);
-  rb_define_method(cBMHSearch, "pop", BMHSearch_pop,0);
-  rb_define_method(cBMHSearch, "nfound", BMHSearch_nfound,0);
-  rb_define_method(cBMHSearch, "total", BMHSearch_total,0);
-  rb_define_method(cBMHSearch, "needle", BMHSearch_needle,0);
-  rb_define_method(cBMHSearch, "max_find", BMHSearch_max_find,0);
-  rb_define_method(cBMHSearch, "has_trailing?", BMHSearch_has_trailing,0);
 
   cHttpParser = rb_define_class_under(mMongrel, "HttpParser", rb_cObject);
   rb_define_alloc_func(cHttpParser, HttpParser_alloc);
diff --git a/lib/mongrel.rb b/lib/mongrel.rb
index f9e373b..aad3a31 100644
--- a/lib/mongrel.rb
+++ b/lib/mongrel.rb
@@ -199,20 +199,7 @@ module Mongrel
       content_length = @params[Const::CONTENT_LENGTH].to_i
       remain = content_length - @params.http_body.length
       
-      if @params.has_key? 'CONTENT_TYPE'
-        # see if it's multipart, and carve out the boundary for later
-        @mpart_type, @mpart_boundary = @params['CONTENT_TYPE'].split(/;\s*/)
-        if @mpart_type and @mpart_boundary and @mpart_boundary.include? "="
-          @mpart_boundary = @mpart_boundary.split("=")[1].strip
-          STDERR.puts "boundary: #{@mpart_boundary}"
-          @params['MULTIPART_TYPE'] = @mpart_type
-          @params['MULTIPART_BOUNDARY'] = @mpart_boundary
-          @search = BMHSearch.new(@mpart_boundary, 100)
-        end
-      end
-
       dispatcher.request_begins(@params) if dispatcher
-      @search.find @params.http_body if @search and @params.http_body.length > @mpart_boundary.length
 
       # Some clients (like FF1.0) report 0 for body and then send a body.  This will probably truncate them but at least the request goes through usually.
       if remain <= 0
@@ -235,15 +222,6 @@ module Mongrel
         read_body(remain, content_length, dispatcher)
       end
 
-      if @search and @body
-        STDERR.puts "number of boundaries: #{@search.nfound}"
-        @body.rewind
-        @search.pop.each do |boundary|
-          @body.seek(boundary - 2)
-          STDERR.puts "BOUNDARY at #{boundary}: #{@body.read(@mpart_boundary.length + 2)}"
-        end
-      end
-
       @body.rewind if @body
     end
 
@@ -256,7 +234,6 @@ module Mongrel
       begin
         # write the odd sized chunk first
         @params.http_body = read_socket(remain % Const::CHUNK_SIZE)
-        @search.find(@params.http_body) if @search
 
         remain -= @body.write(@params.http_body)
         dispatcher.request_progress(@params, remain, total) if dispatcher
@@ -265,7 +242,6 @@ module Mongrel
         until remain <= 0 or @socket.closed?
           # ASSUME: we are writing to a disk and these writes always write the requested amount
           @params.http_body = read_socket(Const::CHUNK_SIZE)
-          @search.find(@params.http_body) if @search
           remain -= @body.write(@params.http_body)
           dispatcher.request_progress(@params, remain, total) if dispatcher
         end
diff --git a/test/test_bmhsearch.rb b/test/test_bmhsearch.rb
deleted file mode 100644
index 291d35e..0000000
--- a/test/test_bmhsearch.rb
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright (c) 2005 Zed A. Shaw 
-# You can redistribute it and/or modify it under the same terms as Ruby.
-#
-# Additional work donated by contributors.  See http://mongrel.rubyforge.org/attributions.html 
-# for more information.
-
-require 'test/unit'
-require 'mongrel'
-require 'http11'
-require File.dirname(__FILE__) + "/testhelp.rb"
-
-include Mongrel
-
-class BMHSearchTest < Test::Unit::TestCase
-
-  def setup
-    @needle = "needle"
-  end
-
-  def teardown
-  end
-
-  def test_operations
-    to_find = "this has a needle and a need"
-    s = BMHSearch.new(@needle, 10)
-    assert s.needle == @needle, "internal needle does not match needle"
-    assert_equal s.max_find, 10, "max_found isn't right"
-
-    n = s.find(to_find)
-    assert_equal n,1,"wrong number found"
-    assert_equal n,s.nfound, "returned found and nfound don't match"
-
-    f = s.pop
-    assert f, "didn't get the locations"
-    assert_equal f.length,n,"wrong number of returned finds"
-    assert_equal f[0],11,"wrong location returned"
-
-    assert_equal to_find.length, s.total, "total doesn't match searched length"
-    assert s.has_trailing?, "should have trailing data"
-
-    to_find2 = "le through a needle dude"
-    n = s.find(to_find2)
-
-    assert_equal n,2,"second find not working"
-    assert_equal n,s.nfound, "nfound don't match"
-
-    f = s.pop
-    assert f, "pop after second find not working"
-    assert_equal f[0],24,"first location of second find not right"
-    assert_equal f[1],41,"second location of second find not right"
-
-    assert_equal to_find.length+to_find2.length,s.total,"wrong total length"
-    assert !s.has_trailing?, "should not have trailing"
-
-    assert_raise BMHSearchError do
-      11.times { s.find("this has a needle") }
-    end
-  end
-
-  def test_boundaries_over_chunks
-    hay = ["zed is a tool ba",
-      "ggiethis has the baggieb",
-      "aggie of baggie-baggie tricks in another ",
-      "baggie"]
-
-    total = "zed is a tool baggiethis has the baggiebaggie of baggie-baggie tricks in another baggie"
-
-    s = BMHSearch.new("baggie", 20)
-
-    hay.each {|h| s.find(h) }
-    hay_found = s.pop
-   
-    s = BMHSearch.new("baggie", 20)
-    s.find(total)
-    total_found = s.pop
-
-    assert_equal hay_found.length, total_found.length, "wrong number of needles found"
-
-    total_found.length.times do |i|
-      assert_equal total_found[i], hay_found[i], "boundary doesn't match"
-    end
-  end
-
-
-  def test_fuzzing
-    begin
-      has_rfuzz = require 'rfuzz/random'
-    rescue Object
-      has_rfuzz = false
-    end
-
-    if has_rfuzz
-      r = RFuzz::RandomGenerator.new
-      needles = r.base64(20, 64).collect {|n| "\r\n" + n.strip }
-      needles.each do |needle|
-        next if needle.length == 0
-
-        nchunks = r.num(10) + 10
-        bmh = BMHSearch.new(needle, nchunks+1)
-        total = ""  # used to collect the full string for compare
-
-        # each needle is sprinkled into up to 100 chunks
-        nchunks.times do
-          # each chunk is up to 16k in size
-          chunk = r.bytes(r.num(16 * 1024) + needle.length * 2)
-          chunk.gsub! needle, ""
-
-          # make about 60% go across boundaries
-          if r.num(10) < 6
-            # this one gets cut in two
-            cut_at = r.num(needle.length - 1) + 1
-            n1 = needle[0 ... cut_at]
-            n2 = needle[cut_at .. -1]
-            
-            assert_equal n1+n2, needle, "oops, messed up breaking the needle"
-
-            last_nfound = bmh.nfound
-            bmh.find(chunk + n1)
-            assert bmh.has_trailing?, "should have trailing on #{n1}:#{n2} on chunk length: #{chunk.length}"
-            assert_equal last_nfound, bmh.nfound, "shouldn't find it yet"
-
-            bmh.find(n2 + chunk)
-            assert_equal last_nfound+1, bmh.nfound, "should have found the boundary for #{n1}:#{n2} on chunk length: #{chunk.length}"
-            total << chunk + n1 + n2 + chunk
-          else
-            # this one is put in complete
-            bmh.find(chunk + needle)
-            bmh.find(chunk)
-
-            total << chunk + needle + chunk
-          end
-        end
-
-        tbmh = BMHSearch.new(needle, nchunks+1)
-        tbmh.find(total)
-
-        assert_equal total.length, bmh.total, "totals don't match"
-        assert_equal tbmh.nfound, bmh.nfound, "nfound don't match"
-
-        total_found = tbmh.pop
-        hay_found = bmh.pop
-
-        total_found.length.times do |i|
-          assert_equal total_found[i], hay_found[i], "boundary doesn't match"
-        end
-      end
-    end
-  end
-end
-
-- 
cgit v1.2.3-24-ge0c7