More fully tested BMH with extensive fuzzing test.

git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@388 19e92222-5c0b-0410-8929-a290d50e31e9
author: zedshaw <zedshaw@19e92222-5c0b-0410-8929-a290d50e31e9> 2006-11-17 22:48:27 +0000
committer: zedshaw <zedshaw@19e92222-5c0b-0410-8929-a290d50e31e9> 2006-11-17 22:48:27 +0000
commit: 326a43700f255a58c5e41e31f0a3ea57d2e18769 (patch)
tree: c69ecc82f92458b383957a48b0bec605e664cf66 /test
parent: 35d2e74d7af797f820d5a1ccf38e9f281403524f (diff)
download: unicorn-326a43700f255a58c5e41e31f0a3ea57d2e18769.tar.gz
1 files changed, 93 insertions, 0 deletions
diff --git a/test/test_bmhsearch.rb b/test/test_bmhsearch.rb
index 98c83b9..4fd7ee8 100644
--- a/test/test_bmhsearch.rb
+++ b/test/test_bmhsearch.rb
@@ -57,6 +57,99 @@ class BMHSearchTest < Test::Unit::TestCase
      end
    end
  
+  def test_boundaries_over_chunks # searching chunk by chunk must report the same matches as one whole-string search
+    hay = ["zed is a tool ba", # the first two chunk boundaries each cut a "baggie" in two
+      "ggiethis has the baggieb",
+      "aggie of baggie-baggie tricks in another ",
+      "baggie"]
  
+    total = "zed is a tool baggiethis has the baggiebaggie of baggie-baggie tricks in another baggie" # the four hay chunks joined into one string
+
+    s = BMHSearch.new("baggie", 20) # NOTE(review): 20 is presumably the maximum number of matches to record -- confirm against BMHSearch
+
+    hay.each {|h| s.find(h) } # feed the chunks, in order, to the same searcher
+    hay_found = s.pop # NOTE(review): presumably the recorded match locations -- confirm
+   
+    s = BMHSearch.new("baggie", 20) # fresh searcher for the single-string reference run
+    s.find(total)
+    total_found = s.pop
+
+    assert_equal hay_found.length, total_found.length, "wrong number of needles found"
+
+    total_found.length.times do |i| # compare the two result lists entry by entry
+      assert_equal total_found[i], hay_found[i], "boundary doesn't match"
+    end
+  end
+
+  def test_no_repeat_begin_chars # constructing a searcher with this needle must raise BMHSearchError
+    assert_raises BMHSearchError do
+      b = BMHSearch.new("FFI5LguQEg==", 10) # NOTE(review): presumably rejected because the needle's first character ("F") occurs again later -- confirm against BMHSearch; b is never read
+    end
+  end
+
+  def test_fuzzing # randomized check that chunked and whole-string searches agree; does nothing when rfuzz cannot be loaded
+    begin
+      has_rfuzz = require 'rfuzz/random' # NOTE(review): require also returns false when the file is already loaded, which would skip this test silently
+    rescue Object
+      has_rfuzz = false # any failure while loading rfuzz disables the test instead of failing it
+    end
+
+    if has_rfuzz
+      r = RFuzz::RandomGenerator.new
+      needles = r.base64(2000, 100).collect {|n| "\r\n" + n.strip } # every needle is CRLF followed by a stripped generator string
+      needles.each do |needle|
+        next if needle.length == 0 # defensive only: the "\r\n" prefix makes every needle at least two characters long
+
+        nchunks = r.num(1000) + 10
+        bmh = BMHSearch.new(needle, nchunks+1) # NOTE(review): second argument is presumably the match capacity; one needle is inserted per iteration below -- confirm
+        total = ""  # used to collect the full string for compare
+
+        # each needle is inserted once per iteration, nchunks (r.num(1000) + 10) times in all
+        nchunks.times do
+          # requested chunk size is r.num(16 * 1024) plus twice the needle length
+          chunk = r.bytes(r.num(16 * 1024) + needle.length * 2)
+          chunk.gsub! needle, "" # remove any copies of the needle that the random bytes happen to contain
+
+          # make about 60% go across boundaries (assuming r.num(10) is uniform over 0..9 -- confirm)
+          if r.num(10) < 6
+            # this one gets cut in two; NOTE(review): assuming r.num returns a non-negative value below its argument, both halves are non-empty -- confirm
+            cut_at = r.num(needle.length - 1) + 1
+            n1 = needle[0 ... cut_at]
+            n2 = needle[cut_at .. -1]
+            
+            assert_equal n1+n2, needle, "oops, messed up breaking the needle"
+
+            last_nfound = bmh.nfound # match count before the split needle is fed in
+            bmh.find(chunk + n1) # first call ends with the front half of the needle
+            assert bmh.has_trailing?, "should have trailing on #{n1}:#{n2} on chunk length: #{chunk.length}"
+            assert_equal last_nfound, bmh.nfound, "shouldn't find it yet"
+
+            bmh.find(n2 + chunk) # second call starts with the back half, completing the needle
+            assert_equal last_nfound+1, bmh.nfound, "should have found the boundary for #{n1}:#{n2} on chunk length: #{chunk.length}"
+            total << chunk + n1 + n2 + chunk # mirror exactly what the two find calls received
+          else
+            # this one is put in complete, followed by a second call with the bare chunk
+            bmh.find(chunk + needle)
+            bmh.find(chunk)
+
+            total << chunk + needle + chunk
+          end
+        end
+
+        tbmh = BMHSearch.new(needle, nchunks+1) # reference searcher that sees the whole input in one call
+        tbmh.find(total)
+
+        assert_equal total.length, bmh.total, "totals don't match"
+        assert_equal tbmh.nfound, bmh.nfound, "nfound don't match"
+
+        total_found = tbmh.pop
+        hay_found = bmh.pop
+
+        total_found.length.times do |i| # compare the two result lists entry by entry
+          assert_equal total_found[i], hay_found[i], "boundary doesn't match"
+        end
+      end
+    end
+  end
  end
author	zedshaw <zedshaw@19e92222-5c0b-0410-8929-a290d50e31e9>	2006-11-17 22:48:27 +0000
committer	zedshaw <zedshaw@19e92222-5c0b-0410-8929-a290d50e31e9>	2006-11-17 22:48:27 +0000
commit	326a43700f255a58c5e41e31f0a3ea57d2e18769 (patch)
tree	c69ecc82f92458b383957a48b0bec605e664cf66 /test
parent	35d2e74d7af797f820d5a1ccf38e9f281403524f (diff)
download	unicorn-326a43700f255a58c5e41e31f0a3ea57d2e18769.tar.gz