# Tests added to test/test_bmhsearch.rb by commit
# 326a43700f255a58c5e41e31f0a3ea57d2e18769
#   From:    zedshaw
#   Date:    Fri, 17 Nov 2006 22:48:27 +0000
#   Subject: More fully tested BMH with extensive fuzzing test.
#   git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@388 19e92222-5c0b-0410-8929-a290d50e31e9
#
# The patch appends the methods below to the end of the existing
# BMHSearchTest class, so the class is reopened here rather than redefined.
# BMHSearch, BMHSearchError and Test::Unit are expected to be loaded by the
# top of the test file, which is not part of this excerpt.
class BMHSearchTest < Test::Unit::TestCase

  # Feeding the haystack piece by piece must report the same matches as
  # feeding it in one go, including needles that straddle a piece boundary.
  def test_boundaries_over_chunks
    hay = ["zed is a tool ba",
           "ggiethis has the baggieb",
           "aggie of baggie-baggie tricks in another ",
           "baggie"]
    # Kept as an independent literal (rather than hay.join) so a typo in the
    # pieces shows up as a mismatch instead of cancelling out.
    total = "zed is a tool baggiethis has the baggiebaggie of baggie-baggie tricks in another baggie"

    s = BMHSearch.new("baggie", 20)
    hay.each { |h| s.find(h) }
    hay_found = s.pop

    s = BMHSearch.new("baggie", 20)
    s.find(total)
    total_found = s.pop

    assert_same_boundaries hay_found, total_found
  end

  # Constructing a searcher with this needle must raise BMHSearchError.
  # NOTE(review): judging by the test name the needle is rejected because its
  # leading character repeats -- confirm against the BMHSearch implementation.
  def test_no_repeat_begin_chars
    assert_raises BMHSearchError do
      BMHSearch.new("FFI5LguQEg==", 10)
    end
  end

  # Randomised comparison of chunked searching against searching the
  # concatenated input. Does nothing when the optional rfuzz library is
  # not installed.
  def test_fuzzing
    begin
      # Do not use the return value of require as an availability flag: it is
      # false when the library has already been loaded, which would silently
      # disable this test.
      require 'rfuzz/random'
    rescue LoadError
      return
    end

    r = RFuzz::RandomGenerator.new
    # NOTE(review): presumably 2000 base64 strings built from 100 random bytes
    # each -- confirm against RFuzz::RandomGenerator#base64.
    needles = r.base64(2000, 100).map { |n| "\r\n" + n.strip }
    needles.each do |needle|
      next if needle.length == 0

      nchunks = r.num(1000) + 10
      # Exactly one needle is inserted per iteration below, so nchunks + 1
      # leaves one spare slot in the searcher's second argument.
      bmh = BMHSearch.new(needle, nchunks + 1)
      total = "" # used to collect the full string for compare

      # each needle is sprinkled into nchunks (10 or more) chunks
      nchunks.times do
        # each chunk is up to 16k in size
        chunk = r.bytes(r.num(16 * 1024) + needle.length * 2)
        # A single pass is not enough: deleting one occurrence can splice a
        # new one together, so repeat until the chunk is needle-free.
        chunk.gsub!(needle, "") while chunk.include?(needle)

        # make about 60% go across boundaries
        if r.num(10) < 6
          # this one gets cut in two
          cut_at = r.num(needle.length - 1) + 1
          n1 = needle[0...cut_at]
          n2 = needle[cut_at..-1]

          assert_equal n1+n2, needle, "oops, messed up breaking the needle"

          last_nfound = bmh.nfound
          bmh.find(chunk + n1)
          assert bmh.has_trailing?, "should have trailing on #{n1}:#{n2} on chunk length: #{chunk.length}"
          assert_equal last_nfound, bmh.nfound, "shouldn't find it yet"

          bmh.find(n2 + chunk)
          assert_equal last_nfound+1, bmh.nfound, "should have found the boundary for #{n1}:#{n2} on chunk length: #{chunk.length}"
          total << chunk + n1 + n2 + chunk
        else
          # this one is put in complete
          bmh.find(chunk + needle)
          bmh.find(chunk)

          total << chunk + needle + chunk
        end
      end

      # Reference run: same needle, whole input in a single find call.
      tbmh = BMHSearch.new(needle, nchunks + 1)
      tbmh.find(total)

      assert_equal total.length, bmh.total, "totals don't match"
      assert_equal tbmh.nfound, bmh.nfound, "nfound don't match"

      assert_same_boundaries tbmh.pop, bmh.pop
    end
  end

  # Asserts that two match lists have the same length and agree element by
  # element. Shared by the chunk-boundary tests above.
  def assert_same_boundaries(expected, actual)
    assert_equal expected.length, actual.length, "wrong number of needles found"

    expected.length.times do |i|
      assert_equal expected[i], actual[i], "boundary doesn't match"
    end
  end
  private :assert_same_boundaries
end