summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2011-08-18 09:02:21 +0000
committerEric Wong <normalperson@yhbt.net>2011-08-18 18:17:53 +0000
commit903e89b320b9ba29c2348a224c74807f73884e7c (patch)
treebd53ffbd253a9fc61e6cb8947297be1d7fc98be1
parentd4b13c817667479af84f4af58afe05138d53ba3a (diff)
It should be slightly faster than murmur2
-rw-r--r--ext/tdb/hash_functions.c2
-rw-r--r--ext/tdb/murmur3.c99
-rw-r--r--ext/tdb/rbtdb.h1
-rw-r--r--ext/tdb/tdb.c1
-rw-r--r--test/test_tdb.rb3
5 files changed, 105 insertions, 1 deletions
diff --git a/ext/tdb/hash_functions.c b/ext/tdb/hash_functions.c
index 7cf69f1..3a990c3 100644
--- a/ext/tdb/hash_functions.c
+++ b/ext/tdb/hash_functions.c
@@ -16,6 +16,7 @@ HASH_FN(murmur2)
 HASH_FN(murmur2a)
 HASH_FN(murmur2_neutral)
 HASH_FN(murmur2_aligned)
+HASH_FN(murmur3a)
 HASH_FN(fnv1a)
 HASH_FN(djb2)
 HASH_FN(djb3)
@@ -33,6 +34,7 @@ void rbtdb_init_tdb_hash_functions(void)
         HASH_M(murmur2a);
         HASH_M(murmur2_neutral);
         HASH_M(murmur2_aligned);
+        HASH_M(murmur3a);
         HASH_M(fnv1a);
         HASH_M(djb2);
         HASH_M(djb3);
diff --git a/ext/tdb/murmur3.c b/ext/tdb/murmur3.c
new file mode 100644
index 0000000..cdc7f73
--- /dev/null
+++ b/ext/tdb/murmur3.c
@@ -0,0 +1,99 @@
+#include "rbtdb.h"
+/*
+ * https://sites.google.com/site/murmurhash/
+ *
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ *
+ * Eric Wong trivially ported this to C for Ruby tdb (32-bit version only)
+ */
+
+#include <stdint.h>
+#define        FORCE_INLINE __attribute__((always_inline))
+
+static inline uint32_t rotl32(uint32_t x, int8_t r)
+{
+        return (x << r) | (x >> (32 - r));
+}
+
+#define        ROTL32(x,y)        rotl32(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+/* ----------------------------------------------------------------------------
+ * Block read - if your platform needs to do endian-swapping or can only
+ * handle aligned reads, do the conversion here
+ */
+
+static FORCE_INLINE uint32_t getblock(const uint32_t * p, int i)
+{
+        return p[i];
+}
+
+/* ----------------------------------------------------------------------------
+ * Finalization mix - force all bits of a hash block to avalanche
+ */
+
+static FORCE_INLINE uint32_t fmix(uint32_t h)
+{
+        h ^= h >> 16;
+        h *= 0x85ebca6b;
+        h ^= h >> 13;
+        h *= 0xc2b2ae35;
+        h ^= h >> 16;
+
+        return h;
+}
+
+unsigned int rbtdb_murmur3a(TDB_DATA * key)
+{
+        const uint8_t *data = key->dptr;
+        int len = (int)key->dsize;
+        const int nblocks = len / 4;
+        static const uint32_t seed;
+        uint32_t h1 = seed;
+        int i;
+
+        static const uint32_t c1 = 0xcc9e2d51;
+        static const uint32_t c2 = 0x1b873593;
+
+        /* body */
+        const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);
+
+        for (i = -nblocks; i; i++) {
+                uint32_t k1 = getblock(blocks, i);
+
+                k1 *= c1;
+                k1 = ROTL32(k1, 15);
+                k1 *= c2;
+
+                h1 ^= k1;
+                h1 = ROTL32(h1, 13);
+                h1 = h1 * 5 + 0xe6546b64;
+        }
+
+        /* tail */
+        {
+                const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);
+                uint32_t k1 = 0;
+
+                switch (len & 3) {
+                case 3:
+                        k1 ^= tail[2] << 16;
+                case 2:
+                        k1 ^= tail[1] << 8;
+                case 1:
+                        k1 ^= tail[0];
+                        k1 *= c1;
+                        k1 = ROTL32(k1, 15);
+                        k1 *= c2;
+                        h1 ^= k1;
+                };
+        }
+
+        /* finalization */
+
+        h1 ^= len;
+
+        return fmix(h1);
+}
diff --git a/ext/tdb/rbtdb.h b/ext/tdb/rbtdb.h
index 2ddbcd3..46ef538 100644
--- a/ext/tdb/rbtdb.h
+++ b/ext/tdb/rbtdb.h
@@ -9,6 +9,7 @@ unsigned int rbtdb_murmur2(TDB_DATA *key);
 unsigned int rbtdb_murmur2a(TDB_DATA *key);
 unsigned int rbtdb_murmur2_neutral(TDB_DATA *key);
 unsigned int rbtdb_murmur2_aligned(TDB_DATA *key);
+unsigned int rbtdb_murmur3a(TDB_DATA *key);
 unsigned int rbtdb_fnv1a(TDB_DATA *key);
 unsigned int rbtdb_djb2(TDB_DATA *key);
 unsigned int rbtdb_djb3(TDB_DATA *key);
diff --git a/ext/tdb/tdb.c b/ext/tdb/tdb.c
index 31f71c8..61227f6 100644
--- a/ext/tdb/tdb.c
+++ b/ext/tdb/tdb.c
@@ -87,6 +87,7 @@ rb_hash_aset(hashes,ID2SYM(rb_intern(#x)),ULONG2NUM((unsigned long)rbtdb_##x))
         HF(murmur2a);
         HF(murmur2_neutral);
         HF(murmur2_aligned);
+        HF(murmur3a);
         HF(fnv1a);
         HF(djb2);
         HF(djb3);
diff --git a/test/test_tdb.rb b/test/test_tdb.rb
index 7864ace..d0ac535 100644
--- a/test/test_tdb.rb
+++ b/test/test_tdb.rb
@@ -189,7 +189,8 @@ class TestTdb < Test::Unit::TestCase
     results = {}
     expect = TDB::HASHES.to_a.map { |k,v| [ k.to_s, v.to_s ] }.sort
     %w(default jenkins_lookup3 djb2 djb3 fnv1a
-       murmur1 murmur1_aligned murmur2 murmur2a murmur2_aligned).each do |h|
+       murmur1 murmur1_aligned murmur2 murmur2a murmur2_aligned
+       murmur3a).each do |h|
       assert_nothing_raised do
         tdb = TDB.new(nil, :hash => h.to_sym)
         TDB::HASHES.each do |k,v|