summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2011-08-22 21:12:36 +0000
committerEric Wong <normalperson@yhbt.net>2011-08-22 21:12:36 +0000
commit6beda48f95c6ec92a0f83c118273ac223d34af6b (patch)
tree487125b20e50588791a87d81f5f63205e1eb2d54
parent7aefa5372df6ee351d83e8a74ac5be5c15fc9ea4 (diff)
This is faster for larger keys on x86_64
-rw-r--r--ext/tdb/hash_functions.c2
-rw-r--r--ext/tdb/murmur3.c134
-rw-r--r--ext/tdb/rbtdb.h1
-rw-r--r--ext/tdb/tdb.c1
-rw-r--r--test/test_tdb.rb2
5 files changed, 138 insertions, 2 deletions
diff --git a/ext/tdb/hash_functions.c b/ext/tdb/hash_functions.c
index 3a990c3..6c93c4b 100644
--- a/ext/tdb/hash_functions.c
+++ b/ext/tdb/hash_functions.c
@@ -17,6 +17,7 @@ HASH_FN(murmur2a)
 HASH_FN(murmur2_neutral)
 HASH_FN(murmur2_aligned)
 HASH_FN(murmur3a)
+HASH_FN(murmur3f)
 HASH_FN(fnv1a)
 HASH_FN(djb2)
 HASH_FN(djb3)
@@ -35,6 +36,7 @@ void rbtdb_init_tdb_hash_functions(void)
         HASH_M(murmur2_neutral);
         HASH_M(murmur2_aligned);
         HASH_M(murmur3a);
+        HASH_M(murmur3f);
         HASH_M(fnv1a);
         HASH_M(djb2);
         HASH_M(djb3);
diff --git a/ext/tdb/murmur3.c b/ext/tdb/murmur3.c
index cdc7f73..9980b3d 100644
--- a/ext/tdb/murmur3.c
+++ b/ext/tdb/murmur3.c
@@ -5,7 +5,7 @@
  * MurmurHash3 was written by Austin Appleby, and is placed in the public
  * domain. The author hereby disclaims copyright to this source code.
  *
- * Eric Wong trivially ported this to C for Ruby tdb (32-bit version only)
+ * Eric Wong trivially ported this to C for Ruby tdb (32-bit versions only)
  */
 
 #include <stdint.h>
@@ -97,3 +97,135 @@ unsigned int rbtdb_murmur3a(TDB_DATA * key)
 
         return fmix(h1);
 }
+
+static inline uint64_t rotl64(uint64_t x, int8_t r)
+{
+        return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL64(x,y)        rotl64(x,y)
+
+FORCE_INLINE uint64_t getblock64(const uint64_t * p, int i)
+{
+        return p[i];
+}
+
+FORCE_INLINE uint64_t fmix64(uint64_t k)
+{
+        k ^= k >> 33;
+        k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+        k ^= k >> 33;
+        k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+        k ^= k >> 33;
+
+        return k;
+}
+
+/* this was a 128-bit hash for x86_64, but we only want 32-bits */
+unsigned int rbtdb_murmur3f(TDB_DATA * key)
+{
+        const uint8_t *data = key->dptr;
+        int len = (int)key->dsize;
+        const int nblocks = len / 16;
+        static const uint32_t seed;
+        uint64_t h1 = seed;
+        uint64_t h2 = seed;
+        uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+        uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+        int i;
+
+        /* body */
+        const uint64_t *blocks = (const uint64_t *)(data);
+
+        for (i = 0; i < nblocks; i++) {
+                uint64_t k1 = getblock64(blocks, i * 2 + 0);
+                uint64_t k2 = getblock64(blocks, i * 2 + 1);
+
+                k1 *= c1;
+                k1 = ROTL64(k1, 31);
+                k1 *= c2;
+                h1 ^= k1;
+
+                h1 = ROTL64(h1, 27);
+                h1 += h2;
+                h1 = h1 * 5 + 0x52dce729;
+
+                k2 *= c2;
+                k2 = ROTL64(k2, 33);
+                k2 *= c1;
+                h2 ^= k2;
+
+                h2 = ROTL64(h2, 31);
+                h2 += h1;
+                h2 = h2 * 5 + 0x38495ab5;
+        }
+
+        /* tail */
+        {
+                const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);
+
+                uint64_t k1 = 0;
+                uint64_t k2 = 0;
+                #define CAST64(x) ((uint64_t)(x))
+
+                switch (len & 15) {
+                case 15:
+                        k2 ^= CAST64(tail[14]) << 48;
+                case 14:
+                        k2 ^= CAST64(tail[13]) << 40;
+                case 13:
+                        k2 ^= CAST64(tail[12]) << 32;
+                case 12:
+                        k2 ^= CAST64(tail[11]) << 24;
+                case 11:
+                        k2 ^= CAST64(tail[10]) << 16;
+                case 10:
+                        k2 ^= CAST64(tail[9]) << 8;
+                case 9:
+                        k2 ^= CAST64(tail[8]) << 0;
+                        k2 *= c2;
+                        k2 = ROTL64(k2, 33);
+                        k2 *= c1;
+                        h2 ^= k2;
+
+                case 8:
+                        k1 ^= CAST64(tail[7]) << 56;
+                case 7:
+                        k1 ^= CAST64(tail[6]) << 48;
+                case 6:
+                        k1 ^= CAST64(tail[5]) << 40;
+                case 5:
+                        k1 ^= CAST64(tail[4]) << 32;
+                case 4:
+                        k1 ^= CAST64(tail[3]) << 24;
+                case 3:
+                        k1 ^= CAST64(tail[2]) << 16;
+                case 2:
+                        k1 ^= CAST64(tail[1]) << 8;
+                case 1:
+                        k1 ^= CAST64(tail[0]) << 0;
+                        k1 *= c1;
+                        k1 = ROTL64(k1, 31);
+                        k1 *= c2;
+                        h1 ^= k1;
+                };
+        }
+
+        /* finalization */
+
+        h1 ^= len;
+        h2 ^= len;
+
+        h1 += h2;
+        h2 += h1;
+
+        h1 = fmix64(h1);
+        h2 = fmix64(h2);
+
+        h1 += h2;
+
+        /* not needed for 32-bit hash */
+        /* h2 += h1; */
+
+        return (unsigned int)h1; /* truncate to 32-bits */
+}
diff --git a/ext/tdb/rbtdb.h b/ext/tdb/rbtdb.h
index 46ef538..bfceff4 100644
--- a/ext/tdb/rbtdb.h
+++ b/ext/tdb/rbtdb.h
@@ -10,6 +10,7 @@ unsigned int rbtdb_murmur2a(TDB_DATA *key);
 unsigned int rbtdb_murmur2_neutral(TDB_DATA *key);
 unsigned int rbtdb_murmur2_aligned(TDB_DATA *key);
 unsigned int rbtdb_murmur3a(TDB_DATA *key);
+unsigned int rbtdb_murmur3f(TDB_DATA *key);
 unsigned int rbtdb_fnv1a(TDB_DATA *key);
 unsigned int rbtdb_djb2(TDB_DATA *key);
 unsigned int rbtdb_djb3(TDB_DATA *key);
diff --git a/ext/tdb/tdb.c b/ext/tdb/tdb.c
index 61227f6..a4a5ec5 100644
--- a/ext/tdb/tdb.c
+++ b/ext/tdb/tdb.c
@@ -88,6 +88,7 @@ rb_hash_aset(hashes,ID2SYM(rb_intern(#x)),ULONG2NUM((unsigned long)rbtdb_##x))
         HF(murmur2_neutral);
         HF(murmur2_aligned);
         HF(murmur3a);
+        HF(murmur3f);
         HF(fnv1a);
         HF(djb2);
         HF(djb3);
diff --git a/test/test_tdb.rb b/test/test_tdb.rb
index d0ac535..7ae581d 100644
--- a/test/test_tdb.rb
+++ b/test/test_tdb.rb
@@ -190,7 +190,7 @@ class TestTdb < Test::Unit::TestCase
     expect = TDB::HASHES.to_a.map { |k,v| [ k.to_s, v.to_s ] }.sort
     %w(default jenkins_lookup3 djb2 djb3 fnv1a
        murmur1 murmur1_aligned murmur2 murmur2a murmur2_aligned
-       murmur3a).each do |h|
+       murmur3a murmur3f).each do |h|
       assert_nothing_raised do
         tdb = TDB.new(nil, :hash => h.to_sym)
         TDB::HASHES.each do |k,v|