From 6beda48f95c6ec92a0f83c118273ac223d34af6b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 22 Aug 2011 21:12:36 +0000 Subject: add murmur3f (x86_64 128 bit truncated to 32-bits) This is faster for larger keys on x86_64 --- ext/tdb/hash_functions.c | 2 + ext/tdb/murmur3.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++- ext/tdb/rbtdb.h | 1 + ext/tdb/tdb.c | 1 + test/test_tdb.rb | 2 +- 5 files changed, 138 insertions(+), 2 deletions(-) diff --git a/ext/tdb/hash_functions.c b/ext/tdb/hash_functions.c index 3a990c3..6c93c4b 100644 --- a/ext/tdb/hash_functions.c +++ b/ext/tdb/hash_functions.c @@ -17,6 +17,7 @@ HASH_FN(murmur2a) HASH_FN(murmur2_neutral) HASH_FN(murmur2_aligned) HASH_FN(murmur3a) +HASH_FN(murmur3f) HASH_FN(fnv1a) HASH_FN(djb2) HASH_FN(djb3) @@ -35,6 +36,7 @@ void rbtdb_init_tdb_hash_functions(void) HASH_M(murmur2_neutral); HASH_M(murmur2_aligned); HASH_M(murmur3a); + HASH_M(murmur3f); HASH_M(fnv1a); HASH_M(djb2); HASH_M(djb3); diff --git a/ext/tdb/murmur3.c b/ext/tdb/murmur3.c index cdc7f73..9980b3d 100644 --- a/ext/tdb/murmur3.c +++ b/ext/tdb/murmur3.c @@ -5,7 +5,7 @@ * MurmurHash3 was written by Austin Appleby, and is placed in the public * domain. The author hereby disclaims copyright to this source code. * - * Eric Wong trivially ported this to C for Ruby tdb (32-bit version only) + * Eric Wong trivially ported this to C for Ruby tdb (32-bit versions only) */ #include @@ -97,3 +97,135 @@ unsigned int rbtdb_murmur3a(TDB_DATA * key) return fmix(h1); } + +static inline uint64_t rotl64(uint64_t x, int8_t r) +{ + return (x << r) | (x >> (64 - r)); +} + +#define ROTL64(x,y) rotl64(x,y) + +FORCE_INLINE uint64_t getblock64(const uint64_t * p, int i) +{ + return p[i]; +} + +FORCE_INLINE uint64_t fmix64(uint64_t k) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +/* this was a 128-bit hash for x86_64, but we only want 32-bits */ +unsigned int rbtdb_murmur3f(TDB_DATA * key) +{ + const uint8_t *data = key->dptr; + int len = (int)key->dsize; + const int nblocks = len / 16; + static const uint32_t seed; + uint64_t h1 = seed; + uint64_t h2 = seed; + uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + int i; + + /* body */ + const uint64_t *blocks = (const uint64_t *)(data); + + for (i = 0; i < nblocks; i++) { + uint64_t k1 = getblock64(blocks, i * 2 + 0); + uint64_t k2 = getblock64(blocks, i * 2 + 1); + + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + h1 ^= k1; + + h1 = ROTL64(h1, 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; + + k2 *= c2; + k2 = ROTL64(k2, 33); + k2 *= c1; + h2 ^= k2; + + h2 = ROTL64(h2, 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + #define CAST64(x) ((uint64_t)(x)) + + switch (len & 15) { + case 15: + k2 ^= CAST64(tail[14]) << 48; + case 14: + k2 ^= CAST64(tail[13]) << 40; + case 13: + k2 ^= CAST64(tail[12]) << 32; + case 12: + k2 ^= CAST64(tail[11]) << 24; + case 11: + k2 ^= CAST64(tail[10]) << 16; + case 10: + k2 ^= CAST64(tail[9]) << 8; + case 9: + k2 ^= CAST64(tail[8]) << 0; + k2 *= c2; + k2 = ROTL64(k2, 33); + k2 *= c1; + h2 ^= k2; + + case 8: + k1 ^= CAST64(tail[7]) << 56; + case 7: + k1 ^= CAST64(tail[6]) << 48; + case 6: + k1 ^= CAST64(tail[5]) << 40; + case 5: + k1 ^= CAST64(tail[4]) << 32; + case 4: + k1 ^= CAST64(tail[3]) << 24; + case 3: + k1 ^= CAST64(tail[2]) << 16; + case 2: + k1 ^= CAST64(tail[1]) << 8; + case 1: + k1 ^= CAST64(tail[0]) << 0; + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + h1 ^= k1; + }; + } + + /* finalization */ + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + + /* not needed for 32-bit hash */ + /* h2 += h1; */ + + return (unsigned int)h1; /* truncate to 32-bits */ +} diff --git a/ext/tdb/rbtdb.h b/ext/tdb/rbtdb.h index 46ef538..bfceff4 100644 --- a/ext/tdb/rbtdb.h +++ b/ext/tdb/rbtdb.h @@ -10,6 +10,7 @@ unsigned int rbtdb_murmur2a(TDB_DATA *key); unsigned int rbtdb_murmur2_neutral(TDB_DATA *key); unsigned int rbtdb_murmur2_aligned(TDB_DATA *key); unsigned int rbtdb_murmur3a(TDB_DATA *key); +unsigned int rbtdb_murmur3f(TDB_DATA *key); unsigned int rbtdb_fnv1a(TDB_DATA *key); unsigned int rbtdb_djb2(TDB_DATA *key); unsigned int rbtdb_djb3(TDB_DATA *key); diff --git a/ext/tdb/tdb.c b/ext/tdb/tdb.c index 61227f6..a4a5ec5 100644 --- a/ext/tdb/tdb.c +++ b/ext/tdb/tdb.c @@ -88,6 +88,7 @@ rb_hash_aset(hashes,ID2SYM(rb_intern(#x)),ULONG2NUM((unsigned long)rbtdb_##x)) HF(murmur2_neutral); HF(murmur2_aligned); HF(murmur3a); + HF(murmur3f); HF(fnv1a); HF(djb2); HF(djb3); diff --git a/test/test_tdb.rb b/test/test_tdb.rb index d0ac535..7ae581d 100644 --- a/test/test_tdb.rb +++ b/test/test_tdb.rb @@ -190,7 +190,7 @@ class TestTdb < Test::Unit::TestCase expect = TDB::HASHES.to_a.map { |k,v| [ k.to_s, v.to_s ] }.sort %w(default jenkins_lookup3 djb2 djb3 fnv1a murmur1 murmur1_aligned murmur2 murmur2a murmur2_aligned - murmur3a).each do |h| + murmur3a murmur3f).each do |h| assert_nothing_raised do tdb = TDB.new(nil, :hash => h.to_sym) TDB::HASHES.each do |k,v| -- cgit v1.2.3-24-ge0c7