From 903e89b320b9ba29c2348a224c74807f73884e7c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 18 Aug 2011 09:02:21 +0000 Subject: add murmur3a hash It should be slightly faster than murmur2 --- ext/tdb/hash_functions.c | 2 + ext/tdb/murmur3.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ ext/tdb/rbtdb.h | 1 + ext/tdb/tdb.c | 1 + test/test_tdb.rb | 3 +- 5 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 ext/tdb/murmur3.c diff --git a/ext/tdb/hash_functions.c b/ext/tdb/hash_functions.c index 7cf69f1..3a990c3 100644 --- a/ext/tdb/hash_functions.c +++ b/ext/tdb/hash_functions.c @@ -16,6 +16,7 @@ HASH_FN(murmur2) HASH_FN(murmur2a) HASH_FN(murmur2_neutral) HASH_FN(murmur2_aligned) +HASH_FN(murmur3a) HASH_FN(fnv1a) HASH_FN(djb2) HASH_FN(djb3) @@ -33,6 +34,7 @@ void rbtdb_init_tdb_hash_functions(void) HASH_M(murmur2a); HASH_M(murmur2_neutral); HASH_M(murmur2_aligned); + HASH_M(murmur3a); HASH_M(fnv1a); HASH_M(djb2); HASH_M(djb3); diff --git a/ext/tdb/murmur3.c b/ext/tdb/murmur3.c new file mode 100644 index 0000000..cdc7f73 --- /dev/null +++ b/ext/tdb/murmur3.c @@ -0,0 +1,99 @@ +#include "rbtdb.h" +/* + * https://sites.google.com/site/murmurhash/ + * + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. The author hereby disclaims copyright to this source code. + * + * Eric Wong trivially ported this to C for Ruby tdb (32-bit version only) + */ + +#include +#define FORCE_INLINE __attribute__((always_inline)) + +static inline uint32_t rotl32(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +/* ---------------------------------------------------------------------------- + * Block read - if your platform needs to do endian-swapping or can only + * handle aligned reads, do the conversion here + */ + +static FORCE_INLINE uint32_t getblock(const uint32_t * p, int i) +{ + return p[i]; +} + +/* ---------------------------------------------------------------------------- + * Finalization mix - force all bits of a hash block to avalanche + */ + +static FORCE_INLINE uint32_t fmix(uint32_t h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +unsigned int rbtdb_murmur3a(TDB_DATA * key) +{ + const uint8_t *data = key->dptr; + int len = (int)key->dsize; + const int nblocks = len / 4; + static const uint32_t seed; + uint32_t h1 = seed; + int i; + + static const uint32_t c1 = 0xcc9e2d51; + static const uint32_t c2 = 0x1b873593; + + /* body */ + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + + for (i = -nblocks; i; i++) { + uint32_t k1 = getblock(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + case 2: + k1 ^= tail[1] << 8; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + } + + /* finalization */ + + h1 ^= len; + + return fmix(h1); +} diff --git a/ext/tdb/rbtdb.h b/ext/tdb/rbtdb.h index 2ddbcd3..46ef538 100644 --- a/ext/tdb/rbtdb.h +++ b/ext/tdb/rbtdb.h @@ -9,6 +9,7 @@ unsigned int rbtdb_murmur2(TDB_DATA *key); unsigned int rbtdb_murmur2a(TDB_DATA *key); unsigned int rbtdb_murmur2_neutral(TDB_DATA *key); unsigned int rbtdb_murmur2_aligned(TDB_DATA *key); +unsigned int rbtdb_murmur3a(TDB_DATA *key); unsigned int rbtdb_fnv1a(TDB_DATA *key); unsigned int rbtdb_djb2(TDB_DATA *key); unsigned int rbtdb_djb3(TDB_DATA *key); diff --git a/ext/tdb/tdb.c b/ext/tdb/tdb.c index 31f71c8..61227f6 100644 --- a/ext/tdb/tdb.c +++ b/ext/tdb/tdb.c @@ -87,6 +87,7 @@ rb_hash_aset(hashes,ID2SYM(rb_intern(#x)),ULONG2NUM((unsigned long)rbtdb_##x)) HF(murmur2a); HF(murmur2_neutral); HF(murmur2_aligned); + HF(murmur3a); HF(fnv1a); HF(djb2); HF(djb3); diff --git a/test/test_tdb.rb b/test/test_tdb.rb index 7864ace..d0ac535 100644 --- a/test/test_tdb.rb +++ b/test/test_tdb.rb @@ -189,7 +189,8 @@ class TestTdb < Test::Unit::TestCase results = {} expect = TDB::HASHES.to_a.map { |k,v| [ k.to_s, v.to_s ] }.sort %w(default jenkins_lookup3 djb2 djb3 fnv1a - murmur1 murmur1_aligned murmur2 murmur2a murmur2_aligned).each do |h| + murmur1 murmur1_aligned murmur2 murmur2a murmur2_aligned + murmur3a).each do |h| assert_nothing_raised do tdb = TDB.new(nil, :hash => h.to_sym) TDB::HASHES.each do |k,v| -- cgit v1.2.3-24-ge0c7