#include "rbtdb.h" /* * https://sites.google.com/site/murmurhash/ * * Public Domain hash functions by Austin Appleby. * * Trivially adapted for use with Ruby TDB by Eric Wong. */ /* * 'm' and 'r' are mixing constants generated offline. * They're not really 'magic', they just happen to work well. */ static const unsigned int m = 0x5bd1e995; static const int r = 24; static const unsigned int seed; unsigned int rbtdb_murmur2(TDB_DATA * key) { const unsigned char *data = key->dptr; int len = (int)key->dsize; /* Initialize the hash to a 'random' value */ unsigned int h = seed ^ len; while (len >= 4) { unsigned int k = *(const unsigned int *)data; k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; data += 4; len -= 4; } /* Handle the last few bytes of the input array */ switch (len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= m; }; /* * Do a few final mixes of the hash to ensure the last few * bytes are well-incorporated. */ h ^= h >> 13; h *= m; h ^= h >> 15; return h; } /* * This is a variant of MurmurHash2 modified to use the Merkle-Damgard * construction. Bulk speed should be identical to Murmur2, small-key speed * will be 10%-20% slower due to the added overhead at the end of the hash. * * This variant fixes a minor issue where null keys were more likely to * collide with each other than expected, and also makes the algorithm * more amenable to incremental implementations. All other caveats from * MurmurHash2 still apply. */ #define mmix(h,k) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0) unsigned int rbtdb_murmur2a(TDB_DATA * key) { const unsigned char *data = key->dptr; int len = (int)key->dsize; unsigned int l = (unsigned int)len; unsigned int h = seed; unsigned int t = 0; while (len >= 4) { unsigned int k = *(const unsigned int *)data; mmix(h, k); data += 4; len -= 4; } switch (len) { case 3: t ^= data[2] << 16; case 2: t ^= data[1] << 8; case 1: t ^= data[0]; }; mmix(h, t); mmix(h, l); h ^= h >> 13; h *= m; h ^= h >> 15; return h; } /* * Same algorithm as MurmurHash2, but only does aligned reads - should be safer * on certain platforms * * Performance will be lower than MurmurHash2 */ #define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0) unsigned int rbtdb_murmur2_aligned(TDB_DATA * key) { const unsigned char *data = key->dptr; int len = (int)key->dsize; unsigned int h = seed ^ len; union { const unsigned char *byte; int integer; } cast = { data }; int align = cast.integer & 3; if (align && (len >= 4)) { /* Pre-load the temp registers */ unsigned int t = 0, d = 0; int sl, sr; switch (align) { case 1: t |= data[2] << 16; case 2: t |= data[1] << 8; case 3: t |= data[0]; } t <<= (8 * align); data += 4 - align; len -= 4 - align; sl = 8 * (4 - align); sr = 8 * align; /* Mix */ while (len >= 4) { unsigned int k; d = *(const unsigned int *)data; t = (t >> sr) | (d << sl); k = t; MIX(h, k, m); t = d; data += 4; len -= 4; } /* Handle leftover data in temp registers */ d = 0; if (len >= align) { unsigned int k; switch (align) { case 3: d |= data[2] << 16; case 2: d |= data[1] << 8; case 1: d |= data[0]; } k = (t >> sr) | (d << sl); MIX(h, k, m); data += align; len -= align; /* Handle tail bytes */ switch (len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= m; }; } else { switch (len) { case 3: d |= data[2] << 16; case 2: d |= data[1] << 8; case 1: d |= data[0]; case 0: h ^= (t >> sr) | (d << sl); h *= m; } } h ^= h >> 13; h *= m; h ^= h >> 15; return h; } else { while (len >= 4) { unsigned int k = *(const unsigned int *)data; MIX(h, k, m); data += 4; len -= 4; } /* Handle tail bytes */ switch (len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= m; }; h ^= h >> 13; h *= m; h ^= h >> 15; return h; } } /* * Same as MurmurHash2, but endian- and alignment-neutral. * Half the speed though, alas. */ unsigned int rbtdb_murmur2_neutral(TDB_DATA * key) { const unsigned char *data = key->dptr; int len = (int)key->dsize; unsigned int h = seed ^ len; while (len >= 4) { unsigned int k; k = data[0]; k |= data[1] << 8; k |= data[2] << 16; k |= data[3] << 24; k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; data += 4; len -= 4; } switch (len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= m; }; h ^= h >> 13; h *= m; h ^= h >> 15; return h; }