From 58938c980f38a4581b4a0e8a780fffe7ac95bc93 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 26 Nov 2010 02:27:17 +0000
Subject: initial

---
 ext/tdb/murmur2.c | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 290 insertions(+)
 create mode 100644 ext/tdb/murmur2.c

(limited to 'ext/tdb/murmur2.c')

diff --git a/ext/tdb/murmur2.c b/ext/tdb/murmur2.c
new file mode 100644
index 0000000..6b6f8a6
--- /dev/null
+++ b/ext/tdb/murmur2.c
@@ -0,0 +1,290 @@
+#include "rbtdb.h"
+/*
+ * https://sites.google.com/site/murmurhash/
+ *
+ * Public Domain hash functions by Austin Appleby.
+ *
+ * Trivially adapted for use with Ruby TDB by Eric Wong.
+ */
+
+/*
+ * 'm' and 'r' are mixing constants generated offline.
+ * They're not really 'magic', they just happen to work well.
+ */
+
+static const unsigned int m = 0x5bd1e995;
+static const int r = 24;
+static const unsigned int seed = 0;	/* explicit: seed is never randomized */
+
+/* Plain MurmurHash2 over key->dptr[0..dsize); fastest, needs unaligned reads */
+unsigned int rbtdb_murmur2(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	/* Initialize the hash to a 'random' value */
+	unsigned int h = seed ^ len;
+
+	while (len >= 4) {
+		unsigned int k = *(const unsigned int *)data;	/* unaligned read */
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+
+		data += 4;
+		len -= 4;
+	}
+
+	/* Handle the last few bytes of the input array */
+	switch (len) {
+	case 3:
+		h ^= data[2] << 16;	/* fallthrough */
+	case 2:
+		h ^= data[1] << 8;	/* fallthrough */
+	case 1:
+		h ^= data[0];
+		h *= m;
+	}
+
+	/*
+	 * Do a few final mixes of the hash to ensure the last few
+	 * bytes are well-incorporated.
+	 */
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
+
+/*
+ * This is a variant of MurmurHash2 modified to use the Merkle-Damgard
+ * construction. Bulk speed should be identical to Murmur2, small-key speed
+ * will be 10%-20% slower due to the added overhead at the end of the hash.
+ *
+ * This variant fixes a minor issue where null keys were more likely to
+ * collide with each other than expected, and also makes the algorithm
+ * more amenable to incremental implementations. All other caveats from
+ * MurmurHash2 still apply.
+ */
+
+#define mmix(h,k) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
+
+unsigned int rbtdb_murmur2a(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	unsigned int l = (unsigned int)len;
+	unsigned int h = seed;
+	unsigned int t = 0;
+
+	while (len >= 4) {
+		unsigned int k = *(const unsigned int *)data;	/* unaligned read */
+
+		mmix(h, k);
+
+		data += 4;
+		len -= 4;
+	}
+
+	switch (len) {
+	case 3:
+		t ^= data[2] << 16;	/* fallthrough */
+	case 2:
+		t ^= data[1] << 8;	/* fallthrough */
+	case 1:
+		t ^= data[0];
+	}
+
+	mmix(h, t);
+	mmix(h, l);	/* mix in the length for the Merkle-Damgard finish */
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
+
+/*
+ * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+ * on certain platforms
+ *
+ * Performance will be lower than MurmurHash2
+ */
+
+#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
+
+unsigned int rbtdb_murmur2_aligned(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	unsigned int h = seed ^ len;
+	union { const unsigned char *byte; int integer; } cast = { data };
+	int align = cast.integer & 3;
+
+	if (align && (len >= 4)) {
+		/* Pre-load the temp registers */
+		unsigned int t = 0, d = 0;
+		int sl, sr;
+
+		switch (align) {
+		case 1:
+			t |= data[2] << 16;	/* fallthrough */
+		case 2:
+			t |= data[1] << 8;	/* fallthrough */
+		case 3:
+			t |= data[0];
+		}
+
+		t <<= (8 * align);
+
+		data += 4 - align;
+		len -= 4 - align;
+
+		sl = 8 * (4 - align);
+		sr = 8 * align;
+
+		/* Mix */
+		while (len >= 4) {
+			unsigned int k;
+
+			d = *(const unsigned int *)data;	/* aligned now */
+			t = (t >> sr) | (d << sl);
+
+			k = t;
+
+			MIX(h, k, m);
+
+			t = d;
+
+			data += 4;
+			len -= 4;
+		}
+
+		/* Handle leftover data in temp registers */
+		d = 0;
+		if (len >= align) {
+			unsigned int k;
+
+			switch (align) {
+			case 3:
+				d |= data[2] << 16;	/* fallthrough */
+			case 2:
+				d |= data[1] << 8;	/* fallthrough */
+			case 1:
+				d |= data[0];
+			}
+
+			k = (t >> sr) | (d << sl);
+			MIX(h, k, m);
+
+			data += align;
+			len -= align;
+
+			/* Handle tail bytes */
+			switch (len) {
+			case 3:
+				h ^= data[2] << 16;	/* fallthrough */
+			case 2:
+				h ^= data[1] << 8;	/* fallthrough */
+			case 1:
+				h ^= data[0];
+				h *= m;
+			}
+		} else {
+			switch (len) {
+			case 3:
+				d |= data[2] << 16;	/* fallthrough */
+			case 2:
+				d |= data[1] << 8;	/* fallthrough */
+			case 1:
+				d |= data[0];	/* fallthrough */
+			case 0:
+				h ^= (t >> sr) | (d << sl);
+				h *= m;
+			}
+		}
+
+		h ^= h >> 13;
+		h *= m;
+		h ^= h >> 15;
+
+		return h;
+	} else {
+		while (len >= 4) {
+			unsigned int k = *(const unsigned int *)data;
+
+			MIX(h, k, m);
+
+			data += 4;
+			len -= 4;
+		}
+
+		/* Handle tail bytes */
+		switch (len) {
+		case 3:
+			h ^= data[2] << 16;	/* fallthrough */
+		case 2:
+			h ^= data[1] << 8;	/* fallthrough */
+		case 1:
+			h ^= data[0];
+			h *= m;
+		}
+
+		h ^= h >> 13;
+		h *= m;
+		h ^= h >> 15;
+
+		return h;
+	}
+}
+
+/*
+ * Same as MurmurHash2, but endian- and alignment-neutral.
+ * Half the speed though, alas.
+ */
+unsigned int rbtdb_murmur2_neutral(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	unsigned int h = seed ^ len;
+
+	while (len >= 4) {
+		unsigned int k;
+
+		k = data[0];	/* byte-wise little-endian assembly: no alignment needs */
+		k |= data[1] << 8;
+		k |= data[2] << 16;
+		k |= data[3] << 24;
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+
+		data += 4;
+		len -= 4;
+	}
+
+	switch (len) {
+	case 3:
+		h ^= data[2] << 16;	/* fallthrough */
+	case 2:
+		h ^= data[1] << 8;	/* fallthrough */
+	case 1:
+		h ^= data[0];
+		h *= m;
+	}
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
--
cgit v1.2.3-24-ge0c7