From 58938c980f38a4581b4a0e8a780fffe7ac95bc93 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 26 Nov 2010 02:27:17 +0000
Subject: initial

---
 ext/tdb/murmur2.c | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 290 insertions(+)
 create mode 100644 ext/tdb/murmur2.c

(limited to 'ext/tdb/murmur2.c')

diff --git a/ext/tdb/murmur2.c b/ext/tdb/murmur2.c
new file mode 100644
index 0000000..6b6f8a6
--- /dev/null
+++ b/ext/tdb/murmur2.c
@@ -0,0 +1,290 @@
+#include "rbtdb.h"
+/*
+ * https://sites.google.com/site/murmurhash/
+ *
+ * Public Domain hash functions by Austin Appleby.
+ *
+ * Trivially adapted for use with Ruby TDB by Eric Wong.
+ */
+
+/*
+ * 'm' and 'r' are mixing constants generated offline.
+ * They're not really 'magic', they just happen to work well.
+ */
+
+static const unsigned int m = 0x5bd1e995;
+static const int r = 24;
+static const unsigned int seed = 0;	/* explicit: seed is never randomized */
+
+/* Plain MurmurHash2 over key->dptr[0..dsize); fastest, needs unaligned reads */
+unsigned int rbtdb_murmur2(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	/* Initialize the hash to a 'random' value */
+	unsigned int h = seed ^ len;
+
+	while (len >= 4) {
+		unsigned int k = *(const unsigned int *)data;	/* unaligned read */
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+
+		data += 4;
+		len -= 4;
+	}
+
+	/* Handle the last few bytes of the input array */
+	switch (len) {
+	case 3:
+		h ^= data[2] << 16;	/* fallthrough */
+	case 2:
+		h ^= data[1] << 8;	/* fallthrough */
+	case 1:
+		h ^= data[0];
+		h *= m;
+	}
+
+	/*
+	 * Do a few final mixes of the hash to ensure the last few
+	 * bytes are well-incorporated.
+	 */
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
+
+/*
+ * This is a variant of MurmurHash2 modified to use the Merkle-Damgard
+ * construction. Bulk speed should be identical to Murmur2, small-key speed
+ * will be 10%-20% slower due to the added overhead at the end of the hash.
+ *
+ * This variant fixes a minor issue where null keys were more likely to
+ * collide with each other than expected, and also makes the algorithm
+ * more amenable to incremental implementations. All other caveats from
+ * MurmurHash2 still apply.
+ */
+
+#define mmix(h,k) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
+
+unsigned int rbtdb_murmur2a(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	unsigned int l = (unsigned int)len;
+	unsigned int h = seed;
+	unsigned int t = 0;
+
+	while (len >= 4) {
+		unsigned int k = *(const unsigned int *)data;	/* unaligned read */
+
+		mmix(h, k);
+
+		data += 4;
+		len -= 4;
+	}
+
+	switch (len) {
+	case 3:
+		t ^= data[2] << 16;	/* fallthrough */
+	case 2:
+		t ^= data[1] << 8;	/* fallthrough */
+	case 1:
+		t ^= data[0];
+	}
+
+	mmix(h, t);
+	mmix(h, l);	/* mix in the length for the Merkle-Damgard finish */
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
+
+/*
+ * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+ * on certain platforms
+ *
+ * Performance will be lower than MurmurHash2
+ */
+
+#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
+
+unsigned int rbtdb_murmur2_aligned(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	unsigned int h = seed ^ len;
+	union { const unsigned char *byte; int integer; } cast = { data };
+	int align = cast.integer & 3;
+
+	if (align && (len >= 4)) {
+		/* Pre-load the temp registers */
+		unsigned int t = 0, d = 0;
+		int sl, sr;
+
+		switch (align) {
+		case 1:
+			t |= data[2] << 16;	/* fallthrough */
+		case 2:
+			t |= data[1] << 8;	/* fallthrough */
+		case 3:
+			t |= data[0];
+		}
+
+		t <<= (8 * align);
+
+		data += 4 - align;
+		len -= 4 - align;
+
+		sl = 8 * (4 - align);
+		sr = 8 * align;
+
+		/* Mix */
+		while (len >= 4) {
+			unsigned int k;
+
+			d = *(const unsigned int *)data;	/* aligned now */
+			t = (t >> sr) | (d << sl);
+
+			k = t;
+
+			MIX(h, k, m);
+
+			t = d;
+
+			data += 4;
+			len -= 4;
+		}
+
+		/* Handle leftover data in temp registers */
+		d = 0;
+		if (len >= align) {
+			unsigned int k;
+
+			switch (align) {
+			case 3:
+				d |= data[2] << 16;	/* fallthrough */
+			case 2:
+				d |= data[1] << 8;	/* fallthrough */
+			case 1:
+				d |= data[0];
+			}
+
+			k = (t >> sr) | (d << sl);
+			MIX(h, k, m);
+
+			data += align;
+			len -= align;
+
+			/* Handle tail bytes */
+			switch (len) {
+			case 3:
+				h ^= data[2] << 16;	/* fallthrough */
+			case 2:
+				h ^= data[1] << 8;	/* fallthrough */
+			case 1:
+				h ^= data[0];
+				h *= m;
+			}
+		} else {
+			switch (len) {
+			case 3:
+				d |= data[2] << 16;	/* fallthrough */
+			case 2:
+				d |= data[1] << 8;	/* fallthrough */
+			case 1:
+				d |= data[0];	/* fallthrough */
+			case 0:
+				h ^= (t >> sr) | (d << sl);
+				h *= m;
+			}
+		}
+
+		h ^= h >> 13;
+		h *= m;
+		h ^= h >> 15;
+
+		return h;
+	} else {
+		while (len >= 4) {
+			unsigned int k = *(const unsigned int *)data;
+
+			MIX(h, k, m);
+
+			data += 4;
+			len -= 4;
+		}
+
+		/* Handle tail bytes */
+		switch (len) {
+		case 3:
+			h ^= data[2] << 16;	/* fallthrough */
+		case 2:
+			h ^= data[1] << 8;	/* fallthrough */
+		case 1:
+			h ^= data[0];
+			h *= m;
+		}
+
+		h ^= h >> 13;
+		h *= m;
+		h ^= h >> 15;
+
+		return h;
+	}
+}
+
+/*
+ * Same as MurmurHash2, but endian- and alignment-neutral.
+ * Half the speed though, alas.
+ */
+unsigned int rbtdb_murmur2_neutral(TDB_DATA * key)
+{
+	const unsigned char *data = key->dptr;
+	int len = (int)key->dsize;
+	unsigned int h = seed ^ len;
+
+	while (len >= 4) {
+		unsigned int k;
+
+		k = data[0];	/* byte-wise little-endian assembly: no alignment needs */
+		k |= data[1] << 8;
+		k |= data[2] << 16;
+		k |= data[3] << 24;
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+
+		data += 4;
+		len -= 4;
+	}
+
+	switch (len) {
+	case 3:
+		h ^= data[2] << 16;	/* fallthrough */
+	case 2:
+		h ^= data[1] << 8;	/* fallthrough */
+	case 1:
+		h ^= data[0];
+		h *= m;
+	}
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
--
cgit v1.2.3-24-ge0c7