about summary refs log tree commit homepage
path: root/ext/tdb/murmur1.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/tdb/murmur1.c')
-rw-r--r--ext/tdb/murmur1.c151
1 files changed, 151 insertions, 0 deletions
diff --git a/ext/tdb/murmur1.c b/ext/tdb/murmur1.c
new file mode 100644
index 0000000..1880cc6
--- /dev/null
+++ b/ext/tdb/murmur1.c
@@ -0,0 +1,151 @@
+#include "rbtdb.h"
+#include <assert.h>
+
+/*
+ * https://sites.google.com/site/murmurhash/
+ *
+ * Public Domain hash functions by Austin Appleby.
+ *
+ * Trivially adapted for use with Ruby TDB by Eric Wong.
+ */
+
+/*
+ * 'm' and 'r' are mixing constants generated offline.
+ * They're not really 'magic', they just happen to work well.
+ */
+static const unsigned int m = 0xc6a4a793;
+static const int r = 16;
+static const unsigned int seed;
+
+unsigned int rbtdb_murmur1(TDB_DATA * key)
+{
+        const unsigned char *data = key->dptr;
+        int len = (int)key->dsize;
+        /* Initialize the hash to a 'random' value */
+        unsigned int h = seed ^ (len * m);
+
+        while (len >= 4) {
+                h += *(const unsigned int *)data;
+                h *= m;
+                h ^= h >> r;
+
+                data += 4;
+                len -= 4;
+        }
+
+        /* Handle the last few bytes of the input array */
+        switch (len) {
+        case 3:
+                h += data[2] << 16;
+        case 2:
+                h += data[1] << 8;
+        case 1:
+                h += data[0];
+                h *= m;
+                h ^= h >> r;
+        };
+
+        /*
+         * Do a few final mixes of the hash to ensure the last few
+         * bytes are well-incorporated.
+         */
+        h *= m;
+        h ^= h >> 10;
+        h *= m;
+        h ^= h >> 17;
+
+        return h;
+}
+
+/* adapted from MurmurHashAligned */
+unsigned int rbtdb_murmur1_aligned(TDB_DATA * key)
+{
+        const unsigned char *data = key->dptr;
+        int len = (int)key->dsize;
+        unsigned int h = seed ^ (len * m);
+        union { const unsigned char *byte; int integer; } cast = { data };
+        int align = cast.integer & 3;
+
+        if (align & (len >= 4)) {
+                /* Pre-load the temp registers */
+                unsigned int t = 0, d = 0;
+                int sl, sr, pack;
+
+                switch (align) {
+                case 1: t |= data[2] << 16;
+                case 2: t |= data[1] << 8;
+                case 3: t |= data[0];
+                }
+
+                t <<= (8 * align);
+
+                data += 4 - align;
+                len -= 4 - align;
+
+                sl = 8 * (4 - align);
+                sr = 8 * align;
+
+                /* Mix */
+                while (len >= 4) {
+                        assert((cast.integer & 3) == 0);
+
+                        d = *(const unsigned int *)data;
+                        t = (t >> sr) | (d << sl);
+                        h += t;
+                        h *= m;
+                        h ^= h >> r;
+                        t = d;
+
+                        data += 4;
+                        len -= 4;
+                }
+
+                /* Handle leftover data in temp registers */
+                pack = len < align ? len : align;
+                d = 0;
+
+                switch (pack) {
+                case 3:
+                        d |= data[2] << 16;
+                case 2:
+                        d |= data[1] << 8;
+                case 1:
+                        d |= data[0];
+                case 0:
+                        h += (t >> sr) | (d << sl);
+                        h *= m;
+                        h ^= h >> r;
+                }
+
+                data += pack;
+                len -= pack;
+        } else {
+                while (len >= 4) {
+                        h += *(const unsigned int *)data;
+                        h *= m;
+                        h ^= h >> r;
+
+                        data += 4;
+                        len -= 4;
+                }
+        }
+
+        /* Handle tail bytes */
+        switch (len) {
+        case 3:
+                h += data[2] << 16;
+        case 2:
+                h += data[1] << 8;
+        case 1:
+                h += data[0];
+                h *= m;
+                h ^= h >> r;
+        };
+
+        h *= m;
+        h ^= h >> 10;
+        h *= m;
+        h ^= h >> 17;
+
+        return h;
+}