DM-Devel Archive mirror
 help / color / mirror / Atom feed
From: Matthew Sakai <msakai@redhat.com>
To: dm-devel@lists.linux.dev, linux-block@vger.kernel.org
Cc: Matthew Sakai <msakai@redhat.com>
Subject: [PATCH v4 02/39] dm vdo: add the MurmurHash3 fast hashing algorithm
Date: Thu, 26 Oct 2023 17:40:59 -0400	[thread overview]
Message-ID: <20231026214136.1067410-3-msakai@redhat.com> (raw)
In-Reply-To: <20231026214136.1067410-1-msakai@redhat.com>

MurmurHash3 is a fast, non-cryptographic, 128-bit hash. It was originally
written by Austin Appleby and placed in the public domain. This version has
been modified to produce the same result on both big endian and little
endian processors, making it suitable for use in portable persistent data.

Co-developed-by: J. corwin Coburn <corwin@hurlbutnet.net>
Signed-off-by: J. corwin Coburn <corwin@hurlbutnet.net>
Co-developed-by: Ken Raeburn <raeburn@redhat.com>
Signed-off-by: Ken Raeburn <raeburn@redhat.com>
Co-developed-by: John Wiele <jwiele@redhat.com>
Signed-off-by: John Wiele <jwiele@redhat.com>
Signed-off-by: Matthew Sakai <msakai@redhat.com>
---
 drivers/md/dm-vdo/murmurhash3.c | 175 ++++++++++++++++++++++++++++++++
 drivers/md/dm-vdo/murmurhash3.h |  15 +++
 2 files changed, 190 insertions(+)
 create mode 100644 drivers/md/dm-vdo/murmurhash3.c
 create mode 100644 drivers/md/dm-vdo/murmurhash3.h

diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c
new file mode 100644
index 000000000000..c68d0d153904
--- /dev/null
+++ b/drivers/md/dm-vdo/murmurhash3.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: LGPL-2.1+
+/*
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ *
+ * Adapted by John Wiele (jwiele@redhat.com).
+ */
+
+#include "murmurhash3.h"
+
+static inline u64 rotl64(u64 x, s8 r)
+{
+	return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL64(x, y) rotl64(x, y)
+static __always_inline u64 getblock64(const u64 *p, int i)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	return p[i];
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	return __builtin_bswap64(p[i]);
+#else
+#error "can't figure out byte order"
+#endif
+}
+
+static __always_inline void putblock64(u64 *p, int i, u64 value)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	p[i] = value;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	p[i] = __builtin_bswap64(value);
+#else
+#error "can't figure out byte order"
+#endif
+}
+
+/* Finalization mix - force all bits of a hash block to avalanche */
+
+static __always_inline u64 fmix64(u64 k)
+{
+	k ^= k >> 33;
+	k *= 0xff51afd7ed558ccdLLU;
+	k ^= k >> 33;
+	k *= 0xc4ceb9fe1a85ec53LLU;
+	k ^= k >> 33;
+
+	return k;
+}
+
+void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
+{
+	const u8 *data = (const u8 *)key;
+	const int nblocks = len / 16;
+
+	u64 h1 = seed;
+	u64 h2 = seed;
+
+	const u64 c1 = 0x87c37b91114253d5LLU;
+	const u64 c2 = 0x4cf5ad432745937fLLU;
+
+	/* body */
+
+	const u64 *blocks = (const u64 *)(data);
+
+	int i;
+
+	for (i = 0; i < nblocks; i++) {
+		u64 k1 = getblock64(blocks, i * 2 + 0);
+		u64 k2 = getblock64(blocks, i * 2 + 1);
+
+		k1 *= c1;
+		k1 = ROTL64(k1, 31);
+		k1 *= c2;
+		h1 ^= k1;
+
+		h1 = ROTL64(h1, 27);
+		h1 += h2;
+		h1 = h1 * 5 + 0x52dce729;
+
+		k2 *= c2;
+		k2 = ROTL64(k2, 33);
+		k2 *= c1;
+		h2 ^= k2;
+
+		h2 = ROTL64(h2, 31);
+		h2 += h1;
+		h2 = h2 * 5 + 0x38495ab5;
+	}
+
+	/* tail */
+
+	{
+		const u8 *tail = (const u8 *)(data + nblocks * 16);
+
+		u64 k1 = 0;
+		u64 k2 = 0;
+
+		switch (len & 15) {
+		case 15:
+			k2 ^= ((u64)tail[14]) << 48;
+			fallthrough;
+		case 14:
+			k2 ^= ((u64)tail[13]) << 40;
+			fallthrough;
+		case 13:
+			k2 ^= ((u64)tail[12]) << 32;
+			fallthrough;
+		case 12:
+			k2 ^= ((u64)tail[11]) << 24;
+			fallthrough;
+		case 11:
+			k2 ^= ((u64)tail[10]) << 16;
+			fallthrough;
+		case 10:
+			k2 ^= ((u64)tail[9]) << 8;
+			fallthrough;
+		case 9:
+			k2 ^= ((u64)tail[8]) << 0;
+			k2 *= c2;
+			k2 = ROTL64(k2, 33);
+			k2 *= c1;
+			h2 ^= k2;
+			fallthrough;
+
+		case 8:
+			k1 ^= ((u64)tail[7]) << 56;
+			fallthrough;
+		case 7:
+			k1 ^= ((u64)tail[6]) << 48;
+			fallthrough;
+		case 6:
+			k1 ^= ((u64)tail[5]) << 40;
+			fallthrough;
+		case 5:
+			k1 ^= ((u64)tail[4]) << 32;
+			fallthrough;
+		case 4:
+			k1 ^= ((u64)tail[3]) << 24;
+			fallthrough;
+		case 3:
+			k1 ^= ((u64)tail[2]) << 16;
+			fallthrough;
+		case 2:
+			k1 ^= ((u64)tail[1]) << 8;
+			fallthrough;
+		case 1:
+			k1 ^= ((u64)tail[0]) << 0;
+			k1 *= c1;
+			k1 = ROTL64(k1, 31);
+			k1 *= c2;
+			h1 ^= k1;
+			break;
+		default:
+			break;
+		};
+	}
+	/* finalization */
+
+	h1 ^= len;
+	h2 ^= len;
+
+	h1 += h2;
+	h2 += h1;
+
+	h1 = fmix64(h1);
+	h2 = fmix64(h2);
+
+	h1 += h2;
+	h2 += h1;
+
+	putblock64((u64 *)out, 0, h1);
+	putblock64((u64 *)out, 1, h2);
+}
diff --git a/drivers/md/dm-vdo/murmurhash3.h b/drivers/md/dm-vdo/murmurhash3.h
new file mode 100644
index 000000000000..d84711ddb659
--- /dev/null
+++ b/drivers/md/dm-vdo/murmurhash3.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ */
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+void murmurhash3_128(const void *key, int len, u32 seed, void *out);
+
+#endif /* _MURMURHASH3_H_ */
-- 
2.40.0


  parent reply	other threads:[~2023-10-26 21:41 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-26 21:40 [PATCH v4 00/39] dm vdo: add the dm-vdo deduplication and compression DM target Matthew Sakai
2023-10-26 21:40 ` [PATCH v4 01/39] dm: add documentation for dm-vdo target Matthew Sakai
2023-10-26 21:40 ` Matthew Sakai [this message]
2024-03-14 23:35   ` [PATCH v4 02/39] dm vdo: add the MurmurHash3 fast hashing algorithm Guenter Roeck
2024-03-18 20:37     ` Kenneth Raeburn
2024-03-18 20:54       ` Guenter Roeck
2024-03-19  1:14         ` Matthew Sakai
2024-03-20 21:44         ` Matthew Sakai
2024-03-20 22:59           ` Guenter Roeck
2023-10-26 21:41 ` [PATCH v4 03/39] dm vdo: add memory allocation utilities Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 04/39] dm vdo: add basic logging and support utilities Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 05/39] dm vdo: add vdo type declarations, constants, and simple data structures Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 06/39] dm vdo: add thread and synchronization utilities Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 07/39] dm vdo: add specialized request queueing functionality Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 08/39] dm vdo: add basic hash map data structures Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 09/39] dm vdo: add deduplication configuration structures Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 10/39] dm vdo: add deduplication index storage interface Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 11/39] dm vdo: implement the delta index Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 12/39] dm vdo: implement the volume index Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 13/39] dm vdo: implement the open chapter and chapter indexes Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 14/39] dm vdo: implement the chapter volume store Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 15/39] dm vdo: implement top-level deduplication index Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 16/39] dm vdo: implement external deduplication index interface Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 17/39] dm vdo: add administrative state and action manager Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 18/39] dm vdo: add vio, the request object for vdo metadata Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 19/39] dm vdo: add data_vio, the request object which services incoming bios Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 20/39] dm vdo: add flush support Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 21/39] dm vdo: add the vdo io_submitter Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 22/39] dm vdo: add hash locks and hash zones Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 23/39] dm vdo: add use of deduplication index in " Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 24/39] dm vdo: add the compressed block bin packer Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 25/39] dm vdo: add slab structure, slab journal and reference counters Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 26/39] dm vdo: add the slab summary Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 27/39] dm vdo: add the block allocators and physical zones Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 28/39] dm vdo: add the slab depot Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 29/39] dm vdo: add the block map Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 30/39] dm vdo: implement the block map page cache Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 31/39] dm vdo: add the recovery journal Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 32/39] dm vdo: add repair of damaged vdo volumes Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 33/39] dm vdo: add the primary vdo structure Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 34/39] dm vdo: add the on-disk formats and marshalling of vdo structures Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 35/39] dm vdo: add statistics reporting Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 36/39] dm vdo: add sysfs support for setting parameters and fetching stats Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 37/39] dm vdo: add debugging support Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 38/39] dm vdo: add the top-level DM target Matthew Sakai
2023-10-26 21:41 ` [PATCH v4 39/39] dm vdo: enable configuration and building of dm-vdo Matthew Sakai
2023-11-01 18:28 ` [PATCH v4 00/39] dm vdo: add the dm-vdo deduplication and compression DM target Mike Snitzer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231026214136.1067410-3-msakai@redhat.com \
    --to=msakai@redhat.com \
    --cc=dm-devel@lists.linux.dev \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).