All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Yoan Picchi <yoan.picchi@arm.com>
To: Thomas Monjalon <thomas@monjalon.net>,
	Yipeng Wang <yipeng1.wang@intel.com>,
	Sameh Gobriel <sameh.gobriel@intel.com>,
	Bruce Richardson <bruce.richardson@intel.com>,
	Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Cc: dev@dpdk.org, Yoan Picchi <yoan.picchi@arm.com>
Subject: [PATCH v2 1/4] hash: pack the hitmask for hash in bulk lookup
Date: Fri, 20 Oct 2023 16:51:56 +0000	[thread overview]
Message-ID: <20231020165159.1649282-2-yoan.picchi@arm.com> (raw)
In-Reply-To: <20231020165159.1649282-1-yoan.picchi@arm.com>

Current hitmask includes padding due to Intel's SIMD
implementation detail. This patch allows non Intel SIMD
implementations to benefit from a dense hitmask.

Signed-off-by: Yoan Picchi <yoan.picchi@arm.com>
---
 .mailmap                   |   2 +
 lib/hash/rte_cuckoo_hash.c | 118 ++++++++++++++++++++++++++-----------
 2 files changed, 86 insertions(+), 34 deletions(-)

diff --git a/.mailmap b/.mailmap
index 3f5bab26a8..b9c49aa7f6 100644
--- a/.mailmap
+++ b/.mailmap
@@ -485,6 +485,7 @@ Hari Kumar Vemula <hari.kumarx.vemula@intel.com>
 Harini Ramakrishnan <harini.ramakrishnan@microsoft.com>
 Hariprasad Govindharajan <hariprasad.govindharajan@intel.com>
 Harish Patil <harish.patil@cavium.com> <harish.patil@qlogic.com>
+Harjot Singh <harjot.singh@arm.com>
 Harman Kalra <hkalra@marvell.com>
 Harneet Singh <harneet.singh@intel.com>
 Harold Huang <baymaxhuang@gmail.com>
@@ -1602,6 +1603,7 @@ Yixue Wang <yixue.wang@intel.com>
 Yi Yang <yangyi01@inspur.com> <yi.y.yang@intel.com>
 Yi Zhang <zhang.yi75@zte.com.cn>
 Yoann Desmouceaux <ydesmouc@cisco.com>
+Yoan Picchi <yoan.picchi@arm.com>
 Yogesh Jangra <yogesh.jangra@intel.com>
 Yogev Chaimovich <yogev@cgstowernetworks.com>
 Yongjie Gu <yongjiex.gu@intel.com>
diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index 19b23f2a97..2aa96eb862 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -1850,8 +1850,50 @@ rte_hash_free_key_with_position(const struct rte_hash *h,
 
 }
 
+#if defined(__ARM_NEON)
+
+static inline void
+compare_signatures_dense(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
+			const struct rte_hash_bucket *prim_bkt,
+			const struct rte_hash_bucket *sec_bkt,
+			uint16_t sig,
+			enum rte_hash_sig_compare_function sig_cmp_fn)
+{
+	unsigned int i;
+
+	/* For match mask every bits indicates the match */
+	switch (sig_cmp_fn) {
+	case RTE_HASH_COMPARE_NEON: {
+		uint16x8_t vmat, vsig, x;
+		int16x8_t shift = {0, 1, 2, 3, 4, 5, 6, 7};
+
+		vsig = vld1q_dup_u16((uint16_t const *)&sig);
+		/* Compare all signatures in the primary bucket */
+		vmat = vceqq_u16(vsig,
+			vld1q_u16((uint16_t const *)prim_bkt->sig_current));
+		x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x0001)), shift);
+		*prim_hash_matches = (uint32_t)(vaddvq_u16(x));
+		/* Compare all signatures in the secondary bucket */
+		vmat = vceqq_u16(vsig,
+			vld1q_u16((uint16_t const *)sec_bkt->sig_current));
+		x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x0001)), shift);
+		*sec_hash_matches = (uint32_t)(vaddvq_u16(x));
+		}
+		break;
+	default:
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			*prim_hash_matches |=
+				((sig == prim_bkt->sig_current[i]) << i);
+			*sec_hash_matches |=
+				((sig == sec_bkt->sig_current[i]) << i);
+		}
+	}
+}
+
+#else
+
 static inline void
-compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
+compare_signatures_sparse(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
 			const struct rte_hash_bucket *prim_bkt,
 			const struct rte_hash_bucket *sec_bkt,
 			uint16_t sig,
@@ -1878,25 +1920,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
 		/* Extract the even-index bits only */
 		*sec_hash_matches &= 0x5555;
 		break;
-#elif defined(__ARM_NEON)
-	case RTE_HASH_COMPARE_NEON: {
-		uint16x8_t vmat, vsig, x;
-		int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1};
-
-		vsig = vld1q_dup_u16((uint16_t const *)&sig);
-		/* Compare all signatures in the primary bucket */
-		vmat = vceqq_u16(vsig,
-			vld1q_u16((uint16_t const *)prim_bkt->sig_current));
-		x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
-		*prim_hash_matches = (uint32_t)(vaddvq_u16(x));
-		/* Compare all signatures in the secondary bucket */
-		vmat = vceqq_u16(vsig,
-			vld1q_u16((uint16_t const *)sec_bkt->sig_current));
-		x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
-		*sec_hash_matches = (uint32_t)(vaddvq_u16(x));
-		}
-		break;
-#endif
+#endif /* defined(__SSE2__) */
 	default:
 		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
 			*prim_hash_matches |=
@@ -1907,6 +1931,8 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
 	}
 }
 
+#endif /* defined(__ARM_NEON) */
+
 static inline void
 __bulk_lookup_l(const struct rte_hash *h, const void **keys,
 		const struct rte_hash_bucket **primary_bkt,
@@ -1921,18 +1947,30 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,
 	uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
 	struct rte_hash_bucket *cur_bkt, *next_bkt;
 
+#if defined(__ARM_NEON)
+	const int hitmask_padding = 0;
+#else
+	const int hitmask_padding = 1;
+#endif
+
 	__hash_rw_reader_lock(h);
 
 	/* Compare signatures and prefetch key slot of first hit */
 	for (i = 0; i < num_keys; i++) {
-		compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
+#if defined(__ARM_NEON)
+		compare_signatures_dense(&prim_hitmask[i], &sec_hitmask[i],
+			primary_bkt[i], secondary_bkt[i],
+			sig[i], h->sig_cmp_fn);
+#else
+		compare_signatures_sparse(&prim_hitmask[i], &sec_hitmask[i],
 			primary_bkt[i], secondary_bkt[i],
 			sig[i], h->sig_cmp_fn);
+#endif
 
 		if (prim_hitmask[i]) {
 			uint32_t first_hit =
 					__builtin_ctzl(prim_hitmask[i])
-					>> 1;
+					>> hitmask_padding;
 			uint32_t key_idx =
 				primary_bkt[i]->key_idx[first_hit];
 			const struct rte_hash_key *key_slot =
@@ -1946,7 +1984,7 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,
 		if (sec_hitmask[i]) {
 			uint32_t first_hit =
 					__builtin_ctzl(sec_hitmask[i])
-					>> 1;
+					>> hitmask_padding;
 			uint32_t key_idx =
 				secondary_bkt[i]->key_idx[first_hit];
 			const struct rte_hash_key *key_slot =
@@ -1963,7 +2001,7 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,
 		while (prim_hitmask[i]) {
 			uint32_t hit_index =
 					__builtin_ctzl(prim_hitmask[i])
-					>> 1;
+					>> hitmask_padding;
 			uint32_t key_idx =
 				primary_bkt[i]->key_idx[hit_index];
 			const struct rte_hash_key *key_slot =
@@ -1985,13 +2023,13 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,
 				positions[i] = key_idx - 1;
 				goto next_key;
 			}
-			prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
+			prim_hitmask[i] &= ~(1 << (hit_index << hitmask_padding));
 		}
 
 		while (sec_hitmask[i]) {
 			uint32_t hit_index =
 					__builtin_ctzl(sec_hitmask[i])
-					>> 1;
+					>> hitmask_padding;
 			uint32_t key_idx =
 				secondary_bkt[i]->key_idx[hit_index];
 			const struct rte_hash_key *key_slot =
@@ -2014,7 +2052,7 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,
 				positions[i] = key_idx - 1;
 				goto next_key;
 			}
-			sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
+			sec_hitmask[i] &= ~(1 << (hit_index << hitmask_padding));
 		}
 next_key:
 		continue;
@@ -2069,6 +2107,12 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,
 	struct rte_hash_bucket *cur_bkt, *next_bkt;
 	uint32_t cnt_b, cnt_a;
 
+#if defined(__ARM_NEON)
+	const int hitmask_padding = 0;
+#else
+	const int hitmask_padding = 1;
+#endif
+
 	for (i = 0; i < num_keys; i++)
 		positions[i] = -ENOENT;
 
@@ -2082,14 +2126,20 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,
 
 		/* Compare signatures and prefetch key slot of first hit */
 		for (i = 0; i < num_keys; i++) {
-			compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
+#if defined(__ARM_NEON)
+			compare_signatures_dense(&prim_hitmask[i], &sec_hitmask[i],
 				primary_bkt[i], secondary_bkt[i],
 				sig[i], h->sig_cmp_fn);
+#else
+			compare_signatures_sparse(&prim_hitmask[i], &sec_hitmask[i],
+				primary_bkt[i], secondary_bkt[i],
+				sig[i], h->sig_cmp_fn);
+#endif
 
 			if (prim_hitmask[i]) {
 				uint32_t first_hit =
 						__builtin_ctzl(prim_hitmask[i])
-						>> 1;
+						>> hitmask_padding;
 				uint32_t key_idx =
 					primary_bkt[i]->key_idx[first_hit];
 				const struct rte_hash_key *key_slot =
@@ -2103,7 +2153,7 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,
 			if (sec_hitmask[i]) {
 				uint32_t first_hit =
 						__builtin_ctzl(sec_hitmask[i])
-						>> 1;
+						>> hitmask_padding;
 				uint32_t key_idx =
 					secondary_bkt[i]->key_idx[first_hit];
 				const struct rte_hash_key *key_slot =
@@ -2119,7 +2169,7 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,
 			while (prim_hitmask[i]) {
 				uint32_t hit_index =
 						__builtin_ctzl(prim_hitmask[i])
-						>> 1;
+						>> hitmask_padding;
 				uint32_t key_idx =
 				__atomic_load_n(
 					&primary_bkt[i]->key_idx[hit_index],
@@ -2145,13 +2195,13 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,
 					positions[i] = key_idx - 1;
 					goto next_key;
 				}
-				prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
+				prim_hitmask[i] &= ~(1 << (hit_index << hitmask_padding));
 			}
 
 			while (sec_hitmask[i]) {
 				uint32_t hit_index =
 						__builtin_ctzl(sec_hitmask[i])
-						>> 1;
+						>> hitmask_padding;
 				uint32_t key_idx =
 				__atomic_load_n(
 					&secondary_bkt[i]->key_idx[hit_index],
@@ -2178,7 +2228,7 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,
 					positions[i] = key_idx - 1;
 					goto next_key;
 				}
-				sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
+				sec_hitmask[i] &= ~(1 << (hit_index << hitmask_padding));
 			}
 next_key:
 			continue;
-- 
2.25.1


  reply	other threads:[~2023-10-23  8:55 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-20 16:51 [PATCH v2 0/4] hash: add SVE support for bulk key lookup Yoan Picchi
2023-10-20 16:51 ` Yoan Picchi [this message]
2023-10-20 16:51 ` [PATCH v2 2/4] hash: optimize compare signature for NEON Yoan Picchi
2023-10-20 16:51 ` [PATCH v2 3/4] test/hash: check bulk lookup of keys after collision Yoan Picchi
2023-10-20 16:51 ` [PATCH v2 4/4] hash: add SVE support for bulk key lookup Yoan Picchi
2024-02-27 17:41 ` [PATCH v5 0/4] " Yoan Picchi
2024-02-27 17:42   ` [PATCH v5 1/4] hash: pack the hitmask for hash in bulk lookup Yoan Picchi
2024-02-27 17:42   ` [PATCH v5 2/4] hash: optimize compare signature for NEON Yoan Picchi
2024-02-27 17:42   ` [PATCH v5 3/4] test/hash: check bulk lookup of keys after collision Yoan Picchi
2024-02-27 17:42   ` [PATCH v5 4/4] hash: add SVE support for bulk key lookup Yoan Picchi
2024-02-28 10:56     ` Konstantin Ananyev
2024-02-28 14:48       ` Yoan Picchi
2024-03-04 13:35         ` Konstantin Ananyev
2024-03-05 15:36           ` Yoan Picchi
2024-03-11 23:21 ` [PATCH v6 0/4] " Yoan Picchi
2024-03-11 23:21   ` [PATCH v6 1/4] hash: pack the hitmask for hash in bulk lookup Yoan Picchi
2024-03-11 23:21   ` [PATCH v6 2/4] hash: optimize compare signature for NEON Yoan Picchi
2024-03-11 23:21   ` [PATCH v6 3/4] test/hash: check bulk lookup of keys after collision Yoan Picchi
2024-03-11 23:21   ` [PATCH v6 4/4] hash: add SVE support for bulk key lookup Yoan Picchi
2024-03-12  3:57     ` fengchengwen
2024-03-12 15:08       ` Yoan Picchi
2024-03-12 15:42 ` [PATCH v7 0/4] " Yoan Picchi
2024-03-12 15:42   ` [PATCH v7 1/4] hash: pack the hitmask for hash in bulk lookup Yoan Picchi
2024-03-19 10:41     ` Konstantin Ananyev
2024-03-19 13:09       ` Yoan Picchi
2024-03-19 13:25         ` Konstantin Ananyev
2024-03-19 16:09     ` Stephen Hemminger
2024-03-12 15:42   ` [PATCH v7 2/4] hash: optimize compare signature for NEON Yoan Picchi
2024-03-20  7:37     ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula
2024-04-11 13:32       ` Yoan Picchi
2024-03-12 15:42   ` [PATCH v7 3/4] test/hash: check bulk lookup of keys after collision Yoan Picchi
2024-03-12 15:42   ` [PATCH v7 4/4] hash: add SVE support for bulk key lookup Yoan Picchi
2024-04-17 16:08 ` [PATCH v8 0/4] " Yoan Picchi
2024-04-17 16:08   ` [PATCH v8 1/4] hash: pack the hitmask for hash in bulk lookup Yoan Picchi
2024-04-17 18:12     ` Stephen Hemminger
2024-04-17 16:08   ` [PATCH v8 2/4] hash: optimize compare signature for NEON Yoan Picchi
2024-04-17 16:08   ` [PATCH v8 3/4] test/hash: check bulk lookup of keys after collision Yoan Picchi
2024-04-17 16:08   ` [PATCH v8 4/4] hash: add SVE support for bulk key lookup Yoan Picchi
2024-04-30 16:27 ` [PATCH v9 0/4] " Yoan Picchi
2024-04-30 16:27   ` [PATCH v9 1/4] hash: pack the hitmask for hash in bulk lookup Yoan Picchi
2024-06-14 13:42     ` David Marchand
2024-04-30 16:27   ` [PATCH v9 2/4] hash: optimize compare signature for NEON Yoan Picchi
2024-04-30 16:27   ` [PATCH v9 3/4] test/hash: check bulk lookup of keys after collision Yoan Picchi
2024-04-30 16:27   ` [PATCH v9 4/4] hash: add SVE support for bulk key lookup Yoan Picchi
2024-06-14 13:42     ` David Marchand
2024-06-14 13:43   ` [PATCH v9 0/4] " David Marchand
2024-06-18 15:55     ` Konstantin Ananyev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231020165159.1649282-2-yoan.picchi@arm.com \
    --to=yoan.picchi@arm.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=sameh.gobriel@intel.com \
    --cc=thomas@monjalon.net \
    --cc=vladimir.medvedkin@intel.com \
    --cc=yipeng1.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.