cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Alexander Aring <aahringo@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH dlm/next 12/12] fs: dlm: add send ack threshold and append acks to msgs
Date: Mon, 29 May 2023 17:44:40 -0400	[thread overview]
Message-ID: <20230529214440.2542721-12-aahringo@redhat.com> (raw)
In-Reply-To: <20230529214440.2542721-1-aahringo@redhat.com>

This patch changes the time when we sending an ack back to tell the
other side it can free some message because it is arrived on the
receiver node, due random reconnects e.g. TCP resets this is handled as
well on application layer to not let DLM run into a deadlock state.

The current handling has the following problems:

1. We end in situations that we only send an ack back message of 16
   bytes out and no other messages. Whereas DLM has logic to combine
   so much messages as it can in one send() socket call. This behaviour
   can be discovered by "trace-cmd start -e dlm_recv" and observing the
   ret field being 16 bytes.

2. When processing of DLM messages will never end because we receive a
   lot of messages, we will not send an ack back as it happens when
   the processing loop ends.

This patch introduces a likely and unlikely threshold case. The likely
case will send an ack back on a transmit path if the threshold is
triggered of amount of processed upper layer protocol. This will solve
issue 1 because it will be send when another normal DLM message will be
sent. It solves issue 2 because it is not part of the processing loop.

There is however a unlikely case, the unlikely case has a bigger
threshold and will be triggered when we only receive messages and do not
sent any message back. This case avoids that the sending node will keep
a lot of message for a long time as we send sometimes ack backs to tell
the sender to finally release messages.

The atomic cmpxchg() is there to provide a atomically ack send with
reset of the upper layer protocol delivery counter.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/lowcomms.c | 30 -------------------
 fs/dlm/midcomms.c | 76 +++++++++++++++++++----------------------------
 2 files changed, 31 insertions(+), 75 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 345a316ae54c..68092f953830 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -860,30 +860,8 @@ struct dlm_processed_nodes {
 	struct list_head list;
 };
 
-static void add_processed_node(int nodeid, struct list_head *processed_nodes)
-{
-	struct dlm_processed_nodes *n;
-
-	list_for_each_entry(n, processed_nodes, list) {
-		/* we already remembered this node */
-		if (n->nodeid == nodeid)
-			return;
-	}
-
-	/* if it's fails in worst case we simple don't send an ack back.
-	 * We try it next time.
-	 */
-	n = kmalloc(sizeof(*n), GFP_NOFS);
-	if (!n)
-		return;
-
-	n->nodeid = nodeid;
-	list_add(&n->list, processed_nodes);
-}
-
 static void process_dlm_messages(struct work_struct *work)
 {
-	struct dlm_processed_nodes *n, *n_tmp;
 	struct processqueue_entry *pentry;
 	LIST_HEAD(processed_nodes);
 
@@ -902,7 +880,6 @@ static void process_dlm_messages(struct work_struct *work)
 	for (;;) {
 		dlm_process_incoming_buffer(pentry->nodeid, pentry->buf,
 					    pentry->buflen);
-		add_processed_node(pentry->nodeid, &processed_nodes);
 		free_processqueue_entry(pentry);
 
 		spin_lock(&processqueue_lock);
@@ -917,13 +894,6 @@ static void process_dlm_messages(struct work_struct *work)
 		list_del(&pentry->list);
 		spin_unlock(&processqueue_lock);
 	}
-
-	/* send ack back after we processed couple of messages */
-	list_for_each_entry_safe(n, n_tmp, &processed_nodes, list) {
-		list_del(&n->list);
-		dlm_midcomms_receive_done(n->nodeid);
-		kfree(n);
-	}
 }
 
 /* Data received from remote end */
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 70286737eb0a..e1a0df67b566 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -148,6 +148,8 @@
 /* 5 seconds wait to sync ending of dlm */
 #define DLM_SHUTDOWN_TIMEOUT	msecs_to_jiffies(5000)
 #define DLM_VERSION_NOT_SET	0
+#define DLM_SEND_ACK_BACK_MSG_THRESHOLD 32
+#define DLM_RECV_ACK_BACK_MSG_THRESHOLD (DLM_SEND_ACK_BACK_MSG_THRESHOLD * 8)
 
 struct midcomms_node {
 	int nodeid;
@@ -165,7 +167,7 @@ struct midcomms_node {
 #define DLM_NODE_FLAG_CLOSE	1
 #define DLM_NODE_FLAG_STOP_TX	2
 #define DLM_NODE_FLAG_STOP_RX	3
-#define DLM_NODE_ULP_DELIVERED	4
+	atomic_t ulp_delivered;
 	unsigned long flags;
 	wait_queue_head_t shutdown_wait;
 
@@ -319,6 +321,7 @@ static void midcomms_node_reset(struct midcomms_node *node)
 
 	atomic_set(&node->seq_next, DLM_SEQ_INIT);
 	atomic_set(&node->seq_send, DLM_SEQ_INIT);
+	atomic_set(&node->ulp_delivered, 0);
 	node->version = DLM_VERSION_NOT_SET;
 	node->flags = 0;
 
@@ -393,6 +396,28 @@ static int dlm_send_ack(int nodeid, uint32_t seq)
 	return 0;
 }
 
+static void dlm_send_ack_threshold(struct midcomms_node *node,
+				   uint32_t threshold)
+{
+	uint32_t oval, nval;
+	bool send_ack;
+
+	/* let only send one user trigger threshold to send ack back */
+	do {
+		oval = atomic_read(&node->ulp_delivered);
+		send_ack = (oval > threshold);
+		/* abort if threshold is not reached */
+		if (!send_ack)
+			break;
+
+		nval = 0;
+		/* try to reset ulp_delivered counter */
+	} while (atomic_cmpxchg(&node->ulp_delivered, oval, nval) != oval);
+
+	if (send_ack)
+		dlm_send_ack(node->nodeid, atomic_read(&node->seq_next));
+}
+
 static int dlm_send_fin(struct midcomms_node *node,
 			void (*ack_rcv)(struct midcomms_node *node))
 {
@@ -560,7 +585,9 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 			WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
 			dlm_receive_buffer_3_2_trace(seq, p);
 			dlm_receive_buffer(p, node->nodeid);
-			set_bit(DLM_NODE_ULP_DELIVERED, &node->flags);
+			atomic_inc(&node->ulp_delivered);
+			/* unlikely case to send ack back when we don't transmit */
+			dlm_send_ack_threshold(node, DLM_RECV_ACK_BACK_MSG_THRESHOLD);
 			break;
 		}
 	} else {
@@ -969,49 +996,6 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
 	return ret;
 }
 
-void dlm_midcomms_receive_done(int nodeid)
-{
-	struct midcomms_node *node;
-	int idx;
-
-	idx = srcu_read_lock(&nodes_srcu);
-	node = nodeid2node(nodeid, 0);
-	if (!node) {
-		srcu_read_unlock(&nodes_srcu, idx);
-		return;
-	}
-
-	/* old protocol, we do nothing */
-	switch (node->version) {
-	case DLM_VERSION_3_2:
-		break;
-	default:
-		srcu_read_unlock(&nodes_srcu, idx);
-		return;
-	}
-
-	/* do nothing if we didn't delivered stateful to ulp */
-	if (!test_and_clear_bit(DLM_NODE_ULP_DELIVERED,
-				&node->flags)) {
-		srcu_read_unlock(&nodes_srcu, idx);
-		return;
-	}
-
-	spin_lock(&node->state_lock);
-	/* we only ack if state is ESTABLISHED */
-	switch (node->state) {
-	case DLM_ESTABLISHED:
-		spin_unlock(&node->state_lock);
-		dlm_send_ack(node->nodeid, atomic_read(&node->seq_next));
-		break;
-	default:
-		spin_unlock(&node->state_lock);
-		/* do nothing FIN has it's own ack send */
-		break;
-	}
-	srcu_read_unlock(&nodes_srcu, idx);
-}
-
 void dlm_midcomms_unack_msg_resend(int nodeid)
 {
 	struct midcomms_node *node;
@@ -1142,6 +1126,8 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
 			goto err;
 		}
 
+		/* send ack back if necessary */
+		dlm_send_ack_threshold(node, DLM_SEND_ACK_BACK_MSG_THRESHOLD);
 		break;
 	default:
 		dlm_free_mhandle(mh);
-- 
2.31.1


      parent reply	other threads:[~2023-05-29 21:44 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-29 21:44 [Cluster-devel] [PATCH dlm/next 01/12] fs: dlm: revert check required context while close Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 02/12] fs: dlm: clear pending bit when queue was empty Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 03/12] fs: dlm: fix missing pending to false Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 04/12] fs: dlm: unregister memory at the very last Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 05/12] fs: dlm: don't check othercon twice Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 06/12] fs: dlm: cleanup STOP_IO bitflag set when stop io Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 07/12] fs: dlm: move dlm_purge_lkb_callbacks to user module Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 08/12] fs: dlm: warn about messages from left nodes Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 09/12] fs: dlm: filter ourself midcomms calls Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 10/12] fs: dlm: handle lkb wait count as atomic_t Alexander Aring
2023-05-29 21:44 ` [Cluster-devel] [PATCH dlm/next 11/12] fs: dlm: handle sequence numbers as atomic Alexander Aring
2023-05-29 21:44 ` Alexander Aring [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230529214440.2542721-12-aahringo@redhat.com \
    --to=aahringo@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).