LKML Archive mirror
 help / color / mirror / Atom feed
From: Haiyang Zhang <haiyangz@microsoft.com>
To: linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: haiyangz@microsoft.com, kys@microsoft.com, olaf@aepfle.de,
	vkuznets@redhat.com, davem@davemloft.net, weiwan@google.com,
	tim.gardner@canonical.com, corbet@lwn.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, dsahern@kernel.org,
	atenart@kernel.org, bagasdotme@gmail.com, ykaliuta@redhat.com,
	kuniyu@amazon.com, stephen@networkplumber.org,
	simon.horman@corigine.com, maheshb@google.com,
	liushixin2@huawei.com, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH net-next] tcp: Make pingpong threshold tunable
Date: Fri,  9 Jun 2023 09:25:59 -0700	[thread overview]
Message-ID: <1686327959-13478-1-git-send-email-haiyangz@microsoft.com> (raw)

TCP pingpong threshold is 1 by default. But some applications, like SQL DB
may prefer a higher pingpong threshold to activate delayed acks in quick
ack mode for better performance.

The pingpong threshold and related code were changed to 3 in the year
2019, and reverted to 1 in the year 2022. There is no single value that
fits all applications.

Add net.core.tcp_pingpong_thresh sysctl tunable, so it can be tuned for
optimal performance based on the application needs.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 Documentation/admin-guide/sysctl/net.rst |  8 ++++++++
 include/net/inet_connection_sock.h       | 14 +++++++++++---
 net/core/sysctl_net_core.c               |  9 +++++++++
 net/ipv4/tcp.c                           |  2 ++
 net/ipv4/tcp_output.c                    | 17 +++++++++++++++--
 5 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 4877563241f3..16f54be9461f 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -413,6 +413,14 @@ historical importance.
 
 Default: 0
 
+tcp_pingpong_thresh
+-------------------
+
+TCP pingpong threshold is 1 by default, but some application may need a higher
+threshold for optimal performance.
+
+Default: 1, min: 1, max: 3
+
 2. /proc/sys/net/unix - Parameters for Unix domain sockets
 ----------------------------------------------------------
 
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c2b15f7e5516..e84e33ddae49 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -324,11 +324,11 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
 
 struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
 
-#define TCP_PINGPONG_THRESH	1
+extern int tcp_pingpong_thresh;
 
 static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
 {
-	inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH;
+	inet_csk(sk)->icsk_ack.pingpong = tcp_pingpong_thresh;
 }
 
 static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
@@ -338,7 +338,15 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
 
 static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
 {
-	return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+	return inet_csk(sk)->icsk_ack.pingpong >= tcp_pingpong_thresh;
+}
+
+static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (icsk->icsk_ack.pingpong < U8_MAX)
+		icsk->icsk_ack.pingpong++;
 }
 
 static inline bool inet_csk_has_ulp(struct sock *sk)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 782273bb93c2..b5253567f2bd 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -653,6 +653,15 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 	},
+	{
+		.procname	= "tcp_pingpong_thresh",
+		.data		= &tcp_pingpong_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_THREE,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 53b7751b68e1..dcd143193d41 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -308,6 +308,8 @@ EXPORT_SYMBOL(tcp_have_smc);
 struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp;
 EXPORT_SYMBOL(tcp_sockets_allocated);
 
+int tcp_pingpong_thresh __read_mostly = 1;
+
 /*
  * TCP splice context
  */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfe128b81a01..576d21621778 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -167,12 +167,25 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
+	/* If tcp_pingpong_thresh > 1, and
+	 * this is the first data packet sent in response to the
+	 * previous received data,
+	 * and it is a reply for ato after last received packet,
+	 * increase pingpong count.
+	 */
+	if (tcp_pingpong_thresh > 1 &&
+	    before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+	    (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+		inet_csk_inc_pingpong_cnt(sk);
+
 	tp->lsndtime = now;
 
-	/* If it is a reply for ato after last received
+	/* If tcp_pingpong_thresh == 1, and
+	 * it is a reply for ato after last received
 	 * packet, enter pingpong mode.
 	 */
-	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+	if (tcp_pingpong_thresh == 1 &&
+	    (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
 		inet_csk_enter_pingpong_mode(sk);
 }
 
-- 
2.25.1


             reply	other threads:[~2023-06-09 16:26 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-09 16:25 Haiyang Zhang [this message]
2023-06-09 19:16 ` [PATCH net-next] tcp: Make pingpong threshold tunable Neal Cardwell
2023-06-09 19:53   ` Kuniyuki Iwashima
2023-06-12 22:05     ` Haiyang Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1686327959-13478-1-git-send-email-haiyangz@microsoft.com \
    --to=haiyangz@microsoft.com \
    --cc=atenart@kernel.org \
    --cc=bagasdotme@gmail.com \
    --cc=corbet@lwn.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=kuniyu@amazon.com \
    --cc=kys@microsoft.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=liushixin2@huawei.com \
    --cc=maheshb@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=olaf@aepfle.de \
    --cc=pabeni@redhat.com \
    --cc=simon.horman@corigine.com \
    --cc=stephen@networkplumber.org \
    --cc=tim.gardner@canonical.com \
    --cc=vkuznets@redhat.com \
    --cc=weiwan@google.com \
    --cc=ykaliuta@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).