All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Cong Wang <xiyou.wangcong@gmail.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, Cong Wang <cong.wang@bytedance.com>,
	John Fastabend <john.fastabend@gmail.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Jakub Sitnicki <jakub@cloudflare.com>,
	Lorenz Bauer <lmb@cloudflare.com>
Subject: [PATCH RESEND bpf v3 1/8] skmsg: improve udp_bpf_recvmsg() accuracy
Date: Mon, 14 Jun 2021 19:13:35 -0700	[thread overview]
Message-ID: <20210615021342.7416-2-xiyou.wangcong@gmail.com> (raw)
In-Reply-To: <20210615021342.7416-1-xiyou.wangcong@gmail.com>

From: Cong Wang <cong.wang@bytedance.com>

I tried to reuse sk_msg_wait_data() for different protocols,
but it turns out it can not be simply reused. For example,
UDP actually uses two queues to receive skb:
udp_sk(sk)->reader_queue and sk->sk_receive_queue. So we have
to check both of them to know whether we have received any
packet.

Also, UDP does not lock the sock during BH Rx path, it makes
no sense for its ->recvmsg() to lock the sock. It is always
possible for ->recvmsg() to be called before packets actually
arrive in the receive queue, we just use best effort to make
it accurate here.

Fixes: 1f5be6b3b063 ("udp: Implement udp_bpf_recvmsg() for sockmap")
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/linux/skmsg.h |  2 --
 net/core/skmsg.c      | 23 ---------------------
 net/ipv4/tcp_bpf.c    | 24 +++++++++++++++++++++-
 net/ipv4/udp_bpf.c    | 47 ++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 65 insertions(+), 31 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index aba0f0f429be..e3d080c299f6 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -126,8 +126,6 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 			      struct sk_msg *msg, u32 bytes);
 int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 			     struct sk_msg *msg, u32 bytes);
-int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
-		     long timeo, int *err);
 int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 		   int len, int flags);
 
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 43ce17a6a585..f9a81b314e4c 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -399,29 +399,6 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 }
 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
 
-int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
-		     long timeo, int *err)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	int ret = 0;
-
-	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		return 1;
-
-	if (!timeo)
-		return ret;
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	ret = sk_wait_event(sk, &timeo,
-			    !list_empty(&psock->ingress_msg) ||
-			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	remove_wait_queue(sk_sleep(sk), &wait);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(sk_msg_wait_data);
-
 /* Receive sk_msg from psock->ingress_msg to @msg. */
 int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 		   int len, int flags)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ad9d17923fc5..bb49b52d7be8 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -163,6 +163,28 @@ static bool tcp_bpf_stream_read(const struct sock *sk)
 	return !empty;
 }
 
+static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+			     long timeo, int *err)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret = 0;
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return 1;
+
+	if (!timeo)
+		return ret;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	ret = sk_wait_event(sk, &timeo,
+			    !list_empty(&psock->ingress_msg) ||
+			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return ret;
+}
+
 static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		    int nonblock, int flags, int *addr_len)
 {
@@ -188,7 +210,7 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		long timeo;
 
 		timeo = sock_rcvtimeo(sk, nonblock);
-		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+		data = tcp_msg_wait_data(sk, psock, flags, timeo, &err);
 		if (data) {
 			if (!sk_psock_queue_empty(psock))
 				goto msg_bytes_ready;
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 954c4591a6fd..565a70040c57 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -21,6 +21,45 @@ static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
 }
 
+static bool udp_sk_has_data(struct sock *sk)
+{
+	return !skb_queue_empty(&udp_sk(sk)->reader_queue) ||
+	       !skb_queue_empty(&sk->sk_receive_queue);
+}
+
+static bool psock_has_data(struct sk_psock *psock)
+{
+	return !skb_queue_empty(&psock->ingress_skb) ||
+	       !sk_psock_queue_empty(psock);
+}
+
+#define udp_msg_has_data(__sk, __psock)	\
+		({ udp_sk_has_data(__sk) || psock_has_data(__psock); })
+
+static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+			     long timeo, int *err)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret = 0;
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return 1;
+
+	if (!timeo)
+		return ret;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	ret = udp_msg_has_data(sk, psock);
+	if (!ret) {
+		wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+		ret = udp_msg_has_data(sk, psock);
+	}
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return ret;
+}
+
 static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			   int nonblock, int flags, int *addr_len)
 {
@@ -34,8 +73,7 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (unlikely(!psock))
 		return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 
-	lock_sock(sk);
-	if (sk_psock_queue_empty(psock)) {
+	if (!psock_has_data(psock)) {
 		ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 		goto out;
 	}
@@ -47,9 +85,9 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		long timeo;
 
 		timeo = sock_rcvtimeo(sk, nonblock);
-		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+		data = udp_msg_wait_data(sk, psock, flags, timeo, &err);
 		if (data) {
-			if (!sk_psock_queue_empty(psock))
+			if (psock_has_data(psock))
 				goto msg_bytes_ready;
 			ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 			goto out;
@@ -62,7 +100,6 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	}
 	ret = copied;
 out:
-	release_sock(sk);
 	sk_psock_put(sk, psock);
 	return ret;
 }
-- 
2.25.1


  reply	other threads:[~2021-06-15  2:23 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-15  2:13 [PATCH RESEND bpf v3 0/8] sock_map: some bug fixes and improvements Cong Wang
2021-06-15  2:13 ` Cong Wang [this message]
2021-06-15  2:13 ` [PATCH RESEND bpf v3 2/8] selftests/bpf: Retry for EAGAIN in udp_redir_to_connected() Cong Wang
2021-06-15  2:13 ` [PATCH RESEND bpf v3 3/8] udp: fix a memory leak in udp_read_sock() Cong Wang
2021-06-15  2:13 ` [PATCH RESEND bpf v3 4/8] skmsg: clear skb redirect pointer before dropping it Cong Wang
2021-06-15  2:13 ` [PATCH RESEND bpf v3 5/8] skmsg: fix a memory leak in sk_psock_verdict_apply() Cong Wang
2021-06-15  2:13 ` [PATCH RESEND bpf v3 6/8] skmsg: teach sk_psock_verdict_apply() to return errors Cong Wang
2021-06-15  2:13 ` [PATCH RESEND bpf v3 7/8] skmsg: pass source psock to sk_psock_skb_redirect() Cong Wang
2021-06-15  2:13 ` [PATCH RESEND bpf v3 8/8] skmsg: increase sk->sk_drops when dropping packets Cong Wang
2021-06-19  9:57 ` [PATCH RESEND bpf v3 0/8] sock_map: some bug fixes and improvements Jakub Sitnicki
2021-06-21 15:00 ` patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210615021342.7416-2-xiyou.wangcong@gmail.com \
    --to=xiyou.wangcong@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=cong.wang@bytedance.com \
    --cc=daniel@iogearbox.net \
    --cc=jakub@cloudflare.com \
    --cc=john.fastabend@gmail.com \
    --cc=lmb@cloudflare.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.