[PATCH bpf-next 0/2] BPF iterator for UNIX domain socket.

All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH bpf-next 0/2] BPF iterator for UNIX domain socket.
@ 2021-07-29 23:36 Kuniyuki Iwashima
  2021-07-29 23:36 ` [PATCH bpf-next 1/2] bpf: af_unix: Implement " Kuniyuki Iwashima
  2021-07-29 23:36 ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Kuniyuki Iwashima
  0 siblings, 2 replies; 15+ messages in thread
From: Kuniyuki Iwashima @ 2021-07-29 23:36 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh
  Cc: Benjamin Herrenschmidt, Kuniyuki Iwashima, Kuniyuki Iwashima, bpf,
	netdev

This patch set adds BPF iterator support for UNIX domain socket.  The first
patch implements it and the second adds a selftest.


Kuniyuki Iwashima (2):
  bpf: af_unix: Implement BPF iterator for UNIX domain socket.
  selftest/bpf: Implement sample UNIX domain socket iterator program.

 include/linux/btf_ids.h                       |  3 +-
 net/unix/af_unix.c                            | 78 +++++++++++++++++++
 .../selftests/bpf/prog_tests/bpf_iter.c       | 17 ++++
 .../selftests/bpf/progs/bpf_iter_unix.c       | 75 ++++++++++++++++++
 4 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c

-- 
2.30.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH bpf-next 1/2] bpf: af_unix: Implement BPF iterator for UNIX domain socket.
  2021-07-29 23:36 [PATCH bpf-next 0/2] BPF iterator for UNIX domain socket Kuniyuki Iwashima
@ 2021-07-29 23:36 ` Kuniyuki Iwashima
  2021-07-30  6:19     ` kernel test robot
  2021-07-30  6:24   ` Yonghong Song
  2021-07-29 23:36 ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Kuniyuki Iwashima
  1 sibling, 2 replies; 15+ messages in thread
From: Kuniyuki Iwashima @ 2021-07-29 23:36 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh
  Cc: Benjamin Herrenschmidt, Kuniyuki Iwashima, Kuniyuki Iwashima, bpf,
	netdev

This patch implements the BPF iterator for the UNIX domain socket.

Currently, the batch optimization introduced for the TCP iterator in the
commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") is not
applied.  It will require replacing the big lock for the hash table with
small locks for each hash list so as not to block other processes.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
---
 include/linux/btf_ids.h |  3 +-
 net/unix/af_unix.c      | 78 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 57890b357f85..bed4b9964581 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -172,7 +172,8 @@ extern struct btf_id_set name;
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)		\
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)			\
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
-	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
 
 enum {
 #define BTF_SOCK_TYPE(name, str) name,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 89927678c0dc..d45ad87e3a49 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -113,6 +113,7 @@
 #include <linux/security.h>
 #include <linux/freezer.h>
 #include <linux/file.h>
+#include <linux/btf_ids.h>
 
 #include "scm.h"
 
@@ -2935,6 +2936,49 @@ static const struct seq_operations unix_seq_ops = {
 	.stop   = unix_seq_stop,
 	.show   = unix_seq_show,
 };
+
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__unix {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct unix_sock *, unix_sk);
+	uid_t uid __aligned(8);
+};
+
+static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+			      struct unix_sock *unix_sk, uid_t uid)
+{
+	struct bpf_iter__unix ctx;
+
+	meta->seq_num--;  /* skip SEQ_START_TOKEN */
+	ctx.meta = meta;
+	ctx.unix_sk = unix_sk;
+	ctx.uid = uid;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	struct sock *sk = v;
+	uid_t uid;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	return unix_prog_seq_show(prog, &meta, v, uid);
+}
+
+static const struct seq_operations bpf_iter_unix_seq_ops = {
+	.start	= unix_seq_start,
+	.next	= unix_seq_next,
+	.stop	= unix_seq_stop,
+	.show	= bpf_iter_unix_seq_show,
+};
+#endif
 #endif
 
 static const struct net_proto_family unix_family_ops = {
@@ -2975,6 +3019,35 @@ static struct pernet_operations unix_net_ops = {
 	.exit = unix_net_exit,
 };
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
+		     struct unix_sock *unix_sk, uid_t uid)
+
+static const struct bpf_iter_seq_info unix_seq_info = {
+	.seq_ops		= &bpf_iter_unix_seq_ops,
+	.init_seq_private	= bpf_iter_init_seq_net,
+	.fini_seq_private	= bpf_iter_fini_seq_net,
+	.seq_priv_size		= sizeof(struct seq_net_private),
+};
+
+static struct bpf_iter_reg unix_reg_info = {
+	.target			= "unix",
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__unix, unix_sk),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+	.seq_info		= &unix_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
+	if (bpf_iter_reg_target(&unix_reg_info))
+		pr_warn("Warning: could not register bpf iterator unix\n");
+}
+#endif
+
 static int __init af_unix_init(void)
 {
 	int rc = -1;
@@ -2990,6 +3063,11 @@ static int __init af_unix_init(void)
 	sock_register(&unix_family_ops);
 	register_pernet_subsys(&unix_net_ops);
 	unix_bpf_build_proto();
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	bpf_iter_register();
+#endif
+
 out:
 	return rc;
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program.
  2021-07-29 23:36 [PATCH bpf-next 0/2] BPF iterator for UNIX domain socket Kuniyuki Iwashima
  2021-07-29 23:36 ` [PATCH bpf-next 1/2] bpf: af_unix: Implement " Kuniyuki Iwashima
@ 2021-07-29 23:36 ` Kuniyuki Iwashima
  2021-07-30  6:54   ` Yonghong Song
  2021-07-30 19:34   ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Andrii Nakryiko
  1 sibling, 2 replies; 15+ messages in thread
From: Kuniyuki Iwashima @ 2021-07-29 23:36 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh
  Cc: Benjamin Herrenschmidt, Kuniyuki Iwashima, Kuniyuki Iwashima, bpf,
	netdev

If there are no abstract sockets, this prog can output the same result
as /proc/net/unix.

  # cat /sys/fs/bpf/unix | head -n 2
  Num       RefCount Protocol Flags    Type St Inode Path
  ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer

  # cat /proc/net/unix | head -n 2
  Num       RefCount Protocol Flags    Type St Inode Path
  ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
 .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f1aade56504..4746bac68d36 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -13,6 +13,7 @@
 #include "bpf_iter_tcp6.skel.h"
 #include "bpf_iter_udp4.skel.h"
 #include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
@@ -313,6 +314,20 @@ static void test_udp6(void)
 	bpf_iter_udp6__destroy(skel);
 }
 
+static void test_unix(void)
+{
+	struct bpf_iter_unix *skel;
+
+	skel = bpf_iter_unix__open_and_load();
+	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_unix);
+
+	bpf_iter_unix__destroy(skel);
+}
+
 /* The expected string is less than 16 bytes */
 static int do_read_with_fd(int iter_fd, const char *expected,
 			   bool read_one_char)
@@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
 		test_udp4();
 	if (test__start_subtest("udp6"))
 		test_udp6();
+	if (test__start_subtest("unix"))
+		test_unix();
 	if (test__start_subtest("anon"))
 		test_anon_iter(false);
 	if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..285ec2f7944d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define __SO_ACCEPTCON		(1 << 16)
+#define UNIX_HASH_SIZE		256
+#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+	struct unix_sock *unix_sk = ctx->unix_sk;
+	struct sock *sk = (struct sock *)unix_sk;
+	struct seq_file *seq;
+	__u32 seq_num;
+
+	if (!unix_sk)
+		return 0;
+
+	seq = ctx->meta->seq;
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
+			       "Type St Inode Path\n");
+
+	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
+		       unix_sk,
+		       sk->sk_refcnt.refs.counter,
+		       0,
+		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+		       sk->sk_type,
+		       sk->sk_socket ?
+		       (sk->sk_state == TCP_ESTABLISHED ?
+			SS_CONNECTED : SS_UNCONNECTED) :
+		       (sk->sk_state == TCP_ESTABLISHED ?
+			SS_CONNECTING : SS_DISCONNECTING),
+		       sock_i_ino(sk));
+
+	if (unix_sk->addr) {
+		if (UNIX_ABSTRACT(unix_sk))
+			/* Abstract UNIX domain socket can contain '\0' in
+			 * the path, and it should be escaped.  However, it
+			 * requires loops and the BPF verifier rejects it.
+			 * So here, print only the escaped first byte to
+			 * indicate it is an abstract UNIX domain socket.
+			 * (See: unix_seq_show() and commit e7947ea770d0d)
+			 */
+			BPF_SEQ_PRINTF(seq, " @");
+		else
+			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+	}
+
+	BPF_SEQ_PRINTF(seq, "\n");
+
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: af_unix: Implement BPF iterator for UNIX domain socket.
  2021-07-29 23:36 ` [PATCH bpf-next 1/2] bpf: af_unix: Implement " Kuniyuki Iwashima
@ 2021-07-30  6:19     ` kernel test robot
  2021-07-30  6:24   ` Yonghong Song
  1 sibling, 0 replies; 15+ messages in thread
From: kernel test robot @ 2021-07-30  6:19 UTC (permalink / raw)
  To: Kuniyuki Iwashima, David S. Miller, Jakub Kicinski,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend
  Cc: kbuild-all, netdev

[-- Attachment #1: Type: text/plain, Size: 1769 bytes --]

Hi Kuniyuki,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Kuniyuki-Iwashima/BPF-iterator-for-UNIX-domain-socket/20210730-073919
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: powerpc-randconfig-r031-20210730 (attached as .config)
compiler: powerpc-linux-gcc (GCC) 10.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/4e824a068d6173a101001fd6a45acd79e938e420
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Kuniyuki-Iwashima/BPF-iterator-for-UNIX-domain-socket/20210730-073919
        git checkout 4e824a068d6173a101001fd6a45acd79e938e420
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-10.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "bpf_iter_fini_seq_net" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "bpf_iter_run_prog" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "bpf_iter_get_info" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "bpf_iter_reg_target" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "btf_sock_ids" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "bpf_iter_init_seq_net" [net/unix/unix.ko] undefined!

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 30055 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: af_unix: Implement BPF iterator for UNIX domain socket.
@ 2021-07-30  6:19     ` kernel test robot
  0 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2021-07-30  6:19 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1806 bytes --]

Hi Kuniyuki,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Kuniyuki-Iwashima/BPF-iterator-for-UNIX-domain-socket/20210730-073919
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: powerpc-randconfig-r031-20210730 (attached as .config)
compiler: powerpc-linux-gcc (GCC) 10.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/4e824a068d6173a101001fd6a45acd79e938e420
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Kuniyuki-Iwashima/BPF-iterator-for-UNIX-domain-socket/20210730-073919
        git checkout 4e824a068d6173a101001fd6a45acd79e938e420
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-10.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "bpf_iter_fini_seq_net" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "bpf_iter_run_prog" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "bpf_iter_get_info" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "bpf_iter_reg_target" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "btf_sock_ids" [net/unix/unix.ko] undefined!
>> ERROR: modpost: "bpf_iter_init_seq_net" [net/unix/unix.ko] undefined!

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 30055 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: af_unix: Implement BPF iterator for UNIX domain socket.
  2021-07-29 23:36 ` [PATCH bpf-next 1/2] bpf: af_unix: Implement " Kuniyuki Iwashima
  2021-07-30  6:19     ` kernel test robot
@ 2021-07-30  6:24   ` Yonghong Song
  2021-07-30  6:53     ` Kuniyuki Iwashima
  1 sibling, 1 reply; 15+ messages in thread
From: Yonghong Song @ 2021-07-30  6:24 UTC (permalink / raw)
  To: Kuniyuki Iwashima, David S. Miller, Jakub Kicinski,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, John Fastabend, KP Singh
  Cc: Benjamin Herrenschmidt, Kuniyuki Iwashima, bpf, netdev



On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> This patch implements the BPF iterator for the UNIX domain socket.
> 
> Currently, the batch optimization introduced for the TCP iterator in the
> commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") is not
> applied.  It will require replacing the big lock for the hash table with
> small locks for each hash list not to block other processes.

Thanks for the contribution. The patch looks okay except for the
missing seq_ops->stop implementation; see below for more explanation.

> 
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>   include/linux/btf_ids.h |  3 +-
>   net/unix/af_unix.c      | 78 +++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 80 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
> index 57890b357f85..bed4b9964581 100644
> --- a/include/linux/btf_ids.h
> +++ b/include/linux/btf_ids.h
> @@ -172,7 +172,8 @@ extern struct btf_id_set name;
>   	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)		\
>   	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)			\
>   	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
> -	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
> +	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)			\
> +	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
>   
>   enum {
>   #define BTF_SOCK_TYPE(name, str) name,
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 89927678c0dc..d45ad87e3a49 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -113,6 +113,7 @@
>   #include <linux/security.h>
>   #include <linux/freezer.h>
>   #include <linux/file.h>
> +#include <linux/btf_ids.h>
>   
>   #include "scm.h"
>   
> @@ -2935,6 +2936,49 @@ static const struct seq_operations unix_seq_ops = {
>   	.stop   = unix_seq_stop,
>   	.show   = unix_seq_show,
>   };
> +
> +#ifdef CONFIG_BPF_SYSCALL
> +struct bpf_iter__unix {
> +	__bpf_md_ptr(struct bpf_iter_meta *, meta);
> +	__bpf_md_ptr(struct unix_sock *, unix_sk);
> +	uid_t uid __aligned(8);
> +};
> +
> +static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
> +			      struct unix_sock *unix_sk, uid_t uid)
> +{
> +	struct bpf_iter__unix ctx;
> +
> +	meta->seq_num--;  /* skip SEQ_START_TOKEN */
> +	ctx.meta = meta;
> +	ctx.unix_sk = unix_sk;
> +	ctx.uid = uid;
> +	return bpf_iter_run_prog(prog, &ctx);
> +}
> +
> +static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
> +{
> +	struct bpf_iter_meta meta;
> +	struct bpf_prog *prog;
> +	struct sock *sk = v;
> +	uid_t uid;
> +
> +	if (v == SEQ_START_TOKEN)
> +		return 0;
> +
> +	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
> +	meta.seq = seq;
> +	prog = bpf_iter_get_info(&meta, false);
> +	return unix_prog_seq_show(prog, &meta, v, uid);
> +}
> +
> +static const struct seq_operations bpf_iter_unix_seq_ops = {
> +	.start	= unix_seq_start,
> +	.next	= unix_seq_next,
> +	.stop	= unix_seq_stop,

Although it is not required for /proc/net/unix, we should still
implement a bpf_iter version of seq_ops->stop here. The main purpose
of the bpf_iter-specific seq_ops->stop is to call the bpf program one
more time after ALL elements have been traversed. Such
functionality is implemented in all other bpf_iter variants.

> +	.show	= bpf_iter_unix_seq_show,
> +};
> +#endif
>   #endif
>   
>   static const struct net_proto_family unix_family_ops = {
> @@ -2975,6 +3019,35 @@ static struct pernet_operations unix_net_ops = {
>   	.exit = unix_net_exit,
>   };
>   
[...]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: af_unix: Implement BPF iterator for UNIX domain socket.
  2021-07-30  6:24   ` Yonghong Song
@ 2021-07-30  6:53     ` Kuniyuki Iwashima
  2021-07-30  7:09       ` Yonghong Song
  0 siblings, 1 reply; 15+ messages in thread
From: Kuniyuki Iwashima @ 2021-07-30  6:53 UTC (permalink / raw)
  To: yhs
  Cc: andrii, ast, benh, bpf, daniel, davem, john.fastabend, kafai,
	kpsingh, kuba, kuni1840, kuniyu, netdev, songliubraving

From:   Yonghong Song <yhs@fb.com>
Date:   Thu, 29 Jul 2021 23:24:41 -0700
> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> > This patch implements the BPF iterator for the UNIX domain socket.
> > 
> > Currently, the batch optimization introduced for the TCP iterator in the
> > commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") is not
> > applied.  It will require replacing the big lock for the hash table with
> > small locks for each hash list not to block other processes.
> 
> Thanks for the contribution. The patch looks okay except
> missing seq_ops->stop implementation, see below for more explanation.
> 
> > 
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> > ---
> >   include/linux/btf_ids.h |  3 +-
> >   net/unix/af_unix.c      | 78 +++++++++++++++++++++++++++++++++++++++++
> >   2 files changed, 80 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
> > index 57890b357f85..bed4b9964581 100644
> > --- a/include/linux/btf_ids.h
> > +++ b/include/linux/btf_ids.h
> > @@ -172,7 +172,8 @@ extern struct btf_id_set name;
> >   	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)		\
> >   	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)			\
> >   	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
> > -	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
> > +	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)			\
> > +	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
> >   
> >   enum {
> >   #define BTF_SOCK_TYPE(name, str) name,
> > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > index 89927678c0dc..d45ad87e3a49 100644
> > --- a/net/unix/af_unix.c
> > +++ b/net/unix/af_unix.c
> > @@ -113,6 +113,7 @@
> >   #include <linux/security.h>
> >   #include <linux/freezer.h>
> >   #include <linux/file.h>
> > +#include <linux/btf_ids.h>
> >   
> >   #include "scm.h"
> >   
> > @@ -2935,6 +2936,49 @@ static const struct seq_operations unix_seq_ops = {
> >   	.stop   = unix_seq_stop,
> >   	.show   = unix_seq_show,
> >   };
> > +
> > +#ifdef CONFIG_BPF_SYSCALL
> > +struct bpf_iter__unix {
> > +	__bpf_md_ptr(struct bpf_iter_meta *, meta);
> > +	__bpf_md_ptr(struct unix_sock *, unix_sk);
> > +	uid_t uid __aligned(8);
> > +};
> > +
> > +static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
> > +			      struct unix_sock *unix_sk, uid_t uid)
> > +{
> > +	struct bpf_iter__unix ctx;
> > +
> > +	meta->seq_num--;  /* skip SEQ_START_TOKEN */
> > +	ctx.meta = meta;
> > +	ctx.unix_sk = unix_sk;
> > +	ctx.uid = uid;
> > +	return bpf_iter_run_prog(prog, &ctx);
> > +}
> > +
> > +static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
> > +{
> > +	struct bpf_iter_meta meta;
> > +	struct bpf_prog *prog;
> > +	struct sock *sk = v;
> > +	uid_t uid;
> > +
> > +	if (v == SEQ_START_TOKEN)
> > +		return 0;
> > +
> > +	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
> > +	meta.seq = seq;
> > +	prog = bpf_iter_get_info(&meta, false);
> > +	return unix_prog_seq_show(prog, &meta, v, uid);
> > +}
> > +
> > +static const struct seq_operations bpf_iter_unix_seq_ops = {
> > +	.start	= unix_seq_start,
> > +	.next	= unix_seq_next,
> > +	.stop	= unix_seq_stop,
> 
> Although it is not required for /proc/net/unix, we should still
> implement bpf_iter version of seq_ops->stop here. The main purpose
> of bpf_iter specific seq_ops->stop is to call bpf program one
> more time after ALL elements have been traversed. Such
> functionality is implemented in all other bpf_iter variants.

Thanks for your review!
I will implement the extra call in the next spin.

Just out of curiosity, is there a specific use case for the last call?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program.
  2021-07-29 23:36 ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Kuniyuki Iwashima
@ 2021-07-30  6:54   ` Yonghong Song
  2021-07-30  7:58     ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain Kuniyuki Iwashima
  2021-07-30 19:34   ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Andrii Nakryiko
  1 sibling, 1 reply; 15+ messages in thread
From: Yonghong Song @ 2021-07-30  6:54 UTC (permalink / raw)
  To: Kuniyuki Iwashima, David S. Miller, Jakub Kicinski,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, John Fastabend, KP Singh
  Cc: Benjamin Herrenschmidt, Kuniyuki Iwashima, bpf, netdev



On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> If there are no abstract sockets, this prog can output the same result
> compared to /proc/net/unix.
> 
>    # cat /sys/fs/bpf/unix | head -n 2
>    Num       RefCount Protocol Flags    Type St Inode Path
>    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> 
>    # cat /proc/net/unix | head -n 2
>    Num       RefCount Protocol Flags    Type St Inode Path
>    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> 
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>   .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
>   .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
>   2 files changed, 92 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 1f1aade56504..4746bac68d36 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -13,6 +13,7 @@
>   #include "bpf_iter_tcp6.skel.h"
>   #include "bpf_iter_udp4.skel.h"
>   #include "bpf_iter_udp6.skel.h"
> +#include "bpf_iter_unix.skel.h"
>   #include "bpf_iter_test_kern1.skel.h"
>   #include "bpf_iter_test_kern2.skel.h"
>   #include "bpf_iter_test_kern3.skel.h"
> @@ -313,6 +314,20 @@ static void test_udp6(void)
>   	bpf_iter_udp6__destroy(skel);
>   }
>   
> +static void test_unix(void)
> +{
> +	struct bpf_iter_unix *skel;
> +
> +	skel = bpf_iter_unix__open_and_load();
> +	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> +		  "skeleton open_and_load failed\n"))
> +		return;
> +
> +	do_dummy_read(skel->progs.dump_unix);
> +
> +	bpf_iter_unix__destroy(skel);
> +}
> +
>   /* The expected string is less than 16 bytes */
>   static int do_read_with_fd(int iter_fd, const char *expected,
>   			   bool read_one_char)
> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
>   		test_udp4();
>   	if (test__start_subtest("udp6"))
>   		test_udp6();
> +	if (test__start_subtest("unix"))
> +		test_unix();
>   	if (test__start_subtest("anon"))
>   		test_anon_iter(false);
>   	if (test__start_subtest("anon-read-one-char"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> new file mode 100644
> index 000000000000..285ec2f7944d
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright Amazon.com Inc. or its affiliates. */
> +#include "bpf_iter.h"

Could you add bpf_iter__unix to bpf_iter.h, similar to bpf_iter__sockmap?
The main purpose is to make the test tolerate an old vmlinux.h.

> +#include "bpf_tracing_net.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +#define __SO_ACCEPTCON		(1 << 16)
> +#define UNIX_HASH_SIZE		256
> +#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)

Could you add the above three defines to bpf_tracing_net.h?
We try to keep all these common defines in a common header for
potential reusability.

> +
> +static long sock_i_ino(const struct sock *sk)
> +{
> +	const struct socket *sk_socket = sk->sk_socket;
> +	const struct inode *inode;
> +	unsigned long ino;
> +
> +	if (!sk_socket)
> +		return 0;
> +
> +	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> +	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> +	return ino;
> +}
> +
> +SEC("iter/unix")
> +int dump_unix(struct bpf_iter__unix *ctx)
> +{
> +	struct unix_sock *unix_sk = ctx->unix_sk;
> +	struct sock *sk = (struct sock *)unix_sk;
> +	struct seq_file *seq;
> +	__u32 seq_num;
> +
> +	if (!unix_sk)
> +		return 0;
> +
> +	seq = ctx->meta->seq;
> +	seq_num = ctx->meta->seq_num;
> +	if (seq_num == 0)
> +		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> +			       "Type St Inode Path\n");
> +
> +	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> +		       unix_sk,
> +		       sk->sk_refcnt.refs.counter,
> +		       0,
> +		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> +		       sk->sk_type,
> +		       sk->sk_socket ?
> +		       (sk->sk_state == TCP_ESTABLISHED ?
> +			SS_CONNECTED : SS_UNCONNECTED) :
> +		       (sk->sk_state == TCP_ESTABLISHED ?
> +			SS_CONNECTING : SS_DISCONNECTING),
> +		       sock_i_ino(sk));
> +
> +	if (unix_sk->addr) {
> +		if (UNIX_ABSTRACT(unix_sk))
> +			/* Abstract UNIX domain socket can contain '\0' in
> +			 * the path, and it should be escaped.  However, it
> +			 * requires loops and the BPF verifier rejects it.
> +			 * So here, print only the escaped first byte to
> +			 * indicate it is an abstract UNIX domain socket.
> +			 * (See: unix_seq_show() and commit e7947ea770d0d)
> +			 */
> +			BPF_SEQ_PRINTF(seq, " @");
> +		else
> +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> +	}

I looked at af_unix.c, for the above "if (unix_sk->addr) { ... }" code,
the following is the kernel source code,

                 if (u->addr) {  // under unix_table_lock here
                         int i, len;
                         seq_putc(seq, ' ');

                         i = 0;
                         len = u->addr->len - sizeof(short);
                         if (!UNIX_ABSTRACT(s))
                                 len--;
                         else {
                                 seq_putc(seq, '@');
                                 i++;
                         }
                         for ( ; i < len; i++)
                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
                                          '@');
                 }

It does not seem to match the non-UNIX_ABSTRACT case in the bpf program.
I am not familiar with unix sockets, so it would be good if you could
explain a little more.

For the verifier issue with loops, do we have a maximum upper bound for
u->addr->len? If yes, would a bounded loop work?

> +
> +	BPF_SEQ_PRINTF(seq, "\n");
> +
> +	return 0;
> +}
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: af_unix: Implement BPF iterator for UNIX domain socket.
  2021-07-30  6:53     ` Kuniyuki Iwashima
@ 2021-07-30  7:09       ` Yonghong Song
  2021-07-30  8:05         ` Kuniyuki Iwashima
  0 siblings, 1 reply; 15+ messages in thread
From: Yonghong Song @ 2021-07-30  7:09 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: andrii, ast, benh, bpf, daniel, davem, john.fastabend, kafai,
	kpsingh, kuba, kuni1840, netdev, songliubraving



On 7/29/21 11:53 PM, Kuniyuki Iwashima wrote:
> From:   Yonghong Song <yhs@fb.com>
> Date:   Thu, 29 Jul 2021 23:24:41 -0700
>> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
>>> This patch implements the BPF iterator for the UNIX domain socket.
>>>
>>> Currently, the batch optimization introduced for the TCP iterator in the
>>> commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") is not
>>> applied.  It will require replacing the big lock for the hash table with
>>> small locks for each hash list not to block other processes.
>>
>> Thanks for the contribution. The patch looks okay except
>> missing seq_ops->stop implementation, see below for more explanation.
>>
>>>
>>> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
>>> ---
>>>    include/linux/btf_ids.h |  3 +-
>>>    net/unix/af_unix.c      | 78 +++++++++++++++++++++++++++++++++++++++++
>>>    2 files changed, 80 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
>>> index 57890b357f85..bed4b9964581 100644
>>> --- a/include/linux/btf_ids.h
>>> +++ b/include/linux/btf_ids.h
>>> @@ -172,7 +172,8 @@ extern struct btf_id_set name;
>>>    	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)		\
>>>    	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)			\
>>>    	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
>>> -	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
>>> +	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)			\
>>> +	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
>>>    
>>>    enum {
>>>    #define BTF_SOCK_TYPE(name, str) name,
>>> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
>>> index 89927678c0dc..d45ad87e3a49 100644
>>> --- a/net/unix/af_unix.c
>>> +++ b/net/unix/af_unix.c
>>> @@ -113,6 +113,7 @@
>>>    #include <linux/security.h>
>>>    #include <linux/freezer.h>
>>>    #include <linux/file.h>
>>> +#include <linux/btf_ids.h>
>>>    
>>>    #include "scm.h"
>>>    
>>> @@ -2935,6 +2936,49 @@ static const struct seq_operations unix_seq_ops = {
>>>    	.stop   = unix_seq_stop,
>>>    	.show   = unix_seq_show,
>>>    };
>>> +
>>> +#ifdef CONFIG_BPF_SYSCALL
>>> +struct bpf_iter__unix {
>>> +	__bpf_md_ptr(struct bpf_iter_meta *, meta);
>>> +	__bpf_md_ptr(struct unix_sock *, unix_sk);
>>> +	uid_t uid __aligned(8);
>>> +};
>>> +
>>> +static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
>>> +			      struct unix_sock *unix_sk, uid_t uid)
>>> +{
>>> +	struct bpf_iter__unix ctx;
>>> +
>>> +	meta->seq_num--;  /* skip SEQ_START_TOKEN */
>>> +	ctx.meta = meta;
>>> +	ctx.unix_sk = unix_sk;
>>> +	ctx.uid = uid;
>>> +	return bpf_iter_run_prog(prog, &ctx);
>>> +}
>>> +
>>> +static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
>>> +{
>>> +	struct bpf_iter_meta meta;
>>> +	struct bpf_prog *prog;
>>> +	struct sock *sk = v;
>>> +	uid_t uid;
>>> +
>>> +	if (v == SEQ_START_TOKEN)
>>> +		return 0;
>>> +
>>> +	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
>>> +	meta.seq = seq;
>>> +	prog = bpf_iter_get_info(&meta, false);
>>> +	return unix_prog_seq_show(prog, &meta, v, uid);
>>> +}
>>> +
>>> +static const struct seq_operations bpf_iter_unix_seq_ops = {
>>> +	.start	= unix_seq_start,
>>> +	.next	= unix_seq_next,
>>> +	.stop	= unix_seq_stop,
>>
>> Although it is not required for /proc/net/unix, we should still
>> implement bpf_iter version of seq_ops->stop here. The main purpose
>> of bpf_iter specific seq_ops->stop is to call bpf program one
>> more time after ALL elements have been traversed. Such
>> functionality is implemented in all other bpf_iter variants.
> 
> Thanks for your review!
> I will implement the extra call in the next spin.
> 
> Just out of curiosity, is there a specific use case for the last call?

We don't have use cases for dumps similar to /proc/net/..., etc.
The original thinking was to permit in-kernel aggregation: the
seq_ops->stop() bpf program will receive an indication that it is the
last bpf program invocation for the iterator, at which point the bpf
program may wrap up aggregation and send/signal the result to user space.
I am not sure whether people have already used this feature, or
whether they have a different way to do that (e.g., checking the map
value directly from user space once the read() length is 0). But
bpf seq_ops->stop() provides an in-kernel way for the bpf program
to respond to the end of iteration.

> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain
  2021-07-30  6:54   ` Yonghong Song
@ 2021-07-30  7:58     ` Kuniyuki Iwashima
  2021-07-30 16:22       ` Yonghong Song
  0 siblings, 1 reply; 15+ messages in thread
From: Kuniyuki Iwashima @ 2021-07-30  7:58 UTC (permalink / raw)
  To: yhs
  Cc: andrii, ast, benh, bpf, daniel, davem, john.fastabend, kafai,
	kpsingh, kuba, kuni1840, kuniyu, netdev, songliubraving

From:   Yonghong Song <yhs@fb.com>
Date:   Thu, 29 Jul 2021 23:54:26 -0700
> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> > If there are no abstract sockets, this prog can output the same result
> > compared to /proc/net/unix.
> > 
> >    # cat /sys/fs/bpf/unix | head -n 2
> >    Num       RefCount Protocol Flags    Type St Inode Path
> >    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> > 
> >    # cat /proc/net/unix | head -n 2
> >    Num       RefCount Protocol Flags    Type St Inode Path
> >    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> > 
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> > ---
> >   .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
> >   .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
> >   2 files changed, 92 insertions(+)
> >   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > 
> > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > index 1f1aade56504..4746bac68d36 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > @@ -13,6 +13,7 @@
> >   #include "bpf_iter_tcp6.skel.h"
> >   #include "bpf_iter_udp4.skel.h"
> >   #include "bpf_iter_udp6.skel.h"
> > +#include "bpf_iter_unix.skel.h"
> >   #include "bpf_iter_test_kern1.skel.h"
> >   #include "bpf_iter_test_kern2.skel.h"
> >   #include "bpf_iter_test_kern3.skel.h"
> > @@ -313,6 +314,20 @@ static void test_udp6(void)
> >   	bpf_iter_udp6__destroy(skel);
> >   }
> >   
> > +static void test_unix(void)
> > +{
> > +	struct bpf_iter_unix *skel;
> > +
> > +	skel = bpf_iter_unix__open_and_load();
> > +	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> > +		  "skeleton open_and_load failed\n"))
> > +		return;
> > +
> > +	do_dummy_read(skel->progs.dump_unix);
> > +
> > +	bpf_iter_unix__destroy(skel);
> > +}
> > +
> >   /* The expected string is less than 16 bytes */
> >   static int do_read_with_fd(int iter_fd, const char *expected,
> >   			   bool read_one_char)
> > @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
> >   		test_udp4();
> >   	if (test__start_subtest("udp6"))
> >   		test_udp6();
> > +	if (test__start_subtest("unix"))
> > +		test_unix();
> >   	if (test__start_subtest("anon"))
> >   		test_anon_iter(false);
> >   	if (test__start_subtest("anon-read-one-char"))
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > new file mode 100644
> > index 000000000000..285ec2f7944d
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > @@ -0,0 +1,75 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright Amazon.com Inc. or its affiliates. */
> > +#include "bpf_iter.h"
> 
> Could you add bpf_iter__unix to bpf_iter.h similar to bpf_iter__sockmap?
> The main purpose is to make test tolerating with old vmlinux.h.

Thank you for the explanation!
I now understand why it is needed even when the same struct is defined.
I'll add it in the next spin.


> 
> > +#include "bpf_tracing_net.h"
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_endian.h>
> > +
> > +char _license[] SEC("license") = "GPL";
> > +
> > +#define __SO_ACCEPTCON		(1 << 16)
> > +#define UNIX_HASH_SIZE		256
> > +#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
> 
> Could you add the above three define's in bpf_tracing_net.h?
> We try to keep all these common defines in a common header for
> potential reusability.

Will do.


> 
> > +
> > +static long sock_i_ino(const struct sock *sk)
> > +{
> > +	const struct socket *sk_socket = sk->sk_socket;
> > +	const struct inode *inode;
> > +	unsigned long ino;
> > +
> > +	if (!sk_socket)
> > +		return 0;
> > +
> > +	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> > +	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> > +	return ino;
> > +}
> > +
> > +SEC("iter/unix")
> > +int dump_unix(struct bpf_iter__unix *ctx)
> > +{
> > +	struct unix_sock *unix_sk = ctx->unix_sk;
> > +	struct sock *sk = (struct sock *)unix_sk;
> > +	struct seq_file *seq;
> > +	__u32 seq_num;
> > +
> > +	if (!unix_sk)
> > +		return 0;
> > +
> > +	seq = ctx->meta->seq;
> > +	seq_num = ctx->meta->seq_num;
> > +	if (seq_num == 0)
> > +		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> > +			       "Type St Inode Path\n");
> > +
> > +	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> > +		       unix_sk,
> > +		       sk->sk_refcnt.refs.counter,
> > +		       0,
> > +		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> > +		       sk->sk_type,
> > +		       sk->sk_socket ?
> > +		       (sk->sk_state == TCP_ESTABLISHED ?
> > +			SS_CONNECTED : SS_UNCONNECTED) :
> > +		       (sk->sk_state == TCP_ESTABLISHED ?
> > +			SS_CONNECTING : SS_DISCONNECTING),
> > +		       sock_i_ino(sk));
> > +
> > +	if (unix_sk->addr) {
> > +		if (UNIX_ABSTRACT(unix_sk))
> > +			/* Abstract UNIX domain socket can contain '\0' in
> > +			 * the path, and it should be escaped.  However, it
> > +			 * requires loops and the BPF verifier rejects it.
> > +			 * So here, print only the escaped first byte to
> > +			 * indicate it is an abstract UNIX domain socket.
> > +			 * (See: unix_seq_show() and commit e7947ea770d0d)
> > +			 */
> > +			BPF_SEQ_PRINTF(seq, " @");
> > +		else
> > +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> > +	}
> 
> I looked at af_unix.c, for the above "if (unix_sk->addr) { ... }" code,
> the following is the kernel source code,
> 
>                  if (u->addr) {  // under unix_table_lock here
>                          int i, len;
>                          seq_putc(seq, ' ');
> 
>                          i = 0;
>                          len = u->addr->len - sizeof(short);
>                          if (!UNIX_ABSTRACT(s))
>                                  len--;
>                          else {
>                                  seq_putc(seq, '@');
>                                  i++;
>                          }
>                          for ( ; i < len; i++)
>                                  seq_putc(seq, u->addr->name->sun_path[i] ?:
>                                           '@');
>                  }
> 
> It does not seem to match bpf program non UNIX_ABSTRACT case.
> I am not familiar with unix socket so it would be good if you can 
> explain a little more.

There are three kinds of unix sockets: pathname, unnamed, and abstract.  The
first two terminate the addr with `\0`, but an abstract addr must start with
`\0` and can contain `\0` anywhere in addr.  A `\0` in the addr of an abstract
socket does not have special meaning. [1]

They are inserted into the same hash table in unix_bind(), so the bpf prog
matches all of them.

``` net/unix/af_unix.c
  1114		if (sun_path[0])
  1115			err = unix_bind_bsd(sk, addr);
  1116		else
  1117			err = unix_bind_abstract(sk, addr);
```

[1]: https://man7.org/linux/man-pages/man7/unix.7.html


> 
> For verifier issue with loops, do we have a maximum upper bound for 
> u->addr->len? If yes, does bounded loop work?

That has a maximum length in unix_mkname(): sizeof(struct sockaddr_un).

``` net/unix/af_unix.c
   223	/*
   224	 *	Check unix socket name:
   225	 *		- should be not zero length.
   226	 *	        - if started by not zero, should be NULL terminated (FS object)
   227	 *		- if started by zero, it is abstract name.
   228	 */
   229	
   230	static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
   231	{
...
   234		if (len <= sizeof(short) || len > sizeof(*sunaddr))
   235			return -EINVAL;
...
   253	}
```

So, I rewrote the test like this, but it still causes an error.

```
	if (unix_sk->addr) {
		int i, len;

		len = unix_sk->addr->len - sizeof(short);

		if (!UNIX_ABSTRACT(unix_sk)) {
			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
		} else {
			BPF_SEQ_PRINTF(seq, " @");
			i++;

			if (len < sizeof(struct sockaddr_un)) {
				for (i = 1 ; i < len; i++)
					BPF_SEQ_PRINTF(seq, "%c",
						       unix_sk->addr->name->sun_path[i] ?:
						       '@');
			}
		}
	}
```

```
processed 196505 insns (limit 1000000) max_states_per_insn 4 total_states 1830 peak_states 1830 mark_read 3
```


> 
> > +
> > +	BPF_SEQ_PRINTF(seq, "\n");
> > +
> > +	return 0;
> > +}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: af_unix: Implement BPF iterator for UNIX domain socket.
  2021-07-30  7:09       ` Yonghong Song
@ 2021-07-30  8:05         ` Kuniyuki Iwashima
  0 siblings, 0 replies; 15+ messages in thread
From: Kuniyuki Iwashima @ 2021-07-30  8:05 UTC (permalink / raw)
  To: yhs
  Cc: andrii, ast, benh, bpf, daniel, davem, john.fastabend, kafai,
	kpsingh, kuba, kuni1840, kuniyu, netdev, songliubraving

From:   Yonghong Song <yhs@fb.com>
Date:   Fri, 30 Jul 2021 00:09:08 -0700
> On 7/29/21 11:53 PM, Kuniyuki Iwashima wrote:
> > From:   Yonghong Song <yhs@fb.com>
> > Date:   Thu, 29 Jul 2021 23:24:41 -0700
> >> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> >>> This patch implements the BPF iterator for the UNIX domain socket.
> >>>
> >>> Currently, the batch optimization introduced for the TCP iterator in the
> >>> commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") is not
> >>> applied.  It will require replacing the big lock for the hash table with
> >>> small locks for each hash list not to block other processes.
> >>
> >> Thanks for the contribution. The patch looks okay except
> >> missing seq_ops->stop implementation, see below for more explanation.
> >>
> >>>
> >>> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> >>> ---
> >>>    include/linux/btf_ids.h |  3 +-
> >>>    net/unix/af_unix.c      | 78 +++++++++++++++++++++++++++++++++++++++++
> >>>    2 files changed, 80 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
> >>> index 57890b357f85..bed4b9964581 100644
> >>> --- a/include/linux/btf_ids.h
> >>> +++ b/include/linux/btf_ids.h
> >>> @@ -172,7 +172,8 @@ extern struct btf_id_set name;
> >>>    	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)		\
> >>>    	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)			\
> >>>    	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
> >>> -	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
> >>> +	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)			\
> >>> +	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
> >>>    
> >>>    enum {
> >>>    #define BTF_SOCK_TYPE(name, str) name,
> >>> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> >>> index 89927678c0dc..d45ad87e3a49 100644
> >>> --- a/net/unix/af_unix.c
> >>> +++ b/net/unix/af_unix.c
> >>> @@ -113,6 +113,7 @@
> >>>    #include <linux/security.h>
> >>>    #include <linux/freezer.h>
> >>>    #include <linux/file.h>
> >>> +#include <linux/btf_ids.h>
> >>>    
> >>>    #include "scm.h"
> >>>    
> >>> @@ -2935,6 +2936,49 @@ static const struct seq_operations unix_seq_ops = {
> >>>    	.stop   = unix_seq_stop,
> >>>    	.show   = unix_seq_show,
> >>>    };
> >>> +
> >>> +#ifdef CONFIG_BPF_SYSCALL
> >>> +struct bpf_iter__unix {
> >>> +	__bpf_md_ptr(struct bpf_iter_meta *, meta);
> >>> +	__bpf_md_ptr(struct unix_sock *, unix_sk);
> >>> +	uid_t uid __aligned(8);
> >>> +};
> >>> +
> >>> +static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
> >>> +			      struct unix_sock *unix_sk, uid_t uid)
> >>> +{
> >>> +	struct bpf_iter__unix ctx;
> >>> +
> >>> +	meta->seq_num--;  /* skip SEQ_START_TOKEN */
> >>> +	ctx.meta = meta;
> >>> +	ctx.unix_sk = unix_sk;
> >>> +	ctx.uid = uid;
> >>> +	return bpf_iter_run_prog(prog, &ctx);
> >>> +}
> >>> +
> >>> +static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
> >>> +{
> >>> +	struct bpf_iter_meta meta;
> >>> +	struct bpf_prog *prog;
> >>> +	struct sock *sk = v;
> >>> +	uid_t uid;
> >>> +
> >>> +	if (v == SEQ_START_TOKEN)
> >>> +		return 0;
> >>> +
> >>> +	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
> >>> +	meta.seq = seq;
> >>> +	prog = bpf_iter_get_info(&meta, false);
> >>> +	return unix_prog_seq_show(prog, &meta, v, uid);
> >>> +}
> >>> +
> >>> +static const struct seq_operations bpf_iter_unix_seq_ops = {
> >>> +	.start	= unix_seq_start,
> >>> +	.next	= unix_seq_next,
> >>> +	.stop	= unix_seq_stop,
> >>
> >> Although it is not required for /proc/net/unix, we should still
> >> implement bpf_iter version of seq_ops->stop here. The main purpose
> >> of bpf_iter specific seq_ops->stop is to call bpf program one
> >> more time after ALL elements have been traversed. Such
> >> functionality is implemented in all other bpf_iter variants.
> > 
> > Thanks for your review!
> > I will implement the extra call in the next spin.
> > 
> > Just out of curiosity, is there a specific use case for the last call?
> 
> We don't have use cases for dumps similar to /proc/net/... etc.
> The original thinking is to permit in-kernel aggregation and the
> seq_ops->stop() bpf program will have an indication as the last
> bpf program invocation for the iterator at which point bpf program
> may wrap up aggregation and send/signal the result to user space.
> I am not sure whether people already used this feature or not, or
> people may have different way to do that (e.g., from user space
> directly checking map value if read() length is 0). But
> bpf seq_ops->stop() provides an in-kernel way for bpf program
> to respond to the end of iterating.

Aggregation, that makes sense.
Thank you!

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain
  2021-07-30  7:58     ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain Kuniyuki Iwashima
@ 2021-07-30 16:22       ` Yonghong Song
  2021-07-30 22:55         ` Kuniyuki Iwashima
  0 siblings, 1 reply; 15+ messages in thread
From: Yonghong Song @ 2021-07-30 16:22 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: andrii, ast, benh, bpf, daniel, davem, john.fastabend, kafai,
	kpsingh, kuba, kuni1840, netdev, songliubraving



On 7/30/21 12:58 AM, Kuniyuki Iwashima wrote:
> From:   Yonghong Song <yhs@fb.com>
> Date:   Thu, 29 Jul 2021 23:54:26 -0700
>> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
>>> If there are no abstract sockets, this prog can output the same result
>>> compared to /proc/net/unix.
>>>
>>>     # cat /sys/fs/bpf/unix | head -n 2
>>>     Num       RefCount Protocol Flags    Type St Inode Path
>>>     ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>>>
>>>     # cat /proc/net/unix | head -n 2
>>>     Num       RefCount Protocol Flags    Type St Inode Path
>>>     ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>>>
>>> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
>>> ---
>>>    .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
>>>    .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
>>>    2 files changed, 92 insertions(+)
>>>    create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>>>
>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> index 1f1aade56504..4746bac68d36 100644
>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> @@ -13,6 +13,7 @@
>>>    #include "bpf_iter_tcp6.skel.h"
>>>    #include "bpf_iter_udp4.skel.h"
>>>    #include "bpf_iter_udp6.skel.h"
>>> +#include "bpf_iter_unix.skel.h"
>>>    #include "bpf_iter_test_kern1.skel.h"
>>>    #include "bpf_iter_test_kern2.skel.h"
>>>    #include "bpf_iter_test_kern3.skel.h"
>>> @@ -313,6 +314,20 @@ static void test_udp6(void)
>>>    	bpf_iter_udp6__destroy(skel);
>>>    }
>>>    
>>> +static void test_unix(void)
>>> +{
>>> +	struct bpf_iter_unix *skel;
>>> +
>>> +	skel = bpf_iter_unix__open_and_load();
>>> +	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
>>> +		  "skeleton open_and_load failed\n"))
>>> +		return;
>>> +
>>> +	do_dummy_read(skel->progs.dump_unix);
>>> +
>>> +	bpf_iter_unix__destroy(skel);
>>> +}
>>> +
>>>    /* The expected string is less than 16 bytes */
>>>    static int do_read_with_fd(int iter_fd, const char *expected,
>>>    			   bool read_one_char)
>>> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
>>>    		test_udp4();
>>>    	if (test__start_subtest("udp6"))
>>>    		test_udp6();
>>> +	if (test__start_subtest("unix"))
>>> +		test_unix();
>>>    	if (test__start_subtest("anon"))
>>>    		test_anon_iter(false);
>>>    	if (test__start_subtest("anon-read-one-char"))
>>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>>> new file mode 100644
>>> index 000000000000..285ec2f7944d
>>> --- /dev/null
>>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>>> @@ -0,0 +1,75 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/* Copyright Amazon.com Inc. or its affiliates. */
>>> +#include "bpf_iter.h"
>>
>> Could you add bpf_iter__unix to bpf_iter.h similar to bpf_iter__sockmap?
>> The main purpose is to make test tolerating with old vmlinux.h.
> 
> Thank you for explanation!
> I've understood why it is needed even when the same struct is defined.
> I'll add it in the next spin.
> 
> 
>>
>>> +#include "bpf_tracing_net.h"
>>> +#include <bpf/bpf_helpers.h>
>>> +#include <bpf/bpf_endian.h>
>>> +
>>> +char _license[] SEC("license") = "GPL";
>>> +
>>> +#define __SO_ACCEPTCON		(1 << 16)
>>> +#define UNIX_HASH_SIZE		256
>>> +#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
>>
>> Could you add the above three define's in bpf_tracing_net.h?
>> We try to keep all these common defines in a common header for
>> potential reusability.
> 
> Will do.
> 
> 
>>
>>> +
>>> +static long sock_i_ino(const struct sock *sk)
>>> +{
>>> +	const struct socket *sk_socket = sk->sk_socket;
>>> +	const struct inode *inode;
>>> +	unsigned long ino;
>>> +
>>> +	if (!sk_socket)
>>> +		return 0;
>>> +
>>> +	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
>>> +	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
>>> +	return ino;
>>> +}
>>> +
>>> +SEC("iter/unix")
>>> +int dump_unix(struct bpf_iter__unix *ctx)
>>> +{
>>> +	struct unix_sock *unix_sk = ctx->unix_sk;
>>> +	struct sock *sk = (struct sock *)unix_sk;
>>> +	struct seq_file *seq;
>>> +	__u32 seq_num;
>>> +
>>> +	if (!unix_sk)
>>> +		return 0;
>>> +
>>> +	seq = ctx->meta->seq;
>>> +	seq_num = ctx->meta->seq_num;
>>> +	if (seq_num == 0)
>>> +		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
>>> +			       "Type St Inode Path\n");
>>> +
>>> +	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
>>> +		       unix_sk,
>>> +		       sk->sk_refcnt.refs.counter,
>>> +		       0,
>>> +		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
>>> +		       sk->sk_type,
>>> +		       sk->sk_socket ?
>>> +		       (sk->sk_state == TCP_ESTABLISHED ?
>>> +			SS_CONNECTED : SS_UNCONNECTED) :
>>> +		       (sk->sk_state == TCP_ESTABLISHED ?
>>> +			SS_CONNECTING : SS_DISCONNECTING),
>>> +		       sock_i_ino(sk));
>>> +
>>> +	if (unix_sk->addr) {
>>> +		if (UNIX_ABSTRACT(unix_sk))
>>> +			/* Abstract UNIX domain socket can contain '\0' in
>>> +			 * the path, and it should be escaped.  However, it
>>> +			 * requires loops and the BPF verifier rejects it.
>>> +			 * So here, print only the escaped first byte to
>>> +			 * indicate it is an abstract UNIX domain socket.
>>> +			 * (See: unix_seq_show() and commit e7947ea770d0d)
>>> +			 */
>>> +			BPF_SEQ_PRINTF(seq, " @");
>>> +		else
>>> +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
>>> +	}
>>
>> I looked at af_unix.c, for the above "if (unix_sk->addr) { ... }" code,
>> the following is the kernel source code,
>>
>>                   if (u->addr) {  // under unix_table_lock here
>>                           int i, len;
>>                           seq_putc(seq, ' ');
>>
>>                           i = 0;
>>                           len = u->addr->len - sizeof(short);
>>                           if (!UNIX_ABSTRACT(s))
>>                                   len--;
>>                           else {
>>                                   seq_putc(seq, '@');
>>                                   i++;
>>                           }
>>                           for ( ; i < len; i++)
>>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
>>                                            '@');
>>                   }
>>
>> It does not seem to match bpf program non UNIX_ABSTRACT case.
>> I am not familiar with unix socket so it would be good if you can
>> explain a little more.
> 
> There is three kinds of unix sockets: pathname, unnamed, abstract.  The
> first two terminate the addr with `\0`, but abstract must start with `\0`
> and can contain `\0` anywhere in addr.  The `\0` in addr of abstract socket
> does not have special meaning. [1]
> 
> They are inserted into the same hash table in unix_bind(), so the bpf prog
> matches all of them.
> 
> ``` net/unix/af_unix.c
>    1114		if (sun_path[0])
>    1115			err = unix_bind_bsd(sk, addr);
>    1116		else
>    1117			err = unix_bind_abstract(sk, addr);
> ```
> 
> [1]: https://man7.org/linux/man-pages/man7/unix.7.html
> 
> 
>>
>> For verifier issue with loops, do we have a maximum upper bound for
>> u->addr->len? If yes, does bounded loop work?
> 
> That has a maximum length in unix_mkname(): sizeof(struct sockaddr_un).
> 
> ``` net/unix/af_unix.c
>     223	/*
>     224	 *	Check unix socket name:
>     225	 *		- should be not zero length.
>     226	 *	        - if started by not zero, should be NULL terminated (FS object)
>     227	 *		- if started by zero, it is abstract name.
>     228	 */
>     229	
>     230	static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
>     231	{
> ...
>     234		if (len <= sizeof(short) || len > sizeof(*sunaddr))
>     235			return -EINVAL;
> ...
>     253	}
> ```
> 
> So, I rewrote the test like this, but it still causes an error.
> 
> ```
> 	if (unix_sk->addr) {
> 		int i, len;
> 
> 		len = unix_sk->addr->len - sizeof(short);
> 
> 		if (!UNIX_ABSTRACT(unix_sk)) {
> 			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> 		} else {
> 			BPF_SEQ_PRINTF(seq, " @");
> 			i++;

i++ is not useful here and "i" is not initialized earlier.

> 
> 			if (len < sizeof(struct sockaddr_un)) {
> 				for (i = 1 ; i < len; i++)
> 					BPF_SEQ_PRINTF(seq, "%c",
> 						       unix_sk->addr->name->sun_path[i] ?:
> 						       '@');
> 			}
> 		}
> 	}
> ```
> 
> ```
> processed 196505 insns (limit 1000000) max_states_per_insn 4 total_states 1830 peak_states 1830 mark_read 3
> ```

I did some debugging; the main reason is that the llvm compiler used "!="
instead of "<" for the "i < len" comparison.

      107:       b4 05 00 00 08 00 00 00 w5 = 8
      108:       85 00 00 00 7e 00 00 00 call 126
;                               for (i = 1 ; i < len; i++)
      109:       07 09 00 00 01 00 00 00 r9 += 1
      110:       5d 98 09 00 00 00 00 00 if r8 != r9 goto +9 <LBB0_18>


Considering that "len" is not a constant, for the verifier, r8 will never be
equal to r9 in the above.

Digging into the llvm compilation, it is the llvm Induction Variable
Simplification pass that made this code change. I will try to dig more and
find a solution. In your next revision, could you add the above code
as a comment so that once llvm code gen is improved, we can have a proper
implementation to match /proc/net/unix?

> 
>>
>>> +
>>> +	BPF_SEQ_PRINTF(seq, "\n");
>>> +
>>> +	return 0;
>>> +}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program.
  2021-07-29 23:36 ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Kuniyuki Iwashima
  2021-07-30  6:54   ` Yonghong Song
@ 2021-07-30 19:34   ` Andrii Nakryiko
  2021-07-30 23:03     ` Kuniyuki Iwashima
  1 sibling, 1 reply; 15+ messages in thread
From: Andrii Nakryiko @ 2021-07-30 19:34 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: David S. Miller, Jakub Kicinski, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Benjamin Herrenschmidt,
	Kuniyuki Iwashima, bpf, Networking

On Thu, Jul 29, 2021 at 4:37 PM Kuniyuki Iwashima <kuniyu@amazon.co.jp> wrote:
>
> If there are no abstract sockets, this prog can output the same result
> compared to /proc/net/unix.
>
>   # cat /sys/fs/bpf/unix | head -n 2
>   Num       RefCount Protocol Flags    Type St Inode Path
>   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>
>   # cat /proc/net/unix | head -n 2
>   Num       RefCount Protocol Flags    Type St Inode Path
>   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>  .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
>  .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
>  2 files changed, 92 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 1f1aade56504..4746bac68d36 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -13,6 +13,7 @@
>  #include "bpf_iter_tcp6.skel.h"
>  #include "bpf_iter_udp4.skel.h"
>  #include "bpf_iter_udp6.skel.h"
> +#include "bpf_iter_unix.skel.h"
>  #include "bpf_iter_test_kern1.skel.h"
>  #include "bpf_iter_test_kern2.skel.h"
>  #include "bpf_iter_test_kern3.skel.h"
> @@ -313,6 +314,20 @@ static void test_udp6(void)
>         bpf_iter_udp6__destroy(skel);
>  }
>
> +static void test_unix(void)
> +{
> +       struct bpf_iter_unix *skel;
> +
> +       skel = bpf_iter_unix__open_and_load();
> +       if (CHECK(!skel, "bpf_iter_unix__open_and_load",

please use the new ASSERT_OK_PTR() macro instead

> +                 "skeleton open_and_load failed\n"))
> +               return;
> +
> +       do_dummy_read(skel->progs.dump_unix);
> +
> +       bpf_iter_unix__destroy(skel);
> +}
> +
>  /* The expected string is less than 16 bytes */
>  static int do_read_with_fd(int iter_fd, const char *expected,
>                            bool read_one_char)
> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
>                 test_udp4();
>         if (test__start_subtest("udp6"))
>                 test_udp6();
> +       if (test__start_subtest("unix"))
> +               test_unix();
>         if (test__start_subtest("anon"))
>                 test_anon_iter(false);
>         if (test__start_subtest("anon-read-one-char"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> new file mode 100644
> index 000000000000..285ec2f7944d
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright Amazon.com Inc. or its affiliates. */
> +#include "bpf_iter.h"
> +#include "bpf_tracing_net.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +#define __SO_ACCEPTCON         (1 << 16)
> +#define UNIX_HASH_SIZE         256
> +#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
> +
> +static long sock_i_ino(const struct sock *sk)
> +{
> +       const struct socket *sk_socket = sk->sk_socket;
> +       const struct inode *inode;
> +       unsigned long ino;
> +
> +       if (!sk_socket)
> +               return 0;
> +
> +       inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> +       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> +       return ino;
> +}
> +
> +SEC("iter/unix")
> +int dump_unix(struct bpf_iter__unix *ctx)
> +{
> +       struct unix_sock *unix_sk = ctx->unix_sk;
> +       struct sock *sk = (struct sock *)unix_sk;
> +       struct seq_file *seq;
> +       __u32 seq_num;
> +
> +       if (!unix_sk)
> +               return 0;
> +
> +       seq = ctx->meta->seq;
> +       seq_num = ctx->meta->seq_num;
> +       if (seq_num == 0)
> +               BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> +                              "Type St Inode Path\n");
> +
> +       BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> +                      unix_sk,
> +                      sk->sk_refcnt.refs.counter,
> +                      0,
> +                      sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> +                      sk->sk_type,
> +                      sk->sk_socket ?
> +                      (sk->sk_state == TCP_ESTABLISHED ?
> +                       SS_CONNECTED : SS_UNCONNECTED) :
> +                      (sk->sk_state == TCP_ESTABLISHED ?
> +                       SS_CONNECTING : SS_DISCONNECTING),

nit: I'd keep these ternary operators on a single line for
readability. Same for header PRINTF above.

> +                      sock_i_ino(sk));
> +
> +       if (unix_sk->addr) {
> +               if (UNIX_ABSTRACT(unix_sk))
> +                       /* Abstract UNIX domain socket can contain '\0' in
> +                        * the path, and it should be escaped.  However, it
> +                        * requires loops and the BPF verifier rejects it.
> +                        * So here, print only the escaped first byte to
> +                        * indicate it is an abstract UNIX domain socket.
> +                        * (See: unix_seq_show() and commit e7947ea770d0d)
> +                        */
> +                       BPF_SEQ_PRINTF(seq, " @");
> +               else
> +                       BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> +       }
> +
> +       BPF_SEQ_PRINTF(seq, "\n");
> +
> +       return 0;
> +}
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain
  2021-07-30 16:22       ` Yonghong Song
@ 2021-07-30 22:55         ` Kuniyuki Iwashima
  0 siblings, 0 replies; 15+ messages in thread
From: Kuniyuki Iwashima @ 2021-07-30 22:55 UTC (permalink / raw)
  To: yhs
  Cc: andrii, ast, benh, bpf, daniel, davem, john.fastabend, kafai,
	kpsingh, kuba, kuni1840, kuniyu, netdev, songliubraving

From:   Yonghong Song <yhs@fb.com>
Date:   Fri, 30 Jul 2021 09:22:32 -0700
> On 7/30/21 12:58 AM, Kuniyuki Iwashima wrote:
> > From:   Yonghong Song <yhs@fb.com>
> > Date:   Thu, 29 Jul 2021 23:54:26 -0700
> >> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> >>> If there are no abstract sockets, this prog can output the same result
> >>> compared to /proc/net/unix.
> >>>
> >>>     # cat /sys/fs/bpf/unix | head -n 2
> >>>     Num       RefCount Protocol Flags    Type St Inode Path
> >>>     ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >>>
> >>>     # cat /proc/net/unix | head -n 2
> >>>     Num       RefCount Protocol Flags    Type St Inode Path
> >>>     ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >>>
> >>> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> >>> ---
> >>>    .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
> >>>    .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
> >>>    2 files changed, 92 insertions(+)
> >>>    create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >>>
> >>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> >>> index 1f1aade56504..4746bac68d36 100644
> >>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> >>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> >>> @@ -13,6 +13,7 @@
> >>>    #include "bpf_iter_tcp6.skel.h"
> >>>    #include "bpf_iter_udp4.skel.h"
> >>>    #include "bpf_iter_udp6.skel.h"
> >>> +#include "bpf_iter_unix.skel.h"
> >>>    #include "bpf_iter_test_kern1.skel.h"
> >>>    #include "bpf_iter_test_kern2.skel.h"
> >>>    #include "bpf_iter_test_kern3.skel.h"
> >>> @@ -313,6 +314,20 @@ static void test_udp6(void)
> >>>    	bpf_iter_udp6__destroy(skel);
> >>>    }
> >>>    
> >>> +static void test_unix(void)
> >>> +{
> >>> +	struct bpf_iter_unix *skel;
> >>> +
> >>> +	skel = bpf_iter_unix__open_and_load();
> >>> +	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> >>> +		  "skeleton open_and_load failed\n"))
> >>> +		return;
> >>> +
> >>> +	do_dummy_read(skel->progs.dump_unix);
> >>> +
> >>> +	bpf_iter_unix__destroy(skel);
> >>> +}
> >>> +
> >>>    /* The expected string is less than 16 bytes */
> >>>    static int do_read_with_fd(int iter_fd, const char *expected,
> >>>    			   bool read_one_char)
> >>> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
> >>>    		test_udp4();
> >>>    	if (test__start_subtest("udp6"))
> >>>    		test_udp6();
> >>> +	if (test__start_subtest("unix"))
> >>> +		test_unix();
> >>>    	if (test__start_subtest("anon"))
> >>>    		test_anon_iter(false);
> >>>    	if (test__start_subtest("anon-read-one-char"))
> >>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >>> new file mode 100644
> >>> index 000000000000..285ec2f7944d
> >>> --- /dev/null
> >>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >>> @@ -0,0 +1,75 @@
> >>> +// SPDX-License-Identifier: GPL-2.0
> >>> +/* Copyright Amazon.com Inc. or its affiliates. */
> >>> +#include "bpf_iter.h"
> >>
> >> Could you add bpf_iter__unix to bpf_iter.h similar to bpf_iter__sockmap?
> >> The main purpose is to make the test tolerate an old vmlinux.h.
> > 
> > Thank you for the explanation!
> > I now understand why it is needed even when the same struct is defined.
> > I'll add it in the next spin.
> > 
> > 
> >>
> >>> +#include "bpf_tracing_net.h"
> >>> +#include <bpf/bpf_helpers.h>
> >>> +#include <bpf/bpf_endian.h>
> >>> +
> >>> +char _license[] SEC("license") = "GPL";
> >>> +
> >>> +#define __SO_ACCEPTCON		(1 << 16)
> >>> +#define UNIX_HASH_SIZE		256
> >>> +#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
> >>
> >> Could you add the above three define's in bpf_tracing_net.h?
> >> We try to keep all these common defines in a common header for
> >> potential reusability.
> > 
> > Will do.
> > 
> > 
> >>
> >>> +
> >>> +static long sock_i_ino(const struct sock *sk)
> >>> +{
> >>> +	const struct socket *sk_socket = sk->sk_socket;
> >>> +	const struct inode *inode;
> >>> +	unsigned long ino;
> >>> +
> >>> +	if (!sk_socket)
> >>> +		return 0;
> >>> +
> >>> +	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> >>> +	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> >>> +	return ino;
> >>> +}
> >>> +
> >>> +SEC("iter/unix")
> >>> +int dump_unix(struct bpf_iter__unix *ctx)
> >>> +{
> >>> +	struct unix_sock *unix_sk = ctx->unix_sk;
> >>> +	struct sock *sk = (struct sock *)unix_sk;
> >>> +	struct seq_file *seq;
> >>> +	__u32 seq_num;
> >>> +
> >>> +	if (!unix_sk)
> >>> +		return 0;
> >>> +
> >>> +	seq = ctx->meta->seq;
> >>> +	seq_num = ctx->meta->seq_num;
> >>> +	if (seq_num == 0)
> >>> +		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> >>> +			       "Type St Inode Path\n");
> >>> +
> >>> +	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> >>> +		       unix_sk,
> >>> +		       sk->sk_refcnt.refs.counter,
> >>> +		       0,
> >>> +		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> >>> +		       sk->sk_type,
> >>> +		       sk->sk_socket ?
> >>> +		       (sk->sk_state == TCP_ESTABLISHED ?
> >>> +			SS_CONNECTED : SS_UNCONNECTED) :
> >>> +		       (sk->sk_state == TCP_ESTABLISHED ?
> >>> +			SS_CONNECTING : SS_DISCONNECTING),
> >>> +		       sock_i_ino(sk));
> >>> +
> >>> +	if (unix_sk->addr) {
> >>> +		if (UNIX_ABSTRACT(unix_sk))
> >>> +			/* Abstract UNIX domain socket can contain '\0' in
> >>> +			 * the path, and it should be escaped.  However, it
> >>> +			 * requires loops and the BPF verifier rejects it.
> >>> +			 * So here, print only the escaped first byte to
> >>> +			 * indicate it is an abstract UNIX domain socket.
> >>> +			 * (See: unix_seq_show() and commit e7947ea770d0d)
> >>> +			 */
> >>> +			BPF_SEQ_PRINTF(seq, " @");
> >>> +		else
> >>> +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> >>> +	}
> >>
> >> I looked at af_unix.c, for the above "if (unix_sk->addr) { ... }" code,
> >> the following is the kernel source code,
> >>
> >>                   if (u->addr) {  // under unix_table_lock here
> >>                           int i, len;
> >>                           seq_putc(seq, ' ');
> >>
> >>                           i = 0;
> >>                           len = u->addr->len - sizeof(short);
> >>                           if (!UNIX_ABSTRACT(s))
> >>                                   len--;
> >>                           else {
> >>                                   seq_putc(seq, '@');
> >>                                   i++;
> >>                           }
> >>                           for ( ; i < len; i++)
> >>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
> >>                                            '@');
> >>                   }
> >>
> >> It does not seem to match bpf program non UNIX_ABSTRACT case.
> >> I am not familiar with unix socket so it would be good if you can
> >> explain a little more.
> > 
> > There are three kinds of unix sockets: pathname, unnamed, abstract.  The
> > first two terminate the addr with `\0`, but abstract must start with `\0`
> > and can contain `\0` anywhere in addr.  The `\0` in addr of abstract socket
> > does not have special meaning. [1]
> > 
> > They are inserted into the same hash table in unix_bind(), so the bpf prog
> > matches all of them.
> > 
> > ``` net/unix/af_unix.c
> >    1114		if (sun_path[0])
> >    1115			err = unix_bind_bsd(sk, addr);
> >    1116		else
> >    1117			err = unix_bind_abstract(sk, addr);
> > ```
> > 
> > [1]: https://man7.org/linux/man-pages/man7/unix.7.html
> > 
> > 
> >>
> >> For verifier issue with loops, do we have a maximum upper bound for
> >> u->addr->len? If yes, does bounded loop work?
> > 
> > That has a maximum length in unix_mkname(): sizeof(struct sockaddr_un).
> > 
> > ``` net/unix/af_unix.c
> >     223	/*
> >     224	 *	Check unix socket name:
> >     225	 *		- should be not zero length.
> >     226	 *	        - if started by not zero, should be NULL terminated (FS object)
> >     227	 *		- if started by zero, it is abstract name.
> >     228	 */
> >     229	
> >     230	static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
> >     231	{
> > ...
> >     234		if (len <= sizeof(short) || len > sizeof(*sunaddr))
> >     235			return -EINVAL;
> > ...
> >     253	}
> > ```
> > 
> > So, I rewrote the test like this, but it still causes an error.
> > 
> > ```
> > 	if (unix_sk->addr) {
> > 		int i, len;
> > 
> > 		len = unix_sk->addr->len - sizeof(short);
> > 
> > 		if (!UNIX_ABSTRACT(unix_sk)) {
> > 			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> > 		} else {
> > 			BPF_SEQ_PRINTF(seq, " @");
> > 			i++;
> 
> i++ is not useful here and "i" is not initialized earlier.

Good catch.
I'll remove this.


> 
> > 
> > 			if (len < sizeof(struct sockaddr_un)) {
> > 				for (i = 1 ; i < len; i++)
> > 					BPF_SEQ_PRINTF(seq, "%c",
> > 						       unix_sk->addr->name->sun_path[i] ?:
> > 						       '@');
> > 			}
> > 		}
> > 	}
> > ```
> > 
> > ```
> > processed 196505 insns (limit 1000000) max_states_per_insn 4 total_states 1830 peak_states 1830 mark_read 3
> > ```
> 
> I did some debugging, the main reason is that llvm compiler used "!=" 
> instead of "<" for "i < len" comparison.
> 
>       107:       b4 05 00 00 08 00 00 00 w5 = 8
>       108:       85 00 00 00 7e 00 00 00 call 126
> ;                               for (i = 1 ; i < len; i++)
>       109:       07 09 00 00 01 00 00 00 r9 += 1
>       110:       5d 98 09 00 00 00 00 00 if r8 != r9 goto +9 <LBB0_18>
> 
> 
> Considering "len" is not a constant, for verifier, r8 will never be 
> equal to r9 in the above.
> 
> Digging into the llvm compilation, it is the llvm Induction Variable
> Simplification pass that made this code change. I will try to dig more and
> find a solution. In your next revision, could you add the above code
> as a comment so once llvm code gen is improved, we can have proper
> implementation to match /proc/net/unix?

Thanks for debugging!
OK, I'll include the code, and add a note and a link to this thread in the
commit log.


> 
> > 
> >>
> >>> +
> >>> +	BPF_SEQ_PRINTF(seq, "\n");
> >>> +
> >>> +	return 0;
> >>> +}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program.
  2021-07-30 19:34   ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Andrii Nakryiko
@ 2021-07-30 23:03     ` Kuniyuki Iwashima
  0 siblings, 0 replies; 15+ messages in thread
From: Kuniyuki Iwashima @ 2021-07-30 23:03 UTC (permalink / raw)
  To: andrii.nakryiko
  Cc: andrii, ast, benh, bpf, daniel, davem, john.fastabend, kafai,
	kpsingh, kuba, kuni1840, kuniyu, netdev, songliubraving, yhs

From:   Andrii Nakryiko <andrii.nakryiko@gmail.com>
Date:   Fri, 30 Jul 2021 12:34:43 -0700
> On Thu, Jul 29, 2021 at 4:37 PM Kuniyuki Iwashima <kuniyu@amazon.co.jp> wrote:
> >
> > If there are no abstract sockets, this prog can output the same result
> > compared to /proc/net/unix.
> >
> >   # cat /sys/fs/bpf/unix | head -n 2
> >   Num       RefCount Protocol Flags    Type St Inode Path
> >   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >
> >   # cat /proc/net/unix | head -n 2
> >   Num       RefCount Protocol Flags    Type St Inode Path
> >   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> > ---
> >  .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
> >  .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
> >  2 files changed, 92 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > index 1f1aade56504..4746bac68d36 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > @@ -13,6 +13,7 @@
> >  #include "bpf_iter_tcp6.skel.h"
> >  #include "bpf_iter_udp4.skel.h"
> >  #include "bpf_iter_udp6.skel.h"
> > +#include "bpf_iter_unix.skel.h"
> >  #include "bpf_iter_test_kern1.skel.h"
> >  #include "bpf_iter_test_kern2.skel.h"
> >  #include "bpf_iter_test_kern3.skel.h"
> > @@ -313,6 +314,20 @@ static void test_udp6(void)
> >         bpf_iter_udp6__destroy(skel);
> >  }
> >
> > +static void test_unix(void)
> > +{
> > +       struct bpf_iter_unix *skel;
> > +
> > +       skel = bpf_iter_unix__open_and_load();
> > +       if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> 
> please use the new ASSERT_OK_PTR() macro instead

I'll use ASSERT_OK_PTR().


> 
> > +                 "skeleton open_and_load failed\n"))
> > +               return;
> > +
> > +       do_dummy_read(skel->progs.dump_unix);
> > +
> > +       bpf_iter_unix__destroy(skel);
> > +}
> > +
> >  /* The expected string is less than 16 bytes */
> >  static int do_read_with_fd(int iter_fd, const char *expected,
> >                            bool read_one_char)
> > @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
> >                 test_udp4();
> >         if (test__start_subtest("udp6"))
> >                 test_udp6();
> > +       if (test__start_subtest("unix"))
> > +               test_unix();
> >         if (test__start_subtest("anon"))
> >                 test_anon_iter(false);
> >         if (test__start_subtest("anon-read-one-char"))
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > new file mode 100644
> > index 000000000000..285ec2f7944d
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > @@ -0,0 +1,75 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright Amazon.com Inc. or its affiliates. */
> > +#include "bpf_iter.h"
> > +#include "bpf_tracing_net.h"
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_endian.h>
> > +
> > +char _license[] SEC("license") = "GPL";
> > +
> > +#define __SO_ACCEPTCON         (1 << 16)
> > +#define UNIX_HASH_SIZE         256
> > +#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
> > +
> > +static long sock_i_ino(const struct sock *sk)
> > +{
> > +       const struct socket *sk_socket = sk->sk_socket;
> > +       const struct inode *inode;
> > +       unsigned long ino;
> > +
> > +       if (!sk_socket)
> > +               return 0;
> > +
> > +       inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> > +       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> > +       return ino;
> > +}
> > +
> > +SEC("iter/unix")
> > +int dump_unix(struct bpf_iter__unix *ctx)
> > +{
> > +       struct unix_sock *unix_sk = ctx->unix_sk;
> > +       struct sock *sk = (struct sock *)unix_sk;
> > +       struct seq_file *seq;
> > +       __u32 seq_num;
> > +
> > +       if (!unix_sk)
> > +               return 0;
> > +
> > +       seq = ctx->meta->seq;
> > +       seq_num = ctx->meta->seq_num;
> > +       if (seq_num == 0)
> > +               BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> > +                              "Type St Inode Path\n");
> > +
> > +       BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> > +                      unix_sk,
> > +                      sk->sk_refcnt.refs.counter,
> > +                      0,
> > +                      sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> > +                      sk->sk_type,
> > +                      sk->sk_socket ?
> > +                      (sk->sk_state == TCP_ESTABLISHED ?
> > +                       SS_CONNECTED : SS_UNCONNECTED) :
> > +                      (sk->sk_state == TCP_ESTABLISHED ?
> > +                       SS_CONNECTING : SS_DISCONNECTING),
> 
> nit: I'd keep these ternary operators on a single line for
> readability. Same for header PRINTF above.

I'll put the two ternary operations under `sk->sk_socket ?` on single lines.

Thanks for your review!


> 
> > +                      sock_i_ino(sk));
> > +
> > +       if (unix_sk->addr) {
> > +               if (UNIX_ABSTRACT(unix_sk))
> > +                       /* Abstract UNIX domain socket can contain '\0' in
> > +                        * the path, and it should be escaped.  However, it
> > +                        * requires loops and the BPF verifier rejects it.
> > +                        * So here, print only the escaped first byte to
> > +                        * indicate it is an abstract UNIX domain socket.
> > +                        * (See: unix_seq_show() and commit e7947ea770d0d)
> > +                        */
> > +                       BPF_SEQ_PRINTF(seq, " @");
> > +               else
> > +                       BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> > +       }
> > +
> > +       BPF_SEQ_PRINTF(seq, "\n");
> > +
> > +       return 0;
> > +}
> > --
> > 2.30.2

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-07-30 23:03 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-29 23:36 [PATCH bpf-next 0/2] BPF iterator for UNIX domain socket Kuniyuki Iwashima
2021-07-29 23:36 ` [PATCH bpf-next 1/2] bpf: af_unix: Implement " Kuniyuki Iwashima
2021-07-30  6:19   ` kernel test robot
2021-07-30  6:19     ` kernel test robot
2021-07-30  6:24   ` Yonghong Song
2021-07-30  6:53     ` Kuniyuki Iwashima
2021-07-30  7:09       ` Yonghong Song
2021-07-30  8:05         ` Kuniyuki Iwashima
2021-07-29 23:36 ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Kuniyuki Iwashima
2021-07-30  6:54   ` Yonghong Song
2021-07-30  7:58     ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain Kuniyuki Iwashima
2021-07-30 16:22       ` Yonghong Song
2021-07-30 22:55         ` Kuniyuki Iwashima
2021-07-30 19:34   ` [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program Andrii Nakryiko
2021-07-30 23:03     ` Kuniyuki Iwashima

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.