BPF Archive mirror
 help / color / mirror / Atom feed
From: Mykyta Yatsenko mykyta.yatsenko5@gmail.com
To: bpf@vger.kernel.org, ast@kernel.org, andrii@kernel.org,
	daniel@iogearbox.net, kafai@meta.com, kernel-team@meta.com
Cc: Mykyta Yatsenko <yatsenko@meta.com>
Subject: [PATCH bpf-next] bpftool: introduce btf c dump sorting
Date: Mon,  6 May 2024 14:44:58 +0100	[thread overview]
Message-ID: <20240506134458.727621-1-yatsenko@meta.com> (raw)

From: Mykyta Yatsenko <yatsenko@meta.com>

Provide a way to sort bpftool c dump output, to simplify vmlinux.h
diffing and forcing more natural definitions ordering.

Use `normalized` argument in bpftool CLI after `format c` for example:
```
bpftool btf dump file /sys/kernel/btf/fuse format c normalized
```

Definitions are sorted by their BTF kind ranks, lexicographically and
typedefs are forced to go right after their base type.

Type ranks

Assign ranks to btf kinds (defined in function btf_type_rank) to set
next order:
1. Anonymous enums
2. Anonymous enums64
3. Named enums
4. Named enums64
5. Trivial types typedefs (ints, then floats)
6. Structs
7. Unions
8. Function prototypes
9. Forward declarations

Lexicographical ordering

Definitions within the same BTF kind are ordered by their names.
Anonymous enums are ordered by their first element.

Forcing typedefs to go right after their base type

To make sure that typedefs are emitted right after their base type,
we build a list of type's typedefs (struct typedef_ref) and after
emitting type, its typedefs are emitted as well (lexicographically)

There is a small flaw in this implementation:
Type dependencies are resolved by bpf lib, so when type is dumped
because it is a dependency, its typedefs are not output right after it,
as bpflib does not have the list of typedefs for a given type.

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
 tools/bpf/bpftool/btf.c | 264 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 259 insertions(+), 5 deletions(-)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 91fcb75babe3..93c876e90b04 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -11,6 +11,7 @@
 #include <linux/btf.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <linux/list.h>
 
 #include <bpf/bpf.h>
 #include <bpf/btf.h>
@@ -43,6 +44,20 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_ENUM64]	= "ENUM64",
 };
 
+struct typedef_ref {
+	struct sort_datum *datum;
+	struct list_head list;
+};
+
+struct sort_datum {
+	__u32 index;
+	int type_rank;
+	bool emitted;
+	const char *name;
+	// List of typedefs of this type
+	struct list_head *typedef_list;
+};
+
 static const char *btf_int_enc_str(__u8 encoding)
 {
 	switch (encoding) {
@@ -460,8 +475,233 @@ static void __printf(2, 0) btf_dump_printf(void *ctx,
 	vfprintf(stdout, fmt, args);
 }
 
+static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
+{
+	const struct btf_type *btf_type = btf__type_by_id(btf, index);
+	const int max_rank = 1000;
+
+	has_name |= (bool)btf_type->name_off;
+
+	switch (btf_kind(btf_type)) {
+	case BTF_KIND_ENUM:
+		return 100 + (btf_type->name_off == 0 ? 0 : 1);
+	case BTF_KIND_ENUM64:
+		return 200 + (btf_type->name_off == 0 ? 0 : 1);
+	case BTF_KIND_INT:
+		return 300;
+	case BTF_KIND_FLOAT:
+		return 400;
+	case BTF_KIND_VAR:
+		return 500;
+
+	case BTF_KIND_STRUCT:
+		return 600 + (has_name ? 0 : max_rank);
+	case BTF_KIND_UNION:
+		return 700 + (has_name ? 0 : max_rank);
+	case BTF_KIND_FUNC_PROTO:
+		return 800 + (has_name ? 0 : max_rank);
+
+	case BTF_KIND_FWD:
+		return 900;
+
+	case BTF_KIND_ARRAY:
+		return 1 + btf_type_rank(btf, btf_array(btf_type)->type, has_name);
+
+	case BTF_KIND_CONST:
+	case BTF_KIND_PTR:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPE_TAG:
+	case BTF_KIND_TYPEDEF:
+		return 1 + btf_type_rank(btf, btf_type->type, has_name);
+
+	default:
+		return max_rank;
+	}
+}
+
+static const char *btf_type_sort_name(const struct btf *btf, __u32 index)
+{
+	const struct btf_type *btf_type = btf__type_by_id(btf, index);
+	const int kind = btf_kind(btf_type);
+	const char *name = btf__name_by_offset(btf, btf_type->name_off);
+
+	// Use name of the first element for anonymous enums
+	if (!btf_type->name_off && (kind == BTF_KIND_ENUM || kind == BTF_KIND_ENUM64))
+		name = btf__name_by_offset(btf, btf_enum(btf_type)->name_off);
+
+	return name;
+}
+
+static int btf_type_compare(const void *left, const void *right)
+{
+	const struct sort_datum *datum1 = (const struct sort_datum *)left;
+	const struct sort_datum *datum2 = (const struct sort_datum *)right;
+
+	if (datum1->type_rank != datum2->type_rank)
+		return datum1->type_rank < datum2->type_rank ? -1 : 1;
+
+	return strcmp(datum1->name, datum2->name);
+}
+
+static int emit_typedefs(struct list_head *typedef_list, int *sorted_indexes)
+{
+	struct typedef_ref *type;
+	int current_index = 0;
+
+	if (!typedef_list)
+		return 0;
+	list_for_each_entry(type, typedef_list, list) {
+		if (type->datum->emitted)
+			continue;
+		type->datum->emitted = true;
+		sorted_indexes[current_index++] = type->datum->index;
+		current_index += emit_typedefs(type->datum->typedef_list,
+					sorted_indexes + current_index);
+	}
+	return current_index;
+}
+
+static void free_typedefs(struct list_head *typedef_list)
+{
+	struct typedef_ref *type;
+	struct typedef_ref *temp_type;
+
+	if (!typedef_list)
+		return;
+	list_for_each_entry_safe(type, temp_type, typedef_list, list) {
+		list_del(&type->list);
+		free(type);
+	}
+	free(typedef_list);
+}
+
+static void add_typedef_ref(const struct btf *btf, struct sort_datum *parent,
+			    struct typedef_ref *new_ref)
+{
+	struct typedef_ref *current_child;
+	const char *new_child_name = new_ref->datum->name;
+
+	if (!parent->typedef_list) {
+		parent->typedef_list = malloc(sizeof(struct list_head));
+		INIT_LIST_HEAD(parent->typedef_list);
+		list_add(&new_ref->list, parent->typedef_list);
+		return;
+	}
+	list_for_each_entry(current_child, parent->typedef_list, list) {
+		const struct btf_type *t = btf__type_by_id(btf, current_child->datum->index);
+		const char *current_name = btf_str(btf, t->name_off);
+
+		if (list_is_last(&current_child->list, parent->typedef_list)) {
+			list_add(&new_ref->list, &current_child->list);
+			return;
+		}
+		if (strcmp(new_child_name, current_name) < 0) {
+			list_add_tail(&new_ref->list, &current_child->list);
+			return;
+		}
+	}
+}
+
+static int find_base_typedef_type(const struct btf *btf, int index)
+{
+	const struct btf_type *type = btf__type_by_id(btf, index);
+	int kind = btf_kind(type);
+	int base_idx;
+
+	if (kind != BTF_KIND_TYPEDEF)
+		return 0;
+
+	do {
+		base_idx = kind == BTF_KIND_ARRAY ? btf_array(type)->type : type->type;
+		type = btf__type_by_id(btf, base_idx);
+		kind = btf_kind(type);
+	} while (kind == BTF_KIND_ARRAY ||
+		   kind == BTF_KIND_PTR ||
+		   kind == BTF_KIND_CONST ||
+		   kind == BTF_KIND_VOLATILE ||
+		   kind == BTF_KIND_RESTRICT ||
+		   kind == BTF_KIND_TYPE_TAG);
+
+	return base_idx;
+}
+
+static int *sort_btf_c(const struct btf *btf)
+{
+	int total_root_types;
+	struct sort_datum *datums;
+	int *sorted_indexes = NULL;
+	int *type_index_to_datum_index;
+
+	if (!btf)
+		return sorted_indexes;
+
+	total_root_types = btf__type_cnt(btf);
+	datums = malloc(sizeof(struct sort_datum) * total_root_types);
+
+	for (int i = 1; i < total_root_types; ++i) {
+		struct sort_datum *current_datum = datums + i;
+
+		current_datum->index = i;
+		current_datum->name = btf_type_sort_name(btf, i);
+		current_datum->type_rank = btf_type_rank(btf, i, false);
+		current_datum->emitted = false;
+		current_datum->typedef_list = NULL;
+	}
+
+	qsort(datums + 1, total_root_types - 1, sizeof(struct sort_datum), btf_type_compare);
+
+	// Build a mapping from btf type id to datums array index
+	type_index_to_datum_index = malloc(sizeof(int) * total_root_types);
+	type_index_to_datum_index[0] = 0;
+	for (int i = 1; i < total_root_types; ++i)
+		type_index_to_datum_index[datums[i].index] = i;
+
+	for (int i = 1; i < total_root_types; ++i) {
+		struct sort_datum *current_datum = datums + i;
+		const struct btf_type *current_type = btf__type_by_id(btf, current_datum->index);
+		int base_index;
+		struct sort_datum *base_datum;
+		const struct btf_type *base_type;
+		struct typedef_ref *new_ref;
+
+		if (btf_kind(current_type) != BTF_KIND_TYPEDEF)
+			continue;
+
+		base_index = find_base_typedef_type(btf, current_datum->index);
+		if (!base_index)
+			continue;
+
+		base_datum = datums + type_index_to_datum_index[base_index];
+		base_type = btf__type_by_id(btf, base_datum->index);
+		if (!base_type->name_off)
+			continue;
+
+		new_ref = malloc(sizeof(struct typedef_ref));
+		new_ref->datum = current_datum;
+
+		add_typedef_ref(btf, base_datum, new_ref);
+	}
+
+	sorted_indexes = malloc(sizeof(int) * total_root_types);
+	sorted_indexes[0] = 0;
+	for (int emit_index = 1, datum_index = 1; emit_index < total_root_types; ++datum_index) {
+		struct sort_datum *datum = datums + datum_index;
+
+		if (datum->emitted)
+			continue;
+		datum->emitted = true;
+		sorted_indexes[emit_index++] = datum->index;
+		emit_index += emit_typedefs(datum->typedef_list, sorted_indexes + emit_index);
+		free_typedefs(datum->typedef_list);
+	}
+	free(type_index_to_datum_index);
+	free(datums);
+	return sorted_indexes;
+}
+
 static int dump_btf_c(const struct btf *btf,
-		      __u32 *root_type_ids, int root_type_cnt)
+		      __u32 *root_type_ids, int root_type_cnt, bool normalized)
 {
 	struct btf_dump *d;
 	int err = 0, i;
@@ -485,12 +725,17 @@ static int dump_btf_c(const struct btf *btf,
 		}
 	} else {
 		int cnt = btf__type_cnt(btf);
-
+		int *sorted_indexes = normalized ? sort_btf_c(btf) : NULL;
 		for (i = 1; i < cnt; i++) {
-			err = btf_dump__dump_type(d, i);
+			int idx = sorted_indexes ? sorted_indexes[i] : i;
+
+			err = btf_dump__dump_type(d, idx);
 			if (err)
-				goto done;
+				break;
 		}
+		free(sorted_indexes);
+		if (err)
+			goto done;
 	}
 
 	printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
@@ -553,6 +798,7 @@ static int do_dump(int argc, char **argv)
 	__u32 root_type_ids[2];
 	int root_type_cnt = 0;
 	bool dump_c = false;
+	bool normalized = false;
 	__u32 btf_id = -1;
 	const char *src;
 	int fd = -1;
@@ -663,6 +909,14 @@ static int do_dump(int argc, char **argv)
 				goto done;
 			}
 			NEXT_ARG();
+		} else if (strcmp(*argv, "normalized") == 0) {
+			if (!dump_c) {
+				p_err("Only C dump supports normalization");
+				err = -EINVAL;
+				goto done;
+			}
+			normalized = true;
+			NEXT_ARG();
 		} else {
 			p_err("unrecognized option: '%s'", *argv);
 			err = -EINVAL;
@@ -691,7 +945,7 @@ static int do_dump(int argc, char **argv)
 			err = -ENOTSUP;
 			goto done;
 		}
-		err = dump_btf_c(btf, root_type_ids, root_type_cnt);
+		err = dump_btf_c(btf, root_type_ids, root_type_cnt, normalized);
 	} else {
 		err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
 	}
-- 
2.44.0


             reply	other threads:[~2024-05-06 13:45 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-06 13:44 Mykyta [this message]
2024-05-07 21:02 ` [PATCH bpf-next] bpftool: introduce btf c dump sorting Andrii Nakryiko
2024-05-07 23:30   ` Quentin Monnet
2024-05-08 16:21     ` Andrii Nakryiko
2024-05-08 23:07   ` Mykyta Yatsenko
2024-05-08 23:21     ` Andrii Nakryiko
2024-05-08 23:35       ` Mykyta Yatsenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240506134458.727621-1-yatsenko@meta.com \
    --to=bpf@vger.kernel.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kafai@meta.com \
    --cc=kernel-team@meta.com \
    --cc=yatsenko@meta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).