LKML Archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: torvalds@linux-foundation.org, mingo@redhat.com,
	peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	bristot@redhat.com, vschneid@redhat.com, ast@kernel.org,
	daniel@iogearbox.net, andrii@kernel.org, martin.lau@kernel.org,
	joshdon@google.com, brho@google.com, pjt@google.com,
	derkling@google.com, haoluo@google.com, dvernet@meta.com,
	dschatzberg@meta.com, dskarlat@cs.cmu.edu, riel@surriel.com,
	changwoo@igalia.com, himadrics@inria.fr, memxor@gmail.com,
	andrea.righi@canonical.com, joel@joelfernandes.org
Cc: linux-kernel@vger.kernel.org, bpf@vger.kernel.org,
	kernel-team@meta.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 06/39] sched: Factor out cgroup weight conversion functions
Date: Wed,  1 May 2024 05:09:41 -1000	[thread overview]
Message-ID: <20240501151312.635565-7-tj@kernel.org> (raw)
In-Reply-To: <20240501151312.635565-1-tj@kernel.org>

Factor out sched_weight_from/to_cgroup() which convert between scheduler
shares and cgroup weight. No functional change. The factored out functions
will be used by a new BPF extensible sched_class so that the weights can be
exposed to the BPF programs in a way which is consistent with cgroup weights
and easier to interpret.

The weight conversions will be used regardless of cgroup usage. It's just
borrowing the cgroup weight range as it's more intuitive.
CGROUP_WEIGHT_MIN/DFL/MAX constants are moved outside CONFIG_CGROUPS so that
the conversion helpers can always be defined.

v2: The helpers are now defined regardless of CONFIG_CGROUPS.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Acked-by: Josh Don <joshdon@google.com>
Acked-by: Hao Luo <haoluo@google.com>
Acked-by: Barret Rhoden <brho@google.com>
---
 include/linux/cgroup.h |  4 ++--
 kernel/sched/core.c    | 28 +++++++++++++---------------
 kernel/sched/sched.h   | 18 ++++++++++++++++++
 3 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 509e2e8a1d35..32679fcff0a7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -29,8 +29,6 @@
 
 struct kernel_clone_args;
 
-#ifdef CONFIG_CGROUPS
-
 /*
  * All weight knobs on the default hierarchy should use the following min,
  * default and max values.  The default value is the logarithmic center of
@@ -40,6 +38,8 @@ struct kernel_clone_args;
 #define CGROUP_WEIGHT_DFL		100
 #define CGROUP_WEIGHT_MAX		10000
 
+#ifdef CONFIG_CGROUPS
+
 enum {
 	CSS_TASK_ITER_PROCS    = (1U << 0),  /* walk only threadgroup leaders */
 	CSS_TASK_ITER_THREADED = (1U << 1),  /* walk all threaded css_sets in the domain */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 311efc00da63..9b60df944263 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11313,29 +11313,27 @@ static int cpu_local_stat_show(struct seq_file *sf,
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+
+static unsigned long tg_weight(struct task_group *tg)
+{
+	return scale_load_down(tg->shares);
+}
+
 static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
 			       struct cftype *cft)
 {
-	struct task_group *tg = css_tg(css);
-	u64 weight = scale_load_down(tg->shares);
-
-	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
+	return sched_weight_to_cgroup(tg_weight(css_tg(css)));
 }
 
 static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
-				struct cftype *cft, u64 weight)
+				struct cftype *cft, u64 cgrp_weight)
 {
-	/*
-	 * cgroup weight knobs should use the common MIN, DFL and MAX
-	 * values which are 1, 100 and 10000 respectively.  While it loses
-	 * a bit of range on both ends, it maps pretty well onto the shares
-	 * value used by scheduler and the round-trip conversions preserve
-	 * the original value over the entire range.
-	 */
-	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
+	unsigned long weight;
+
+	if (cgrp_weight < CGROUP_WEIGHT_MIN || cgrp_weight > CGROUP_WEIGHT_MAX)
 		return -ERANGE;
 
-	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
+	weight = sched_weight_from_cgroup(cgrp_weight);
 
 	return sched_group_set_shares(css_tg(css), scale_load(weight));
 }
@@ -11343,7 +11341,7 @@ static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
 static s64 cpu_weight_nice_read_s64(struct cgroup_subsys_state *css,
 				    struct cftype *cft)
 {
-	unsigned long weight = scale_load_down(css_tg(css)->shares);
+	unsigned long weight = tg_weight(css_tg(css));
 	int last_delta = INT_MAX;
 	int prio, delta;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 99e292368d11..24b3d120700b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -221,6 +221,24 @@ static inline void update_avg(u64 *avg, u64 sample)
 #define shr_bound(val, shift)							\
 	(val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
 
+/*
+ * cgroup weight knobs should use the common MIN, DFL and MAX values which are
+ * 1, 100 and 10000 respectively. While it loses a bit of range on both ends, it
+ * maps pretty well onto the shares value used by scheduler and the round-trip
+ * conversions preserve the original value over the entire range.
+ */
+static inline unsigned long sched_weight_from_cgroup(unsigned long cgrp_weight)
+{
+	return DIV_ROUND_CLOSEST_ULL(cgrp_weight * 1024, CGROUP_WEIGHT_DFL);
+}
+
+static inline unsigned long sched_weight_to_cgroup(unsigned long weight)
+{
+	return clamp_t(unsigned long,
+		       DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024),
+		       CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
 /*
  * !! For sched_setattr_nocheck() (kernel) only !!
  *
-- 
2.44.0


  parent reply	other threads:[~2024-05-01 15:13 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-01 15:09 [PATCHSET v6] sched: Implement BPF extensible scheduler class Tejun Heo
2024-05-01 15:09 ` [PATCH 01/39] cgroup: Implement cgroup_show_cftypes() Tejun Heo
2024-05-01 15:09 ` [PATCH 02/39] sched: Restructure sched_class order sanity checks in sched_init() Tejun Heo
2024-05-01 15:09 ` [PATCH 03/39] sched: Allow sched_cgroup_fork() to fail and introduce sched_cancel_fork() Tejun Heo
2024-05-01 15:09 ` [PATCH 04/39] sched: Add sched_class->reweight_task() Tejun Heo
2024-05-01 15:09 ` [PATCH 05/39] sched: Add sched_class->switching_to() and expose check_class_changing/changed() Tejun Heo
2024-05-01 15:09 ` Tejun Heo [this message]
2024-05-01 15:09 ` [PATCH 07/39] sched: Expose css_tg() and __setscheduler_prio() Tejun Heo
2024-05-01 15:09 ` [PATCH 08/39] sched: Enumerate CPU cgroup file types Tejun Heo
2024-05-01 15:09 ` [PATCH 09/39] sched: Add @reason to sched_class->rq_{on|off}line() Tejun Heo
2024-05-01 15:09 ` [PATCH 10/39] sched: Factor out update_other_load_avgs() from __update_blocked_others() Tejun Heo
2024-05-01 15:09 ` [PATCH 11/39] cpufreq_schedutil: Refactor sugov_cpu_is_busy() Tejun Heo
2024-05-01 15:09 ` [PATCH 12/39] sched: Add normal_policy() Tejun Heo
2024-05-01 15:09 ` [PATCH 13/39] sched_ext: Add boilerplate for extensible scheduler class Tejun Heo
2024-05-01 15:09 ` [PATCH 14/39] sched_ext: Implement BPF " Tejun Heo
2024-05-01 15:09 ` [PATCH 15/39] sched_ext: Add scx_simple and scx_example_qmap example schedulers Tejun Heo
2024-05-01 15:09 ` [PATCH 16/39] sched_ext: Add sysrq-S which disables the BPF scheduler Tejun Heo
2024-05-01 15:09 ` [PATCH 17/39] sched_ext: Implement runnable task stall watchdog Tejun Heo
2024-05-01 15:09 ` [PATCH 18/39] sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT Tejun Heo
2024-05-01 15:09 ` [PATCH 19/39] sched_ext: Print sched_ext info when dumping stack Tejun Heo
2024-05-01 15:09 ` [PATCH 20/39] sched_ext: Print debug dump after an error exit Tejun Heo
2024-05-01 15:09 ` [PATCH 21/39] tools/sched_ext: Add scx_show_state.py Tejun Heo
2024-05-01 15:09 ` [PATCH 22/39] sched_ext: Implement scx_bpf_kick_cpu() and task preemption support Tejun Heo
2024-05-01 15:09 ` [PATCH 23/39] sched_ext: Add a central scheduler which makes all scheduling decisions on one CPU Tejun Heo
2024-05-01 15:09 ` [PATCH 24/39] sched_ext: Make watchdog handle ops.dispatch() looping stall Tejun Heo
2024-05-01 15:10 ` [PATCH 25/39] sched_ext: Add task state tracking operations Tejun Heo
2024-05-01 15:10 ` [PATCH 26/39] sched_ext: Implement tickless support Tejun Heo
2024-05-01 15:10 ` [PATCH 27/39] sched_ext: Track tasks that are subjects of the in-flight SCX operation Tejun Heo
2024-05-01 15:10 ` [PATCH 28/39] sched_ext: Add cgroup support Tejun Heo
2024-05-01 15:10 ` [PATCH 29/39] sched_ext: Add a cgroup scheduler which uses flattened hierarchy Tejun Heo
2024-05-01 15:10 ` [PATCH 30/39] sched_ext: Implement SCX_KICK_WAIT Tejun Heo
2024-05-01 15:10 ` [PATCH 31/39] sched_ext: Implement sched_ext_ops.cpu_acquire/release() Tejun Heo
2024-05-01 15:10 ` [PATCH 32/39] sched_ext: Implement sched_ext_ops.cpu_online/offline() Tejun Heo
2024-05-01 15:10 ` [PATCH 33/39] sched_ext: Bypass BPF scheduler while PM events are in progress Tejun Heo
2024-05-01 15:10 ` [PATCH 34/39] sched_ext: Implement core-sched support Tejun Heo
2024-05-01 15:10 ` [PATCH 35/39] sched_ext: Add vtime-ordered priority queue to dispatch_q's Tejun Heo
2024-05-01 15:10 ` [PATCH 36/39] sched_ext: Implement DSQ iterator Tejun Heo
2024-05-01 15:10 ` [PATCH 37/39] sched_ext: Add cpuperf support Tejun Heo
2024-05-01 15:10 ` [PATCH 38/39] sched_ext: Documentation: scheduler: Document extensible scheduler class Tejun Heo
2024-05-02  2:24   ` Bagas Sanjaya
2024-05-01 15:10 ` [PATCH 39/39] sched_ext: Add selftests Tejun Heo
2024-05-02  8:48 ` [PATCHSET v6] sched: Implement BPF extensible scheduler class Peter Zijlstra
2024-05-02 19:20   ` Tejun Heo
2024-05-03  8:52     ` Peter Zijlstra
2024-05-05 23:31       ` Tejun Heo
2024-05-13  8:03         ` Peter Zijlstra
2024-05-13 18:26           ` Steven Rostedt
2024-05-14  0:07             ` Qais Yousef
2024-05-14 21:34               ` David Vernet
2024-05-27 21:25                 ` Qais Yousef
2024-05-28 23:46                   ` Tejun Heo
2024-05-29 22:09                     ` Qais Yousef
2024-05-17  9:58               ` Peter Zijlstra
2024-05-27 20:29                 ` Qais Yousef
2024-05-14 20:22           ` Chris Mason
2024-05-14 22:06           ` Josh Don
2024-05-15 20:41           ` Tejun Heo
2024-05-21  0:19             ` Tejun Heo
2024-05-30 16:49               ` Tejun Heo
2024-05-06 18:47       ` Rik van Riel
2024-05-07 19:33         ` Tejun Heo
2024-05-07 19:47           ` Rik van Riel
2024-05-09  7:38       ` Changwoo Min
2024-05-10 18:24 ` Peter Jung
2024-05-13 20:36 ` Andrea Righi
2024-06-11 21:34 ` Linus Torvalds
2024-06-13 23:38   ` Tejun Heo
2024-06-19 20:56   ` Thomas Gleixner
2024-06-19 22:10     ` Linus Torvalds
2024-06-19 22:27       ` Thomas Gleixner
2024-06-19 22:55         ` Linus Torvalds
2024-06-20  2:35           ` Thomas Gleixner
2024-06-20  5:07             ` Linus Torvalds
2024-06-20 17:11               ` Linus Torvalds
2024-06-20 17:41                 ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240501151312.635565-7-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=andrea.righi@canonical.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brho@google.com \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=changwoo@igalia.com \
    --cc=daniel@iogearbox.net \
    --cc=derkling@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=dschatzberg@meta.com \
    --cc=dskarlat@cs.cmu.edu \
    --cc=dvernet@meta.com \
    --cc=haoluo@google.com \
    --cc=himadrics@inria.fr \
    --cc=joel@joelfernandes.org \
    --cc=joshdon@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin.lau@kernel.org \
    --cc=memxor@gmail.com \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=riel@surriel.com \
    --cc=rostedt@goodmis.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).