From: Peter Zijlstra <peterz@infradead.org>
To: mingo@redhat.com, peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	bristot@redhat.com, vschneid@redhat.com,
	linux-kernel@vger.kernel.org
Cc: kprateek.nayak@amd.com, wuyun.abel@bytedance.com,
	tglx@linutronix.de, efault@gmx.de
Subject: [RFC][PATCH 07/10] sched/fair: Re-organize dequeue_task_fair()
Date: Fri, 05 Apr 2024 12:28:01 +0200
Message-ID: <20240405110010.522077707@infradead.org>
In-Reply-To: <20240405102754.435410987@infradead.org>

Working towards delaying dequeue, notably also inside the hierarchy,
rework dequeue_task_fair() such that it can 'resume' an interrupted
hierarchy walk.
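
As a standalone illustration of the shape of this (a toy model in plain
C, not kernel code; every name in it is made up), the walk can stop
part-way up a parent chain, record the entity at which it stopped, and
a later call simply restarts from that point:

	#include <stdio.h>

	struct entity {
		struct entity *parent;
		int throttled;
	};

	/*
	 * 0 - walk stopped at a throttled level; *pos is updated so
	 *     a later call resumes from there
	 * 1 - walk complete
	 */
	static int walk_up(struct entity **pos)
	{
		struct entity *se;

		for (se = *pos; se; se = se->parent) {
			if (se->throttled) {
				*pos = se;	/* remember where we stopped */
				return 0;
			}
		}
		return 1;
	}

	int main(void)
	{
		struct entity root = { NULL, 0 };
		struct entity mid  = { &root, 1 };	/* a throttled level */
		struct entity leaf = { &mid,  0 };
		struct entity *pos = &leaf;

		if (walk_up(&pos) == 0)
			printf("stopped part-way, resume later\n");

		mid.throttled = 0;		/* unthrottle */
		if (walk_up(&pos) == 1)		/* resumes from 'mid' */
			printf("walk complete\n");

		return 0;
	}

The real dequeue_entities() below additionally reserves -1 for a
delayed dequeue; nothing in this patch returns it yet, that case
arrives with the delayed dequeue patch later in this series.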

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/fair.c |   82 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 27 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6824,33 +6824,45 @@ enqueue_task_fair(struct rq *rq, struct
 static void set_next_buddy(struct sched_entity *se);
 
 /*
- * The dequeue_task method is called before nr_running is
- * decreased. We remove the task from the rbtree and
- * update the fair scheduling stats:
+ * Basically dequeue_task_fair(), except it can deal with dequeue_entity()
+ * failing half-way through and resume the dequeue later.
+ *
+ * Returns:
+ * -1 - dequeue delayed
+ *  0 - dequeue throttled
+ *  1 - dequeue complete
  */
-static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 {
-	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se;
-	int task_sleep = flags & DEQUEUE_SLEEP;
-	int idle_h_nr_running = task_has_idle_policy(p);
 	bool was_sched_idle = sched_idle_rq(rq);
+	bool task_sleep = flags & DEQUEUE_SLEEP;
+	struct task_struct *p = NULL;
+	struct cfs_rq *cfs_rq;
+	int idle_h_nr_running;
 
-	util_est_dequeue(&rq->cfs, p);
+	if (entity_is_task(se)) {
+		p = task_of(se);
+		idle_h_nr_running = task_has_idle_policy(p);
+	} else {
+		idle_h_nr_running = cfs_rq_is_idle(group_cfs_rq(se));
+	}
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
-		cfs_rq->h_nr_running--;
-		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+		/* h_nr_running is the hierarchical count of tasks */
+		if (p) {
+			cfs_rq->h_nr_running--;
+			cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 
-		if (cfs_rq_is_idle(cfs_rq))
-			idle_h_nr_running = 1;
+			if (cfs_rq_is_idle(cfs_rq))
+				idle_h_nr_running = 1;
+		}
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
-			goto dequeue_throttle;
+			return 0;
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
@@ -6870,33 +6882,49 @@ static bool dequeue_task_fair(struct rq
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 
+		// XXX avoid these load updates for delayed dequeues ?
 		update_load_avg(cfs_rq, se, UPDATE_TG);
 		se_update_runnable(se);
 		update_cfs_group(se);
 
-		cfs_rq->h_nr_running--;
-		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+		if (p) {
+			cfs_rq->h_nr_running--;
+			cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 
-		if (cfs_rq_is_idle(cfs_rq))
-			idle_h_nr_running = 1;
+			if (cfs_rq_is_idle(cfs_rq))
+				idle_h_nr_running = 1;
+		}
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
-			goto dequeue_throttle;
+			return 0;
+	}
+
+	if (p) {
+		sub_nr_running(rq, 1);
 
+		/* balance early to pull high priority tasks */
+		if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+			rq->next_balance = jiffies;
 	}
 
-	/* At this point se is NULL and we are at root level*/
-	sub_nr_running(rq, 1);
+	return 1;
+}
 
-	/* balance early to pull high priority tasks */
-	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
-		rq->next_balance = jiffies;
+/*
+ * The dequeue_task method is called before nr_running is
+ * decreased. We remove the task from the rbtree and
+ * update the fair scheduling stats:
+ */
+static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+{
+	util_est_dequeue(&rq->cfs, p);
 
-dequeue_throttle:
-	util_est_update(&rq->cfs, p, task_sleep);
-	hrtick_update(rq);
+	if (dequeue_entities(rq, &p->se, flags) < 0)
+		return false;
 
+	util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
+	hrtick_update(rq);
 	return true;
 }
 



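A note on the 'resume' shape this enables: dequeue_entities() takes a
sched_entity rather than a task_struct, so it can be handed the group
entity at which a previous walk stopped. In that case entity_is_task()
is false, p stays NULL, and the hierarchical task-count updates are
skipped; only the entity dequeue and the load updates run. No such
caller exists in this patch; a hypothetical resume invocation (the
function name and the flags here are made up for illustration) would
look like:

	static void resume_dequeue(struct rq *rq, struct sched_entity *group_se)
	{
		/* group_se: the entity at which the earlier walk stopped */
		dequeue_entities(rq, group_se, DEQUEUE_SLEEP);
	}
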
Thread overview: 76+ messages
2024-04-05 10:27 [RFC][PATCH 00/10] sched/fair: Complete EEVDF Peter Zijlstra
2024-04-05 10:27 ` [RFC][PATCH 01/10] sched/eevdf: Add feature comments Peter Zijlstra
2024-04-05 10:27 ` [RFC][PATCH 02/10] sched/eevdf: Remove min_vruntime_copy Peter Zijlstra
2024-04-05 10:27 ` [RFC][PATCH 03/10] sched/fair: Cleanup pick_task_fair() vs throttle Peter Zijlstra
2024-04-05 21:11   ` Benjamin Segall
2024-04-05 10:27 ` [RFC][PATCH 04/10] sched/fair: Cleanup pick_task_fair()s curr Peter Zijlstra
2024-04-05 10:27 ` [RFC][PATCH 05/10] sched/fair: Unify pick_{,next_}_task_fair() Peter Zijlstra
2024-04-06  2:20   ` Mike Galbraith
2024-04-05 10:28 ` [RFC][PATCH 06/10] sched: Allow sched_class::dequeue_task() to fail Peter Zijlstra
2024-04-05 10:28 ` Peter Zijlstra [this message]
2024-04-05 10:28 ` [RFC][PATCH 08/10] sched/fair: Implement delayed dequeue Peter Zijlstra
2024-04-06  9:23   ` Chen Yu
2024-04-08  9:06     ` Peter Zijlstra
2024-04-11  1:32       ` Yan-Jie Wang
2024-04-25 10:25         ` Peter Zijlstra
2024-04-12 10:42   ` K Prateek Nayak
2024-04-15 10:56     ` Mike Galbraith
2024-04-16  3:18       ` K Prateek Nayak
2024-04-16  5:36         ` Mike Galbraith
2024-04-18 16:24           ` Mike Galbraith
2024-04-18 17:08             ` K Prateek Nayak
2024-04-24 15:20             ` Peter Zijlstra
2024-04-25 11:28             ` Peter Zijlstra
2024-04-26 10:56               ` Peter Zijlstra
2024-04-26 11:16                 ` Peter Zijlstra
2024-04-26 16:03                   ` Mike Galbraith
2024-04-27  6:42                     ` Mike Galbraith
2024-04-28 16:32                       ` Mike Galbraith
2024-04-29 12:14                         ` Peter Zijlstra
2024-04-15 17:07   ` Luis Machado
2024-04-24 15:15     ` Luis Machado
2024-04-25 10:42       ` Peter Zijlstra
2024-04-25 11:49         ` Peter Zijlstra
2024-04-26  9:32           ` Peter Zijlstra
2024-04-26  9:36             ` Peter Zijlstra
2024-04-26 10:16             ` Luis Machado
2024-04-29 14:33             ` Luis Machado
2024-05-02 10:26               ` Luis Machado
2024-05-10 14:49                 ` Luis Machado
2024-05-15  9:36                   ` Peter Zijlstra
2024-05-15 11:48                     ` Peter Zijlstra
2024-05-15 18:03                       ` Mike Galbraith
2024-05-20 15:20                       ` Luis Machado
2024-05-29 22:50                 ` Peter Zijlstra
2024-06-03 19:30                   ` Luis Machado
2024-06-04 10:11                     ` Peter Zijlstra
2024-06-04 13:59                       ` Hongyan Xia
2024-06-04 14:23                       ` Luis Machado
2024-06-04 14:49                         ` Hongyan Xia
2024-06-04 19:12                         ` Peter Zijlstra
2024-06-05  7:22                           ` Peter Zijlstra
2024-06-05  9:14                             ` Luis Machado
2024-06-05  9:42                               ` Peter Zijlstra
2024-06-12 15:08                                 ` Luis Machado
2024-05-23  8:45               ` Peter Zijlstra
2024-05-23  9:06                 ` Luis Machado
2024-05-23  9:33                   ` Peter Zijlstra
2024-06-03 15:57                     ` Hongyan Xia
2024-04-26 10:15         ` Luis Machado
2024-04-20  5:57   ` Mike Galbraith
2024-04-22 13:13   ` Tobias Huschle
2024-04-05 10:28 ` [RFC][PATCH 09/10] sched/eevdf: Allow shorter slices to wakeup-preempt Peter Zijlstra
2024-04-05 10:28 ` [RFC][PATCH 10/10] sched/eevdf: Use sched_attr::sched_runtime to set request/slice suggestion Peter Zijlstra
2024-04-06  8:16   ` Hillf Danton
2024-05-07  5:34   ` Mike Galbraith
2024-05-15 10:13     ` Peter Zijlstra
2024-05-07 15:15   ` Chen Yu
2024-05-08 13:52     ` Mike Galbraith
2024-05-09  3:48       ` Chen Yu
2024-05-09  5:00         ` Mike Galbraith
2024-05-13  4:07     ` K Prateek Nayak
2024-05-14  9:18       ` Chen Yu
2024-05-14 15:23         ` K Prateek Nayak
2024-05-14 16:15           ` Chen Yu
2024-05-22 14:48           ` Chen Yu
2024-05-27 10:11 ` [RFC][PATCH 00/10] sched/fair: Complete EEVDF K Prateek Nayak
