From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752711AbbFKPcW (ORCPT <rfc822;w@1wt.eu>);
	Thu, 11 Jun 2015 11:32:22 -0400
Received: from relay.parallels.com ([195.214.232.42]:44830 "EHLO
	relay.parallels.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1750698AbbFKPcS (ORCPT
	<rfc822;linux-kernel@vger.kernel.org>);
	Thu, 11 Jun 2015 11:32:18 -0400
Message-ID: <1434036728.1649.9.camel@odin.com>
Subject: Re: [PATCH 01/18] sched: Replace post_schedule with a balance
 callback list
From: Kirill Tkhai <ktkhai@odin.com>
To: Peter Zijlstra <peterz@infradead.org>
CC: <umgwanakikbuti@gmail.com>, <mingo@elte.hu>, <ktkhai@parallels.com>,
        <rostedt@goodmis.org>, <tglx@linutronix.de>, <juri.lelli@gmail.com>,
        <pang.xunlei@linaro.org>, <oleg@redhat.com>,
        <wanpeng.li@linux.intel.com>, <linux-kernel@vger.kernel.org>
Date: Thu, 11 Jun 2015 18:32:08 +0300
In-Reply-To: <20150611124742.424032725@infradead.org>
References: <20150611124636.448700267@infradead.org>
	 <20150611124742.424032725@infradead.org>
Content-Type: text/plain; charset="UTF-8"
X-Mailer: Evolution 3.12.11-1 
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Originating-IP: [10.30.16.109]
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org

A thought that just came to mind:

В Чт, 11/06/2015 в 14:46 +0200, Peter Zijlstra пишет:
> Generalize the post_schedule() stuff into a balance callback list.
> This allows us to more easily use it outside of schedule() and cross
> sched_class.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/sched/core.c     |   36 ++++++++++++++++++++++++------------
>  kernel/sched/deadline.c |   21 +++++++++++----------
>  kernel/sched/rt.c       |   25 +++++++++++--------------
>  kernel/sched/sched.h    |   19 +++++++++++++++++--
>  4 files changed, 63 insertions(+), 38 deletions(-)
> 
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2277,23 +2277,35 @@ static struct rq *finish_task_switch(str
>  #ifdef CONFIG_SMP
>  
>  /* rq->lock is NOT held, but preemption is disabled */
> -static inline void post_schedule(struct rq *rq)
> +static void __balance_callback(struct rq *rq)
>  {
> -       if (rq->post_schedule) {
> -               unsigned long flags;
> +       struct callback_head *head, *next;
> +       void (*func)(struct rq *rq);
> +       unsigned long flags;
>  
> -               raw_spin_lock_irqsave(&rq->lock, flags);
> -               if (rq->curr->sched_class->post_schedule)
> -                       rq->curr->sched_class->post_schedule(rq);
> -               raw_spin_unlock_irqrestore(&rq->lock, flags);
> +       raw_spin_lock_irqsave(&rq->lock, flags);
> +       head = rq->balance_callback;
> +       rq->balance_callback = NULL;
> +       while (head) {
> +               func = (void (*)(struct rq *))head->func;
> +               next = head->next;
> +               head->next = NULL;
> +               head = next;
>  
> -               rq->post_schedule = 0;
> +               func(rq);
>         }
> +       raw_spin_unlock_irqrestore(&rq->lock, flags);
> +}
> +
> +static inline void balance_callback(struct rq *rq)
> +{
> +       if (unlikely(rq->balance_callback))
> +               __balance_callback(rq);
>  }
>  
>  #else
>  
> -static inline void post_schedule(struct rq *rq)
> +static inline void balance_callback(struct rq *rq)
>  {
>  }
>  
> @@ -2311,7 +2323,7 @@ asmlinkage __visible void schedule_tail(
>         /* finish_task_switch() drops rq->lock and enables preemtion */
>         preempt_disable();
>         rq = finish_task_switch(prev);
> -       post_schedule(rq);
> +       balance_callback(rq);
>         preempt_enable();
>  
>         if (current->set_child_tid)
> @@ -2822,7 +2834,7 @@ static void __sched __schedule(void)
>         } else
>                 raw_spin_unlock_irq(&rq->lock);
>  
> -       post_schedule(rq);
> +       balance_callback(rq);
>  }
>  
>  static inline void sched_submit_work(struct task_struct *tsk)
> @@ -7216,7 +7228,7 @@ void __init sched_init(void)
>                 rq->sd = NULL;
>                 rq->rd = NULL;
>                 rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
> -               rq->post_schedule = 0;
> +               rq->balance_callback = NULL;
>                 rq->active_balance = 0;
>                 rq->next_balance = jiffies;
>                 rq->push_cpu = 0;
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -213,9 +213,16 @@ static inline bool need_pull_dl_task(str
>         return dl_task(prev);
>  }
>  
> -static inline void set_post_schedule(struct rq *rq)
> +static DEFINE_PER_CPU(struct callback_head, dl_balance_head);
> +
> +static void push_dl_tasks(struct rq *);
> +
> +static inline void queue_push_tasks(struct rq *rq)
>  {
> -       rq->post_schedule = has_pushable_dl_tasks(rq);
> +       if (!has_pushable_dl_tasks(rq))
> +               return;
> +
> +       queue_balance_callback(rq, &per_cpu(dl_balance_head, rq->cpu), push_dl_tasks);
>  }
>  
>  static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
> @@ -296,7 +303,7 @@ static inline int pull_dl_task(struct rq
>         return 0;
>  }
>  
> -static inline void set_post_schedule(struct rq *rq)
> +static inline void queue_push_tasks(struct rq *rq)
>  {
>  }
>  #endif /* CONFIG_SMP */
> @@ -1126,7 +1133,7 @@ struct task_struct *pick_next_task_dl(st
>         if (hrtick_enabled(rq))
>                 start_hrtick_dl(rq, p);
>  
> -       set_post_schedule(rq);
> +       queue_push_tasks(rq);
>  
>         return p;
>  }
> @@ -1544,11 +1551,6 @@ static int pull_dl_task(struct rq *this_
>         return ret;
>  }
>  
> -static void post_schedule_dl(struct rq *rq)
> -{
> -       push_dl_tasks(rq);
> -}
> -
>  /*
>   * Since the task is not running and a reschedule is not going to happen
>   * anytime soon on its runqueue, we try pushing it away now.
> @@ -1784,7 +1786,6 @@ const struct sched_class dl_sched_class
>         .set_cpus_allowed       = set_cpus_allowed_dl,
>         .rq_online              = rq_online_dl,
>         .rq_offline             = rq_offline_dl,
> -       .post_schedule          = post_schedule_dl,
>         .task_woken             = task_woken_dl,
>  #endif
>  
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -354,13 +354,16 @@ static inline int has_pushable_tasks(str
>         return !plist_head_empty(&rq->rt.pushable_tasks);
>  }
>  
> -static inline void set_post_schedule(struct rq *rq)
> +static DEFINE_PER_CPU(struct callback_head, rt_balance_head);
> +
> +static void push_rt_tasks(struct rq *);
> +
> +static inline void queue_push_tasks(struct rq *rq)
>  {
> -       /*
> -        * We detect this state here so that we can avoid taking the RQ
> -        * lock again later if there is no need to push
> -        */
> -       rq->post_schedule = has_pushable_tasks(rq);
> +       if (!has_pushable_tasks(rq))
> +               return;
> +
> +       queue_balance_callback(rq, &per_cpu(rt_balance_head, rq->cpu), push_rt_tasks);
>  }
>  
>  static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
> @@ -417,7 +420,7 @@ static inline int pull_rt_task(struct rq
>         return 0;
>  }
>  
> -static inline void set_post_schedule(struct rq *rq)
> +static inline void queue_push_tasks(struct rq *rq)
>  {
>  }
>  #endif /* CONFIG_SMP */
> @@ -1497,7 +1500,7 @@ pick_next_task_rt(struct rq *rq, struct
>         /* The running task is never eligible for pushing */
>         dequeue_pushable_task(rq, p);
>  
> -       set_post_schedule(rq);
> +       queue_push_tasks(rq);
>  
>         return p;
>  }
> @@ -2042,11 +2045,6 @@ static int pull_rt_task(struct rq *this_
>         return ret;
>  }
>  
> -static void post_schedule_rt(struct rq *rq)
> -{
> -       push_rt_tasks(rq);
> -}
> -
>  /*
>   * If we are not running and we are not going to reschedule soon, we should
>   * try to push tasks away now
> @@ -2318,7 +2316,6 @@ const struct sched_class rt_sched_class
>         .set_cpus_allowed       = set_cpus_allowed_rt,
>         .rq_online              = rq_online_rt,
>         .rq_offline             = rq_offline_rt,
> -       .post_schedule          = post_schedule_rt,
>         .task_woken             = task_woken_rt,
>         .switched_from          = switched_from_rt,
>  #endif
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -624,9 +624,10 @@ struct rq {
>         unsigned long cpu_capacity;
>         unsigned long cpu_capacity_orig;
>  
> +       struct callback_head *balance_callback;
> +
>         unsigned char idle_balance;
>         /* For active balancing */
> -       int post_schedule;
>         int active_balance;
>         int push_cpu;
>         struct cpu_stop_work active_balance_work;
> @@ -767,6 +768,21 @@ extern int migrate_swap(struct task_stru
>  
>  #ifdef CONFIG_SMP
>  
> +static inline void
> +queue_balance_callback(struct rq *rq,
> +                      struct callback_head *head,
> +                      void (*func)(struct rq *rq))
> +{
> +       lockdep_assert_held(&rq->lock);
> +
> +       if (unlikely(head->next))
> +               return;
> +
> +       head->func = (void (*)(struct callback_head *))func;
> +       head->next = rq->balance_callback;
> +       rq->balance_callback = head;
> +}

Maybe we should queue a higher-priority callback at the head?
And set aside a callback whose class's priority is lower than rq->curr's?

> +
>  extern void sched_ttwu_pending(void);
>  
>  #define rcu_dereference_check_sched_domain(p) \
> @@ -1192,7 +1208,6 @@ struct sched_class {
>         int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
>         void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
>  
> -       void (*post_schedule) (struct rq *this_rq);
>         void (*task_waking) (struct task_struct *task);
>         void (*task_woken) (struct rq *this_rq, struct task_struct *task);
>  
> 
>