Re: [PATCH 1/2] sched/fair: consider hk_mask early in triggering ilb

From: Mukesh Kumar Chaurasiya

Date: Thu Mar 19 2026 - 04:16:06 EST


On Thu, Mar 19, 2026 at 12:23:13PM +0530, Shrikanth Hegde wrote:
> Current code around nohz_balancer_kick and kick_ilb:
> 1. Checks for nohz.idle_cpus_mask to see if idle load balance (ilb) is
> needed.
> 2. Does a few checks to see if any conditions meet the criteria.
> 3. Tries to find the idle CPU. But the idle CPU found should be part of
> housekeeping CPUs.
>
> If there is no housekeeping idle CPU, then step 2 is done
> unnecessarily, since step 3 bails out without doing the ilb.
>
> Fix that by making the decision early and pass it on to find_new_ilb.
> Use a percpu cpumask instead of allocating it every time, since this is
> in the fastpath.
>
> If flags is set to NOHZ_STATS_KICK because the time is after
> nohz.next_blocked but before nohz.next_balance, and there are idle CPUs
> which are part of housekeeping, the same logic needs to be copied there
> too.
>
> While there, fix the stale comments around nohz.nr_cpus
>
> Signed-off-by: Shrikanth Hegde <sshegde@xxxxxxxxxxxxx>
> ---
>
> Didn't add the fixes tag since it addresses more than stale comments.
>
> kernel/sched/fair.c | 45 +++++++++++++++++++++++++++++++--------------
> 1 file changed, 31 insertions(+), 14 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index b19aeaa51ebc..02cca2c7a98d 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7392,6 +7392,7 @@ static inline unsigned int cfs_h_nr_delayed(struct rq *rq)
> static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
> static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
> static DEFINE_PER_CPU(cpumask_var_t, should_we_balance_tmpmask);
> +static DEFINE_PER_CPU(cpumask_var_t, kick_ilb_tmpmask);
>
> #ifdef CONFIG_NO_HZ_COMMON
>
> @@ -12629,15 +12630,14 @@ static inline int on_null_domain(struct rq *rq)
> * - When one of the busy CPUs notices that there may be an idle rebalancing
> * needed, they will kick the idle load balancer, which then does idle
> * load balancing for all the idle CPUs.
> + *
> + * @cpus idle CPUs in HK_TYPE_KERNEL_NOISE housekeeping
> */
> -static inline int find_new_ilb(void)
> +static inline int find_new_ilb(struct cpumask *cpus)
> {
> - const struct cpumask *hk_mask;
> int ilb_cpu;
>
> - hk_mask = housekeeping_cpumask(HK_TYPE_KERNEL_NOISE);
> -
> - for_each_cpu_and(ilb_cpu, nohz.idle_cpus_mask, hk_mask) {
> + for_each_cpu(ilb_cpu, cpus) {
>
> if (ilb_cpu == smp_processor_id())
> continue;
> @@ -12656,7 +12656,7 @@ static inline int find_new_ilb(void)
> * We pick the first idle CPU in the HK_TYPE_KERNEL_NOISE housekeeping set
> * (if there is one).
> */
> -static void kick_ilb(unsigned int flags)
> +static void kick_ilb(unsigned int flags, struct cpumask *cpus)
> {
> int ilb_cpu;
>
> @@ -12667,7 +12667,7 @@ static void kick_ilb(unsigned int flags)
> if (flags & NOHZ_BALANCE_KICK)
> nohz.next_balance = jiffies+1;
>
> - ilb_cpu = find_new_ilb();
> + ilb_cpu = find_new_ilb(cpus);
> if (ilb_cpu < 0)
> return;
>
> @@ -12700,6 +12700,7 @@ static void kick_ilb(unsigned int flags)
> */
> static void nohz_balancer_kick(struct rq *rq)
> {
> + struct cpumask *ilb_cpus = this_cpu_cpumask_var_ptr(kick_ilb_tmpmask);
> unsigned long now = jiffies;
> struct sched_domain_shared *sds;
> struct sched_domain *sd;
> @@ -12715,27 +12716,41 @@ static void nohz_balancer_kick(struct rq *rq)
> */
> nohz_balance_exit_idle(rq);
>
> + /* ILB considers only HK_TYPE_KERNEL_NOISE housekeeping CPUs */
> +
> if (READ_ONCE(nohz.has_blocked_load) &&
> - time_after(now, READ_ONCE(nohz.next_blocked)))
> + time_after(now, READ_ONCE(nohz.next_blocked))) {
> flags = NOHZ_STATS_KICK;
> + cpumask_and(ilb_cpus, nohz.idle_cpus_mask,
> + housekeeping_cpumask(HK_TYPE_KERNEL_NOISE));
> + }
>
> /*
> - * Most of the time system is not 100% busy. i.e nohz.nr_cpus > 0
> - * Skip the read if time is not due.
> + * Most of the time system is not 100% busy. i.e there are idle
> + * housekeeping CPUs.
> + *
> + * So, Skip the reading idle_cpus_mask if time is not due.
> *
> * If none are in tickless mode, there maybe a narrow window
> * (28 jiffies, HZ=1000) where flags maybe set and kick_ilb called.
> * But idle load balancing is not done as find_new_ilb fails.
> - * That's very rare. So read nohz.nr_cpus only if time is due.
> + * That's very rare. So check (idle_cpus_mask & HK_TYPE_KERNEL_NOISE)
> + * only if time is due.
> + *
> */
> if (time_before(now, nohz.next_balance))
> goto out;
>
> + /* Avoid the double computation */
> + if (flags != NOHZ_STATS_KICK)
> + cpumask_and(ilb_cpus, nohz.idle_cpus_mask,
> + housekeeping_cpumask(HK_TYPE_KERNEL_NOISE));
> +
There is no usage of ilb_cpus until this point. We could avoid this
if-condition and compute ilb_cpus here instead of earlier.
> /*
> * None are in tickless mode and hence no need for NOHZ idle load
> * balancing
> */
> - if (unlikely(cpumask_empty(nohz.idle_cpus_mask)))
> + if (unlikely(cpumask_empty(ilb_cpus)))
> return;
>
> if (rq->nr_running >= 2) {
> @@ -12767,7 +12782,7 @@ static void nohz_balancer_kick(struct rq *rq)
> * When balancing between cores, all the SMT siblings of the
> * preferred CPU must be idle.
> */
> - for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
> + for_each_cpu_and(i, sched_domain_span(sd), ilb_cpus) {
> if (sched_asym(sd, i, cpu)) {
> flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> goto unlock;
> @@ -12820,7 +12835,7 @@ static void nohz_balancer_kick(struct rq *rq)
> flags |= NOHZ_NEXT_KICK;
>
> if (flags)
> - kick_ilb(flags);
> + kick_ilb(flags, ilb_cpus);
> }
>
> static void set_cpu_sd_state_busy(int cpu)
> @@ -14253,6 +14268,8 @@ __init void init_sched_fair_class(void)
> zalloc_cpumask_var_node(&per_cpu(select_rq_mask, i), GFP_KERNEL, cpu_to_node(i));
> zalloc_cpumask_var_node(&per_cpu(should_we_balance_tmpmask, i),
> GFP_KERNEL, cpu_to_node(i));
> + zalloc_cpumask_var_node(&per_cpu(kick_ilb_tmpmask, i),
> + GFP_KERNEL, cpu_to_node(i));
>
> #ifdef CONFIG_CFS_BANDWIDTH
> INIT_CSD(&cpu_rq(i)->cfsb_csd, __cfsb_csd_unthrottle, cpu_rq(i));
> --
> 2.43.0
>
Rest LGTM

Regards,
Mukesh