Re: [PATCH 1/5] sched/fair: Drop redundant RCU read lock in NOHZ kick path

From: Marek Szyprowski

Date: Thu May 21 2026 - 15:47:16 EST


On 09.05.2026 20:07, Andrea Righi wrote:
> nohz_balancer_kick() is reached from sched_balance_trigger(), which is
> called from sched_tick(). sched_tick() runs with IRQs disabled, so the
> additional rcu_read_lock/unlock() used around sched_domain accesses in
> this path is redundant. Rely on the existing IRQ-disabled context (and
> the rcu_dereference_all() checking) instead.
>
> The same applies to set_cpu_sd_state_idle(), called from the idle entry
> path with IRQs disabled, and to set_cpu_sd_state_busy(), reachable via
> nohz_balance_exit_idle() from two contexts: nohz_balancer_kick() (IRQs
> disabled, as above) and sched_cpu_deactivate() (the CPUHP_AP_ACTIVE
> teardown, which runs under cpus_write_lock(), so it cannot race with
> sched-domain rebuilds). In both cases the rcu_dereference_all()
> validation is sufficient.
>
> No functional change intended.
>
> Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
> Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
> Suggested-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
> Reviewed-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
> Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>
This patch landed in today's linux-next as commit c9d93a73ce87 ("sched/fair: Drop
redundant RCU read lock in NOHZ kick path"). In my tests I found that it introduced
the following warning during the CPU hot-plug tests:


root@target:~# for i in /sys/devices/system/cpu/cpu[1-9]; do echo 0 >$i/online; done

=============================
WARNING: suspicious RCU usage
7.1.0-rc2+ #12775 Not tainted
-----------------------------
kernel/sched/fair.c:12793 suspicious rcu_dereference_check() usage!

other info that might help us debug this:


rcu_scheduler_active = 2, debug_locks = 1
2 locks held by cpuhp/1/20:
 #0: ffffffff81a16220 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x42/0x1ae
 #1: ffffffff81a16270 (cpuhp_state-down){+.+.}-{0:0}, at: cpuhp_thread_fun+0x72/0x1ae

stack backtrace:
CPU: 1 UID: 0 PID: 20 Comm: cpuhp/1 Not tainted 7.1.0-rc2+ #12775 PREEMPTLAZY
Hardware name: StarFive VisionFive 2 v1.2A (DT)
Call Trace:
[<ffffffff8001827c>] dump_backtrace+0x1c/0x24
[<ffffffff800014c0>] show_stack+0x28/0x34
[<ffffffff80010d42>] dump_stack_lvl+0x5e/0x86
[<ffffffff80010d7e>] dump_stack+0x14/0x1c
[<ffffffff800987ec>] lockdep_rcu_suspicious+0x14c/0x1b8
[<ffffffff80079992>] nohz_balance_exit_idle+0xf4/0xf6
[<ffffffff800664e6>] sched_cpu_deactivate+0x6c/0x1c8
[<ffffffff8002a5d0>] cpuhp_invoke_callback+0xf8/0x1ce
[<ffffffff8002a944>] cpuhp_thread_fun+0x150/0x1ae
[<ffffffff8005dc64>] smpboot_thread_fn+0x138/0x2a4
[<ffffffff800554ae>] kthread+0xea/0x10c
[<ffffffff800134c4>] ret_from_fork_kernel+0x22/0x386
[<ffffffff80c278ee>] ret_from_fork_kernel_asm+0x16/0x18
CPU1: off
CPU2: off
CPU3: off

This issue is observed on most of my ARM 32bit, ARM 64bit and RiscV64 based boards.


> ---
> kernel/sched/fair.c | 38 +++++++++++---------------------------
> 1 file changed, 11 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 3ebec186f9823..6b059ee80b631 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -12785,8 +12785,6 @@ static void nohz_balancer_kick(struct rq *rq)
> goto out;
> }
>
> - rcu_read_lock();
> -
> sd = rcu_dereference_all(rq->sd);
> if (sd) {
> /*
> @@ -12794,8 +12792,8 @@ static void nohz_balancer_kick(struct rq *rq)
> * capacity, kick the ILB to see if there's a better CPU to run on:
> */
> if (rq->cfs.h_nr_runnable >= 1 && check_cpu_capacity(rq, sd)) {
> - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> - goto unlock;
> + flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> + goto out;
> }
> }
>
> @@ -12811,8 +12809,8 @@ static void nohz_balancer_kick(struct rq *rq)
> */
> for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
> if (sched_asym(sd, i, cpu)) {
> - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> - goto unlock;
> + flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> + goto out;
> }
> }
> }
> @@ -12823,10 +12821,8 @@ static void nohz_balancer_kick(struct rq *rq)
> * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
> * to run the misfit task on.
> */
> - if (check_misfit_status(rq)) {
> - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> - goto unlock;
> - }
> + if (check_misfit_status(rq))
> + flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
>
> /*
> * For asymmetric systems, we do not want to nicely balance
> @@ -12835,7 +12831,7 @@ static void nohz_balancer_kick(struct rq *rq)
> *
> * Skip the LLC logic because it's not relevant in that case.
> */
> - goto unlock;
> + goto out;
> }
>
> sds = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
> @@ -12850,13 +12846,9 @@ static void nohz_balancer_kick(struct rq *rq)
> * like this LLC domain has tasks we could move.
> */
> nr_busy = atomic_read(&sds->nr_busy_cpus);
> - if (nr_busy > 1) {
> - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> - goto unlock;
> - }
> + if (nr_busy > 1)
> + flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
> }
> -unlock:
> - rcu_read_unlock();
> out:
> if (READ_ONCE(nohz.needs_update))
> flags |= NOHZ_NEXT_KICK;
> @@ -12868,17 +12860,13 @@ static void nohz_balancer_kick(struct rq *rq)
> static void set_cpu_sd_state_busy(int cpu)
> {
> struct sched_domain *sd;
> -
> - rcu_read_lock();
> sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
>
> if (!sd || !sd->nohz_idle)
> - goto unlock;
> + return;
> sd->nohz_idle = 0;
>
> atomic_inc(&sd->shared->nr_busy_cpus);
> -unlock:
> - rcu_read_unlock();
> }
>
> void nohz_balance_exit_idle(struct rq *rq)
> @@ -12897,17 +12885,13 @@ void nohz_balance_exit_idle(struct rq *rq)
> static void set_cpu_sd_state_idle(int cpu)
> {
> struct sched_domain *sd;
> -
> - rcu_read_lock();
> sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
>
> if (!sd || sd->nohz_idle)
> - goto unlock;
> + return;
> sd->nohz_idle = 1;
>
> atomic_dec(&sd->shared->nr_busy_cpus);
> -unlock:
> - rcu_read_unlock();
> }
>
> /*

Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland