[Patch v4 01/16] sched/cache: Allow only 1 thread of the process to calculate the LLC occupancy

From: Tim Chen

Date: Wed May 13 2026 - 16:33:57 EST


From: Jianyong Wu <wujianyong@xxxxxxxx>

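Scanning the online CPUs to calculate the LLC occupancy can be
time-consuming. Allow only one thread of the process to perform the
scan in each scan period: a thread returns early if
mm->sc_stat.next_scan has not been reached yet, or if it loses the
try_cmpxchg() that advances next_scan to now + EPOCH_PERIOD. This is
similar to what NUMA balancing does in task_numa_work().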

Signed-off-by: Jianyong Wu <wujianyong@xxxxxxxx>
Signed-off-by: Chen Yu <yu.c.chen@xxxxxxxxx>
Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
include/linux/sched.h | 1 +
kernel/sched/fair.c | 11 +++++++++++
2 files changed, 12 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2010483cd77..6d883f109ba3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2423,6 +2423,7 @@ struct sched_cache_stat {
struct sched_cache_time __percpu *pcpu_sched;
raw_spinlock_t lock;
unsigned long epoch;
+ unsigned long next_scan;
int cpu;
} ____cacheline_aligned_in_smp;

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5f22e5a097cf..a759ea669d74 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1451,6 +1451,7 @@ void mm_init_sched(struct mm_struct *mm,
raw_spin_lock_init(&mm->sc_stat.lock);
mm->sc_stat.epoch = epoch;
mm->sc_stat.cpu = -1;
+ mm->sc_stat.next_scan = jiffies;

/*
* The update to mm->sc_stat should not be reordered
@@ -1661,6 +1662,7 @@ static void get_scan_cpumasks(cpumask_var_t cpus, struct task_struct *p)

static void task_cache_work(struct callback_head *work)
{
+ unsigned long next_scan, now = jiffies;
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
unsigned long m_a_occ = 0;
@@ -1675,6 +1677,15 @@ static void task_cache_work(struct callback_head *work)
if (p->flags & PF_EXITING)
return;

+ next_scan = READ_ONCE(mm->sc_stat.next_scan);
+ if (time_before(now, next_scan))
+ return;
+
+ /* only 1 thread is allowed to scan */
+ if (!try_cmpxchg(&mm->sc_stat.next_scan, &next_scan,
+ now + EPOCH_PERIOD))
+ return;
+
if (!zalloc_cpumask_var(&cpus, GFP_KERNEL))
return;

--
2.32.0