[PATCH v3 16/20] sched/core: Compute steal values at regular intervals
From: Shrikanth Hegde
Date: Thu May 14 2026 - 11:42:33 EST
Kick off the work to compute the steal time at regular intervals.
This is gated by the steal monitor enabled static key check to avoid any
overhead when it is disabled.
The sampling period can be changed at runtime using steal_mon/sampling_period.
The default is 1000 milliseconds, i.e. 1 second.
This work is done by the first online housekeeping CPU only. Hence it does
not need any complicated synchronization.
Signed-off-by: Shrikanth Hegde <sshegde@xxxxxxxxxxxxx>
---
include/linux/sched.h | 2 ++
kernel/sched/core.c | 26 ++++++++++++++++++++++++++
kernel/sched/debug.c | 1 +
kernel/sched/sched.h | 7 +++++++
4 files changed, 36 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ee5f19a96118..738f17d63943 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2527,6 +2527,8 @@ struct steal_monitor_t {
unsigned int high_threshold;
unsigned int sampling_period_ms;
};
+
+extern struct steal_monitor_t steal_mon;
#endif
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 907c6b38460b..a3f65e9c7d30 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5719,6 +5719,9 @@ void sched_tick(void)
rq->idle_balance = idle_cpu(cpu);
sched_balance_trigger(rq);
}
+
+ if (sched_steal_mon_enabled())
+ sched_trigger_steal_computation(cpu);
}
#ifdef CONFIG_NO_HZ_FULL
@@ -11375,4 +11378,27 @@ void sched_steal_detection_work(struct work_struct *work)
now = ktime_get();
sm->prev_time = now;
}
+
+void sched_trigger_steal_computation(int cpu)
+{
+ int first_hk_cpu = cpumask_first_and(housekeeping_cpumask(HK_TYPE_KERNEL_NOISE),
+ cpu_online_mask);
+ ktime_t now;
+
+ /* Only the first online housekeeping CPU proceeds; all other CPUs return here */
+ if (likely(cpu != first_hk_cpu))
+ return;
+
+ /*
+ * Since everything is updated by the first housekeeping CPU,
+ * there is no need for complex synchronization.
+ */
+ now = ktime_get();
+
+ /* Not due yet: under sampling_period_ms since prev_time (1 second by default) */
+ if (likely(ktime_ms_delta(now, steal_mon.prev_time) < steal_mon.sampling_period_ms))
+ return;
+
+ schedule_work_on(first_hk_cpu, &steal_mon.work);
+}
#endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index be8d223b43fd..f00c08581253 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -606,6 +606,7 @@ static ssize_t sched_sm_en_write(struct file *filp, const char __user *ubuf,
static_branch_enable(&__sched_sm_enable);
} else if (!sched_sm_wr_enable && orig) {
static_branch_disable(&__sched_sm_enable);
+ cancel_work_sync(&steal_mon.work);
cpumask_copy(&__cpu_preferred_mask, cpu_online_mask);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d674f8e8e854..cc90012a85fc 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -4145,9 +4145,16 @@ DECLARE_STATIC_KEY_FALSE(__sched_sm_enable);
void sched_push_current_non_preferred_cpu(struct rq *rq);
void sched_init_steal_monitor(void);
void sched_steal_detection_work(struct work_struct *work);
+void sched_trigger_steal_computation(int cpu);
+static inline bool sched_steal_mon_enabled(void)
+{
+ return static_branch_unlikely(&__sched_sm_enable); /* key toggled in sched_sm_en_write() */
+}
#else /* !CONFIG_PREFERRED_CPU */
static inline void sched_push_current_non_preferred_cpu(struct rq *rq) { }
static inline void sched_init_steal_monitor(void) { }
+static inline void sched_trigger_steal_computation(int cpu) { }
+static inline bool sched_steal_mon_enabled(void) { return false; }
#endif
#endif /* _KERNEL_SCHED_SCHED_H */
--
2.47.3