[PATCH v3 2/7] sched/fair: Add cgroup_mode: up

From: Peter Zijlstra

Date: Fri Jun 05 2026 - 08:51:19 EST


Instead of calculating the proportional fraction of the group weight for each
CPU, just give each CPU the full measure, ignoring these pesky SMP problems.

This makes the SMP cgroup fraction (F_g_n) equal to 1, and ensures a single
task in a cgroup competes on equal footing with a task in a level above.

However, as already explored, this is not a very good policy because it gets
the SMP weight distribution wrong. Included for completeness.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/debug.c | 5 ++++-
kernel/sched/fair.c | 31 +++++++++++++++++++++++++++++--
kernel/sched/sched.h | 1 +
3 files changed, 34 insertions(+), 3 deletions(-)

--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -271,6 +271,7 @@ static ssize_t sched_dynamic_write(struc
if (mode < 0)
return mode;

+ __sched_cgroup_mode_update(mode);
sched_dynamic_update(mode);

*ppos += cnt;
@@ -634,9 +635,11 @@ static void debugfs_fair_server_init(voi
}

#ifdef CONFIG_FAIR_GROUP_SCHED
-static int cgroup_mode = 0;
+static int cgroup_mode = 1;

+/* Keep entry order in sync with the mode values in __sched_cgroup_mode_update(). */
static const char *cgroup_mode_str[] = {
+ "up",
"smp",
};

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -38,6 +38,7 @@
#include <linux/sched/isolation.h>
#include <linux/sched/nohz.h>
#include <linux/sched/prio.h>
+#include <linux/static_call.h>

#include <linux/cpuidle.h>
#include <linux/interrupt.h>
@@ -4800,7 +4801,7 @@ static inline int throttled_hierarchy(st
*
* hence icky!
*/
-static long calc_group_shares(struct cfs_rq *cfs_rq)
+static long calc_smp_shares(struct cfs_rq *cfs_rq)
{
long tg_weight, tg_shares, load, shares;
struct task_group *tg = cfs_rq->tg;
@@ -4835,6 +4836,32 @@ static long calc_group_shares(struct cfs
}

/*
+ * Ignore this pesky SMP stuff, use (4).
+ */
+static long calc_up_shares(struct cfs_rq *cfs_rq)
+{
+	/* Hand back the group's configured shares as-is, with no per-CPU scaling. */
+	return READ_ONCE(cfs_rq->tg->shares);
+}
+
+DEFINE_STATIC_CALL(calc_group_shares, calc_smp_shares);
+
+void __sched_cgroup_mode_update(int mode)
+{
+ long (*func)(struct cfs_rq *);
+ switch (mode) {
+ case 0: /* "up" */
+ func = &calc_up_shares;
+ break;
+ case 1: /* "smp" */
+ default: /* any other mode falls back to smp */
+ func = &calc_smp_shares;
+ break;
+ }
+ static_call_update(calc_group_shares, func); /* used by update_cfs_group() */
+}
+
+/*
* Recomputes the group entity based on the current state of its group
* runqueue.
*/
@@ -4850,7 +4877,7 @@ static void update_cfs_group(struct sche
if (!gcfs_rq || !gcfs_rq->load.weight)
return;

- shares = calc_group_shares(gcfs_rq);
+ shares = static_call(calc_group_shares)(gcfs_rq);
if (unlikely(se->load.weight != shares))
reweight_entity(cfs_rq_of(se), se, shares);
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -571,6 +571,7 @@ extern void free_fair_sched_group(struct
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
extern void online_fair_sched_group(struct task_group *tg);
extern void unregister_fair_sched_group(struct task_group *tg);
+extern void __sched_cgroup_mode_update(int mode);
#else /* !CONFIG_FAIR_GROUP_SCHED: */
static inline void free_fair_sched_group(struct task_group *tg) { }
static inline int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)