[PATCH 3/3] arm64/sched: Enable CPPC-based asym-packing
From: Christian Loehle
Date: Wed Mar 25 2026 - 14:15:07 EST
To handle minor capacity differences (<5%), use asym-packing to steer
tasks towards higher-performance CPUs, replacing capacity-aware
scheduling in those cases and skipping the SD_ASYM_CPUCAPACITY flag.
This is implemented by using the CPUs' CPPC highest_perf values as the
asym-packing priorities.
For now, highest_perf-based asym-packing is a global ordering that is
applied at all levels of the scheduling hierarchy.
Signed-off-by: Christian Loehle <christian.loehle@xxxxxxx>
---
arch/arm64/include/asm/topology.h | 6 ++++++
arch/arm64/kernel/topology.c | 34 +++++++++++++++++++++++++++++++
kernel/sched/topology.c | 26 ++++++++++++++++-------
3 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index b9eaf4ad7085..e0b039e1a5bb 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -39,6 +39,12 @@ void update_freq_counters_refs(void);
#undef arch_cpu_is_threaded
#define arch_cpu_is_threaded() (read_cpuid_mpidr() & MPIDR_MT_BITMASK)
+#undef arch_asym_cpu_priority
+#define arch_asym_cpu_priority arm64_arch_asym_cpu_priority
+#define arch_sched_asym_flags arm64_arch_sched_asym_flags
+extern int arm64_arch_asym_cpu_priority(int cpu);
+extern int arm64_arch_sched_asym_flags(void);
+
#include <asm-generic/topology.h>
#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index b32f13358fbb..4e3582d44a26 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched/isolation.h>
+#include <linux/sched/topology.h>
#include <linux/xarray.h>
#include <asm/cpu.h>
@@ -373,6 +374,26 @@ core_initcall(init_amu_fie);
#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>
+static bool __read_mostly sched_cppc_asym_active;
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_cppc_priority);
+
+int arm64_arch_asym_cpu_priority(int cpu)
+{
+ if (!READ_ONCE(sched_cppc_asym_active))
+ return -cpu;
+ return per_cpu(sched_cppc_priority, cpu);
+}
+
+int arm64_arch_sched_asym_flags(void)
+{
+ return READ_ONCE(sched_cppc_asym_active) ? SD_ASYM_PACKING : 0;
+}
+
+void arch_topology_init_cppc_asym(void)
+{
+ WRITE_ONCE(sched_cppc_asym_active, topology_init_cppc_asym_packing(&sched_cppc_priority));
+}
+
static void cpu_read_corecnt(void *val)
{
/*
@@ -473,4 +494,17 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
{
return -EOPNOTSUPP;
}
+
+#else
+int arm64_arch_asym_cpu_priority(int cpu)
+{
+ return -cpu;
+}
+
+int arm64_arch_sched_asym_flags(void)
+{
+ return 0;
+}
+
+void arch_topology_init_cppc_asym(void) { }
#endif /* CONFIG_ACPI_CPPC_LIB */
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index b0c590dfdb01..758b8796b62d 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1396,6 +1396,8 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
const struct cpumask *cpu_map)
{
struct asym_cap_data *entry;
+ unsigned long max_cap = 0, min_cap = ULONG_MAX;
+ bool has_cap = false;
int count = 0, miss = 0;
/*
@@ -1405,9 +1407,12 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
* skip those.
*/
list_for_each_entry(entry, &asym_cap_list, link) {
- if (cpumask_intersects(sd_span, cpu_capacity_span(entry)))
+ if (cpumask_intersects(sd_span, cpu_capacity_span(entry))) {
++count;
- else if (cpumask_intersects(cpu_map, cpu_capacity_span(entry)))
+ max_cap = max(max_cap, entry->capacity);
+ min_cap = min(min_cap, entry->capacity);
+ has_cap = true;
+ } else if (cpumask_intersects(cpu_map, cpu_capacity_span(entry)))
++miss;
}
@@ -1419,10 +1424,12 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
/* Some of the available CPU capacity values have not been detected */
if (miss)
return SD_ASYM_CPUCAPACITY;
+ /* When asym packing is active, ignore small capacity differences. */
+ if (arch_sched_asym_flags() && has_cap && !capacity_greater(max_cap, min_cap))
+ return 0;
/* Full asymmetry */
return SD_ASYM_CPUCAPACITY | SD_ASYM_CPUCAPACITY_FULL;
-
}
static void free_asym_cap_entry(struct rcu_head *head)
@@ -1753,7 +1760,7 @@ static inline int topology_arch_sched_asym_flags(void)
#ifdef CONFIG_SCHED_SMT
int cpu_smt_flags(void)
{
- return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC | arch_sched_asym_flags();
}
const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
@@ -1765,7 +1772,7 @@ const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cp
#ifdef CONFIG_SCHED_CLUSTER
int cpu_cluster_flags(void)
{
- return SD_CLUSTER | SD_SHARE_LLC;
+ return SD_CLUSTER | SD_SHARE_LLC | arch_sched_asym_flags();
}
const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
@@ -1777,7 +1784,7 @@ const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cp
#ifdef CONFIG_SCHED_MC
int cpu_core_flags(void)
{
- return SD_SHARE_LLC;
+ return SD_SHARE_LLC | arch_sched_asym_flags();
}
const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
@@ -1791,6 +1798,11 @@ const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cp
return cpu_node_mask(cpu);
}
+static int cpu_pkg_flags(void)
+{
+ return arch_sched_asym_flags();
+}
+
/*
* Topology list, bottom-up.
*/
@@ -1806,7 +1818,7 @@ static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_MC
SDTL_INIT(tl_mc_mask, cpu_core_flags, MC),
#endif
- SDTL_INIT(tl_pkg_mask, NULL, PKG),
+ SDTL_INIT(tl_pkg_mask, cpu_pkg_flags, PKG),
{ NULL, },
};
--
2.34.1