[PATCH RFC] x86/cacheinfo: Remove get_cache_id() due to ID collisions
From: Ricardo Neri
Date: Thu Mar 19 2026 - 08:39:44 EST
calc_cache_topo_id() and get_cache_id() both compute cache IDs used to
determine which CPUs share a cache. Intel processors use
calc_cache_topo_id(), while Hygon and recent AMD processors use
get_cache_id(). Intel processors use get_cache_id() as well, but only to
populate /sys/devices/system/cpu/cpuN/cache/index*/id. Besides duplicating
logic, get_cache_id() returns incorrect and colliding IDs.
calc_cache_topo_id() masks off the least-significant N bits of an APIC ID.
N is the smallest integer such that 2^N is greater than or equal to the
number of logical CPUs sharing a cache as reported in CPUID.4.<level>.
EAX[25:14]. This produces unique IDs for all CPUs that share a cache.
get_cache_id() instead right-shifts the APIC ID by N. This only works when
N is consistent across CPUs. On systems with heterogeneous cores (e.g.,
Intel hybrid processors), different cores report different numbers of
logical CPUs sharing a cache. This results in inconsistent shifts and,
consequently, cache ID collisions between CPUs that do not actually share
the cache.
Consider the APIC IDs and CPUID leaf 4 data for the L1 cache of selected
CPUs from a Meteor Lake-based Lenovo ThinkPad T16 Gen 3:
CPU APICID num_threads_sharing calc_cache_topo_id() get_cache_id()
2 0x18 2 0x18 0xc
3 0x19 2 0x18 0xc
10 0xc 1 0xc 0xc
CPUs 2 and 3 are the two logical CPUs of one core. They share the L1 cache.
CPU10 does not, yet get_cache_id() assigns it the same ID, illustrating
the collision.
While masking and shifting are equivalent when N is identical across CPUs,
using a shift makes the computation dependent on per-CPU topology and
therefore unsafe.
Switch all users to calc_cache_topo_id() and remove get_cache_id(). The
values exposed in /sys/devices/system/cpu/cpuN/cache/index*/id will change,
but they will remain unique and continue to reflect shared caches
correctly.
Reported-by: Len Brown <len.brown@xxxxxxxxx>
Tested-by: Len Brown <len.brown@xxxxxxxxx>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
---
Hi Prateek, Ahmed,
I would like to get your feedback on this patch before involving the x86
maintainers. I want to ensure that I don't break Hygon or AMD systems or
miss any subtle detail.
I tested this patch on Genoa and Turin systems, both of which used
get_cache_id() to compute L3 cache IDs. The IDs changed but the rest of
the files under /sys/devices/system/cpu/cpuN/cache/index* remain the
same.
---
arch/x86/kernel/cpu/cacheinfo.c | 34 ++++++++--------------------------
1 file changed, 8 insertions(+), 26 deletions(-)
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 51a95b07831f..56194ef55d8e 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -289,20 +289,14 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
-/*
- * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input
- * ECX as cache index. Then right shift apicid by the number's order to get
- * cache id for this cache node.
- */
-static unsigned int get_cache_id(u32 apicid, const struct _cpuid4_info *id4)
+static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
{
- unsigned long num_threads_sharing;
+ unsigned int num_threads_sharing;
int index_msb;
num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
index_msb = get_count_order(num_threads_sharing);
-
- return apicid >> index_msb;
+ return c->topo.apicid & ~((1 << index_msb) - 1);
}
/*
@@ -332,7 +326,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
struct _cpuid4_info id4 = {};
if (!amd_fill_cpuid4_info(llc_index, &id4))
- c->topo.llc_id = get_cache_id(c->topo.apicid, &id4);
+ c->topo.llc_id = calc_cache_topo_id(c, &id4);
}
}
@@ -410,16 +404,6 @@ static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c)
intel_cacheinfo_done(c, l3, l2, l1i, l1d);
}
-static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
-{
- unsigned int num_threads_sharing;
- int index_msb;
-
- num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
- return c->topo.apicid & ~((1 << index_msb) - 1);
-}
-
static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
{
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
@@ -551,7 +535,7 @@ static void __cache_cpumap_setup(unsigned int cpu, int index,
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cacheinfo *ci, *sibling_ci;
unsigned long num_threads_sharing;
- int index_msb, i;
+ int i;
if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
if (__cache_amd_cpumap_setup(cpu, index, id4))
@@ -565,10 +549,9 @@ static void __cache_cpumap_setup(unsigned int cpu, int index,
if (num_threads_sharing == 1)
return;
- index_msb = get_count_order(num_threads_sharing);
-
for_each_online_cpu(i)
- if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
+ /* Caller calculated the cache ID of @cpu */
+ if (id4->id == calc_cache_topo_id(&cpu_data(i), id4)) {
struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
/* Skip if itself or no cacheinfo */
@@ -612,7 +595,6 @@ int populate_cache_leaves(unsigned int cpu)
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *ci = this_cpu_ci->info_list;
u8 cpu_vendor = boot_cpu_data.x86_vendor;
- u32 apicid = cpu_data(cpu).topo.apicid;
struct amd_northbridge *nb = NULL;
struct _cpuid4_info id4 = {};
int idx, ret;
@@ -622,7 +604,7 @@ int populate_cache_leaves(unsigned int cpu)
if (ret)
return ret;
- id4.id = get_cache_id(apicid, &id4);
+ id4.id = calc_cache_topo_id(&cpu_data(cpu), &id4);
if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
nb = amd_init_l3_cache(idx);
---
base-commit: 9fefc4bad1a3db12ecf6ca8e9886b29721249e77
change-id: 20260220-rneri-x86-cacheinfoid-bug-71e3f7e28e94
Best regards,
--
Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>