[PATCH V2 7/8] perf/x86/intel/uncore: Fix uncore_box ref/unref ordering on CPU hotplug

From: Zide Chen

Date: Mon Jun 01 2026 - 13:15:33 EST


In uncore_event_cpu_online(), uncore_box_ref() was called before
uncore_change_context(). uncore_box_ref() gates on box->cpu >= 0,
but box->cpu is still -1 at that point because uncore_change_context()
has not run yet. As a result, the box is never initialized on the
first CPU to come online in a die, leaving it permanently
uninitialized in the single-CPU-per-die case.

Thus, box->refcnt is one count below the true value, and in the CPU
offline path, the box will be torn down on the second-to-last CPU.

In uncore_event_cpu_offline(), uncore_box_unref() was called after
uncore_change_context(), so box->cpu is already -1 when the collector
CPU goes offline, which prevents it from tearing down the box.

Fix by swapping the call order in both paths so that
uncore_box_{ref,unref}() runs at the point where box->cpu reflects
the correct context.

Fixes: c74443d92f68 ("perf/x86/uncore: Support per PMU cpumask")
Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>
Signed-off-by: Zide Chen <zide.chen@xxxxxxxxx>
---
arch/x86/events/intel/uncore.c | 50 ++++++++++++++++------------------
1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index f2cb3fde2dda..6d710aef52ac 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1577,9 +1577,15 @@ static int uncore_event_cpu_offline(unsigned int cpu)
{
int die, target;

+ /* Clear the references */
+ die = topology_logical_die_id(cpu);
+ uncore_box_unref(uncore_msr_uncores, die);
+ uncore_box_unref(uncore_mmio_uncores, die);
+
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
- goto unref;
+ return 0;
+
/* Find a new cpu to collect uncore events */
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);

@@ -1592,16 +1598,10 @@ static int uncore_event_cpu_offline(unsigned int cpu)
uncore_change_context(uncore_msr_uncores, cpu, target);
uncore_change_context(uncore_mmio_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
-
-unref:
- /* Clear the references */
- die = topology_logical_die_id(cpu);
- uncore_box_unref(uncore_msr_uncores, die);
- uncore_box_unref(uncore_mmio_uncores, die);
return 0;
}

-static int allocate_boxes(struct intel_uncore_type **types,
+static void allocate_boxes(struct intel_uncore_type **types,
unsigned int die, unsigned int cpu)
{
struct intel_uncore_box *box, *tmp;
@@ -1618,8 +1618,10 @@ static int allocate_boxes(struct intel_uncore_type **types,
if (pmu->boxes[die] || uncore_pmu_broken(pmu))
continue;
box = uncore_alloc_box(type, cpu_to_node(cpu));
- if (!box)
+ if (!box) {
+ uncore_pmu_set_broken(pmu);
goto cleanup;
+ }
box->pmu = pmu;
box->dieid = die;
list_add(&box->active_list, &allocated);
@@ -1630,14 +1632,13 @@ static int allocate_boxes(struct intel_uncore_type **types,
list_del_init(&box->active_list);
box->pmu->boxes[die] = box;
}
- return 0;
+ return;

cleanup:
list_for_each_entry_safe(box, tmp, &allocated, active_list) {
list_del_init(&box->active_list);
kfree(box);
}
- return -ENOMEM;
}

static int uncore_box_ref(struct intel_uncore_type **types,
@@ -1646,11 +1647,7 @@ static int uncore_box_ref(struct intel_uncore_type **types,
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, ret;
-
- ret = allocate_boxes(types, die, cpu);
- if (ret)
- return ret;
+ int i;

for (; *types; types++) {
type = *types;
@@ -1666,27 +1663,26 @@ static int uncore_box_ref(struct intel_uncore_type **types,

static int uncore_event_cpu_online(unsigned int cpu)
{
- int die, target, msr_ret, mmio_ret;
+ int die, target;

die = topology_logical_die_id(cpu);
- msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
- mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
+ allocate_boxes(uncore_msr_uncores, die, cpu);
+ allocate_boxes(uncore_mmio_uncores, die, cpu);

/*
* Check if there is an online cpu in the package
* which collects uncore events already.
*/
target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
- if (target < nr_cpu_ids)
- return 0;
-
- cpumask_set_cpu(cpu, &uncore_cpu_mask);
-
- if (!msr_ret)
+ if (target >= nr_cpu_ids) {
+ cpumask_set_cpu(cpu, &uncore_cpu_mask);
uncore_change_context(uncore_msr_uncores, -1, cpu);
- if (!mmio_ret)
uncore_change_context(uncore_mmio_uncores, -1, cpu);
- uncore_change_context(uncore_pci_uncores, -1, cpu);
+ uncore_change_context(uncore_pci_uncores, -1, cpu);
+ }
+
+ uncore_box_ref(uncore_msr_uncores, die, cpu);
+ uncore_box_ref(uncore_mmio_uncores, die, cpu);
return 0;
}

--
2.54.0