[Patch v7 4/4] perf regs: Enable dumping of SIMD registers

From: Dapeng Mi

Date: Mon Mar 23 2026 - 21:10:19 EST


From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>

This patch adds support for dumping SIMD registers using the new
PERF_SAMPLE_REGS_ABI_SIMD ABI.

Currently, the XMM, YMM, ZMM, OPMASK, eGPRs, and SSP registers on x86
platforms are supported with the PERF_SAMPLE_REGS_ABI_SIMD ABI.

An example of the output is displayed below.

Example:

$perf record -e cycles:p -IXMM,YMM,OPMASK,SSP ./test
$perf report -D
... ...
237538985992962 0x454d0 [0x480]: PERF_RECORD_SAMPLE(IP, 0x1):
179370/179370: 0xffffffff969627fc period: 124999 addr: 0
... intr regs: mask 0x20000000000 ABI 64-bit
.... SSP 0x0000000000000000
... SIMD ABI nr_vectors 32 vector_qwords 4 nr_pred 8 pred_qwords 1
.... YMM [0] 0x0000000000004000
.... YMM [0] 0x000055e828695270
.... YMM [0] 0x0000000000000000
.... YMM [0] 0x0000000000000000
.... YMM [1] 0x000055e8286990e0
.... YMM [1] 0x000055e828698dd0
.... YMM [1] 0x0000000000000000
.... YMM [1] 0x0000000000000000
... ...
.... YMM [31] 0x0000000000000000
.... YMM [31] 0x0000000000000000
.... YMM [31] 0x0000000000000000
.... YMM [31] 0x0000000000000000
.... OPMASK[0] 0x0000000000100221
.... OPMASK[1] 0x0000000000000020
.... OPMASK[2] 0x000000007fffffff
.... OPMASK[3] 0x0000000000000000
.... OPMASK[4] 0x0000000000000000
.... OPMASK[5] 0x0000000000000000
.... OPMASK[6] 0x0000000000000000
.... OPMASK[7] 0x0000000000000000
... ...

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
Co-developed-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
---

V7: 1) add assert() check for SIMD fields in sample data.
2) optimize regs_abi[] definition.

tools/perf/util/evsel.c | 36 +++++++++++++++++++++
tools/perf/util/sample.h | 10 ++++++
tools/perf/util/session.c | 66 ++++++++++++++++++++++++++++++++++++++-
3 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5f00489e714a..24cc7ba71ae1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3520,6 +3520,24 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
regs->mask = mask;
regs->regs = (u64 *)array;
array = (void *)array + sz;
+
+ if (regs->abi & PERF_SAMPLE_REGS_ABI_SIMD) {
+ assert(regs->nr_vectors <=
+ hweight64(evsel->core.attr.sample_simd_vec_reg_user));
+ assert(regs->vector_qwords <=
+ evsel->core.attr.sample_simd_vec_reg_qwords);
+ assert(regs->nr_pred <=
+ hweight64(evsel->core.attr.sample_simd_pred_reg_user));
+ assert(regs->pred_qwords <=
+ evsel->core.attr.sample_simd_pred_reg_qwords);
+ regs->config = *(u64 *)array;
+ array = (void *)array + sizeof(u64);
+ regs->simd_data = (u64 *)array;
+ sz = (regs->nr_vectors * regs->vector_qwords +
+ regs->nr_pred * regs->pred_qwords) * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ array = (void *)array + sz;
+ }
}
}

@@ -3577,6 +3595,24 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
regs->mask = mask;
regs->regs = (u64 *)array;
array = (void *)array + sz;
+
+ if (regs->abi & PERF_SAMPLE_REGS_ABI_SIMD) {
+ assert(regs->nr_vectors <=
+ hweight64(evsel->core.attr.sample_simd_vec_reg_intr));
+ assert(regs->vector_qwords <=
+ evsel->core.attr.sample_simd_vec_reg_qwords);
+ assert(regs->nr_pred <=
+ hweight64(evsel->core.attr.sample_simd_pred_reg_intr));
+ assert(regs->pred_qwords <=
+ evsel->core.attr.sample_simd_pred_reg_qwords);
+ regs->config = *(u64 *)array;
+ array = (void *)array + sizeof(u64);
+ regs->simd_data = (u64 *)array;
+ sz = (regs->nr_vectors * regs->vector_qwords +
+ regs->nr_pred * regs->pred_qwords) * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ array = (void *)array + sz;
+ }
}
}

diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 3cce8dd202aa..21f3416d3755 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -15,6 +15,16 @@ struct regs_dump {
u64 abi;
u64 mask;
u64 *regs;
+ union {
+ u64 config;
+ struct {
+ u16 nr_vectors;
+ u16 vector_qwords;
+ u16 nr_pred;
+ u16 pred_qwords;
+ };
+ };
+ u64 *simd_data;

/* Cached values/mask filled by first register access. */
u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7cf7bf86205d..453d44d32162 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -972,15 +972,77 @@ static void regs_dump__printf(u64 mask, struct regs_dump *regs,
}
}

+static void simd_regs_dump__printf(uint16_t e_machine, struct regs_dump *regs, bool intr) /* Print the SIMD vector and predicate register payload of one sample; intr selects the intr vs. user class lookup. */
+{
+ const char *name = "unknown"; /* fallback label when no register class matches */
+ int i, idx = 0; /* idx walks regs->simd_data across the vector section and then the predicate section */
+ uint16_t qwords; /* NOTE(review): not initialized here; relies on the *_bitmap_qwords() helpers writing it - confirm */
+ int reg_c;
+
+ if (!(regs->abi & PERF_SAMPLE_REGS_ABI_SIMD)) /* nothing to print unless the SIMD ABI bit is set */
+ return;
+
+ printf("... SIMD ABI nr_vectors %d vector_qwords %d nr_pred %d pred_qwords %d\n",
+ regs->nr_vectors, regs->vector_qwords,
+ regs->nr_pred, regs->pred_qwords);
+
+ for (reg_c = 0; reg_c < 64; reg_c++) { /* pick the first vector class whose reported width equals vector_qwords */
+ if (intr) {
+ perf_intr_simd_reg_class_bitmap_qwords(e_machine, reg_c,
+ &qwords, /*pred=*/false);
+ } else {
+ perf_user_simd_reg_class_bitmap_qwords(e_machine, reg_c,
+ &qwords, /*pred=*/false);
+ }
+ if (regs->vector_qwords == qwords) {
+ name = perf_simd_reg_class_name(e_machine, reg_c, /*pred=*/false);
+ break;
+ }
+ }
+
+ for (i = 0; i < regs->nr_vectors; i++) { /* one output line per qword, all labelled with the register index i */
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]); /* NOTE(review): two qwords are always read; the total consumed equals vector_qwords only for 2, 4 or 8 */
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]);
+ if (regs->vector_qwords > 2) { /* qwords 2-3 */
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]);
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]);
+ }
+ if (regs->vector_qwords > 4) { /* qwords 4-7 */
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]);
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]);
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]);
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]);
+ }
+ }
+
+ name = "unknown"; /* reset the label before the predicate class lookup */
+ for (reg_c = 0; reg_c < 64; reg_c++) { /* same lookup as above, for predicate classes matched against pred_qwords */
+ if (intr) {
+ perf_intr_simd_reg_class_bitmap_qwords(e_machine, reg_c,
+ &qwords, /*pred=*/true);
+ } else {
+ perf_user_simd_reg_class_bitmap_qwords(e_machine, reg_c,
+ &qwords, /*pred=*/true);
+ }
+ if (regs->pred_qwords == qwords) {
+ name = perf_simd_reg_class_name(e_machine, reg_c, /*pred=*/true);
+ break;
+ }
+ }
+ for (i = 0; i < regs->nr_pred; i++) /* NOTE(review): reads exactly one qword per predicate register regardless of pred_qwords */
+ printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->simd_data[idx++]);
+}
+
static const char *regs_abi[] = { /* ABI value -> display name; indices without an initializer stay NULL */
[PERF_SAMPLE_REGS_ABI_NONE] = "none",
[PERF_SAMPLE_REGS_ABI_32] = "32-bit",
[PERF_SAMPLE_REGS_ABI_64] = "64-bit",
+ [PERF_SAMPLE_REGS_ABI_SIMD | PERF_SAMPLE_REGS_ABI_64] = "64-bit SIMD", /* no entry is added for SIMD combined with the 32-bit ABI */
};

static inline const char *regs_dump_abi(struct regs_dump *d)
{
- if (d->abi > PERF_SAMPLE_REGS_ABI_64)
+ if (d->abi >= ARRAY_SIZE(regs_abi) || !regs_abi[d->abi])
return "unknown";

return regs_abi[d->abi];
@@ -1010,6 +1072,7 @@ static void regs_user__printf(struct perf_sample *sample, uint16_t e_machine, ui

if (user_regs->regs)
regs__printf("user", user_regs, e_machine, e_flags);
+ simd_regs_dump__printf(e_machine, user_regs, /*intr=*/false);
}

static void regs_intr__printf(struct perf_sample *sample, uint16_t e_machine, uint32_t e_flags)
@@ -1023,6 +1086,7 @@ static void regs_intr__printf(struct perf_sample *sample, uint16_t e_machine, ui

if (intr_regs->regs)
regs__printf("intr", intr_regs, e_machine, e_flags);
+ simd_regs_dump__printf(e_machine, intr_regs, /*intr=*/true);
}

static void stack_user__printf(struct stack_dump *dump)
--
2.34.1