Re: [Patch v8 5/5] perf dwarf-regs: Add SIMD/eGPRs support for x86 DWARF registers
From: Mi, Dapeng
Date: Tue Jun 02 2026 - 02:06:56 EST
On 5/30/2026 1:56 AM, Ian Rogers wrote:
> On Fri, May 29, 2026 at 1:30 AM Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx> wrote:
>> Enhance the x86-specific DWARF register handling by adding support for
>> SIMD and eGPRs registers.
>>
>> This update is based on the "DWARF Register Number Mapping" table from
>> the "System V Application Binary Interface AMD64 Architecture Processor
>> Supplement" (version 1.0).
>>
>> Modifications include:
>> - Updating the x86_64_regidx_table[] array to incorporate SIMD and eGPRs
>> registers.
>> - Enhancing the __get_dwarf_regnum_for_perf_regnum_x86_64() function to
>> retrieve the DWARF register index for eGPRs.
>> - Enlarge the x86_64 supported register number to 146 to cover eGPRs and
>> SIMD registers (get_libdw_frame_nregs()).
>>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
>> ---
>> .../util/dwarf-regs-arch/dwarf-regs-x86.c | 138 +++++++++++++++---
>> tools/perf/util/dwarf-regs.c | 7 +-
>> tools/perf/util/include/dwarf-regs.h | 7 +-
>> tools/perf/util/unwind-libdw.c | 6 +-
>> 4 files changed, 129 insertions(+), 29 deletions(-)
>>
>> diff --git a/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
>> index cadef120aeb4..b014a36d21b5 100644
>> --- a/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
>> +++ b/tools/perf/util/dwarf-regs-arch/dwarf-regs-x86.c
>> @@ -90,22 +90,22 @@ static const struct dwarf_regs_idx x86_64_regidx_table[] = {
>> { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 },
>> { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 },
>> // 16 - Return Address RA
>> - { "xmm0", 17},
>> - { "xmm1", 18},
>> - { "xmm2", 19},
>> - { "xmm3", 20},
>> - { "xmm4", 21},
>> - { "xmm5", 22},
>> - { "xmm6", 23},
>> - { "xmm7", 24},
>> - { "xmm8", 25},
>> - { "xmm9", 26},
>> - { "xmm10", 27},
>> - { "xmm11", 28},
>> - { "xmm12", 29},
>> - { "xmm13", 30},
>> - { "xmm14", 31},
>> - { "xmm15", 32},
>> + { "zmm0", 17 }, { "ymm0", 17 }, { "xmm0", 17 },
>> + { "zmm1", 18 }, { "ymm1", 18 }, { "xmm1", 18 },
>> + { "zmm2", 19 }, { "ymm2", 19 }, { "xmm2", 19 },
>> + { "zmm3", 20 }, { "ymm3", 20 }, { "xmm3", 20 },
>> + { "zmm4", 21 }, { "ymm4", 21 }, { "xmm4", 21 },
>> + { "zmm5", 22 }, { "ymm5", 22 }, { "xmm5", 22 },
>> + { "zmm6", 23 }, { "ymm6", 23 }, { "xmm6", 23 },
>> + { "zmm7", 24 }, { "ymm7", 24 }, { "xmm7", 24 },
>> + { "zmm8", 25 }, { "ymm8", 25 }, { "xmm8", 25 },
>> + { "zmm9", 26 }, { "ymm9", 26 }, { "xmm9", 26 },
>> + { "zmm10", 27 }, { "ymm10", 27 }, { "xmm10", 27 },
>> + { "zmm11", 28 }, { "ymm11", 28 }, { "xmm11", 28 },
>> + { "zmm12", 29 }, { "ymm12", 29 }, { "xmm12", 29 },
>> + { "zmm13", 30 }, { "ymm13", 30 }, { "xmm13", 30 },
>> + { "zmm14", 31 }, { "ymm14", 31 }, { "xmm14", 31 },
>> + { "zmm15", 32 }, { "ymm15", 32 }, { "xmm15", 32 },
>> { "st0", 33},
>> { "st1", 34},
>> { "st2", 35},
>> @@ -129,7 +129,7 @@ static const struct dwarf_regs_idx x86_64_regidx_table[] = {
>> { "ds", 53},
>> { "fs", 54},
>> { "gs", 55},
>> - // 56-47 - reserved
>> + // 56-57 - reserved
>> { "fs.base", 58},
>> { "gs.base", 59},
>> // 60-61 - reserved
>> @@ -138,6 +138,49 @@ static const struct dwarf_regs_idx x86_64_regidx_table[] = {
>> { "mxcsr", 64}, // 128-bit Media Control and Status
>> { "fcw", 65}, // x87 Control Word
>> { "fsw", 66}, // x87 Status Word
>> + // 67-82 - Upper Vector Registers 16–31
>> + { "zmm16", 67 }, { "ymm16", 67 }, { "xmm16", 67 },
>> + { "zmm17", 68 }, { "ymm17", 68 }, { "xmm17", 68 },
>> + { "zmm18", 69 }, { "ymm18", 69 }, { "xmm18", 69 },
>> + { "zmm19", 70 }, { "ymm19", 70 }, { "xmm19", 70 },
>> + { "zmm20", 71 }, { "ymm20", 71 }, { "xmm20", 71 },
>> + { "zmm21", 72 }, { "ymm21", 72 }, { "xmm21", 72 },
>> + { "zmm22", 73 }, { "ymm22", 73 }, { "xmm22", 73 },
>> + { "zmm23", 74 }, { "ymm23", 74 }, { "xmm23", 74 },
>> + { "zmm24", 75 }, { "ymm24", 75 }, { "xmm24", 75 },
>> + { "zmm25", 76 }, { "ymm25", 76 }, { "xmm25", 76 },
>> + { "zmm26", 77 }, { "ymm26", 77 }, { "xmm26", 77 },
>> + { "zmm27", 78 }, { "ymm27", 78 }, { "xmm27", 78 },
>> + { "zmm28", 79 }, { "ymm28", 79 }, { "xmm28", 79 },
>> + { "zmm29", 80 }, { "ymm29", 80 }, { "xmm29", 80 },
>> + { "zmm30", 81 }, { "ymm30", 81 }, { "xmm30", 81 },
>> + { "zmm31", 82 }, { "ymm31", 82 }, { "xmm31", 82 },
>> + // 118-125 - Vector Mask Registers 0–7
>> + { "k0", 118 },
>> + { "k1", 119 },
>> + { "k2", 120 },
>> + { "k3", 121 },
>> + { "k4", 122 },
>> + { "k5", 123 },
>> + { "k6", 124 },
>> + { "k7", 125 },
>> + // 130-145 - APX Integer Registers 16-31
>> + { "r16", 130 }, { "r16d", 130 }, { "r16w", 130 }, { "r16b", 130 },
>> + { "r17", 131 }, { "r17d", 131 }, { "r17w", 131 }, { "r17b", 131 },
>> + { "r18", 132 }, { "r18d", 132 }, { "r18w", 132 }, { "r18b", 132 },
>> + { "r19", 133 }, { "r19d", 133 }, { "r19w", 133 }, { "r19b", 133 },
>> + { "r20", 134 }, { "r20d", 134 }, { "r20w", 134 }, { "r20b", 134 },
>> + { "r21", 135 }, { "r21d", 135 }, { "r21w", 135 }, { "r21b", 135 },
>> + { "r22", 136 }, { "r22d", 136 }, { "r22w", 136 }, { "r22b", 136 },
>> + { "r23", 137 }, { "r23d", 137 }, { "r23w", 137 }, { "r23b", 137 },
>> + { "r24", 138 }, { "r24d", 138 }, { "r24w", 138 }, { "r24b", 138 },
>> + { "r25", 139 }, { "r25d", 139 }, { "r25w", 139 }, { "r25b", 139 },
>> + { "r26", 140 }, { "r26d", 140 }, { "r26w", 140 }, { "r26b", 140 },
>> + { "r27", 141 }, { "r27d", 141 }, { "r27w", 141 }, { "r27b", 141 },
>> + { "r28", 142 }, { "r28d", 142 }, { "r28w", 142 }, { "r28b", 142 },
>> + { "r29", 143 }, { "r29d", 143 }, { "r29w", 143 }, { "r29b", 143 },
>> + { "r30", 144 }, { "r30d", 144 }, { "r30w", 144 }, { "r30b", 144 },
>> + { "r31", 145 }, { "r31d", 145 }, { "r31w", 145 }, { "r31b", 145 },
>> // End of regular dwarf registers.
>> { "rip", DWARF_REG_PC }, { "eip", DWARF_REG_PC }, { "ip", DWARF_REG_PC },
>> };
>> @@ -204,7 +247,7 @@ int __get_dwarf_regnum_for_perf_regnum_i386(int perf_regnum)
>> return dwarf_i386_regnums[perf_regnum];
>> }
>>
>> -int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum)
>> +int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum, int abi)
>> {
>> static const int dwarf_x86_64_regnums[] = {
>> [PERF_REG_X86_AX] = 0,
>> @@ -248,13 +291,66 @@ int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum)
>> [PERF_REG_X86_XMM14] = 31,
>> [PERF_REG_X86_XMM15] = 32,
>> };
>> + static const int dwarf_x86_64_regnums_apx[] = {
>> + [PERF_REG_X86_AX] = 0,
>> + [PERF_REG_X86_BX] = 3,
>> + [PERF_REG_X86_CX] = 2,
>> + [PERF_REG_X86_DX] = 1,
>> + [PERF_REG_X86_SI] = 4,
>> + [PERF_REG_X86_DI] = 5,
>> + [PERF_REG_X86_BP] = 6,
>> + [PERF_REG_X86_SP] = 7,
>> + [PERF_REG_X86_IP] = 16,
>> + [PERF_REG_X86_FLAGS] = 49,
>> + [PERF_REG_X86_CS] = 51,
>> + [PERF_REG_X86_SS] = 52,
>> + [PERF_REG_X86_DS] = 53,
>> + [PERF_REG_X86_ES] = 50,
>> + [PERF_REG_X86_FS] = 54,
>> + [PERF_REG_X86_GS] = 55,
>> + [PERF_REG_X86_R8] = 8,
>> + [PERF_REG_X86_R9] = 9,
>> + [PERF_REG_X86_R10] = 10,
>> + [PERF_REG_X86_R11] = 11,
>> + [PERF_REG_X86_R12] = 12,
>> + [PERF_REG_X86_R13] = 13,
>> + [PERF_REG_X86_R14] = 14,
>> + [PERF_REG_X86_R15] = 15,
>> + [PERF_REG_X86_R16] = 130,
>> + [PERF_REG_X86_R17] = 131,
>> + [PERF_REG_X86_R18] = 132,
>> + [PERF_REG_X86_R19] = 133,
>> + [PERF_REG_X86_R20] = 134,
>> + [PERF_REG_X86_R21] = 135,
>> + [PERF_REG_X86_R22] = 136,
>> + [PERF_REG_X86_R23] = 137,
>> + [PERF_REG_X86_R24] = 138,
>> + [PERF_REG_X86_R25] = 139,
>> + [PERF_REG_X86_R26] = 140,
>> + [PERF_REG_X86_R27] = 141,
>> + [PERF_REG_X86_R28] = 142,
>> + [PERF_REG_X86_R29] = 143,
>> + [PERF_REG_X86_R30] = 144,
>> + [PERF_REG_X86_R31] = 145,
>> + };
>>
>> if (perf_regnum == 0)
>> return 0;
>>
>> - if (perf_regnum < 0 || perf_regnum > (int)ARRAY_SIZE(dwarf_x86_64_regnums) ||
>> - dwarf_x86_64_regnums[perf_regnum] == 0)
>> + if (perf_regnum < 0)
>> + return -ENOENT;
>> +
>> + if (!(abi & PERF_SAMPLE_REGS_ABI_SIMD) &&
>> + (perf_regnum >= (int)ARRAY_SIZE(dwarf_x86_64_regnums) ||
>> + dwarf_x86_64_regnums[perf_regnum] == 0))
>> + return -ENOENT;
>> +
>> + if ((abi & PERF_SAMPLE_REGS_ABI_SIMD) &&
>> + (perf_regnum >= (int)ARRAY_SIZE(dwarf_x86_64_regnums_apx) ||
>> + dwarf_x86_64_regnums_apx[perf_regnum] == 0))
>> return -ENOENT;
>>
>> - return dwarf_x86_64_regnums[perf_regnum];
>> + return abi & PERF_SAMPLE_REGS_ABI_SIMD ?
>> + dwarf_x86_64_regnums_apx[perf_regnum] :
>> + dwarf_x86_64_regnums[perf_regnum];
>> }
>> diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
>> index 797f455eba0d..9e2a0c93ecc9 100644
>> --- a/tools/perf/util/dwarf-regs.c
>> +++ b/tools/perf/util/dwarf-regs.c
>> @@ -158,7 +158,7 @@ static int get_libdw_frame_nregs(unsigned int machine, unsigned int flags __mayb
>> {
>> switch (machine) {
>> case EM_X86_64:
>> - return 17;
>> + return 146; /* Support APX eGPRs. */
> So the 17 comes from libdw as I believe there are places in the libdw
> stack walking code where additional registers cause libdw to fail. The
> key is `libdw_set_initial_registers`, which passes the flag
> `/*only_libdw_supported=*/true`. This is where capping the registers
> at 17 will have an effect. Presumably increasing this value requires
> some degree of libdw support? We could add a feature test for this,
> but it may be safer to leave it as 17 as it is unlikely we're using
> vector registers when walking the stack. A comment capturing this
> would be good, as clearly the intent in the code hadn't been clear
> enough.
As I said in my previous comments, it looks like libdw doesn't support
SIMD and eGPR registers yet, so I suppose it's fine to keep 17 here.
But I would add a WARN_ONCE() here if the reg idx is larger than 17, to
remind us to increase the reg_num.
Thanks.
>
> Thanks,
> Ian
>
>> case EM_386:
>> return 9;
>> case EM_ARM:
>> @@ -187,13 +187,14 @@ static int get_libdw_frame_nregs(unsigned int machine, unsigned int flags __mayb
>> }
>>
>> int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine,
>> - unsigned int flags, bool only_libdw_supported)
>> + unsigned int flags,
>> + bool only_libdw_supported, int abi)
>> {
>> int reg;
>>
>> switch (machine) {
>> case EM_X86_64:
>> - reg = __get_dwarf_regnum_for_perf_regnum_x86_64(perf_regnum);
>> + reg = __get_dwarf_regnum_for_perf_regnum_x86_64(perf_regnum, abi);
>> break;
>> case EM_386:
>> reg = __get_dwarf_regnum_for_perf_regnum_i386(perf_regnum);
>> diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
>> index 46a764cf322f..92cf0af93e9e 100644
>> --- a/tools/perf/util/include/dwarf-regs.h
>> +++ b/tools/perf/util/include/dwarf-regs.h
>> @@ -103,7 +103,7 @@ int __get_csky_regnum(const char *name, unsigned int flags);
>> int __get_dwarf_regnum_i386(const char *name);
>> int __get_dwarf_regnum_x86_64(const char *name);
>> int __get_dwarf_regnum_for_perf_regnum_i386(int perf_regnum);
>> -int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum);
>> +int __get_dwarf_regnum_for_perf_regnum_x86_64(int perf_regnum, int abi);
>>
>> int __get_dwarf_regnum_for_perf_regnum_arm(int perf_regnum);
>> int __get_dwarf_regnum_for_perf_regnum_arm64(int perf_regnum);
>> @@ -125,8 +125,9 @@ int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags)
>> /*
>> * get_dwarf_regnum - Returns DWARF regnum from perf register number.
>> */
>> -int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine, unsigned int flags,
>> - bool only_libdw_supported);
>> +int get_dwarf_regnum_for_perf_regnum(int perf_regnum, unsigned int machine,
>> + unsigned int flags,
>> + bool only_libdw_supported, int abi);
>>
>> void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc);
>>
>> diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
>> index 05e8e68bd49c..678db5a65ada 100644
>> --- a/tools/perf/util/unwind-libdw.c
>> +++ b/tools/perf/util/unwind-libdw.c
>> @@ -273,7 +273,8 @@ static bool libdw_set_initial_registers(Dwfl_Thread *thread, void *arg)
>> int dwarf_reg =
>> get_dwarf_regnum_for_perf_regnum(perf_reg, e_machine,
>> e_flags,
>> - /*only_libdw_supported=*/true);
>> + /*only_libdw_supported=*/true,
>> + user_regs->abi);
>> if (dwarf_reg > max_dwarf_reg)
>> max_dwarf_reg = dwarf_reg;
>> }
>> @@ -288,7 +289,8 @@ static bool libdw_set_initial_registers(Dwfl_Thread *thread, void *arg)
>> int dwarf_reg =
>> get_dwarf_regnum_for_perf_regnum(perf_reg, e_machine,
>> e_flags,
>> - /*only_libdw_supported=*/true);
>> + /*only_libdw_supported=*/true,
>> + user_regs->abi);
>> if (dwarf_reg >= 0) {
>> val = 0;
>> if (perf_reg_value(&val, user_regs, perf_reg) == 0)
>> --
>> 2.34.1
>>