Re: [PATCH] tools: perf: add comm_ignore_digit column
From: Stephen Brennan
Date: Mon Mar 16 2026 - 14:02:21 EST
On 3/16/26 10:48 AM, Namhyung Kim wrote:
> Hello,
>
> On Thu, Mar 05, 2026 at 10:18:47AM -0800, Stephen Brennan wrote:
>> The "comm" column allows grouping events by the process command. It is
>> intended to group like programs, despite having different PIDs. But some
>> workloads may adjust their own command, so that a unique identifier
>> (e.g. a PID or some other numeric value) is part of the command name.
>> This destroys the utility of "comm", forcing perf to place each unique
>> process name into its own bucket, which can contribute to a
>> combinatorial explosion of memory use in perf report.
>>
>> Create a less strict version of this column, which ignores digits when
>> comparing command names. This allows "similar looking" processes to
>> again be placed in the same bucket.
>
> Can you please rebase this onto the current perf-tools-next?
Hi Namhyung,
Thanks for taking a look. I went ahead and sent v2 rebased on
perf-tools-next, which resolved the conflict with commit cbd41c6d4c26c
("perf report: Update sort key state from -F option").
Thanks,
Stephen
> Thanks,
> Namhyung
>
>>
>> Signed-off-by: Stephen Brennan <stephen.s.brennan@xxxxxxxxxx>
>> ---
>> tools/perf/util/hist.c | 1 +
>> tools/perf/util/hist.h | 1 +
>> tools/perf/util/sort.c | 92 +++++++++++++++++++++++++++++++++++++++++-
>> tools/perf/util/sort.h | 1 +
>> 4 files changed, 94 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
>> index ef4b569f7df46..6759826be8344 100644
>> --- a/tools/perf/util/hist.c
>> +++ b/tools/perf/util/hist.c
>> @@ -110,6 +110,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>> len = thread__comm_len(h->thread);
>> if (hists__new_col_len(hists, HISTC_COMM, len))
>> hists__set_col_len(hists, HISTC_THREAD, len + 8);
>> + hists__new_col_len(hists, HISTC_COMM_IGNORE_DIGIT, len);
>>
>> if (h->ms.map) {
>> len = dso__name_len(map__dso(h->ms.map));
>> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
>> index 1d5ea632ca4e1..ae7e98bd9e46d 100644
>> --- a/tools/perf/util/hist.h
>> +++ b/tools/perf/util/hist.h
>> @@ -44,6 +44,7 @@ enum hist_column {
>> HISTC_THREAD,
>> HISTC_TGID,
>> HISTC_COMM,
>> + HISTC_COMM_IGNORE_DIGIT,
>> HISTC_CGROUP_ID,
>> HISTC_CGROUP,
>> HISTC_PARENT,
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index f3a565b0e2307..e6012b2457c5d 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -1,4 +1,5 @@
>> // SPDX-License-Identifier: GPL-2.0
>> +#include <ctype.h>
>> #include <errno.h>
>> #include <inttypes.h>
>> #include <regex.h>
>> @@ -265,6 +266,93 @@ struct sort_entry sort_comm = {
>> .se_width_idx = HISTC_COMM,
>> };
>>
>> +/* --sort comm_ignore_digit */
>> +
>> +static int64_t strcmp_nodigit(const char *left, const char *right)
>> +{
>> + for (;;) {
>> + while (*left && isdigit(*left))
>> + left++;
>> + while (*right && isdigit(*right))
>> + right++;
>> + if (*left == *right && !*left) {
>> + return 0;
>> + } else if (*left == *right) {
>> + left++;
>> + right++;
>> + } else {
>> + return (int64_t)*left - (int64_t)*right;
>> + }
>> + }
>> +}
>> +
>> +static int64_t
>> +sort__comm_ignore_digit_cmp(struct hist_entry *left, struct hist_entry *right)
>> +{
>> + return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
>> +}
>> +
>> +static int64_t
>> +sort__comm_ignore_digit_collapse(struct hist_entry *left, struct hist_entry *right)
>> +{
>> + return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
>> +}
>> +
>> +static int64_t
>> +sort__comm_ignore_digit_sort(struct hist_entry *left, struct hist_entry *right)
>> +{
>> + return strcmp_nodigit(comm__str(right->comm), comm__str(left->comm));
>> +}
>> +
>> +static int hist_entry__comm_ignore_digit_snprintf(struct hist_entry *he, char *bf,
>> + size_t size, unsigned int width)
>> +{
>> + int ret = 0;
>> + unsigned int print_len, printed = 0, start = 0, end = 0;
>> + bool in_digit;
>> + const char *comm = comm__str(he->comm), *print;
>> +
>> + while (printed < width && printed < size && comm[start]) {
>> + in_digit = !!isdigit(comm[start]);
>> + end = start + 1;
>> + while (comm[end] && !!isdigit(comm[end]) == in_digit)
>> + end++;
>> + if (in_digit) {
>> + print_len = 3; /* <N> */
>> + print = "<N>";
>> + } else {
>> + print_len = end - start;
>> + print = &comm[start];
>> + }
>> + print_len = min(print_len, width - printed);
>> + ret = repsep_snprintf(bf + printed, size - printed, "%-.*s",
>> + print_len, print);
>> + if (ret < 0)
>> + return ret;
>> + start = end;
>> + printed += ret;
>> + }
>> + /* Pad to width if necessary */
>> + if (printed < width && printed < size) {
>> + ret = repsep_snprintf(bf + printed, size - printed, "%-*.*s",
>> + width - printed, width - printed, "");
>> + if (ret < 0)
>> + return ret;
>> + printed += ret;
>> + }
>> + return printed;
>> +}
>> +
>> +struct sort_entry sort_comm_ignore_digit = {
>> + .se_header = "CommandIgnoreDigit",
>> + .se_cmp = sort__comm_ignore_digit_cmp,
>> + .se_collapse = sort__comm_ignore_digit_collapse,
>> + .se_sort = sort__comm_ignore_digit_sort,
>> + .se_snprintf = hist_entry__comm_ignore_digit_snprintf,
>> + .se_filter = hist_entry__thread_filter,
>> + .se_width_idx = HISTC_COMM_IGNORE_DIGIT,
>> +};
>> +
>> /* --sort dso */
>>
>> static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
>> @@ -2576,6 +2664,7 @@ static struct sort_dimension common_sort_dimensions[] = {
>> DIM(SORT_PID, "pid", sort_thread),
>> DIM(SORT_TGID, "tgid", sort_tgid),
>> DIM(SORT_COMM, "comm", sort_comm),
>> + DIM(SORT_COMM_IGNORE_DIGIT, "comm_ignore_digit", sort_comm_ignore_digit),
>> DIM(SORT_DSO, "dso", sort_dso),
>> DIM(SORT_SYM, "symbol", sort_sym),
>> DIM(SORT_PARENT, "parent", sort_parent),
>> @@ -3675,7 +3764,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
>> list->socket = 1;
>> } else if (sd->entry == &sort_thread) {
>> list->thread = 1;
>> - } else if (sd->entry == &sort_comm) {
>> + } else if (sd->entry == &sort_comm || sd->entry == &sort_comm_ignore_digit) {
>> list->comm = 1;
>> } else if (sd->entry == &sort_type_offset) {
>> symbol_conf.annotate_data_member = true;
>> @@ -4022,6 +4111,7 @@ static bool get_elide(int idx, FILE *output)
>> case HISTC_DSO:
>> return __get_elide(symbol_conf.dso_list, "dso", output);
>> case HISTC_COMM:
>> + case HISTC_COMM_IGNORE_DIGIT:
>> return __get_elide(symbol_conf.comm_list, "comm", output);
>> default:
>> break;
>> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
>> index d7787958e06b9..6819934b4d48a 100644
>> --- a/tools/perf/util/sort.h
>> +++ b/tools/perf/util/sort.h
>> @@ -43,6 +43,7 @@ enum sort_type {
>> /* common sort keys */
>> SORT_PID,
>> SORT_COMM,
>> + SORT_COMM_IGNORE_DIGIT,
>> SORT_DSO,
>> SORT_SYM,
>> SORT_PARENT,
>> --
>> 2.47.3
>>