[PATCH v3] tools: perf: add comm_nodigit column

From: Stephen Brennan

Date: Fri Mar 20 2026 - 19:46:47 EST


The "comm" column allows grouping events by the process command. It is
intended to group like programs, despite having different PIDs. But some
workloads may adjust their own command, so that a unique identifier
(e.g. a PID or some other numeric value) is part of the command name.
This destroys the utility of "comm", forcing perf to place each unique
process name into its own bucket, which can contribute to a
combinatorial explosion of memory use in perf report.

Create a less strict version of this column, which ignores digits when
comparing command names. Commands whose names are the same (ignoring
digits) are sorted into the same histogram buckets, and displayed with
the placeholder value "<N>" in the place of digits. For example,
hypothetical command names "kworker/1", "kworker/2" and "kworker/3" would
sort into the same bucket and be represented as "kworker/<N>".

Signed-off-by: Stephen Brennan <stephen.s.brennan@xxxxxxxxxx>
---
tools/perf/Documentation/perf-report.txt | 3 +-
tools/perf/util/hist.c | 3 +
tools/perf/util/hist.h | 2 +
tools/perf/util/sort.c | 114 +++++++++++++++++++++++
tools/perf/util/sort.h | 2 +
5 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 802f931ae64db..52f316628e43a 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -88,7 +88,7 @@ OPTIONS
Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available:
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
- local_weight, cgroup_id, addr.
+ local_weight, cgroup_id, addr, comm_nodigit.

Each key has following meaning:

@@ -143,6 +143,7 @@ OPTIONS
- weight1: Average value of event specific weight (1st field of weight_struct).
- weight2: Average value of event specific weight (2nd field of weight_struct).
- weight3: Average value of event specific weight (3rd field of weight_struct).
+ - comm_nodigit: same as comm, with each run of digits replaced by "<N>".

By default, overhead, comm, dso and symbol keys are used.
(i.e. --sort overhead,comm,dso,symbol).
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 7ffaa3d9851b4..fc737a0a8e4d7 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -110,6 +110,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
len = thread__comm_len(h->thread);
if (hists__new_col_len(hists, HISTC_COMM, len))
hists__set_col_len(hists, HISTC_THREAD, len + 8);
+ if (hists->hpp_list->comm_nodigit)
+ hists__new_col_len(hists, HISTC_COMM_NODIGIT,
+ (u16) sort__comm_nodigit_len(h));

if (h->ms.map) {
len = dso__name_len(map__dso(h->ms.map));
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 1d5ea632ca4e1..d97a4efb92505 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -44,6 +44,7 @@ enum hist_column {
HISTC_THREAD,
HISTC_TGID,
HISTC_COMM,
+ HISTC_COMM_NODIGIT,
HISTC_CGROUP_ID,
HISTC_CGROUP,
HISTC_PARENT,
@@ -522,6 +523,7 @@ struct perf_hpp_list {
int socket;
int thread;
int comm;
+ int comm_nodigit;
};

extern struct perf_hpp_list perf_hpp_list;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 42d5cd7ef4e23..fda8fcfa46e07 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <regex.h>
@@ -265,6 +266,115 @@ struct sort_entry sort_comm = {
.se_width_idx = HISTC_COMM,
};

+/* --sort comm_nodigit */
+
+/*
+ * Length of @entry's command name as the comm_nodigit column renders it:
+ * every non-digit character counts as one column and every run of
+ * consecutive digits counts as the three columns of its "<N>" placeholder.
+ *
+ * Returns 0 when comm__str() yields no string for the entry.
+ */
+size_t sort__comm_nodigit_len(struct hist_entry *entry)
+{
+	const char *p = comm__str(entry->comm);
+	bool prev_was_digit = false;
+	size_t width = 0;
+
+	if (p == NULL)
+		return 0;
+
+	for (; *p != '\0'; p++) {
+		bool is_digit = isdigit((unsigned char)*p) != 0;
+
+		if (!is_digit)
+			width += 1;
+		else if (!prev_was_digit)
+			width += 3; /* first digit of a run: account for "<N>" */
+		/* later digits of the same run add nothing */
+		prev_was_digit = is_digit;
+	}
+
+	return width;
+}
+
+/* Placeholder that stands in for every run of digits in a command name */
+static const char nodigit_placeholder[] = "<N>";
+
+/*
+ * Return the next character of the string at *@str as the comm_nodigit column
+ * displays it, advancing the cursor: a run of digits is consumed as a whole
+ * and yields the characters of "<N>" one call at a time. @pending holds how
+ * many placeholder characters are still to be returned; callers start it at 0.
+ * Returns '\0' (without advancing) once the end of the string is reached.
+ */
+static unsigned char nodigit_next_char(const char **str, size_t *pending)
+{
+	const size_t placeholder_len = sizeof(nodigit_placeholder) - 1;
+
+	if (!*pending && isdigit((unsigned char)**str)) {
+		while (isdigit((unsigned char)**str))
+			(*str)++;
+		*pending = placeholder_len;
+	}
+
+	if (*pending) {
+		unsigned char c = nodigit_placeholder[placeholder_len - *pending];
+
+		(*pending)--;
+		return c;
+	}
+
+	if (**str == '\0')
+		return '\0';
+
+	return (unsigned char)*(*str)++;
+}
+
+/*
+ * strcmp()-like comparison of two command names in their comm_nodigit form,
+ * i.e. with every run of digits collapsed to "<N>".
+ *
+ * Digit runs are compared as the placeholder rather than skipped, so two
+ * names compare equal exactly when they are displayed identically:
+ * "kworker/1" matches "kworker/23", but not "kworker/", which is shown
+ * without a placeholder. A NULL name is treated as the empty string.
+ *
+ * Returns 0 when equal, otherwise the difference between the first pair of
+ * differing characters (as unsigned char values), left minus right.
+ */
+static int64_t strcmp_nodigit(const char *left, const char *right)
+{
+	size_t left_pending = 0, right_pending = 0;
+
+	if (!left)
+		left = "";
+	if (!right)
+		right = "";
+
+	for (;;) {
+		unsigned char l = nodigit_next_char(&left, &left_pending);
+		unsigned char r = nodigit_next_char(&right, &right_pending);
+
+		if (l != r)
+			return (int64_t)l - (int64_t)r;
+		if (l == '\0')
+			return 0;
+	}
+}
+
+/*
+ * Comparison callback installed as .se_cmp of sort_comm_nodigit: compares the
+ * two entries' command names with strcmp_nodigit(). The right entry's name is
+ * passed as the first operand.
+ */
+static int64_t
+sort__comm_nodigit_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	const char *right_comm = comm__str(right->comm);
+	const char *left_comm = comm__str(left->comm);
+
+	return strcmp_nodigit(right_comm, left_comm);
+}
+
+/*
+ * Comparison callback installed as .se_collapse of sort_comm_nodigit: compares
+ * the two entries' command names with strcmp_nodigit(). The right entry's name
+ * is passed as the first operand.
+ */
+static int64_t
+sort__comm_nodigit_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	const char *right_comm = comm__str(right->comm);
+	const char *left_comm = comm__str(left->comm);
+
+	return strcmp_nodigit(right_comm, left_comm);
+}
+
+/*
+ * Comparison callback installed as .se_sort of sort_comm_nodigit: compares the
+ * two entries' command names with strcmp_nodigit(). The right entry's name is
+ * passed as the first operand.
+ */
+static int64_t
+sort__comm_nodigit_sort(struct hist_entry *left, struct hist_entry *right)
+{
+	const char *right_comm = comm__str(right->comm);
+	const char *left_comm = comm__str(left->comm);
+
+	return strcmp_nodigit(right_comm, left_comm);
+}
+
+/*
+ * Format @he's command name into @bf for the comm_nodigit column.
+ *
+ * The name is emitted piecewise: each run of non-digit characters is copied
+ * as-is and each run of digits is replaced by "<N>". Emission stops once
+ * @width columns have been produced (a piece, including the placeholder, may
+ * be cut short to fit); if the name is shorter, the remainder is padded with
+ * blanks up to @width. A NULL command name is rendered as an empty, fully
+ * padded field, in line with sort__comm_nodigit_len() reporting length 0
+ * for it.
+ *
+ * Returns the sum of the repsep_snprintf() return values, or the first
+ * negative value repsep_snprintf() returns.
+ */
+static int hist_entry__comm_nodigit_snprintf(struct hist_entry *he, char *bf,
+					     size_t size, unsigned int width)
+{
+	const char *comm = comm__str(he->comm);
+	unsigned int printed = 0, start = 0;
+	int ret;
+
+	if (!comm)
+		comm = "";
+
+	while (printed < width && printed < size && comm[start]) {
+		bool in_digit = !!isdigit((unsigned char)comm[start]);
+		unsigned int end = start + 1;
+		unsigned int print_len;
+		const char *print;
+
+		/* Extend [start, end) over the whole run of the same class */
+		while (comm[end] && !!isdigit((unsigned char)comm[end]) == in_digit)
+			end++;
+
+		if (in_digit) {
+			print = "<N>";
+			print_len = 3; /* strlen("<N>") */
+		} else {
+			print = &comm[start];
+			print_len = end - start;
+		}
+
+		/* Never emit more than the columns still available */
+		print_len = min(print_len, width - printed);
+		ret = repsep_snprintf(bf + printed, size - printed, "%-.*s",
+				      print_len, print);
+		if (ret < 0)
+			return ret;
+
+		start = end;
+		printed += ret;
+	}
+
+	/* Pad to width if necessary */
+	if (printed < width && printed < size) {
+		ret = repsep_snprintf(bf + printed, size - printed, "%-*.*s",
+				      width - printed, width - printed, "");
+		if (ret < 0)
+			return ret;
+		printed += ret;
+	}
+
+	return printed;
+}
+
+/*
+ * Sort key "comm_nodigit": like the comm key, but command names are compared
+ * and printed with their digits abstracted away (see strcmp_nodigit() and
+ * hist_entry__comm_nodigit_snprintf()).
+ */
+struct sort_entry sort_comm_nodigit = {
+	/* column identity */
+	.se_header	= "CommandNoDigit",
+	.se_width_idx	= HISTC_COMM_NODIGIT,
+
+	/* ordering and merging of entries */
+	.se_cmp		= sort__comm_nodigit_cmp,
+	.se_collapse	= sort__comm_nodigit_collapse,
+	.se_sort	= sort__comm_nodigit_sort,
+
+	/* output and filtering */
+	.se_snprintf	= hist_entry__comm_nodigit_snprintf,
+	.se_filter	= hist_entry__thread_filter,
+};
+
/* --sort dso */

static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
@@ -2583,6 +2693,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_PID, "pid", sort_thread),
DIM(SORT_TGID, "tgid", sort_tgid),
DIM(SORT_COMM, "comm", sort_comm),
+ DIM(SORT_COMM_NODIGIT, "comm_nodigit", sort_comm_nodigit),
DIM(SORT_DSO, "dso", sort_dso),
DIM(SORT_SYM, "symbol", sort_sym),
DIM(SORT_PARENT, "parent", sort_parent),
@@ -3579,6 +3690,8 @@ static int __sort_dimension__update(struct sort_dimension *sd,
list->thread = 1;
} else if (sd->entry == &sort_comm) {
list->comm = 1;
+ } else if (sd->entry == &sort_comm_nodigit) {
+ list->comm_nodigit = list->comm = 1;
} else if (sd->entry == &sort_type_offset) {
symbol_conf.annotate_data_member = true;
} else if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) {
@@ -4040,6 +4153,7 @@ static bool get_elide(int idx, FILE *output)
case HISTC_DSO:
return __get_elide(symbol_conf.dso_list, "dso", output);
case HISTC_COMM:
+ case HISTC_COMM_NODIGIT:
return __get_elide(symbol_conf.comm_list, "comm", output);
default:
break;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index d7787958e06b9..c962e77e4b938 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -43,6 +43,7 @@ enum sort_type {
/* common sort keys */
SORT_PID,
SORT_COMM,
+ SORT_COMM_NODIGIT,
SORT_DSO,
SORT_SYM,
SORT_PARENT,
@@ -158,4 +159,5 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r);
char *hist_entry__srcline(struct hist_entry *he);
+size_t sort__comm_nodigit_len(struct hist_entry *entry);
#endif /* __PERF_SORT_H */
--
2.47.3