Re: [patch v2 01/14] x86/irq: Optimize interrupts decimals printing
From: Radu Rendec
Date: Sat Mar 21 2026 - 12:17:53 EST
On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> From: Dmitry Ilvokhin <d@xxxxxxxxxxxx>
>
> Monitoring tools periodically scan /proc/interrupts to export metrics as a
> timeseries for future analysis and investigation.
>
> In large fleets, /proc/interrupts is polled (often every few seconds) on
> every machine. The cumulative overhead adds up quickly across thousands
> of nodes, so reducing the cost of generating these stats does have a
> measurable operational impact. With the ongoing trend toward higher core
> counts per machine, this cost becomes even more noticeable over time,
> since interrupt counters are per-CPU. In Meta's fleet, we have observed
> this overhead at scale.
>
> Although a binary /proc interface would be a better long-term solution
> due to lower formatting (kernel side) and parsing (userspace side)
> overhead, the text interface will remain in use for some time, even if
> better solutions become available. Optimizing the /proc/interrupts
> printing code is therefore still beneficial.
>
> seq_printf() supports rich format strings for printing decimals, but
> that is not required for printing the /proc/interrupts per-CPU counters.
> seq_put_decimal_ull_width() can be used instead to print the per-CPU
> counters, because only very limited formatting is required in this case.
> A similar optimization is already used in show_interrupts().
>
> Performance counter stats (truncated) for 'sh -c cat /proc/interrupts':
>
> Before:
>
> 3.42 msec task-clock # 0.802 CPUs utilized ( +- 0.05% )
> 1 context-switches # 291.991 /sec ( +- 0.74% )
> 0 cpu-migrations # 0.000 /sec
> 343 page-faults # 100.153 K/sec ( +- 0.01% )
> 8,932,242 instructions # 1.66 insn per cycle ( +- 0.34% )
> 5,374,427 cycles # 1.569 GHz ( +- 0.04% )
> 1,483,154 branches # 433.068 M/sec ( +- 0.22% )
> 28,768 branch-misses # 1.94% of all branches ( +- 0.31% )
>
> 0.00427182 +- 0.00000215 seconds time elapsed ( +- 0.05% )
>
> After:
>
> 2.39 msec task-clock # 0.796 CPUs utilized ( +- 0.06% )
> 1 context-switches # 418.541 /sec ( +- 0.70% )
> 0 cpu-migrations # 0.000 /sec
> 343 page-faults # 143.560 K/sec ( +- 0.01% )
> 7,020,982 instructions # 1.30 insn per cycle ( +- 0.52% )
> 5,397,266 cycles # 2.259 GHz ( +- 0.06% )
> 1,569,648 branches # 656.962 M/sec ( +- 0.08% )
> 25,419 branch-misses # 1.62% of all branches ( +- 0.72% )
>
> 0.00299996 +- 0.00000206 seconds time elapsed ( +- 0.07% )
>
> Relative speed up in time elapsed is around 29%.
>
> [ tglx: Fixed it up so it applies to current mainline ]
>
> Signed-off-by: Dmitry Ilvokhin <d@xxxxxxxxxxxx>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxx>
> Link: https://patch.msgid.link/aQj5mGZ6_BBlAm3B@xxxxxxxxxxxxxxxxxx
>
> ---
> Changes v2:
> - Expanded commit message: add more rationale for the proposed change.
> - Renamed helper put_spaced_decimal() -> put_decimal() primarily to make
> checkpatch.pl --strict pass.
>
> arch/x86/kernel/irq.c | 112 ++++++++++++++++++++++++++------------------------
> 1 file changed, 59 insertions(+), 53 deletions(-)
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -62,6 +62,18 @@ void ack_bad_irq(unsigned int irq)
> apic_eoi();
> }
>
> +/*
> + * A helper routine for putting space and decimal number without overhead
> + * from rich format of printf().
> + */
> +static void put_decimal(struct seq_file *p, unsigned long long num)
> +{
> + const char *delimiter = " ";
> + unsigned int width = 10;
> +
> + seq_put_decimal_ull_width(p, delimiter, num, width);
> +}
> +
> #define irq_stats(x) (&per_cpu(irq_stat, x))
> /*
> * /proc/interrupts printing for arch specific interrupts
> @@ -70,103 +82,101 @@ int arch_show_interrupts(struct seq_file
> {
> int j;
>
> - seq_printf(p, "%*s: ", prec, "NMI");
> + seq_printf(p, "%*s:", prec, "NMI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
> + put_decimal(p, irq_stats(j)->__nmi_count);
> seq_puts(p, " Non-maskable interrupts\n");
> #ifdef CONFIG_X86_LOCAL_APIC
> - seq_printf(p, "%*s: ", prec, "LOC");
> + seq_printf(p, "%*s:", prec, "LOC");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
> + put_decimal(p, irq_stats(j)->apic_timer_irqs);
> seq_puts(p, " Local timer interrupts\n");
>
> - seq_printf(p, "%*s: ", prec, "SPU");
> + seq_printf(p, "%*s:", prec, "SPU");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
> + put_decimal(p, irq_stats(j)->irq_spurious_count);
> seq_puts(p, " Spurious interrupts\n");
> - seq_printf(p, "%*s: ", prec, "PMI");
> + seq_printf(p, "%*s:", prec, "PMI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
> + put_decimal(p, irq_stats(j)->apic_perf_irqs);
> seq_puts(p, " Performance monitoring interrupts\n");
> - seq_printf(p, "%*s: ", prec, "IWI");
> + seq_printf(p, "%*s:", prec, "IWI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
> + put_decimal(p, irq_stats(j)->apic_irq_work_irqs);
> seq_puts(p, " IRQ work interrupts\n");
> - seq_printf(p, "%*s: ", prec, "RTR");
> + seq_printf(p, "%*s:", prec, "RTR");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
> + put_decimal(p, irq_stats(j)->icr_read_retry_count);
> seq_puts(p, " APIC ICR read retries\n");
> if (x86_platform_ipi_callback) {
> - seq_printf(p, "%*s: ", prec, "PLT");
> + seq_printf(p, "%*s:", prec, "PLT");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
> + put_decimal(p, irq_stats(j)->x86_platform_ipis);
> seq_puts(p, " Platform interrupts\n");
> }
> #endif
> #ifdef CONFIG_SMP
> - seq_printf(p, "%*s: ", prec, "RES");
> + seq_printf(p, "%*s:", prec, "RES");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
> + put_decimal(p, irq_stats(j)->irq_resched_count);
> seq_puts(p, " Rescheduling interrupts\n");
> - seq_printf(p, "%*s: ", prec, "CAL");
> + seq_printf(p, "%*s:", prec, "CAL");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
> + put_decimal(p, irq_stats(j)->irq_call_count);
> seq_puts(p, " Function call interrupts\n");
> - seq_printf(p, "%*s: ", prec, "TLB");
> + seq_printf(p, "%*s:", prec, "TLB");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
> + put_decimal(p, irq_stats(j)->irq_tlb_count);
> seq_puts(p, " TLB shootdowns\n");
> #endif
> #ifdef CONFIG_X86_THERMAL_VECTOR
> - seq_printf(p, "%*s: ", prec, "TRM");
> + seq_printf(p, "%*s:", prec, "TRM");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
> + put_decimal(p, irq_stats(j)->irq_thermal_count);
> seq_puts(p, " Thermal event interrupts\n");
> #endif
> #ifdef CONFIG_X86_MCE_THRESHOLD
> - seq_printf(p, "%*s: ", prec, "THR");
> + seq_printf(p, "%*s:", prec, "THR");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
> + put_decimal(p, irq_stats(j)->irq_threshold_count);
> seq_puts(p, " Threshold APIC interrupts\n");
> #endif
> #ifdef CONFIG_X86_MCE_AMD
> - seq_printf(p, "%*s: ", prec, "DFR");
> + seq_printf(p, "%*s:", prec, "DFR");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
> + put_decimal(p, irq_stats(j)->irq_deferred_error_count);
> seq_puts(p, " Deferred Error APIC interrupts\n");
> #endif
> #ifdef CONFIG_X86_MCE
> - seq_printf(p, "%*s: ", prec, "MCE");
> + seq_printf(p, "%*s:", prec, "MCE");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
> + put_decimal(p, per_cpu(mce_exception_count, j));
> seq_puts(p, " Machine check exceptions\n");
> - seq_printf(p, "%*s: ", prec, "MCP");
> + seq_printf(p, "%*s:", prec, "MCP");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
> + put_decimal(p, per_cpu(mce_poll_count, j));
> seq_puts(p, " Machine check polls\n");
> #endif
> #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
> if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
> - seq_printf(p, "%*s: ", prec, "HYP");
> + seq_printf(p, "%*s:", prec, "HYP");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->irq_hv_callback_count);
> + put_decimal(p, irq_stats(j)->irq_hv_callback_count);
> seq_puts(p, " Hypervisor callback interrupts\n");
> }
> #endif
> #if IS_ENABLED(CONFIG_HYPERV)
> if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
> - seq_printf(p, "%*s: ", prec, "HRE");
> + seq_printf(p, "%*s:", prec, "HRE");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->irq_hv_reenlightenment_count);
> + put_decimal(p,
> + irq_stats(j)->irq_hv_reenlightenment_count);
> seq_puts(p, " Hyper-V reenlightenment interrupts\n");
> }
> if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
> - seq_printf(p, "%*s: ", prec, "HVS");
> + seq_printf(p, "%*s:", prec, "HVS");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->hyperv_stimer0_count);
> + put_decimal(p, irq_stats(j)->hyperv_stimer0_count);
> seq_puts(p, " Hyper-V stimer0 interrupts\n");
> }
> #endif
> @@ -175,35 +185,31 @@ int arch_show_interrupts(struct seq_file
> seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
> #endif
> #if IS_ENABLED(CONFIG_KVM)
> - seq_printf(p, "%*s: ", prec, "PIN");
> + seq_printf(p, "%*s:", prec, "PIN");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
> + put_decimal(p, irq_stats(j)->kvm_posted_intr_ipis);
> seq_puts(p, " Posted-interrupt notification event\n");
>
> - seq_printf(p, "%*s: ", prec, "NPI");
> + seq_printf(p, "%*s:", prec, "NPI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->kvm_posted_intr_nested_ipis);
> + put_decimal(p, irq_stats(j)->kvm_posted_intr_nested_ipis);
> seq_puts(p, " Nested posted-interrupt event\n");
>
> - seq_printf(p, "%*s: ", prec, "PIW");
> + seq_printf(p, "%*s:", prec, "PIW");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->kvm_posted_intr_wakeup_ipis);
> + put_decimal(p, irq_stats(j)->kvm_posted_intr_wakeup_ipis);
> seq_puts(p, " Posted-interrupt wakeup event\n");
> #endif
> #ifdef CONFIG_GUEST_PERF_EVENTS
> - seq_printf(p, "%*s: ", prec, "VPMI");
> + seq_printf(p, "%*s:", prec, "VPMI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->perf_guest_mediated_pmis);
> + put_decimal(p, irq_stats(j)->perf_guest_mediated_pmis);
> seq_puts(p, " Perf Guest Mediated PMI\n");
> #endif
> #ifdef CONFIG_X86_POSTED_MSI
> - seq_printf(p, "%*s: ", prec, "PMN");
> + seq_printf(p, "%*s:", prec, "PMN");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->posted_msi_notification_count);
> + put_decimal(p, irq_stats(j)->posted_msi_notification_count);
> seq_puts(p, " Posted MSI notification event\n");
> #endif
> return 0;
Nit: The patch changes the alignment of the descriptions by 1 space (it
moves the descriptions to the left). If that's intentional, perhaps it
should be mentioned in the commit message? TBH, I like it better like that
because the description is now aligned with the generic interrupts.
Reviewed-by: Radu Rendec <radu@xxxxxxxxxx>