[patch V4 00/15] Improve /proc/interrupts further

From: Thomas Gleixner

Date: Tue Mar 31 2026 - 03:31:15 EST


This is a follow up to v3 which can be found here:

https://lore.kernel.org/20260326214345.019130211@xxxxxxxxxx

The v1 cover letter contains a full analysis, explanation and numbers:

https://lore.kernel.org/20260303150539.513068586@xxxxxxxxxx

TLDR:

- The performance of reading /proc/interrupts has been improved
piecewise over the years, but most of the low-hanging fruit has been
left on the table.

Changes vs. V3:

- Reworked the NMI interrupt cleanup to be RT compatible. That was
noticed when trying to address the next point.

- Made the per-CPU and NMI type teardown paths update the "valid for proc" condition.

- Fixed the brainfart of writing to __ro_after_init memory by separating
the decision to emit a particular counter out into a simple bitmap and
making the descriptor table const.

- Adapted the GDB script changes to the bitmap mechanism and removed the
stale ERR/MIS prints which are no longer required.

- Made irq_init_stats() a late initcall so it's guaranteed that the
platform IPI and the posted MSI mechanisms have been initialized.

- Added the AMD/HYGON deferred MCE vector to the conditionals and omitted it
from emission when running on a CPU from other vendors.

Delta patch against v3 is below.

The series applies on top of v7.0-rc3 and is also available via git:

git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git irq-proc-v4

Thanks,

tglx
---
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 25d22e33e58c..194dfff84cb1 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -47,6 +47,4 @@ void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
#endif

-void irq_init_stats(void);
-
#endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 0b3723cec0b9..de1c35fa5e75 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -76,7 +76,7 @@ struct irq_stat_info {
#define IDS(idx, sym, txt) [IRQ_COUNT_##idx] = \
{ .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt }

-static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
+static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
ISS(NMI, "NMI", " Non-maskable interrupts\n"),
#ifdef CONFIG_X86_LOCAL_APIC
ISS(APIC_TIMER, "LOC", " Local timer interrupts\n"),
@@ -128,26 +128,36 @@ static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
#endif
};

-void __init irq_init_stats(void)
+static DECLARE_BITMAP(irq_stat_count_show, IRQ_COUNT_MAX) __read_mostly;
+
+static int __init irq_init_stats(void)
{
- struct irq_stat_info *info = irq_stat_info;
+ const struct irq_stat_info *info = irq_stat_info;

for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
- if (info->skip_vector && info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
- test_bit(info->skip_vector, system_vectors))
- info->skip_vector = 0;
+ if (!info->skip_vector || (info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
+ test_bit(info->skip_vector, system_vectors)))
+ set_bit(i, irq_stat_count_show);
}

#ifdef CONFIG_X86_LOCAL_APIC
if (!x86_platform_ipi_callback)
- irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
+ clear_bit(IRQ_COUNT_X86_PLATFORM_IPI, irq_stat_count_show);
#endif

#ifdef CONFIG_X86_POSTED_MSI
if (!posted_msi_enabled())
- irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1;
+ clear_bit(IRQ_COUNT_POSTED_MSI_NOTIFICATION, irq_stat_count_show);
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ clear_bit(IRQ_COUNT_DEFERRED_ERROR, irq_stat_count_show);
#endif
+ return 0;
}
+late_initcall(irq_init_stats);

/*
* Used for default enabled counters to increment the stats and to enable the
@@ -156,8 +166,7 @@ void __init irq_init_stats(void)
void irq_stat_inc_and_enable(enum irq_stat_counts which)
{
this_cpu_inc(irq_stat.counts[which]);
- /* Pairs with the READ_ONCE() in arch_show_interrupts() */
- WRITE_ONCE(irq_stat_info[which].skip_vector, 0);
+ set_bit(which, irq_stat_count_show);
}

#ifdef CONFIG_PROC_FS
@@ -169,7 +178,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
const struct irq_stat_info *info = irq_stat_info;

for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
- if (READ_ONCE(info->skip_vector))
+ if (!test_bit(i, irq_stat_count_show))
continue;

seq_printf(p, "%*s:", prec, info->symbol);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 325c0ad8fb9c..6ab9eac64670 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -104,8 +104,6 @@ void __init native_init_IRQ(void)
if (!cpu_feature_enabled(X86_FEATURE_FRED))
idt_setup_apic_and_irq_gates();

- irq_init_stats();
-
lapic_assign_system_vectors();

if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c4ebd17233f3..ae8b06e01948 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2028,24 +2028,32 @@ const void *free_irq(unsigned int irq, void *dev_id)
}
EXPORT_SYMBOL(free_irq);

-/* This function must be called with desc->lock held */
static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
{
+ struct irqaction *action = NULL;
const char *devname = NULL;

- desc->istate &= ~IRQS_NMI;
+ scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+ irq_nmi_teardown(desc);

- if (!WARN_ON(desc->action == NULL)) {
- irq_pm_remove_action(desc, desc->action);
- devname = desc->action->name;
- unregister_handler_proc(irq, desc->action);
+ desc->istate &= ~IRQS_NMI;

- kfree(desc->action);
+ if (!WARN_ON(desc->action == NULL)) {
+ action = desc->action;
+ irq_pm_remove_action(desc, action);
+ devname = action->name;
+ }
desc->action = NULL;
+
+ irq_settings_clr_disable_unlazy(desc);
+ irq_shutdown_and_deactivate(desc);
}

- irq_settings_clr_disable_unlazy(desc);
- irq_shutdown_and_deactivate(desc);
+ irq_proc_update_valid(desc);
+
+ if (action)
+ unregister_handler_proc(irq, action);
+ kfree(action);

irq_release_resources(desc);

@@ -2058,6 +2066,7 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
const void *free_nmi(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
+ void *ret;

if (!desc || WARN_ON(!irq_is_nmi(desc)))
return NULL;
@@ -2069,8 +2078,6 @@ const void *free_nmi(unsigned int irq, void *dev_id)
if (WARN_ON(desc->depth == 0))
disable_nmi_nosync(irq);

- guard(raw_spinlock_irqsave)(&desc->lock);
- irq_nmi_teardown(desc);
return __cleanup_nmi(irq, desc);
}

@@ -2320,13 +2327,14 @@ int request_nmi(unsigned int irq, irq_handler_t handler,
/* Setup NMI state */
desc->istate |= IRQS_NMI;
retval = irq_nmi_setup(desc);
- if (retval) {
- __cleanup_nmi(irq, desc);
- return -EINVAL;
- }
- return 0;
}

+ if (retval) {
+ __cleanup_nmi(irq, desc);
+ return -EINVAL;
+ }
+ return 0;
+
err_irq_setup:
irq_chip_pm_put(&desc->irq_data);
err_out:
@@ -2430,8 +2438,10 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
*action_ptr = action->next;

/* Demote from NMI if we killed the last action */
- if (!desc->action)
+ if (!desc->action) {
desc->istate &= ~IRQS_NMI;
+ irq_proc_update_valid(desc);
+ }
}

unregister_handler_proc(irq, action);
diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py
index 6ca7e32f35b0..418f3ece2f0f 100644
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -90,13 +90,6 @@ def show_irq_desc(prec, chip_width, irq):

return text

-def show_irq_err_count(prec):
- cnt = utils.gdb_eval_or_none("irq_err_count")
- text = ""
- if cnt is not None:
- text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
- return text
-
def x86_show_irqstat(prec, pfx, idx, desc):
irq_stat = gdb.parse_and_eval("&irq_stat.counts[%d]" %idx)
text = "%*s: " % (prec, pfx)
@@ -109,22 +102,18 @@ def x86_show_irqstat(prec, pfx, idx, desc):
def x86_show_interupts(prec):
info_type = gdb.lookup_type('struct irq_stat_info')
info = gdb.parse_and_eval('irq_stat_info')
+ bitmap = gdb.parse_and_eval('irq_stat_count_show')
+ nbits = 8 * int(bitmap.type.sizeof)

text = ""
for idx in range(int(info.type.sizeof / info_type.sizeof)):
- if info[idx]['skip_vector']:
+ show = bitmap[idx / nbits]
+ if not show & 1 << (idx % nbits):
continue
pfx = info[idx]['symbol'].string()
desc = info[idx]['text'].string()
text += x86_show_irqstat(prec, pfx, idx, desc)

- text += show_irq_err_count(prec)
-
- if constants.LX_CONFIG_X86_IO_APIC:
- cnt = utils.gdb_eval_or_none("irq_mis_count")
- if cnt is not None:
- text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
-
return text

def arm_common_show_interrupts(prec):