[PATCH 1/2] iommu/amd: Keep x2apic enabled when appending amd_iommu=off

From: Adrian Huang (Lenovo)

Date: Thu Mar 19 2026 - 02:16:05 EST


When booting with amd_iommu=off, the AMD IOMMU driver currently
disables the entire IOMMU subsystem, which also results in x2APIC
being turned off. Consequently, the APIC mode falls back from
x2APIC to physical flat mode.

The dmesg log shows the transition:
x2apic: enabled by BIOS, switching to x2apic ops
...
APIC: Switched APIC routing to: cluster x2apic
...
APIC: Switch to symmetric I/O mode setup
x2apic: IRQ remapping doesn't support X2APIC mode
x2apic disabled
APIC: Switched APIC routing to: physical flat

Since physical flat mode supports only up to 255 APIC IDs,
systems with large CPU counts cannot fully initialize. For example,
on a 448-core system, the kernel reports repeated errors such as:
smpboot: CPU 112 has invalid APIC ID 100. Aborting bringup
smpboot: CPU 113 has invalid APIC ID 102. Aborting bringup
...
smp: Brought up 2 nodes, 224 CPUs

As a result, only the 224 CPUs whose APIC IDs are valid in physical flat
mode are brought up; bringup of every other CPU is aborted.

In contrast, on an Intel platform with 960 cores, booting with
intel_iommu=off does not disable x2APIC:
x2apic: enabled by BIOS, switching to x2apic ops
...
APIC: Switched APIC routing to: cluster x2apic
...
APIC: Switch to symmetric I/O mode setup
DMAR: Host address width 46
...
smpboot: CPU0: Intel(R) Xeon(R) Platinum 8490H (family: 0x6,
model: 0x8f, stepping: 0x6)
...
smp: Brought up 8 nodes, 960 CPUs

This confirms that x2APIC remains enabled when intel_iommu=off
is specified.

Adjust the semantics of "amd_iommu=off" so that:

* DMA translation is disabled
* x2APIC remains enabled (interrupt remapping, which it depends on, stays active)

This preserves x2APIC functionality and allows large CPU count
systems to operate correctly, while still disabling DMA remapping.

With this patch, the system correctly brings up all 448 cores when
booting with amd_iommu=off, as verified by the logs below:
x2apic: enabled by BIOS, switching to x2apic ops
...
APIC: Switched APIC routing to: cluster x2apic
...
APIC: Switch to symmetric I/O mode setup
...
smp: Brought up 2 nodes, 448 CPUs
...
AMD-Vi: Interrupt remapping enabled
AMD-Vi: X2APIC enabled
...

Signed-off-by: Adrian Huang (Lenovo) <adrianhuang0701@xxxxxxxxx>
---
drivers/iommu/amd/amd_iommu.h | 1 +
drivers/iommu/amd/init.c | 56 ++++++++++++++++++++++++++++-------
drivers/iommu/amd/iommu.c | 14 +++++++++
3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 1342e764a548..82c10f55f0ea 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -22,6 +22,7 @@ void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
void amd_iommu_restart_ppr_log(struct amd_iommu *iommu);
void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
+void amd_iommu_dev_set_pci_msi_domain(struct device *dev);
void iommu_feature_enable(struct amd_iommu *iommu, u8 bit);
void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
gfp_t gfp, size_t size);
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index f3fd7f39efb4..0f577534702d 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3396,6 +3396,41 @@ static __init void iommu_snp_enable(void)
#endif
}

+static int __init amd_iommu_devices_set_pci_msi_domain(void)
+{
+ struct pci_dev *dev = NULL;
+ struct amd_iommu *iommu;
+ int ret = 0;
+
+ /* Resolve each IOMMU's PCI device, then register its IRQ handler. */
+ for_each_iommu(iommu) {
+ iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
+ PCI_BUS_NUM(iommu->devid),
+ iommu->devid & 0xff);
+ if (!iommu->dev)
+ return -ENODEV;
+
+ ret = iommu_init_irq(iommu);
+ if (ret)
+ return ret;
+
+ iommu->dev->irq_managed = 1;
+ }
+
+ /*
+ * amd_iommu=off disables the IOMMU, but x2APIC is still required
+ * once APIC IDs exceed 255. Explicitly map the PCI Message Signaled
+ * Interrupt (MSI) domain of each PCI device to the interrupt domain
+ * of the IOMMU hardware so that interrupt routing remains valid.
+ */
+ for_each_pci_dev(dev)
+ amd_iommu_dev_set_pci_msi_domain(&dev->dev);
+
+ print_iommu_info();
+
+ return ret;
+}
+
/****************************************************************************
*
* AMD IOMMU Initialization State Machine
@@ -3416,13 +3451,8 @@ static int __init state_next(void)
}
break;
case IOMMU_IVRS_DETECTED:
- if (amd_iommu_disabled) {
- init_state = IOMMU_CMDLINE_DISABLED;
- ret = -EINVAL;
- } else {
- ret = early_amd_iommu_init();
- init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
- }
+ ret = early_amd_iommu_init();
+ init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
break;
case IOMMU_ACPI_FINISHED:
early_enable_iommus();
@@ -3430,9 +3460,15 @@ static int __init state_next(void)
init_state = IOMMU_ENABLED;
break;
case IOMMU_ENABLED:
- register_syscore(&amd_iommu_syscore);
- iommu_snp_enable();
- ret = amd_iommu_init_pci();
+ if (amd_iommu_disabled) {
+ amd_iommu_devices_set_pci_msi_domain();
+ init_state = IOMMU_CMDLINE_DISABLED;
+ ret = -EINVAL;
+ break; /* keep CMDLINE_DISABLED: skip the init_state update below */
+ }
+ register_syscore(&amd_iommu_syscore);
+ iommu_snp_enable();
+ ret = amd_iommu_init_pci();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
break;
case IOMMU_PCI_INIT:
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 81c4d7733872..75b10eca9ecf 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2439,6 +2439,20 @@ static void detach_device(struct device *dev)
mutex_unlock(&dev_data->mutex);
}

+/* Set the PCI MSI domain of @dev via the IOMMU found for it, if any. */
+void amd_iommu_dev_set_pci_msi_domain(struct device *dev)
+{
+ struct amd_iommu *iommu = NULL;
+
+ /* Only look up an IOMMU for devices that pass check_device(). */
+ if (check_device(dev))
+ iommu = rlookup_amd_iommu(dev);
+
+ /* Nothing to do when no IOMMU was found for this device. */
+ if (iommu)
+ amd_iommu_set_pci_msi_domain(dev, iommu);
+}
+
static struct iommu_device *amd_iommu_probe_device(struct device *dev)
{
struct iommu_device *iommu_dev;
--
2.47.3