Re: [PATCH v2 09/16] iommu/vt-d: Restore IOMMU state and reclaimed domain ids

From: Pranjal Shrivastava

Date: Tue May 19 2026 - 17:48:26 EST


On Mon, Apr 27, 2026 at 05:56:26PM +0000, Samiullah Khawaja wrote:
> During boot fetch the preserved state of IOMMU unit and if found then
> restore the state.
>
> - Reuse the root_table that was preserved in the previous kernel.
> - Reclaim the domain ids of the preserved domains for each preserved
> devices so these are not acquired by another domain.
>
> Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
> ---
> drivers/iommu/intel/iommu.c | 55 ++++++++++++++++++++++--------
> drivers/iommu/intel/iommu.h | 7 ++++
> drivers/iommu/intel/liveupdate.c | 57 ++++++++++++++++++++++++++++++++
> 3 files changed, 105 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 68fecd4e57fa..4118a0861f38 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -670,10 +670,17 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
> #endif
>
> /* iommu handling */
> -static int iommu_alloc_root_entry(struct intel_iommu *iommu)
> +static int iommu_alloc_root_entry(struct intel_iommu *iommu,
> + struct iommu_hw_ser *iommu_ser)
> {
> struct root_entry *root;
>
> + if (iommu_ser) {
> + intel_iommu_liveupdate_restore_root_table(iommu, iommu_ser);
> + __iommu_flush_cache(iommu, iommu->root_entry, ROOT_SIZE);
> + return 0;
> + }
> +

Minor nit: I still believe this condition block can be moved into the
caller? Since the called fetches iommu_ser, it can call this as a stand
alone helper and bypass calling iommu_alloc_root_entry if (iommu_ser).

> root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
> if (!root) {
> pr_err("Allocating root entry for %s failed\n",
> @@ -992,15 +999,16 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
> iommu_disable_translation(iommu);
> }
>
> -static void free_dmar_iommu(struct intel_iommu *iommu)
> +static void free_dmar_iommu(struct intel_iommu *iommu, struct iommu_hw_ser *iommu_ser)
> {
> if (iommu->copied_tables) {
> bitmap_free(iommu->copied_tables);
> iommu->copied_tables = NULL;
> }
>
> - /* free context mapping */
> - free_context_table(iommu);
> + /* free context mapping if there is no serialized state. */
> + if (!iommu_ser)
> + free_context_table(iommu);
>
> if (ecap_prs(iommu->ecap))
> intel_iommu_finish_prq(iommu);
> @@ -1611,6 +1619,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
>
> static int __init init_dmars(void)
> {
> + struct iommu_hw_ser *iommu_ser = NULL;
> struct dmar_drhd_unit *drhd;
> struct intel_iommu *iommu;
> int ret;
> @@ -1633,8 +1642,12 @@ static int __init init_dmars(void)
> intel_pasid_max_id);
> }
>
> + iommu_ser = iommu_get_preserved_data(iommu->reg_phys, IOMMU_INTEL);
> +
> intel_iommu_init_qi(iommu);
> - init_translation_status(iommu);
> +
> + if (!iommu_ser)
> + init_translation_status(iommu);
>
> if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
> iommu_disable_translation(iommu);
> @@ -1648,7 +1661,7 @@ static int __init init_dmars(void)
> * we could share the same root & context tables
> * among all IOMMU's. Need to Split it later.
> */
> - ret = iommu_alloc_root_entry(iommu);
> + ret = iommu_alloc_root_entry(iommu, iommu_ser);
> if (ret)
> goto free_iommu;
>
> @@ -1732,8 +1745,12 @@ static int __init init_dmars(void)
>
> free_iommu:
> for_each_active_iommu(iommu, drhd) {
> - disable_dmar_iommu(iommu);
> - free_dmar_iommu(iommu);
> + iommu_ser = iommu_get_preserved_data(iommu->reg_phys, IOMMU_INTEL);
> +
> + if (!iommu_ser)
> + disable_dmar_iommu(iommu);
> +
> + free_dmar_iommu(iommu, iommu_ser);
> }
>

I'm wondering what happens if init_dmars fails for a preserved iommu?
Since we have non NULL iommu_ser, we'll skip disable_dmar_iommu and skip
free_context_table() inside free_dmar_iommu() as well. I'm concerned we
might leak some resources in this case. If the failure happens after we
set restored = 1, even a rmmod -> modprobe cycle won't help fix the leak

> return ret;
> @@ -2107,15 +2124,19 @@ int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
> static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
> {
> struct intel_iommu *iommu = dmaru->iommu;
> + struct iommu_hw_ser *iommu_ser = NULL;
> int ret;
>
> + /* Use IOMMU HW unit MMIO base to identify the preserved state. */
> + iommu_ser = iommu_get_preserved_data(iommu->reg_phys, IOMMU_INTEL);

Thanks for adding that comment! Really clever btw :)

> +
> /*
> * Disable translation if already enabled prior to OS handover.
> */
> - if (iommu->gcmd & DMA_GCMD_TE)
> + if (!iommu_ser && iommu->gcmd & DMA_GCMD_TE)
> iommu_disable_translation(iommu);
>
> - ret = iommu_alloc_root_entry(iommu);
> + ret = iommu_alloc_root_entry(iommu, iommu_ser);
> if (ret)
> goto out;
>

[snip]

> +
> +static int _restore_used_domain_ids(struct iommu_device_ser *ser, void *arg)
> +{
> + int id = ser->domain_iommu_ser.attachment_id;
> + struct iommu_hw_ser *iommu_hw_ser;
> + struct intel_iommu *iommu = arg;
> +
> + iommu_hw_ser = phys_to_virt(ser->domain_iommu_ser.iommu_phys);

We should check for iommu_phys being NULL here.. I know corruptions can
be funnier but a WARN_ON(!iommu_phys) could help catch NULL corruptions

> + if (iommu_hw_ser->type != IOMMU_INTEL)
> + return 0;
> +
> + /* Only allocate domain ID from associated IOMMU HW unit */
> + if (iommu_hw_ser->intel.phys_addr != iommu->reg_phys)
> + return 0;
> +
> + /*
> + * This can fail as multiple preserved devices can share the same domain
> + * ID. Since this is done during DMAR init so these failures can be
> + * ignored.
> + */
> + ida_alloc_range(&iommu->domain_ida, id, id, GFP_ATOMIC);
> + return 0;
> +}
> +

Thanks,
Praan