Re: [PATCH v2 04/16] iommu: Implement device and IOMMU HW preservation

From: Samiullah Khawaja

Date: Mon May 18 2026 - 14:44:54 EST


On Mon, May 18, 2026 at 01:55:47PM +0000, Pranjal Shrivastava wrote:
On Mon, Apr 27, 2026 at 05:56:21PM +0000, Samiullah Khawaja wrote:
Add IOMMU ops to preserve/unpreserve a device. These can be implemented
by the IOMMU drivers that support preservation of devices that have
their IOMMU domains preserved. During device preservation the state of
the associated IOMMU is also preserved as dependency.

Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
---
drivers/iommu/liveupdate.c | 162 +++++++++++++++++++++++++++++++
include/linux/iommu-liveupdate.h | 33 +++++++
include/linux/iommu.h | 20 ++++
3 files changed, 215 insertions(+)

diff --git a/drivers/iommu/liveupdate.c b/drivers/iommu/liveupdate.c
index f71f14518248..765d042e22e3 100644
--- a/drivers/iommu/liveupdate.c
+++ b/drivers/iommu/liveupdate.c
@@ -11,6 +11,7 @@
#include <linux/liveupdate.h>
#include <linux/iommu-liveupdate.h>
#include <linux/iommu.h>
+#include <linux/pci.h>
#include <linux/errno.h>

#define iommu_max_objs_per_page(_array) \
@@ -293,3 +294,164 @@ void iommu_domain_unpreserve(struct iommu_domain *domain)
domain->preserved_state = NULL;
}
EXPORT_SYMBOL_GPL(iommu_domain_unpreserve);
+
+static struct iommu_hw_ser *alloc_iommu_hw_ser(struct iommu_flb_obj *flb)
+{
+ int idx;
+
+ idx = alloc_object_ser((struct iommu_array_hdr_ser **)&flb->curr_iommu_array,
+ iommu_max_objs_per_page(flb->curr_iommu_array));

Nit: Same thing about brittle casts here, shall we make them void ** and
cast then within alloc_object_set ?

Agreed. Will change this.


+ if (idx < 0)
+ return ERR_PTR(idx);
+
+ flb->curr_iommu_array->objects[idx].hdr.ref_count = 1;
+ return &flb->curr_iommu_array->objects[idx];
+}
+
+static int iommu_preserve_locked(struct iommu_device *iommu,
+ struct iommu_flb_obj *flb_obj)
+{
+ struct iommu_hw_ser *iommu_hw_ser;
+ int ret;
+
+ if (!iommu->ops->preserve)
+ return -EOPNOTSUPP;
+
+ lockdep_assert_held(&flb_obj->lock);
+ if (iommu->outgoing_preserved_state) {
+ iommu->outgoing_preserved_state->hdr.ref_count++;
+ return 0;
+ }
+
+ iommu_hw_ser = alloc_iommu_hw_ser(flb_obj);
+ if (IS_ERR(iommu_hw_ser))
+ return PTR_ERR(iommu_hw_ser);
+
+ ret = iommu->ops->preserve(iommu, iommu_hw_ser);
+ if (ret) {
+ iommu_hw_ser->hdr.deleted = true;
+ return ret;
+ }
+
+ iommu->outgoing_preserved_state = iommu_hw_ser;
+ return ret;
+}
+
+static void iommu_unpreserve_locked(struct iommu_device *iommu,
+ struct iommu_flb_obj *flb_obj)
+{
+ struct iommu_hw_ser *iommu_hw_ser = iommu->outgoing_preserved_state;
+
+ lockdep_assert_held(&flb_obj->lock);
+ iommu_hw_ser->hdr.ref_count--;
+ if (iommu_hw_ser->hdr.ref_count)

Shall we add a defensive if (WARN_ON(!iommu_hw_ser)) ? I'm aware we
check this on within iommu_unpreserve_device() but we don't seem to
check it before calling iommu_unpreserve_locked() in the error path of
iommu_preserve_device.

Agreed. I will add this.

+ return;
+
+ iommu->outgoing_preserved_state = NULL;
+ iommu->ops->unpreserve(iommu, iommu_hw_ser);

We seem to assume we'll always have unpreserve implemented? If so, we
should check it during the iommu registration itself and fail it, i.e.

inside iommu_device_register() we could add something like:

#ifdef CONFIG_IOMMU_LIVEUPDATE
if ((iommu->ops->preserve && !iommu->ops->unpreserve) ||
(!iommu->ops->preserve && iommu->ops->unpreserve)) {
pr_err("IOMMU: %s: Asymmetric live-update operations detected\n",
dev_name(iommu->dev));
return -EINVAL;
}
#endif

This prevents a half-baked iommu driver from ever spinning up, completely
eliminating the need to check for it inside the live-update session paths.

Replied to this in the other thread where you suggested this inline with
Baolu's comment.

+ iommu_hw_ser->hdr.deleted = true;
+}
+
+static struct iommu_device_ser *alloc_iommu_device_ser(struct iommu_flb_obj *flb)
+{
+ int idx;
+
+ idx = alloc_object_ser((struct iommu_array_hdr_ser **)&flb->curr_device_array,

Nit: Same thing about brittle casts here, shall we make them void ** and
cast then within alloc_object_set ?

Will update this in next revision.

+ iommu_max_objs_per_page(flb->curr_device_array));
+ if (idx < 0)
+ return ERR_PTR(idx);
+
+ flb->curr_device_array->objects[idx].hdr.ref_count = 1;
+ return &flb->curr_device_array->objects[idx];
+}
+

[snip]
+
+ ret = iommu_preserve_locked(iommu->iommu_dev, flb_obj);
+ if (ret) {
+ device_ser->hdr.deleted = true;
+ return ret;
+ }
+
+ device_ser->domain_iommu_ser.domain_phys = __pa(domain->preserved_state);
+ device_ser->domain_iommu_ser.iommu_phys = __pa(iommu->iommu_dev->outgoing_preserved_state);

Nit: Should these be updated to use virt_to_phys as well?

Will update these.

+ device_ser->devid = pci_dev_id(pdev);
+ device_ser->pci_domain_nr = pci_domain_nr(pdev->bus);
+
+ ret = iommu->iommu_dev->ops->preserve_device(dev, device_ser);
+ if (ret) {
+ device_ser->hdr.deleted = true;
+ iommu_unpreserve_locked(iommu->iommu_dev, flb_obj);
+ return ret;
+ }
+
+ dev->iommu->device_ser = device_ser;
+ *preserved_state = virt_to_phys(device_ser);
+ return 0;
+}
+

[...]

Thanks,
Praan


Sami