Re: [PATCH 0/6] iommu/amd: Refactors for ATS updates

From: Vasant Hegde

Date: Tue Jun 02 2026 - 04:54:59 EST


Pranjal,


On 6/1/2026 7:11 PM, Pranjal Shrivastava wrote:
> This series addresses some issues identified by Sashiko in the AMD IOMMU
> driver during the review of the subsystem-wide ATS update work [1].
>
> Patches 1-2 address the pre-existing bugs in iommu_ignore_device()
> regarding the order of alias clearing and cleaning up the DTEs.
>

Thanks for the series. While this series fixes few important bugs, at the end
certain checks are still scatters. I really want to move around the code bit so
that it makes easy to read.

ex: even after this series, in probe path its calling dev_is_pci() check 3
times! Device capability is still scattered between probe() and
iommu_init_device() function.

Does something like below makes sense (untested code)? This doesn't address
iommu_ignore_device() issues. May be we should rename iommu_ignore_device() to
something liek disable_device_dma that clears TV bit and other things.


-Vasant

---<---

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 57dc8fabc7d9..c628a2e7e3a8 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -672,11 +672,12 @@ static void pdev_disable_caps(struct pci_dev *pdev)
* This function checks if the driver got a valid device from the caller to
* avoid dereferencing invalid pointers.
*/
-static bool check_device(struct device *dev)
+static bool iommu_lookup_device(struct device *dev,
+ struct amd_iommu **iommu_out, u16 *devid_out)
{
struct amd_iommu_pci_seg *pci_seg;
- struct amd_iommu *iommu;
- int devid, sbdf;
+ int sbdf;
+ u16 devid;

if (!dev)
return false;
@@ -687,7 +688,8 @@ static bool check_device(struct device *dev)
devid = PCI_SBDF_TO_DEVID(sbdf);

iommu = rlookup_amd_iommu(dev);
- if (!iommu)
+ /* Not registered yet? */
+ if (!iommu || !iommu->iommu.ops)
return false;

/* Out of our scope? */
@@ -695,25 +697,19 @@ static bool check_device(struct device *dev)
if (devid > pci_seg->last_bdf)
return false;

+ *iommu_out = iommu;
+ *devid_out = devid;
return true;
}

-static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
+static struct iommu_dev_data *iommu_init_device(struct amd_iommu *iommu,
+ struct device *dev, u16 devid)
{
struct iommu_dev_data *dev_data;
- int devid, sbdf;
-
- if (dev_iommu_priv_get(dev))
- return 0;

- sbdf = get_device_sbdf_id(dev);
- if (sbdf < 0)
- return sbdf;
-
- devid = PCI_SBDF_TO_DEVID(sbdf);
dev_data = find_dev_data(iommu, devid);
if (!dev_data)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);

dev_data->dev = dev;

@@ -724,6 +720,25 @@ static int iommu_init_device(struct amd_iommu *iommu,
struct device *dev)
dev_iommu_priv_set(dev, dev_data);
setup_aliases(iommu, dev);

+ iommu_completion_wait(iommu);
+
+ return dev_data;
+}
+
+static void iommu_init_device_caps(struct iommu_dev_data *dev_data,
+ struct device *dev,
+ struct amd_iommu *iommu)
+{
+ if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
+ dev_data->max_irqs = MAX_IRQS_PER_TABLE_2K;
+ else
+ dev_data->max_irqs = MAX_IRQS_PER_TABLE_512;
+
+ amd_iommu_set_pci_msi_domain(dev, iommu);
+
+ if (!dev_is_pci(dev))
+ return;
+
/*
* By default we use passthrough mode for IOMMUv2 capable device.
* But if amd_iommu=force_isolation is set (e.g. to debug DMA to
@@ -731,11 +746,21 @@ static int iommu_init_device(struct amd_iommu *iommu,
struct device *dev)
* it'll be forced to go into translation mode.
*/
if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
- dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) {
+ amd_iommu_gt_ppr_supported()) {
dev_data->flags = pdev_get_caps(to_pci_dev(dev));
}

- return 0;
+ /*
+ * If IOMMU and device supports PASID then it will contain max
+ * supported PASIDs, else it will be zero.
+ */
+ if (amd_iommu_pasid_supported() &&
+ pdev_pasid_supported(dev_data)) {
+ dev_data->max_pasids = min_t(u32, iommu->iommu.max_pasids,
+ pci_max_pasids(to_pci_dev(dev)));
+ }
+
+ pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
}

static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
@@ -2451,43 +2476,29 @@ static struct iommu_device
*amd_iommu_probe_device(struct device *dev)
struct amd_iommu *iommu;
struct iommu_dev_data *dev_data;
int ret;
+ u16 devid;

- if (!check_device(dev))
- return ERR_PTR(-ENODEV);
-
- iommu = rlookup_amd_iommu(dev);
- if (!iommu)
- return ERR_PTR(-ENODEV);
-
- /* Not registered yet? */
- if (!iommu->iommu.ops)
+ if (!iommu_lookup_device(dev, &iommu, &devid))
return ERR_PTR(-ENODEV);

if (dev_iommu_priv_get(dev))
return &iommu->iommu;

- ret = iommu_init_device(iommu, dev);
- if (ret) {
- dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
- iommu_dev = ERR_PTR(ret);
+ dev_data = iommu_init_device(iommu, dev, devid);
+ if (IS_ERR(dev_data)) {
+ dev_err(dev, "Failed to initialize \n", devid);
iommu_ignore_device(iommu, dev);
- goto out_err;
+ return ERR_CAST(dev_data);
}

- amd_iommu_set_pci_msi_domain(dev, iommu);
+ iommu_init_device_caps(dev_data, dev, iommu);
iommu_dev = &iommu->iommu;

/*
- * If IOMMU and device supports PASID then it will contain max
- * supported PASIDs, else it will be zero.
+ * When DMA translation is unavailable return error so the iommu core
+ * won't attempt domain attach for this device. But interrupt-remap
+ * is still supported. Hence do not ignore the device.
*/
- dev_data = dev_iommu_priv_get(dev);
- if (amd_iommu_pasid_supported() && dev_is_pci(dev) &&
- pdev_pasid_supported(dev_data)) {
- dev_data->max_pasids = min_t(u32, iommu->iommu.max_pasids,
- pci_max_pasids(to_pci_dev(dev)));
- }
-
if (amd_iommu_pgtable == PD_MODE_NONE) {
pr_warn_once("%s: DMA translation not supported by iommu.\n",
__func__);
@@ -2495,16 +2506,6 @@ static struct iommu_device *amd_iommu_probe_device(struct
device *dev)
goto out_err;
}

- iommu_completion_wait(iommu);
-
- if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
- dev_data->max_irqs = MAX_IRQS_PER_TABLE_2K;
- else
- dev_data->max_irqs = MAX_IRQS_PER_TABLE_512;
-
- if (dev_is_pci(dev))
- pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
-
out_err:
return iommu_dev;
}









> Patches 3-4 refine the probe error paths. As noted[2] by Ankit & Sashiko,
> unconditionally calling iommu_ignore_device() on probe failure breaks
> IRQ remapping for devices in PD_MODE_NONE. We now split the error paths
> in probe_device to preserve interrupt mapping for non-fatal failures
> while ensuring that dangling device pointers are cleared to prevent
> potential UAFs.
>
> Finally, patch 5 implements the Fail Hard pattern being standardized
> for ATS, ensuring configuration errors are caught during probe_device and
> ATS enablement failures are reported with a WARN_ON().
>
> Patch 6 is carried forward as is from the original ATS work [3] to
> maintain bisectibility.
>
> [1] https://lore.kernel.org/all/20260529111208.387412-1-praan@xxxxxxxxxx/
> [2] https://lore.kernel.org/all/20260529153216.2AD1E1F00899@xxxxxxxxxxxxxxx/
> [3] https://lore.kernel.org/all/20260529111208.387412-4-praan@xxxxxxxxxx/
>
> Thanks,
> Praan
>
> Pranjal Shrivastava (6):
> iommu/amd: Clear aliases before setting the rlookup_table to NULL
> iommu/amd: Clear DTE with update_dte256 in iommu_ignore_device()
> iommu/amd: Split probe error paths to preserve IRQ remapping
> iommu/amd: Fix Use-After-Free in non-fatal probe error path
> iommu/amd: Fail probe on ATS configuration failure
> PCI/ATS: Mandate checking pci_ats_supported() before pci_prepare_ats()
>
> drivers/iommu/amd/iommu.c | 61 ++++++++++++++++++++++++++++++++-------
> drivers/pci/ats.c | 6 ++--
> 2 files changed, 55 insertions(+), 12 deletions(-)
>
> base-commit: 283d245468a2b61c41aa8b582f25ed5615d1c304