[PATCH v12 01/22] gpu: nova-core: set DMA mask width based on GPU architecture

From: John Hubbard

Date: Mon Jun 01 2026 - 23:24:05 EST

Replace the hardcoded 47-bit DMA mask with a GPU HAL method that
provides the correct value for the architecture: 47 bits for the TU102
HAL and 52 bits for the GH100 HAL.

Set the DMA mask in Gpu::new(). Gpu owns all DMA allocations for
the device, so no concurrent allocations can exist while the
constructor is still running.

Acked-by: Danilo Krummrich <dakr@xxxxxxxxxx>
Reviewed-by: Gary Guo <gary@xxxxxxxxxxx>
Co-developed-by: Alexandre Courbot <acourbot@xxxxxxxxxx>
Signed-off-by: Alexandre Courbot <acourbot@xxxxxxxxxx>
Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
drivers/gpu/nova-core/driver.rs | 15 ---------------
drivers/gpu/nova-core/gpu.rs | 12 ++++++++++--
drivers/gpu/nova-core/gpu/hal.rs | 8 +++++++-
drivers/gpu/nova-core/gpu/hal/gh100.rs | 9 ++++++++-
drivers/gpu/nova-core/gpu/hal/tu102.rs | 5 +++++
5 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index cff5034c2dcd..ade73da68be5 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -3,8 +3,6 @@
use kernel::{
auxiliary,
device::Core,
- dma::Device,
- dma::DmaMask,
pci,
pci::{
Class,
@@ -38,14 +36,6 @@ pub(crate) struct NovaCore<'bound> {

const BAR0_SIZE: usize = SZ_16M;

-// For now we only support Ampere which can use up to 47-bit DMA addresses.
-//
-// TODO: Add an abstraction for this to support newer GPUs which may support
-// larger DMA addresses. Limiting these GPUs to smaller address widths won't
-// have any adverse affects, unless installed on systems which require larger
-// DMA addresses. These systems should be quite rare.
-const GPU_DMA_BITS: u32 = 47;
-
pub(crate) type Bar0 = kernel::io::Mmio<BAR0_SIZE>;

kernel::pci_device_table!(
@@ -88,11 +78,6 @@ fn probe<'bound>(
pdev.enable_device_mem()?;
pdev.set_master();

- // SAFETY: No concurrent DMA allocations or mappings can be made because
- // the device is still being probed and therefore isn't being used by
- // other threads of execution.
- unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
-
Ok(try_pin_init!(NovaCore {
bar: pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0")?,
// TODO: Use `&bar` self-referential pin-init syntax once available.
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index aed992488db3..38c75df77e16 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -2,6 +2,7 @@

use kernel::{
device,
+ dma::Device,
fmt,
io::Io,
num::Bounded,
@@ -269,7 +270,7 @@ pub(crate) struct Gpu<'gpu> {

impl<'gpu> Gpu<'gpu> {
pub(crate) fn new(
- pdev: &'gpu pci::Device<device::Bound>,
+ pdev: &'gpu pci::Device<device::Core<'_>>,
bar: &'gpu Bar0,
) -> impl PinInit<Self, Error> + 'gpu {
try_pin_init!(Self {
@@ -280,7 +281,14 @@ pub(crate) fn new(

// We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
_: {
- hal::gpu_hal(spec.chipset).wait_gfw_boot_completion(bar)
+ let hal = hal::gpu_hal(spec.chipset);
+ let dma_mask = hal.dma_mask();
+
+ // SAFETY: `Gpu` owns all DMA allocations for this device, and we are
+ // still constructing it, so no concurrent DMA allocations can exist.
+ unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
+
+ hal.wait_gfw_boot_completion(bar)
.inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
},

diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
index 788de20ab5d3..0b636b713593 100644
--- a/drivers/gpu/nova-core/gpu/hal.rs
+++ b/drivers/gpu/nova-core/gpu/hal.rs
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-2.0

-use kernel::prelude::*;
+use kernel::{
+ dma::DmaMask,
+ prelude::*, //
+};

use crate::{
driver::Bar0,
@@ -16,6 +19,9 @@
pub(crate) trait GpuHal {
/// Waits for GFW_BOOT completion if required by this hardware family.
fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
+
+ /// Returns the DMA mask for the current architecture.
+ fn dma_mask(&self) -> DmaMask;
}

pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
diff --git a/drivers/gpu/nova-core/gpu/hal/gh100.rs b/drivers/gpu/nova-core/gpu/hal/gh100.rs
index 1ed5bccdda1d..41fbabb04ff8 100644
--- a/drivers/gpu/nova-core/gpu/hal/gh100.rs
+++ b/drivers/gpu/nova-core/gpu/hal/gh100.rs
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-2.0

-use kernel::prelude::*;
+use kernel::{
+ dma::DmaMask,
+ prelude::*, //
+};

use crate::driver::Bar0;

@@ -12,6 +15,10 @@ impl GpuHal for Gh100 {
fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
Ok(())
}
+
+ fn dma_mask(&self) -> DmaMask {
+ DmaMask::new::<52>()
+ }
}

const GH100: Gh100 = Gh100;
diff --git a/drivers/gpu/nova-core/gpu/hal/tu102.rs b/drivers/gpu/nova-core/gpu/hal/tu102.rs
index 08dd4434bd72..2881ab03dbcd 100644
--- a/drivers/gpu/nova-core/gpu/hal/tu102.rs
+++ b/drivers/gpu/nova-core/gpu/hal/tu102.rs
@@ -19,6 +19,7 @@
//! Note that the devinit sequence also needs to run during suspend/resume.

use kernel::{
+ dma::DmaMask,
io::{
poll::read_poll_timeout,
Io, //
@@ -80,6 +81,10 @@ fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
)
.map(|_| ())
}
+
+ fn dma_mask(&self) -> DmaMask {
+ DmaMask::new::<47>()
+ }
}

const TU102: Tu102 = Tu102;
--
2.54.0